diff options
Diffstat (limited to 'xlators/features/changelog/src')
18 files changed, 8023 insertions, 0 deletions
diff --git a/xlators/features/changelog/src/Makefile.am b/xlators/features/changelog/src/Makefile.am new file mode 100644 index 00000000000..eee7dfa238d --- /dev/null +++ b/xlators/features/changelog/src/Makefile.am @@ -0,0 +1,29 @@ +xlator_LTLIBRARIES = changelog.la + +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +noinst_HEADERS = changelog-helpers.h changelog-mem-types.h changelog-rt.h \ + changelog-rpc-common.h changelog-misc.h changelog-encoders.h \ + changelog-rpc-common.h changelog-rpc.h changelog-ev-handle.h \ + changelog-messages.h + +changelog_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +changelog_la_SOURCES = changelog.c changelog-rt.c changelog-helpers.c \ + changelog-encoders.c changelog-rpc.c changelog-barrier.c \ + changelog-rpc-common.c changelog-ev-handle.c +changelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(top_builddir)/rpc/xdr/src/libgfxdr.la \ + $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(top_srcdir)/rpc/rpc-lib/src \ + -I$(top_srcdir)/rpc/rpc-transport/socket/src \ + -I$(top_srcdir)/xlators/features/changelog/lib/src/ \ + -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \ + -DDATADIR=\"$(localstatedir)\" + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/changelog/src/changelog-barrier.c b/xlators/features/changelog/src/changelog-barrier.c new file mode 100644 index 00000000000..0fb89ddb127 --- /dev/null +++ b/xlators/features/changelog/src/changelog-barrier.c @@ -0,0 +1,131 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "changelog-helpers.h" +#include "changelog-messages.h" +#include <glusterfs/call-stub.h> + +/* Enqueue a stub*/ +void +__chlog_barrier_enqueue(xlator_t *this, call_stub_t *stub) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + GF_ASSERT(priv); + + list_add_tail(&stub->list, &priv->queue); + priv->queue_size++; + + return; +} + +/* Dequeue a stub */ +call_stub_t * +__chlog_barrier_dequeue(xlator_t *this, struct list_head *queue) +{ + call_stub_t *stub = NULL; + changelog_priv_t *priv = NULL; + + priv = this->private; + GF_ASSERT(priv); + + if (list_empty(queue)) + goto out; + + stub = list_entry(queue->next, call_stub_t, list); + list_del_init(&stub->list); + +out: + return stub; +} + +/* Dequeue all the stubs and call corresponding resume functions */ +void +chlog_barrier_dequeue_all(xlator_t *this, struct list_head *queue) +{ + call_stub_t *stub = NULL; + + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS, + NULL); + + while ((stub = __chlog_barrier_dequeue(this, queue))) + call_resume(stub); + + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED, NULL); + return; +} + +/* Function called on changelog barrier timeout */ +void +chlog_barrier_timeout(void *data) +{ + xlator_t *this = NULL; + changelog_priv_t *priv = NULL; + struct list_head queue = { + 0, + }; + + this = data; + THIS = this; + priv = this->private; + + INIT_LIST_HEAD(&queue); + + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_TIMEOUT, NULL); + + LOCK(&priv->lock); + { + __chlog_barrier_disable(this, &queue); + } + UNLOCK(&priv->lock); + + chlog_barrier_dequeue_all(this, &queue); + + return; +} + +/* Disable changelog barrier enable flag */ +void +__chlog_barrier_disable(xlator_t *this, struct list_head *queue) +{ + changelog_priv_t *priv = this->private; + GF_ASSERT(priv); + + if (priv->timer) { + gf_timer_call_cancel(this->ctx, priv->timer); + priv->timer = NULL; + } + + list_splice_init(&priv->queue, queue); + priv->queue_size = 0; + priv->barrier_enabled = _gf_false; +} + +/* Enable chagelog barrier enable with timer */ +int +__chlog_barrier_enable(xlator_t *this, changelog_priv_t *priv) +{ + int ret = -1; + + priv->timer = gf_timer_call_after(this->ctx, priv->timeout, + chlog_barrier_timeout, (void *)this); + if (!priv->timer) { + gf_smsg(this->name, GF_LOG_CRITICAL, 0, + CHANGELOG_MSG_TIMEOUT_ADD_FAILED, NULL); + goto out; + } + + priv->barrier_enabled = _gf_true; + ret = 0; +out: + return ret; +} diff --git a/xlators/features/changelog/src/changelog-encoders.c b/xlators/features/changelog/src/changelog-encoders.c new file mode 100644 index 00000000000..63754516c2e --- /dev/null +++ b/xlators/features/changelog/src/changelog-encoders.c @@ -0,0 +1,232 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "changelog-encoders.h" + +size_t +entry_fn(void *data, char *buffer, gf_boolean_t encode) +{ + char *tmpbuf = NULL; + size_t bufsz = 0; + struct changelog_entry_fields *ce = NULL; + + ce = (struct changelog_entry_fields *)data; + + if (encode) { + tmpbuf = uuid_utoa(ce->cef_uuid); + CHANGELOG_FILL_BUFFER(buffer, bufsz, tmpbuf, strlen(tmpbuf)); + } else { + CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_uuid, sizeof(uuid_t)); + } + + CHANGELOG_FILL_BUFFER(buffer, bufsz, "/", 1); + CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_bname, strlen(ce->cef_bname)); + return bufsz; +} + +size_t +del_entry_fn(void *data, char *buffer, gf_boolean_t encode) +{ + char *tmpbuf = NULL; + size_t bufsz = 0; + struct changelog_entry_fields *ce = NULL; + + ce = (struct changelog_entry_fields *)data; + + if (encode) { + tmpbuf = uuid_utoa(ce->cef_uuid); + CHANGELOG_FILL_BUFFER(buffer, bufsz, tmpbuf, strlen(tmpbuf)); + } else { + CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_uuid, sizeof(uuid_t)); + } + + CHANGELOG_FILL_BUFFER(buffer, bufsz, "/", 1); + CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_bname, strlen(ce->cef_bname)); + CHANGELOG_FILL_BUFFER(buffer, bufsz, "\0", 1); + + if (ce->cef_path[0] == '\0') { + CHANGELOG_FILL_BUFFER(buffer, bufsz, "\0", 1); + } else { + CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_path, + strlen(ce->cef_path)); + } + + return bufsz; +} + +size_t +fop_fn(void *data, char *buffer, gf_boolean_t encode) +{ + char buf[10] = { + 0, + }; + size_t bufsz = 0; + glusterfs_fop_t fop = 0; + + fop = *(glusterfs_fop_t *)data; + + if (encode) { + (void)snprintf(buf, sizeof(buf), "%d", fop); + CHANGELOG_FILL_BUFFER(buffer, bufsz, buf, strlen(buf)); + } else + CHANGELOG_FILL_BUFFER(buffer, bufsz, &fop, sizeof(fop)); + + return bufsz; +} + +size_t +number_fn(void *data, char *buffer, gf_boolean_t encode) +{ + size_t bufsz = 0; + unsigned int nr = 0; + char buf[20] = { + 0, + }; + + nr = *(unsigned int *)data; + + if (encode) { + (void)snprintf(buf, sizeof(buf), "%u", nr); + CHANGELOG_FILL_BUFFER(buffer, bufsz, buf, strlen(buf)); + } else + CHANGELOG_FILL_BUFFER(buffer, bufsz, &nr, sizeof(unsigned int)); + + return bufsz; +} + +void +entry_free_fn(void *data) +{ + changelog_opt_t *co = data; + + if (!co) + return; + + GF_FREE(co->co_entry.cef_bname); +} + +void +del_entry_free_fn(void *data) +{ + changelog_opt_t *co = data; + + if (!co) + return; + + GF_FREE(co->co_entry.cef_bname); + GF_FREE(co->co_entry.cef_path); +} + +/** + * try to write all data in one shot + */ + +static void +changelog_encode_write_xtra(changelog_log_data_t *cld, char *buffer, + size_t *off, gf_boolean_t encode) +{ + int i = 0; + size_t offset = 0; + void *data = NULL; + changelog_opt_t *co = NULL; + + offset = *off; + + co = (changelog_opt_t *)cld->cld_ptr; + + for (; i < cld->cld_xtra_records; i++, co++) { + CHANGELOG_FILL_BUFFER(buffer, offset, "\0", 1); + + switch (co->co_type) { + case CHANGELOG_OPT_REC_FOP: + data = &co->co_fop; + break; + case CHANGELOG_OPT_REC_ENTRY: + data = &co->co_entry; + break; + case CHANGELOG_OPT_REC_UINT32: + data = &co->co_uint32; + break; + } + + if (co->co_convert) + offset += co->co_convert(data, buffer + offset, encode); + else /* no coversion: write it out as it is */ + CHANGELOG_FILL_BUFFER(buffer, offset, data, co->co_len); + } + + *off = offset; +} + +int +changelog_encode_ascii(xlator_t *this, changelog_log_data_t *cld) +{ + size_t off = 0; + size_t gfid_len = 0; + char *gfid_str = NULL; + char *buffer = NULL; + changelog_priv_t *priv = NULL; + + priv = this->private; + + gfid_str = uuid_utoa(cld->cld_gfid); + gfid_len = strlen(gfid_str); + + /* extra bytes for decorations */ + buffer = alloca(gfid_len + cld->cld_ptr_len + 10); + CHANGELOG_STORE_ASCII(priv, buffer, off, gfid_str, gfid_len, cld); + + if (cld->cld_xtra_records) + changelog_encode_write_xtra(cld, buffer, &off, _gf_true); + + CHANGELOG_FILL_BUFFER(buffer, off, "\0", 1); + + return changelog_write_change(priv, buffer, off); +} + +int +changelog_encode_binary(xlator_t *this, changelog_log_data_t *cld) +{ + size_t off = 0; + char *buffer = NULL; + changelog_priv_t *priv = NULL; + + priv = this->private; + + /* extra bytes for decorations */ + buffer = alloca(sizeof(uuid_t) + cld->cld_ptr_len + 10); + CHANGELOG_STORE_BINARY(priv, buffer, off, cld->cld_gfid, cld); + + if (cld->cld_xtra_records) + changelog_encode_write_xtra(cld, buffer, &off, _gf_false); + + CHANGELOG_FILL_BUFFER(buffer, off, "\0", 1); + + return changelog_write_change(priv, buffer, off); +} + +static struct changelog_encoder cb_encoder[] = { + [CHANGELOG_ENCODE_BINARY] = + { + .encoder = CHANGELOG_ENCODE_BINARY, + .encode = changelog_encode_binary, + }, + [CHANGELOG_ENCODE_ASCII] = + { + .encoder = CHANGELOG_ENCODE_ASCII, + .encode = changelog_encode_ascii, + }, +}; + +void +changelog_encode_change(changelog_priv_t *priv) +{ + priv->ce = &cb_encoder[priv->encode_mode]; +} diff --git a/xlators/features/changelog/src/changelog-encoders.h b/xlators/features/changelog/src/changelog-encoders.h new file mode 100644 index 00000000000..26252696d56 --- /dev/null +++ b/xlators/features/changelog/src/changelog-encoders.h @@ -0,0 +1,50 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CHANGELOG_ENCODERS_H +#define _CHANGELOG_ENCODERS_H + +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> + +#include "changelog-helpers.h" + +#define CHANGELOG_STORE_ASCII(priv, buf, off, gfid, gfid_len, cld) \ + do { \ + CHANGELOG_FILL_BUFFER(buffer, off, priv->maps[cld->cld_type], 1); \ + CHANGELOG_FILL_BUFFER(buffer, off, gfid, gfid_len); \ + } while (0) + +#define CHANGELOG_STORE_BINARY(priv, buf, off, gfid, cld) \ + do { \ + CHANGELOG_FILL_BUFFER(buffer, off, priv->maps[cld->cld_type], 1); \ + CHANGELOG_FILL_BUFFER(buffer, off, gfid, sizeof(uuid_t)); \ + } while (0) + +size_t +entry_fn(void *data, char *buffer, gf_boolean_t encode); +size_t +del_entry_fn(void *data, char *buffer, gf_boolean_t encode); +size_t +fop_fn(void *data, char *buffer, gf_boolean_t encode); +size_t +number_fn(void *data, char *buffer, gf_boolean_t encode); +void +entry_free_fn(void *data); +void +del_entry_free_fn(void *data); +int +changelog_encode_binary(xlator_t *, changelog_log_data_t *); +int +changelog_encode_ascii(xlator_t *, changelog_log_data_t *); +void +changelog_encode_change(changelog_priv_t *); + +#endif /* _CHANGELOG_ENCODERS_H */ diff --git a/xlators/features/changelog/src/changelog-ev-handle.c b/xlators/features/changelog/src/changelog-ev-handle.c new file mode 100644 index 00000000000..aa94459de5a --- /dev/null +++ b/xlators/features/changelog/src/changelog-ev-handle.c @@ -0,0 +1,412 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "changelog-ev-handle.h" +#include "changelog-rpc-common.h" +#include "changelog-helpers.h" + +struct rpc_clnt_program changelog_ev_program; + +#define NR_IOVEC (MAX_IOVEC - 3) +struct ev_rpc_vec { + int count; + struct iovec vector[NR_IOVEC]; + + /* sequence number */ + unsigned long seq; +}; + +struct ev_rpc { + rbuf_list_t *rlist; + struct rpc_clnt *rpc; + struct ev_rpc_vec vec; +}; + +/** + * As of now this just does the minimal (retval logging). Going further + * un-acknowledges sequence numbers can be retransmitted and other + * intelligence can be built into the server. + */ +int +changelog_event_dispatch_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return 0; +} + +/* dispatcher RPC */ +int +changelog_dispatch_vec(call_frame_t *frame, xlator_t *this, + struct rpc_clnt *rpc, struct ev_rpc_vec *vec) +{ + struct timeval tv = { + 0, + }; + changelog_event_req req = { + 0, + }; + + (void)gettimeofday(&tv, NULL); + + /** + * Event dispatch RPC header contains a sequence number for each + * dispatch. This allows the receiver to order the request before + * processing. + */ + req.seq = vec->seq; + req.tv_sec = tv.tv_sec; + req.tv_usec = tv.tv_usec; + + return changelog_rpc_sumbit_req( + rpc, (void *)&req, frame, &changelog_ev_program, + CHANGELOG_REV_PROC_EVENT, vec->vector, vec->count, NULL, this, + changelog_event_dispatch_cbk, (xdrproc_t)xdr_changelog_event_req); +} + +int +changelog_event_dispatch_rpc(call_frame_t *frame, xlator_t *this, void *data) +{ + int idx = 0; + int count = 0; + int ret = 0; + unsigned long sequence = 0; + rbuf_iovec_t *rvec = NULL; + struct ev_rpc *erpc = NULL; + struct rlist_iter riter = { + { + 0, + }, + }; + + /* dispatch NR_IOVEC IO vectors at a time. */ + + erpc = data; + sequence = erpc->rlist->seq[0]; + + rlist_iter_init(&riter, erpc->rlist); + + rvec_for_each_entry(rvec, &riter) + { + idx = count % NR_IOVEC; + if (++count == NR_IOVEC) { + erpc->vec.vector[idx] = rvec->iov; + erpc->vec.seq = sequence++; + erpc->vec.count = NR_IOVEC; + + ret = changelog_dispatch_vec(frame, this, erpc->rpc, &erpc->vec); + if (ret) + break; + count = 0; + continue; + } + + erpc->vec.vector[idx] = rvec->iov; + } + + if (ret) + goto error_return; + + idx = count % NR_IOVEC; + if (idx) { + erpc->vec.seq = sequence; + erpc->vec.count = idx; + + ret = changelog_dispatch_vec(frame, this, erpc->rpc, &erpc->vec); + } + +error_return: + return ret; +} + +int +changelog_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, + void *data) +{ + xlator_t *this = NULL; + changelog_rpc_clnt_t *crpc = NULL; + changelog_clnt_t *c_clnt = NULL; + changelog_priv_t *priv = NULL; + changelog_ev_selector_t *selection = NULL; + uint64_t clntcnt = 0; + uint64_t xprtcnt = 0; + + crpc = mydata; + this = crpc->this; + c_clnt = crpc->c_clnt; + + priv = this->private; + + switch (event) { + case RPC_CLNT_CONNECT: + selection = &priv->ev_selection; + GF_ATOMIC_INC(priv->clntcnt); + + LOCK(&c_clnt->wait_lock); + { + LOCK(&c_clnt->active_lock); + { + changelog_select_event(this, selection, crpc->filter); + list_move_tail(&crpc->list, &c_clnt->active); + } + UNLOCK(&c_clnt->active_lock); + } + UNLOCK(&c_clnt->wait_lock); + + break; + case RPC_CLNT_DISCONNECT: + rpc_clnt_disable(crpc->rpc); + + /* rpc_clnt_disable doesn't unref the rpc. It just marks + * the rpc as disabled and cancels reconnection timer. + * Hence unref the rpc object to free it. + */ + rpc_clnt_unref(crpc->rpc); + + if (priv) + selection = &priv->ev_selection; + + LOCK(&crpc->lock); + { + if (selection) + changelog_deselect_event(this, selection, crpc->filter); + changelog_set_disconnect_flag(crpc, _gf_true); + } + UNLOCK(&crpc->lock); + LOCK(&c_clnt->active_lock); + { + list_del_init(&crpc->list); + } + UNLOCK(&c_clnt->active_lock); + + break; + case RPC_CLNT_MSG: + case RPC_CLNT_DESTROY: + /* Free up mydata */ + changelog_rpc_clnt_unref(crpc); + clntcnt = GF_ATOMIC_DEC(priv->clntcnt); + xprtcnt = GF_ATOMIC_GET(priv->xprtcnt); + if (this->cleanup_starting) { + if (!clntcnt && !xprtcnt) + changelog_process_cleanup_event(this); + } + break; + case RPC_CLNT_PING: + break; + } + + return 0; +} + +void * +changelog_ev_connector(void *data) +{ + xlator_t *this = NULL; + changelog_clnt_t *c_clnt = NULL; + changelog_rpc_clnt_t *crpc = NULL; + + c_clnt = data; + this = c_clnt->this; + + while (1) { + pthread_mutex_lock(&c_clnt->pending_lock); + { + while (list_empty(&c_clnt->pending)) + pthread_cond_wait(&c_clnt->pending_cond, &c_clnt->pending_lock); + crpc = list_first_entry(&c_clnt->pending, changelog_rpc_clnt_t, + list); + crpc->rpc = changelog_rpc_client_init(this, crpc, crpc->sock, + changelog_rpc_notify); + if (!crpc->rpc) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_RPC_CONNECT_ERROR, "path=%s", crpc->sock, + NULL); + crpc->cleanup(crpc); + goto mutex_unlock; + } + + LOCK(&c_clnt->wait_lock); + { + list_move_tail(&crpc->list, &c_clnt->waitq); + } + UNLOCK(&c_clnt->wait_lock); + } + mutex_unlock: + pthread_mutex_unlock(&c_clnt->pending_lock); + } + + return NULL; +} + +void +changelog_ev_cleanup_connections(xlator_t *this, changelog_clnt_t *c_clnt) +{ + changelog_rpc_clnt_t *crpc = NULL; + + /* cleanup active connections */ + LOCK(&c_clnt->active_lock); + { + list_for_each_entry(crpc, &c_clnt->active, list) + { + rpc_clnt_disable(crpc->rpc); + } + } + UNLOCK(&c_clnt->active_lock); +} + +/** + * TODO: granularize lock + * + * If we have multiple threads dispatching events, doing it this way is + * a performance bottleneck. + */ + +static changelog_rpc_clnt_t * +get_client(changelog_clnt_t *c_clnt, struct list_head **next) +{ + changelog_rpc_clnt_t *crpc = NULL; + + LOCK(&c_clnt->active_lock); + { + if (*next == &c_clnt->active) + goto unblock; + crpc = list_entry(*next, changelog_rpc_clnt_t, list); + /* ref rpc as DISCONNECT might unref the rpc asynchronously */ + changelog_rpc_clnt_ref(crpc); + rpc_clnt_ref(crpc->rpc); + *next = (*next)->next; + } +unblock: + UNLOCK(&c_clnt->active_lock); + + return crpc; +} + +static void +put_client(changelog_clnt_t *c_clnt, changelog_rpc_clnt_t *crpc) +{ + LOCK(&c_clnt->active_lock); + { + rpc_clnt_unref(crpc->rpc); + changelog_rpc_clnt_unref(crpc); + } + UNLOCK(&c_clnt->active_lock); +} + +void +_dispatcher(rbuf_list_t *rlist, void *arg) +{ + xlator_t *this = NULL; + changelog_clnt_t *c_clnt = NULL; + changelog_rpc_clnt_t *crpc = NULL; + struct ev_rpc erpc = { + 0, + }; + struct list_head *next = NULL; + + c_clnt = arg; + this = c_clnt->this; + + erpc.rlist = rlist; + next = c_clnt->active.next; + + while (1) { + crpc = get_client(c_clnt, &next); + if (!crpc) + break; + erpc.rpc = crpc->rpc; + (void)changelog_invoke_rpc(this, crpc->rpc, &changelog_ev_program, + CHANGELOG_REV_PROC_EVENT, &erpc); + put_client(c_clnt, crpc); + } +} + +/** this is called under rotbuff's lock */ +void +sequencer(rbuf_list_t *rlist, void *mydata) +{ + unsigned long range = 0; + changelog_clnt_t *c_clnt = 0; + + c_clnt = mydata; + + range = (RLIST_ENTRY_COUNT(rlist)) / NR_IOVEC; + if ((RLIST_ENTRY_COUNT(rlist)) % NR_IOVEC) + range++; + RLIST_STORE_SEQ(rlist, c_clnt->sequence, range); + + c_clnt->sequence += range; +} + +void * +changelog_ev_dispatch(void *data) +{ + int ret = 0; + void *opaque = NULL; + xlator_t *this = NULL; + changelog_clnt_t *c_clnt = NULL; + struct timeval tv = { + 0, + }; + + c_clnt = data; + this = c_clnt->this; + + while (1) { + /* TODO: change this to be pthread cond based.. later */ + + tv.tv_sec = 1; + tv.tv_usec = 0; + select(0, NULL, NULL, NULL, &tv); + + ret = rbuf_get_buffer(c_clnt->rbuf, &opaque, sequencer, c_clnt); + if (ret != RBUF_CONSUMABLE) { + if (ret != RBUF_EMPTY) + gf_smsg(this->name, GF_LOG_WARNING, 0, + CHANGELOG_MSG_BUFFER_STARVATION_ERROR, + "Failed to get buffer for RPC dispatch", + "rbuf_retval=%d", ret, NULL); + continue; + } + + ret = rbuf_wait_for_completion(c_clnt->rbuf, opaque, _dispatcher, + c_clnt); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, 0, + CHANGELOG_MSG_PUT_BUFFER_FAILED, NULL); + } + + return NULL; +} + +void +changelog_ev_queue_connection(changelog_clnt_t *c_clnt, + changelog_rpc_clnt_t *crpc) +{ + pthread_mutex_lock(&c_clnt->pending_lock); + { + list_add_tail(&crpc->list, &c_clnt->pending); + pthread_cond_signal(&c_clnt->pending_cond); + } + pthread_mutex_unlock(&c_clnt->pending_lock); +} + +struct rpc_clnt_procedure changelog_ev_procs[CHANGELOG_REV_PROC_MAX] = { + [CHANGELOG_REV_PROC_NULL] = {"NULL", NULL}, + [CHANGELOG_REV_PROC_EVENT] = {"EVENT DISPATCH", + changelog_event_dispatch_rpc}, +}; + +struct rpc_clnt_program changelog_ev_program = { + .progname = "CHANGELOG EVENT DISPATCHER", + .prognum = CHANGELOG_REV_RPC_PROCNUM, + .progver = CHANGELOG_REV_RPC_PROCVER, + .numproc = CHANGELOG_REV_PROC_MAX, + .proctable = changelog_ev_procs, +}; diff --git a/xlators/features/changelog/src/changelog-ev-handle.h b/xlators/features/changelog/src/changelog-ev-handle.h new file mode 100644 index 00000000000..cc1af58a276 --- /dev/null +++ b/xlators/features/changelog/src/changelog-ev-handle.h @@ -0,0 +1,136 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __CHANGELOG_EV_HANDLE_H +#define __CHANGELOG_EV_HANDLE_H + +#include <glusterfs/list.h> +#include <glusterfs/xlator.h> +#include "rpc-clnt.h" + +#include <glusterfs/rot-buffs.h> + +struct changelog_clnt; + +typedef struct changelog_rpc_clnt { + xlator_t *this; + + gf_lock_t lock; + + gf_atomic_t ref; + gf_boolean_t disconnected; + + unsigned int filter; + char sock[UNIX_PATH_MAX]; + + struct changelog_clnt *c_clnt; /* back pointer to list holder */ + + struct rpc_clnt *rpc; /* RPC client endpoint */ + + struct list_head list; /* ->pending, ->waitq, ->active */ + + void (*cleanup)(struct changelog_rpc_clnt *); /* cleanup handler */ +} changelog_rpc_clnt_t; + +static inline void +changelog_rpc_clnt_ref(changelog_rpc_clnt_t *crpc) +{ + GF_ATOMIC_INC(crpc->ref); +} + +static inline void +changelog_set_disconnect_flag(changelog_rpc_clnt_t *crpc, gf_boolean_t flag) +{ + crpc->disconnected = flag; +} + +static inline int +changelog_rpc_clnt_is_disconnected(changelog_rpc_clnt_t *crpc) +{ + return (crpc->disconnected == _gf_true); +} + +static inline void +changelog_rpc_clnt_unref(changelog_rpc_clnt_t *crpc) +{ + gf_boolean_t gone = _gf_false; + uint64_t ref = 0; + + ref = GF_ATOMIC_DEC(crpc->ref); + + if (!ref && changelog_rpc_clnt_is_disconnected(crpc)) { + list_del(&crpc->list); + gone = _gf_true; + } + + if (gone) + crpc->cleanup(crpc); +} + +/** + * This structure holds pending and active clients. On probe RPC all + * an instance of the above structure (@changelog_rpc_clnt) is placed + * in ->pending and gets moved to ->active on a successful connect. + * + * locking rules: + * + * Manipulating ->pending + * ->pending_lock + * ->pending + * + * Manipulating ->active + * ->active_lock + * ->active + * + * Moving object from ->pending to ->active + * ->pending_lock + * ->active_lock + * + * Objects are _never_ moved from ->active to ->pending, i.e., during + * disconnection, the object is destroyed. Well, we could have tried + * to reconnect, but that's pure waste.. let the other end reconnect. + */ + +typedef struct changelog_clnt { + xlator_t *this; + + /* pending connections */ + pthread_mutex_t pending_lock; + pthread_cond_t pending_cond; + struct list_head pending; + + /* current active connections */ + gf_lock_t active_lock; + struct list_head active; + + gf_lock_t wait_lock; + struct list_head waitq; + + /* consumer part of rot-buffs */ + rbuf_t *rbuf; + unsigned long sequence; +} changelog_clnt_t; + +void * +changelog_ev_connector(void *); + +void * +changelog_ev_dispatch(void *); + +/* APIs */ +void +changelog_ev_queue_connection(changelog_clnt_t *, changelog_rpc_clnt_t *); + +void +changelog_ev_cleanup_connections(xlator_t *, changelog_clnt_t *); + +void +changelog_process_cleanup_event(xlator_t *); +#endif diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c new file mode 100644 index 00000000000..e561997d858 --- /dev/null +++ b/xlators/features/changelog/src/changelog-helpers.c @@ -0,0 +1,1977 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/logging.h> +#include <glusterfs/iobuf.h> +#include <glusterfs/syscall.h> + +#include "changelog-helpers.h" +#include "changelog-encoders.h" +#include "changelog-mem-types.h" +#include "changelog-messages.h" + +#include "changelog-encoders.h" +#include "changelog-rpc-common.h" +#include <pthread.h> +#include <time.h> + +static void +changelog_cleanup_free_mutex(void *arg_mutex) +{ + pthread_mutex_t *p_mutex = (pthread_mutex_t *)arg_mutex; + + if (p_mutex) + pthread_mutex_unlock(p_mutex); +} + +int +changelog_thread_cleanup(xlator_t *this, pthread_t thr_id) +{ + int ret = 0; + void *retval = NULL; + + /* send a cancel request to the thread */ + ret = pthread_cancel(thr_id); + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL); + goto out; + } + + ret = pthread_join(thr_id, &retval); + if ((ret != 0) || (retval != PTHREAD_CANCELED)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL); + } + +out: + return ret; +} + +void * +changelog_get_usable_buffer(changelog_local_t *local) +{ + changelog_log_data_t *cld = NULL; + + if (!local) + return NULL; + + cld = &local->cld; + if (!cld->cld_iobuf) + return NULL; + + return cld->cld_iobuf->ptr; +} + +static int +changelog_selector_index(unsigned int selector) +{ + return (ffs(selector) - 1); +} + +int +changelog_ev_selected(xlator_t *this, changelog_ev_selector_t *selection, + unsigned int selector) +{ + int idx = 0; + + idx = changelog_selector_index(selector); + gf_msg_debug(this->name, 0, "selector ref count for %d (idx: %d): %d", + selector, idx, selection->ref[idx]); + /* this can be lockless */ + return (idx < CHANGELOG_EV_SELECTION_RANGE && (selection->ref[idx] > 0)); +} + +void +changelog_select_event(xlator_t *this, changelog_ev_selector_t *selection, + unsigned int selector) +{ + int idx = 0; + + LOCK(&selection->reflock); + { + while (selector) { + idx = changelog_selector_index(selector); + if (idx < CHANGELOG_EV_SELECTION_RANGE) { + selection->ref[idx]++; + gf_msg_debug(this->name, 0, "selecting event %d", idx); + } + selector &= ~(1 << idx); + } + } + UNLOCK(&selection->reflock); +} + +void +changelog_deselect_event(xlator_t *this, changelog_ev_selector_t *selection, + unsigned int selector) +{ + int idx = 0; + + LOCK(&selection->reflock); + { + while (selector) { + idx = changelog_selector_index(selector); + if (idx < CHANGELOG_EV_SELECTION_RANGE) { + selection->ref[idx]--; + gf_msg_debug(this->name, 0, "de-selecting event %d", idx); + } + selector &= ~(1 << idx); + } + } + UNLOCK(&selection->reflock); +} + +int +changelog_init_event_selection(xlator_t *this, + changelog_ev_selector_t *selection) +{ + int ret = 0; + int j = CHANGELOG_EV_SELECTION_RANGE; + + ret = LOCK_INIT(&selection->reflock); + if (ret != 0) + return -1; + + LOCK(&selection->reflock); + { + while (j--) { + selection->ref[j] = 0; + } + } + UNLOCK(&selection->reflock); + + return 0; +} + +static void +changelog_perform_dispatch(xlator_t *this, changelog_priv_t *priv, void *mem, + size_t size) +{ + char *buf = NULL; + void *opaque = NULL; + + buf = rbuf_reserve_write_area(priv->rbuf, size, &opaque); + if (!buf) { + gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, + CHANGELOG_MSG_DISPATCH_EVENT_FAILED, + "failed to dispatch event"); + return; + } + + memcpy(buf, mem, size); + rbuf_write_complete(opaque); +} + +void +changelog_dispatch_event(xlator_t *this, changelog_priv_t *priv, + changelog_event_t *ev) +{ + changelog_ev_selector_t *selection = NULL; + + selection = &priv->ev_selection; + if (changelog_ev_selected(this, selection, ev->ev_type)) { + changelog_perform_dispatch(this, priv, ev, CHANGELOG_EV_SIZE); + } +} + +void +changelog_set_usable_record_and_length(changelog_local_t *local, size_t len, + int xr) +{ + changelog_log_data_t *cld = NULL; + + cld = &local->cld; + + cld->cld_ptr_len = len; + cld->cld_xtra_records = xr; +} + +void +changelog_local_cleanup(xlator_t *xl, changelog_local_t *local) +{ + int i = 0; + changelog_opt_t *co = NULL; + changelog_log_data_t *cld = NULL; + + if (!local) + return; + + cld = &local->cld; + + /* cleanup dynamic allocation for extra records */ + if (cld->cld_xtra_records) { + co = (changelog_opt_t *)cld->cld_ptr; + for (; i < cld->cld_xtra_records; i++, co++) + if (co->co_free) + co->co_free(co); + } + + CHANGELOG_IOBUF_UNREF(cld->cld_iobuf); + + if (local->inode) + inode_unref(local->inode); + + mem_put(local); +} + +int +changelog_write(int fd, char *buffer, size_t len) +{ + ssize_t size = 0; + size_t written = 0; + + while (written < len) { + size = sys_write(fd, buffer + written, len - written); + if (size <= 0) + break; + + written += size; + } + + return (written != len); +} + +int +htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer) +{ + char changelog_path[PATH_MAX + 1] = { + 0, + }; + int len = -1; + char x_value[25] = { + 0, + }; + /* time stamp(10) + : (1) + rolltime (12 ) + buffer (2) */ + int ret = 0; + + if (priv->htime_fd == -1) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR, + "reason=fd not available", NULL); + ret = -1; + goto out; + } + len = snprintf(changelog_path, PATH_MAX, "%s", buffer); + if (len >= PATH_MAX) { + ret = -1; + goto out; + } + if (changelog_write(priv->htime_fd, (void *)changelog_path, len + 1) < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR, + "reason=write failed", NULL); + ret = -1; + goto out; + } + + len = snprintf(x_value, sizeof(x_value), "%ld:%d", ts, + priv->rollover_count); + if (len >= sizeof(x_value)) { + ret = -1; + goto out; + } + + if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, XATTR_REPLACE)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR, + "reason=xattr updation failed", "XATTR_REPLACE=true", + "changelog=%s", changelog_path, NULL); + + if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, 0)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR, + "reason=xattr updation failed", "changelog=%s", + changelog_path, NULL); + ret = -1; + goto out; + } + } + + priv->rollover_count += 1; + +out: + return ret; +} + +/* + * Description: Check if the changelog to rollover is empty or not. + * It is assumed that fd passed is already verified. + * + * Returns: + * 1 : If found empty, changed path from "CHANGELOG.<TS>" to "changelog.<TS>" + * 0 : If NOT empty, proceed usual. + */ +int +cl_is_empty(xlator_t *this, int fd) +{ + int ret = -1; + size_t elen = 0; + int encoding = -1; + char buffer[1024] = { + 0, + }; + struct stat stbuf = { + 0, + }; + int major_version = -1; + int minor_version = -1; + + ret = sys_fstat(fd, &stbuf); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSTAT_OP_FAILED, + NULL); + goto out; + } + + ret = sys_lseek(fd, 0, SEEK_SET); + if (ret == -1) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED, + NULL); + goto out; + } + + CHANGELOG_GET_HEADER_INFO(fd, buffer, sizeof(buffer), encoding, + major_version, minor_version, elen); + + if (elen == stbuf.st_size) { + ret = 1; + } else { + ret = 0; + } + +out: + return ret; +} + +/* + * Description: Updates "CHANGELOG" to "changelog" for writing changelog path + * to htime file. + * + * Returns: + * 0 : Success + * -1 : Error + */ +int +update_path(xlator_t *this, char *cl_path) +{ + const char low_cl[] = "changelog"; + const char up_cl[] = "CHANGELOG"; + char *found = NULL; + int ret = -1; + + found = strstr(cl_path, up_cl); + + if (found == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PATH_NOT_FOUND, + NULL); + goto out; + } else { + memcpy(found, low_cl, sizeof(low_cl) - 1); + } + + ret = 0; +out: + return ret; +} + +static int +changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, time_t ts) +{ + int ret = -1; + int notify = 0; + int cl_empty_flag = 0; + struct tm *gmt; + char yyyymmdd[40]; + char ofile[PATH_MAX] = { + 0, + }; + char nfile[PATH_MAX] = { + 0, + }; + char nfile_dir[PATH_MAX] = { + 0, + }; + changelog_event_t ev = { + 0, + }; + + if (priv->changelog_fd != -1) { + ret = sys_fsync(priv->changelog_fd); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_FSYNC_OP_FAILED, NULL); + } + ret = cl_is_empty(this, priv->changelog_fd); + if (ret == 1) { + cl_empty_flag = 1; + } else if (ret == -1) { + /* Log error but proceed as usual */ + gf_smsg(this->name, GF_LOG_WARNING, 0, + CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED, NULL); + } + sys_close(priv->changelog_fd); + priv->changelog_fd = -1; + } + + /* Get GMT time. */ + gmt = gmtime(&ts); + + strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt); + + (void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME, + priv->changelog_dir); + (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%ld", + priv->changelog_dir, yyyymmdd, ts); + (void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd); + + if (cl_empty_flag == 1) { + ret = sys_unlink(ofile); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_UNLINK_OP_FAILED, "path=%s", ofile, NULL); + ret = 0; /* Error in unlinking empty changelog should + not break further changelog operation, so + reset return value to 0*/ + } + } else { + ret = sys_rename(ofile, nfile); + + /* Changelog file rename gets ENOENT when parent dir doesn't exist */ + if (errno == ENOENT) { + ret = mkdir_p(nfile_dir, 0600, _gf_true); + + if ((ret == -1) && (EEXIST != errno)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_MKDIR_ERROR, "%s", nfile_dir, NULL); + goto out; + } + + ret = sys_rename(ofile, nfile); + } + + if (ret && (errno == ENOENT)) { + ret = 0; + goto out; + } + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_RENAME_ERROR, + "from=%s", ofile, "to=%s", nfile, NULL); + } + } + + if (!ret && (cl_empty_flag == 0)) { + notify = 1; + } + + if (!ret) { + if (cl_empty_flag) { + update_path(this, nfile); + } + ret = htime_update(this, priv, ts, nfile); + if (ret == -1) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR, + NULL); + goto out; + } + } + + if (notify) { + ev.ev_type = CHANGELOG_OP_TYPE_JOURNAL; + memcpy(ev.u.journal.path, nfile, strlen(nfile) + 1); + changelog_dispatch_event(this, priv, &ev); + } +out: + /* If this is explicit rollover initiated by snapshot, + * wakeup reconfigure thread waiting for changelog to + * rollover. This should happen even in failure cases as + * well otherwise snapshot will timeout and fail. Hence + * moved under out. + */ + if (priv->explicit_rollover) { + priv->explicit_rollover = _gf_false; + + pthread_mutex_lock(&priv->bn.bnotify_mutex); + { + if (ret) { + priv->bn.bnotify_error = _gf_true; + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED, NULL); + } else { + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BNOTIFY_INFO, + "changelog=%s", nfile, NULL); + } + priv->bn.bnotify = _gf_false; + pthread_cond_signal(&priv->bn.bnotify_cond); + } + pthread_mutex_unlock(&priv->bn.bnotify_mutex); + } + return ret; +} + +int +filter_cur_par_dirs(const struct dirent *entry) +{ + if (entry == NULL) + return 0; + + if ((strcmp(entry->d_name, ".") == 0) || (strcmp(entry->d_name, "..") == 0)) + return 0; + else + return 1; +} + +/* + * find_current_htime: + * It finds the latest htime file and sets the HTIME_CURRENT + * xattr. + * RETURN VALUE: + * -1 : Error + * ret: Number of directory entries; + */ + +int +find_current_htime(int ht_dir_fd, const char *ht_dir_path, char *ht_file_bname) +{ + struct dirent **namelist = NULL; + int ret = 0; + int cnt = 0; + int i = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(ht_dir_path); + + cnt = scandir(ht_dir_path, &namelist, filter_cur_par_dirs, alphasort); + if (cnt < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SCAN_DIR_FAILED, + NULL); + } else if (cnt > 0) { + if (snprintf(ht_file_bname, NAME_MAX, "%s", + namelist[cnt - 1]->d_name) >= NAME_MAX) { + ret = -1; + goto out; + } + if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname, + strlen(ht_file_bname), 0)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_FSETXATTR_FAILED, "HTIME_CURRENT", NULL); + ret = -1; + goto out; + } + + if (sys_fsync(ht_dir_fd) < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_FSYNC_OP_FAILED, NULL); + ret = -1; + goto out; + } + } + +out: + for (i = 0; i < cnt; i++) + free(namelist[i]); + free(namelist); + + if (ret) + cnt = ret; + + return cnt; +} + +/* Returns 0 on successful open of htime file + * returns -1 on failure or error + */ +int +htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts) +{ + int ht_file_fd = -1; + int ht_dir_fd = -1; + int ret = 0; + int cnt = 0; + char ht_dir_path[PATH_MAX] = { + 0, + }; + char ht_file_path[PATH_MAX] = { + 0, + }; + char ht_file_bname[NAME_MAX] = { + 0, + }; + char x_value[NAME_MAX] = { + 0, + }; + int flags = 0; + unsigned long min_ts = 0; + unsigned long max_ts = 0; + unsigned long total = 0; + unsigned long total1 = 0; + ssize_t size = 0; + struct stat stat_buf = { + 0, + }; + unsigned long record_len = 0; + int32_t len = 0; + + CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path); + + /* Open htime directory to get HTIME_CURRENT */ + ht_dir_fd = open(ht_dir_path, O_RDONLY); + if (ht_dir_fd == -1) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, + "path=%s", ht_dir_path, NULL); + ret = -1; + goto out; + } + + size = sys_fgetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname, + sizeof(ht_file_bname)); + if (size < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED, + "name=HTIME_CURRENT", NULL); + + /* If upgrade scenario, find the latest HTIME.TSTAMP file + * and use the same. If error, create a new HTIME.TSTAMP + * file. + */ + cnt = find_current_htime(ht_dir_fd, ht_dir_path, ht_file_bname); + if (cnt <= 0) { + gf_smsg(this->name, GF_LOG_INFO, errno, + CHANGELOG_MSG_NO_HTIME_CURRENT, NULL); + sys_close(ht_dir_fd); + return htime_create(this, priv, ts); + } + + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_HTIME_CURRENT_ERROR, NULL); + } + + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_CURRENT, "path=%s", + ht_file_bname, NULL); + len = snprintf(ht_file_path, PATH_MAX, "%s/%s", ht_dir_path, ht_file_bname); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + /* Open in append mode as existing htime file is used */ + flags |= (O_RDWR | O_SYNC | O_APPEND); + ht_file_fd = open(ht_file_path, flags, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (ht_file_fd < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, + "path=%s", ht_file_path, NULL); + ret = -1; + goto out; + } + + /* save this htime_fd in priv->htime_fd */ + priv->htime_fd = ht_file_fd; + + ret = sys_fstat(ht_file_fd, &stat_buf); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_STAT_ERROR, + "path=%s", ht_file_path, NULL); + ret = -1; + goto out; + } + + /* Initialize rollover-number in priv to current number */ + size = sys_fgetxattr(ht_file_fd, HTIME_KEY, x_value, sizeof(x_value)); + if (size < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED, + "name=%s", HTIME_KEY, "path=%s", ht_file_path, NULL); + ret = -1; + goto out; + } + + sscanf(x_value, "%lu:%lu", &max_ts, &total); + + /* 22 = 1(/) + 20(CHANGELOG.TIMESTAMP) + 1(\x00) */ + record_len = strlen(priv->changelog_dir) + 22; + total1 = stat_buf.st_size / record_len; + if (total != total1) { + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO, + "xattr_total=%lu", total, "size_total=%lu", total1, NULL); + } + + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO, "min=%lu", + min_ts, "max=%lu", max_ts, "total_changelogs=%lu", total, NULL); + + if (total < total1) + priv->rollover_count = total1 + 1; + else + priv->rollover_count = total + 1; + +out: + if (ht_dir_fd != -1) + sys_close(ht_dir_fd); + return ret; +} + +/* Returns 0 on successful creation of htime file + * returns -1 on failure or error + */ +int +htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts) +{ + int ht_file_fd = -1; + int ht_dir_fd = -1; + int ret = 0; + char ht_dir_path[PATH_MAX] = { + 0, + }; + char ht_file_path[PATH_MAX] = { + 0, + }; + char ht_file_bname[NAME_MAX + 1] = { + 0, + }; + int flags = 0; + int32_t len = 0; + + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE, + "name=%ld", ts, NULL); + + CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path); + + /* get the htime file name in ht_file_path */ + len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%ld", ht_dir_path, + HTIME_FILE_NAME, ts); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + flags |= (O_CREAT | O_RDWR | O_SYNC); + ht_file_fd = open(ht_file_path, flags, + S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (ht_file_fd < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, + "path=%s", ht_file_path, NULL); + ret = -1; + goto out; + } + + if (sys_fsetxattr(ht_file_fd, HTIME_KEY, HTIME_INITIAL_VALUE, + sizeof(HTIME_INITIAL_VALUE) - 1, 0)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_XATTR_INIT_FAILED, NULL); + ret = -1; + goto out; + } + + ret = sys_fsync(ht_file_fd); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED, + NULL); + goto out; + } + + /* save this htime_fd in priv->htime_fd */ + priv->htime_fd = ht_file_fd; + + ht_file_fd = -1; + + /* Set xattr HTIME_CURRENT on htime directory to htime filename */ + ht_dir_fd = open(ht_dir_path, O_RDONLY); + if (ht_dir_fd == -1) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, + "path=%s", ht_dir_path, NULL); + ret = -1; + goto out; + } + + (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%ld", + HTIME_FILE_NAME, ts); + if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname, + strlen(ht_file_bname), 0)) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED, + " HTIME_CURRENT", NULL); + ret = -1; + goto out; + } + + ret = sys_fsync(ht_dir_fd); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED, + NULL); + goto out; + } + + /* initialize rollover-number in priv to 1 */ + priv->rollover_count = 1; + +out: + if (ht_dir_fd != -1) + sys_close(ht_dir_fd); + if (ht_file_fd != -1) + sys_close(ht_file_fd); + return ret; +} + +/* Description: + * Opens the snap changelog to log call path fops in it. + * This changelos name is "CHANGELOG.SNAP", stored in + * path ".glusterfs/changelogs/csnap". + * Returns: + * 0 : On success. + * -1 : On failure. + */ +int +changelog_snap_open(xlator_t *this, changelog_priv_t *priv) +{ + int fd = -1; + int ret = 0; + int flags = 0; + char buffer[1024] = { + 0, + }; + char c_snap_path[PATH_MAX] = { + 0, + }; + char csnap_dir_path[PATH_MAX] = { + 0, + }; + int32_t len = 0; + + CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir_path); + + len = snprintf(c_snap_path, PATH_MAX, "%s/" CSNAP_FILE_NAME, + csnap_dir_path); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + flags |= (O_CREAT | O_RDWR | O_TRUNC); + + fd = open(c_snap_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, + "path=%s", c_snap_path, NULL); + ret = -1; + goto out; + } + priv->c_snap_fd = fd; + + (void)snprintf(buffer, 1024, CHANGELOG_HEADER, CHANGELOG_VERSION_MAJOR, + CHANGELOG_VERSION_MINOR, priv->ce->encoder); + ret = changelog_snap_write_change(priv, buffer, strlen(buffer)); + if (ret < 0) { + sys_close(priv->c_snap_fd); + priv->c_snap_fd = -1; + goto out; + } + +out: + return ret; +} + +/* + * Description: + * Starts logging fop details in CSNAP journal. + * Returns: + * 0 : On success. + * -1 : On Failure. + */ +int +changelog_snap_logging_start(xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + + ret = changelog_snap_open(this, priv); + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "starting", + NULL); + + return ret; +} + +/* + * Description: + * Stops logging fop details in CSNAP journal. + * Returns: + * 0 : On success. + * -1 : On Failure. + */ +int +changelog_snap_logging_stop(xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + + sys_close(priv->c_snap_fd); + priv->c_snap_fd = -1; + + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "Stopped", + NULL); + + return ret; +} + +int +changelog_open_journal(xlator_t *this, changelog_priv_t *priv) +{ + int fd = 0; + int ret = -1; + int flags = 0; + char buffer[1024] = { + 0, + }; + char changelog_path[PATH_MAX] = { + 0, + }; + + (void)snprintf(changelog_path, PATH_MAX, "%s/" CHANGELOG_FILE_NAME, + priv->changelog_dir); + + flags |= (O_CREAT | O_RDWR); + if (priv->fsync_interval == 0) + flags |= O_SYNC; + + fd = open(changelog_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + if (fd < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED, + "path=%s", changelog_path, NULL); + goto out; + } + + priv->changelog_fd = fd; + + (void)snprintf(buffer, 1024, CHANGELOG_HEADER, CHANGELOG_VERSION_MAJOR, + CHANGELOG_VERSION_MINOR, priv->ce->encoder); + ret = changelog_write_change(priv, buffer, strlen(buffer)); + if (ret) { + sys_close(priv->changelog_fd); + priv->changelog_fd = -1; + goto out; + } + + ret = 0; + +out: + return ret; +} + +int +changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts, + gf_boolean_t finale) +{ + int ret = -1; + + ret = changelog_rollover_changelog(this, priv, ts); + + if (!ret && !finale) + ret = changelog_open_journal(this, priv); + + return ret; +} + +/** + * return the length of entry + */ +size_t +changelog_entry_length() +{ + return sizeof(changelog_log_data_t); +} + +void +changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last) +{ + cld->cld_type = CHANGELOG_TYPE_ROLLOVER; + cld->cld_roll_time = gf_time(); + cld->cld_finale = is_last; +} + +int +changelog_snap_write_change(changelog_priv_t *priv, char *buffer, size_t len) +{ + return changelog_write(priv->c_snap_fd, buffer, len); +} + +int +changelog_write_change(changelog_priv_t *priv, char *buffer, size_t len) +{ + return changelog_write(priv->changelog_fd, buffer, len); +} + +/* + * Descriptions: + * Writes fop details in ascii format to CSNAP. + * Issues: + * Not Encoding agnostic. + * Returns: + * 0 : On Success. + * -1 : On Failure. + */ +int +changelog_snap_handle_ascii_change(xlator_t *this, changelog_log_data_t *cld) +{ + size_t off = 0; + size_t gfid_len = 0; + char *gfid_str = NULL; + char *buffer = NULL; + changelog_priv_t *priv = NULL; + int ret = 0; + + if (this == NULL) { + ret = -1; + goto out; + } + + priv = this->private; + + if (priv == NULL) { + ret = -1; + goto out; + } + + gfid_str = uuid_utoa(cld->cld_gfid); + gfid_len = strlen(gfid_str); + + /* extra bytes for decorations */ + buffer = alloca(gfid_len + cld->cld_ptr_len + 10); + CHANGELOG_STORE_ASCII(priv, buffer, off, gfid_str, gfid_len, cld); + + CHANGELOG_FILL_BUFFER(buffer, off, "\0", 1); + + ret = changelog_snap_write_change(priv, buffer, off); + + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED, + "csnap", NULL); + } + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_WROTE_TO_CSNAP, NULL); + ret = 0; +out: + return ret; +} + +int +changelog_handle_change(xlator_t *this, changelog_priv_t *priv, + changelog_log_data_t *cld) +{ + int ret = 0; + + if (CHANGELOG_TYPE_IS_ROLLOVER(cld->cld_type)) { + changelog_encode_change(priv); + ret = changelog_start_next_change(this, priv, cld->cld_roll_time, + cld->cld_finale); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_GET_TIME_OP_FAILED, NULL); + goto out; + } + + /** + * case when there is reconfigure done (disabling changelog) and there + * are still fops that have updates in prgress. + */ + if (priv->changelog_fd == -1) + return 0; + + if (CHANGELOG_TYPE_IS_FSYNC(cld->cld_type)) { + ret = sys_fsync(priv->changelog_fd); + if (ret < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_FSYNC_OP_FAILED, NULL); + } + goto out; + } + + ret = priv->ce->encode(this, cld); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED, + "changelog", NULL); + } + +out: + return ret; +} + +changelog_local_t * +changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid, + int xtra_records, gf_boolean_t update_flag) +{ + changelog_local_t *local = NULL; + struct iobuf *iobuf = NULL; + + /** + * We relax the presence of inode if @update_flag is true. + * The caller (implementation of the fop) needs to be careful to + * not blindly use local->inode. + */ + if (!update_flag && !inode) { + gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, + CHANGELOG_MSG_INODE_NOT_FOUND, + "inode needed for version checking !!!"); + + goto out; + } + + if (xtra_records) { + iobuf = iobuf_get2(this->ctx->iobuf_pool, + xtra_records * CHANGELOG_OPT_RECORD_LEN); + if (!iobuf) + goto out; + } + + local = mem_get0(this->local_pool); + if (!local) { + CHANGELOG_IOBUF_UNREF(iobuf); + goto out; + } + + local->update_no_check = update_flag; + + gf_uuid_copy(local->cld.cld_gfid, gfid); + + local->cld.cld_iobuf = iobuf; + local->cld.cld_xtra_records = 0; /* set by the caller */ + + if (inode) + local->inode = inode_ref(inode); + +out: + return local; +} + +int +changelog_forget(xlator_t *this, inode_t *inode) +{ + uint64_t ctx_addr = 0; + changelog_inode_ctx_t *ctx = NULL; + + inode_ctx_del(inode, this, &ctx_addr); + if (!ctx_addr) + return 0; + + ctx = (changelog_inode_ctx_t *)(long)ctx_addr; + GF_FREE(ctx); + + return 0; +} + +int +changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv, + changelog_log_data_t *cld) +{ + return priv->cd.dispatchfn(this, priv, priv->cd.cd_data, cld, NULL); +} + +/* Wait till all the black fops are drained */ +void +changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + + /* clean up framework of pthread_mutex is required here as + * 'reconfigure' terminates the changelog_rollover thread + * on graph change. + */ + pthread_cleanup_push(changelog_cleanup_free_mutex, + &priv->dm.drain_black_mutex); + ret = pthread_mutex_lock(&priv->dm.drain_black_mutex); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR, + "error=%d", ret, NULL); + while (priv->dm.black_fop_cnt > 0) { + gf_msg_debug(this->name, 0, "Conditional wait on black fops: %ld", + priv->dm.black_fop_cnt); + priv->dm.drain_wait_black = _gf_true; + ret = pthread_cond_wait(&priv->dm.drain_black_cond, + &priv->dm.drain_black_mutex); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret, + NULL); + } + priv->dm.drain_wait_black = _gf_false; + ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR, + "error=%d", ret, NULL); + pthread_cleanup_pop(0); + gf_msg_debug(this->name, 0, "Woke up: Conditional wait on black fops"); +} + +/* Wait till all the white fops are drained */ +void +changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + + /* clean up framework of pthread_mutex is required here as + * 'reconfigure' terminates the changelog_rollover thread + * on graph change. + */ + pthread_cleanup_push(changelog_cleanup_free_mutex, + &priv->dm.drain_white_mutex); + ret = pthread_mutex_lock(&priv->dm.drain_white_mutex); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR, + "error=%d", ret, NULL); + while (priv->dm.white_fop_cnt > 0) { + gf_msg_debug(this->name, 0, "Conditional wait on white fops : %ld", + priv->dm.white_fop_cnt); + priv->dm.drain_wait_white = _gf_true; + ret = pthread_cond_wait(&priv->dm.drain_white_cond, + &priv->dm.drain_white_mutex); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret, + NULL); + } + priv->dm.drain_wait_white = _gf_false; + ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR, + "error=%d", ret, NULL); + pthread_cleanup_pop(0); + gf_msg_debug(this->name, 0, "Woke up: Conditional wait on white fops"); +} + +/** + * TODO: these threads have many thing in common (wake up after + * a certain time etc..). move them into separate routine. + */ +void * +changelog_rollover(void *data) +{ + int ret = 0; + xlator_t *this = NULL; + struct timespec tv = { + 0, + }; + changelog_log_data_t cld = { + 0, + }; + changelog_time_slice_t *slice = NULL; + changelog_priv_t *priv = data; + + this = priv->cr.this; + slice = &priv->slice; + + while (1) { + (void)pthread_testcancel(); + + tv.tv_sec = gf_time() + priv->rollover_time; + tv.tv_nsec = 0; + ret = 0; /* Reset ret to zero */ + + /* The race between actual rollover and explicit rollover is + * handled. If actual rollover is being done and the + * explicit rollover event comes, the event is not missed. + * Since explicit rollover sets 'cr.notify' to true, this + * thread doesn't wait on 'pthread_cond_timedwait'. + */ + pthread_cleanup_push(changelog_cleanup_free_mutex, &priv->cr.lock); + pthread_mutex_lock(&priv->cr.lock); + { + while (ret == 0 && !priv->cr.notify) + ret = pthread_cond_timedwait(&priv->cr.cond, &priv->cr.lock, + &tv); + if (ret == 0) + priv->cr.notify = _gf_false; + } + pthread_mutex_unlock(&priv->cr.lock); + pthread_cleanup_pop(0); + + if (ret == 0) { + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO, + NULL); + priv->explicit_rollover = _gf_true; + } else if (ret && ret != ETIMEDOUT) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_SELECT_FAILED, NULL); + continue; + } else if (ret && ret == ETIMEDOUT) { + gf_msg_debug(this->name, 0, "Wokeup on timeout"); + } + + /* Reading curent_color without lock is fine here + * as it is only modified here and is next to reading. + */ + if (priv->current_color == FOP_COLOR_BLACK) { + LOCK(&priv->lock); + priv->current_color = FOP_COLOR_WHITE; + UNLOCK(&priv->lock); + gf_msg_debug(this->name, 0, + "Black fops" + " to be drained:%ld", + priv->dm.black_fop_cnt); + changelog_drain_black_fops(this, priv); + } else { + LOCK(&priv->lock); + priv->current_color = FOP_COLOR_BLACK; + UNLOCK(&priv->lock); + gf_msg_debug(this->name, 0, + "White fops" + " to be drained:%ld", + priv->dm.white_fop_cnt); + changelog_drain_white_fops(this, priv); + } + + /* Adding delay of 1 second only during explicit rollover: + * + * Changelog rollover can happen either due to actual + * or the explicit rollover during snapshot. Actual + * rollover is controlled by tuneable called 'rollover-time'. + * The minimum granularity for rollover-time is 1 second. + * Explicit rollover is asynchronous in nature and happens + * during snapshot. + * + * Basically, rollover renames the current CHANGELOG file + * to CHANGELOG.TIMESTAMP. Let's assume, at time 't1', + * actual and explicit rollover raced against each + * other and actual rollover won the race renaming the + * CHANGELOG file to CHANGELOG.t1 and opens a new + * CHANGELOG file. There is high chance that, an immediate + * explicit rollover at time 't1' can happen with in the same + * second to rename CHANGELOG file to CHANGELOG.t1 resulting in + * purging the earlier CHANGELOG.t1 file created by actual + * rollover. So adding a delay of 1 second guarantees unique + * CHANGELOG.TIMESTAMP during explicit rollover. + */ + if (priv->explicit_rollover == _gf_true) + sleep(1); + + changelog_fill_rollover_data(&cld, _gf_false); + + _mask_cancellation(); + + LOCK(&priv->lock); + { + ret = changelog_inject_single_event(this, priv, &cld); + if (!ret) + SLICE_VERSION_UPDATE(slice); + } + UNLOCK(&priv->lock); + + _unmask_cancellation(); + } + + return NULL; +} + +void * +changelog_fsync_thread(void *data) +{ + int ret = 0; + xlator_t *this = NULL; + struct timeval tv = { + 0, + }; + changelog_log_data_t cld = { + 0, + }; + changelog_priv_t *priv = data; + + this = priv->cf.this; + cld.cld_type = CHANGELOG_TYPE_FSYNC; + + while (1) { + (void)pthread_testcancel(); + + tv.tv_sec = priv->fsync_interval; + tv.tv_usec = 0; + + ret = select(0, NULL, NULL, NULL, &tv); + if (ret) + continue; + + _mask_cancellation(); + + ret = changelog_inject_single_event(this, priv, &cld); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_INJECT_FSYNC_FAILED, NULL); + + _unmask_cancellation(); + } + + return NULL; +} + +/* macros for inode/changelog version checks */ + +#define INODE_VERSION_UPDATE(priv, inode, iver, slice, type) \ + do { \ + LOCK(&inode->lock); \ + { \ + LOCK(&priv->lock); \ + { \ + *iver = slice->changelog_version[type]; \ + } \ + UNLOCK(&priv->lock); \ + } \ + UNLOCK(&inode->lock); \ + } while (0) + +#define INODE_VERSION_EQUALS_SLICE(priv, ver, slice, type, upd) \ + do { \ + LOCK(&priv->lock); \ + { \ + upd = (ver == slice->changelog_version[type]) ? _gf_false \ + : _gf_true; \ + } \ + UNLOCK(&priv->lock); \ + } while (0) + +static int +__changelog_inode_ctx_set(xlator_t *this, inode_t *inode, + changelog_inode_ctx_t *ctx) +{ + uint64_t ctx_addr = (uint64_t)(uintptr_t)ctx; + return __inode_ctx_set(inode, this, &ctx_addr); +} + +/** + * one shot routine to get the address and the value of a inode version + * for a particular type. + */ +changelog_inode_ctx_t * +__changelog_inode_ctx_get(xlator_t *this, inode_t *inode, unsigned long **iver, + unsigned long *version, changelog_log_type type) +{ + int ret = 0; + uint64_t ctx_addr = 0; + changelog_inode_ctx_t *ctx = NULL; + + ret = __inode_ctx_get(inode, this, &ctx_addr); + if (ret < 0) + ctx_addr = 0; + if (ctx_addr != 0) { + ctx = (changelog_inode_ctx_t *)(long)ctx_addr; + goto out; + } + + ctx = GF_CALLOC(1, sizeof(*ctx), gf_changelog_mt_inode_ctx_t); + if (!ctx) + goto out; + + ret = __changelog_inode_ctx_set(this, inode, ctx); + if (ret) { + GF_FREE(ctx); + ctx = NULL; + } + +out: + if (ctx && iver && version) { + *iver = CHANGELOG_INODE_VERSION_TYPE(ctx, type); + *version = **iver; + } + + return ctx; +} + +static changelog_inode_ctx_t * +changelog_inode_ctx_get(xlator_t *this, inode_t *inode, unsigned long **iver, + unsigned long *version, changelog_log_type type) +{ + changelog_inode_ctx_t *ctx = NULL; + + LOCK(&inode->lock); + { + ctx = __changelog_inode_ctx_get(this, inode, iver, version, type); + } + UNLOCK(&inode->lock); + + return ctx; +} + +/** + * This is the main update routine. Locking has been made granular so as to + * maximize parallelism of fops - I'll try to explain it below using execution + * timelines. + * + * Basically, the contention is between multiple execution threads of this + * routine and the roll-over thread. So, instead of having a big lock, we hold + * granular locks: inode->lock and priv->lock. Now I'll explain what happens + * when there is an update and a roll-over at just about the same time. + * NOTE: + * - the dispatcher itself synchronizes updates via it's own lock + * - the slice version in incremented by the roll-over thread + * + * Case 1: When the rollover thread wins before the inode version can be + * compared with the slice version. + * + * [updater] | [rollover] + * | + * | <SLICE: 1, 1, 1> + * <changelog_update> | + * <changelog_inode_ctx_get> | + * <CTX: 1, 1, 1> | + * | <dispatch-rollover-event> + * | LOCK (&priv->lock) + * | <SLICE_VERSION_UPDATE> + * | <SLICE: 2, 2, 2> + * | UNLOCK (&priv->lock) + * | + * LOCK (&priv->lock) | + * <INODE_VERSION_EQUALS_SLICE> | + * I: 1 <-> S: 2 | + * update: true | + * UNLOCK (&priv->lock) | + * | + * <if update == true> | + * <dispath-update-event> | + * <INODE_VERSION_UPDATE> | + * LOCK (&inode->lock) | + * LOCK (&priv->lock) | + * <CTX: 2, 1, 1> | + * UNLOCK (&priv->lock) | + * UNLOCK (&inode->lock) | + * + * Therefore, the change gets recorded in the next change (no lost change). If + * the slice version was ahead of the inode version (say I:1, S: 2), then + * anyway the comparison would result in a update (I: 1, S: 3). + * + * If the rollover time is too less, then there is another contention when the + * updater tries to bring up inode version to the slice version (this is also + * the case when the roll-over thread wakes up during INODE_VERSION_UPDATE. + * + * <CTX: 1, 1, 1> | <SLICE: 2, 2, 2> + * | + * | + * <dispath-update-event> | + * <INODE_VERSION_UPDATE> | + * LOCK (&inode->lock) | + * LOCK (&priv->lock) | + * <CTX: 2, 1, 1> | + * UNLOCK (&priv->lock) | + * UNLOCK (&inode->lock) | + * | <dispatch-rollover-event> + * | LOCK (&priv->lock) + * | <SLICE_VERSION_UPDATE> + * | <SLICE: 3, 3, 3> + * | UNLOCK (&priv->lock) + * + * + * Case 2: When the fop thread wins + * + * [updater] | [rollover] + * | + * | <SLICE: 1, 1, 1> + * <changelog_update> | + * <changelog_inode_ctx_get> | + * <CTX: 0, 0, 0> | + * | + * LOCK (&priv->lock) | + * <INODE_VERSION_EQUALS_SLICE> | + * I: 0 <-> S: 1 | + * update: true | + * UNLOCK (&priv->lock) | + * | <dispatch-rollover-event> + * | LOCK (&priv->lock) + * | <SLICE_VERSION_UPDATE> + * | <SLICE: 2, 2, 2> + * | UNLOCK (&priv->lock) + * <if update == true> | + * <dispath-update-event> | + * <INODE_VERSION_UPDATE> | + * LOCK (&inode->lock) | + * LOCK (&priv->lock) | + * <CTX: 2, 0, 0> | + * UNLOCK (&priv->lock) | + * UNLOCK (&inode->lock) | + * + * Here again, if the inode version was equal to the slice version (I: 1, S: 1) + * then there is no need to record an update (as the equality of the two version + * signifies an update was recorded in the current time slice). + */ +void +changelog_update(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local, changelog_log_type type) +{ + int ret = 0; + unsigned long *iver = NULL; + unsigned long version = 0; + inode_t *inode = NULL; + changelog_time_slice_t *slice = NULL; + changelog_inode_ctx_t *ctx = NULL; + changelog_log_data_t *cld_0 = NULL; + changelog_log_data_t *cld_1 = NULL; + changelog_local_t *next_local = NULL; + gf_boolean_t need_upd = _gf_true; + + slice = &priv->slice; + + /** + * for fops that do not require inode version checking + */ + if (local->update_no_check) + goto update; + + inode = local->inode; + + ctx = changelog_inode_ctx_get(this, inode, &iver, &version, type); + if (!ctx) + goto update; + + INODE_VERSION_EQUALS_SLICE(priv, version, slice, type, need_upd); + +update: + if (need_upd) { + cld_0 = &local->cld; + cld_0->cld_type = type; + + if ((next_local = local->prev_entry) != NULL) { + cld_1 = &next_local->cld; + cld_1->cld_type = type; + } + + ret = priv->cd.dispatchfn(this, priv, priv->cd.cd_data, cld_0, cld_1); + + /** + * update after the dispatcher has successfully done + * it's job. + */ + if (!local->update_no_check && iver && !ret) + INODE_VERSION_UPDATE(priv, inode, iver, slice, type); + } + + return; +} + +/* Begin: Geo-rep snapshot dependency changes */ + +/* changelog_color_fop_and_inc_cnt: Assign color and inc fop cnt. + * + * Assigning color and increment of corresponding fop count should happen + * in a lock (i.e., there should be no window between them). If it does not, + * we might miss draining those fops which are colored but not yet incremented + * the count. Let's assume black fops are draining. If the black fop count + * reaches zero, we say draining is completed but we miss black fops which are + * not incremented fop count but color is assigned black. + */ + +void +changelog_color_fop_and_inc_cnt(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local) +{ + if (!priv || !local) + return; + + LOCK(&priv->lock); + { + local->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, local); + } + UNLOCK(&priv->lock); +} + +/* Increments the respective fop counter based on the fop color */ +void +changelog_inc_fop_cnt(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local) +{ + int ret = 0; + + if (local) { + if (local->color == FOP_COLOR_BLACK) { + ret = pthread_mutex_lock(&priv->dm.drain_black_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + { + priv->dm.black_fop_cnt++; + } + ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + } else { + ret = pthread_mutex_lock(&priv->dm.drain_white_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + { + priv->dm.white_fop_cnt++; + } + ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + } + } +out: + return; +} + +/* Decrements the respective fop counter based on the fop color */ +void +changelog_dec_fop_cnt(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local) +{ + int ret = 0; + + if (local) { + if (local->color == FOP_COLOR_BLACK) { + ret = pthread_mutex_lock(&priv->dm.drain_black_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + { + priv->dm.black_fop_cnt--; + if (priv->dm.black_fop_cnt == 0 && + priv->dm.drain_wait_black == _gf_true) { + ret = pthread_cond_signal(&priv->dm.drain_black_cond); + CHANGELOG_PTHREAD_ERROR_HANDLE_2( + ret, out, priv->dm.drain_black_mutex); + gf_msg_debug(this->name, 0, + "Signalled " + "draining of black"); + } + } + ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + } else { + ret = pthread_mutex_lock(&priv->dm.drain_white_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + { + priv->dm.white_fop_cnt--; + if (priv->dm.white_fop_cnt == 0 && + priv->dm.drain_wait_white == _gf_true) { + ret = pthread_cond_signal(&priv->dm.drain_white_cond); + CHANGELOG_PTHREAD_ERROR_HANDLE_2( + ret, out, priv->dm.drain_white_mutex); + gf_msg_debug(this->name, 0, + "Signalled " + "draining of white"); + } + } + ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + } + } +out: + return; +} + +/* Write to a pipe setup between changelog main thread and changelog + * rollover thread to initiate explicit rollover of changelog journal. + */ +int +changelog_barrier_notify(changelog_priv_t *priv, char *buf) +{ + int ret = 0; + + pthread_mutex_lock(&priv->cr.lock); + { + ret = pthread_cond_signal(&priv->cr.cond); + priv->cr.notify = _gf_true; + } + pthread_mutex_unlock(&priv->cr.lock); + return ret; +} + +/* Clean up flags set on barrier notification */ +void +changelog_barrier_cleanup(xlator_t *this, changelog_priv_t *priv, + struct list_head *queue) +{ + int ret = 0; + + LOCK(&priv->bflags.lock); + priv->bflags.barrier_ext = _gf_false; + UNLOCK(&priv->bflags.lock); + + ret = pthread_mutex_lock(&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + { + priv->bn.bnotify = _gf_false; + } + ret = pthread_mutex_unlock(&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out); + + /* Disable changelog barrier and dequeue fops */ + LOCK(&priv->lock); + { + if (priv->barrier_enabled == _gf_true) + __chlog_barrier_disable(this, queue); + else + ret = -1; + } + UNLOCK(&priv->lock); + if (ret == 0) + chlog_barrier_dequeue_all(this, queue); + +out: + return; +} +/* End: Geo-Rep snapshot dependency changes */ + +int32_t +changelog_fill_entry_buf(call_frame_t *frame, xlator_t *this, loc_t *loc, + changelog_local_t **local) +{ + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + char *dup_path = NULL; + char *bname = NULL; + inode_t *parent = NULL; + + GF_ASSERT(this); + + parent = inode_parent(loc->inode, 0, 0); + if (!parent) { + gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_INODE_NOT_FOUND, + "type=parent", "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); + goto err; + } + + CHANGELOG_INIT_NOCHECK(this, *local, loc->inode, loc->inode->gfid, 5); + if (!(*local)) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_LOCAL_INIT_FAILED, + NULL); + goto err; + } + + co = changelog_get_usable_buffer(*local); + if (!co) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_GET_BUFFER_FAILED, + NULL); + goto err; + } + + if (loc->inode->ia_type == IA_IFDIR) { + CHANGLOG_FILL_FOP_NUMBER(co, GF_FOP_MKDIR, fop_fn, xtra_len); + co++; + CHANGELOG_FILL_UINT32(co, S_IFDIR | 0755, number_fn, xtra_len); + co++; + } else { + CHANGLOG_FILL_FOP_NUMBER(co, GF_FOP_CREATE, fop_fn, xtra_len); + co++; + CHANGELOG_FILL_UINT32(co, S_IFREG | 0644, number_fn, xtra_len); + co++; + } + + CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len); + co++; + + dup_path = gf_strdup(loc->path); + bname = basename(dup_path); + + CHANGELOG_FILL_ENTRY(co, parent->gfid, bname, entry_fn, entry_free_fn, + xtra_len, err); + changelog_set_usable_record_and_length(*local, xtra_len, 5); + + if (dup_path) + GF_FREE(dup_path); + if (parent) + inode_unref(parent); + return 0; + +err: + if (dup_path) + GF_FREE(dup_path); + if (parent) + inode_unref(parent); + return -1; +} + +/* + * resolve_pargfid_to_path: + * It converts given pargfid to path by doing recursive readlinks at the + * backend. If bname is given, it suffixes bname to pargfid to form the + * complete path else it doesn't. It allocates memory for the path and is + * caller's responsibility to free the same. If bname is NULL and pargfid + * is ROOT, then it returns "." + */ + +int +resolve_pargfid_to_path(xlator_t *this, const uuid_t pgfid, char **path, + char *bname) +{ + char *linkname = NULL; + char *dir_handle = NULL; + char *pgfidstr = NULL; + char *saveptr = NULL; + ssize_t len = 0; + int ret = 0; + uuid_t tmp_gfid = { + 0, + }; + uuid_t pargfid = { + 0, + }; + changelog_priv_t *priv = NULL; + char gpath[PATH_MAX] = { + 0, + }; + char result[PATH_MAX] = { + 0, + }; + char *dir_name = NULL; + char pre_dir_name[PATH_MAX] = { + 0, + }; + + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + gf_uuid_copy(pargfid, pgfid); + if (!path || gf_uuid_is_null(pargfid)) { + ret = -1; + goto out; + } + + if (__is_root_gfid(pargfid)) { + if (bname) + *path = gf_strdup(bname); + else + *path = gf_strdup("."); + return ret; + } + + dir_handle = alloca(PATH_MAX); + linkname = alloca(PATH_MAX); + (void)snprintf(gpath, PATH_MAX, "%s/.glusterfs/", priv->changelog_brick); + + while (!(__is_root_gfid(pargfid))) { + len = snprintf(dir_handle, PATH_MAX, "%s/%02x/%02x/%s", gpath, + pargfid[0], pargfid[1], uuid_utoa(pargfid)); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + + len = sys_readlink(dir_handle, linkname, PATH_MAX); + if (len < 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_READLINK_OP_FAILED, + "could not read the " + "link from the gfid handle", + "handle=%s", dir_handle, NULL); + ret = -1; + goto out; + } + + linkname[len] = '\0'; + + pgfidstr = strtok_r(linkname + strlen("../../00/00/"), "/", &saveptr); + dir_name = strtok_r(NULL, "/", &saveptr); + + len = snprintf(result, PATH_MAX, "%s/%s", dir_name, pre_dir_name); + if ((len < 0) || (len >= PATH_MAX)) { + ret = -1; + goto out; + } + if (snprintf(pre_dir_name, len + 1, "%s", result) >= len + 1) { + ret = -1; + goto out; + } + + gf_uuid_parse(pgfidstr, tmp_gfid); + gf_uuid_copy(pargfid, tmp_gfid); + } + + if (bname) + strncat(result, bname, strlen(bname) + 1); + + *path = gf_strdup(result); + +out: + return ret; +} diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h new file mode 100644 index 00000000000..38fa7590c32 --- /dev/null +++ b/xlators/features/changelog/src/changelog-helpers.h @@ -0,0 +1,716 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CHANGELOG_HELPERS_H +#define _CHANGELOG_HELPERS_H + +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> +#include "pthread.h" +#include <glusterfs/iobuf.h> +#include <glusterfs/rot-buffs.h> + +#include "changelog-misc.h" +#include <glusterfs/call-stub.h> + +#include "rpcsvc.h" +#include "changelog-ev-handle.h" + +#include "changelog.h" +#include "changelog-messages.h" + +/** + * the changelog entry + */ +typedef struct changelog_log_data { + /* rollover related */ + time_t cld_roll_time; + + /* reopen changelog? */ + gf_boolean_t cld_finale; + + changelog_log_type cld_type; + + /** + * sincd gfid is _always_ a necessity, it's not a part + * of the iobuf. by doing this we do not add any overhead + * for data and metadata related fops. + */ + uuid_t cld_gfid; + + /** + * iobufs are used for optionals records: pargfid, path, + * write offsets etc.. It's the fop implementers job + * to allocate (iobuf_get() in the fop) and get unref'ed + * in the callback (CHANGELOG_STACK_UNWIND). + */ + struct iobuf *cld_iobuf; + +#define cld_ptr cld_iobuf->ptr + + /** + * after allocation you can point this to the length of + * usable data, but make sure it does not exceed the + * the size of the requested iobuf. + */ + size_t cld_iobuf_len; + +#define cld_ptr_len cld_iobuf_len + + /** + * number of optional records + */ + int cld_xtra_records; +} changelog_log_data_t; + +/** + * holder for dispatch function and private data + */ + +typedef struct changelog_priv changelog_priv_t; + +typedef struct changelog_dispatcher { + void *cd_data; + int (*dispatchfn)(xlator_t *, changelog_priv_t *, void *, + changelog_log_data_t *, changelog_log_data_t *); +} changelog_dispatcher_t; + +struct changelog_bootstrap { + changelog_mode_t mode; + int (*ctor)(xlator_t *, changelog_dispatcher_t *); + int (*dtor)(xlator_t *, changelog_dispatcher_t *); +}; + +struct changelog_encoder { + changelog_encoder_t encoder; + int (*encode)(xlator_t *, changelog_log_data_t *); +}; + +/* xlator private */ + +typedef struct changelog_time_slice { + /** + * version of changelog file, incremented each time changes + * rollover. + */ + unsigned long changelog_version[CHANGELOG_MAX_TYPE]; +} changelog_time_slice_t; + +typedef struct changelog_rollover { + /* rollover thread */ + pthread_t rollover_th; + + xlator_t *this; + + pthread_mutex_t lock; + pthread_cond_t cond; + gf_boolean_t notify; +} changelog_rollover_t; + +typedef struct changelog_fsync { + /* fsync() thread */ + pthread_t fsync_th; + + xlator_t *this; +} changelog_fsync_t; + +/* Draining during changelog rollover (for geo-rep snapshot dependency): + * -------------------------------------------------------------------- + * The introduction of draining of in-transit fops during changelog rollover + * (both explicit/timeout triggered) requires coloring of fops. Basically the + * implementation requires two counters, one counter which keeps the count of + * current intransit fops which should end up in current changelog and the other + * counter to keep track of incoming fops which should be drained as part of + * next changelog rollover event. The fops are colored w.r.t these counters. + * The fops that are to be drained as part of current changelog rollover is + * given one color and the fops which keep incoming during this and not + * necessarily should end up in current changelog and should be drained as part + * of next changelog rollover are given other color. The color switching + * continues with each changelog rollover. Two colors(black and white) are + * chosen here and initially black is chosen is default. + */ + +typedef enum chlog_fop_color { + FOP_COLOR_BLACK, + FOP_COLOR_WHITE +} chlog_fop_color_t; + +/* Barrier notify variable */ +typedef struct barrier_notify { + pthread_mutex_t bnotify_mutex; + pthread_cond_t bnotify_cond; + gf_boolean_t bnotify; + gf_boolean_t bnotify_error; +} barrier_notify_t; + +/* Two separate mutex and conditional variable set is used + * to drain white and black fops. */ + +typedef struct drain_mgmt { + pthread_mutex_t drain_black_mutex; + pthread_cond_t drain_black_cond; + pthread_mutex_t drain_white_mutex; + pthread_cond_t drain_white_cond; + /* Represents black fops count in-transit */ + unsigned long black_fop_cnt; + /* Represents white fops count in-transit */ + unsigned long white_fop_cnt; + gf_boolean_t drain_wait_black; + gf_boolean_t drain_wait_white; +} drain_mgmt_t; + +/* External barrier as a result of snap on/off indicating flag*/ +typedef struct barrier_flags { + gf_lock_t lock; + gf_boolean_t barrier_ext; +} barrier_flags_t; + +/* Event selection */ +typedef struct changelog_ev_selector { + gf_lock_t reflock; + + /** + * Array of references for each selection bit. + */ + unsigned int ref[CHANGELOG_EV_SELECTION_RANGE]; +} changelog_ev_selector_t; + +/* changelog's private structure */ +struct changelog_priv { + /* changelog journalling */ + gf_boolean_t active; + + /* changelog live notifications */ + gf_boolean_t rpc_active; + + /* to generate unique socket file per brick */ + char *changelog_brick; + + /* logging directory */ + char *changelog_dir; + + /* htime directory */ + char *htime_dir; + + /* one file for all changelog types */ + int changelog_fd; + + /* htime fd for current changelog session */ + int htime_fd; + + /* c_snap_fd is fd for call-path changelog */ + int c_snap_fd; + + /* rollover_count used by htime */ + int rollover_count; + + gf_lock_t lock; + + /* lock to synchronize CSNAP updation */ + gf_lock_t c_snap_lock; + + /* written end of the pipe */ + int wfd; + + /* rollover time */ + int32_t rollover_time; + + /* fsync() interval */ + int32_t fsync_interval; + + /* changelog type maps */ + const char *maps[CHANGELOG_MAX_TYPE]; + + /* time slicer */ + changelog_time_slice_t slice; + + /* context of the updater */ + changelog_dispatcher_t cd; + + /* context of the rollover thread */ + changelog_rollover_t cr; + + /* context of fsync thread */ + changelog_fsync_t cf; + + /* operation mode */ + changelog_mode_t op_mode; + + /* bootstrap routine for 'current' logger */ + struct changelog_bootstrap *cb; + + /* encoder mode */ + changelog_encoder_t encode_mode; + + /* encoder */ + struct changelog_encoder *ce; + + /** + * snapshot dependency changes + */ + + /* Draining of fops*/ + drain_mgmt_t dm; + + /* Represents the active color. Initially by default black */ + chlog_fop_color_t current_color; + + /* flag to determine explicit rollover is triggered */ + gf_boolean_t explicit_rollover; + + /* barrier notification variable protected by mutex */ + barrier_notify_t bn; + + /* barrier on/off indicating flags */ + barrier_flags_t bflags; + + /* changelog barrier on/off indicating flag */ + gf_boolean_t barrier_enabled; + struct list_head queue; + uint32_t queue_size; + gf_timer_t *timer; + struct timespec timeout; + + /** + * buffers, RPC, event selection, notifications and other + * beasts. + */ + + /* epoll pthread */ + pthread_t poller; + + /* rotational buffer */ + rbuf_t *rbuf; + + /* changelog RPC server */ + rpcsvc_t *rpc; + + /* event selection */ + changelog_ev_selector_t ev_selection; + + /* client handling (reverse connection) */ + pthread_t connector; + + int nr_dispatchers; + pthread_t *ev_dispatcher; + + changelog_clnt_t connections; + + /* glusterfind dependency to capture paths on deleted entries*/ + gf_boolean_t capture_del_path; + + /* Save total no. of listners */ + gf_atomic_t listnercnt; + + /* Save total no. of xprt are associated with listner */ + gf_atomic_t xprtcnt; + + /* Save xprt list */ + struct list_head xprt_list; + + /* Save total no. of client connection */ + gf_atomic_t clntcnt; + + /* Save cleanup brick in victim */ + xlator_t *victim; + + /* Status to save cleanup notify status */ + gf_boolean_t notify_down; +}; + +struct changelog_local { + inode_t *inode; + gf_boolean_t update_no_check; + + changelog_log_data_t cld; + + /** + * ->prev_entry is used in cases when there needs to be + * additional changelog entry for the parent (eg. rename) + * It's analogous to ->next in single linked list world, + * but we call it as ->prev_entry... ha ha ha + */ + struct changelog_local *prev_entry; + + /* snap dependency changes */ + chlog_fop_color_t color; +}; + +typedef struct changelog_local changelog_local_t; + +/* inode version is stored in inode ctx */ +typedef struct changelog_inode_ctx { + unsigned long iversion[CHANGELOG_MAX_TYPE]; +} changelog_inode_ctx_t; + +#define CHANGELOG_INODE_VERSION_TYPE(ctx, type) &(ctx->iversion[type]) + +/** + * Optional Records: + * fops that need to save additional information request a array of + * @changelog_opt_t struct. The array is allocated via @iobufs. + */ +typedef enum { + CHANGELOG_OPT_REC_FOP, + CHANGELOG_OPT_REC_ENTRY, + CHANGELOG_OPT_REC_UINT32, +} changelog_optional_rec_type_t; + +struct changelog_entry_fields { + uuid_t cef_uuid; + char *cef_bname; + char *cef_path; +}; + +typedef struct { + /** + * @co_covert can be used to do post-processing of the record before + * it's persisted to the CHANGELOG. If this is NULL, then the record + * is persisted as per it's in memory format. + */ + size_t (*co_convert)(void *data, char *buffer, gf_boolean_t encode); + + /* release routines */ + void (*co_free)(void *data); + + /* type of the field */ + changelog_optional_rec_type_t co_type; + + /** + * sizeof of the 'valid' field in the union. This field is not used if + * @co_convert is specified. + */ + size_t co_len; + + union { + unsigned int co_uint32; + glusterfs_fop_t co_fop; + struct changelog_entry_fields co_entry; + }; +} changelog_opt_t; + +#define CHANGELOG_OPT_RECORD_LEN sizeof(changelog_opt_t) + +/** + * helpers routines + */ + +int +changelog_thread_cleanup(xlator_t *this, pthread_t thr_id); + +void * +changelog_get_usable_buffer(changelog_local_t *local); + +void +changelog_set_usable_record_and_length(changelog_local_t *local, size_t len, + int xr); +void +changelog_local_cleanup(xlator_t *xl, changelog_local_t *local); +changelog_local_t * +changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid, + int xtra_records, gf_boolean_t update_flag); +int +changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts, + gf_boolean_t finale); +int +changelog_open_journal(xlator_t *this, changelog_priv_t *priv); +void +changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last); +int +changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv, + changelog_log_data_t *cld); +size_t +changelog_entry_length(); +int +changelog_write(int fd, char *buffer, size_t len); +int +changelog_write_change(changelog_priv_t *priv, char *buffer, size_t len); +int +changelog_handle_change(xlator_t *this, changelog_priv_t *priv, + changelog_log_data_t *cld); +void +changelog_update(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local, changelog_log_type type); +void * +changelog_rollover(void *data); +void * +changelog_fsync_thread(void *data); +int +changelog_forget(xlator_t *this, inode_t *inode); +int +htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer); +int +htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts); +int +htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts); + +/* Geo-Rep snapshot dependency changes */ +void +changelog_color_fop_and_inc_cnt(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local); +void +changelog_inc_fop_cnt(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local); +void +changelog_dec_fop_cnt(xlator_t *this, changelog_priv_t *priv, + changelog_local_t *local); +int +changelog_barrier_notify(changelog_priv_t *priv, char *buf); +void +changelog_barrier_cleanup(xlator_t *this, changelog_priv_t *priv, + struct list_head *queue); +void +changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv); +void +changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv); + +/* Crash consistency of changelog wrt snapshot */ +int +changelog_snap_logging_stop(xlator_t *this, changelog_priv_t *priv); +int +changelog_snap_logging_start(xlator_t *this, changelog_priv_t *priv); +int +changelog_snap_open(xlator_t *this, changelog_priv_t *priv); +int +changelog_snap_handle_ascii_change(xlator_t *this, changelog_log_data_t *cld); +int +changelog_snap_write_change(changelog_priv_t *priv, char *buffer, size_t len); + +/* Changelog barrier routines */ +void +__chlog_barrier_enqueue(xlator_t *this, call_stub_t *stub); +void +__chlog_barrier_disable(xlator_t *this, struct list_head *queue); +void +chlog_barrier_dequeue_all(xlator_t *this, struct list_head *queue); +call_stub_t * +__chlog_barrier_dequeue(xlator_t *this, struct list_head *queue); +int +__chlog_barrier_enable(xlator_t *this, changelog_priv_t *priv); + +int32_t +changelog_fill_entry_buf(call_frame_t *frame, xlator_t *this, loc_t *loc, + changelog_local_t **local); + +/* event selection routines */ +void +changelog_select_event(xlator_t *, changelog_ev_selector_t *, unsigned int); +void +changelog_deselect_event(xlator_t *, changelog_ev_selector_t *, unsigned int); +int +changelog_init_event_selection(xlator_t *, changelog_ev_selector_t *); +int +changelog_ev_selected(xlator_t *, changelog_ev_selector_t *, unsigned int); +void +changelog_dispatch_event(xlator_t *, changelog_priv_t *, changelog_event_t *); + +changelog_inode_ctx_t * +__changelog_inode_ctx_get(xlator_t *, inode_t *, unsigned long **, + unsigned long *, changelog_log_type); +int +resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path, + char *bname); + +/* macros */ + +#define CHANGELOG_STACK_UNWIND(fop, frame, params...) \ + do { \ + changelog_local_t *__local = NULL; \ + xlator_t *__xl = NULL; \ + if (frame) { \ + __local = frame->local; \ + __xl = frame->this; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + if (__local && __local->prev_entry) \ + changelog_local_cleanup(__xl, __local->prev_entry); \ + changelog_local_cleanup(__xl, __local); \ + } while (0) + +#define CHANGELOG_IOBUF_REF(iobuf) \ + do { \ + if (iobuf) \ + iobuf_ref(iobuf); \ + } while (0) + +#define CHANGELOG_IOBUF_UNREF(iobuf) \ + do { \ + if (iobuf) \ + iobuf_unref(iobuf); \ + } while (0) + +#define CHANGELOG_FILL_BUFFER(buffer, off, val, len) \ + do { \ + memcpy(buffer + off, val, len); \ + off += len; \ + } while (0) + +#define SLICE_VERSION_UPDATE(slice) \ + do { \ + int i = 0; \ + for (; i < CHANGELOG_MAX_TYPE; i++) { \ + slice->changelog_version[i]++; \ + } \ + } while (0) + +#define CHANGELOG_FILL_UINT32(co, number, converter, xlen) \ + do { \ + co->co_convert = converter; \ + co->co_free = NULL; \ + co->co_type = CHANGELOG_OPT_REC_UINT32; \ + co->co_uint32 = number; \ + xlen += sizeof(unsigned int); \ + } while (0) + +#define CHANGLOG_FILL_FOP_NUMBER(co, fop, converter, xlen) \ + do { \ + co->co_convert = converter; \ + co->co_free = NULL; \ + co->co_type = CHANGELOG_OPT_REC_FOP; \ + co->co_fop = fop; \ + xlen += sizeof(fop); \ + } while (0) + +#define CHANGELOG_FILL_ENTRY(co, pargfid, bname, converter, freefn, xlen, \ + label) \ + do { \ + co->co_convert = converter; \ + co->co_free = freefn; \ + co->co_type = CHANGELOG_OPT_REC_ENTRY; \ + gf_uuid_copy(co->co_entry.cef_uuid, pargfid); \ + co->co_entry.cef_bname = gf_strdup(bname); \ + if (!co->co_entry.cef_bname) \ + goto label; \ + xlen += (UUID_CANONICAL_FORM_LEN + strlen(bname)); \ + } while (0) + +#define CHANGELOG_FILL_ENTRY_DIR_PATH(co, pargfid, bname, converter, \ + del_freefn, xlen, label, capture_del) \ + do { \ + co->co_convert = converter; \ + co->co_free = del_freefn; \ + co->co_type = CHANGELOG_OPT_REC_ENTRY; \ + gf_uuid_copy(co->co_entry.cef_uuid, pargfid); \ + co->co_entry.cef_bname = gf_strdup(bname); \ + if (!co->co_entry.cef_bname) \ + goto label; \ + xlen += (UUID_CANONICAL_FORM_LEN + strlen(bname)); \ + if (!capture_del || \ + resolve_pargfid_to_path(this, pargfid, &(co->co_entry.cef_path), \ + co->co_entry.cef_bname)) { \ + co->co_entry.cef_path = gf_strdup("\0"); \ + xlen += 1; \ + } else { \ + xlen += (strlen(co->co_entry.cef_path)); \ + } \ + } while (0) + +#define CHANGELOG_INIT(this, local, inode, gfid, xrec) \ + local = changelog_local_init(this, inode, gfid, xrec, _gf_false) + +#define CHANGELOG_INIT_NOCHECK(this, local, inode, gfid, xrec) \ + local = changelog_local_init(this, inode, gfid, xrec, _gf_true) + +#define CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, label) \ + do { \ + if (!priv->active) \ + goto label; \ + /* ignore rebalance process's activity. */ \ + if ((frame->root->pid == GF_CLIENT_PID_DEFRAG) || \ + (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)) \ + goto label; \ + } while (0) + +/* If it is a METADATA entry and fop num being GF_FOP_NULL, don't + * log in the changelog as it is of no use. And also if it is + * logged, since slicing version checking is done for metadata + * entries, the subsequent entries with valid fop num which falls + * to same changelog will be missed. Hence check for boundary + * condition. + */ +#define CHANGELOG_OP_BOUNDARY_CHECK(frame, label) \ + do { \ + if (frame->root->op <= GF_FOP_NULL || \ + frame->root->op >= GF_FOP_MAXVALUE) \ + goto label; \ + } while (0) + +/** + * ignore internal fops for all clients except AFR self-heal daemon + */ +#define CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label) \ + do { \ + if ((frame->root->pid != GF_CLIENT_PID_SELF_HEALD) && dict && \ + dict_get(dict, GLUSTERFS_INTERNAL_FOP_KEY)) \ + goto label; \ + } while (0) + +#define CHANGELOG_COND_GOTO(priv, cond, label) \ + do { \ + if (!priv->active || cond) \ + goto label; \ + } while (0) + +/* Begin: Geo-Rep snapshot dependency changes */ + +#define DICT_ERROR -1 +#define BARRIER_OFF 0 +#define BARRIER_ON 1 +#define DICT_DEFAULT 2 + +#define CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, label) \ + do { \ + if (!priv->active) { \ + gf_smsg(this->name, GF_LOG_WARNING, 0, \ + CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE, NULL); \ + ret = 0; \ + goto label; \ + } \ + } while (0) + +/* Log pthread error and goto label */ +#define CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, label) \ + do { \ + if (ret) { \ + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \ + "error=%d", ret, NULL); \ + ret = -1; \ + goto label; \ + } \ + } while (0); + +/* Log pthread error, set flag and goto label */ +#define CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, label, flag) \ + do { \ + if (ret) { \ + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \ + "error=%d", ret, NULL); \ + ret = -1; \ + flag = _gf_true; \ + goto label; \ + } \ + } while (0) + +/* Log pthread error, unlock mutex and goto label */ +#define CHANGELOG_PTHREAD_ERROR_HANDLE_2(ret, label, mutex) \ + do { \ + if (ret) { \ + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \ + "error=%d", ret, NULL); \ + ret = -1; \ + pthread_mutex_unlock(&mutex); \ + goto label; \ + } \ + } while (0) + +/* End: Geo-Rep snapshot dependency changes */ + +#endif /* _CHANGELOG_HELPERS_H */ diff --git a/xlators/features/changelog/src/changelog-mem-types.h b/xlators/features/changelog/src/changelog-mem-types.h new file mode 100644 index 00000000000..a2d8a9cbe93 --- /dev/null +++ b/xlators/features/changelog/src/changelog-mem-types.h @@ -0,0 +1,34 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CHANGELOG_MEM_TYPES_H +#define _CHANGELOG_MEM_TYPES_H + +#include <glusterfs/mem-types.h> + +enum gf_changelog_mem_types { + gf_changelog_mt_priv_t = gf_common_mt_end + 1, + gf_changelog_mt_str_t = gf_common_mt_end + 2, + gf_changelog_mt_batch_t = gf_common_mt_end + 3, + gf_changelog_mt_rt_t = gf_common_mt_end + 4, + gf_changelog_mt_inode_ctx_t = gf_common_mt_end + 5, + gf_changelog_mt_rpc_clnt_t = gf_common_mt_end + 6, + gf_changelog_mt_libgfchangelog_t = gf_common_mt_end + 7, + gf_changelog_mt_libgfchangelog_entry_t = gf_common_mt_end + 8, + gf_changelog_mt_libgfchangelog_rl_t = gf_common_mt_end + 9, + gf_changelog_mt_changelog_buffer_t = gf_common_mt_end + 10, + gf_changelog_mt_history_data_t = gf_common_mt_end + 11, + gf_changelog_mt_libgfchangelog_call_pool_t = gf_common_mt_end + 12, + gf_changelog_mt_libgfchangelog_event_t = gf_common_mt_end + 13, + gf_changelog_mt_ev_dispatcher_t = gf_common_mt_end + 14, + gf_changelog_mt_end +}; + +#endif diff --git a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h new file mode 100644 index 00000000000..cb0e16c85d8 --- /dev/null +++ b/xlators/features/changelog/src/changelog-messages.h @@ -0,0 +1,172 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. + */ + +#ifndef _CHANGELOG_MESSAGES_H_ +#define _CHANGELOG_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. + */ + +GLFS_MSGID( + CHANGELOG, CHANGELOG_MSG_OPEN_FAILED, CHANGELOG_MSG_BARRIER_FOP_FAILED, + CHANGELOG_MSG_VOL_MISCONFIGURED, CHANGELOG_MSG_RENAME_ERROR, + CHANGELOG_MSG_READ_ERROR, CHANGELOG_MSG_HTIME_ERROR, + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, CHANGELOG_MSG_CHILD_MISCONFIGURED, + CHANGELOG_MSG_DIR_OPTIONS_NOT_SET, CHANGELOG_MSG_CLOSE_ERROR, + CHANGELOG_MSG_PIPE_CREATION_ERROR, CHANGELOG_MSG_DICT_GET_FAILED, + CHANGELOG_MSG_BARRIER_INFO, CHANGELOG_MSG_BARRIER_ERROR, + CHANGELOG_MSG_GET_TIME_OP_FAILED, CHANGELOG_MSG_WRITE_FAILED, + CHANGELOG_MSG_PTHREAD_ERROR, CHANGELOG_MSG_INODE_NOT_FOUND, + CHANGELOG_MSG_FSYNC_OP_FAILED, CHANGELOG_MSG_TOTAL_LOG_INFO, + CHANGELOG_MSG_SNAP_INFO, CHANGELOG_MSG_SELECT_FAILED, + CHANGELOG_MSG_FCNTL_FAILED, CHANGELOG_MSG_BNOTIFY_INFO, + CHANGELOG_MSG_ENTRY_BUF_INFO, CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE, + CHANGELOG_MSG_LOCAL_INIT_FAILED, CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, + CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, CHANGELOG_MSG_HANDLE_PROBE_ERROR, + CHANGELOG_MSG_SET_FD_CONTEXT, CHANGELOG_MSG_FREEUP_FAILED, + CHANGELOG_MSG_RECONFIGURE, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED, + CHANGELOG_MSG_RPC_BUILD_ERROR, CHANGELOG_MSG_RPC_CONNECT_ERROR, + CHANGELOG_MSG_RPC_START_ERROR, CHANGELOG_MSG_BUFFER_STARVATION_ERROR, + CHANGELOG_MSG_SCAN_DIR_FAILED, CHANGELOG_MSG_FSETXATTR_FAILED, + CHANGELOG_MSG_FGETXATTR_FAILED, CHANGELOG_MSG_CLEANUP_ON_ACTIVE_REF, + CHANGELOG_MSG_DISPATCH_EVENT_FAILED, CHANGELOG_MSG_PUT_BUFFER_FAILED, + CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, + CHANGELOG_MSG_INJECT_FSYNC_FAILED, CHANGELOG_MSG_CREATE_FRAME_FAILED, + CHANGELOG_MSG_FSTAT_OP_FAILED, CHANGELOG_MSG_LSEEK_OP_FAILED, + CHANGELOG_MSG_STRSTR_OP_FAILED, CHANGELOG_MSG_UNLINK_OP_FAILED, + CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED, + CHANGELOG_MSG_READLINK_OP_FAILED, CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED, + CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED, CHANGELOG_MSG_MEMORY_INIT_FAILED, + CHANGELOG_MSG_NO_MEMORY, CHANGELOG_MSG_HTIME_STAT_ERROR, + CHANGELOG_MSG_HTIME_CURRENT_ERROR, CHANGELOG_MSG_BNOTIFY_COND_INFO, + CHANGELOG_MSG_NO_HTIME_CURRENT, CHANGELOG_MSG_HTIME_CURRENT, + CHANGELOG_MSG_NEW_HTIME_FILE, CHANGELOG_MSG_MKDIR_ERROR, + CHANGELOG_MSG_PATH_NOT_FOUND, CHANGELOG_MSG_XATTR_INIT_FAILED, + CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_UNUSED_0, + CHANGELOG_MSG_GET_BUFFER_FAILED, CHANGELOG_MSG_BARRIER_STATE_NOTIFY, + CHANGELOG_MSG_BARRIER_DISABLED, CHANGELOG_MSG_BARRIER_ALREADY_DISABLED, + CHANGELOG_MSG_BARRIER_ON_ERROR, CHANGELOG_MSG_BARRIER_ENABLE, + CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, CHANGELOG_MSG_ERROR_IN_DICT_GET, + CHANGELOG_MSG_UNUSED_1, CHANGELOG_MSG_UNUSED_2, + CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS, + CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED, + CHANGELOG_MSG_BARRIER_TIMEOUT, CHANGELOG_MSG_TIMEOUT_ADD_FAILED, + CHANGELOG_MSG_CLEANUP_ALREADY_SET); + +#define CHANGELOG_MSG_BARRIER_FOP_FAILED_STR \ + "failed to barrier FOPs, disabling changelog barrier" +#define CHANGELOG_MSG_MEMORY_INIT_FAILED_STR "memory accounting init failed" +#define CHANGELOG_MSG_NO_MEMORY_STR "failed to create local memory pool" +#define CHANGELOG_MSG_ENTRY_BUF_INFO_STR \ + "Entry cannot be captured for gfid, Capturing DATA entry." +#define CHANGELOG_MSG_PTHREAD_ERROR_STR "pthread error" +#define CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED_STR "pthread_mutex_init failed" +#define CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED_STR "pthread_cond_init failed" +#define CHANGELOG_MSG_HTIME_ERROR_STR "failed to update HTIME file" +#define CHANGELOG_MSG_HTIME_STAT_ERROR_STR "unable to stat htime file" +#define CHANGELOG_MSG_HTIME_CURRENT_ERROR_STR "Error extracting HTIME_CURRENT." +#define CHANGELOG_MSG_UNLINK_OP_FAILED_STR "error unlinking empty changelog" +#define CHANGELOG_MSG_RENAME_ERROR_STR "error renaming" +#define CHANGELOG_MSG_MKDIR_ERROR_STR "unable to create directory" +#define CHANGELOG_MSG_BNOTIFY_INFO_STR \ + "Explicit rollover changelog signaling bnotify" +#define CHANGELOG_MSG_BNOTIFY_COND_INFO_STR "Woke up: bnotify conditional wait" +#define CHANGELOG_MSG_RECONFIGURE_STR "Reconfigure: Changelog Enable" +#define CHANGELOG_MSG_NO_HTIME_CURRENT_STR \ + "HTIME_CURRENT not found. Changelog enabled before init" +#define CHANGELOG_MSG_HTIME_CURRENT_STR "HTIME_CURRENT" +#define CHANGELOG_MSG_NEW_HTIME_FILE_STR \ + "Changelog enable: Creating new HTIME file" +#define CHANGELOG_MSG_FGETXATTR_FAILED_STR "fgetxattr failed" +#define CHANGELOG_MSG_TOTAL_LOG_INFO_STR "changelog info" +#define CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED_STR "pthread cond wait failed" +#define CHANGELOG_MSG_INODE_NOT_FOUND_STR "inode not found" +#define CHANGELOG_MSG_READLINK_OP_FAILED_STR \ + "could not read the link from the gfid handle" +#define CHANGELOG_MSG_OPEN_FAILED_STR "unable to open file" +#define CHANGELOG_MSG_RPC_CONNECT_ERROR_STR "failed to connect back" +#define CHANGELOG_MSG_BUFFER_STARVATION_ERROR_STR \ + "Failed to get buffer for RPC dispatch" +#define CHANGELOG_MSG_PTHREAD_CANCEL_FAILED_STR "could not cancel thread" +#define CHANGELOG_MSG_FSTAT_OP_FAILED_STR "Could not stat (CHANGELOG)" +#define CHANGELOG_MSG_LSEEK_OP_FAILED_STR "Could not lseek (changelog)" +#define CHANGELOG_MSG_PATH_NOT_FOUND_STR \ + "Could not find CHANGELOG in changelog path" +#define CHANGELOG_MSG_FSYNC_OP_FAILED_STR "fsync failed" +#define CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED_STR \ + "Error detecting empty changelog" +#define CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED_STR \ + "Fail snapshot because of previous errors" +#define CHANGELOG_MSG_SCAN_DIR_FAILED_STR "scandir failed" +#define CHANGELOG_MSG_FSETXATTR_FAILED_STR "fsetxattr failed" +#define CHANGELOG_MSG_XATTR_INIT_FAILED_STR "Htime xattr initialization failed" +#define CHANGELOG_MSG_SNAP_INFO_STR "log in call path" +#define CHANGELOG_MSG_WRITE_FAILED_STR "error writing to disk" +#define CHANGELOG_MSG_WROTE_TO_CSNAP_STR "Successfully wrote to csnap" +#define CHANGELOG_MSG_GET_TIME_OP_FAILED_STR "Problem rolling over changelog(s)" +#define CHANGELOG_MSG_BARRIER_INFO_STR "Explicit wakeup on barrier notify" +#define CHANGELOG_MSG_SELECT_FAILED_STR "pthread_cond_timedwait failed" +#define CHANGELOG_MSG_INJECT_FSYNC_FAILED_STR "failed to inject fsync event" +#define CHANGELOG_MSG_LOCAL_INIT_FAILED_STR \ + "changelog local initialization failed" +#define CHANGELOG_MSG_GET_BUFFER_FAILED_STR "Failed to get buffer" +#define CHANGELOG_MSG_SET_FD_CONTEXT_STR \ + "could not set fd context(for release cbk)" +#define CHANGELOG_MSG_DICT_GET_FAILED_STR "Barrier failed" +#define CHANGELOG_MSG_BARRIER_STATE_NOTIFY_STR "Barrier notification" +#define CHANGELOG_MSG_BARRIER_ERROR_STR \ + "Received another barrier off notification while already off" +#define CHANGELOG_MSG_BARRIER_DISABLED_STR "disabled changelog barrier" +#define CHANGELOG_MSG_BARRIER_ALREADY_DISABLED_STR \ + "Changelog barrier already disabled" +#define CHANGELOG_MSG_BARRIER_ON_ERROR_STR \ + "Received another barrier on notification when last one is not served yet" +#define CHANGELOG_MSG_BARRIER_ENABLE_STR "Enabled changelog barrier" +#define CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND_STR "barrier key not found" +#define CHANGELOG_MSG_ERROR_IN_DICT_GET_STR \ + "Something went wrong in dict_get_str_boolean" +#define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET_STR "changelog-dir option is not set" +#define CHANGELOG_MSG_FREEUP_FAILED_STR "could not cleanup bootstrapper" +#define CHANGELOG_MSG_CHILD_MISCONFIGURED_STR \ + "translator needs a single subvolume" +#define CHANGELOG_MSG_VOL_MISCONFIGURED_STR \ + "dangling volume. please check volfile" +#define CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_STR \ + "Dequeuing all the changelog barriered fops" +#define CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED_STR \ + "Dequeuing changelog barriered fops is finished" +#define CHANGELOG_MSG_BARRIER_TIMEOUT_STR \ + "Disabling changelog barrier because of the timeout" +#define CHANGELOG_MSG_TIMEOUT_ADD_FAILED_STR \ + "Couldn't add changelog barrier timeout event" +#define CHANGELOG_MSG_RPC_BUILD_ERROR_STR "failed to build rpc options" +#define CHANGELOG_MSG_NOTIFY_REGISTER_FAILED_STR "failed to register notify" +#define CHANGELOG_MSG_RPC_START_ERROR_STR "failed to start rpc" +#define CHANGELOG_MSG_CREATE_FRAME_FAILED_STR "failed to create frame" +#define CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED_STR "failed to serialize reply" +#define CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED_STR "cannot register program" +#define CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE_STR \ + "Changelog is not active, return success" +#define CHANGELOG_MSG_PUT_BUFFER_FAILED_STR \ + "failed to put buffer after consumption" +#define CHANGELOG_MSG_CLEANUP_ALREADY_SET_STR \ + "cleanup_starting flag is already set for xl" +#define CHANGELOG_MSG_HANDLE_PROBE_ERROR_STR "xdr decoding error" +#endif /* !_CHANGELOG_MESSAGES_H_ */ diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h new file mode 100644 index 00000000000..e2addc09414 --- /dev/null +++ b/xlators/features/changelog/src/changelog-misc.h @@ -0,0 +1,131 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CHANGELOG_MISC_H +#define _CHANGELOG_MISC_H + +#include <glusterfs/glusterfs.h> +#include <glusterfs/common-utils.h> + +#define CHANGELOG_MAX_TYPE 4 +#define CHANGELOG_FILE_NAME "CHANGELOG" +#define HTIME_FILE_NAME "HTIME" +#define CSNAP_FILE_NAME "CHANGELOG.SNAP" +#define HTIME_KEY "trusted.glusterfs.htime" +#define HTIME_CURRENT "trusted.glusterfs.current_htime" +#define HTIME_INITIAL_VALUE "0:0" + +#define CHANGELOG_VERSION_MAJOR 1 +#define CHANGELOG_VERSION_MINOR 2 + +#define CHANGELOG_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY "/changelog-%s.sock" +#define CHANGELOG_TMP_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY "/.%s%lu.sock" + +/** + * header starts with the version and the format of the changelog. + * 'version' not much of a use now. + */ +#define CHANGELOG_HEADER \ + "GlusterFS Changelog | version: v%d.%d | encoding : %d\n" + +#define CHANGELOG_MAKE_SOCKET_PATH(brick_path, sockpath, len) \ + do { \ + char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { \ + 0, \ + }; \ + gf_xxh64_wrapper((unsigned char *)brick_path, strlen(brick_path), \ + GF_XXHSUM64_DEFAULT_SEED, xxh64); \ + (void)snprintf(sockpath, len, CHANGELOG_UNIX_SOCK, xxh64); \ + } while (0) + +#define CHANGELOG_MAKE_TMP_SOCKET_PATH(brick_path, sockpath, len) \ + do { \ + unsigned long pid = 0; \ + char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { \ + 0, \ + }; \ + pid = (unsigned long)getpid(); \ + gf_xxh64_wrapper((unsigned char *)brick_path, strlen(brick_path), \ + GF_XXHSUM64_DEFAULT_SEED, xxh64); \ + (void)snprintf(sockpath, len, CHANGELOG_TMP_UNIX_SOCK, xxh64, pid); \ + } while (0) + +/** + * ... used by libgfchangelog. + */ +#define CHANGELOG_GET_HEADER_INFO(fd, buffer, len, enc, maj, min, elen) \ + do { \ + FILE *fp; \ + int fd_dup; \ + \ + enc = -1; \ + maj = -1; \ + min = -1; \ + fd_dup = dup(fd); \ + \ + if (fd_dup != -1) { \ + fp = fdopen(fd_dup, "r"); \ + if (fp) { \ + if (fgets(buffer, len, fp)) { \ + elen = strlen(buffer); \ + sscanf(buffer, CHANGELOG_HEADER, &maj, &min, &enc); \ + } \ + fclose(fp); \ + } else { \ + sys_close(fd_dup); \ + } \ + } \ + } while (0) + +#define CHANGELOG_FILL_HTIME_DIR(changelog_dir, path) \ + do { \ + snprintf(path, sizeof(path), "%s/htime", changelog_dir); \ + } while (0) + +#define CHANGELOG_FILL_CSNAP_DIR(changelog_dir, path) \ + do { \ + snprintf(path, sizeof(path), "%s/csnap", changelog_dir); \ + } while (0) +/** + * everything after 'CHANGELOG_TYPE_METADATA_XATTR' are internal types + * (ie. none of the fops trigger this type of event), hence + * CHANGELOG_MAX_TYPE = 4 + */ +typedef enum { + CHANGELOG_TYPE_DATA = 0, + CHANGELOG_TYPE_METADATA, + CHANGELOG_TYPE_ENTRY, + CHANGELOG_TYPE_METADATA_XATTR, + CHANGELOG_TYPE_ROLLOVER, + CHANGELOG_TYPE_FSYNC, +} changelog_log_type; + +/* operation modes - RT for now */ +typedef enum { + CHANGELOG_MODE_RT = 0, +} changelog_mode_t; + +/* encoder types */ + +typedef enum { + CHANGELOG_ENCODE_MIN = 0, + CHANGELOG_ENCODE_BINARY, + CHANGELOG_ENCODE_ASCII, + CHANGELOG_ENCODE_MAX, +} changelog_encoder_t; + +#define CHANGELOG_VALID_ENCODING(enc) \ + (enc > CHANGELOG_ENCODE_MIN && enc < CHANGELOG_ENCODE_MAX) + +#define CHANGELOG_TYPE_IS_ENTRY(type) (type == CHANGELOG_TYPE_ENTRY) +#define CHANGELOG_TYPE_IS_ROLLOVER(type) (type == CHANGELOG_TYPE_ROLLOVER) +#define CHANGELOG_TYPE_IS_FSYNC(type) (type == CHANGELOG_TYPE_FSYNC) + +#endif /* _CHANGELOG_MISC_H */ diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c new file mode 100644 index 00000000000..125246a17e1 --- /dev/null +++ b/xlators/features/changelog/src/changelog-rpc-common.c @@ -0,0 +1,359 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "changelog-rpc-common.h" +#include "changelog-messages.h" + +#include <glusterfs/syscall.h> +/** +***************************************************** + Client Interface +***************************************************** +*/ + +/** + * Initialize and return an RPC client object for a given unix + * domain socket. + */ + +void * +changelog_rpc_poller(void *arg) +{ + xlator_t *this = arg; + + (void)gf_event_dispatch(this->ctx->event_pool); + return NULL; +} + +struct rpc_clnt * +changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile, + rpc_clnt_notify_t fn) +{ + int ret = 0; + struct rpc_clnt *rpc = NULL; + dict_t *options = NULL; + + if (!cbkdata) + cbkdata = this; + + options = dict_new(); + if (!options) + goto error_return; + + ret = rpc_transport_unix_options_build(options, sockfile, 0); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_BUILD_ERROR, + NULL); + goto dealloc_dict; + } + + rpc = rpc_clnt_new(options, this, this->name, 16); + if (!rpc) + goto dealloc_dict; + + ret = rpc_clnt_register_notify(rpc, fn, cbkdata); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, NULL); + goto dealloc_rpc_clnt; + } + + ret = rpc_clnt_start(rpc); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR, + NULL); + goto dealloc_rpc_clnt; + } + + dict_unref(options); + return rpc; + +dealloc_rpc_clnt: + rpc_clnt_unref(rpc); +dealloc_dict: + dict_unref(options); +error_return: + return NULL; +} + +/** + * Generic RPC client routine to dispatch a request to an + * RPC server. + */ +int +changelog_rpc_sumbit_req(struct rpc_clnt *rpc, void *req, call_frame_t *frame, + rpc_clnt_prog_t *prog, int procnum, + struct iovec *payload, int payloadcnt, + struct iobref *iobref, xlator_t *this, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc) +{ + int ret = 0; + int count = 0; + struct iovec iov = { + 0, + }; + struct iobuf *iobuf = NULL; + char new_iobref = 0; + ssize_t xdr_size = 0; + + GF_ASSERT(this); + + if (req) { + xdr_size = xdr_sizeof(xdrproc, req); + + iobuf = iobuf_get2(this->ctx->iobuf_pool, xdr_size); + if (!iobuf) { + goto out; + }; + + if (!iobref) { + iobref = iobref_new(); + if (!iobref) { + goto out; + } + + new_iobref = 1; + } + + iobref_add(iobref, iobuf); + + iov.iov_base = iobuf->ptr; + iov.iov_len = iobuf_size(iobuf); + + /* Create the xdr payload */ + ret = xdr_serialize_generic(iov, req, xdrproc); + if (ret == -1) { + goto out; + } + + iov.iov_len = ret; + count = 1; + } + + ret = rpc_clnt_submit(rpc, prog, procnum, cbkfn, &iov, count, payload, + payloadcnt, iobref, frame, NULL, 0, NULL, 0, NULL); + +out: + if (new_iobref) + iobref_unref(iobref); + if (iobuf) + iobuf_unref(iobuf); + return ret; +} + +/** + * Entry point to perform a remote procedure call + */ +int +changelog_invoke_rpc(xlator_t *this, struct rpc_clnt *rpc, + rpc_clnt_prog_t *prog, int procidx, void *arg) +{ + int ret = 0; + call_frame_t *frame = NULL; + rpc_clnt_procedure_t *proc = NULL; + + if (!this || !prog) + goto error_return; + + frame = create_frame(this, this->ctx->pool); + if (!frame) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CREATE_FRAME_FAILED, + NULL); + goto error_return; + } + + proc = &prog->proctable[procidx]; + if (proc->fn) + ret = proc->fn(frame, this, arg); + + STACK_DESTROY(frame->root); + return ret; + +error_return: + return -1; +} + +/** +***************************************************** + Server Interface +***************************************************** +*/ + +struct iobuf * +__changelog_rpc_serialize_reply(rpcsvc_request_t *req, void *arg, + struct iovec *outmsg, xdrproc_t xdrproc) +{ + struct iobuf *iob = NULL; + ssize_t retlen = 0; + ssize_t rsp_size = 0; + + rsp_size = xdr_sizeof(xdrproc, arg); + iob = iobuf_get2(req->svc->ctx->iobuf_pool, rsp_size); + if (!iob) + goto error_return; + + iobuf_to_iovec(iob, outmsg); + + retlen = xdr_serialize_generic(*outmsg, arg, xdrproc); + if (retlen == -1) + goto unref_iob; + + outmsg->iov_len = retlen; + return iob; + +unref_iob: + iobuf_unref(iob); +error_return: + return NULL; +} + +int +changelog_rpc_sumbit_reply(rpcsvc_request_t *req, void *arg, + struct iovec *payload, int payloadcount, + struct iobref *iobref, xdrproc_t xdrproc) +{ + int ret = -1; + struct iobuf *iob = NULL; + struct iovec iov = { + 0, + }; + char new_iobref = 0; + + if (!req) + goto return_ret; + + if (!iobref) { + iobref = iobref_new(); + if (!iobref) + goto return_ret; + new_iobref = 1; + } + + iob = __changelog_rpc_serialize_reply(req, arg, &iov, xdrproc); + if (!iob) + gf_smsg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED, + NULL); + else + iobref_add(iobref, iob); + + ret = rpcsvc_submit_generic(req, &iov, 1, payload, payloadcount, iobref); + + if (new_iobref) + iobref_unref(iobref); + if (iob) + iobuf_unref(iob); +return_ret: + return ret; +} + +void +changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile, + rpcsvc_notify_t fn, struct rpcsvc_program **progs) +{ + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; + struct rpcsvc_program *prog = NULL; + rpc_transport_t *trans = NULL; + + if (!rpc) + return; + + while (*progs) { + prog = *progs; + (void)rpcsvc_program_unregister(rpc, prog); + progs++; + } + + list_for_each_entry_safe(listener, next, &rpc->listeners, list) + { + if (listener->trans) { + trans = listener->trans; + rpc_transport_disconnect(trans, _gf_false); + } + } + + (void)rpcsvc_unregister_notify(rpc, fn, this); + + /* TODO Avoid freeing rpc object in case of brick multiplex + after freeing rpc object svc->rpclock corrupted and it takes + more time to detach a brick + */ + if (!this->cleanup_starting) { + if (rpc->rxpool) { + mem_pool_destroy(rpc->rxpool); + rpc->rxpool = NULL; + } + GF_FREE(rpc); + } +} + +rpcsvc_t * +changelog_rpc_server_init(xlator_t *this, char *sockfile, void *cbkdata, + rpcsvc_notify_t fn, struct rpcsvc_program **progs) +{ + int ret = 0; + rpcsvc_t *rpc = NULL; + dict_t *options = NULL; + struct rpcsvc_program *prog = NULL; + + if (!cbkdata) + cbkdata = this; + + options = dict_new(); + if (!options) + return NULL; + + ret = rpcsvc_transport_unix_options_build(options, sockfile); + if (ret) + goto dealloc_dict; + + rpc = rpcsvc_init(this, this->ctx, options, 8); + if (rpc == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR, + NULL); + goto dealloc_dict; + } + + ret = rpcsvc_register_notify(rpc, fn, cbkdata); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, NULL); + goto dealloc_rpc; + } + + ret = rpcsvc_create_listeners(rpc, options, this->name); + if (ret != 1) { + gf_msg_debug(this->name, 0, "failed to create listeners"); + goto dealloc_rpc; + } + + while (*progs) { + prog = *progs; + ret = rpcsvc_program_register(rpc, prog, _gf_false); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, "name%s", + prog->progname, "prognum=%d", prog->prognum, "pogver=%d", + prog->progver, NULL); + goto dealloc_rpc; + } + + progs++; + } + + dict_unref(options); + return rpc; + +dealloc_rpc: + GF_FREE(rpc); +dealloc_dict: + dict_unref(options); + return NULL; +} diff --git a/xlators/features/changelog/src/changelog-rpc-common.h b/xlators/features/changelog/src/changelog-rpc-common.h new file mode 100644 index 00000000000..4d9aa2c694b --- /dev/null +++ b/xlators/features/changelog/src/changelog-rpc-common.h @@ -0,0 +1,85 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __CHANGELOG_RPC_COMMON_H +#define __CHANGELOG_RPC_COMMON_H + +#include "rpcsvc.h" +#include "rpc-clnt.h" +#include <glusterfs/gf-event.h> +#include <glusterfs/call-stub.h> + +#include "changelog-xdr.h" +#include "xdr-generic.h" + +#include "changelog.h" + +/** + * Let's keep this non-configurable for now. + */ +#define NR_ROTT_BUFFS 4 +#define NR_DISPATCHERS (NR_ROTT_BUFFS - 1) + +enum changelog_rpc_procnum { + CHANGELOG_RPC_PROC_NULL = 0, + CHANGELOG_RPC_PROBE_FILTER = 1, + CHANGELOG_RPC_PROC_MAX = 2, +}; + +#define CHANGELOG_RPC_PROGNUM 1885957735 +#define CHANGELOG_RPC_PROGVER 1 + +/** + * reverse connection: data xfer path + */ +enum changelog_reverse_rpc_procnum { + CHANGELOG_REV_PROC_NULL = 0, + CHANGELOG_REV_PROC_EVENT = 1, + CHANGELOG_REV_PROC_MAX = 2, +}; + +#define CHANGELOG_REV_RPC_PROCNUM 1886350951 +#define CHANGELOG_REV_RPC_PROCVER 1 + +typedef struct changelog_rpc { + rpcsvc_t *svc; + struct rpc_clnt *rpc; + char sock[UNIX_PATH_MAX]; /* tied to server */ +} changelog_rpc_t; + +/* event poller */ +void * +changelog_rpc_poller(void *); + +/* CLIENT API */ +struct rpc_clnt * +changelog_rpc_client_init(xlator_t *, void *, char *, rpc_clnt_notify_t); + +int +changelog_rpc_sumbit_req(struct rpc_clnt *, void *, call_frame_t *, + rpc_clnt_prog_t *, int, struct iovec *, int, + struct iobref *, xlator_t *, fop_cbk_fn_t, xdrproc_t); + +int +changelog_invoke_rpc(xlator_t *, struct rpc_clnt *, rpc_clnt_prog_t *, int, + void *); + +/* SERVER API */ +int +changelog_rpc_sumbit_reply(rpcsvc_request_t *, void *, struct iovec *, int, + struct iobref *, xdrproc_t); +rpcsvc_t * +changelog_rpc_server_init(xlator_t *, char *, void *, rpcsvc_notify_t, + struct rpcsvc_program **); +void +changelog_rpc_server_destroy(xlator_t *, rpcsvc_t *, char *, rpcsvc_notify_t, + struct rpcsvc_program **); + +#endif diff --git a/xlators/features/changelog/src/changelog-rpc.c b/xlators/features/changelog/src/changelog-rpc.c new file mode 100644 index 00000000000..440b88091a6 --- /dev/null +++ b/xlators/features/changelog/src/changelog-rpc.c @@ -0,0 +1,440 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <glusterfs/syscall.h> +#include "changelog-rpc.h" +#include "changelog-mem-types.h" +#include "changelog-ev-handle.h" + +static struct rpcsvc_program *changelog_programs[]; + +static void +changelog_cleanup_dispatchers(xlator_t *this, changelog_priv_t *priv, int count) +{ + for (count--; count >= 0; count--) { + (void)changelog_thread_cleanup(this, priv->ev_dispatcher[count]); + priv->ev_dispatcher[count] = 0; + } +} + +int +changelog_cleanup_rpc_threads(xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + changelog_clnt_t *conn = NULL; + + conn = &priv->connections; + if (!conn) + return 0; + + /** terminate RPC thread(s) */ + ret = changelog_thread_cleanup(this, priv->connector); + if (ret != 0) + goto error_return; + priv->connector = 0; + + /** terminate dispatcher thread(s) */ + changelog_cleanup_dispatchers(this, priv, priv->nr_dispatchers); + + /* destroy locks */ + ret = pthread_mutex_destroy(&conn->pending_lock); + if (ret != 0) + goto error_return; + ret = pthread_cond_destroy(&conn->pending_cond); + if (ret != 0) + goto error_return; + ret = LOCK_DESTROY(&conn->active_lock); + if (ret != 0) + goto error_return; + ret = LOCK_DESTROY(&conn->wait_lock); + if (ret != 0) + goto error_return; + return 0; + +error_return: + return -1; +} + +static int +changelog_init_rpc_threads(xlator_t *this, changelog_priv_t *priv, rbuf_t *rbuf, + int nr_dispatchers) +{ + int j = 0; + int ret = 0; + changelog_clnt_t *conn = NULL; + + conn = &priv->connections; + + conn->this = this; + conn->rbuf = rbuf; + conn->sequence = 1; /* start with sequence number one */ + + INIT_LIST_HEAD(&conn->pending); + INIT_LIST_HEAD(&conn->active); + INIT_LIST_HEAD(&conn->waitq); + + ret = pthread_mutex_init(&conn->pending_lock, NULL); + if (ret) + goto error_return; + ret = pthread_cond_init(&conn->pending_cond, NULL); + if (ret) + goto cleanup_pending_lock; + + ret = LOCK_INIT(&conn->active_lock); + if (ret) + goto cleanup_pending_cond; + ret = LOCK_INIT(&conn->wait_lock); + if (ret) + goto cleanup_active_lock; + + /* spawn reverse connection thread */ + ret = gf_thread_create(&priv->connector, NULL, changelog_ev_connector, conn, + "clogecon"); + if (ret != 0) + goto cleanup_wait_lock; + + /* spawn dispatcher thread(s) */ + priv->ev_dispatcher = GF_CALLOC(nr_dispatchers, sizeof(pthread_t), + gf_changelog_mt_ev_dispatcher_t); + if (!priv->ev_dispatcher) + goto cleanup_connector; + + /* spawn dispatcher threads */ + for (; j < nr_dispatchers; j++) { + ret = gf_thread_create(&priv->ev_dispatcher[j], NULL, + changelog_ev_dispatch, conn, "clogd%03hx", + j & 0x3ff); + if (ret != 0) { + changelog_cleanup_dispatchers(this, priv, j); + break; + } + } + + if (ret != 0) + goto cleanup_connector; + + priv->nr_dispatchers = nr_dispatchers; + return 0; + +cleanup_connector: + (void)pthread_cancel(priv->connector); +cleanup_wait_lock: + LOCK_DESTROY(&conn->wait_lock); +cleanup_active_lock: + LOCK_DESTROY(&conn->active_lock); +cleanup_pending_cond: + (void)pthread_cond_destroy(&conn->pending_cond); +cleanup_pending_lock: + (void)pthread_mutex_destroy(&conn->pending_lock); +error_return: + return -1; +} + +int +changelog_rpcsvc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, + void *data) +{ + xlator_t *this = NULL; + rpc_transport_t *trans = NULL; + rpc_transport_t *xprt = NULL; + rpc_transport_t *xp_next = NULL; + changelog_priv_t *priv = NULL; + uint64_t listnercnt = 0; + uint64_t xprtcnt = 0; + uint64_t clntcnt = 0; + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; + gf_boolean_t listner_found = _gf_false; + socket_private_t *sockpriv = NULL; + + if (!xl || !data || !rpc) { + gf_msg_callingfn("changelog", GF_LOG_WARNING, 0, + CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED, + "Calling rpc_notify without initializing"); + goto out; + } + + this = xl; + trans = data; + priv = this->private; + + if (!priv) { + gf_msg_callingfn("changelog", GF_LOG_WARNING, 0, + CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED, + "Calling rpc_notify without priv initializing"); + goto out; + } + + if (event == RPCSVC_EVENT_ACCEPT) { + GF_ATOMIC_INC(priv->xprtcnt); + LOCK(&priv->lock); + { + list_add_tail(&trans->list, &priv->xprt_list); + } + UNLOCK(&priv->lock); + goto out; + } + + if (event == RPCSVC_EVENT_DISCONNECT) { + list_for_each_entry_safe(listener, next, &rpc->listeners, list) + { + if (listener && listener->trans) { + if (listener->trans == trans) { + listnercnt = GF_ATOMIC_DEC(priv->listnercnt); + listner_found = _gf_true; + rpcsvc_listener_destroy(listener); + } + } + } + + if (listnercnt > 0) { + goto out; + } + if (listner_found) { + LOCK(&priv->lock); + list_for_each_entry_safe(xprt, xp_next, &priv->xprt_list, list) + { + sockpriv = (socket_private_t *)(xprt->private); + gf_log("changelog", GF_LOG_INFO, + "Send disconnect" + " on socket %d", + sockpriv->sock); + rpc_transport_disconnect(xprt, _gf_false); + } + UNLOCK(&priv->lock); + goto out; + } + LOCK(&priv->lock); + { + list_del_init(&trans->list); + } + UNLOCK(&priv->lock); + + xprtcnt = GF_ATOMIC_DEC(priv->xprtcnt); + clntcnt = GF_ATOMIC_GET(priv->clntcnt); + if (!xprtcnt && !clntcnt) { + changelog_process_cleanup_event(this); + } + } + +out: + return 0; +} + +void +changelog_process_cleanup_event(xlator_t *this) +{ + gf_boolean_t cleanup_notify = _gf_false; + changelog_priv_t *priv = NULL; + char sockfile[UNIX_PATH_MAX] = { + 0, + }; + + if (!this) + return; + priv = this->private; + if (!priv) + return; + + LOCK(&priv->lock); + { + cleanup_notify = priv->notify_down; + priv->notify_down = _gf_true; + } + UNLOCK(&priv->lock); + + if (priv->victim && !cleanup_notify) { + default_notify(this, GF_EVENT_PARENT_DOWN, priv->victim); + + if (priv->rpc) { + /* sockfile path could have been saved to avoid this */ + CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, + UNIX_PATH_MAX); + sys_unlink(sockfile); + (void)rpcsvc_unregister_notify(priv->rpc, changelog_rpcsvc_notify, + this); + if (priv->rpc->rxpool) { + mem_pool_destroy(priv->rpc->rxpool); + priv->rpc->rxpool = NULL; + } + GF_FREE(priv->rpc); + priv->rpc = NULL; + } + } +} + +void +changelog_destroy_rpc_listner(xlator_t *this, changelog_priv_t *priv) +{ + char sockfile[UNIX_PATH_MAX] = { + 0, + }; + + /* sockfile path could have been saved to avoid this */ + CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, UNIX_PATH_MAX); + changelog_rpc_server_destroy(this, priv->rpc, sockfile, + changelog_rpcsvc_notify, changelog_programs); +} + +rpcsvc_t * +changelog_init_rpc_listener(xlator_t *this, changelog_priv_t *priv, + rbuf_t *rbuf, int nr_dispatchers) +{ + int ret = 0; + char sockfile[UNIX_PATH_MAX] = { + 0, + }; + rpcsvc_t *svcp; + + ret = changelog_init_rpc_threads(this, priv, rbuf, nr_dispatchers); + if (ret) + return NULL; + + CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, UNIX_PATH_MAX); + (void)sys_unlink(sockfile); + svcp = changelog_rpc_server_init( + this, sockfile, NULL, changelog_rpcsvc_notify, changelog_programs); + return svcp; +} + +void +changelog_rpc_clnt_cleanup(changelog_rpc_clnt_t *crpc) +{ + if (!crpc) + return; + crpc->c_clnt = NULL; + LOCK_DESTROY(&crpc->lock); + GF_FREE(crpc); +} + +static changelog_rpc_clnt_t * +changelog_rpc_clnt_init(xlator_t *this, changelog_probe_req *rpc_req, + changelog_clnt_t *c_clnt) +{ + int ret = 0; + changelog_rpc_clnt_t *crpc = NULL; + + crpc = GF_CALLOC(1, sizeof(*crpc), gf_changelog_mt_rpc_clnt_t); + if (!crpc) + goto error_return; + INIT_LIST_HEAD(&crpc->list); + + /* Take a ref, the last unref will be on RPC_CLNT_DESTROY + * which comes as a result of last rpc_clnt_unref. + */ + GF_ATOMIC_INIT(crpc->ref, 1); + changelog_set_disconnect_flag(crpc, _gf_false); + + crpc->filter = rpc_req->filter; + (void)memcpy(crpc->sock, rpc_req->sock, strlen(rpc_req->sock)); + + crpc->this = this; + crpc->c_clnt = c_clnt; + crpc->cleanup = changelog_rpc_clnt_cleanup; + + ret = LOCK_INIT(&crpc->lock); + if (ret != 0) + goto dealloc_crpc; + return crpc; + +dealloc_crpc: + GF_FREE(crpc); +error_return: + return NULL; +} + +/** + * Actor declarations + */ + +/** + * @probe_handler + * A probe RPC call spawns a connect back to the caller. Caller also + * passes an hint which acts as a filter for selecting updates. + */ + +int +changelog_handle_probe(rpcsvc_request_t *req) +{ + int ret = 0; + xlator_t *this = NULL; + rpcsvc_t *svc = NULL; + changelog_priv_t *priv = NULL; + changelog_clnt_t *c_clnt = NULL; + changelog_rpc_clnt_t *crpc = NULL; + + changelog_probe_req rpc_req = { + 0, + }; + changelog_probe_rsp rpc_rsp = { + 0, + }; + + this = req->trans->xl; + if (this->cleanup_starting) { + gf_smsg(this->name, GF_LOG_DEBUG, 0, CHANGELOG_MSG_CLEANUP_ALREADY_SET, + NULL); + return 0; + } + + ret = xdr_to_generic(req->msg[0], &rpc_req, + (xdrproc_t)xdr_changelog_probe_req); + if (ret < 0) { + gf_smsg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_HANDLE_PROBE_ERROR, NULL); + req->rpc_err = GARBAGE_ARGS; + goto handle_xdr_error; + } + + /* ->xl hidden in rpcsvc */ + svc = rpcsvc_request_service(req); + this = svc->xl; + priv = this->private; + c_clnt = &priv->connections; + + crpc = changelog_rpc_clnt_init(this, &rpc_req, c_clnt); + if (!crpc) + goto handle_xdr_error; + + changelog_ev_queue_connection(c_clnt, crpc); + rpc_rsp.op_ret = 0; + + goto submit_rpc; + +handle_xdr_error: + rpc_rsp.op_ret = -1; +submit_rpc: + (void)changelog_rpc_sumbit_reply(req, &rpc_rsp, NULL, 0, NULL, + (xdrproc_t)xdr_changelog_probe_rsp); + return 0; +} + +/** + * RPC declarations + */ + +static rpcsvc_actor_t changelog_svc_actors[CHANGELOG_RPC_PROC_MAX] = { + [CHANGELOG_RPC_PROBE_FILTER] = {"CHANGELOG PROBE FILTER", + changelog_handle_probe, NULL, + CHANGELOG_RPC_PROBE_FILTER, DRC_NA, 0}, +}; + +static struct rpcsvc_program changelog_svc_prog = { + .progname = CHANGELOG_RPC_PROGNAME, + .prognum = CHANGELOG_RPC_PROGNUM, + .progver = CHANGELOG_RPC_PROGVER, + .numactors = CHANGELOG_RPC_PROC_MAX, + .actors = changelog_svc_actors, + .synctask = _gf_true, +}; + +static struct rpcsvc_program *changelog_programs[] = { + &changelog_svc_prog, + NULL, +}; diff --git a/xlators/features/changelog/src/changelog-rpc.h b/xlators/features/changelog/src/changelog-rpc.h new file mode 100644 index 00000000000..b1707565249 --- /dev/null +++ b/xlators/features/changelog/src/changelog-rpc.h @@ -0,0 +1,31 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __CHANGELOG_RPC_H +#define __CHANGELOG_RPC_H + +#include <glusterfs/xlator.h> +#include "changelog-helpers.h" + +/* one time */ +#include "socket.h" +#include "changelog-rpc-common.h" + +#define CHANGELOG_RPC_PROGNAME "GlusterFS Changelog" + +rpcsvc_t * +changelog_init_rpc_listener(xlator_t *, changelog_priv_t *, rbuf_t *, int); + +void +changelog_destroy_rpc_listner(xlator_t *, changelog_priv_t *); + +int +changelog_cleanup_rpc_threads(xlator_t *this, changelog_priv_t *priv); +#endif diff --git a/xlators/features/changelog/src/changelog-rt.c b/xlators/features/changelog/src/changelog-rt.c new file mode 100644 index 00000000000..841545ae359 --- /dev/null +++ b/xlators/features/changelog/src/changelog-rt.c @@ -0,0 +1,66 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/logging.h> + +#include "changelog-rt.h" +#include "changelog-mem-types.h" + +int +changelog_rt_init(xlator_t *this, changelog_dispatcher_t *cd) +{ + changelog_rt_t *crt = NULL; + + crt = GF_CALLOC(1, sizeof(*crt), gf_changelog_mt_rt_t); + if (!crt) + return -1; + + LOCK_INIT(&crt->lock); + + cd->cd_data = crt; + cd->dispatchfn = &changelog_rt_enqueue; + + return 0; +} + +int +changelog_rt_fini(xlator_t *this, changelog_dispatcher_t *cd) +{ + changelog_rt_t *crt = NULL; + + crt = cd->cd_data; + + LOCK_DESTROY(&crt->lock); + GF_FREE(crt); + + return 0; +} + +int +changelog_rt_enqueue(xlator_t *this, changelog_priv_t *priv, void *cbatch, + changelog_log_data_t *cld_0, changelog_log_data_t *cld_1) +{ + int ret = 0; + changelog_rt_t *crt = NULL; + + crt = (changelog_rt_t *)cbatch; + + LOCK(&crt->lock); + { + ret = changelog_handle_change(this, priv, cld_0); + if (!ret && cld_1) + ret = changelog_handle_change(this, priv, cld_1); + } + UNLOCK(&crt->lock); + + return ret; +} diff --git a/xlators/features/changelog/src/changelog-rt.h b/xlators/features/changelog/src/changelog-rt.h new file mode 100644 index 00000000000..28b9827d85b --- /dev/null +++ b/xlators/features/changelog/src/changelog-rt.h @@ -0,0 +1,33 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CHANGELOG_RT_H +#define _CHANGELOG_RT_H + +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> +#include "pthread.h" + +#include "changelog-helpers.h" + +/* unused as of now - may be you would need it later */ +typedef struct changelog_rt { + gf_lock_t lock; +} changelog_rt_t; + +int +changelog_rt_init(xlator_t *this, changelog_dispatcher_t *cd); +int +changelog_rt_fini(xlator_t *this, changelog_dispatcher_t *cd); +int +changelog_rt_enqueue(xlator_t *this, changelog_priv_t *priv, void *cbatch, + changelog_log_data_t *cld_0, changelog_log_data_t *cld_1); + +#endif /* _CHANGELOG_RT_H */ diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c new file mode 100644 index 00000000000..6a6e5af859e --- /dev/null +++ b/xlators/features/changelog/src/changelog.c @@ -0,0 +1,2989 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/syscall.h> +#include <glusterfs/logging.h> +#include <glusterfs/iobuf.h> + +#include "changelog-rt.h" + +#include "changelog-encoders.h" +#include "changelog-mem-types.h" +#include "changelog-messages.h" + +#include <pthread.h> +#include <signal.h> + +#include "changelog-rpc.h" +#include "errno.h" + +static struct changelog_bootstrap cb_bootstrap[] = { + { + .mode = CHANGELOG_MODE_RT, + .ctor = changelog_rt_init, + .dtor = changelog_rt_fini, + }, +}; + +static int +changelog_init_rpc(xlator_t *this, changelog_priv_t *priv); + +static int +changelog_init(xlator_t *this, changelog_priv_t *priv); + +/* Entry operations - TYPE III */ + +/** + * entry operations do not undergo inode version checking. + */ + +/* {{{ */ + +/* rmdir */ + +int32_t +changelog_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(rmdir, frame, op_ret, op_errno, preparent, + postparent, xdata); + return 0; +} + +int32_t +changelog_rmdir_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflags, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + + gf_msg_debug(this->name, 0, "Dequeue rmdir"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata); + return 0; +} + +int32_t +changelog_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) +{ + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + + INIT_LIST_HEAD(&queue); + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, loc->inode->gfid, 2); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + co++; + if (priv->capture_del_path) { + CHANGELOG_FILL_ENTRY_DIR_PATH(co, loc->pargfid, loc->name, del_entry_fn, + del_entry_free_fn, xtra_len, wind, + _gf_true); + } else { + CHANGELOG_FILL_ENTRY_DIR_PATH(co, loc->pargfid, loc->name, del_entry_fn, + del_entry_free_fn, xtra_len, wind, + _gf_false); + } + + changelog_set_usable_record_and_length(frame->local, xtra_len, 2); + + /* changelog barrier */ + /* Color assignment and increment of fop_cnt for rmdir/unlink/rename + * should be made with in priv lock if changelog barrier is not enabled. + * Because if counter is not incremented yet, draining wakes up and + * publishes the changelog but later these fops might hit the disk and + * present in snapped volume but where as the intention is these fops + * should not be present in snapped volume. + */ + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_rmdir_stub(frame, changelog_rmdir_resume, loc, xflags, + xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueue rmdir"); + goto out; + } + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=rmdir", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + + /* changelog barrier */ + +wind: + STACK_WIND(frame, changelog_rmdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata); +out: + return 0; +} + +/* unlink */ + +int32_t +changelog_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, + postparent, xdata); + return 0; +} + +int32_t +changelog_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflags, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + + gf_msg_debug(this->name, 0, "Dequeue unlink"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflags, xdata); + return 0; +} + +int32_t +changelog_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) +{ + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + dht_changelog_rename_info_t *info = NULL; + int ret = 0; + char *old_name = NULL; + char *new_name = NULL; + char *nname = NULL; + + INIT_LIST_HEAD(&queue); + priv = this->private; + + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + ret = dict_get_bin(xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info); + if (!ret) { /* special case: unlink considered as rename */ + /* 3 == fop + oldloc + newloc */ + old_name = alloca(info->oldname_len); + new_name = alloca(info->newname_len); + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, loc->inode->gfid, 3); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, GF_FOP_RENAME, fop_fn, xtra_len); + + co++; + strncpy(old_name, info->buffer, info->oldname_len); + CHANGELOG_FILL_ENTRY(co, info->old_pargfid, old_name, entry_fn, + entry_free_fn, xtra_len, wind); + + co++; + /* new name resides just after old name */ + nname = info->buffer + info->oldname_len; + strncpy(new_name, nname, info->newname_len); + CHANGELOG_FILL_ENTRY(co, info->new_pargfid, new_name, entry_fn, + entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 3); + } else { /* default unlink */ + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind); + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, loc->inode->gfid, 2); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + co++; + if (priv->capture_del_path) { + CHANGELOG_FILL_ENTRY_DIR_PATH(co, loc->pargfid, loc->name, + del_entry_fn, del_entry_free_fn, + xtra_len, wind, _gf_true); + } else { + CHANGELOG_FILL_ENTRY_DIR_PATH(co, loc->pargfid, loc->name, + del_entry_fn, del_entry_free_fn, + xtra_len, wind, _gf_false); + } + + changelog_set_usable_record_and_length(frame->local, xtra_len, 2); + } + + /* changelog barrier */ + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_unlink_stub(frame, changelog_unlink_resume, loc, xflags, + xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueue unlink"); + goto out; + } + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=unlink", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + + /* changelog barrier */ + +wind: + STACK_WIND(frame, changelog_unlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->unlink, loc, xflags, xdata); +out: + return 0; +} + +/* rename */ + +int32_t +changelog_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(rename, frame, op_ret, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, xdata); + return 0; +} + +int32_t +changelog_rename_resume(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + + gf_msg_debug(this->name, 0, "Dequeue rename"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); + return 0; +} + +int32_t +changelog_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + dht_changelog_rename_info_t *info = NULL; + int ret = 0; + + INIT_LIST_HEAD(&queue); + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + ret = dict_get_bin(xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info); + if (ret && oldloc->inode->ia_type != IA_IFDIR) { + /* xdata "NOT" set for a non-directory, + * Special rename => avoid logging */ + goto wind; + } + + /* 3 == fop + oldloc + newloc */ + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, oldloc->inode->gfid, 3); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + co++; + CHANGELOG_FILL_ENTRY(co, oldloc->pargfid, oldloc->name, entry_fn, + entry_free_fn, xtra_len, wind); + + co++; + CHANGELOG_FILL_ENTRY(co, newloc->pargfid, newloc->name, entry_fn, + entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 3); + /* changelog barrier */ + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_rename_stub(frame, changelog_rename_resume, oldloc, + newloc, xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueue rename"); + goto out; + } + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=rename", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + /* changelog barrier */ + +wind: + STACK_WIND(frame, changelog_rename_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata); +out: + return 0; +} + +/* link */ + +int32_t +changelog_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + return 0; +} + +int32_t +changelog_link_resume(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("changelog", this, out); + GF_VALIDATE_OR_GOTO("changelog", this->fops, out); + GF_VALIDATE_OR_GOTO("changelog", frame, out); + + priv = this->private; + + gf_msg_debug(this->name, 0, "Dequeuing link"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); + return 0; +out: + return -1; +} +int32_t +changelog_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + + priv = this->private; + + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind); + + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, oldloc->gfid, 2); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + co++; + CHANGELOG_FILL_ENTRY(co, newloc->pargfid, newloc->name, entry_fn, + entry_free_fn, xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 2); + + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_link_stub(frame, changelog_link_resume, oldloc, newloc, + xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueued link"); + goto out; + } + + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_FOP_FAILED, + "fop=link", NULL); + chlog_barrier_dequeue_all(this, &queue); + } +wind: + STACK_WIND(frame, changelog_link_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata); +out: + return 0; +} + +/* mkdir */ + +int32_t +changelog_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +changelog_mkdir_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + mode_t mode, mode_t umask, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("changelog", this, out); + GF_VALIDATE_OR_GOTO("changelog", this->fops, out); + GF_VALIDATE_OR_GOTO("changelog", frame, out); + + priv = this->private; + + gf_msg_debug(this->name, 0, "Dequeuing mkdir"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); + return 0; +out: + return -1; +} + +int32_t +changelog_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) +{ + int ret = -1; + uuid_t gfid = { + 0, + }; + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); + if (ret) { + gf_msg_debug(this->name, 0, "failed to get gfid from dict"); + goto wind; + } + + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, gfid, 5); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, S_IFDIR | mode, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_ENTRY(co, loc->pargfid, loc->name, entry_fn, entry_free_fn, + xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 5); + + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_mkdir_stub(frame, changelog_mkdir_resume, loc, mode, + umask, xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueued mkdir"); + goto out; + } + + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=mkdir", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + +wind: + STACK_WIND(frame, changelog_mkdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata); +out: + return 0; +} + +/* symlink */ + +int32_t +changelog_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +changelog_symlink_resume(call_frame_t *frame, xlator_t *this, + const char *linkname, loc_t *loc, mode_t umask, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("changelog", this, out); + GF_VALIDATE_OR_GOTO("changelog", this->fops, out); + GF_VALIDATE_OR_GOTO("changelog", frame, out); + + priv = this->private; + + gf_msg_debug(this->name, 0, "Dequeuing symlink"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata); + return 0; +out: + return -1; +} + +int32_t +changelog_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + int ret = -1; + size_t xtra_len = 0; + uuid_t gfid = { + 0, + }; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); + if (ret) { + gf_msg_debug(this->name, 0, "failed to get gfid from dict"); + goto wind; + } + + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, gfid, 2); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + co++; + + CHANGELOG_FILL_ENTRY(co, loc->pargfid, loc->name, entry_fn, entry_free_fn, + xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 2); + + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_symlink_stub(frame, changelog_symlink_resume, linkname, + loc, umask, xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueued symlink"); + goto out; + } + + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=symlink", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + +wind: + STACK_WIND(frame, changelog_symlink_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata); +out: + return 0; +} + +/* mknod */ + +int32_t +changelog_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +changelog_mknod_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("changelog", this, out); + GF_VALIDATE_OR_GOTO("changelog", this->fops, out); + GF_VALIDATE_OR_GOTO("changelog", frame, out); + + priv = this->private; + + gf_msg_debug(this->name, 0, "Dequeuing mknod"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); + return 0; +out: + return -1; +} + +int32_t +changelog_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t dev, mode_t umask, dict_t *xdata) +{ + int ret = -1; + uuid_t gfid = { + 0, + }; + size_t xtra_len = 0; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + + priv = this->private; + + /* Check whether changelog active */ + if (!(priv->active)) + goto wind; + + /* Check whether rebalance activity */ + if (frame->root->pid == GF_CLIENT_PID_DEFRAG) + goto wind; + + /* If tier-dht linkto is SET, ignore about verifiying : + * 1. Whether internal fop AND + * 2. Whether tier rebalance process activity (this will help in + * recording mknod if tier rebalance process calls this mknod) */ + if (!(dict_get(xdata, "trusted.tier.tier-dht.linkto"))) { + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind); + if (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG) + goto wind; + } + + ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); + if (ret) { + gf_msg_debug(this->name, 0, "failed to get gfid from dict"); + goto wind; + } + + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, gfid, 5); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, mode, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_ENTRY(co, loc->pargfid, loc->name, entry_fn, entry_free_fn, + xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 5); + + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_mknod_stub(frame, changelog_mknod_resume, loc, mode, dev, + umask, xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueued mknod"); + goto out; + } + + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=mknod", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + +wind: + STACK_WIND(frame, changelog_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, dev, umask, xdata); +out: + return 0; +} + +/* create */ + +int32_t +changelog_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int32_t ret = 0; + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + changelog_event_t ev = { + 0, + }; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + /* fill the event structure.. similar to open() */ + ev.ev_type = CHANGELOG_OP_TYPE_CREATE; + gf_uuid_copy(ev.u.create.gfid, buf->ia_gfid); + ev.u.create.flags = fd->flags; + changelog_dispatch_event(this, priv, &ev); + + if (changelog_ev_selected(this, &priv->ev_selection, + CHANGELOG_OP_TYPE_RELEASE)) { + ret = fd_ctx_set(fd, this, (uint64_t)(long)0x1); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT, + NULL); + } + + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; +} + +int32_t +changelog_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("changelog", this, out); + GF_VALIDATE_OR_GOTO("changelog", this->fops, out); + GF_VALIDATE_OR_GOTO("changelog", frame, out); + + priv = this->private; + + gf_msg_debug(this->name, 0, "Dequeuing create"); + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; + +out: + return -1; +} + +int32_t +changelog_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + int ret = -1; + uuid_t gfid = { + 0, + }; + changelog_opt_t *co = NULL; + changelog_priv_t *priv = NULL; + size_t xtra_len = 0; + call_stub_t *stub = NULL; + struct list_head queue = { + 0, + }; + gf_boolean_t barrier_enabled = _gf_false; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + ret = dict_get_gfuuid(xdata, "gfid-req", &gfid); + if (ret) { + gf_msg_debug(this->name, 0, "failed to get gfid from dict"); + goto wind; + } + + /* init with two extra records */ + CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, gfid, 5); + if (!frame->local) + goto wind; + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, mode, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_ENTRY(co, loc->pargfid, loc->name, entry_fn, entry_free_fn, + xtra_len, wind); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 5); + + LOCK(&priv->lock); + { + if ((barrier_enabled = priv->barrier_enabled)) { + stub = fop_create_stub(frame, changelog_create_resume, loc, flags, + mode, umask, fd, xdata); + if (!stub) + __chlog_barrier_disable(this, &queue); + else + __chlog_barrier_enqueue(this, stub); + } else { + ((changelog_local_t *)frame->local)->color = priv->current_color; + changelog_inc_fop_cnt(this, priv, frame->local); + } + } + UNLOCK(&priv->lock); + + if (barrier_enabled && stub) { + gf_msg_debug(this->name, 0, "Enqueued create"); + goto out; + } + + if (barrier_enabled && !stub) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=create", NULL); + chlog_barrier_dequeue_all(this, &queue); + } + +wind: + STACK_WIND(frame, changelog_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); +out: + return 0; +} + +/* }}} */ + +/* Metadata modification fops - TYPE II */ + +/* {{{ */ + +/* {f}setattr */ + +int32_t +changelog_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop_stbuf, struct iatt *postop_stbuf, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, preop_stbuf, + postop_stbuf, xdata); + + return 0; +} + +int32_t +changelog_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); + + CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 1); + if (!frame->local) + goto wind; + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; +} + +int32_t +changelog_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop_stbuf, struct iatt *postop_stbuf, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(setattr, frame, op_ret, op_errno, preop_stbuf, + postop_stbuf, xdata); + + return 0; +} + +int32_t +changelog_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + uuid_t shard_root_gfid = { + 0, + }; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind); + + /* Do not record META on .shard */ + gf_uuid_parse(SHARD_ROOT_GFID, shard_root_gfid); + if (gf_uuid_compare(loc->gfid, shard_root_gfid) == 0) { + goto wind; + } + + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); + + CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 1); + if (!frame->local) + goto wind; + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; +} + +/* {f}removexattr */ + +int32_t +changelog_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata); + + return 0; +} + +int32_t +changelog_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); + + CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 1); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_fremovexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata); + return 0; +} + +int32_t +changelog_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata); + + return 0; +} + +int32_t +changelog_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); + + CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 1); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_removexattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, loc, name, xdata); + return 0; +} + +/* {f}setxattr */ + +int32_t +changelog_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata); + + return 0; +} + +/* changelog_handle_virtual_xattr: + * Handles virtual setxattr 'glusterfs.geo-rep.trigger-sync' on files. + * Following is the behaviour based on the value of xattr. + * 1: Captures only DATA entry in changelog. + * 2: Tries to captures both ENTRY and DATA entry in + * changelog. If failed to get pargfid, only DATA + * entry is captured. + * any other value: ENOTSUP is returned. + */ +static void +changelog_handle_virtual_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *dict) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + int32_t value = 0; + int ret = 0; + int dict_ret = 0; + gf_boolean_t valid = _gf_false; + + priv = this->private; + GF_ASSERT(priv); + + dict_ret = dict_get_int32(dict, GF_XATTR_TRIGGER_SYNC, &value); + + if ((dict_ret == 0 && value == 1) && ((loc->inode->ia_type == IA_IFDIR) || + (loc->inode->ia_type == IA_IFREG))) + valid = _gf_true; + + if (valid) { + ret = changelog_fill_entry_buf(frame, this, loc, &local); + if (ret) { + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_ENTRY_BUF_INFO, + "gfid=%s", uuid_utoa(loc->inode->gfid), NULL); + goto unwind; + } + changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY); + + unwind: + /* Capture DATA only if it's a file. */ + if (loc->inode->ia_type != IA_IFDIR) + changelog_update(this, priv, frame->local, CHANGELOG_TYPE_DATA); + /* Assign local to prev_entry, so unwind will take + * care of cleanup. */ + ((changelog_local_t *)(frame->local))->prev_entry = local; + CHANGELOG_STACK_UNWIND(setxattr, frame, 0, 0, NULL); + return; + } else { + CHANGELOG_STACK_UNWIND(setxattr, frame, -1, ENOTSUP, NULL); + return; + } +} + +int32_t +changelog_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *dict, int32_t flags, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); + + CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 1); + + /* On setting this virtual xattr on a file, an explicit data + * sync is triggered from geo-rep as CREATE|DATA entry is + * recorded in changelog based on xattr value. + */ + if (dict_get(dict, GF_XATTR_TRIGGER_SYNC)) { + changelog_handle_virtual_xattr(frame, this, loc, dict); + return 0; + } + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_setxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata); + return 0; +} + +int32_t +changelog_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata); + + return 0; +} + +int32_t +changelog_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind); + + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); + + CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 1); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_fsetxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata); + return 0; +} + +int32_t +changelog_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(xattrop, frame, op_ret, op_errno, xattr, xdata); + + return 0; +} + +int32_t +changelog_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + int ret = 0; + void *size_attr = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + ret = dict_get_ptr(xattr, GF_XATTR_SHARD_FILE_SIZE, &size_attr); + if (ret) + goto wind; + + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); + + CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 1); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_xattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata); + return 0; +} + +int32_t +changelog_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, xattr, xdata); + + return 0; +} + +int32_t +changelog_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; + void *size_attr = NULL; + int ret = 0; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + ret = dict_get_ptr(xattr, GF_XATTR_SHARD_FILE_SIZE, &size_attr); + if (ret) + goto wind; + + CHANGELOG_OP_BOUNDARY_CHECK(frame, wind); + + CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 1); + + co = changelog_get_usable_buffer(frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length(frame->local, xtra_len, 1); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_fxattrop_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fxattrop, fd, optype, xattr, xdata); + return 0; +} +/* }}} */ + +/* Data modification fops - TYPE I */ + +/* {{{ */ + +/* {f}truncate() */ + +int32_t +changelog_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_DATA); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; +} + +int32_t +changelog_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, + off_t offset, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 0); + LOCK(&priv->c_snap_lock); + { + if (priv->c_snap_fd != -1 && priv->barrier_enabled == _gf_true) { + changelog_snap_handle_ascii_change( + this, &(((changelog_local_t *)(frame->local))->cld)); + } + } + UNLOCK(&priv->c_snap_lock); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; +} + +int32_t +changelog_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_DATA); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; +} + +int32_t +changelog_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 0); + LOCK(&priv->c_snap_lock); + { + if (priv->c_snap_fd != -1 && priv->barrier_enabled == _gf_true) { + changelog_snap_handle_ascii_change( + this, &(((changelog_local_t *)(frame->local))->cld)); + } + } + UNLOCK(&priv->c_snap_lock); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; +} + +/* writev() */ + +int32_t +changelog_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + changelog_local_t *local = NULL; + + priv = this->private; + local = frame->local; + + CHANGELOG_COND_GOTO(priv, ((op_ret <= 0) || !local), unwind); + + changelog_update(this, priv, local, CHANGELOG_TYPE_DATA); + +unwind: + changelog_dec_fop_cnt(this, priv, local); + CHANGELOG_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; +} + +int32_t +changelog_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 0); + LOCK(&priv->c_snap_lock); + { + if (priv->c_snap_fd != -1 && priv->barrier_enabled == _gf_true) { + changelog_snap_handle_ascii_change( + this, &(((changelog_local_t *)(frame->local))->cld)); + } + } + UNLOCK(&priv->c_snap_lock); + +wind: + changelog_color_fop_and_inc_cnt(this, priv, frame->local); + STACK_WIND(frame, changelog_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, + flags, iobref, xdata); + return 0; +} + +/* }}} */ + +/* open, release and other beasts */ + +/* {{{ */ + +int +changelog_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd, dict_t *xdata) +{ + int ret = 0; + changelog_priv_t *priv = NULL; + changelog_event_t ev = { + 0, + }; + gf_boolean_t logopen = _gf_false; + + priv = this->private; + if (frame->local) { + frame->local = NULL; + logopen = _gf_true; + } + + CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !logopen), unwind); + + /* fill the event structure */ + ev.ev_type = CHANGELOG_OP_TYPE_OPEN; + gf_uuid_copy(ev.u.open.gfid, fd->inode->gfid); + ev.u.open.flags = fd->flags; + changelog_dispatch_event(this, priv, &ev); + + if (changelog_ev_selected(this, &priv->ev_selection, + CHANGELOG_OP_TYPE_RELEASE)) { + ret = fd_ctx_set(fd, this, (uint64_t)(long)0x1); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT, + NULL); + } + +unwind: + CHANGELOG_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +int +changelog_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) +{ + changelog_priv_t *priv = NULL; + + priv = this->private; + CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind); + + frame->local = (void *)0x1; /* do not dereference in ->cbk */ + +wind: + STACK_WIND(frame, changelog_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +/* }}} */ + +/* {{{ */ + +/* }}} */ + +int32_t +_changelog_generic_dispatcher(dict_t *dict, char *key, data_t *value, + void *data) +{ + xlator_t *this = NULL; + changelog_priv_t *priv = NULL; + + this = data; + priv = this->private; + + changelog_dispatch_event(this, priv, (changelog_event_t *)value->data); + return 0; +} + +/** + * changelog ipc dispatches events, pointers of which are passed in + * @xdata. Dispatching is orderless (whatever order dict_foreach() + * traverses the dictionary). + */ +int32_t +changelog_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ + if (op != GF_IPC_TARGET_CHANGELOG) + goto wind; + + /* it's for us, do the job */ + if (xdata) + (void)dict_foreach(xdata, _changelog_generic_dispatcher, this); + + STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL); + return 0; + +wind: + STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, op, xdata); + return 0; +} + +/* {{{ */ + +int32_t +changelog_release(xlator_t *this, fd_t *fd) +{ + changelog_event_t ev = { + 0, + }; + changelog_priv_t *priv = NULL; + + priv = this->private; + + ev.ev_type = CHANGELOG_OP_TYPE_RELEASE; + gf_uuid_copy(ev.u.release.gfid, fd->inode->gfid); + changelog_dispatch_event(this, priv, &ev); + + (void)fd_ctx_del(fd, this, NULL); + + return 0; +} + +/* }}} */ + +/** + * The + * - @init () + * - @fini () + * - @reconfigure () + * ... and helper routines + */ + +/** + * needed if there are more operation modes in the future. + */ +static void +changelog_assign_opmode(changelog_priv_t *priv, char *mode) +{ + if (strncmp(mode, "realtime", 8) == 0) { + priv->op_mode = CHANGELOG_MODE_RT; + } +} + +static void +changelog_assign_encoding(changelog_priv_t *priv, char *enc) +{ + if (strncmp(enc, "binary", 6) == 0) { + priv->encode_mode = CHANGELOG_ENCODE_BINARY; + } else if (strncmp(enc, "ascii", 5) == 0) { + priv->encode_mode = CHANGELOG_ENCODE_ASCII; + } +} + +static void +changelog_assign_barrier_timeout(changelog_priv_t *priv, uint32_t timeout) +{ + LOCK(&priv->lock); + { + priv->timeout.tv_sec = timeout; + } + UNLOCK(&priv->lock); +} + +/* cleanup any helper threads that are running */ +static void +changelog_cleanup_helper_threads(xlator_t *this, changelog_priv_t *priv) +{ + if (priv->cr.rollover_th) { + (void)changelog_thread_cleanup(this, priv->cr.rollover_th); + priv->cr.rollover_th = 0; + } + + if (priv->cf.fsync_th) { + (void)changelog_thread_cleanup(this, priv->cf.fsync_th); + priv->cf.fsync_th = 0; + } +} + +/* spawn helper thread; cleaning up in case of errors */ +static int +changelog_spawn_helper_threads(xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + + /* Geo-Rep snapshot dependency: + * + * To implement explicit rollover of changlog journal on barrier + * notification, a pipe is created to communicate between + * 'changelog_rollover' thread and changelog main thread. The select + * call used to wait till roll-over time in changelog_rollover thread + * is modified to wait on read end of the pipe. When barrier + * notification comes (i.e, in 'reconfigure'), select in + * changelog_rollover thread is woken up explicitly by writing into + * the write end of the pipe in 'reconfigure'. + */ + + priv->cr.notify = _gf_false; + priv->cr.this = this; + ret = gf_thread_create(&priv->cr.rollover_th, NULL, changelog_rollover, + priv, "clogro"); + if (ret) + goto out; + + if (priv->fsync_interval) { + priv->cf.this = this; + ret = gf_thread_create(&priv->cf.fsync_th, NULL, changelog_fsync_thread, + priv, "clogfsyn"); + } + + if (ret) + changelog_cleanup_helper_threads(this, priv); + +out: + return ret; +} + +int +notify(xlator_t *this, int event, void *data, ...) +{ + changelog_priv_t *priv = NULL; + dict_t *dict = NULL; + char buf[1] = {1}; + int barrier = DICT_DEFAULT; + gf_boolean_t bclean_req = _gf_false; + int ret = 0; + int ret1 = 0; + struct list_head queue = { + 0, + }; + uint64_t xprtcnt = 0; + uint64_t clntcnt = 0; + changelog_clnt_t *conn = NULL; + gf_boolean_t cleanup_notify = _gf_false; + char sockfile[UNIX_PATH_MAX] = { + 0, + }; + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; + + INIT_LIST_HEAD(&queue); + + priv = this->private; + if (!priv) + goto out; + + if (event == GF_EVENT_PARENT_DOWN) { + priv->victim = data; + gf_log(this->name, GF_LOG_INFO, + "cleanup changelog rpc connection of brick %s", + priv->victim->name); + + if (priv->rpc_active) { + this->cleanup_starting = 1; + changelog_destroy_rpc_listner(this, priv); + conn = &priv->connections; + if (conn) + changelog_ev_cleanup_connections(this, conn); + xprtcnt = GF_ATOMIC_GET(priv->xprtcnt); + clntcnt = GF_ATOMIC_GET(priv->clntcnt); + if (!xprtcnt && !clntcnt) { + LOCK(&priv->lock); + { + cleanup_notify = priv->notify_down; + priv->notify_down = _gf_true; + } + UNLOCK(&priv->lock); + if (priv->rpc) { + list_for_each_entry_safe(listener, next, + &priv->rpc->listeners, list) + { + if (listener->trans) { + rpc_transport_unref(listener->trans); + } + } + rpcsvc_destroy(priv->rpc); + priv->rpc = NULL; + } + CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, + UNIX_PATH_MAX); + sys_unlink(sockfile); + if (!cleanup_notify) + default_notify(this, GF_EVENT_PARENT_DOWN, data); + } + } else { + default_notify(this, GF_EVENT_PARENT_DOWN, data); + } + goto out; + } + + if (event == GF_EVENT_TRANSLATOR_OP) { + dict = data; + + barrier = dict_get_str_boolean(dict, "barrier", DICT_DEFAULT); + + switch (barrier) { + case DICT_ERROR: + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_DICT_GET_FAILED, "dict_get_str_boolean", + NULL); + ret = -1; + goto out; + + case BARRIER_OFF: + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BARRIER_STATE_NOTIFY, "off", NULL); + + CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out); + LOCK(&priv->c_snap_lock); + { + changelog_snap_logging_stop(this, priv); + } + UNLOCK(&priv->c_snap_lock); + + LOCK(&priv->bflags.lock); + { + if (priv->bflags.barrier_ext == _gf_false) + ret = -1; + } + UNLOCK(&priv->bflags.lock); + + if (ret == -1) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_BARRIER_ERROR, NULL); + goto out; + } + + /* Stop changelog barrier and dequeue all fops */ + LOCK(&priv->lock); + { + if (priv->barrier_enabled == _gf_true) + __chlog_barrier_disable(this, &queue); + else + ret = -1; + } + UNLOCK(&priv->lock); + /* If ret = -1, then changelog barrier is already + * disabled because of error or timeout. + */ + if (ret == 0) { + chlog_barrier_dequeue_all(this, &queue); + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BARRIER_DISABLED, NULL); + } else { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_BARRIER_ALREADY_DISABLED, NULL); + } + + LOCK(&priv->bflags.lock); + { + priv->bflags.barrier_ext = _gf_false; + } + UNLOCK(&priv->bflags.lock); + + goto out; + + case BARRIER_ON: + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BARRIER_STATE_NOTIFY, "on", NULL); + + CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out); + LOCK(&priv->c_snap_lock); + { + changelog_snap_logging_start(this, priv); + } + UNLOCK(&priv->c_snap_lock); + + LOCK(&priv->bflags.lock); + { + if (priv->bflags.barrier_ext == _gf_true) + ret = -1; + else + priv->bflags.barrier_ext = _gf_true; + } + UNLOCK(&priv->bflags.lock); + + if (ret == -1) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_BARRIER_ON_ERROR, NULL); + goto out; + } + + ret = pthread_mutex_lock(&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, out, bclean_req); + { + priv->bn.bnotify = _gf_true; + } + ret = pthread_mutex_unlock(&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, out, bclean_req); + + /* Start changelog barrier */ + LOCK(&priv->lock); + { + ret = __chlog_barrier_enable(this, priv); + } + UNLOCK(&priv->lock); + if (ret == -1) { + changelog_barrier_cleanup(this, priv, &queue); + goto out; + } + + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BARRIER_ENABLE, NULL); + + ret = changelog_barrier_notify(priv, buf); + if (ret) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_WRITE_FAILED, "Explicit roll over", + NULL); + changelog_barrier_cleanup(this, priv, &queue); + ret = -1; + goto out; + } + + ret = pthread_mutex_lock(&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, out, bclean_req); + { + /* The while condition check is required here to + * handle spurious wakeup of cond wait that can + * happen with pthreads. See man page */ + while (priv->bn.bnotify == _gf_true) { + ret = pthread_cond_wait(&priv->bn.bnotify_cond, + &priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, out, bclean_req); + } + if (priv->bn.bnotify_error == _gf_true) { + ret = -1; + priv->bn.bnotify_error = _gf_false; + } + } + ret1 = pthread_mutex_unlock(&priv->bn.bnotify_mutex); + CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret1, out, bclean_req); + gf_smsg(this->name, GF_LOG_INFO, 0, + CHANGELOG_MSG_BNOTIFY_COND_INFO, NULL); + + goto out; + + case DICT_DEFAULT: + gf_smsg(this->name, GF_LOG_ERROR, 0, + CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, NULL); + ret = -1; + goto out; + + default: + gf_smsg(this->name, GF_LOG_ERROR, EINVAL, + CHANGELOG_MSG_ERROR_IN_DICT_GET, NULL); + ret = -1; + goto out; + } + } else { + ret = default_notify(this, event, data); + } + +out: + if (bclean_req) + changelog_barrier_cleanup(this, priv, &queue); + + return ret; +} + +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init(this, gf_changelog_mt_end + 1); + + if (ret != 0) { + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, + CHANGELOG_MSG_MEMORY_INIT_FAILED, NULL); + return ret; + } + + return ret; +} + +static int +changelog_init(xlator_t *this, changelog_priv_t *priv) +{ + int i = 0; + int ret = 0; + changelog_log_data_t cld = { + 0, + }; + + priv->maps[CHANGELOG_TYPE_DATA] = "D "; + priv->maps[CHANGELOG_TYPE_METADATA] = "M "; + priv->maps[CHANGELOG_TYPE_METADATA_XATTR] = "M "; + priv->maps[CHANGELOG_TYPE_ENTRY] = "E "; + + for (; i < CHANGELOG_MAX_TYPE; i++) { + /* start with version 1 */ + priv->slice.changelog_version[i] = 1; + } + + if (!priv->active) + return ret; + + /** + * start with a fresh changelog file every time. this is done + * in case there was an encoding change. so... things are kept + * simple here. + */ + changelog_fill_rollover_data(&cld, _gf_false); + + ret = htime_open(this, priv, cld.cld_roll_time); + /* call htime open with cld's rollover_time */ + if (ret) + goto out; + + LOCK(&priv->lock); + { + ret = changelog_inject_single_event(this, priv, &cld); + } + UNLOCK(&priv->lock); + + /* ... and finally spawn the helpers threads */ + ret = changelog_spawn_helper_threads(this, priv); + +out: + return ret; +} + +/** + * Init barrier related condition variables and locks + */ +static int +changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv) +{ + gf_boolean_t bn_mutex_init = _gf_false; + gf_boolean_t bn_cond_init = _gf_false; + gf_boolean_t dm_mutex_black_init = _gf_false; + gf_boolean_t dm_cond_black_init = _gf_false; + gf_boolean_t dm_mutex_white_init = _gf_false; + gf_boolean_t dm_cond_white_init = _gf_false; + gf_boolean_t cr_mutex_init = _gf_false; + gf_boolean_t cr_cond_init = _gf_false; + int ret = 0; + + if ((ret = pthread_mutex_init(&priv->bn.bnotify_mutex, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=bnotify", + "ret=%d", ret, NULL); + ret = -1; + goto out; + } + bn_mutex_init = _gf_true; + + if ((ret = pthread_cond_init(&priv->bn.bnotify_cond, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=bnotify", + "ret=%d", ret, NULL); + ret = -1; + goto out; + } + bn_cond_init = _gf_true; + + if ((ret = pthread_mutex_init(&priv->dm.drain_black_mutex, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=drain_black", + "ret=%d", ret, NULL); + ret = -1; + goto out; + } + dm_mutex_black_init = _gf_true; + + if ((ret = pthread_cond_init(&priv->dm.drain_black_cond, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=drain_black", + "ret=%d", ret, NULL); + ret = -1; + goto out; + } + dm_cond_black_init = _gf_true; + + if ((ret = pthread_mutex_init(&priv->dm.drain_white_mutex, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=drain_white", + "ret=%d", ret, NULL); + ret = -1; + goto out; + } + dm_mutex_white_init = _gf_true; + + if ((ret = pthread_cond_init(&priv->dm.drain_white_cond, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=drain_white", + "ret=%d", ret, NULL); + ret = -1; + goto out; + } + dm_cond_white_init = _gf_true; + + if ((pthread_mutex_init(&priv->cr.lock, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, + "name=changelog_rollover", "ret=%d", ret, NULL); + ret = -1; + goto out; + } + cr_mutex_init = _gf_true; + + if ((pthread_cond_init(&priv->cr.cond, NULL)) != 0) { + gf_smsg(this->name, GF_LOG_ERROR, errno, + CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, + "changelog_rollover cond init failed", "ret=%d", ret, NULL); + ret = -1; + goto out; + } + cr_cond_init = _gf_true; +out: + if (ret) { + if (bn_mutex_init) + pthread_mutex_destroy(&priv->bn.bnotify_mutex); + if (bn_cond_init) + pthread_cond_destroy(&priv->bn.bnotify_cond); + if (dm_mutex_black_init) + pthread_mutex_destroy(&priv->dm.drain_black_mutex); + if (dm_cond_black_init) + pthread_cond_destroy(&priv->dm.drain_black_cond); + if (dm_mutex_white_init) + pthread_mutex_destroy(&priv->dm.drain_white_mutex); + if (dm_cond_white_init) + pthread_cond_destroy(&priv->dm.drain_white_cond); + if (cr_mutex_init) + pthread_mutex_destroy(&priv->cr.lock); + if (cr_cond_init) + pthread_cond_destroy(&priv->cr.cond); + } + return ret; +} + +/* Destroy barrier related condition variables and locks */ +static void +changelog_barrier_pthread_destroy(changelog_priv_t *priv) +{ + pthread_mutex_destroy(&priv->bn.bnotify_mutex); + pthread_cond_destroy(&priv->bn.bnotify_cond); + pthread_mutex_destroy(&priv->dm.drain_black_mutex); + pthread_cond_destroy(&priv->dm.drain_black_cond); + pthread_mutex_destroy(&priv->dm.drain_white_mutex); + pthread_cond_destroy(&priv->dm.drain_white_cond); + pthread_mutex_destroy(&priv->cr.lock); + pthread_cond_destroy(&priv->cr.cond); + LOCK_DESTROY(&priv->bflags.lock); +} + +static void +changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv) +{ + /* terminate rpc server */ + if (!this->cleanup_starting) + changelog_destroy_rpc_listner(this, priv); + + (void)changelog_cleanup_rpc_threads(this, priv); + /* cleanup rot buffs */ + rbuf_dtor(priv->rbuf); + + /* cleanup poller thread */ + if (priv->poller) + (void)changelog_thread_cleanup(this, priv->poller); +} + +int +reconfigure(xlator_t *this, dict_t *options) +{ + int ret = 0; + char *tmp = NULL; + changelog_priv_t *priv = NULL; + gf_boolean_t active_earlier = _gf_true; + gf_boolean_t active_now = _gf_true; + gf_boolean_t rpc_active_earlier = _gf_true; + gf_boolean_t rpc_active_now = _gf_true; + gf_boolean_t iniate_rpc = _gf_false; + changelog_time_slice_t *slice = NULL; + changelog_log_data_t cld = { + 0, + }; + char htime_dir[PATH_MAX] = { + 0, + }; + char csnap_dir[PATH_MAX] = { + 0, + }; + uint32_t timeout = 0; + + priv = this->private; + if (!priv) + goto out; + + ret = -1; + active_earlier = priv->active; + rpc_active_earlier = priv->rpc_active; + + /* first stop the rollover and the fsync thread */ + changelog_cleanup_helper_threads(this, priv); + + GF_OPTION_RECONF("changelog-dir", tmp, options, str, out); + if (!tmp) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_DIR_OPTIONS_NOT_SET, + NULL); + goto out; + } + + GF_FREE(priv->changelog_dir); + priv->changelog_dir = gf_strdup(tmp); + if (!priv->changelog_dir) + goto out; + + ret = mkdir_p(priv->changelog_dir, 0600, _gf_true); + + if (ret) + goto out; + CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, htime_dir); + ret = mkdir_p(htime_dir, 0600, _gf_true); + + if (ret) + goto out; + + CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir); + ret = mkdir_p(csnap_dir, 0600, _gf_true); + + if (ret) + goto out; + + GF_OPTION_RECONF("changelog", active_now, options, bool, out); + GF_OPTION_RECONF("changelog-notification", rpc_active_now, options, bool, + out); + + /* If journalling is enabled, enable rpc notifications */ + if (active_now && !active_earlier) { + if (!rpc_active_earlier) + iniate_rpc = _gf_true; + } + + if (rpc_active_now && !rpc_active_earlier) { + iniate_rpc = _gf_true; + } + + /* TODO: Disable of changelog-notifications is not supported for now + * as there is no clean way of cleaning up of rpc resources + */ + + if (iniate_rpc) { + ret = changelog_init_rpc(this, priv); + if (ret) + goto out; + priv->rpc_active = _gf_true; + } + + /** + * changelog_handle_change() handles changes that could possibly + * have been submit changes before changelog deactivation. + */ + if (!active_now) + priv->active = _gf_false; + + GF_OPTION_RECONF("op-mode", tmp, options, str, out); + changelog_assign_opmode(priv, tmp); + + tmp = NULL; + + GF_OPTION_RECONF("encoding", tmp, options, str, out); + changelog_assign_encoding(priv, tmp); + + GF_OPTION_RECONF("rollover-time", priv->rollover_time, options, int32, out); + GF_OPTION_RECONF("fsync-interval", priv->fsync_interval, options, int32, + out); + GF_OPTION_RECONF("changelog-barrier-timeout", timeout, options, time, out); + changelog_assign_barrier_timeout(priv, timeout); + + GF_OPTION_RECONF("capture-del-path", priv->capture_del_path, options, bool, + out); + + if (active_now || active_earlier) { + changelog_fill_rollover_data(&cld, !active_now); + + slice = &priv->slice; + + LOCK(&priv->lock); + { + ret = changelog_inject_single_event(this, priv, &cld); + if (!ret && active_now) + SLICE_VERSION_UPDATE(slice); + } + UNLOCK(&priv->lock); + + if (ret) + goto out; + + if (active_now) { + if (!active_earlier) { + gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_RECONFIGURE, + NULL); + htime_create(this, priv, gf_time()); + } + ret = changelog_spawn_helper_threads(this, priv); + } + } + +out: + if (ret) { + /* TODO */ + } else { + gf_msg_debug(this->name, 0, "changelog reconfigured"); + if (active_now && priv) + priv->active = _gf_true; + } + + return ret; +} + +static void +changelog_freeup_options(xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + + ret = priv->cb->dtor(this, &priv->cd); + if (ret) + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_FREEUP_FAILED, NULL); + GF_FREE(priv->changelog_brick); + GF_FREE(priv->changelog_dir); +} + +static int +changelog_init_options(xlator_t *this, changelog_priv_t *priv) +{ + int ret = 0; + char *tmp = NULL; + uint32_t timeout = 0; + char htime_dir[PATH_MAX] = { + 0, + }; + char csnap_dir[PATH_MAX] = { + 0, + }; + + GF_OPTION_INIT("changelog-brick", tmp, str, error_return); + priv->changelog_brick = gf_strdup(tmp); + if (!priv->changelog_brick) + goto error_return; + + tmp = NULL; + + GF_OPTION_INIT("changelog-dir", tmp, str, dealloc_1); + priv->changelog_dir = gf_strdup(tmp); + if (!priv->changelog_dir) + goto dealloc_1; + + tmp = NULL; + + /** + * create the directory even if change-logging would be inactive + * so that consumers can _look_ into it (finding nothing...) + */ + ret = mkdir_p(priv->changelog_dir, 0600, _gf_true); + + if (ret) + goto dealloc_2; + + CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, htime_dir); + ret = mkdir_p(htime_dir, 0600, _gf_true); + if (ret) + goto dealloc_2; + + CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir); + ret = mkdir_p(csnap_dir, 0600, _gf_true); + if (ret) + goto dealloc_2; + + GF_OPTION_INIT("changelog", priv->active, bool, dealloc_2); + GF_OPTION_INIT("changelog-notification", priv->rpc_active, bool, dealloc_2); + GF_OPTION_INIT("capture-del-path", priv->capture_del_path, bool, dealloc_2); + + GF_OPTION_INIT("op-mode", tmp, str, dealloc_2); + changelog_assign_opmode(priv, tmp); + + tmp = NULL; + + GF_OPTION_INIT("encoding", tmp, str, dealloc_2); + changelog_assign_encoding(priv, tmp); + changelog_encode_change(priv); + + GF_OPTION_INIT("rollover-time", priv->rollover_time, int32, dealloc_2); + + GF_OPTION_INIT("fsync-interval", priv->fsync_interval, int32, dealloc_2); + + GF_OPTION_INIT("changelog-barrier-timeout", timeout, time, dealloc_2); + changelog_assign_barrier_timeout(priv, timeout); + + GF_ASSERT(cb_bootstrap[priv->op_mode].mode == priv->op_mode); + priv->cb = &cb_bootstrap[priv->op_mode]; + + /* ... now bootstrap the logger */ + ret = priv->cb->ctor(this, &priv->cd); + if (ret) + goto dealloc_2; + + priv->changelog_fd = -1; + + return 0; + +dealloc_2: + GF_FREE(priv->changelog_dir); +dealloc_1: + GF_FREE(priv->changelog_brick); +error_return: + return -1; +} + +static int +changelog_init_rpc(xlator_t *this, changelog_priv_t *priv) +{ + rpcsvc_t *rpc = NULL; + changelog_ev_selector_t *selection = NULL; + + selection = &priv->ev_selection; + + /* initialize event selection */ + changelog_init_event_selection(this, selection); + + priv->rbuf = rbuf_init(NR_ROTT_BUFFS); + if (!priv->rbuf) + goto cleanup_thread; + + rpc = changelog_init_rpc_listener(this, priv, priv->rbuf, NR_DISPATCHERS); + if (!rpc) + goto cleanup_rbuf; + priv->rpc = rpc; + + return 0; + +cleanup_rbuf: + rbuf_dtor(priv->rbuf); +cleanup_thread: + if (priv->poller) + (void)changelog_thread_cleanup(this, priv->poller); + + return -1; +} + +int32_t +init(xlator_t *this) +{ + int ret = -1; + changelog_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("changelog", this, error_return); + + if (!this->children || this->children->next) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CHILD_MISCONFIGURED, + NULL); + goto error_return; + } + + if (!this->parents) { + gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_VOL_MISCONFIGURED, + NULL); + goto error_return; + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_changelog_mt_priv_t); + if (!priv) + goto error_return; + + this->local_pool = mem_pool_new(changelog_local_t, 64); + if (!this->local_pool) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY, + NULL); + goto cleanup_priv; + } + + LOCK_INIT(&priv->lock); + LOCK_INIT(&priv->c_snap_lock); + GF_ATOMIC_INIT(priv->listnercnt, 0); + GF_ATOMIC_INIT(priv->clntcnt, 0); + GF_ATOMIC_INIT(priv->xprtcnt, 0); + INIT_LIST_HEAD(&priv->xprt_list); + priv->htime_fd = -1; + + ret = changelog_init_options(this, priv); + if (ret) + goto cleanup_mempool; + + /* snap dependency changes */ + priv->dm.black_fop_cnt = 0; + priv->dm.white_fop_cnt = 0; + priv->dm.drain_wait_black = _gf_false; + priv->dm.drain_wait_white = _gf_false; + priv->current_color = FOP_COLOR_BLACK; + priv->explicit_rollover = _gf_false; + + priv->cr.notify = _gf_false; + /* Mutex is not needed as threads are not spawned yet */ + priv->bn.bnotify = _gf_false; + priv->bn.bnotify_error = _gf_false; + ret = changelog_barrier_pthread_init(this, priv); + if (ret) + goto cleanup_options; + LOCK_INIT(&priv->bflags.lock); + priv->bflags.barrier_ext = _gf_false; + + /* Changelog barrier init */ + INIT_LIST_HEAD(&priv->queue); + priv->barrier_enabled = _gf_false; + + if (priv->rpc_active || priv->active) { + /* RPC ball rolling.. */ + ret = changelog_init_rpc(this, priv); + if (ret) + goto cleanup_barrier; + priv->rpc_active = _gf_true; + } + + ret = changelog_init(this, priv); + if (ret) + goto cleanup_rpc; + + gf_msg_debug(this->name, 0, "changelog translator loaded"); + + this->private = priv; + return 0; + +cleanup_rpc: + if (priv->rpc_active) { + changelog_cleanup_rpc(this, priv); + } +cleanup_barrier: + changelog_barrier_pthread_destroy(priv); +cleanup_options: + changelog_freeup_options(this, priv); +cleanup_mempool: + mem_pool_destroy(this->local_pool); + this->local_pool = NULL; +cleanup_priv: + GF_FREE(priv); +error_return: + this->private = NULL; + return -1; +} + +void +fini(xlator_t *this) +{ + changelog_priv_t *priv = NULL; + struct list_head queue = { + 0, + }; + + priv = this->private; + + if (priv) { + if (priv->active || priv->rpc_active) { + /* terminate RPC server/threads */ + changelog_cleanup_rpc(this, priv); + GF_FREE(priv->ev_dispatcher); + } + /* call barrier_disable to cancel timer */ + if (priv->barrier_enabled) + __chlog_barrier_disable(this, &queue); + + /* cleanup barrier related objects */ + changelog_barrier_pthread_destroy(priv); + + /* cleanup helper threads */ + changelog_cleanup_helper_threads(this, priv); + + /* cleanup allocated options */ + changelog_freeup_options(this, priv); + + /* deallocate mempool */ + mem_pool_destroy(this->local_pool); + + if (priv->htime_fd != -1) { + sys_close(priv->htime_fd); + } + + /* finally, dealloac private variable */ + GF_FREE(priv); + } + + this->private = NULL; + this->local_pool = NULL; + + return; +} + +struct xlator_fops fops = { + .open = changelog_open, + .mknod = changelog_mknod, + .mkdir = changelog_mkdir, + .create = changelog_create, + .symlink = changelog_symlink, + .writev = changelog_writev, + .truncate = changelog_truncate, + .ftruncate = changelog_ftruncate, + .link = changelog_link, + .rename = changelog_rename, + .unlink = changelog_unlink, + .rmdir = changelog_rmdir, + .setattr = changelog_setattr, + .fsetattr = changelog_fsetattr, + .setxattr = changelog_setxattr, + .fsetxattr = changelog_fsetxattr, + .removexattr = changelog_removexattr, + .fremovexattr = changelog_fremovexattr, + .ipc = changelog_ipc, + .xattrop = changelog_xattrop, + .fxattrop = changelog_fxattrop, +}; + +struct xlator_cbks cbks = { + .forget = changelog_forget, + .release = changelog_release, +}; + +struct volume_options options[] = { + {.key = {"changelog"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable change-logging", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_BASIC, + .tags = {"journal", "georep", "glusterfind"}}, + {.key = {"changelog-notification"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable changelog live notification", + .op_version = {3}, + .level = OPT_STATUS_BASIC, + .tags = {"bitrot", "georep"}}, + {.key = {"changelog-brick"}, + .type = GF_OPTION_TYPE_PATH, + .description = "brick path to generate unique socket file name." + " should be the export directory of the volume strictly.", + .default_value = "{{ brick.path }}", + .op_version = {3}, + .tags = {"journal"}}, + {.key = {"changelog-dir"}, + .type = GF_OPTION_TYPE_PATH, + .description = "directory for the changelog files", + .default_value = "{{ brick.path }}/.glusterfs/changelogs", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_ADVANCED, + .tags = {"journal", "georep", "glusterfind"}}, + {.key = {"op-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "realtime", + .value = {"realtime"}, + .description = "operation mode - futuristic operation modes", + .op_version = {3}, + .tags = {"journal"}}, + {.key = {"encoding"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "ascii", + .value = {"binary", "ascii"}, + .description = "encoding type for changelogs", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_ADVANCED, + .tags = {"journal"}}, + {.key = {"rollover-time"}, + .default_value = "15", + .type = GF_OPTION_TYPE_TIME, + .description = "time to switch to a new changelog file (in seconds)", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_ADVANCED, + .tags = {"journal", "georep", "glusterfind"}}, + {.key = {"fsync-interval"}, + .type = GF_OPTION_TYPE_TIME, + .default_value = "5", + .description = "do not open CHANGELOG file with O_SYNC mode." + " instead perform fsync() at specified intervals", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_ADVANCED, + .tags = {"journal"}}, + {.key = {"changelog-barrier-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .default_value = BARRIER_TIMEOUT, + .description = "After 'timeout' seconds since the time 'barrier' " + "option was set to \"on\", unlink/rmdir/rename " + "operations are no longer blocked and previously " + "blocked fops are allowed to go through", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_ADVANCED, + .tags = {"journal"}}, + {.key = {"capture-del-path"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable capturing paths of deleted entries", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE, + .level = OPT_STATUS_BASIC, + .tags = {"journal", "glusterfind"}}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "changelog", + .category = GF_MAINTAINED, +}; |
