diff options
author | Amar Tumballi <amar@gluster.com> | 2010-06-21 04:24:40 +0000 |
---|---|---|
committer | Anand V. Avati <avati@dev.gluster.com> | 2010-06-21 20:21:05 -0700 |
commit | b9b8734a9496ccf5f8ed5527dc7714930a59948b (patch) | |
tree | ffcf9187c315b30e23cfa7f68f1d360e246d94c2 /xlators/protocol/legacy | |
parent | 2a5fada7546032e1fc1b106ddd40e42cc45349e1 (diff) |
renamed xlator/protocol to xlator/protocol/legacy
Signed-off-by: Amar Tumballi <amar@gluster.com>
Signed-off-by: Raghavendra G <raghavendra@gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 875 (Implement a new protocol to provide proper backward/forward compatibility)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=875
Diffstat (limited to 'xlators/protocol/legacy')
37 files changed, 23785 insertions, 0 deletions
diff --git a/xlators/protocol/legacy/Makefile.am b/xlators/protocol/legacy/Makefile.am new file mode 100644 index 00000000000..9914863021c --- /dev/null +++ b/xlators/protocol/legacy/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = lib transport client server + +CLEANFILES = diff --git a/xlators/protocol/legacy/client/Makefile.am b/xlators/protocol/legacy/client/Makefile.am new file mode 100644 index 00000000000..d471a3f9243 --- /dev/null +++ b/xlators/protocol/legacy/client/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/protocol/legacy/client/src/Makefile.am b/xlators/protocol/legacy/client/src/Makefile.am new file mode 100644 index 00000000000..da00a5182b5 --- /dev/null +++ b/xlators/protocol/legacy/client/src/Makefile.am @@ -0,0 +1,18 @@ + +xlator_LTLIBRARIES = client.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/protocol + +client_la_LDFLAGS = -module -avoidversion + +client_la_SOURCES = client-protocol.c saved-frames.c +client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(top_builddir)/xlators/protocol/legacy/lib/src/libgfproto.la + +noinst_HEADERS = client-protocol.h saved-frames.h client-mem-types.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \ + -I$(top_srcdir)/xlators/protocol/legacy/lib/src + +CLEANFILES = + diff --git a/xlators/protocol/legacy/client/src/client-mem-types.h b/xlators/protocol/legacy/client/src/client-mem-types.h new file mode 100644 index 00000000000..1eee8d93159 --- /dev/null +++ b/xlators/protocol/legacy/client/src/client-mem-types.h @@ -0,0 +1,43 @@ + +/* + Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#ifndef __CLIENT_MEM_TYPES_H__ +#define __CLIENT_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_client_mem_types_ { + gf_client_mt_dir_entry_t = gf_common_mt_end + 1, + gf_client_mt_volfile_ctx, + gf_client_mt_client_state_t, + gf_client_mt_client_conf_t, + gf_client_mt_locker, + gf_client_mt_lock_table, + gf_client_mt_char, + gf_client_mt_client_connection_t, + gf_client_mt_client_fd_ctx_t, + gf_client_mt_client_local_t, + gf_client_mt_saved_frames, + gf_client_mt_saved_frame, + gf_client_mt_end +}; +#endif + diff --git a/xlators/protocol/legacy/client/src/client-protocol.c b/xlators/protocol/legacy/client/src/client-protocol.c new file mode 100644 index 00000000000..50030f8cf78 --- /dev/null +++ b/xlators/protocol/legacy/client/src/client-protocol.c @@ -0,0 +1,6737 @@ +/* + Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif +#include <inttypes.h> + + +#include "glusterfs.h" +#include "client-protocol.h" +#include "compat.h" +#include "dict.h" +#include "protocol.h" +#include "transport.h" +#include "xlator.h" +#include "logging.h" +#include "timer.h" +#include "defaults.h" +#include "compat.h" +#include "compat-errno.h" +#include "statedump.h" +#include "client-mem-types.h" + +#include <sys/resource.h> +#include <inttypes.h> + +/* for default_*_cbk functions */ +#include "defaults.c" +#include "saved-frames.h" +#include "common-utils.h" + +int protocol_client_cleanup (transport_t *trans); +int protocol_client_interpret (xlator_t *this, transport_t *trans, + char *hdr_p, size_t hdrlen, + struct iobuf *iobuf); +int +protocol_client_xfer (call_frame_t *frame, xlator_t *this, transport_t *trans, + int type, int op, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iovec *vector, int count, + struct iobref *iobref); + +int +protocol_client_post_handshake (call_frame_t *frame, xlator_t *this); + +static gf_op_t gf_fops[GF_PROTO_FOP_MAXVALUE]; +static gf_op_t gf_mops[GF_MOP_MAXVALUE]; +static gf_op_t gf_cbks[GF_CBK_MAXVALUE]; + + +transport_t * +client_channel (xlator_t *this, int id) +{ + transport_t *trans = NULL; + client_conf_t *conf = NULL; + int i = 0; + struct client_connection *conn = NULL; + + conf = this->private; + + trans = conf->transport[id]; + conn = trans->xl_private; + + if (conn->connected == 1) + goto ret; + + for (i = 0; i < CHANNEL_MAX; i++) { + trans = conf->transport[i]; + conn = trans->xl_private; + if (conn->connected == 1) + break; + } + +ret: + return trans; +} + + +client_fd_ctx_t * +this_fd_del_ctx (fd_t *file, xlator_t *this) +{ + int dict_ret = -1; + uint64_t ctxaddr = 0; + + GF_VALIDATE_OR_GOTO 
("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, file, out); + + dict_ret = fd_ctx_del (file, this, &ctxaddr); + + if (dict_ret < 0) { + ctxaddr = 0; + } + +out: + return (client_fd_ctx_t *)(unsigned long)ctxaddr; +} + + +client_fd_ctx_t * +this_fd_get_ctx (fd_t *file, xlator_t *this) +{ + int dict_ret = -1; + uint64_t ctxaddr = 0; + + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, file, out); + + dict_ret = fd_ctx_get (file, this, &ctxaddr); + + if (dict_ret < 0) { + ctxaddr = 0; + } + +out: + return (client_fd_ctx_t *)(unsigned long)ctxaddr; +} + + +static void +this_fd_set_ctx (fd_t *file, xlator_t *this, loc_t *loc, client_fd_ctx_t *ctx) +{ + uint64_t oldaddr = 0; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("client", this, out); + GF_VALIDATE_OR_GOTO (this->name, file, out); + + ret = fd_ctx_get (file, this, &oldaddr); + if (ret >= 0) { + gf_log (this->name, GF_LOG_DEBUG, + "%s (%"PRId64"): trying duplicate remote fd set. ", + loc->path, loc->inode->ino); + } + + ret = fd_ctx_set (file, this, (uint64_t)(unsigned long)ctx); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "%s (%"PRId64"): failed to set remote fd", + loc->path, loc->inode->ino); + } +out: + return; +} + + +static int +client_local_wipe (client_local_t *local) +{ + if (local) { + loc_wipe (&local->loc); + + if (local->fd) + fd_unref (local->fd); + + GF_FREE (local); + } + + return 0; +} + +/* + * lookup_frame - lookup call frame corresponding to a given callid + * @trans: transport object + * @callid: call id of the frame + * + * not for external reference + */ + +static call_frame_t * +lookup_frame (transport_t *trans, int32_t op, int8_t type, int64_t callid) +{ + client_connection_t *conn = NULL; + call_frame_t *frame = NULL; + + conn = trans->xl_private; + + pthread_mutex_lock (&conn->lock); + { + frame = saved_frames_get (conn->saved_frames, + op, type, callid); + } + pthread_mutex_unlock (&conn->lock); + + return frame; +} + + +static void 
+call_bail (void *data) +{ + client_connection_t *conn = NULL; + struct timeval current; + transport_t *trans = NULL; + struct list_head list; + struct saved_frame *saved_frame = NULL; + struct saved_frame *trav = NULL; + struct saved_frame *tmp = NULL; + call_frame_t *frame = NULL; + gf_hdr_common_t hdr = {0, }; + char **gf_op_list = NULL; + gf_op_t *gf_ops = NULL; + struct tm frame_sent_tm; + char frame_sent[32] = {0,}; + struct timeval timeout = {0,}; + gf_timer_cbk_t timer_cbk = NULL; + + GF_VALIDATE_OR_GOTO ("client", data, out); + trans = data; + + conn = trans->xl_private; + + gettimeofday (&current, NULL); + INIT_LIST_HEAD (&list); + + pthread_mutex_lock (&conn->lock); + { + /* Chaining to get call-always functionality from + call-once timer */ + if (conn->timer) { + timer_cbk = conn->timer->callbk; + + timeout.tv_sec = 10; + timeout.tv_usec = 0; + + gf_timer_call_cancel (trans->xl->ctx, conn->timer); + conn->timer = gf_timer_call_after (trans->xl->ctx, + timeout, + timer_cbk, + trans); + if (conn->timer == NULL) { + gf_log (trans->xl->name, GF_LOG_DEBUG, + "Cannot create bailout timer"); + } + } + + do { + saved_frame = + saved_frames_get_timedout (conn->saved_frames, + GF_OP_TYPE_MOP_REQUEST, + conn->frame_timeout, + &current); + if (saved_frame) + list_add (&saved_frame->list, &list); + + } while (saved_frame); + + do { + saved_frame = + saved_frames_get_timedout (conn->saved_frames, + GF_OP_TYPE_FOP_REQUEST, + conn->frame_timeout, + &current); + if (saved_frame) + list_add (&saved_frame->list, &list); + } while (saved_frame); + + do { + saved_frame = + saved_frames_get_timedout (conn->saved_frames, + GF_OP_TYPE_CBK_REQUEST, + conn->frame_timeout, + &current); + if (saved_frame) + list_add (&saved_frame->list, &list); + } while (saved_frame); + } + pthread_mutex_unlock (&conn->lock); + + hdr.rsp.op_ret = hton32 (-1); + hdr.rsp.op_errno = hton32 (ENOTCONN); + + list_for_each_entry_safe (trav, tmp, &list, list) { + switch (trav->type) + { + case GF_OP_TYPE_FOP_REQUEST: + gf_ops = 
gf_fops; + gf_op_list = gf_fop_list; + break; + case GF_OP_TYPE_MOP_REQUEST: + gf_ops = gf_mops; + gf_op_list = gf_mop_list; + break; + case GF_OP_TYPE_CBK_REQUEST: + gf_ops = gf_cbks; + gf_op_list = gf_cbk_list; + break; + } + + localtime_r (&trav->saved_at.tv_sec, &frame_sent_tm); + strftime (frame_sent, 32, "%Y-%m-%d %H:%M:%S", &frame_sent_tm); + + gf_log (trans->xl->name, GF_LOG_ERROR, + "bailing out frame %s(%d) " + "frame sent = %s. frame-timeout = %d", + gf_op_list[trav->op], trav->op, + frame_sent, conn->frame_timeout); + + hdr.type = hton32 (trav->type); + hdr.op = hton32 (trav->op); + + frame = trav->frame; + + gf_ops[trav->op] (frame, &hdr, sizeof (hdr), NULL); + + list_del_init (&trav->list); + GF_FREE (trav); + } +out: + return; +} + + +void +save_frame (transport_t *trans, call_frame_t *frame, + int32_t op, int8_t type, uint64_t callid) +{ + client_connection_t *conn = NULL; + struct timeval timeout = {0, }; + + + conn = trans->xl_private; + + saved_frames_put (conn->saved_frames, frame, op, type, callid); + + if (conn->timer == NULL && conn->frame_timeout) { + timeout.tv_sec = 10; + timeout.tv_usec = 0; + conn->timer = gf_timer_call_after (trans->xl->ctx, timeout, + call_bail, (void *) trans); + } +} + + + +void +client_ping_timer_expired (void *data) +{ + xlator_t *this = NULL; + transport_t *trans = NULL; + client_conf_t *conf = NULL; + client_connection_t *conn = NULL; + int disconnect = 0; + int transport_activity = 0; + struct timeval timeout = {0, }; + struct timeval current = {0, }; + + trans = data; + this = trans->xl; + conf = this->private; + conn = trans->xl_private; + + pthread_mutex_lock (&conn->lock); + { + if (conn->ping_timer) + gf_timer_call_cancel (trans->xl->ctx, + conn->ping_timer); + gettimeofday (&current, NULL); + + pthread_mutex_lock (&conf->mutex); + { + if (((current.tv_sec - conf->last_received.tv_sec) < + conn->ping_timeout) + || ((current.tv_sec - conf->last_sent.tv_sec) < + conn->ping_timeout)) { + transport_activity = 1; + } 
+ } + pthread_mutex_unlock (&conf->mutex); + + if (transport_activity) { + gf_log (this->name, GF_LOG_TRACE, + "ping timer expired but transport activity " + "detected - not bailing transport"); + conn->transport_activity = 0; + timeout.tv_sec = conn->ping_timeout; + timeout.tv_usec = 0; + + conn->ping_timer = + gf_timer_call_after (trans->xl->ctx, timeout, + client_ping_timer_expired, + (void *) trans); + if (conn->ping_timer == NULL) + gf_log (this->name, GF_LOG_DEBUG, + "unable to setup timer"); + + } else { + conn->ping_started = 0; + conn->ping_timer = NULL; + disconnect = 1; + } + } + pthread_mutex_unlock (&conn->lock); + if (disconnect) { + gf_log (this->name, GF_LOG_ERROR, + "Server %s has not responded in the last %d " + "seconds, disconnecting.", + conf->transport[0]->peerinfo.identifier, + conn->ping_timeout); + + transport_disconnect (conf->transport[0]); + transport_disconnect (conf->transport[1]); + } +} + + +void +client_start_ping (void *data) +{ + xlator_t *this = NULL; + transport_t *trans = NULL; + client_conf_t *conf = NULL; + client_connection_t *conn = NULL; + int32_t ret = -1; + gf_hdr_common_t *hdr = NULL; + struct timeval timeout = {0, }; + call_frame_t *dummy_frame = NULL; + size_t hdrlen = -1; + gf_mop_ping_req_t *req = NULL; + int frame_count = 0; + + + trans = data; + this = trans->xl; + conf = this->private; + conn = trans->xl_private; + + if (!conn->ping_timeout) + return; + + pthread_mutex_lock (&conn->lock); + { + if (conn->ping_timer) + gf_timer_call_cancel (trans->xl->ctx, conn->ping_timer); + + conn->ping_timer = NULL; + conn->ping_started = 0; + + if (conn->saved_frames) + /* treat the case where conn->saved_frames is NULL + as no pending frames */ + frame_count = conn->saved_frames->count; + + if ((frame_count == 0) || !conn->connected) { + /* using goto looked ugly here, + * hence getting out this way */ + /* unlock */ + pthread_mutex_unlock (&conn->lock); + return; + } + + if (frame_count < 0) { + gf_log (this->name, 
GF_LOG_DEBUG, + "saved_frames->count is %"PRId64, + conn->saved_frames->count); + conn->saved_frames->count = 0; + } + + timeout.tv_sec = conn->ping_timeout; + timeout.tv_usec = 0; + + conn->ping_timer = + gf_timer_call_after (trans->xl->ctx, timeout, + client_ping_timer_expired, + (void *) trans); + + if (conn->ping_timer == NULL) { + gf_log (this->name, GF_LOG_DEBUG, + "unable to setup timer"); + } else { + conn->ping_started = 1; + } + } + pthread_mutex_unlock (&conn->lock); + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + if (!hdr) + goto err; + + dummy_frame = create_frame (this, this->ctx->pool); + + if (!dummy_frame) + goto err; + + dummy_frame->local = trans; + + ret = protocol_client_xfer (dummy_frame, this, trans, + GF_OP_TYPE_MOP_REQUEST, GF_MOP_PING, + hdr, hdrlen, NULL, 0, NULL); + return; +err: + if (hdr) + GF_FREE (hdr); + + if (dummy_frame) + STACK_DESTROY (dummy_frame->root); + + return; +} + + +int +client_ping_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + xlator_t *this = NULL; + transport_t *trans = NULL; + client_connection_t *conn = NULL; + struct timeval timeout = {0, }; + int op_ret = 0; + + trans = frame->local; frame->local = NULL; + this = trans->xl; + conn = trans->xl_private; + + op_ret = ntoh32 (hdr->rsp.op_ret); + + if (op_ret == -1) { + /* timer expired and transport bailed out */ + gf_log (this->name, GF_LOG_DEBUG, "timer must have expired"); + goto out; + } + + pthread_mutex_lock (&conn->lock); + { + timeout.tv_sec = conn->ping_timeout; + timeout.tv_usec = 0; + + gf_timer_call_cancel (trans->xl->ctx, + conn->ping_timer); + + conn->ping_timer = + gf_timer_call_after (trans->xl->ctx, timeout, + client_start_ping, (void *)trans); + if (conn->ping_timer == NULL) + gf_log (this->name, GF_LOG_DEBUG, + "gf_timer_call_after() returned NULL"); + } + pthread_mutex_unlock (&conn->lock); +out: + STACK_DESTROY (frame->root); + return 0; +} + +int +client_encode_groups (call_frame_t 
*frame, gf_hdr_common_t *hdr) +{ + int i = 0; + if ((!frame) || (!hdr)) + return -1; + + hdr->req.ngrps = hton32 (frame->root->ngrps); + if (frame->root->ngrps == 0) + return 0; + + for (; i < frame->root->ngrps; ++i) + hdr->req.groups[i] = hton32 (frame->root->groups[i]); + + return 0; +} + + +int +protocol_client_xfer (call_frame_t *frame, xlator_t *this, transport_t *trans, + int type, int op, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iovec *vector, int count, + struct iobref *iobref) +{ + client_conf_t *conf = NULL; + client_connection_t *conn = NULL; + uint64_t callid = 0; + int32_t ret = -1; + int start_ping = 0; + gf_hdr_common_t rsphdr = {0, }; + + conf = this->private; + + if (!trans) { + /* default to bulk op since it is 'safer' */ + trans = conf->transport[CHANNEL_BULK]; + } + conn = trans->xl_private; + + pthread_mutex_lock (&conn->lock); + { + callid = ++conn->callid; + + hdr->callid = hton64 (callid); + hdr->op = hton32 (op); + hdr->type = hton32 (type); + + if (frame) { + hdr->req.uid = hton32 (frame->root->uid); + hdr->req.gid = hton32 (frame->root->gid); + hdr->req.pid = hton32 (frame->root->pid); + hdr->req.lk_owner = hton64 (frame->root->lk_owner); + client_encode_groups (frame, hdr); + } + + if (conn->connected == 0) + transport_connect (trans); + + ret = -1; + + if (conn->connected || + ((type == GF_OP_TYPE_MOP_REQUEST) && + (op == GF_MOP_SETVOLUME))) { + ret = transport_submit (trans, (char *)hdr, hdrlen, + vector, count, iobref); + } + + if ((ret >= 0) && frame) { + pthread_mutex_lock (&conf->mutex); + { + gettimeofday (&conf->last_sent, NULL); + } + pthread_mutex_unlock (&conf->mutex); + save_frame (trans, frame, op, type, callid); + } + + if (!conn->ping_started && (ret >= 0)) { + start_ping = 1; + } + } + pthread_mutex_unlock (&conn->lock); + + if (start_ping) + client_start_ping ((void *) trans); + + if (frame && (ret < 0)) { + rsphdr.op = op; + rsphdr.rsp.op_ret = hton32 (-1); + rsphdr.rsp.op_errno = hton32 (ENOTCONN); + + if (type 
== GF_OP_TYPE_FOP_REQUEST) { + rsphdr.type = GF_OP_TYPE_FOP_REPLY; + gf_fops[op] (frame, &rsphdr, sizeof (rsphdr), NULL); + } else if (type == GF_OP_TYPE_MOP_REQUEST) { + rsphdr.type = GF_OP_TYPE_MOP_REPLY; + gf_mops[op] (frame, &rsphdr, sizeof (rsphdr), NULL); + } else { + rsphdr.type = GF_OP_TYPE_CBK_REPLY; + gf_cbks[op] (frame, &rsphdr, sizeof (rsphdr), NULL); + } + + GF_FREE (hdr); + } + + return ret; +} + + + +/** + * client_create - create function for client protocol + * @frame: call frame + * @this: this translator structure + * @path: complete path to file + * @flags: create flags + * @mode: create mode + * + * external reference through client_protocol_xlator->fops->create + */ + +int +client_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, fd_t *fd) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_create_req_t *req = NULL; + size_t hdrlen = 0; + size_t pathlen = 0; + size_t baselen = 0; + int32_t ret = -1; + ino_t par = 0; + uint64_t gen = 0; + client_local_t *local = NULL; + + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t); + GF_VALIDATE_OR_GOTO (this->name, local, unwind); + + local->fd = fd_ref (fd); + loc_copy (&local->loc, loc); + local->flags = flags; + + frame->local = local; + + pathlen = STRLEN_0 (loc->path); + baselen = STRLEN_0 (loc->name); + + ret = inode_ctx_get2 (loc->parent, this, &par, &gen); + if (loc->parent->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "CREATE %"PRId64"/%s (%s): failed to get remote inode " + "number for parent inode", + loc->parent->ino, loc->name, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen + baselen); + hdr = gf_hdr_new (req, pathlen + baselen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->flags = hton32 (gf_flags_from_flags (flags)); + req->mode = hton32 (mode); + req->par = hton64 (par); + req->gen = hton64 (gen); + strcpy (req->path, loc->path); + strcpy (req->bname + pathlen, 
loc->name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_CREATE, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, fd, NULL, NULL); + return 0; + +} + +/** + * client_open - open function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location of file + * @flags: open flags + * @mode: open modes + * + * external reference through client_protocol_xlator->fops->open + */ + +int +client_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, int32_t wbflags) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + size_t hdrlen = 0; + gf_fop_open_req_t *req = NULL; + size_t pathlen = 0; + ino_t ino = 0; + uint64_t gen = 0; + client_local_t *local = NULL; + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t); + GF_VALIDATE_OR_GOTO (this->name, local, unwind); + + local->fd = fd_ref (fd); + loc_copy (&local->loc, loc); + local->flags = flags; + local->wbflags = wbflags; + + frame->local = local; + + pathlen = STRLEN_0 (loc->path); + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "OPEN %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + req->flags = hton32 (gf_flags_from_flags (flags)); + req->wbflags = hton32 (wbflags); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_OPEN, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, fd); + 
return 0; + +} + + +/** + * client_stat - stat function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * + * external reference through client_protocol_xlator->fops->stat + */ + +int +client_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_stat_req_t *req = NULL; + size_t hdrlen = -1; + int32_t ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + ino_t gen = 0; + + pathlen = STRLEN_0 (loc->path); + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_TRACE, + "STAT %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_STAT, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; + +} + + +/** + * client_readlink - readlink function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * @size: + * + * external reference through client_protocol_xlator->fops->readlink + */ +int +client_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_readlink_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + uint64_t gen = 0; + + pathlen = STRLEN_0 (loc->path); + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "READLINK %"PRId64" (%s): " + "failed to get remote inode number", + 
loc->inode->ino, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + req->size = hton32 (size); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_READLINK, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND_STRICT (readlink, frame, -1, EINVAL, + NULL, NULL); + return 0; + +} + + +/** + * client_mknod - mknod function for client protocol + * @frame: call frame + * @this: this translator structure + * @path: pathname of node + * @mode: + * @dev: + * + * external reference through client_protocol_xlator->fops->mknod + */ +int +client_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t dev) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_mknod_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + size_t baselen = 0; + ino_t par = 0; + uint64_t gen = 0; + client_local_t *local = NULL; + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t); + GF_VALIDATE_OR_GOTO (this->name, local, unwind); + + loc_copy (&local->loc, loc); + + frame->local = local; + + pathlen = STRLEN_0 (loc->path); + baselen = STRLEN_0 (loc->name); + ret = inode_ctx_get2 (loc->parent, this, &par, &gen); + if (loc->parent->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "MKNOD %"PRId64"/%s (%s): failed to get remote inode " + "number for parent", + loc->parent->ino, loc->name, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen + baselen); + hdr = gf_hdr_new (req, pathlen + baselen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->par = hton64 (par); + req->gen = hton64 (gen); + req->mode = hton32 (mode); + req->dev = hton64 (dev); + strcpy 
(req->path, loc->path); + strcpy (req->bname + pathlen, loc->name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_MKNOD, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, loc->inode, NULL); + return 0; + +} + + +/** + * client_mkdir - mkdir function for client protocol + * @frame: call frame + * @this: this translator structure + * @path: pathname of directory + * @mode: + * + * external reference through client_protocol_xlator->fops->mkdir + */ +int +client_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_mkdir_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + size_t baselen = 0; + ino_t par = 0; + uint64_t gen = 0; + client_local_t *local = NULL; + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t); + GF_VALIDATE_OR_GOTO (this->name, local, unwind); + + loc_copy (&local->loc, loc); + + frame->local = local; + + pathlen = STRLEN_0 (loc->path); + baselen = STRLEN_0 (loc->name); + ret = inode_ctx_get2 (loc->parent, this, &par, &gen); + if (loc->parent->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "MKDIR %"PRId64"/%s (%s): failed to get remote inode " + "number for parent", + loc->parent->ino, loc->name, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen + baselen); + hdr = gf_hdr_new (req, pathlen + baselen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->par = hton64 (par); + req->gen = hton64 (gen); + req->mode = hton32 (mode); + strcpy (req->path, loc->path); + strcpy (req->bname + pathlen, loc->name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_MKDIR, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, 
EINVAL, loc->inode, NULL); + return 0; + +} + +/** + * client_unlink - unlink function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location of file + * + * external reference through client_protocol_xlator->fops->unlink + */ + +int +client_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_unlink_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + size_t baselen = 0; + ino_t par = 0; + uint64_t gen = 0; + + pathlen = STRLEN_0 (loc->path); + baselen = STRLEN_0 (loc->name); + ret = inode_ctx_get2 (loc->parent, this, &par, &gen); + if (loc->parent->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "UNLINK %"PRId64"/%s (%s): failed to get remote inode " + "number for parent", + loc->parent->ino, loc->name, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen + baselen); + hdr = gf_hdr_new (req, pathlen + baselen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->par = hton64 (par); + req->gen = hton64 (gen); + strcpy (req->path, loc->path); + strcpy (req->bname + pathlen, loc->name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_UNLINK, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL); + return 0; + +} + +/** + * client_rmdir - rmdir function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * + * external reference through client_protocol_xlator->fops->rmdir + */ + +int +client_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_rmdir_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + size_t baselen = 0; + ino_t par = 0; + uint64_t gen = 0; + + pathlen = STRLEN_0 (loc->path); + baselen = STRLEN_0 (loc->name); + ret = 
inode_ctx_get2 (loc->parent, this, &par, &gen); + if (loc->parent->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "RMDIR %"PRId64"/%s (%s): failed to get remote inode " + "number for parent", + loc->parent->ino, loc->name, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen + baselen); + hdr = gf_hdr_new (req, pathlen + baselen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->par = hton64 (par); + req->gen = hton64 (gen); + strcpy (req->path, loc->path); + strcpy (req->bname + pathlen, loc->name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_RMDIR, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL); + return 0; + +} + + +/** + * client_symlink - symlink function for client protocol + * @frame: call frame + * @this: this translator structure + * @oldpath: pathname of target + * @newpath: pathname of symlink + * + * external reference through client_protocol_xlator->fops->symlink + */ + +int +client_symlink (call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_symlink_req_t *req = NULL; + size_t hdrlen = 0; + size_t pathlen = 0; + size_t newlen = 0; + size_t baselen = 0; + ino_t par = 0; + uint64_t gen = 0; + client_local_t *local = NULL; + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t); + GF_VALIDATE_OR_GOTO (this->name, local, unwind); + + loc_copy (&local->loc, loc); + + frame->local = local; + + pathlen = STRLEN_0 (loc->path); + baselen = STRLEN_0 (loc->name); + newlen = STRLEN_0 (linkname); + ret = inode_ctx_get2 (loc->parent, this, &par, &gen); + if (loc->parent->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "SYMLINK %"PRId64"/%s (%s): failed to get remote inode" + " number parent", + loc->parent->ino, loc->name, loc->path); + goto unwind; + } + 
+ hdrlen = gf_hdr_len (req, pathlen + baselen + newlen); + hdr = gf_hdr_new (req, pathlen + baselen + newlen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->par = hton64 (par); + req->gen = hton64 (gen); + strcpy (req->path, loc->path); + strcpy (req->bname + pathlen, loc->name); + strcpy (req->linkname + pathlen + baselen, linkname); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_SYMLINK, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, loc->inode, NULL); + return 0; + +} + +/** + * client_rename - rename function for client protocol + * @frame: call frame + * @this: this translator structure + * @oldloc: location of old pathname + * @newloc: location of new pathname + * + * external reference through client_protocol_xlator->fops->rename + */ + +int +client_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_rename_req_t *req = NULL; + size_t hdrlen = 0; + size_t oldpathlen = 0; + size_t oldbaselen = 0; + size_t newpathlen = 0; + size_t newbaselen = 0; + ino_t oldpar = 0; + uint64_t oldgen = 0; + ino_t newpar = 0; + uint64_t newgen = 0; + + oldpathlen = STRLEN_0 (oldloc->path); + oldbaselen = STRLEN_0 (oldloc->name); + newpathlen = STRLEN_0 (newloc->path); + newbaselen = STRLEN_0 (newloc->name); + ret = inode_ctx_get2 (oldloc->parent, this, &oldpar, &oldgen); + if (oldloc->parent->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "RENAME %"PRId64"/%s (%s): failed to get remote inode " + "number for source parent", + oldloc->parent->ino, oldloc->name, oldloc->path); + goto unwind; + } + + ret = inode_ctx_get2 (newloc->parent, this, &newpar, &newgen); + if (newloc->parent->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "CREATE %"PRId64"/%s (%s): failed to get remote inode " + "number for 
destination parent", + newloc->parent->ino, newloc->name, newloc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, (oldpathlen + oldbaselen + + newpathlen + newbaselen)); + hdr = gf_hdr_new (req, (oldpathlen + oldbaselen + + newpathlen + newbaselen)); + + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->oldpar = hton64 (oldpar); + req->oldgen = hton64 (oldgen); + req->newpar = hton64 (newpar); + req->newgen = hton64 (newgen); + + strcpy (req->oldpath, oldloc->path); + strcpy (req->oldbname + oldpathlen, oldloc->name); + strcpy (req->newpath + oldpathlen + oldbaselen, newloc->path); + strcpy (req->newbname + oldpathlen + oldbaselen + newpathlen, + newloc->name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_RENAME, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; + +} + +/** + * client_link - link function for client protocol + * @frame: call frame + * @this: this translator structure + * @oldloc: location of old pathname + * @newpath: new pathname + * + * external reference through client_protocol_xlator->fops->link + */ + +int +client_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_link_req_t *req = NULL; + size_t hdrlen = 0; + size_t oldpathlen = 0; + size_t newpathlen = 0; + size_t newbaselen = 0; + ino_t oldino = 0; + uint64_t oldgen = 0; + ino_t newpar = 0; + uint64_t newgen = 0; + client_local_t *local = NULL; + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t); + GF_VALIDATE_OR_GOTO (this->name, local, unwind); + + loc_copy (&local->loc, oldloc); + + frame->local = local; + + oldpathlen = STRLEN_0 (oldloc->path); + newpathlen = STRLEN_0 (newloc->path); + newbaselen = STRLEN_0 (newloc->name); + + ret = inode_ctx_get2 (oldloc->inode, this, &oldino, 
&oldgen); + if (oldloc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "LINK %"PRId64"/%s (%s) ==> %"PRId64" (%s): " + "failed to get remote inode number for source inode", + newloc->parent->ino, newloc->name, newloc->path, + oldloc->ino, oldloc->path); + goto unwind; + } + + ret = inode_ctx_get2 (newloc->parent, this, &newpar, &newgen); + if (newloc->parent->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "LINK %"PRId64"/%s (%s) ==> %"PRId64" (%s): " + "failed to get remote inode number destination parent", + newloc->parent->ino, newloc->name, newloc->path, + oldloc->ino, oldloc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, oldpathlen + newpathlen + newbaselen); + hdr = gf_hdr_new (req, oldpathlen + newpathlen + newbaselen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + strcpy (req->oldpath, oldloc->path); + strcpy (req->newpath + oldpathlen, newloc->path); + strcpy (req->newbname + oldpathlen + newpathlen, newloc->name); + + req->oldino = hton64 (oldino); + req->oldgen = hton64 (oldgen); + req->newpar = hton64 (newpar); + req->newgen = hton64 (newgen); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_LINK, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, oldloc->inode, NULL); + return 0; +} + + +/** + * client_truncate - truncate function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * @offset: + * + * external reference through client_protocol_xlator->fops->truncate + */ + +int +client_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_truncate_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + uint64_t gen = 0; + + pathlen = STRLEN_0 (loc->path); + ret = inode_ctx_get2 (loc->inode, this, 
&ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "TRUNCATE %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + req->offset = hton64 (offset); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_TRUNCATE, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; + +} + + +/** + * client_readv - readv function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * @size: + * @offset: + * + * external reference through client_protocol_xlator->fops->readv + */ + +int +client_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_read_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int ret = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx, EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL, 0, NULL); + return 0; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx, EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL, 0, NULL); + return 0; + } + + remote_fd = fdctx->remote_fd; + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req 
= gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->size = hton32 (size); + req->offset = hton64 (offset); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_READ, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, NULL, 0, NULL); + return 0; + +} + +/** + * client_writev - writev function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * @vector: + * @count: + * @offset: + * + * external reference through client_protocol_xlator->fops->writev + */ + +int +client_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + struct iobref *iobref) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_write_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int ret = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. 
EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + remote_fd = fdctx->remote_fd; + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->size = hton32 (iov_length (vector, count)); + req->offset = hton64 (offset); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_WRITE, + hdr, hdrlen, vector, count, iobref); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; + +} + + +/** + * client_statfs - statfs function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * + * external reference through client_protocol_xlator->fops->statfs + */ + +int +client_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_statfs_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + ino_t gen = 0; + + pathlen = STRLEN_0 (loc->path); + + if (loc->inode) { + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "STATFS %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + } + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_STATFS, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; + +} + + +/** + * client_flush - flush function for client protocol + * 
@frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * + * external reference through client_protocol_xlator->fops->flush + */ + +int +client_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_flush_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int ret = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + remote_fd = fdctx->remote_fd; + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FLUSH, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL); + return 0; + +} + +/** + * client_fsync - fsync function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * @flags: + * + * external reference through client_protocol_xlator->fops->fsync + */ + +int +client_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fsync_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int32_t ret = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + conf = this->private; + + pthread_mutex_lock 
(&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + remote_fd = fdctx->remote_fd; + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->data = hton32 (flags); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FSYNC, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL); + return 0; + +} + +int +client_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t flags, dict_t *dict) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_xattrop_req_t *req = NULL; + size_t hdrlen = 0; + size_t dict_len = 0; + int32_t ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + uint64_t gen = 0; + char *buf = NULL; + + GF_VALIDATE_OR_GOTO ("client", this, unwind); + + GF_VALIDATE_OR_GOTO (this->name, loc, unwind); + + if (dict) { + ret = dict_allocate_and_serialize (dict, &buf, &dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get serialized length of dict(%p)", + dict); + goto unwind; + } + } + + pathlen = STRLEN_0 (loc->path); + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "XATTROP %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, dict_len + pathlen); + hdr = gf_hdr_new (req, dict_len + pathlen); + 
GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->flags = hton32 (flags); + req->dict_len = hton32 (dict_len); + if (dict) { + memcpy (req->dict, buf, dict_len); + GF_FREE (buf); + } + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + strcpy (req->path + dict_len, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_XATTROP, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; +} + + +int +client_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t flags, dict_t *dict) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fxattrop_req_t *req = NULL; + size_t hdrlen = 0; + size_t dict_len = 0; + int64_t remote_fd = -1; + int32_t ret = -1; + ino_t ino = 0; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + conf = this->private; + + if (dict) { + dict_len = dict_serialized_length (dict); + if (dict_len < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get serialized length of dict(%p)", + dict); + goto unwind; + } + } + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. " + "returning EBADFD", + fd->inode->ino); + goto unwind; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. 
EBADFD", + fd->inode->ino); + goto unwind; + } + + ino = fd->inode->ino; + remote_fd = fdctx->remote_fd; + + hdrlen = gf_hdr_len (req, dict_len); + hdr = gf_hdr_new (req, dict_len); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->flags = hton32 (flags); + req->dict_len = hton32 (dict_len); + if (dict) { + ret = dict_serialize (dict, req->dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to serialize dictionary(%p)", + dict); + goto unwind; + } + } + req->fd = hton64 (remote_fd); + req->ino = hton64 (ino); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FXATTROP, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + +} + +/** + * client_setxattr - setxattr function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location + * @dict: dictionary which contains key:value to be set. 
+ * @flags: + * + * external reference through client_protocol_xlator->fops->setxattr + */ + +int +client_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *dict, int32_t flags) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_setxattr_req_t *req = NULL; + size_t hdrlen = 0; + size_t dict_len = 0; + int ret = -1; + size_t pathlen = 0; + ino_t ino = 0; + uint64_t gen = 0; + + dict_len = dict_serialized_length (dict); + if (dict_len < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get serialized length of dict(%p)", + dict); + goto unwind; + } + + pathlen = STRLEN_0 (loc->path); + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "SETXATTR %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, dict_len + pathlen); + hdr = gf_hdr_new (req, dict_len + pathlen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + req->flags = hton32 (flags); + req->dict_len = hton32 (dict_len); + + ret = dict_serialize (dict, req->dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to serialize dictionary(%p)", + dict); + goto unwind; + } + + strcpy (req->path + dict_len, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_SETXATTR, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL); + return 0; +} + +/** + * client_fsetxattr - fsetxattr function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: fd + * @dict: dictionary which contains key:value to be set. 
+ * @flags: + * + * external reference through client_protocol_xlator->fops->fsetxattr + */ + +int +client_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, int32_t flags) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fsetxattr_req_t *req = NULL; + size_t hdrlen = 0; + size_t dict_len = 0; + ino_t ino; + int ret = -1; + int64_t remote_fd = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + conf = this->private; + + dict_len = dict_serialized_length (dict); + if (dict_len < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get serialized length of dict(%p)", + dict); + goto unwind; + } + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + goto unwind; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. 
EBADFD", + fd->inode->ino); + goto unwind; + } + + ino = fd->inode->ino; + remote_fd = fdctx->remote_fd; + + hdrlen = gf_hdr_len (req, dict_len); + hdr = gf_hdr_new (req, dict_len); + + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->fd = hton64 (remote_fd); + req->flags = hton32 (flags); + req->dict_len = hton32 (dict_len); + + ret = dict_serialize (dict, req->dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to serialize dictionary(%p)", + dict); + goto unwind; + } + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FSETXATTR, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL); + return 0; +} + +/** + * client_getxattr - getxattr function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location structure + * + * external reference through client_protocol_xlator->fops->getxattr + */ + +int +client_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_getxattr_req_t *req = NULL; + size_t hdrlen = 0; + size_t pathlen = 0; + size_t namelen = 0; + ino_t ino = 0; + uint64_t gen = 0; + + pathlen = STRLEN_0 (loc->path); + if (name) + namelen = STRLEN_0 (name); + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "GETXATTR %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen + namelen); + hdr = gf_hdr_new (req, pathlen + namelen); + GF_VALIDATE_OR_GOTO (frame->this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + req->namelen = hton32 (namelen); + strcpy (req->path, loc->path); + if (name) + strcpy (req->name + 
pathlen, name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_GETXATTR, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; +} + + +/** + * client_fgetxattr - fgetxattr function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: fd + * + * external reference through client_protocol_xlator->fops->fgetxattr + */ + +int +client_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_fgetxattr_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + size_t namelen = 0; + ino_t ino = 0; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + if (name) + namelen = STRLEN_0 (name); + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get remote fd. EBADFD", + fd->inode->ino); + goto unwind; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. 
EBADFD", + fd->inode->ino); + goto unwind; + } + + ino = fd->inode->ino; + remote_fd = fdctx->remote_fd; + + hdrlen = gf_hdr_len (req, namelen); + hdr = gf_hdr_new (req, namelen); + + GF_VALIDATE_OR_GOTO (frame->this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->fd = hton64 (remote_fd); + req->namelen = hton32 (namelen); + + if (name) + strcpy (req->name, name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FGETXATTR, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; +} + + +/** + * client_removexattr - removexattr function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location structure + * @name: + * + * external reference through client_protocol_xlator->fops->removexattr + */ + +int +client_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_removexattr_req_t *req = NULL; + size_t hdrlen = 0; + size_t namelen = 0; + size_t pathlen = 0; + ino_t ino = 0; + uint64_t gen = 0; + + pathlen = STRLEN_0 (loc->path); + namelen = STRLEN_0 (name); + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "REMOVEXATTR %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen + namelen); + hdr = gf_hdr_new (req, pathlen + namelen); + GF_VALIDATE_OR_GOTO (frame->this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + strcpy (req->path, loc->path); + strcpy (req->name + pathlen, name); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_REMOVEXATTR, + hdr, hdrlen, 
NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL); + return 0; +} + +/** + * client_opendir - opendir function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location structure + * + * external reference through client_protocol_xlator->fops->opendir + */ + +int +client_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, + fd_t *fd) +{ + gf_fop_opendir_req_t *req = NULL; + gf_hdr_common_t *hdr = NULL; + size_t hdrlen = 0; + int ret = -1; + ino_t ino = 0; + uint64_t gen = 0; + size_t pathlen = 0; + client_local_t *local = NULL; + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t); + GF_VALIDATE_OR_GOTO (this->name, local, unwind); + + loc_copy (&local->loc, loc); + local->fd = fd_ref (fd); + + frame->local = local; + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "OPENDIR %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + + pathlen = STRLEN_0 (loc->path); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO (frame->this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_OPENDIR, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, fd); + return 0; + +} + +/** + * client_readdirp - readdirp function for client protocol + * @frame: call frame + * @this: this translator structure + * + * external reference through client_protocol_xlator->fops->readdirp + */ + +int +client_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset) +{ + gf_hdr_common_t *hdr = NULL; + 
gf_fop_readdirp_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int ret = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get" + " fd ctx. EBADFD", fd->inode->ino); + goto unwind; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get" + " fd ctx. EBADFD", fd->inode->ino); + goto unwind; + } + + remote_fd = fdctx->remote_fd; + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req->fd = hton64 (remote_fd); + req->size = hton32 (size); + req->offset = hton64 (offset); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_READDIRP, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + +} + + +/** + * client_readdir - readdir function for client protocol + * @frame: call frame + * @this: this translator structure + * + * external reference through client_protocol_xlator->fops->readdir + */ + +int +client_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_readdir_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int ret = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. 
EBADFD", + fd->inode->ino); + goto unwind; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get" + " fd ctx. EBADFD", fd->inode->ino); + goto unwind; + } + + remote_fd = fdctx->remote_fd; + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req->fd = hton64 (remote_fd); + req->size = hton32 (size); + req->offset = hton64 (offset); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_READDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + +} + +/** + * client_fsyncdir - fsyncdir function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * @flags: + * + * external reference through client_protocol_xlator->fops->fsyncdir + */ + +int +client_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fsyncdir_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int32_t ret = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + goto unwind; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get" + " fd ctx. 
EBADFD", fd->inode->ino); + goto unwind; + } + + remote_fd = fdctx->remote_fd; + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->data = hton32 (flags); + req->fd = hton64 (remote_fd); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FSYNCDIR, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + STACK_UNWIND (frame, -1, EBADFD); + return 0; +} + +/** + * client_access - access function for client protocol + * @frame: call frame + * @this: this translator structure + * @loc: location structure + * @mode: + * + * external reference through client_protocol_xlator->fops->access + */ + +int +client_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_access_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + ino_t ino = 0; + uint64_t gen = 0; + size_t pathlen = 0; + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "ACCESS %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + + pathlen = STRLEN_0 (loc->path); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + req->mask = hton32 (mask); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_ACCESS, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL); + return 0; + +} + +/** + * client_ftrucate - ftruncate function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * 
@offset: offset to truncate to + * + * external reference through client_protocol_xlator->fops->ftruncate + */ + +int +client_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_ftruncate_req_t *req = NULL; + int64_t remote_fd = -1; + size_t hdrlen = -1; + int ret = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get" + " fd ctx. EBADFD", fd->inode->ino); + goto unwind; + } + + remote_fd = fdctx->remote_fd; + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->offset = hton64 (offset); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FTRUNCATE, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; + +} + +/** + * client_fstat - fstat function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * + * external reference through client_protocol_xlator->fops->fstat + */ + +int +client_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fstat_req_t *req = NULL; + int64_t remote_fd = -1; + size_t hdrlen = -1; + int ret = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx 
(fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get" + " fd ctx. EBADFD", fd->inode->ino); + goto unwind; + } + + remote_fd = fdctx->remote_fd; + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FSTAT, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; + +} + +/** + * client_lk - lk function for client protocol + * @frame: call frame + * @this: this translator structure + * @fd: file descriptor structure + * @cmd: lock command + * @lock: + * + * external reference through client_protocol_xlator->fops->lk + */ + +int +client_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct flock *flock) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_lk_req_t *req = NULL; + size_t hdrlen = 0; + int64_t remote_fd = -1; + int32_t gf_cmd = 0; + int32_t gf_type = 0; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, "(%"PRId64"): failed to get" + " fd ctx. 
EBADFD", fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL); + return 0; + } + + remote_fd = fdctx->remote_fd; + if (cmd == F_GETLK || cmd == F_GETLK64) + gf_cmd = GF_LK_GETLK; + else if (cmd == F_SETLK || cmd == F_SETLK64) + gf_cmd = GF_LK_SETLK; + else if (cmd == F_SETLKW || cmd == F_SETLKW64) + gf_cmd = GF_LK_SETLKW; + else { + gf_log (this->name, GF_LOG_DEBUG, + "Unknown cmd (%d)!", gf_cmd); + goto unwind; + } + + switch (flock->l_type) { + case F_RDLCK: + gf_type = GF_LK_F_RDLCK; + break; + case F_WRLCK: + gf_type = GF_LK_F_WRLCK; + break; + case F_UNLCK: + gf_type = GF_LK_F_UNLCK; + break; + } + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->cmd = hton32 (gf_cmd); + req->type = hton32 (gf_type); + gf_flock_from_flock (&req->flock, flock); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_LK, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; +} + +/** + * client_inodelk - inodelk function for client protocol + * @frame: call frame + * @this: this translator structure + * @inode: inode structure + * @cmd: lock command + * @lock: flock struct + * + * external reference through client_protocol_xlator->fops->inodelk + */ + +int +client_inodelk (call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct flock *flock) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_inodelk_req_t *req = NULL; + size_t hdrlen = 0; + int32_t gf_cmd = 0; + int32_t gf_type = 0; + ino_t ino = 0; + uint64_t gen = 0; + size_t pathlen = 0; + size_t vollen = 0; + + pathlen = STRLEN_0 (loc->path); + vollen = STRLEN_0 (volume); + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + 
"INODELK %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + + if (cmd == F_GETLK || cmd == F_GETLK64) + gf_cmd = GF_LK_GETLK; + else if (cmd == F_SETLK || cmd == F_SETLK64) + gf_cmd = GF_LK_SETLK; + else if (cmd == F_SETLKW || cmd == F_SETLKW64) + gf_cmd = GF_LK_SETLKW; + else { + gf_log (this->name, GF_LOG_DEBUG, + "Unknown cmd (%d)!", gf_cmd); + goto unwind; + } + + switch (flock->l_type) { + case F_RDLCK: + gf_type = GF_LK_F_RDLCK; + break; + case F_WRLCK: + gf_type = GF_LK_F_WRLCK; + break; + case F_UNLCK: + gf_type = GF_LK_F_UNLCK; + break; + } + + hdrlen = gf_hdr_len (req, pathlen + vollen); + hdr = gf_hdr_new (req, pathlen + vollen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + strcpy (req->path, loc->path); + strcpy (req->path + pathlen, volume); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + + req->cmd = hton32 (gf_cmd); + req->type = hton32 (gf_type); + gf_flock_from_flock (&req->flock, flock); + + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, + GF_PROTO_FOP_INODELK, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL); + return 0; + +} + + +/** + * client_finodelk - finodelk function for client protocol + * @frame: call frame + * @this: this translator structure + * @inode: inode structure + * @cmd: lock command + * @lock: flock struct + * + * external reference through client_protocol_xlator->fops->finodelk + */ + +int +client_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, int32_t cmd, struct flock *flock) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + gf_fop_finodelk_req_t *req = NULL; + size_t hdrlen = 0; + size_t vollen = 0; + int32_t gf_cmd = 0; + int32_t gf_type = 0; + int64_t remote_fd = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + vollen = STRLEN_0 (volume); 
+ + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + remote_fd = fdctx->remote_fd; + if (cmd == F_GETLK || cmd == F_GETLK64) + gf_cmd = GF_LK_GETLK; + else if (cmd == F_SETLK || cmd == F_SETLK64) + gf_cmd = GF_LK_SETLK; + else if (cmd == F_SETLKW || cmd == F_SETLKW64) + gf_cmd = GF_LK_SETLKW; + else { + gf_log (this->name, GF_LOG_DEBUG, + "Unknown cmd (%d)!", gf_cmd); + goto unwind; + } + + switch (flock->l_type) { + case F_RDLCK: + gf_type = GF_LK_F_RDLCK; + break; + case F_WRLCK: + gf_type = GF_LK_F_WRLCK; + break; + case F_UNLCK: + gf_type = GF_LK_F_UNLCK; + break; + } + + hdrlen = gf_hdr_len (req, vollen); + hdr = gf_hdr_new (req, vollen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + strcpy (req->volume, volume); + + req->fd = hton64 (remote_fd); + + req->cmd = hton32 (gf_cmd); + req->type = hton32 (gf_type); + gf_flock_from_flock (&req->flock, flock); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, + GF_PROTO_FOP_FINODELK, + hdr, hdrlen, NULL, 0, NULL); + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL); + return 0; +} + + +int +client_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, const char *name, entrylk_cmd cmd, + entrylk_type type) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_entrylk_req_t *req = NULL; + size_t pathlen = 0; + size_t vollen = 0; + size_t hdrlen = -1; + int ret = -1; + ino_t ino = 0; + uint64_t gen = 0; + size_t namelen 
= 0; + + pathlen = STRLEN_0 (loc->path); + vollen = STRLEN_0 (volume); + + if (name) + namelen = STRLEN_0 (name); + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "ENTRYLK %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen + vollen + namelen); + hdr = gf_hdr_new (req, pathlen + vollen + namelen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + req->namelen = hton64 (namelen); + + strcpy (req->path, loc->path); + if (name) + strcpy (req->name + pathlen, name); + strcpy (req->volume + pathlen + namelen, volume); + + req->cmd = hton32 (cmd); + req->type = hton32 (type); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_ENTRYLK, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL); + return 0; + +} + + +int +client_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, const char *name, entrylk_cmd cmd, + entrylk_type type) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fentrylk_req_t *req = NULL; + int64_t remote_fd = -1; + size_t vollen = 0; + size_t namelen = 0; + size_t hdrlen = -1; + int ret = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + if (name) + namelen = STRLEN_0 (name); + + conf = this->private; + + vollen = STRLEN_0 (volume); + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get fd ctx. 
EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD); + return 0; + } + + remote_fd = fdctx->remote_fd; + hdrlen = gf_hdr_len (req, namelen + vollen); + hdr = gf_hdr_new (req, namelen + vollen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + req->namelen = hton64 (namelen); + + if (name) + strcpy (req->name, name); + + strcpy (req->volume + namelen, volume); + + req->cmd = hton32 (cmd); + req->type = hton32 (type); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FENTRYLK, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + + STACK_UNWIND (frame, -1, EINVAL); + return 0; +} + +/* + * client_lookup - lookup function for client protocol + * @frame: call frame + * @this: + * @loc: location + * + * not for external reference + */ + +int +client_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xattr_req) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_lookup_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + ino_t ino = 0; + ino_t par = 0; + uint64_t gen = 0; + size_t dictlen = 0; + size_t pathlen = 0; + size_t baselen = 0; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + client_local_t *local = NULL; + char *buf = NULL; + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t); + GF_VALIDATE_OR_GOTO (this->name, local, unwind); + + loc_copy (&local->loc, loc); + + frame->local = local; + + GF_VALIDATE_OR_GOTO (this->name, loc, unwind); + GF_VALIDATE_OR_GOTO (this->name, loc->path, unwind); + + if (loc->ino != 1 && loc->parent) { + ret = inode_ctx_get2 (loc->parent, this, &par, &gen); + if (loc->parent->ino && ret < 0) { + gf_log (this->name, GF_LOG_TRACE, + "LOOKUP 
%"PRId64"/%s (%s): failed to get " + "remote inode number for parent", + loc->parent->ino, loc->name, loc->path); + goto unwind; + } + GF_VALIDATE_OR_GOTO (this->name, loc->name, unwind); + baselen = STRLEN_0 (loc->name); + } else { + ino = 1; + } + + pathlen = STRLEN_0 (loc->path); + + if (xattr_req) { + ret = dict_allocate_and_serialize (xattr_req, &buf, &dictlen); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get serialized length of dict(%p)", + xattr_req); + goto unwind; + } + } + + hdrlen = gf_hdr_len (req, pathlen + baselen + dictlen); + hdr = gf_hdr_new (req, pathlen + baselen + dictlen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + req->par = hton64 (par); + strcpy (req->path, loc->path); + if (baselen) + strcpy (req->path + pathlen, loc->name); + + if (dictlen > 0) { + memcpy (req->dict + pathlen + baselen, buf, dictlen); + GF_FREE (buf); + } + + req->dictlen = hton32 (dictlen); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_LOOKUP, + hdr, hdrlen, NULL, 0, NULL); + return ret; + +unwind: + STACK_UNWIND (frame, op_ret, op_errno, loc->inode, NULL, NULL); + return ret; +} + + +int +client_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_setattr_req_t *req = NULL; + size_t hdrlen = 0; + size_t pathlen = 0; + ino_t ino = 0; + uint64_t gen = 0; + int ret = -1; + + GF_VALIDATE_OR_GOTO ("client", this, unwind); + GF_VALIDATE_OR_GOTO (this->name, frame, unwind); + + pathlen = STRLEN_0 (loc->path); + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_TRACE, + "SETATTR %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + goto unwind; + } + + hdrlen = gf_hdr_len (req, pathlen); + hdr = 
gf_hdr_new (req, pathlen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + strcpy (req->path, loc->path); + + gf_stat_from_iatt (&req->stbuf, stbuf); + req->valid = hton32 (valid); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_SETATTR, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; +} + + +int +client_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fsetattr_req_t *req = NULL; + size_t hdrlen = 0; + int ret = -1; + client_fd_ctx_t *fdctx = NULL; + int64_t remote_fd = -1; + client_conf_t *conf = NULL; + + GF_VALIDATE_OR_GOTO ("client", this, unwind); + GF_VALIDATE_OR_GOTO (this->name, frame, unwind); + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); + return 0; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. 
EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); + return 0; + } + + remote_fd = fdctx->remote_fd; + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + + req->fd = hton64 (remote_fd); + + gf_stat_from_iatt (&req->stbuf, stbuf); + req->valid = hton32 (valid); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_FSETATTR, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + STACK_UNWIND (frame, -1, EINVAL, NULL, NULL); + return 0; +} + + +int +client_fdctx_destroy (xlator_t *this, client_fd_ctx_t *fdctx) +{ + call_frame_t *fr = NULL; + int32_t ret = -1; + gf_hdr_common_t *hdr = NULL; + size_t hdrlen = 0; + gf_cbk_release_req_t *req = NULL; + gf_cbk_releasedir_req_t *reqdir = NULL; + int64_t remote_fd = -1; + int op = 0; + + remote_fd = fdctx->remote_fd; + + if (remote_fd == -1) + goto out; + + if (fdctx->is_dir) { + hdrlen = gf_hdr_len (reqdir, 0); + hdr = gf_hdr_new (reqdir, 0); + op = GF_CBK_RELEASEDIR; + reqdir = gf_param (hdr); + reqdir->fd = hton64 (remote_fd); + } else { + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + op = GF_CBK_RELEASE; + req = gf_param (hdr); + req->fd = hton64 (remote_fd); + } + + fr = create_frame (this, this->ctx->pool); + + ret = protocol_client_xfer (fr, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_CBK_REQUEST, op, + hdr, hdrlen, NULL, 0, NULL); + +out: + inode_unref (fdctx->inode); + GF_FREE (fdctx); + + return ret; +} + + +/** + * client_releasedir - releasedir function for client protocol + * @this: this translator structure + * @fd: file descriptor structure + * + * external reference through client_protocol_xlator->cbks->releasedir + */ + +int +client_releasedir (xlator_t *this, fd_t *fd) +{ + int64_t remote_fd = -1; + client_conf_t *conf = NULL; + client_fd_ctx_t *fdctx = NULL; + + conf = this->private; + + 
pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_del_ctx (fd, this); + if (fdctx != NULL) { + remote_fd = fdctx->remote_fd; + + /* fdctx->remote_fd == -1 indicates a reopen attempt + in progress. Just mark ->released = 1 and let + reopen_cbk handle releasing + */ + + if (remote_fd != -1) + list_del_init (&fdctx->sfd_pos); + + fdctx->released = 1; + } + } + pthread_mutex_unlock (&conf->mutex); + + if (remote_fd != -1) + client_fdctx_destroy (this, fdctx); + + return 0; +} + + +/** + * client_release - release function for client protocol + * @this: this translator structure + * @fd: file descriptor structure + * + * external reference through client_protocol_xlator->cbks->release + * + */ +int +client_release (xlator_t *this, fd_t *fd) +{ + int64_t remote_fd = -1; + client_conf_t *conf = NULL; + client_fd_ctx_t *fdctx = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_del_ctx (fd, this); + if (fdctx != NULL) { + remote_fd = fdctx->remote_fd; + + /* fdctx->remote_fd == -1 indicates a reopen attempt + in progress. 
Just mark ->released = 1 and let + reopen_cbk handle releasing + */ + + if (remote_fd != -1) + list_del_init (&fdctx->sfd_pos); + + fdctx->released = 1; + } + } + pthread_mutex_unlock (&conf->mutex); + + if (remote_fd != -1) + client_fdctx_destroy (this, fdctx); + + return 0; +} + +/* + * MGMT_OPS + */ + +/* Callbacks */ + +int +client_fxattrop_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_xattrop_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t gf_errno = 0; + int32_t op_errno = 0; + int32_t dict_len = 0; + dict_t *dict = NULL; + int32_t ret = -1; + char *dictbuf = NULL; + + rsp = gf_param (hdr); + GF_VALIDATE_OR_GOTO (frame->this->name, rsp, fail); + + op_ret = ntoh32 (hdr->rsp.op_ret); + + if (op_ret >= 0) { + op_ret = -1; + dict_len = ntoh32 (rsp->dict_len); + + if (dict_len > 0) { + dictbuf = memdup (rsp->dict, dict_len); + GF_VALIDATE_OR_GOTO (frame->this->name, dictbuf, fail); + + dict = dict_new(); + GF_VALIDATE_OR_GOTO (frame->this->name, dict, fail); + + ret = dict_unserialize (dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "failed to serialize dictionary(%p)", + dict); + op_errno = -ret; + goto fail; + } else { + dict->extra_free = dictbuf; + dictbuf = NULL; + } + } + op_ret = 0; + } + gf_errno = ntoh32 (hdr->rsp.op_errno); + op_errno = gf_error_to_errno (gf_errno); + +fail: + STACK_UNWIND (frame, op_ret, op_errno, dict); + + if (dictbuf) + GF_FREE (dictbuf); + + if (dict) + dict_unref (dict); + + return 0; +} + + +int +client_xattrop_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_xattrop_rsp_t *rsp = NULL; + int32_t op_ret = -1; + int32_t gf_errno = EINVAL; + int32_t op_errno = 0; + int32_t dict_len = 0; + dict_t *dict = NULL; + int32_t ret = -1; + char *dictbuf = NULL; + + rsp = gf_param (hdr); + GF_VALIDATE_OR_GOTO (frame->this->name, rsp, fail); + + op_ret = ntoh32 (hdr->rsp.op_ret); + if (op_ret >= 0) { + 
op_ret = -1; + dict_len = ntoh32 (rsp->dict_len); + + if (dict_len > 0) { + dictbuf = memdup (rsp->dict, dict_len); + GF_VALIDATE_OR_GOTO (frame->this->name, dictbuf, fail); + + dict = get_new_dict(); + GF_VALIDATE_OR_GOTO (frame->this->name, dict, fail); + dict_ref (dict); + + ret = dict_unserialize (dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "failed to serialize dictionary(%p)", + dict); + goto fail; + } else { + dict->extra_free = dictbuf; + dictbuf = NULL; + } + } + op_ret = 0; + } + gf_errno = ntoh32 (hdr->rsp.op_errno); + op_errno = gf_error_to_errno (gf_errno); + + +fail: + STACK_UNWIND (frame, op_ret, op_errno, dict); + + if (dictbuf) + GF_FREE (dictbuf); + if (dict) + dict_unref (dict); + + return 0; +} + +/* + * client_create_cbk - create callback function for client protocol + * @frame: call frame + * @args: arguments in dictionary + * + * not for external reference + */ + +int +client_create_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_create_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + fd_t *fd = NULL; + inode_t *inode = NULL; + struct iatt stbuf = {0, }; + struct iatt preparent = {0, }; + struct iatt postparent = {0, }; + int64_t remote_fd = 0; + int32_t ret = -1; + client_local_t *local = NULL; + client_conf_t *conf = NULL; + client_fd_ctx_t *fdctx = NULL; + ino_t ino = 0; + uint64_t gen = 0; + + local = frame->local; frame->local = NULL; + conf = frame->this->private; + fd = local->fd; + inode = local->loc.inode; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = ntoh32 (hdr->rsp.op_errno); + + if (op_ret >= 0) { + remote_fd = ntoh64 (rsp->fd); + gf_stat_to_iatt (&rsp->stat, &stbuf); + + gf_stat_to_iatt (&rsp->preparent, &preparent); + gf_stat_to_iatt (&rsp->postparent, &postparent); + + ino = stbuf.ia_ino; + gen = stbuf.ia_gen; + } + + if (op_ret >= 0) { + ret = inode_ctx_put2 (local->loc.inode, 
frame->this, ino, gen); + + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "CREATE %"PRId64"/%s (%s): failed to set " + "remote inode number to inode ctx", + local->loc.parent->ino, local->loc.name, + local->loc.path); + op_ret = -1; + op_errno = EINVAL; + goto unwind_out; + } + + fdctx = GF_CALLOC (1, sizeof (*fdctx), + gf_client_mt_client_fd_ctx_t); + if (!fdctx) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind_out; + } + + fdctx->remote_fd = remote_fd; + fdctx->inode = inode_ref (fd->inode); + fdctx->ino = ino; + fdctx->gen = gen; + fdctx->flags = local->flags; + + INIT_LIST_HEAD (&fdctx->sfd_pos); + + this_fd_set_ctx (fd, frame->this, &local->loc, fdctx); + + pthread_mutex_lock (&conf->mutex); + { + list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); + } + pthread_mutex_unlock (&conf->mutex); + } +unwind_out: + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, &stbuf, + &preparent, &postparent); + + client_local_wipe (local); + + return 0; +} + + +/* + * client_open_cbk - open callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int +client_open_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + int32_t op_ret = -1; + int32_t op_errno = ENOTCONN; + fd_t *fd = NULL; + int64_t remote_fd = 0; + gf_fop_open_rsp_t *rsp = NULL; + client_local_t *local = NULL; + client_conf_t *conf = NULL; + client_fd_ctx_t *fdctx = NULL; + ino_t ino = 0; + uint64_t gen = 0; + + + local = frame->local; + + if (local->op) { + local->op (frame, hdr, hdrlen, iobuf); + return 0; + } + + frame->local = NULL; + conf = frame->this->private; + fd = local->fd; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = ntoh32 (hdr->rsp.op_errno); + + if (op_ret >= 0) { + remote_fd = ntoh64 (rsp->fd); + } + + if (op_ret >= 0) { + fdctx = GF_CALLOC (1, sizeof (*fdctx), + gf_client_mt_client_fd_ctx_t); + if (!fdctx) { + op_ret = -1; + op_errno = ENOMEM; + 
goto unwind_out; + } + + inode_ctx_get2 (fd->inode, frame->this, &ino, &gen); + + fdctx->remote_fd = remote_fd; + fdctx->inode = inode_ref (fd->inode); + fdctx->ino = ino; + fdctx->gen = gen; + fdctx->flags = local->flags; + fdctx->wbflags = local->wbflags; + + INIT_LIST_HEAD (&fdctx->sfd_pos); + + this_fd_set_ctx (fd, frame->this, &local->loc, fdctx); + + pthread_mutex_lock (&conf->mutex); + { + list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); + } + pthread_mutex_unlock (&conf->mutex); + } +unwind_out: + STACK_UNWIND (frame, op_ret, op_errno, fd); + + client_local_wipe (local); + + return 0; +} + +/* + * client_stat_cbk - stat callback for client protocol + * @frame: call frame + * @args: arguments dictionary + * + * not for external reference + */ + +int +client_stat_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + struct iatt stbuf = {0, }; + gf_fop_stat_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_iatt (&rsp->stat, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + + +/* + * client_mknod_cbk - mknod callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_mknod_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_mknod_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct iatt stbuf = {0, }; + inode_t *inode = NULL; + client_local_t *local = NULL; + int ret = 0; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + + local = frame->local; + frame->local = NULL; + inode = local->loc.inode; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) { + 
gf_stat_to_iatt (&rsp->stat, &stbuf); + + ret = inode_ctx_put2 (local->loc.inode, frame->this, + stbuf.ia_ino, stbuf.ia_gen); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "MKNOD %"PRId64"/%s (%s): failed to set remote" + " inode number to inode ctx", + local->loc.parent->ino, local->loc.name, + local->loc.path); + + STACK_UNWIND (frame, -1, EINVAL, inode, NULL, + NULL, NULL); + return 0; + } + + gf_stat_to_iatt (&rsp->preparent, &preparent); + gf_stat_to_iatt (&rsp->postparent, &postparent); + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf, + &preparent, &postparent); + + client_local_wipe (local); + + return 0; +} + +/* + * client_symlink_cbk - symlink callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_symlink_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_symlink_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct iatt stbuf = {0, }; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + inode_t *inode = NULL; + client_local_t *local = NULL; + int ret = 0; + + local = frame->local; + frame->local = NULL; + inode = local->loc.inode; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) { + gf_stat_to_iatt (&rsp->stat, &stbuf); + + ret = inode_ctx_put2 (inode, frame->this, + stbuf.ia_ino, stbuf.ia_gen); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "SYMLINK %"PRId64"/%s (%s): failed to set " + "remote inode number to inode ctx", + local->loc.parent->ino, local->loc.name, + local->loc.path); + STACK_UNWIND (frame, -1, EINVAL, inode, NULL, + NULL, NULL); + return 0; + } + gf_stat_to_iatt (&rsp->preparent, &preparent); + gf_stat_to_iatt (&rsp->postparent, &postparent); + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf, + &preparent, &postparent); + + 
client_local_wipe (local); + + return 0; +} + +/* + * client_link_cbk - link callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_link_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_link_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct iatt stbuf = {0, }; + inode_t *inode = NULL; + client_local_t *local = NULL; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + + local = frame->local; + frame->local = NULL; + inode = local->loc.inode; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) { + gf_stat_to_iatt (&rsp->stat, &stbuf); + + gf_stat_to_iatt (&rsp->preparent, &preparent); + gf_stat_to_iatt (&rsp->postparent, &postparent); + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf, + &preparent, &postparent); + + client_local_wipe (local); + + return 0; +} + +/* + * client_truncate_cbk - truncate callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_truncate_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_truncate_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct iatt prestat = {0, }; + struct iatt poststat = {0, }; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_iatt (&rsp->prestat, &prestat); + gf_stat_to_iatt (&rsp->poststat, &poststat); + } + + STACK_UNWIND (frame, op_ret, op_errno, &prestat, &poststat); + + return 0; +} + +/* client_fstat_cbk - fstat callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_fstat_cbk (call_frame_t *frame, 
gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + struct iatt stbuf = {0, }; + gf_fop_fstat_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_iatt (&rsp->stat, &stbuf); + + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + +/* + * client_ftruncate_cbk - ftruncate callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int +client_ftruncate_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_ftruncate_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct iatt prestat = {0, }; + struct iatt poststat = {0, }; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_iatt (&rsp->prestat, &prestat); + gf_stat_to_iatt (&rsp->poststat, &poststat); + } + + STACK_UNWIND (frame, op_ret, op_errno, &prestat, &poststat); + + return 0; +} + + +/* client_readv_cbk - readv callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external referece + */ + +int +client_readv_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_read_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct iovec vector = {0, }; + struct iatt stbuf = {0, }; + struct iobref *iobref = NULL; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret != -1) { + iobref = iobref_new (); + gf_stat_to_iatt (&rsp->stat, &stbuf); + vector.iov_len = op_ret; + + if (op_ret > 0) { + vector.iov_base = iobuf->ptr; + iobref_add (iobref, iobuf); + } + } + + STACK_UNWIND (frame, op_ret, 
op_errno, &vector, 1, &stbuf, iobref); + + if (iobref) + iobref_unref (iobref); + + if (iobuf) + iobuf_unref (iobuf); + + return 0; +} + +/* + * client_write_cbk - write callback for client protocol + * @frame: cal frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_write_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_write_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct iatt prestat = {0, }; + struct iatt poststat = {0, }; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) { + gf_stat_to_iatt (&rsp->prestat, &prestat); + gf_stat_to_iatt (&rsp->poststat, &poststat); + } + + STACK_UNWIND (frame, op_ret, op_errno, &prestat, &poststat); + + return 0; +} + + +int +client_readdirp_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_readdirp_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + uint32_t buf_size = 0; + gf_dirent_t entries; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = ntoh32 (hdr->rsp.op_errno); + + INIT_LIST_HEAD (&entries.list); + if (op_ret > 0) { + buf_size = ntoh32 (rsp->size); + gf_dirent_unserialize (&entries, rsp->buf, buf_size); + } + + STACK_UNWIND (frame, op_ret, op_errno, &entries); + + gf_dirent_free (&entries); + + return 0; +} + + +int +client_readdir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_readdir_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + uint32_t buf_size = 0; + gf_dirent_t entries; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = ntoh32 (hdr->rsp.op_errno); + + INIT_LIST_HEAD (&entries.list); + if (op_ret > 0) { + buf_size = ntoh32 (rsp->size); + gf_dirent_unserialize (&entries, rsp->buf, buf_size); + } + + STACK_UNWIND (frame, 
op_ret, op_errno, &entries); + + gf_dirent_free (&entries); + + return 0; +} + +/* + * client_fsync_cbk - fsync callback for client protocol + * + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_fsync_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + struct iatt prestat = {0, }; + struct iatt poststat = {0,}; + gf_fop_fsync_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_iatt (&rsp->prestat, &prestat); + gf_stat_to_iatt (&rsp->poststat, &poststat); + } + + STACK_UNWIND (frame, op_ret, op_errno, &prestat, &poststat); + + return 0; +} + +/* + * client_unlink_cbk - unlink callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_unlink_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_unlink_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_iatt (&rsp->preparent, &preparent); + gf_stat_to_iatt (&rsp->postparent, &postparent); + } + + STACK_UNWIND (frame, op_ret, op_errno, &preparent, &postparent); + + return 0; +} + +/* + * client_rename_cbk - rename callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_rename_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + struct iatt stbuf = {0, }; + gf_fop_rename_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct iatt preoldparent = {0, }; + struct 
iatt postoldparent = {0, }; + struct iatt prenewparent = {0, }; + struct iatt postnewparent = {0, }; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_iatt (&rsp->stat, &stbuf); + gf_stat_to_iatt (&rsp->preoldparent, &preoldparent); + gf_stat_to_iatt (&rsp->postoldparent, &postoldparent); + gf_stat_to_iatt (&rsp->prenewparent, &prenewparent); + gf_stat_to_iatt (&rsp->postnewparent, &postnewparent); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf, &preoldparent, + &postoldparent, &prenewparent, &postnewparent); + + return 0; +} + + +/* + * client_readlink_cbk - readlink callback for client protocol + * + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ +int +client_readlink_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_readlink_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + char *link = NULL; + struct iatt stbuf = {0,}; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret > 0) { + link = rsp->path; + gf_stat_to_iatt (&rsp->buf, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, link, &stbuf); + return 0; +} + +/* + * client_mkdir_cbk - mkdir callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_mkdir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_mkdir_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct iatt stbuf = {0, }; + inode_t *inode = NULL; + client_local_t *local = NULL; + int ret = 0; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + + local = frame->local; + inode = local->loc.inode; + frame->local = NULL; + + rsp = gf_param (hdr); + + op_ret = ntoh32 
(hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) { + gf_stat_to_iatt (&rsp->stat, &stbuf); + + ret = inode_ctx_put2 (inode, frame->this, stbuf.ia_ino, + stbuf.ia_gen); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "MKDIR %"PRId64"/%s (%s): failed to set " + "remote inode number to inode ctx", + local->loc.parent->ino, local->loc.name, + local->loc.path); + STACK_UNWIND (frame, -1, EINVAL, inode, NULL, + NULL, NULL); + return 0; + } + + gf_stat_to_iatt (&rsp->preparent, &preparent); + gf_stat_to_iatt (&rsp->postparent, &postparent); + } + + STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf, + &preparent, &postparent); + + client_local_wipe (local); + + return 0; +} + +/* + * client_flush_cbk - flush callback for client protocol + * + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_flush_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + int32_t op_ret = 0; + int32_t op_errno = 0; + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +/* + * client_opendir_cbk - opendir callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_opendir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + int32_t op_ret = -1; + int32_t op_errno = ENOTCONN; + fd_t *fd = NULL; + int64_t remote_fd = 0; + gf_fop_opendir_rsp_t *rsp = NULL; + client_local_t *local = NULL; + client_conf_t *conf = NULL; + client_fd_ctx_t *fdctx = NULL; + ino_t ino = 0; + uint64_t gen = 0; + + + local = frame->local; + + if (local->op) { + local->op (frame, hdr, hdrlen, iobuf); + return 0; + } + + frame->local = NULL; + conf = frame->this->private; + fd = local->fd; + + rsp = gf_param (hdr); + + op_ret = ntoh32 
(hdr->rsp.op_ret); + op_errno = ntoh32 (hdr->rsp.op_errno); + + if (op_ret >= 0) { + remote_fd = ntoh64 (rsp->fd); + } + + if (op_ret >= 0) { + fdctx = GF_CALLOC (1, sizeof (*fdctx), + gf_client_mt_client_fd_ctx_t); + if (!fdctx) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind_out; + } + + inode_ctx_get2 (fd->inode, frame->this, &ino, &gen); + + fdctx->remote_fd = remote_fd; + fdctx->inode = inode_ref (fd->inode); + fdctx->ino = ino; + fdctx->gen = gen; + + fdctx->is_dir = 1; + + INIT_LIST_HEAD (&fdctx->sfd_pos); + + this_fd_set_ctx (fd, frame->this, &local->loc, fdctx); + + pthread_mutex_lock (&conf->mutex); + { + list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); + } + pthread_mutex_unlock (&conf->mutex); + } +unwind_out: + STACK_UNWIND (frame, op_ret, op_errno, fd); + + client_local_wipe (local); + + return 0; +} + +/* + * client_rmdir_cbk - rmdir callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_rmdir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_rmdir_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_iatt (&rsp->preparent, &preparent); + gf_stat_to_iatt (&rsp->postparent, &postparent); + } + + STACK_UNWIND (frame, op_ret, op_errno, &preparent, &postparent); + + return 0; +} + +/* + * client_access_cbk - access callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_access_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_access_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 
(hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +/* + * client_lookup_cbk - lookup callback for client protocol + * + * @frame: call frame + * @args: arguments dictionary + * + * not for external reference + */ + +int +client_lookup_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + struct iatt stbuf = {0, }; + struct iatt postparent = {0, }; + inode_t *inode = NULL; + dict_t *xattr = NULL; + gf_fop_lookup_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + size_t dict_len = 0; + char *dictbuf = NULL; + int32_t ret = -1; + int32_t gf_errno = 0; + client_local_t *local = NULL; + ino_t oldino = 0; + uint64_t oldgen = 0; + + local = frame->local; + inode = local->loc.inode; + frame->local = NULL; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + + gf_stat_to_iatt (&rsp->postparent, &postparent); + + if (op_ret == 0) { + op_ret = -1; + gf_stat_to_iatt (&rsp->stat, &stbuf); + + ret = inode_ctx_get2 (inode, frame->this, &oldino, &oldgen); + if (oldino != stbuf.ia_ino || oldgen != stbuf.ia_gen) { + if (oldino) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "LOOKUP %"PRId64"/%s (%s): " + "inode number changed from " + "{%"PRId64",%"PRId64"} to {%"PRId64",%"PRId64"}", + local->loc.parent ? + local->loc.parent->ino : (uint64_t) 0, + local->loc.name, + local->loc.path, + oldgen, oldino, stbuf.ia_gen, stbuf.ia_ino); + op_errno = ESTALE; + goto fail; + } + + ret = inode_ctx_put2 (inode, frame->this, + stbuf.ia_ino, stbuf.ia_gen); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "LOOKUP %"PRId64"/%s (%s) : " + "failed to set remote inode " + "number to inode ctx", + local->loc.parent ? 
+ local->loc.parent->ino : (uint64_t) 0, + local->loc.name, + local->loc.path); + op_errno = EINVAL; + goto fail; + } + } + + dict_len = ntoh32 (rsp->dict_len); + + if (dict_len > 0) { + dictbuf = memdup (rsp->dict, dict_len); + GF_VALIDATE_OR_GOTO (frame->this->name, dictbuf, fail); + + xattr = dict_new(); + GF_VALIDATE_OR_GOTO (frame->this->name, xattr, fail); + + ret = dict_unserialize (dictbuf, dict_len, &xattr); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "%s (%"PRId64"): failed to " + "unserialize dictionary", + local->loc.path, inode->ino); + goto fail; + } else { + xattr->extra_free = dictbuf; + dictbuf = NULL; + } + } + op_ret = 0; + } + gf_errno = ntoh32 (hdr->rsp.op_errno); + op_errno = gf_error_to_errno (gf_errno); + +fail: + STACK_UNWIND (frame, op_ret, op_errno, inode, &stbuf, xattr, + &postparent); + + client_local_wipe (local); + + if (dictbuf) + GF_FREE (dictbuf); + + if (xattr) + dict_unref (xattr); + + return 0; +} + +static int32_t +client_setattr_cbk (call_frame_t *frame,gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + struct iatt statpre = {0, }; + struct iatt statpost = {0, }; + gf_fop_setattr_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_iatt (&rsp->statpre, &statpre); + gf_stat_to_iatt (&rsp->statpost, &statpost); + } + + STACK_UNWIND (frame, op_ret, op_errno, &statpre, &statpost); + + return 0; +} + +static int32_t +client_fsetattr_cbk (call_frame_t *frame,gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + struct iatt statpre = {0, }; + struct iatt statpost = {0, }; + gf_fop_setattr_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_stat_to_iatt 
(&rsp->statpre, &statpre); + gf_stat_to_iatt (&rsp->statpost, &statpost); + } + + STACK_UNWIND (frame, op_ret, op_errno, &statpre, &statpost); + + return 0; +} + + +int +gf_free_direntry (dir_entry_t *head) +{ + dir_entry_t *prev = NULL; + dir_entry_t *trav = NULL; + + prev = head; + GF_VALIDATE_OR_GOTO ("client-protocol", prev, fail); + + trav = head->next; + while (trav) { + prev->next = trav->next; + GF_FREE (trav->name); + if (IA_ISLNK (trav->buf.ia_type)) + GF_FREE (trav->link); + GF_FREE (trav); + trav = prev->next; + } + GF_FREE (head); +fail: + return 0; +} + +/* + * client_statfs_cbk - statfs callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_statfs_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + struct statvfs stbuf = {0, }; + gf_fop_statfs_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret == 0) { + gf_statfs_to_statfs (&rsp->statfs, &stbuf); + } + + STACK_UNWIND (frame, op_ret, op_errno, &stbuf); + + return 0; +} + +/* + * client_fsyncdir_cbk - fsyncdir callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_fsyncdir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + int32_t op_ret = 0; + int32_t op_errno = 0; + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +/* + * client_setxattr_cbk - setxattr callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_setxattr_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + 
gf_fop_setxattr_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +/* + * client_getxattr_cbk - getxattr callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_getxattr_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_getxattr_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t gf_errno = 0; + int32_t op_errno = 0; + int32_t dict_len = 0; + dict_t *dict = NULL; + int32_t ret = -1; + char *dictbuf = NULL; + client_local_t *local = NULL; + + local = frame->local; + frame->local = NULL; + + rsp = gf_param (hdr); + GF_VALIDATE_OR_GOTO (frame->this->name, rsp, fail); + + op_ret = ntoh32 (hdr->rsp.op_ret); + + if (op_ret >= 0) { + op_ret = -1; + dict_len = ntoh32 (rsp->dict_len); + + if (dict_len > 0) { + dictbuf = memdup (rsp->dict, dict_len); + GF_VALIDATE_OR_GOTO (frame->this->name, dictbuf, fail); + + dict = dict_new(); + GF_VALIDATE_OR_GOTO (frame->this->name, dict, fail); + + ret = dict_unserialize (dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "%s (%"PRId64"): failed to " + "unserialize xattr dictionary", + local->loc.path, + local->loc.inode->ino); + goto fail; + } else { + dict->extra_free = dictbuf; + dictbuf = NULL; + } + } + op_ret = 0; + } + gf_errno = ntoh32 (hdr->rsp.op_errno); + op_errno = gf_error_to_errno (gf_errno); +fail: + STACK_UNWIND (frame, op_ret, op_errno, dict); + + client_local_wipe (local); + + if (dictbuf) + GF_FREE (dictbuf); + + if (dict) + dict_unref (dict); + + return 0; +} + +/* + * client_removexattr_cbk - removexattr callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int 
+client_removexattr_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, + size_t hdrlen, struct iobuf *iobuf) +{ + int32_t op_ret = 0; + int32_t op_errno = 0; + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +/* + * client_lk_cbk - lk callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_lk_common_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + struct flock lock = {0,}; + gf_fop_lk_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret >= 0) { + gf_flock_to_flock (&rsp->flock, &lock); + } + + STACK_UNWIND (frame, op_ret, op_errno, &lock); + return 0; +} + +/* + * client_gf_file_lk_cbk - gf_file_lk callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_inodelk_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_inodelk_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +int +client_finodelk_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_finodelk_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/* + * client_entrylk_cbk - entrylk callback for client protocol + * @frame: call frame + * @args: argument dictionary 
+ * + * not for external reference + */ + +int +client_entrylk_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_entrylk_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +int +client_fentrylk_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_fentrylk_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + + +/* + * client_getspec - getspec function for client protocol + * @frame: call frame + * @this: client protocol xlator structure + * @flag: + * + * external reference through client_protocol_xlator->fops->getspec + */ + +int +client_getspec (call_frame_t *frame, xlator_t *this, const char *key, + int32_t flag) +{ + gf_hdr_common_t *hdr = NULL; + gf_mop_getspec_req_t *req = NULL; + size_t hdrlen = -1; + int keylen = 0; + int ret = -1; + + if (key) + keylen = STRLEN_0 (key); + + hdrlen = gf_hdr_len (req, keylen); + hdr = gf_hdr_new (req, keylen); + GF_VALIDATE_OR_GOTO (this->name, hdr, unwind); + + req = gf_param (hdr); + req->flags = hton32 (flag); + req->keylen = hton32 (keylen); + if (keylen) + strcpy (req->key, key); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_MOP_REQUEST, GF_MOP_GETSPEC, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +unwind: + if (hdr) + GF_FREE (hdr); + STACK_UNWIND (frame, -1, EINVAL, NULL); + return 0; +} + +/* + * client_getspec_cbk - getspec callback for client protocol + * + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_getspec_cbk 
(call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_mop_getspec_rsp_t *rsp = NULL; + char *spec_data = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + int32_t gf_errno = 0; + + op_ret = ntoh32 (hdr->rsp.op_ret); + gf_errno = ntoh32 (hdr->rsp.op_errno); + op_errno = gf_error_to_errno (gf_errno); + rsp = gf_param (hdr); + + if (op_ret >= 0) { + spec_data = rsp->spec; + } + + STACK_UNWIND (frame, op_ret, op_errno, spec_data); + return 0; +} + +int +client_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flag) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_checksum_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + ino_t ino = 0; + uint64_t gen = 0; + + hdrlen = gf_hdr_len (req, strlen (loc->path) + 1); + hdr = gf_hdr_new (req, strlen (loc->path) + 1); + req = gf_param (hdr); + + ret = inode_ctx_get2 (loc->inode, this, &ino, &gen); + if (loc->inode->ino && ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "CHECKSUM %"PRId64" (%s): " + "failed to get remote inode number", + loc->inode->ino, loc->path); + STACK_UNWIND (frame, -1, EINVAL, NULL, NULL); + return 0; + + } + + req->ino = hton64 (ino); + req->gen = hton64 (gen); + req->flag = hton32 (flag); + strcpy (req->path, loc->path); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_CHECKSUM, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +} + + +int +client_checksum_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_checksum_rsp_t *rsp = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + int32_t gf_errno = 0; + unsigned char *fchecksum = NULL; + unsigned char *dchecksum = NULL; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + gf_errno = ntoh32 (hdr->rsp.op_errno); + op_errno = gf_error_to_errno (gf_errno); + + if (op_ret >= 0) { + fchecksum = rsp->fchecksum; + dchecksum = rsp->dchecksum + NAME_MAX; + } + + 
STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum); + return 0; +} + + +int +client_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + int32_t len) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_rchecksum_req_t *req = NULL; + size_t hdrlen = -1; + int ret = -1; + + int64_t remote_fd = -1; + client_fd_ctx_t *fdctx = NULL; + client_conf_t *conf = NULL; + + hdrlen = gf_hdr_len (req, 0); + hdr = gf_hdr_new (req, 0); + req = gf_param (hdr); + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + fdctx = this_fd_get_ctx (fd, this); + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx == NULL) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, 0, NULL); + return 0; + } + + if (fdctx->remote_fd == -1) { + gf_log (this->name, GF_LOG_TRACE, + "(%"PRId64"): failed to get fd ctx. EBADFD", + fd->inode->ino); + STACK_UNWIND (frame, -1, EBADFD, 0, NULL); + return 0; + } + + remote_fd = fdctx->remote_fd; + + req->fd = hton64 (remote_fd); + req->offset = hton64 (offset); + req->len = hton32 (len); + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_BULK), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_RCHECKSUM, + hdr, hdrlen, NULL, 0, NULL); + + return ret; +} + + +int +client_rchecksum_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_rchecksum_rsp_t *rsp = NULL; + + int32_t op_ret = 0; + int32_t op_errno = 0; + int32_t gf_errno = 0; + uint32_t weak_checksum = 0; + unsigned char *strong_checksum = NULL; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + gf_errno = ntoh32 (hdr->rsp.op_errno); + op_errno = gf_error_to_errno (gf_errno); + + if (op_ret >= 0) { + weak_checksum = rsp->weak_checksum; + strong_checksum = rsp->strong_checksum; + } + + STACK_UNWIND (frame, op_ret, op_errno, weak_checksum, strong_checksum); + + return 0; +} + + +/* + * client_setspec_cbk - 
setspec callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_setspec_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + int32_t op_ret = 0; + int32_t op_errno = 0; + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + + + +int +protocol_client_reopendir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, + size_t hdrlen, struct iobuf *iobuf) +{ + int32_t op_ret = -1; + int32_t op_errno = ENOTCONN; + int64_t remote_fd = -1; + gf_fop_open_rsp_t *rsp = NULL; + client_local_t *local = NULL; + client_conf_t *conf = NULL; + client_fd_ctx_t *fdctx = NULL; + + + local = frame->local; frame->local = NULL; + conf = frame->this->private; + fdctx = local->fdctx; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = ntoh32 (hdr->rsp.op_errno); + + if (op_ret >= 0) + remote_fd = ntoh64 (rsp->fd); + + gf_log (frame->this->name, GF_LOG_DEBUG, + "reopendir on %s returned %d (%"PRId64")", + local->loc.path, op_ret, remote_fd); + + pthread_mutex_lock (&conf->mutex); + { + fdctx->remote_fd = remote_fd; + + if (!fdctx->released) { + list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); + fdctx = NULL; + } + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx) + client_fdctx_destroy (frame->this, fdctx); + + STACK_DESTROY (frame->root); + + client_local_wipe (local); + + return 0; +} + + + +int +protocol_client_reopendir (xlator_t *this, client_fd_ctx_t *fdctx) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + size_t hdrlen = 0; + gf_fop_opendir_req_t *req = NULL; + size_t pathlen = 0; + client_local_t *local = NULL; + inode_t *inode = NULL; + char *path = NULL; + call_frame_t *frame = NULL; + + inode = fdctx->inode; + + ret = inode_path (inode, NULL, &path); + if (ret < 0) { + goto out; + } + + local = GF_CALLOC (1, sizeof (*local), 
gf_client_mt_client_local_t); + if (!local) { + goto out; + } + + local->fdctx = fdctx; + local->op = protocol_client_reopendir_cbk; + local->loc.path = path; path = NULL; + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + goto out; + } + + pathlen = STRLEN_0 (local->loc.path); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + + req = gf_param (hdr); + + req->ino = hton64 (fdctx->ino); + req->gen = hton64 (fdctx->gen); + + strcpy (req->path, local->loc.path); + + gf_log (frame->this->name, GF_LOG_DEBUG, + "attempting reopendir on %s", local->loc.path); + + frame->local = local; local = NULL; + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_OPENDIR, + hdr, hdrlen, NULL, 0, NULL); + + return ret; + +out: + if (frame) + STACK_DESTROY (frame->root); + + if (local) + client_local_wipe (local); + + if (path) + GF_FREE (path); + + return 0; +} + + +int +protocol_client_reopen_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, + size_t hdrlen, struct iobuf *iobuf) +{ + int32_t op_ret = -1; + int32_t op_errno = ENOTCONN; + int64_t remote_fd = -1; + gf_fop_open_rsp_t *rsp = NULL; + client_local_t *local = NULL; + client_conf_t *conf = NULL; + client_fd_ctx_t *fdctx = NULL; + + + local = frame->local; frame->local = NULL; + conf = frame->this->private; + fdctx = local->fdctx; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = ntoh32 (hdr->rsp.op_errno); + + if (op_ret >= 0) + remote_fd = ntoh64 (rsp->fd); + + gf_log (frame->this->name, GF_LOG_DEBUG, + "reopen on %s returned %d (%"PRId64")", + local->loc.path, op_ret, remote_fd); + + pthread_mutex_lock (&conf->mutex); + { + fdctx->remote_fd = remote_fd; + + if (!fdctx->released) { + list_add_tail (&fdctx->sfd_pos, &conf->saved_fds); + fdctx = NULL; + } + } + pthread_mutex_unlock (&conf->mutex); + + if (fdctx) + client_fdctx_destroy (frame->this, fdctx); + + STACK_DESTROY (frame->root); 
+ + client_local_wipe (local); + + return 0; +} + + +int +protocol_client_reopen (xlator_t *this, client_fd_ctx_t *fdctx) +{ + int ret = -1; + gf_hdr_common_t *hdr = NULL; + size_t hdrlen = 0; + gf_fop_open_req_t *req = NULL; + size_t pathlen = 0; + client_local_t *local = NULL; + inode_t *inode = NULL; + char *path = NULL; + call_frame_t *frame = NULL; + + inode = fdctx->inode; + + ret = inode_path (inode, NULL, &path); + if (ret < 0) { + goto out; + } + + local = GF_CALLOC (1, sizeof (*local), gf_client_mt_client_local_t); + if (!local) { + goto out; + } + + local->fdctx = fdctx; + local->op = protocol_client_reopen_cbk; + local->loc.path = path; path = NULL; + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + goto out; + } + + pathlen = STRLEN_0 (local->loc.path); + + hdrlen = gf_hdr_len (req, pathlen); + hdr = gf_hdr_new (req, pathlen); + + req = gf_param (hdr); + + req->ino = hton64 (fdctx->ino); + req->gen = hton64 (fdctx->gen); + req->flags = hton32 (gf_flags_from_flags (fdctx->flags)); + req->wbflags = hton32 (fdctx->wbflags); + strcpy (req->path, local->loc.path); + + gf_log (frame->this->name, GF_LOG_DEBUG, + "attempting reopen on %s", local->loc.path); + + frame->local = local; local = NULL; + + ret = protocol_client_xfer (frame, this, + CLIENT_CHANNEL (this, CHANNEL_LOWLAT), + GF_OP_TYPE_FOP_REQUEST, GF_PROTO_FOP_OPEN, + hdr, hdrlen, NULL, 0, NULL); + + return ret; + +out: + if (frame) + STACK_DESTROY (frame->root); + + if (local) + client_local_wipe (local); + + if (path) + GF_FREE (path); + + return 0; + +} + + +int +protocol_client_post_handshake (call_frame_t *frame, xlator_t *this) +{ + client_conf_t *conf = NULL; + client_fd_ctx_t *tmp = NULL; + client_fd_ctx_t *fdctx = NULL; + xlator_list_t *parent = NULL; + struct list_head reopen_head; + + conf = this->private; + INIT_LIST_HEAD (&reopen_head); + + pthread_mutex_lock (&conf->mutex); + { + list_for_each_entry_safe (fdctx, tmp, &conf->saved_fds, + sfd_pos) { + if (fdctx->remote_fd 
!= -1) + continue; + + list_del (&fdctx->sfd_pos); + list_add_tail (&fdctx->sfd_pos, &reopen_head); + } + } + pthread_mutex_unlock (&conf->mutex); + + list_for_each_entry_safe (fdctx, tmp, &reopen_head, sfd_pos) { + list_del_init (&fdctx->sfd_pos); + + if (fdctx->is_dir) + protocol_client_reopendir (this, fdctx); + else + protocol_client_reopen (this, fdctx); + } + + parent = this->parents; + + while (parent) { + xlator_notify (parent->xlator, GF_EVENT_CHILD_UP, + this); + parent = parent->next; + } + + return 0; +} + +/* + * client_setvolume_cbk - setvolume callback for client protocol + * @frame: call frame + * @args: argument dictionary + * + * not for external reference + */ + +int +client_setvolume_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + client_conf_t *conf = NULL; + gf_mop_setvolume_rsp_t *rsp = NULL; + client_connection_t *conn = NULL; + glusterfs_ctx_t *ctx = NULL; + xlator_t *this = NULL; + xlator_list_t *parent = NULL; + transport_t *trans = NULL; + dict_t *reply = NULL; + char *remote_subvol = NULL; + char *remote_error = NULL; + char *process_uuid = NULL; + int32_t ret = -1; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + int32_t dict_len = 0; + transport_t *peer_trans = NULL; + uint64_t peer_trans_int = 0; + + trans = frame->local; frame->local = NULL; + this = frame->this; + conn = trans->xl_private; + conf = this->private; + + rsp = gf_param (hdr); + + op_ret = ntoh32 (hdr->rsp.op_ret); + op_errno = gf_error_to_errno (ntoh32 (hdr->rsp.op_errno)); + + if (op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "setvolume failed (%s)", + strerror (op_errno)); + goto out; + } + + reply = dict_new (); + GF_VALIDATE_OR_GOTO (this->name, reply, out); + + dict_len = ntoh32 (rsp->dict_len); + ret = dict_unserialize (rsp->buf, dict_len, &reply); + if (ret < 0) { + gf_log (frame->this->name, GF_LOG_DEBUG, + "failed to unserialize buffer(%p) to dictionary", + rsp->buf); + goto out; + } + + ret = dict_get_str 
(reply, "ERROR", &remote_error); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get ERROR string from reply dictionary"); + } + + ret = dict_get_str (reply, "process-uuid", &process_uuid); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get 'process-uuid' from reply dictionary"); + } + + if (op_ret < 0) { + gf_log (trans->xl->name, GF_LOG_ERROR, + "SETVOLUME on remote-host failed: %s", + remote_error ? remote_error : strerror (op_errno)); + errno = op_errno; + if (op_errno == ESTALE) { + parent = trans->xl->parents; + while (parent) { + xlator_notify (parent->xlator, + GF_EVENT_VOLFILE_MODIFIED, + trans->xl); + parent = parent->next; + } + } + + } else { + ret = dict_get_str (this->options, "remote-subvolume", + &remote_subvol); + if (!remote_subvol) + goto out; + + ctx = this->ctx; + + if (process_uuid && !strcmp (ctx->process_uuid,process_uuid)) { + ret = dict_get_uint64 (reply, "transport-ptr", + &peer_trans_int); + + peer_trans = (void *) (long) (peer_trans_int); + + gf_log (this->name, GF_LOG_WARNING, + "attaching to the local volume '%s'", + remote_subvol); + + transport_setpeer (trans, peer_trans); + + } + + gf_log (trans->xl->name, GF_LOG_NORMAL, + "Connected to %s, attached " + "to remote volume '%s'.", + trans->peerinfo.identifier, remote_subvol); + + pthread_mutex_lock (&(conn->lock)); + { + conn->connected = 1; + } + pthread_mutex_unlock (&(conn->lock)); + + protocol_client_post_handshake (frame, frame->this); + } + + conf->connecting = 0; +out: + + if (-1 == op_ret) { + /* Let the connection/re-connection happen in + * background, for now, don't hang here, + * tell the parents that i am all ok.. 
+ */ + parent = trans->xl->parents; + while (parent) { + xlator_notify (parent->xlator, + GF_EVENT_CHILD_CONNECTING, trans->xl); + parent = parent->next; + } + conf->connecting= 1; + } + + STACK_DESTROY (frame->root); + + if (reply) + dict_unref (reply); + + return op_ret; +} + +/* + * client_enosys_cbk - + * @frame: call frame + * + * not for external reference + */ + +int +client_enosys_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + STACK_DESTROY (frame->root); + return 0; +} + + +void +client_protocol_reconnect (void *trans_ptr) +{ + transport_t *trans = NULL; + client_connection_t *conn = NULL; + struct timeval tv = {0, 0}; + int32_t ret = 0; + + trans = trans_ptr; + conn = trans->xl_private; + pthread_mutex_lock (&conn->lock); + { + if (conn->reconnect) + gf_timer_call_cancel (trans->xl->ctx, + conn->reconnect); + conn->reconnect = 0; + + if (conn->connected == 0) { + tv.tv_sec = 10; + + gf_log (trans->xl->name, GF_LOG_TRACE, + "attempting reconnect"); + ret = transport_connect (trans); + + conn->reconnect = + gf_timer_call_after (trans->xl->ctx, tv, + client_protocol_reconnect, + trans); + } else { + gf_log (trans->xl->name, GF_LOG_TRACE, + "breaking reconnect chain"); + } + } + pthread_mutex_unlock (&conn->lock); + + if (ret == -1 && errno != EINPROGRESS) { + default_notify (trans->xl, GF_EVENT_CHILD_DOWN, NULL); + } +} + +int +protocol_client_mark_fd_bad (xlator_t *this) +{ + client_conf_t *conf = NULL; + client_fd_ctx_t *tmp = NULL; + client_fd_ctx_t *fdctx = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + list_for_each_entry_safe (fdctx, tmp, &conf->saved_fds, + sfd_pos) { + fdctx->remote_fd = -1; + } + } + pthread_mutex_unlock (&conf->mutex); + + return 0; +} + +/* + * client_protocol_cleanup - cleanup function + * @trans: transport object + * + */ + +int +protocol_client_cleanup (transport_t *trans) +{ + client_connection_t *conn = NULL; + struct saved_frames *saved_frames = NULL; 
+ + conn = trans->xl_private; + + gf_log (trans->xl->name, GF_LOG_TRACE, + "cleaning up state in transport object %p", trans); + + pthread_mutex_lock (&conn->lock); + { + saved_frames = conn->saved_frames; + conn->saved_frames = saved_frames_new (); + + /* bailout logic cleanup */ + if (conn->timer) { + gf_timer_call_cancel (trans->xl->ctx, conn->timer); + conn->timer = NULL; + } + + if (conn->reconnect == NULL) { + /* :O This part is empty.. any thing missing? */ + } + } + pthread_mutex_unlock (&conn->lock); + + saved_frames_destroy (trans->xl, saved_frames, + gf_fops, gf_mops, gf_cbks); + + return 0; +} + + +/* cbk callbacks */ +int +client_releasedir_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, + size_t hdrlen, struct iobuf *iobuf) +{ + STACK_DESTROY (frame->root); + return 0; +} + + +int +client_release_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + STACK_DESTROY (frame->root); + return 0; +} + + +int +client_forget_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_log ("", GF_LOG_CRITICAL, "fop not implemented"); + return 0; +} + + +int +client_log_cbk (call_frame_t *frame, gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_log ("", GF_LOG_CRITICAL, "fop not implemented"); + return 0; +} + + +static gf_op_t gf_fops[] = { + [GF_PROTO_FOP_STAT] = client_stat_cbk, + [GF_PROTO_FOP_READLINK] = client_readlink_cbk, + [GF_PROTO_FOP_MKNOD] = client_mknod_cbk, + [GF_PROTO_FOP_MKDIR] = client_mkdir_cbk, + [GF_PROTO_FOP_UNLINK] = client_unlink_cbk, + [GF_PROTO_FOP_RMDIR] = client_rmdir_cbk, + [GF_PROTO_FOP_SYMLINK] = client_symlink_cbk, + [GF_PROTO_FOP_RENAME] = client_rename_cbk, + [GF_PROTO_FOP_LINK] = client_link_cbk, + [GF_PROTO_FOP_TRUNCATE] = client_truncate_cbk, + [GF_PROTO_FOP_OPEN] = client_open_cbk, + [GF_PROTO_FOP_READ] = client_readv_cbk, + [GF_PROTO_FOP_WRITE] = client_write_cbk, + [GF_PROTO_FOP_STATFS] = client_statfs_cbk, + [GF_PROTO_FOP_FLUSH] = 
client_flush_cbk, + [GF_PROTO_FOP_FSYNC] = client_fsync_cbk, + [GF_PROTO_FOP_SETXATTR] = client_setxattr_cbk, + [GF_PROTO_FOP_GETXATTR] = client_getxattr_cbk, + [GF_PROTO_FOP_REMOVEXATTR] = client_removexattr_cbk, + [GF_PROTO_FOP_OPENDIR] = client_opendir_cbk, + [GF_PROTO_FOP_FSYNCDIR] = client_fsyncdir_cbk, + [GF_PROTO_FOP_ACCESS] = client_access_cbk, + [GF_PROTO_FOP_CREATE] = client_create_cbk, + [GF_PROTO_FOP_FTRUNCATE] = client_ftruncate_cbk, + [GF_PROTO_FOP_FSTAT] = client_fstat_cbk, + [GF_PROTO_FOP_LK] = client_lk_common_cbk, + [GF_PROTO_FOP_LOOKUP] = client_lookup_cbk, + [GF_PROTO_FOP_READDIR] = client_readdir_cbk, + [GF_PROTO_FOP_READDIRP] = client_readdirp_cbk, + [GF_PROTO_FOP_INODELK] = client_inodelk_cbk, + [GF_PROTO_FOP_FINODELK] = client_finodelk_cbk, + [GF_PROTO_FOP_ENTRYLK] = client_entrylk_cbk, + [GF_PROTO_FOP_FENTRYLK] = client_fentrylk_cbk, + [GF_PROTO_FOP_CHECKSUM] = client_checksum_cbk, + [GF_PROTO_FOP_RCHECKSUM] = client_rchecksum_cbk, + [GF_PROTO_FOP_XATTROP] = client_xattrop_cbk, + [GF_PROTO_FOP_FXATTROP] = client_fxattrop_cbk, + [GF_PROTO_FOP_SETATTR] = client_setattr_cbk, + [GF_PROTO_FOP_FSETATTR] = client_fsetattr_cbk +}; + +static gf_op_t gf_mops[] = { + [GF_MOP_SETVOLUME] = client_setvolume_cbk, + [GF_MOP_GETVOLUME] = client_enosys_cbk, + [GF_MOP_SETSPEC] = client_setspec_cbk, + [GF_MOP_GETSPEC] = client_getspec_cbk, + [GF_MOP_PING] = client_ping_cbk, + [GF_MOP_LOG] = client_log_cbk +}; + +static gf_op_t gf_cbks[] = { + [GF_CBK_FORGET] = client_forget_cbk, + [GF_CBK_RELEASE] = client_release_cbk, + [GF_CBK_RELEASEDIR] = client_releasedir_cbk +}; + +/* + * client_protocol_interpret - protocol interpreter + * @trans: transport object + * @blk: data block + * + */ +int +protocol_client_interpret (xlator_t *this, transport_t *trans, + char *hdr_p, size_t hdrlen, struct iobuf *iobuf) +{ + int ret = -1; + call_frame_t *frame = NULL; + gf_hdr_common_t *hdr = NULL; + uint64_t callid = 0; + int type = -1; + int op = -1; + client_connection_t 
*conn = NULL; + + conn = trans->xl_private; + + hdr = (gf_hdr_common_t *)hdr_p; + + type = ntoh32 (hdr->type); + op = ntoh32 (hdr->op); + callid = ntoh64 (hdr->callid); + + frame = lookup_frame (trans, op, type, callid); + if (frame == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "no frame for callid=%"PRId64" type=%d op=%d", + callid, type, op); + return 0; + } + + switch (type) { + case GF_OP_TYPE_FOP_REPLY: + if ((op > GF_PROTO_FOP_MAXVALUE) || + (op < 0)) { + gf_log (trans->xl->name, GF_LOG_WARNING, + "invalid fop '%d'", op); + } else { + ret = gf_fops[op] (frame, hdr, hdrlen, iobuf); + } + break; + case GF_OP_TYPE_MOP_REPLY: + if ((op > GF_MOP_MAXVALUE) || + (op < 0)) { + gf_log (trans->xl->name, GF_LOG_WARNING, + "invalid fop '%d'", op); + } else { + ret = gf_mops[op] (frame, hdr, hdrlen, iobuf); + } + break; + case GF_OP_TYPE_CBK_REPLY: + if ((op > GF_CBK_MAXVALUE) || + (op < 0)) { + gf_log (trans->xl->name, GF_LOG_WARNING, + "invalid cbk '%d'", op); + } else { + ret = gf_cbks[op] (frame, hdr, hdrlen, iobuf); + } + break; + default: + gf_log (trans->xl->name, GF_LOG_DEBUG, + "invalid packet type: %d", type); + break; + } + + return ret; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_client_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } + + return ret; +} + + +/* + * init - initiliazation function. 
called during loading of client protocol + * @this: + * + */ + +int +init (xlator_t *this) +{ + transport_t *trans = NULL; + client_conf_t *conf = NULL; + client_connection_t *conn = NULL; + int32_t frame_timeout = 0; + int32_t ping_timeout = 0; + data_t *remote_subvolume = NULL; + int32_t ret = -1; + int i = 0; + + if (this->children) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: client protocol translator cannot have any " + "subvolumes"); + goto out; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "Volume is dangling. "); + } + + remote_subvolume = dict_get (this->options, "remote-subvolume"); + if (remote_subvolume == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "Option 'remote-subvolume' is not specified."); + goto out; + } + + ret = dict_get_int32 (this->options, "frame-timeout", + &frame_timeout); + if (ret >= 0) { + gf_log (this->name, GF_LOG_DEBUG, + "setting frame-timeout to %d", frame_timeout); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "defaulting frame-timeout to 30mins"); + frame_timeout = 1800; + } + + ret = dict_get_int32 (this->options, "ping-timeout", + &ping_timeout); + if (ret >= 0) { + gf_log (this->name, GF_LOG_DEBUG, + "setting ping-timeout to %d", ping_timeout); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "defaulting ping-timeout to 42"); + ping_timeout = GF_UNIVERSAL_ANSWER; + } + + conf = GF_CALLOC (1, sizeof (client_conf_t), + gf_client_mt_client_conf_t); + + protocol_common_init (); + + pthread_mutex_init (&conf->mutex, NULL); + INIT_LIST_HEAD (&conf->saved_fds); + + this->private = conf; + + for (i = 0; i < CHANNEL_MAX; i++) { + if (CHANNEL_LOWLAT == i) { + dict_set (this->options, "transport.socket.lowlat", + data_from_dynstr (gf_strdup ("true"))); + } + trans = transport_load (this->options, this); + if (trans == NULL) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to load transport"); + ret = -1; + goto out; + } + + conn = GF_CALLOC (1, sizeof (*conn), + gf_client_mt_client_connection_t); + + 
conn->saved_frames = saved_frames_new (); + + conn->callid = 1; + + conn->frame_timeout = frame_timeout; + conn->ping_timeout = ping_timeout; + + pthread_mutex_init (&conn->lock, NULL); + + trans->xl_private = conn; + conf->transport[i] = transport_ref (trans); + } + +#ifndef GF_DARWIN_HOST_OS + { + struct rlimit lim; + + lim.rlim_cur = 1048576; + lim.rlim_max = 1048576; + + ret = setrlimit (RLIMIT_NOFILE, &lim); + if (ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "WARNING: Failed to set 'ulimit -n 1M': %s", + strerror(errno)); + lim.rlim_cur = 65536; + lim.rlim_max = 65536; + + ret = setrlimit (RLIMIT_NOFILE, &lim); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to set max open fd to 64k: %s", + strerror(errno)); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "max open fd set to 64k"); + } + + } + } +#endif + ret = 0; +out: + return ret; +} + +/* + * fini - finish function called during unloading of client protocol + * @this: + * + */ +void +fini (xlator_t *this) +{ + /* TODO: Check if its enough.. 
how to call transport's fini () */ + client_conf_t *conf = NULL; + + conf = this->private; + this->private = NULL; + + if (conf) { + GF_FREE (conf); + } + return; +} + + +int +protocol_client_handshake (xlator_t *this, transport_t *trans) +{ + gf_hdr_common_t *hdr = NULL; + gf_mop_setvolume_req_t *req = NULL; + dict_t *options = NULL; + int32_t ret = -1; + int hdrlen = 0; + int dict_len = 0; + call_frame_t *fr = NULL; + char *process_uuid_xl; + + options = this->options; + ret = dict_set_str (options, "protocol-version", GF_PROTOCOL_VERSION); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to set protocol version(%s) in handshake msg", + GF_PROTOCOL_VERSION); + } + + ret = gf_asprintf (&process_uuid_xl, "%s-%s", this->ctx->process_uuid, + this->name); + if (-1 == ret) { + gf_log (this->name, GF_LOG_ERROR, + "asprintf failed while setting process_uuid"); + goto fail; + } + ret = dict_set_dynstr (options, "process-uuid", + process_uuid_xl); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to set process-uuid(%s) in handshake msg", + process_uuid_xl); + } + + if (this->ctx->cmd_args.volfile_server) { + if (this->ctx->cmd_args.volfile_id) + ret = dict_set_str (options, "volfile-key", + this->ctx->cmd_args.volfile_id); + ret = dict_set_uint32 (options, "volfile-checksum", + this->graph->volfile_checksum); + } + + dict_len = dict_serialized_length (options); + if (dict_len < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get serialized length of dict(%p)", + options); + ret = dict_len; + goto fail; + } + + hdrlen = gf_hdr_len (req, dict_len); + hdr = gf_hdr_new (req, dict_len); + GF_VALIDATE_OR_GOTO (this->name, hdr, fail); + + req = gf_param (hdr); + + ret = dict_serialize (options, req->buf); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to serialize dictionary(%p)", + options); + goto fail; + } + + req->dict_len = hton32 (dict_len); + fr = create_frame (this, this->ctx->pool); + GF_VALIDATE_OR_GOTO (this->name, fr, 
fail); + + fr->local = trans; + ret = protocol_client_xfer (fr, this, trans, + GF_OP_TYPE_MOP_REQUEST, GF_MOP_SETVOLUME, + hdr, hdrlen, NULL, 0, NULL); + return ret; +fail: + if (hdr) + GF_FREE (hdr); + return ret; +} + + +int +protocol_client_pollout (xlator_t *this, transport_t *trans) +{ + client_conf_t *conf = NULL; + + conf = trans->xl->private; + + pthread_mutex_lock (&conf->mutex); + { + gettimeofday (&conf->last_sent, NULL); + } + pthread_mutex_unlock (&conf->mutex); + + return 0; +} + + +int +protocol_client_pollin (xlator_t *this, transport_t *trans) +{ + client_conf_t *conf = NULL; + int ret = -1; + struct iobuf *iobuf = NULL; + char *hdr = NULL; + size_t hdrlen = 0; + + conf = trans->xl->private; + + pthread_mutex_lock (&conf->mutex); + { + gettimeofday (&conf->last_received, NULL); + } + pthread_mutex_unlock (&conf->mutex); + + ret = transport_receive (trans, &hdr, &hdrlen, &iobuf); + + if (ret == 0) + { + ret = protocol_client_interpret (this, trans, hdr, hdrlen, + iobuf); + } + + /* TODO: use mem-pool */ + GF_FREE (hdr); + + return ret; +} + +int +client_priv_dump (xlator_t *this) +{ + client_conf_t *conf = NULL; + int ret = -1; + client_fd_ctx_t *tmp = NULL; + int i = 0; + char key[GF_DUMP_MAX_BUF_LEN]; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + + if (!this) + return -1; + + conf = this->private; + if (!conf) { + gf_log (this->name, GF_LOG_WARNING, + "conf null in xlator"); + return -1; + } + + ret = pthread_mutex_trylock(&conf->mutex); + if (ret) { + gf_log("", GF_LOG_WARNING, "Unable to lock client %s" + " errno: %d", this->name, errno); + return -1; + } + + gf_proc_dump_build_key(key_prefix, "xlator.protocol.client", + "%s.priv", this->name); + + gf_proc_dump_add_section(key_prefix); + + list_for_each_entry(tmp, &conf->saved_fds, sfd_pos) { + gf_proc_dump_build_key(key, key_prefix, + "fd.%d.remote_fd", ++i); + gf_proc_dump_write(key, "%d", tmp->remote_fd); + } + + gf_proc_dump_build_key(key, key_prefix, "connecting"); + gf_proc_dump_write(key, 
"%d", conf->connecting); + gf_proc_dump_build_key(key, key_prefix, "last_sent"); + gf_proc_dump_write(key, "%s", ctime(&conf->last_sent.tv_sec)); + gf_proc_dump_build_key(key, key_prefix, "last_received"); + gf_proc_dump_write(key, "%s", ctime(&conf->last_received.tv_sec)); + + pthread_mutex_unlock(&conf->mutex); + + return 0; + +} + +int32_t +client_inodectx_dump (xlator_t *this, inode_t *inode) +{ + ino_t par = 0; + int ret = -1; + char key[GF_DUMP_MAX_BUF_LEN]; + + if (!inode) + return -1; + + if (!this) + return -1; + + ret = inode_ctx_get (inode, this, &par); + + if (ret != 0) + return ret; + + gf_proc_dump_build_key(key, "xlator.protocol.client", + "%s.inode.%ld.par", + this->name,inode->ino); + gf_proc_dump_write(key, "%ld", par); + + return 0; +} + +/* + * client_protocol_notify - notify function for client protocol + * @this: + * @trans: transport object + * @event + * + */ + +int +notify (xlator_t *this, int32_t event, void *data, ...) +{ + int i = 0; + int ret = -1; + int child_down = 1; + int was_not_down = 0; + transport_t *trans = NULL; + client_connection_t *conn = NULL; + client_conf_t *conf = NULL; + xlator_list_t *parent = NULL; + + conf = this->private; + trans = data; + + switch (event) { + case GF_EVENT_POLLOUT: + { + ret = protocol_client_pollout (this, trans); + + break; + } + case GF_EVENT_POLLIN: + { + ret = protocol_client_pollin (this, trans); + + break; + } + /* no break for ret check to happen below */ + case GF_EVENT_POLLERR: + { + ret = -1; + protocol_client_cleanup (trans); + + if (conf->connecting == 0) { + /* Let the connection/re-connection happen in + * background, for now, don't hang here, + * tell the parents that i am all ok.. 
+ */ + parent = trans->xl->parents; + while (parent) { + parent->xlator->notify (parent->xlator, + GF_EVENT_CHILD_CONNECTING, + trans->xl); + parent = parent->next; + } + conf->connecting = 1; + } + + was_not_down = 0; + for (i = 0; i < CHANNEL_MAX; i++) { + conn = conf->transport[i]->xl_private; + if (conn->connected == 1) + was_not_down = 1; + } + + conn = trans->xl_private; + if (conn->connected) { + conn->connected = 0; + if (conn->reconnect == 0) + client_protocol_reconnect (trans); + } + + child_down = 1; + for (i = 0; i < CHANNEL_MAX; i++) { + trans = conf->transport[i]; + conn = trans->xl_private; + if (conn->connected == 1) + child_down = 0; + } + + if (child_down && was_not_down) { + gf_log (this->name, GF_LOG_INFO, "disconnected"); + + protocol_client_mark_fd_bad (this); + + parent = this->parents; + while (parent) { + xlator_notify (parent->xlator, + GF_EVENT_CHILD_DOWN, this); + parent = parent->next; + } + } + } + break; + + case GF_EVENT_PARENT_UP: + { + client_conf_t *conf = NULL; + int i = 0; + transport_t *trans = NULL; + + conf = this->private; + for (i = 0; i < CHANNEL_MAX; i++) { + trans = conf->transport[i]; + if (!trans) { + gf_log (this->name, GF_LOG_DEBUG, + "transport init failed"); + return -1; + } + + conn = trans->xl_private; + + gf_log (this->name, GF_LOG_DEBUG, + "got GF_EVENT_PARENT_UP, attempting connect " + "on transport"); + + client_protocol_reconnect (trans); + } + } + break; + + case GF_EVENT_CHILD_UP: + { + char *handshake = NULL; + + ret = dict_get_str (this->options, "disable-handshake", + &handshake); + gf_log (this->name, GF_LOG_DEBUG, + "got GF_EVENT_CHILD_UP"); + if ((ret < 0) || + (strcasecmp (handshake, "on"))) { + ret = protocol_client_handshake (this, trans); + } else { + conn = trans->xl_private; + conn->connected = 1; + ret = default_notify (this, event, trans); + } + + if (ret) + transport_disconnect (trans); + + } + break; + + default: + gf_log (this->name, GF_LOG_DEBUG, + "got %d, calling default_notify ()", 
event); + + default_notify (this, event, data); + break; + } + + return ret; +} + + +struct xlator_fops fops = { + .stat = client_stat, + .readlink = client_readlink, + .mknod = client_mknod, + .mkdir = client_mkdir, + .unlink = client_unlink, + .rmdir = client_rmdir, + .symlink = client_symlink, + .rename = client_rename, + .link = client_link, + .truncate = client_truncate, + .open = client_open, + .readv = client_readv, + .writev = client_writev, + .statfs = client_statfs, + .flush = client_flush, + .fsync = client_fsync, + .setxattr = client_setxattr, + .getxattr = client_getxattr, + .fsetxattr = client_fsetxattr, + .fgetxattr = client_fgetxattr, + .removexattr = client_removexattr, + .opendir = client_opendir, + .readdir = client_readdir, + .readdirp = client_readdirp, + .fsyncdir = client_fsyncdir, + .access = client_access, + .ftruncate = client_ftruncate, + .fstat = client_fstat, + .create = client_create, + .lk = client_lk, + .inodelk = client_inodelk, + .finodelk = client_finodelk, + .entrylk = client_entrylk, + .fentrylk = client_fentrylk, + .lookup = client_lookup, + .checksum = client_checksum, + .rchecksum = client_rchecksum, + .xattrop = client_xattrop, + .fxattrop = client_fxattrop, + .setattr = client_setattr, + .fsetattr = client_fsetattr, + .getspec = client_getspec, +}; + +struct xlator_cbks cbks = { + .release = client_release, + .releasedir = client_releasedir +}; + + +struct xlator_dumpops dumpops = { + .priv = client_priv_dump, + .inodectx = client_inodectx_dump, +}; + +struct volume_options options[] = { + { .key = {"username"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"password"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"transport-type"}, + .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp", + "tcp/client", "ib-verbs/client"}, + .type = GF_OPTION_TYPE_STR + }, + { .key = {"remote-host"}, + .type = GF_OPTION_TYPE_INTERNET_ADDRESS + }, + { .key = {"remote-subvolume"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = 
{"frame-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .min = 0, + .max = 86400, + }, + { .key = {"ping-timeout"}, + .type = GF_OPTION_TYPE_TIME, + .min = 1, + .max = 1013, + }, + { .key = {NULL} }, +}; diff --git a/xlators/protocol/legacy/client/src/client-protocol.h b/xlators/protocol/legacy/client/src/client-protocol.h new file mode 100644 index 00000000000..ae65fb5fe72 --- /dev/null +++ b/xlators/protocol/legacy/client/src/client-protocol.h @@ -0,0 +1,178 @@ +/* + Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _CLIENT_PROTOCOL_H +#define _CLIENT_PROTOCOL_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <arpa/inet.h> +#include "inode.h" +#include "timer.h" +#include "byte-order.h" +#include "saved-frames.h" + +#define CLIENT_PORT_CEILING 1023 + +#define GF_CLIENT_INODE_SELF 0 +#define GF_CLIENT_INODE_PARENT 1 + +#define CLIENT_CONF(this) ((client_conf_t *)(this->private)) + +#define RECEIVE_TIMEOUT(_cprivate,_current) \ + ((_cprivate->last_received.tv_sec + \ + _cprivate->frame_timeout) < \ + _current.tv_sec) + +#define SEND_TIMEOUT(_cprivate,_current) \ + ((_cprivate->last_sent.tv_sec + \ + _cprivate->frame_timeout) < \ + _current.tv_sec) + +enum { + CHANNEL_BULK = 0, + CHANNEL_LOWLAT = 1, + CHANNEL_MAX +}; + +#define CLIENT_CHANNEL client_channel + +struct client_connection; +typedef struct client_connection client_connection_t; + +#include "stack.h" +#include "xlator.h" +#include "transport.h" +#include "protocol.h" + +typedef struct _client_fd_ctx { + struct list_head sfd_pos; /* Stores the reference to this + fd's position in the saved_fds list. 
/*
 * gf_string_to_stat - fill a struct iatt from its textual form
 * @string: text to parse with GF_STAT_PRINT_FMT_STR
 * @stbuf:  iatt that receives the parsed values
 *
 * Sixteen values are scanned into zero-initialised locals and then copied
 * into @stbuf.  The return value of sscanf is not checked, so any field
 * that fails to parse stays 0.
 *
 * NOTE(review): the conversion specifiers live in GF_STAT_PRINT_FMT_STR,
 * which is defined elsewhere -- confirm they match the uint64_t/uint32_t
 * widths of the locals below.
 */
static inline void
gf_string_to_stat(char *string, struct iatt *stbuf)
{
        uint64_t dev        = 0;
        uint64_t ino        = 0;
        uint32_t mode       = 0;
        uint32_t nlink      = 0;
        uint32_t uid        = 0;
        uint32_t gid        = 0;
        uint64_t rdev       = 0;
        uint64_t size       = 0;
        uint32_t blksize    = 0;
        uint64_t blocks     = 0;
        uint32_t atime      = 0;
        uint32_t atime_nsec = 0;
        uint32_t mtime      = 0;
        uint32_t mtime_nsec = 0;
        uint32_t ctime      = 0;
        uint32_t ctime_nsec = 0;

        sscanf (string, GF_STAT_PRINT_FMT_STR,
                &dev,
                &ino,
                &mode,
                &nlink,
                &uid,
                &gid,
                &rdev,
                &size,
                &blksize,
                &blocks,
                &atime,
                &atime_nsec,
                &mtime,
                &mtime_nsec,
                &ctime,
                &ctime_nsec);

        /* the first scanned field (local 'dev') is stored as the generation
           number -- NOTE(review): presumably intentional, confirm */
        stbuf->ia_gen = dev;
        stbuf->ia_ino = ino;
        /* the single mode value provides both permission bits and type */
        stbuf->ia_prot = ia_prot_from_st_mode (mode);
        stbuf->ia_type = ia_type_from_st_mode (mode);
        stbuf->ia_nlink = nlink;
        stbuf->ia_uid = uid;
        stbuf->ia_gid = gid;
        stbuf->ia_rdev = rdev;
        stbuf->ia_size = size;
        stbuf->ia_blksize = blksize;
        stbuf->ia_blocks = blocks;

        stbuf->ia_atime = atime;
        stbuf->ia_mtime = mtime;
        stbuf->ia_ctime = ctime;

        stbuf->ia_atime_nsec = atime_nsec;
        stbuf->ia_mtime_nsec = mtime_nsec;
        stbuf->ia_ctime_nsec = ctime_nsec;
}
+*/ + + +#include "saved-frames.h" +#include "common-utils.h" +#include "protocol.h" +#include "xlator.h" +#include "client-mem-types.h" + + + +struct saved_frames * +saved_frames_new (void) +{ + struct saved_frames *saved_frames = NULL; + + saved_frames = GF_CALLOC (sizeof (*saved_frames), 1, + gf_client_mt_saved_frames); + if (!saved_frames) { + return NULL; + } + + INIT_LIST_HEAD (&saved_frames->fops.list); + INIT_LIST_HEAD (&saved_frames->mops.list); + INIT_LIST_HEAD (&saved_frames->cbks.list); + + return saved_frames; +} + + +struct saved_frame * +get_head_frame_for_type (struct saved_frames *frames, int8_t type) +{ + struct saved_frame *head_frame = NULL; + + switch (type) { + case GF_OP_TYPE_FOP_REQUEST: + case GF_OP_TYPE_FOP_REPLY: + head_frame = &frames->fops; + break; + case GF_OP_TYPE_MOP_REQUEST: + case GF_OP_TYPE_MOP_REPLY: + head_frame = &frames->mops; + break; + case GF_OP_TYPE_CBK_REQUEST: + case GF_OP_TYPE_CBK_REPLY: + head_frame = &frames->cbks; + break; + } + + return head_frame; +} + + +int +saved_frames_put (struct saved_frames *frames, call_frame_t *frame, + int32_t op, int8_t type, int64_t callid) +{ + struct saved_frame *saved_frame = NULL; + struct saved_frame *head_frame = NULL; + + head_frame = get_head_frame_for_type (frames, type); + + saved_frame = GF_CALLOC (sizeof (*saved_frame), 1, + gf_client_mt_saved_frame); + if (!saved_frame) { + return -ENOMEM; + } + + INIT_LIST_HEAD (&saved_frame->list); + saved_frame->frame = frame; + saved_frame->op = op; + saved_frame->type = type; + saved_frame->callid = callid; + + gettimeofday (&saved_frame->saved_at, NULL); + + list_add_tail (&saved_frame->list, &head_frame->list); + frames->count++; + + return 0; +} + + +call_frame_t * +saved_frames_get (struct saved_frames *frames, int32_t op, + int8_t type, int64_t callid) +{ + struct saved_frame *saved_frame = NULL; + struct saved_frame *tmp = NULL; + struct saved_frame *head_frame = NULL; + call_frame_t *frame = NULL; + + head_frame = 
get_head_frame_for_type (frames, type); + + list_for_each_entry (tmp, &head_frame->list, list) { + if (tmp->callid == callid) { + list_del_init (&tmp->list); + frames->count--; + saved_frame = tmp; + break; + } + } + + if (saved_frame) + frame = saved_frame->frame; + + GF_FREE (saved_frame); + + return frame; +} + +struct saved_frame * +saved_frames_get_timedout (struct saved_frames *frames, int8_t type, + uint32_t timeout, struct timeval *current) +{ + struct saved_frame *bailout_frame = NULL, *tmp = NULL; + struct saved_frame *head_frame = NULL; + + head_frame = get_head_frame_for_type (frames, type); + + if (!list_empty(&head_frame->list)) { + tmp = list_entry (head_frame->list.next, typeof (*tmp), list); + if ((tmp->saved_at.tv_sec + timeout) < current->tv_sec) { + bailout_frame = tmp; + list_del_init (&bailout_frame->list); + frames->count--; + } + } + + return bailout_frame; +} + +void +saved_frames_unwind (xlator_t *this, struct saved_frames *saved_frames, + struct saved_frame *head, + gf_op_t gf_ops[], char *gf_op_list[]) +{ + struct saved_frame *trav = NULL; + struct saved_frame *tmp = NULL; + + gf_hdr_common_t hdr = {0, }; + call_frame_t *frame = NULL; + + hdr.rsp.op_ret = hton32 (-1); + hdr.rsp.op_errno = hton32 (ENOTCONN); + + list_for_each_entry_safe (trav, tmp, &head->list, list) { + gf_log (this->name, GF_LOG_ERROR, + "forced unwinding frame type(%d) op(%s)", + trav->type, gf_op_list[trav->op]); + + hdr.type = hton32 (trav->type); + hdr.op = hton32 (trav->op); + + frame = trav->frame; + + saved_frames->count--; + + gf_ops[trav->op] (frame, &hdr, sizeof (hdr), NULL); + + list_del_init (&trav->list); + GF_FREE (trav); + } +} + + +void +saved_frames_destroy (xlator_t *this, struct saved_frames *frames, + gf_op_t gf_fops[], gf_op_t gf_mops[], gf_op_t gf_cbks[]) +{ + saved_frames_unwind (this, frames, &frames->fops, gf_fops, gf_fop_list); + saved_frames_unwind (this, frames, &frames->mops, gf_mops, gf_mop_list); + saved_frames_unwind (this, frames, 
&frames->cbks, gf_cbks, gf_cbk_list); + + GF_FREE (frames); +} diff --git a/xlators/protocol/legacy/client/src/saved-frames.h b/xlators/protocol/legacy/client/src/saved-frames.h new file mode 100644 index 00000000000..5c18abbcc9e --- /dev/null +++ b/xlators/protocol/legacy/client/src/saved-frames.h @@ -0,0 +1,79 @@ +/* + Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _SAVED_FRAMES_H +#define _SAVED_FRAMES_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <sys/time.h> +#include "stack.h" +#include "list.h" +#include "protocol.h" + +/* UGLY: have common typedef b/w saved-frames.c and protocol-client.c */ +typedef int32_t (*gf_op_t) (call_frame_t *frame, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf); + + +struct saved_frame { + union { + struct list_head list; + struct { + struct saved_frame *frame_next; + struct saved_frame *frame_prev; + }; + }; + + struct timeval saved_at; + call_frame_t *frame; + int32_t op; + int8_t type; + uint64_t callid; +}; + + +struct saved_frames { + int64_t count; + struct saved_frame fops; + struct saved_frame mops; + struct saved_frame cbks; +}; + + +struct saved_frames *saved_frames_new (); +int saved_frames_put (struct saved_frames *frames, call_frame_t *frame, + int32_t op, int8_t type, int64_t callid); +call_frame_t *saved_frames_get (struct saved_frames *frames, int32_t op, + int8_t type, int64_t callid); + +struct saved_frame * +saved_frames_get_timedout (struct saved_frames *frames, int8_t type, + uint32_t timeout, struct timeval *current); + +void saved_frames_destroy (xlator_t *this, struct saved_frames *frames, + gf_op_t gf_fops[], gf_op_t gf_mops[], + gf_op_t gf_cbks[]); + +#endif /* _SAVED_FRAMES_H */ diff --git a/xlators/protocol/legacy/lib/Makefile.am b/xlators/protocol/legacy/lib/Makefile.am new file mode 100644 index 00000000000..d471a3f9243 --- /dev/null +++ b/xlators/protocol/legacy/lib/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/protocol/legacy/lib/src/Makefile.am b/xlators/protocol/legacy/lib/src/Makefile.am new file mode 100644 index 00000000000..1f0e93e3047 --- /dev/null +++ b/xlators/protocol/legacy/lib/src/Makefile.am @@ -0,0 +1,14 @@ +lib_LTLIBRARIES = libgfproto.la + +libgfproto_la_CFLAGS = -fPIC -Wall -g -shared -nostartfiles $(GF_CFLAGS) 
$(GF_DARWIN_LIBGLUSTERFS_CFLAGS) + +libgfproto_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D_GNU_SOURCE \ + -D$(GF_HOST_OS) -DLIBDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/auth\" \ + -DTRANSPORTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/transport\" \ + -I$(CONTRIBDIR)/rbtree -I$(top_srcdir)/libglusterfs/src/ + +libgfproto_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +libgfproto_la_SOURCES = transport.c protocol.c + +noinst_HEADERS = transport.h protocol.h diff --git a/xlators/protocol/legacy/lib/src/protocol.c b/xlators/protocol/legacy/lib/src/protocol.c new file mode 100644 index 00000000000..63950f43dec --- /dev/null +++ b/xlators/protocol/legacy/lib/src/protocol.c @@ -0,0 +1,108 @@ + +#include "globals.h" +#include "compat.h" +#include "protocol.h" + +char *gf_mop_list[GF_MOP_MAXVALUE]; +char *gf_cbk_list[GF_CBK_MAXVALUE]; + +static int +gf_dirent_nb_size (gf_dirent_t *entries) +{ + return (sizeof (struct gf_dirent_nb) + strlen (entries->d_name) + 1); +} + +int +gf_dirent_serialize (gf_dirent_t *entries, char *buf, size_t buf_size) +{ + struct gf_dirent_nb *entry_nb = NULL; + gf_dirent_t *entry = NULL; + int size = 0; + int entry_size = 0; + + + list_for_each_entry (entry, &entries->list, list) { + entry_size = gf_dirent_nb_size (entry); + + if (buf && (size + entry_size <= buf_size)) { + entry_nb = (void *) (buf + size); + + entry_nb->d_ino = hton64 (entry->d_ino); + entry_nb->d_off = hton64 (entry->d_off); + entry_nb->d_len = hton32 (entry->d_len); + entry_nb->d_type = hton32 (entry->d_type); + + gf_stat_from_iatt (&entry_nb->d_stat, &entry->d_stat); + + strcpy (entry_nb->d_name, entry->d_name); + } + size += entry_size; + } + + return size; +} + + +int +gf_dirent_unserialize (gf_dirent_t *entries, const char *buf, size_t buf_size) +{ + struct gf_dirent_nb *entry_nb = NULL; + int remaining_size = 0; + int least_dirent_size = 0; + int count = 0; + gf_dirent_t *entry = NULL; + int entry_strlen = 0; + int entry_len = 0; + + + 
remaining_size = buf_size; + least_dirent_size = (sizeof (struct gf_dirent_nb) + 2); + + while (remaining_size >= least_dirent_size) { + entry_nb = (void *)(buf + (buf_size - remaining_size)); + + entry_strlen = strnlen (entry_nb->d_name, remaining_size); + if (entry_strlen == remaining_size) { + break; + } + + entry_len = sizeof (gf_dirent_t) + entry_strlen + 1; + entry = GF_CALLOC (1, entry_len, gf_common_mt_gf_dirent_t); + if (!entry) { + break; + } + + entry->d_ino = ntoh64 (entry_nb->d_ino); + entry->d_off = ntoh64 (entry_nb->d_off); + entry->d_len = ntoh32 (entry_nb->d_len); + entry->d_type = ntoh32 (entry_nb->d_type); + + gf_stat_to_iatt (&entry_nb->d_stat, &entry->d_stat); + + strcpy (entry->d_name, entry_nb->d_name); + + list_add_tail (&entry->list, &entries->list); + + remaining_size -= (sizeof (*entry_nb) + entry_strlen + 1); + count++; + } + + return count; +} + +int +protocol_common_init (void) +{ + gf_mop_list[GF_MOP_SETVOLUME] = "SETVOLUME"; + gf_mop_list[GF_MOP_GETVOLUME] = "GETVOLUME"; + gf_mop_list[GF_MOP_SETSPEC] = "SETSPEC"; + gf_mop_list[GF_MOP_GETSPEC] = "GETSPEC"; + gf_mop_list[GF_MOP_LOG] = "LOG"; + gf_mop_list[GF_MOP_PING] = "PING"; + + gf_cbk_list[GF_CBK_FORGET] = "FORGET"; + gf_cbk_list[GF_CBK_RELEASE] = "RELEASE"; + gf_cbk_list[GF_CBK_RELEASEDIR] = "RELEASEDIR"; + + return 0; +} diff --git a/xlators/protocol/legacy/lib/src/protocol.h b/xlators/protocol/legacy/lib/src/protocol.h new file mode 100644 index 00000000000..254e36e661b --- /dev/null +++ b/xlators/protocol/legacy/lib/src/protocol.h @@ -0,0 +1,1119 @@ +/* + Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _PROTOCOL_H +#define _PROTOCOL_H + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <inttypes.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/statvfs.h> +#include <unistd.h> +#include <fcntl.h> + +#include "byte-order.h" +#include "iatt.h" + +/* Any changes in the protocol structure or adding new '[f,m]ops' needs to + * bump the protocol version by "0.1" + */ + +#define GF_PROTOCOL_VERSION "3.0" + +extern char *gf_mop_list[]; +extern char *gf_cbk_list[]; + +/* NOTE: add members ONLY at the end (just before _MAXVALUE) */ +typedef enum { + GF_PROTO_FOP_STAT, /* 0 */ + GF_PROTO_FOP_READLINK, /* 1 */ + GF_PROTO_FOP_MKNOD, /* 2 */ + GF_PROTO_FOP_MKDIR, + GF_PROTO_FOP_UNLINK, + GF_PROTO_FOP_RMDIR, /* 5 */ + GF_PROTO_FOP_SYMLINK, + GF_PROTO_FOP_RENAME, + GF_PROTO_FOP_LINK, + GF_PROTO_FOP_TRUNCATE, + GF_PROTO_FOP_OPEN, /* 10 */ + GF_PROTO_FOP_READ, + GF_PROTO_FOP_WRITE, + GF_PROTO_FOP_STATFS, /* 15 */ + GF_PROTO_FOP_FLUSH, + GF_PROTO_FOP_FSYNC, + GF_PROTO_FOP_SETXATTR, + GF_PROTO_FOP_GETXATTR, + GF_PROTO_FOP_REMOVEXATTR,/* 20 */ + GF_PROTO_FOP_OPENDIR, + GF_PROTO_FOP_GETDENTS, + GF_PROTO_FOP_FSYNCDIR, + GF_PROTO_FOP_ACCESS, + GF_PROTO_FOP_CREATE, /* 25 */ + GF_PROTO_FOP_FTRUNCATE, + GF_PROTO_FOP_FSTAT, + GF_PROTO_FOP_LK, + GF_PROTO_FOP_LOOKUP, + GF_PROTO_FOP_SETDENTS, + GF_PROTO_FOP_READDIR, + GF_PROTO_FOP_INODELK, /* 35 */ + GF_PROTO_FOP_FINODELK, + GF_PROTO_FOP_ENTRYLK, + GF_PROTO_FOP_FENTRYLK, + GF_PROTO_FOP_CHECKSUM, + GF_PROTO_FOP_XATTROP, /* 40 */ + GF_PROTO_FOP_FXATTROP, + GF_PROTO_FOP_LOCK_NOTIFY, + 
GF_PROTO_FOP_LOCK_FNOTIFY, + GF_PROTO_FOP_FGETXATTR, + GF_PROTO_FOP_FSETXATTR, /* 45 */ + GF_PROTO_FOP_RCHECKSUM, + GF_PROTO_FOP_SETATTR, + GF_PROTO_FOP_FSETATTR, + GF_PROTO_FOP_READDIRP, + GF_PROTO_FOP_MAXVALUE, +} glusterfs_proto_fop_t; + +/* NOTE: add members ONLY at the end (just before _MAXVALUE) */ +typedef enum { + GF_MOP_SETVOLUME, /* 0 */ + GF_MOP_GETVOLUME, /* 1 */ + GF_MOP_STATS, + GF_MOP_SETSPEC, + GF_MOP_GETSPEC, + GF_MOP_PING, /* 5 */ + GF_MOP_LOG, + GF_MOP_NOTIFY, + GF_MOP_MAXVALUE, /* 8 */ +} glusterfs_mop_t; + +typedef enum { + GF_CBK_FORGET, /* 0 */ + GF_CBK_RELEASE, /* 1 */ + GF_CBK_RELEASEDIR, /* 2 */ + GF_CBK_MAXVALUE /* 3 */ +} glusterfs_cbk_t; + +typedef enum { + GF_OP_TYPE_FOP_REQUEST = 1, + GF_OP_TYPE_MOP_REQUEST, + GF_OP_TYPE_CBK_REQUEST, + GF_OP_TYPE_FOP_REPLY, + GF_OP_TYPE_MOP_REPLY, + GF_OP_TYPE_CBK_REPLY +} glusterfs_op_type_t; + + +struct gf_stat { + uint64_t ino; + uint64_t size; + uint64_t blocks; + uint64_t dev; + uint32_t rdev; + uint32_t mode; + uint32_t nlink; + uint32_t uid; + uint32_t gid; + uint32_t blksize; + uint32_t atime; + uint32_t atime_nsec; + uint32_t mtime ; + uint32_t mtime_nsec; + uint32_t ctime; + uint32_t ctime_nsec; +} __attribute__((packed)); + + +static inline void +gf_stat_to_stat (struct gf_stat *gf_stat, struct stat *stat) +{ + stat->st_dev = ntoh64 (gf_stat->dev); + stat->st_ino = ntoh64 (gf_stat->ino); + stat->st_mode = ntoh32 (gf_stat->mode); + stat->st_nlink = ntoh32 (gf_stat->nlink); + stat->st_uid = ntoh32 (gf_stat->uid); + stat->st_gid = ntoh32 (gf_stat->gid); + stat->st_rdev = ntoh32 (gf_stat->rdev); + stat->st_size = ntoh64 (gf_stat->size); + stat->st_blksize = ntoh32 (gf_stat->blksize); + stat->st_blocks = ntoh64 (gf_stat->blocks); + stat->st_atime = ntoh32 (gf_stat->atime); + stat->st_mtime = ntoh32 (gf_stat->mtime); + stat->st_ctime = ntoh32 (gf_stat->ctime); + ST_ATIM_NSEC_SET(stat, ntoh32 (gf_stat->atime_nsec)); + ST_MTIM_NSEC_SET(stat, ntoh32 (gf_stat->mtime_nsec)); + ST_CTIM_NSEC_SET(stat, 
ntoh32 (gf_stat->ctime_nsec)); +} + + +static inline void +gf_stat_from_stat (struct gf_stat *gf_stat, struct stat *stat) +{ + gf_stat->dev = hton64 (stat->st_dev); + gf_stat->ino = hton64 (stat->st_ino); + gf_stat->mode = hton32 (stat->st_mode); + gf_stat->nlink = hton32 (stat->st_nlink); + gf_stat->uid = hton32 (stat->st_uid); + gf_stat->gid = hton32 (stat->st_gid); + gf_stat->rdev = hton32 (stat->st_rdev); + gf_stat->size = hton64 (stat->st_size); + gf_stat->blksize = hton32 (stat->st_blksize); + gf_stat->blocks = hton64 (stat->st_blocks); + gf_stat->atime = hton32 (stat->st_atime); + gf_stat->mtime = hton32 (stat->st_mtime); + gf_stat->ctime = hton32 (stat->st_ctime); + gf_stat->atime_nsec = hton32 (ST_ATIM_NSEC(stat)); + gf_stat->mtime_nsec = hton32 (ST_MTIM_NSEC(stat)); + gf_stat->ctime_nsec = hton32 (ST_CTIM_NSEC(stat)); +} + + +static inline void +gf_stat_to_iatt (struct gf_stat *gf_stat, struct iatt *iatt) +{ + iatt->ia_ino = ntoh64 (gf_stat->ino); + iatt->ia_dev = ntoh64 (gf_stat->dev); + iatt->ia_type = ia_type_from_st_mode (ntoh32 (gf_stat->mode)); + iatt->ia_prot = ia_prot_from_st_mode (ntoh32 (gf_stat->mode)); + iatt->ia_nlink = ntoh32 (gf_stat->nlink); + iatt->ia_uid = ntoh32 (gf_stat->uid); + iatt->ia_gid = ntoh32 (gf_stat->gid); + iatt->ia_rdev = ntoh64 (gf_stat->rdev); + iatt->ia_size = ntoh64 (gf_stat->size); + iatt->ia_blksize = ntoh32 (gf_stat->blksize); + iatt->ia_blocks = ntoh64 (gf_stat->blocks); + iatt->ia_atime = ntoh32 (gf_stat->atime); + iatt->ia_atime_nsec = ntoh32 (gf_stat->atime_nsec); + iatt->ia_mtime = ntoh32 (gf_stat->mtime); + iatt->ia_mtime_nsec = ntoh32 (gf_stat->mtime_nsec); + iatt->ia_ctime = ntoh32 (gf_stat->ctime); + iatt->ia_ctime_nsec = ntoh32 (gf_stat->ctime_nsec); + + iatt->ia_gen = ntoh64 (gf_stat->dev); +} + + +static inline void +gf_stat_from_iatt (struct gf_stat *gf_stat, struct iatt *iatt) +{ + gf_stat->ino = hton64 (iatt->ia_ino); + gf_stat->dev = hton64 (iatt->ia_dev); + gf_stat->mode = hton32 (st_mode_from_ia 
(iatt->ia_prot, + iatt->ia_type)); + gf_stat->nlink = hton32 (iatt->ia_nlink); + gf_stat->uid = hton32 (iatt->ia_uid); + gf_stat->gid = hton32 (iatt->ia_gid); + gf_stat->rdev = hton32 (iatt->ia_rdev); + gf_stat->size = hton64 (iatt->ia_size); + gf_stat->blksize = hton32 (iatt->ia_blksize); + gf_stat->blocks = hton64 (iatt->ia_blocks); + gf_stat->atime = hton32 (iatt->ia_atime); + gf_stat->atime_nsec = hton32 (iatt->ia_atime_nsec); + gf_stat->mtime = hton32 (iatt->ia_mtime); + gf_stat->mtime_nsec = hton32 (iatt->ia_mtime_nsec); + gf_stat->ctime = hton32 (iatt->ia_ctime); + gf_stat->ctime_nsec = hton32 (iatt->ia_ctime_nsec); + + gf_stat->dev = hton64 (iatt->ia_gen); + +} + + +struct gf_statfs { + uint64_t bsize; + uint64_t frsize; + uint64_t blocks; + uint64_t bfree; + uint64_t bavail; + uint64_t files; + uint64_t ffree; + uint64_t favail; + uint64_t fsid; + uint64_t flag; + uint64_t namemax; +} __attribute__((packed)); + + +static inline void +gf_statfs_to_statfs (struct gf_statfs *gf_stat, struct statvfs *stat) +{ + stat->f_bsize = ntoh64 (gf_stat->bsize); + stat->f_frsize = ntoh64 (gf_stat->frsize); + stat->f_blocks = ntoh64 (gf_stat->blocks); + stat->f_bfree = ntoh64 (gf_stat->bfree); + stat->f_bavail = ntoh64 (gf_stat->bavail); + stat->f_files = ntoh64 (gf_stat->files); + stat->f_ffree = ntoh64 (gf_stat->ffree); + stat->f_favail = ntoh64 (gf_stat->favail); + stat->f_fsid = ntoh64 (gf_stat->fsid); + stat->f_flag = ntoh64 (gf_stat->flag); + stat->f_namemax = ntoh64 (gf_stat->namemax); +} + + +static inline void +gf_statfs_from_statfs (struct gf_statfs *gf_stat, struct statvfs *stat) +{ + gf_stat->bsize = hton64 (stat->f_bsize); + gf_stat->frsize = hton64 (stat->f_frsize); + gf_stat->blocks = hton64 (stat->f_blocks); + gf_stat->bfree = hton64 (stat->f_bfree); + gf_stat->bavail = hton64 (stat->f_bavail); + gf_stat->files = hton64 (stat->f_files); + gf_stat->ffree = hton64 (stat->f_ffree); + gf_stat->favail = hton64 (stat->f_favail); + gf_stat->fsid = hton64 
/*
 * Platform-independent encoding of open(2) flags as carried on the wire.
 * Host O_* values differ between platforms, so both peers translate
 * to and from these fixed GF_O_* values.
 */
#define GF_O_ACCMODE           003
#define GF_O_RDONLY             00
#define GF_O_WRONLY             01
#define GF_O_RDWR               02
#define GF_O_CREAT            0100
#define GF_O_EXCL             0200
#define GF_O_NOCTTY           0400
#define GF_O_TRUNC           01000
#define GF_O_APPEND          02000
#define GF_O_NONBLOCK        04000
#define GF_O_SYNC           010000
#define GF_O_ASYNC          020000

#define GF_O_DIRECT         040000
#define GF_O_DIRECTORY     0200000
#define GF_O_NOFOLLOW      0400000
#define GF_O_NOATIME      01000000
#define GF_O_CLOEXEC      02000000

#define GF_O_LARGEFILE     0100000

/* If host flag 'bit' is set in 'from', set the matching GF_ bit in 'to'. */
#define XLATE_BIT(from, to, bit)    do {                \
                if ((from) & (bit))                     \
                        (to) = (to) | GF_##bit;         \
        } while (0)

/* If wire flag GF_'bit' is set in 'from', set host flag 'bit' in 'to'. */
#define UNXLATE_BIT(from, to, bit)  do {                \
                if ((from) & GF_##bit)                  \
                        (to) = (to) | (bit);            \
        } while (0)

/* Translate the host access mode (O_RDONLY/O_WRONLY/O_RDWR) to wire form. */
#define XLATE_ACCESSMODE(from, to) do {                 \
                switch ((from) & O_ACCMODE) {           \
                case O_RDONLY: (to) |= GF_O_RDONLY;     \
                        break;                          \
                case O_WRONLY: (to) |= GF_O_WRONLY;     \
                        break;                          \
                case O_RDWR: (to) |= GF_O_RDWR;         \
                        break;                          \
                }                                       \
        } while (0)

/* Translate the wire access mode to the host's O_* value. */
#define UNXLATE_ACCESSMODE(from, to) do {               \
                switch ((from) & GF_O_ACCMODE) {        \
                case GF_O_RDONLY: (to) |= O_RDONLY;     \
                        break;                          \
                case GF_O_WRONLY: (to) |= O_WRONLY;     \
                        break;                          \
                case GF_O_RDWR: (to) |= O_RDWR;         \
                        break;                          \
                }                                       \
        } while (0)

/*
 * Convert host open(2) flags to the wire encoding.
 *
 * Flags that are not part of every platform's <fcntl.h> are translated
 * only where the host defines them, in the same way the file already
 * treated O_NOATIME and O_CLOEXEC.  Host flags with no GF_O_* counterpart
 * are dropped.
 */
static inline uint32_t
gf_flags_from_flags (uint32_t flags)
{
        uint32_t gf_flags = 0;

        XLATE_ACCESSMODE (flags, gf_flags);

        XLATE_BIT (flags, gf_flags, O_CREAT);
        XLATE_BIT (flags, gf_flags, O_EXCL);
        XLATE_BIT (flags, gf_flags, O_NOCTTY);
        XLATE_BIT (flags, gf_flags, O_TRUNC);
        XLATE_BIT (flags, gf_flags, O_APPEND);
        XLATE_BIT (flags, gf_flags, O_NONBLOCK);
        XLATE_BIT (flags, gf_flags, O_SYNC);
#ifdef O_ASYNC
        XLATE_BIT (flags, gf_flags, O_ASYNC);
#endif

#ifdef O_DIRECT
        XLATE_BIT (flags, gf_flags, O_DIRECT);
#endif
#ifdef O_DIRECTORY
        XLATE_BIT (flags, gf_flags, O_DIRECTORY);
#endif
#ifdef O_NOFOLLOW
        XLATE_BIT (flags, gf_flags, O_NOFOLLOW);
#endif
#ifdef O_NOATIME
        XLATE_BIT (flags, gf_flags, O_NOATIME);
#endif
#ifdef O_CLOEXEC
        XLATE_BIT (flags, gf_flags, O_CLOEXEC);
#endif
#ifdef O_LARGEFILE
        XLATE_BIT (flags, gf_flags, O_LARGEFILE);
#endif

        return gf_flags;
}

/*
 * Convert wire-encoded flags back to the host's open(2) flags.
 * Wire bits whose host flag is not defined on this platform are ignored.
 */
static inline uint32_t
gf_flags_to_flags (uint32_t gf_flags)
{
        uint32_t flags = 0;

        UNXLATE_ACCESSMODE (gf_flags, flags);

        UNXLATE_BIT (gf_flags, flags, O_CREAT);
        UNXLATE_BIT (gf_flags, flags, O_EXCL);
        UNXLATE_BIT (gf_flags, flags, O_NOCTTY);
        UNXLATE_BIT (gf_flags, flags, O_TRUNC);
        UNXLATE_BIT (gf_flags, flags, O_APPEND);
        UNXLATE_BIT (gf_flags, flags, O_NONBLOCK);
        UNXLATE_BIT (gf_flags, flags, O_SYNC);
#ifdef O_ASYNC
        UNXLATE_BIT (gf_flags, flags, O_ASYNC);
#endif

#ifdef O_DIRECT
        UNXLATE_BIT (gf_flags, flags, O_DIRECT);
#endif
#ifdef O_DIRECTORY
        UNXLATE_BIT (gf_flags, flags, O_DIRECTORY);
#endif
#ifdef O_NOFOLLOW
        UNXLATE_BIT (gf_flags, flags, O_NOFOLLOW);
#endif
#ifdef O_NOATIME
        UNXLATE_BIT (gf_flags, flags, O_NOATIME);
#endif
#ifdef O_CLOEXEC
        UNXLATE_BIT (gf_flags, flags, O_CLOEXEC);
#endif
#ifdef O_LARGEFILE
        UNXLATE_BIT (gf_flags, flags, O_LARGEFILE);
#endif

        return flags;
}
gf_fop_rmdir_req_t; +typedef struct { + struct gf_stat preparent; + struct gf_stat postparent; +} __attribute__((packed)) gf_fop_rmdir_rsp_t; + + +typedef struct { + uint64_t par; + uint64_t gen; + char path[0]; + char bname[0]; + char linkname[0]; +} __attribute__((packed)) gf_fop_symlink_req_t; +typedef struct { + struct gf_stat stat; + struct gf_stat preparent; + struct gf_stat postparent; +}__attribute__((packed)) gf_fop_symlink_rsp_t; + + +typedef struct { + uint64_t oldpar; + uint64_t oldgen; + uint64_t newpar; + uint64_t newgen; + char oldpath[0]; + char oldbname[0]; /* NULL terminated */ + char newpath[0]; + char newbname[0]; /* NULL terminated */ +} __attribute__((packed)) gf_fop_rename_req_t; +typedef struct { + struct gf_stat stat; + struct gf_stat preoldparent; + struct gf_stat postoldparent; + struct gf_stat prenewparent; + struct gf_stat postnewparent; +} __attribute__((packed)) gf_fop_rename_rsp_t; + + +typedef struct { + uint64_t oldino; + uint64_t oldgen; + uint64_t newpar; + uint64_t newgen; + char oldpath[0]; + char newpath[0]; + char newbname[0]; +}__attribute__((packed)) gf_fop_link_req_t; +typedef struct { + struct gf_stat stat; + struct gf_stat preparent; + struct gf_stat postparent; +} __attribute__((packed)) gf_fop_link_rsp_t; + +typedef struct { + uint64_t ino; + uint64_t gen; + uint64_t offset; + char path[0]; +} __attribute__((packed)) gf_fop_truncate_req_t; +typedef struct { + struct gf_stat prestat; + struct gf_stat poststat; +} __attribute__((packed)) gf_fop_truncate_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + uint32_t flags; + uint32_t wbflags; + char path[0]; +} __attribute__((packed)) gf_fop_open_req_t; +typedef struct { + int64_t fd; +} __attribute__((packed)) gf_fop_open_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; + uint64_t offset; + uint32_t size; +} __attribute__((packed)) gf_fop_read_req_t; +typedef struct { + struct gf_stat stat; + char buf[0]; +} __attribute__((packed)) 
gf_fop_read_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; + uint64_t offset; + uint32_t size; +} __attribute__((packed)) gf_fop_write_req_t; +typedef struct { + struct gf_stat prestat; + struct gf_stat poststat; +} __attribute__((packed)) gf_fop_write_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + char path[0]; +} __attribute__((packed)) gf_fop_statfs_req_t; +typedef struct { + struct gf_statfs statfs; +} __attribute__((packed)) gf_fop_statfs_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; +} __attribute__((packed)) gf_fop_flush_req_t; +typedef struct { } __attribute__((packed)) gf_fop_flush_rsp_t; + + +typedef struct fsync_req { + uint64_t ino; + uint64_t gen; + int64_t fd; + uint32_t data; +} __attribute__((packed)) gf_fop_fsync_req_t; +typedef struct { + struct gf_stat prestat; + struct gf_stat poststat; +} __attribute__((packed)) gf_fop_fsync_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + uint32_t flags; + uint32_t dict_len; + char dict[0]; + char path[0]; +} __attribute__((packed)) gf_fop_setxattr_req_t; +typedef struct { } __attribute__((packed)) gf_fop_setxattr_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; + uint32_t flags; + uint32_t dict_len; + char dict[0]; +} __attribute__((packed)) gf_fop_fsetxattr_req_t; +typedef struct { } __attribute__((packed)) gf_fop_fsetxattr_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + uint32_t flags; + uint32_t dict_len; + char dict[0]; + char path[0]; +} __attribute__((packed)) gf_fop_xattrop_req_t; + +typedef struct { + uint32_t dict_len; + char dict[0]; +} __attribute__((packed)) gf_fop_xattrop_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; + uint32_t flags; + uint32_t dict_len; + char dict[0]; +} __attribute__((packed)) gf_fop_fxattrop_req_t; + +typedef struct { + uint32_t dict_len; + char dict[0]; +} __attribute__((packed)) gf_fop_fxattrop_rsp_t; + + +typedef 
struct { + uint64_t ino; + uint64_t gen; + uint32_t namelen; + char path[0]; + char name[0]; +} __attribute__((packed)) gf_fop_getxattr_req_t; +typedef struct { + uint32_t dict_len; + char dict[0]; +} __attribute__((packed)) gf_fop_getxattr_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; + uint32_t namelen; + char name[0]; +} __attribute__((packed)) gf_fop_fgetxattr_req_t; +typedef struct { + uint32_t dict_len; + char dict[0]; +} __attribute__((packed)) gf_fop_fgetxattr_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + char path[0]; + char name[0]; +} __attribute__((packed)) gf_fop_removexattr_req_t; +typedef struct { } __attribute__((packed)) gf_fop_removexattr_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + char path[0]; +} __attribute__((packed)) gf_fop_opendir_req_t; +typedef struct { + int64_t fd; +} __attribute__((packed)) gf_fop_opendir_rsp_t; + + +typedef struct fsyncdir_req { + uint64_t ino; + uint64_t gen; + int64_t fd; + int32_t data; +} __attribute__((packed)) gf_fop_fsyncdir_req_t; +typedef struct { +} __attribute__((packed)) gf_fop_fsyncdir_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; + uint64_t offset; + uint32_t size; +} __attribute__((packed)) gf_fop_readdir_req_t; +typedef struct { + uint32_t size; + char buf[0]; +} __attribute__((packed)) gf_fop_readdir_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; + uint64_t offset; + uint32_t size; +} __attribute__((packed)) gf_fop_readdirp_req_t; +typedef struct { + uint32_t size; + char buf[0]; +} __attribute__((packed)) gf_fop_readdirp_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + uint32_t mask; + char path[0]; +} __attribute__((packed)) gf_fop_access_req_t; +typedef struct { +} __attribute__((packed)) gf_fop_access_rsp_t; + + +typedef struct { + uint64_t par; + uint64_t gen; + uint32_t flags; + uint32_t mode; + char path[0]; + char bname[0]; +} __attribute__((packed)) 
gf_fop_create_req_t; +typedef struct { + struct gf_stat stat; + uint64_t fd; + struct gf_stat preparent; + struct gf_stat postparent; +} __attribute__((packed)) gf_fop_create_rsp_t; + + + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; + uint64_t offset; +} __attribute__((packed)) gf_fop_ftruncate_req_t; +typedef struct { + struct gf_stat prestat; + struct gf_stat poststat; +} __attribute__((packed)) gf_fop_ftruncate_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; +} __attribute__((packed)) gf_fop_fstat_req_t; +typedef struct { + struct gf_stat stat; +} __attribute__((packed)) gf_fop_fstat_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; + uint32_t cmd; + uint32_t type; + struct gf_flock flock; +} __attribute__((packed)) gf_fop_lk_req_t; +typedef struct { + struct gf_flock flock; +} __attribute__((packed)) gf_fop_lk_rsp_t; + +typedef struct { + uint64_t ino; + uint64_t gen; + uint32_t cmd; + uint32_t type; + struct gf_flock flock; + char path[0]; + char volume[0]; +} __attribute__((packed)) gf_fop_inodelk_req_t; +typedef struct { +} __attribute__((packed)) gf_fop_inodelk_rsp_t; + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; + uint32_t cmd; + uint32_t type; + struct gf_flock flock; + char volume[0]; +} __attribute__((packed)) gf_fop_finodelk_req_t; +typedef struct { +} __attribute__((packed)) gf_fop_finodelk_rsp_t; + +typedef struct { + uint64_t ino; + uint64_t gen; + uint32_t cmd; + uint32_t type; + uint64_t namelen; + char path[0]; + char name[0]; + char volume[0]; +} __attribute__((packed)) gf_fop_entrylk_req_t; +typedef struct { +} __attribute__((packed)) gf_fop_entrylk_rsp_t; + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; + uint32_t cmd; + uint32_t type; + uint64_t namelen; + char name[0]; + char volume[0]; +} __attribute__((packed)) gf_fop_fentrylk_req_t; +typedef struct { +} __attribute__((packed)) gf_fop_fentrylk_rsp_t; + +typedef struct { + uint64_t 
ino; /* NOTE: used only in case of 'root' lookup */ + uint64_t par; + uint64_t gen; + uint32_t flags; + uint32_t dictlen; + char path[0]; + char bname[0]; + char dict[0]; +} __attribute__((packed)) gf_fop_lookup_req_t; +typedef struct { + struct gf_stat stat; + struct gf_stat postparent; + uint32_t dict_len; + char dict[0]; +} __attribute__((packed)) gf_fop_lookup_rsp_t; + +typedef struct { + uint64_t ino; + uint64_t gen; + uint32_t flag; + char path[0]; +} __attribute__((packed)) gf_fop_checksum_req_t; +typedef struct { + unsigned char fchecksum[0]; + unsigned char dchecksum[0]; +} __attribute__((packed)) gf_fop_checksum_rsp_t; + +typedef struct { + uint64_t ino; + uint64_t gen; + struct gf_stat stbuf; + int32_t valid; + char path[0]; +} __attribute__((packed)) gf_fop_setattr_req_t; +typedef struct { + struct gf_stat statpre; + struct gf_stat statpost; +} __attribute__((packed)) gf_fop_setattr_rsp_t; + +typedef struct { + int64_t fd; + struct gf_stat stbuf; + int32_t valid; +} __attribute__((packed)) gf_fop_fsetattr_req_t; +typedef struct { + struct gf_stat statpre; + struct gf_stat statpost; +} __attribute__((packed)) gf_fop_fsetattr_rsp_t; + +typedef struct { + int64_t fd; + uint64_t offset; + uint32_t len; +} __attribute__((packed)) gf_fop_rchecksum_req_t; +typedef struct { + uint32_t weak_checksum; + unsigned char strong_checksum[0]; +} __attribute__((packed)) gf_fop_rchecksum_rsp_t; + +typedef struct { + uint32_t flags; + uint32_t keylen; + char key[0]; +} __attribute__((packed)) gf_mop_getspec_req_t; +typedef struct { + char spec[0]; +} __attribute__((packed)) gf_mop_getspec_rsp_t; + + +typedef struct { + uint32_t msglen; + char msg[0]; +} __attribute__((packed)) gf_mop_log_req_t; +typedef struct { +} __attribute__((packed)) gf_mop_log_rsp_t; + + +typedef struct { + uint32_t dict_len; + char buf[0]; +} __attribute__((packed)) gf_mop_setvolume_req_t; +typedef struct { + uint32_t dict_len; + char buf[0]; +} __attribute__((packed)) gf_mop_setvolume_rsp_t; + + 
+typedef struct { +} __attribute__((packed)) gf_mop_ping_req_t; +typedef struct { +} __attribute__((packed)) gf_mop_ping_rsp_t; + +typedef struct { + uint32_t flags; + char buf[0]; +} __attribute__((packed)) gf_mop_notify_req_t; +typedef struct { + uint32_t flags; + char buf[0]; +} __attribute__((packed)) gf_mop_notify_rsp_t; + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; +} __attribute__((packed)) gf_cbk_releasedir_req_t; +typedef struct { +} __attribute__((packed)) gf_cbk_releasedir_rsp_t; + + +typedef struct { + uint64_t ino; + uint64_t gen; + int64_t fd; +} __attribute__((packed)) gf_cbk_release_req_t; +typedef struct { +} __attribute__((packed)) gf_cbk_release_rsp_t; + + +typedef struct { + uint32_t count; + uint64_t ino_array[0]; +} __attribute__((packed)) gf_cbk_forget_req_t; +typedef struct { } __attribute__((packed)) gf_cbk_forget_rsp_t; + + +typedef struct { + uint32_t pid; + uint32_t uid; + uint32_t gid; + + /* Number of groups being sent through the array above. */ + uint32_t ngrps; + + /* Array of groups to which the uid belongs apart from the primary group + * in gid. 
+ */ + uint32_t groups[GF_REQUEST_MAXGROUPS]; + + uint64_t lk_owner; +} __attribute__ ((packed)) gf_hdr_req_t; + + +typedef struct { + uint32_t op_ret; + uint32_t op_errno; +} __attribute__ ((packed)) gf_hdr_rsp_t; + + +typedef struct { + uint64_t callid; + uint32_t type; + uint32_t op; + uint32_t size; + union { + gf_hdr_req_t req; + gf_hdr_rsp_t rsp; + } __attribute__ ((packed)); +} __attribute__ ((packed)) gf_hdr_common_t; + + +static inline gf_hdr_common_t * +__gf_hdr_new (int size) +{ + gf_hdr_common_t *hdr = NULL; + + /* TODO: use mem-pool */ + hdr = GF_CALLOC (sizeof (gf_hdr_common_t) + size, 1, + gf_common_mt_gf_hdr_common_t); + + if (!hdr) { + return NULL; + } + + hdr->size = hton32 (size); + + return hdr; +} + + +#define gf_hdr_len(type, x) (sizeof (gf_hdr_common_t) + sizeof (*type) + x) +#define gf_hdr_new(type, x) __gf_hdr_new (sizeof (*type) + x) + + +static inline void * +gf_param (gf_hdr_common_t *hdr) +{ + return ((void *)hdr) + sizeof (*hdr); +} + + +struct gf_dirent_nb { + uint64_t d_ino; + uint64_t d_off; + uint32_t d_len; + uint32_t d_type; + struct gf_stat d_stat; + char d_name[0]; +} __attribute__((packed)); + +int +gf_dirent_unserialize (gf_dirent_t *entries, const char *buf, size_t buf_size); +int +gf_dirent_serialize (gf_dirent_t *entries, char *buf, size_t buf_size); + +int protocol_common_init (void); + +#endif diff --git a/xlators/protocol/legacy/lib/src/transport.c b/xlators/protocol/legacy/lib/src/transport.c new file mode 100644 index 00000000000..d460d02096e --- /dev/null +++ b/xlators/protocol/legacy/lib/src/transport.c @@ -0,0 +1,422 @@ +/* + Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <dlfcn.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/poll.h> +#include <fnmatch.h> +#include <stdint.h> + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "logging.h" +#include "transport.h" +#include "glusterfs.h" +#include "xlator.h" +#include "list.h" + + +transport_t * +transport_load (dict_t *options, + xlator_t *xl) +{ + struct transport *trans = NULL, *return_trans = NULL; + char *name = NULL; + void *handle = NULL; + char *type = NULL; + char str[] = "ERROR"; + int32_t ret = -1; + int8_t is_tcp = 0, is_unix = 0, is_ibsdp = 0; + volume_opt_list_t *vol_opt = NULL; + + GF_VALIDATE_OR_GOTO("transport", options, fail); + GF_VALIDATE_OR_GOTO("transport", xl, fail); + + trans = GF_CALLOC (1, sizeof (struct transport), + gf_common_mt_transport); + GF_VALIDATE_OR_GOTO("transport", trans, fail); + + trans->xl = xl; + type = str; + + /* Backward compatibility */ + ret = dict_get_str (options, "transport-type", &type); + if (ret < 0) { + ret = dict_set_str (options, "transport-type", "socket"); + if (ret < 0) + gf_log ("dict", GF_LOG_DEBUG, + "setting transport-type failed"); + gf_log ("transport", GF_LOG_WARNING, + "missing 'option transport-type'. defaulting to " + "\"socket\""); + } else { + { + /* Backword compatibility to handle * /client, + * * /server. 
+ */ + char *tmp = strchr (type, '/'); + if (tmp) + *tmp = '\0'; + } + + is_tcp = strcmp (type, "tcp"); + is_unix = strcmp (type, "unix"); + is_ibsdp = strcmp (type, "ib-sdp"); + if ((is_tcp == 0) || + (is_unix == 0) || + (is_ibsdp == 0)) { + if (is_unix == 0) + ret = dict_set_str (options, + "transport.address-family", + "unix"); + if (is_ibsdp == 0) + ret = dict_set_str (options, + "transport.address-family", + "inet-sdp"); + + if (ret < 0) + gf_log ("dict", GF_LOG_DEBUG, + "setting address-family failed"); + + ret = dict_set_str (options, + "transport-type", "socket"); + if (ret < 0) + gf_log ("dict", GF_LOG_DEBUG, + "setting transport-type failed"); + } + } + + ret = dict_get_str (options, "transport-type", &type); + if (ret < 0) { + GF_FREE (trans); + gf_log ("transport", GF_LOG_ERROR, + "'option transport-type <xx>' missing in volume '%s'", + xl->name); + goto fail; + } + + ret = gf_asprintf (&name, "%s/%s.so", TRANSPORTDIR, type); + if (-1 == ret) { + gf_log ("transport", GF_LOG_ERROR, "asprintf failed"); + goto fail; + } + gf_log ("transport", GF_LOG_DEBUG, + "attempt to load file %s", name); + + handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL); + if (handle == NULL) { + gf_log ("transport", GF_LOG_ERROR, "%s", dlerror ()); + gf_log ("transport", GF_LOG_ERROR, + "volume '%s': transport-type '%s' is not valid or " + "not found on this machine", + xl->name, type); + GF_FREE (name); + GF_FREE (trans); + goto fail; + } + GF_FREE (name); + + trans->ops = dlsym (handle, "tops"); + if (trans->ops == NULL) { + gf_log ("transport", GF_LOG_ERROR, + "dlsym (transport_ops) on %s", dlerror ()); + GF_FREE (trans); + goto fail; + } + + trans->init = dlsym (handle, "init"); + if (trans->init == NULL) { + gf_log ("transport", GF_LOG_ERROR, + "dlsym (gf_transport_init) on %s", dlerror ()); + GF_FREE (trans); + goto fail; + } + + trans->fini = dlsym (handle, "fini"); + if (trans->fini == NULL) { + gf_log ("transport", GF_LOG_ERROR, + "dlsym (gf_transport_fini) on %s", dlerror 
()); + GF_FREE (trans); + goto fail; + } + + vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t), + gf_common_mt_volume_opt_list_t); + vol_opt->given_opt = dlsym (handle, "options"); + if (vol_opt->given_opt == NULL) { + gf_log ("transport", GF_LOG_DEBUG, + "volume option validation not specified"); + } else { + list_add_tail (&vol_opt->list, &xl->volume_options); + if (-1 == + validate_xlator_volume_options (xl, + vol_opt->given_opt)) { + gf_log ("transport", GF_LOG_ERROR, + "volume option validation failed"); + GF_FREE (trans); + goto fail; + } + } + + ret = trans->init (trans); + if (ret != 0) { + gf_log ("transport", GF_LOG_ERROR, + "'%s' initialization failed", type); + GF_FREE (trans); + goto fail; + } + + pthread_mutex_init (&trans->lock, NULL); + return_trans = trans; +fail: + return return_trans; +} + + +int32_t +transport_submit (transport_t *this, char *buf, int32_t len, + struct iovec *vector, int count, + struct iobref *iobref) +{ + int32_t ret = -1; + transport_t *peer_trans = NULL; + struct iobuf *iobuf = NULL; + struct transport_msg *msg = NULL; + + if (this->peer_trans) { + peer_trans = this->peer_trans; + + msg = GF_CALLOC (1, sizeof (*msg), + gf_common_mt_transport_msg); + if (!msg) { + return -ENOMEM; + } + + msg->hdr = buf; + msg->hdrlen = len; + + if (vector) { + iobuf = iobuf_get (this->xl->ctx->iobuf_pool); + if (!iobuf) { + GF_FREE (msg->hdr); + GF_FREE (msg); + return -ENOMEM; + } + + iov_unload (iobuf->ptr, vector, count); + msg->iobuf = iobuf; + } + + pthread_mutex_lock (&peer_trans->handover.mutex); + { + list_add_tail (&msg->list, &peer_trans->handover.msgs); + pthread_cond_broadcast (&peer_trans->handover.cond); + } + pthread_mutex_unlock (&peer_trans->handover.mutex); + + return 0; + } + + GF_VALIDATE_OR_GOTO("transport", this, fail); + GF_VALIDATE_OR_GOTO("transport", this->ops, fail); + + ret = this->ops->submit (this, buf, len, vector, count, iobref); +fail: + return ret; +} + + +int32_t +transport_connect (transport_t *this) +{ + 
int ret = -1; + + GF_VALIDATE_OR_GOTO("transport", this, fail); + + ret = this->ops->connect (this); +fail: + return ret; +} + + +int32_t +transport_listen (transport_t *this) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO("transport", this, fail); + + ret = this->ops->listen (this); +fail: + return ret; +} + + +int32_t +transport_disconnect (transport_t *this) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("transport", this, fail); + + ret = this->ops->disconnect (this); +fail: + return ret; +} + + +int32_t +transport_destroy (transport_t *this) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("transport", this, fail); + + if (this->fini) + this->fini (this); + + pthread_mutex_destroy (&this->lock); + GF_FREE (this); +fail: + return ret; +} + + +transport_t * +transport_ref (transport_t *this) +{ + transport_t *return_this = NULL; + + GF_VALIDATE_OR_GOTO("transport", this, fail); + + pthread_mutex_lock (&this->lock); + { + this->refcount ++; + } + pthread_mutex_unlock (&this->lock); + + return_this = this; +fail: + return return_this; +} + + +int32_t +transport_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p, + struct iobuf **iobuf_p) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("transport", this, fail); + + if (this->peer_trans) { + *hdr_p = this->handover.msg->hdr; + *hdrlen_p = this->handover.msg->hdrlen; + *iobuf_p = this->handover.msg->iobuf; + + return 0; + } + + ret = this->ops->receive (this, hdr_p, hdrlen_p, iobuf_p); +fail: + return ret; +} + + +int32_t +transport_unref (transport_t *this) +{ + int32_t refcount = 0; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("transport", this, fail); + + pthread_mutex_lock (&this->lock); + { + refcount = --this->refcount; + } + pthread_mutex_unlock (&this->lock); + + if (refcount == 0) { + xlator_notify (this->xl, GF_EVENT_TRANSPORT_CLEANUP, this); + transport_destroy (this); + } + + ret = 0; +fail: + return ret; +} + + +void * +transport_peerproc (void *trans_data) +{ + transport_t *trans = NULL; + struct 
transport_msg *msg = NULL; + + trans = trans_data; + + while (1) { + pthread_mutex_lock (&trans->handover.mutex); + { + while (list_empty (&trans->handover.msgs)) + pthread_cond_wait (&trans->handover.cond, + &trans->handover.mutex); + + msg = list_entry (trans->handover.msgs.next, + struct transport_msg, list); + + list_del_init (&msg->list); + } + pthread_mutex_unlock (&trans->handover.mutex); + + trans->handover.msg = msg; + + xlator_notify (trans->xl, GF_EVENT_POLLIN, trans); + + GF_FREE (msg); + } +} + + +int +transport_setpeer (transport_t *trans, transport_t *peer_trans) +{ + trans->peer_trans = transport_ref (peer_trans); + + INIT_LIST_HEAD (&trans->handover.msgs); + pthread_cond_init (&trans->handover.cond, NULL); + pthread_mutex_init (&trans->handover.mutex, NULL); + pthread_create (&trans->handover.thread, NULL, + transport_peerproc, trans); + + peer_trans->peer_trans = transport_ref (trans); + + INIT_LIST_HEAD (&peer_trans->handover.msgs); + pthread_cond_init (&peer_trans->handover.cond, NULL); + pthread_mutex_init (&peer_trans->handover.mutex, NULL); + pthread_create (&peer_trans->handover.thread, NULL, + transport_peerproc, peer_trans); + + return 0; +} diff --git a/xlators/protocol/legacy/lib/src/transport.h b/xlators/protocol/legacy/lib/src/transport.h new file mode 100644 index 00000000000..f0623d5b417 --- /dev/null +++ b/xlators/protocol/legacy/lib/src/transport.h @@ -0,0 +1,106 @@ +/* + Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
/*
 * struct transport - one loaded transport instance (see transport_load()).
 */
struct transport {
        /* operation table taken from the module's "tops" symbol */
        struct transport_ops  *ops;
        /* opaque pointers, not touched by transport.c; presumably owned by
         * the transport module and by the xlator respectively -- confirm */
        void                  *private;
        void                  *xl_private;
        /* taken by transport_ref()/transport_unref() around refcount */
        pthread_mutex_t        lock;
        /* when transport_unref() drops this to zero the xlator is notified
         * with GF_EVENT_TRANSPORT_CLEANUP and the transport is destroyed */
        int32_t                refcount;

        /* xlator the transport was loaded for; target of xlator_notify() */
        xlator_t              *xl;
        /* not referenced by transport.c */
        void                  *dnscache;
        data_t                *buf;
        /* module entry points taken from the "init" and "fini" symbols;
         * init runs at the end of transport_load(), fini in
         * transport_destroy() */
        int32_t              (*init)   (transport_t *this);
        void                 (*fini)   (transport_t *this);
        /*  int                  (*notify) (transport_t *this, int event, void *data); */
        /* presumably the remote and local endpoint addresses -- confirm */
        peer_info_t     peerinfo;
        peer_info_t     myinfo;

        /* set by transport_setpeer(); when non-NULL, transport_submit()
         * queues on the peer's handover list instead of calling ops->submit
         * and transport_receive() returns handover.msg */
        transport_t    *peer_trans;
        struct {
                /* mutex and cond guard and signal 'msgs' */
                pthread_mutex_t   mutex;
                pthread_cond_t    cond;
                /* worker running transport_peerproc() */
                pthread_t         thread;
                /* queue of struct transport_msg filled by the peer */
                struct list_head  msgs;
                /* message currently being delivered by the worker */
                struct transport_msg *msg;
        } handover;

};
transport_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p, + struct iobuf **iobuf_p); +int32_t transport_destroy (transport_t *this); + +transport_t *transport_load (dict_t *options, xlator_t *xl); +transport_t *transport_ref (transport_t *trans); +int32_t transport_unref (transport_t *trans); + +int transport_setpeer (transport_t *trans, transport_t *trans_peer); + +#endif /* __TRANSPORT_H__ */ diff --git a/xlators/protocol/legacy/server/Makefile.am b/xlators/protocol/legacy/server/Makefile.am new file mode 100644 index 00000000000..d471a3f9243 --- /dev/null +++ b/xlators/protocol/legacy/server/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/protocol/legacy/server/src/Makefile.am b/xlators/protocol/legacy/server/src/Makefile.am new file mode 100644 index 00000000000..87d3042c534 --- /dev/null +++ b/xlators/protocol/legacy/server/src/Makefile.am @@ -0,0 +1,22 @@ + +xlator_LTLIBRARIES = server.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/protocol + +server_la_LDFLAGS = -module -avoidversion + +server_la_SOURCES = server-protocol.c server-resolve.c server-helpers.c +server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(top_builddir)/xlators/protocol/legacy/lib/src/libgfproto.la \ + $(top_builddir)/xlators/protocol/lib/src/libgfproto1.la + +noinst_HEADERS = server-protocol.h server-helpers.h server-mem-types.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \ + -I$(top_srcdir)/contrib/md5/ \ + -DDATADIR=\"$(localstatedir)\" -DCONFDIR=\"$(sysconfdir)/glusterfs\" \ + $(GF_CFLAGS) -I$(top_srcdir)/xlators/protocol/legacy/lib/src \ + -I$(top_srcdir)/xlators/protocol/lib/src + +CLEANFILES = + diff --git a/xlators/protocol/legacy/server/src/server-helpers.c b/xlators/protocol/legacy/server/src/server-helpers.c new file mode 100644 index 00000000000..d07e841f287 --- /dev/null +++ 
b/xlators/protocol/legacy/server/src/server-helpers.c @@ -0,0 +1,925 @@ +/* + Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "server-protocol.h" +#include "server-helpers.h" + + +/* server_loc_fill - derive a loc_t for a given inode number + * + * NOTE: make sure that @loc is empty, because any pointers it holds with reference will + * be leaked after returning from here. 
+ */ +int +server_loc_fill (loc_t *loc, server_state_t *state, + ino_t ino, ino_t par, + const char *name, const char *path) +{ + inode_t *inode = NULL; + inode_t *parent = NULL; + int32_t ret = -1; + char *dentry_path = NULL; + + + GF_VALIDATE_OR_GOTO ("server", loc, out); + GF_VALIDATE_OR_GOTO ("server", state, out); + GF_VALIDATE_OR_GOTO ("server", path, out); + + /* anything beyond this point is success */ + ret = 0; + loc->ino = ino; + inode = loc->inode; + if (inode == NULL) { + if (ino) + inode = inode_search (state->itable, ino, NULL); + + if ((inode == NULL) && + (par && name)) + inode = inode_search (state->itable, par, name); + + loc->inode = inode; + if (inode) + loc->ino = inode->ino; + } + + parent = loc->parent; + if (parent == NULL) { + if (inode) + parent = inode_parent (inode, par, name); + else + parent = inode_search (state->itable, par, NULL); + loc->parent = parent; + } + + if (name && parent) { + ret = inode_path (parent, name, &dentry_path); + if (ret < 0) { + gf_log (state->bound_xl->name, GF_LOG_DEBUG, + "failed to build path for %"PRId64"/%s: %s", + parent->ino, name, strerror (-ret)); + } + } else if (inode) { + ret = inode_path (inode, NULL, &dentry_path); + if (ret < 0) { + gf_log (state->bound_xl->name, GF_LOG_DEBUG, + "failed to build path for %"PRId64": %s", + inode->ino, strerror (-ret)); + } + } + + if (dentry_path) { + if (strcmp (dentry_path, path)) { + gf_log (state->bound_xl->name, GF_LOG_DEBUG, + "paths differ for inode(%"PRId64"): " + "client path = %s. 
dentry path = %s", + ino, path, dentry_path); + } + + loc->path = dentry_path; + loc->name = strrchr (loc->path, '/'); + if (loc->name) + loc->name++; + } else { + loc->path = gf_strdup (path); + loc->name = strrchr (loc->path, '/'); + if (loc->name) + loc->name++; + } + +out: + return ret; +} + +/* + * stat_to_str - convert struct iatt to a ASCII string + * @stbuf: struct iatt pointer + * + * not for external reference + */ +char * +stat_to_str (struct iatt *stbuf) +{ + int ret = 0; + char *tmp_buf = NULL; + + uint64_t dev = stbuf->ia_gen; + uint64_t ino = stbuf->ia_ino; + uint32_t mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); + uint32_t nlink = stbuf->ia_nlink; + uint32_t uid = stbuf->ia_uid; + uint32_t gid = stbuf->ia_gid; + uint64_t rdev = stbuf->ia_rdev; + uint64_t size = stbuf->ia_size; + uint32_t blksize = stbuf->ia_blksize; + uint64_t blocks = stbuf->ia_blocks; + uint32_t atime = stbuf->ia_atime; + uint32_t mtime = stbuf->ia_mtime; + uint32_t ctime = stbuf->ia_ctime; + + uint32_t atime_nsec = stbuf->ia_atime_nsec; + uint32_t mtime_nsec = stbuf->ia_mtime_nsec; + uint32_t ctime_nsec = stbuf->ia_ctime_nsec; + + + ret = gf_asprintf (&tmp_buf, + GF_STAT_PRINT_FMT_STR, + dev, + ino, + mode, + nlink, + uid, + gid, + rdev, + size, + blksize, + blocks, + atime, + atime_nsec, + mtime, + mtime_nsec, + ctime, + ctime_nsec); + if (-1 == ret) { + gf_log ("protocol/server", GF_LOG_DEBUG, + "asprintf failed while setting up stat buffer string"); + return NULL; + } + return tmp_buf; +} + + +void +server_loc_wipe (loc_t *loc) +{ + if (loc->parent) { + inode_unref (loc->parent); + loc->parent = NULL; + } + + if (loc->inode) { + inode_unref (loc->inode); + loc->inode = NULL; + } + + if (loc->path) + GF_FREE ((char *)loc->path); +} + + +void +server_resolve_wipe (server_resolve_t *resolve) +{ + struct resolve_comp *comp = NULL; + int i = 0; + + if (resolve->path) + GF_FREE (resolve->path); + + if (resolve->bname) + GF_FREE (resolve->bname); + + if (resolve->resolved) 
+ GF_FREE (resolve->resolved); + + loc_wipe (&resolve->deep_loc); + + comp = resolve->components; + if (comp) { + for (i = 0; comp[i].basename; i++) { + if (comp[i].inode) + inode_unref (comp[i].inode); + } + GF_FREE (resolve->components); + } +} + + +void +free_state (server_state_t *state) +{ + if (state->trans) { + transport_unref (state->trans); + state->trans = NULL; + } + + if (state->fd) { + fd_unref (state->fd); + state->fd = NULL; + } + + if (state->iobref) { + iobref_unref (state->iobref); + state->iobref = NULL; + } + + if (state->iobuf) { + iobuf_unref (state->iobuf); + state->iobuf = NULL; + } + + if (state->dict) { + dict_unref (state->dict); + state->dict = NULL; + } + + if (state->volume) + GF_FREE ((char *)state->volume); + + if (state->name) + GF_FREE (state->name); + + server_loc_wipe (&state->loc); + server_loc_wipe (&state->loc2); + + server_resolve_wipe (&state->resolve); + server_resolve_wipe (&state->resolve2); + + GF_FREE (state); +} + + +call_frame_t * +server_copy_frame (call_frame_t *frame) +{ + call_frame_t *new_frame = NULL; + server_state_t *state = NULL, *new_state = NULL; + + state = frame->root->state; + + new_frame = copy_frame (frame); + + new_state = GF_CALLOC (1, sizeof (server_state_t), + gf_server_mt_server_state_t); + + new_frame->root->op = frame->root->op; + new_frame->root->type = frame->root->type; + new_frame->root->trans = state->trans; + new_frame->root->state = new_state; + + new_state->bound_xl = state->bound_xl; + new_state->trans = transport_ref (state->trans); + new_state->itable = state->itable; + + new_state->resolve.fd_no = -1; + new_state->resolve2.fd_no = -1; + + return new_frame; +} + + +int +gf_add_locker (struct _lock_table *table, const char *volume, + loc_t *loc, fd_t *fd, pid_t pid) +{ + int32_t ret = -1; + struct _locker *new = NULL; + uint8_t dir = 0; + + new = GF_CALLOC (1, sizeof (struct _locker), + gf_server_mt_locker); + if (new == NULL) { + gf_log ("server", GF_LOG_ERROR, + "failed to allocate 
memory for \'struct _locker\'"); + goto out; + } + INIT_LIST_HEAD (&new->lockers); + + new->volume = gf_strdup (volume); + + if (fd == NULL) { + loc_copy (&new->loc, loc); + dir = IA_ISDIR (new->loc.inode->ia_type); + } else { + new->fd = fd_ref (fd); + dir = IA_ISDIR (fd->inode->ia_type); + } + + new->pid = pid; + + LOCK (&table->lock); + { + if (dir) + list_add_tail (&new->lockers, &table->dir_lockers); + else + list_add_tail (&new->lockers, &table->file_lockers); + } + UNLOCK (&table->lock); +out: + return ret; +} + + +int +gf_del_locker (struct _lock_table *table, const char *volume, + loc_t *loc, fd_t *fd, pid_t pid) +{ + struct _locker *locker = NULL; + struct _locker *tmp = NULL; + int32_t ret = 0; + uint8_t dir = 0; + struct list_head *head = NULL; + struct list_head del; + + INIT_LIST_HEAD (&del); + + if (fd) { + dir = IA_ISDIR (fd->inode->ia_type); + } else { + dir = IA_ISDIR (loc->inode->ia_type); + } + + LOCK (&table->lock); + { + if (dir) { + head = &table->dir_lockers; + } else { + head = &table->file_lockers; + } + + list_for_each_entry_safe (locker, tmp, head, lockers) { + if (locker->fd && fd && + (locker->fd == fd) && (locker->pid == pid) + && !strcmp (locker->volume, volume)) { + list_move_tail (&locker->lockers, &del); + } else if (locker->loc.inode && + loc && + (locker->loc.inode == loc->inode) && + (locker->pid == pid) + && !strcmp (locker->volume, volume)) { + list_move_tail (&locker->lockers, &del); + } + } + } + UNLOCK (&table->lock); + + tmp = NULL; + locker = NULL; + + list_for_each_entry_safe (locker, tmp, &del, lockers) { + list_del_init (&locker->lockers); + if (locker->fd) + fd_unref (locker->fd); + else + loc_wipe (&locker->loc); + + GF_FREE (locker->volume); + GF_FREE (locker); + } + + return ret; +} + + +int +gf_direntry_to_bin (dir_entry_t *head, char *buffer) +{ + dir_entry_t *trav = NULL; + uint32_t len = 0; + uint32_t this_len = 0; + size_t buflen = -1; + char *ptr = NULL; + char *tmp_buf = NULL; + + trav = head->next; + while 
(trav) { + len += strlen (trav->name); + len += 1; + len += strlen (trav->link); + len += 1; /* for '\n' */ + len += 256; // max possible for statbuf; + trav = trav->next; + } + + ptr = buffer; + trav = head->next; + while (trav) { + tmp_buf = stat_to_str (&trav->buf); + /* tmp_buf will have \n before \0 */ + + this_len = sprintf (ptr, "%s/%s%s\n", + trav->name, tmp_buf, + trav->link); + + GF_FREE (tmp_buf); + trav = trav->next; + ptr += this_len; + } + + buflen = strlen (buffer); + + return buflen; +} + + +static struct _lock_table * +gf_lock_table_new (void) +{ + struct _lock_table *new = NULL; + + new = GF_CALLOC (1, sizeof (struct _lock_table), + gf_server_mt_lock_table); + if (new == NULL) { + gf_log ("server-protocol", GF_LOG_CRITICAL, + "failed to allocate memory for new lock table"); + goto out; + } + INIT_LIST_HEAD (&new->dir_lockers); + INIT_LIST_HEAD (&new->file_lockers); + LOCK_INIT (&new->lock); +out: + return new; +} + + +int +do_lock_table_cleanup (xlator_t *this, server_connection_t *conn, + call_frame_t *frame, struct _lock_table *ltable) +{ + struct list_head file_lockers, dir_lockers; + call_frame_t *tmp_frame = NULL; + struct flock flock = {0, }; + xlator_t *bound_xl = NULL; + struct _locker *locker = NULL, *tmp = NULL; + int ret = -1; + + bound_xl = conn->bound_xl; + INIT_LIST_HEAD (&file_lockers); + INIT_LIST_HEAD (&dir_lockers); + + LOCK (<able->lock); + { + list_splice_init (<able->file_lockers, + &file_lockers); + + list_splice_init (<able->dir_lockers, &dir_lockers); + } + UNLOCK (<able->lock); + + GF_FREE (ltable); + + flock.l_type = F_UNLCK; + flock.l_start = 0; + flock.l_len = 0; + list_for_each_entry_safe (locker, + tmp, &file_lockers, lockers) { + tmp_frame = copy_frame (frame); + if (tmp_frame == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory"); + goto out; + } + /* + pid = 0 is a special case that tells posix-locks + to release all locks from this transport + */ + tmp_frame->root->pid = 0; + tmp_frame->root->trans = 
conn; + + if (locker->fd) { + STACK_WIND (tmp_frame, server_nop_cbk, + bound_xl, + bound_xl->fops->finodelk, + locker->volume, + locker->fd, F_SETLK, &flock); + fd_unref (locker->fd); + } else { + STACK_WIND (tmp_frame, server_nop_cbk, + bound_xl, + bound_xl->fops->inodelk, + locker->volume, + &(locker->loc), F_SETLK, &flock); + loc_wipe (&locker->loc); + } + + GF_FREE (locker->volume); + + list_del_init (&locker->lockers); + GF_FREE (locker); + } + + tmp = NULL; + locker = NULL; + list_for_each_entry_safe (locker, tmp, &dir_lockers, lockers) { + tmp_frame = copy_frame (frame); + + tmp_frame->root->pid = 0; + tmp_frame->root->trans = conn; + + if (locker->fd) { + STACK_WIND (tmp_frame, server_nop_cbk, + bound_xl, + bound_xl->fops->fentrylk, + locker->volume, + locker->fd, NULL, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + fd_unref (locker->fd); + } else { + STACK_WIND (tmp_frame, server_nop_cbk, + bound_xl, + bound_xl->fops->entrylk, + locker->volume, + &(locker->loc), NULL, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + loc_wipe (&locker->loc); + } + + GF_FREE (locker->volume); + + list_del_init (&locker->lockers); + GF_FREE (locker); + } + ret = 0; + +out: + return ret; +} + + +static int +server_connection_cleanup_flush_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno) +{ + fd_t *fd = NULL; + + fd = frame->local; + + fd_unref (fd); + frame->local = NULL; + + STACK_DESTROY (frame->root); + return 0; +} + + +int +do_fd_cleanup (xlator_t *this, server_connection_t *conn, call_frame_t *frame, + fdentry_t *fdentries, int fd_count) +{ + fd_t *fd = NULL; + int i = 0, ret = -1; + call_frame_t *tmp_frame = NULL; + xlator_t *bound_xl = NULL; + + bound_xl = conn->bound_xl; + for (i = 0;i < fd_count; i++) { + fd = fdentries[i].fd; + + if (fd != NULL) { + tmp_frame = copy_frame (frame); + if (tmp_frame == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory"); + goto out; + } + tmp_frame->local = fd; + + tmp_frame->root->pid = 0; + 
tmp_frame->root->trans = conn; + tmp_frame->root->lk_owner = 0; + STACK_WIND (tmp_frame, + server_connection_cleanup_flush_cbk, + bound_xl, bound_xl->fops->flush, fd); + } + } + + GF_FREE (fdentries); + ret = 0; + +out: + return ret; +} + +int +do_connection_cleanup (xlator_t *this, server_connection_t *conn, + struct _lock_table *ltable, fdentry_t *fdentries, int fd_count) +{ + int ret = 0; + int saved_ret = 0; + call_frame_t *frame = NULL; + server_state_t *state = NULL; + + frame = create_frame (this, this->ctx->pool); + if (frame == NULL) { + gf_log (this->name, GF_LOG_ERROR, "out of memory"); + goto out; + } + + saved_ret = do_lock_table_cleanup (this, conn, frame, ltable); + + if (fdentries != NULL) { + ret = do_fd_cleanup (this, conn, frame, fdentries, fd_count); + } + + state = CALL_STATE (frame); + if (state) + GF_FREE (state); + + STACK_DESTROY (frame->root); + + if (saved_ret || ret) { + ret = -1; + } + +out: + return ret; +} + + +int +server_connection_cleanup (xlator_t *this, server_connection_t *conn) +{ + char do_cleanup = 0; + struct _lock_table *ltable = NULL; + fdentry_t *fdentries = NULL; + uint32_t fd_count = 0; + int ret = 0; + + if (conn == NULL) { + goto out; + } + + pthread_mutex_lock (&conn->lock); + { + conn->active_transports--; + if (conn->active_transports == 0) { + if (conn->ltable) { + ltable = conn->ltable; + conn->ltable = gf_lock_table_new (); + } + + if (conn->fdtable) { + fdentries = gf_fd_fdtable_get_all_fds (conn->fdtable, + &fd_count); + } + do_cleanup = 1; + } + } + pthread_mutex_unlock (&conn->lock); + + if (do_cleanup && conn->bound_xl) + ret = do_connection_cleanup (this, conn, ltable, fdentries, fd_count); + +out: + return ret; +} + + +int +server_connection_destroy (xlator_t *this, server_connection_t *conn) +{ + call_frame_t *frame = NULL, *tmp_frame = NULL; + xlator_t *bound_xl = NULL; + int32_t ret = -1; + server_state_t *state = NULL; + struct list_head file_lockers; + struct list_head dir_lockers; + struct 
_lock_table *ltable = NULL; + struct _locker *locker = NULL, *tmp = NULL; + struct flock flock = {0,}; + fd_t *fd = NULL; + int32_t i = 0; + fdentry_t *fdentries = NULL; + uint32_t fd_count = 0; + + if (conn == NULL) { + ret = 0; + goto out; + } + + bound_xl = (xlator_t *) (conn->bound_xl); + + if (bound_xl) { + /* trans will have ref_count = 1 after this call, but its + ok since this function is called in + GF_EVENT_TRANSPORT_CLEANUP */ + frame = create_frame (this, this->ctx->pool); + + pthread_mutex_lock (&(conn->lock)); + { + if (conn->ltable) { + ltable = conn->ltable; + conn->ltable = NULL; + } + } + pthread_mutex_unlock (&conn->lock); + + INIT_LIST_HEAD (&file_lockers); + INIT_LIST_HEAD (&dir_lockers); + + LOCK (<able->lock); + { + list_splice_init (<able->file_lockers, + &file_lockers); + + list_splice_init (<able->dir_lockers, &dir_lockers); + } + UNLOCK (<able->lock); + GF_FREE (ltable); + + flock.l_type = F_UNLCK; + flock.l_start = 0; + flock.l_len = 0; + list_for_each_entry_safe (locker, + tmp, &file_lockers, lockers) { + tmp_frame = copy_frame (frame); + /* + pid = 0 is a special case that tells posix-locks + to release all locks from this transport + */ + tmp_frame->root->pid = 0; + tmp_frame->root->trans = conn; + + if (locker->fd) { + STACK_WIND (tmp_frame, server_nop_cbk, + bound_xl, + bound_xl->fops->finodelk, + locker->volume, + locker->fd, F_SETLK, &flock); + fd_unref (locker->fd); + } else { + STACK_WIND (tmp_frame, server_nop_cbk, + bound_xl, + bound_xl->fops->inodelk, + locker->volume, + &(locker->loc), F_SETLK, &flock); + loc_wipe (&locker->loc); + } + + GF_FREE (locker->volume); + + list_del_init (&locker->lockers); + GF_FREE (locker); + } + + tmp = NULL; + locker = NULL; + list_for_each_entry_safe (locker, tmp, &dir_lockers, lockers) { + tmp_frame = copy_frame (frame); + + tmp_frame->root->pid = 0; + tmp_frame->root->trans = conn; + + if (locker->fd) { + STACK_WIND (tmp_frame, server_nop_cbk, + bound_xl, + bound_xl->fops->fentrylk, + 
locker->volume, + locker->fd, NULL, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + fd_unref (locker->fd); + } else { + STACK_WIND (tmp_frame, server_nop_cbk, + bound_xl, + bound_xl->fops->entrylk, + locker->volume, + &(locker->loc), NULL, + ENTRYLK_UNLOCK, ENTRYLK_WRLCK); + loc_wipe (&locker->loc); + } + + GF_FREE (locker->volume); + + + list_del_init (&locker->lockers); + GF_FREE (locker); + } + + pthread_mutex_lock (&(conn->lock)); + { + if (conn->fdtable) { + fdentries = gf_fd_fdtable_get_all_fds (conn->fdtable, + &fd_count); + gf_fd_fdtable_destroy (conn->fdtable); + conn->fdtable = NULL; + } + } + pthread_mutex_unlock (&conn->lock); + + if (fdentries != NULL) { + for (i = 0; i < fd_count; i++) { + fd = fdentries[i].fd; + if (fd != NULL) { + tmp_frame = copy_frame (frame); + tmp_frame->local = fd; + + STACK_WIND (tmp_frame, + server_connection_cleanup_flush_cbk, + bound_xl, + bound_xl->fops->flush, + fd); + } + } + GF_FREE (fdentries); + } + } + + if (frame) { + state = CALL_STATE (frame); + if (state) + GF_FREE (state); + STACK_DESTROY (frame->root); + } + + gf_log (this->name, GF_LOG_INFO, "destroyed connection of %s", + conn->id); + + GF_FREE (conn->id); + GF_FREE (conn); + +out: + return ret; +} + + +server_connection_t * +server_connection_get (xlator_t *this, const char *id) +{ + server_connection_t *conn = NULL; + server_connection_t *trav = NULL; + server_conf_t *conf = NULL; + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + list_for_each_entry (trav, &conf->conns, list) { + if (!strcmp (id, trav->id)) { + conn = trav; + break; + } + } + + if (!conn) { + conn = (void *) GF_CALLOC (1, sizeof (*conn), + gf_server_mt_server_connection_t); + + conn->id = gf_strdup (id); + conn->fdtable = gf_fd_fdtable_alloc (); + conn->ltable = gf_lock_table_new (); + + pthread_mutex_init (&conn->lock, NULL); + + list_add (&conn->list, &conf->conns); + } + + conn->ref++; + conn->active_transports++; + } + pthread_mutex_unlock (&conf->mutex); + + return conn; +} + 
+ +void +server_connection_put (xlator_t *this, server_connection_t *conn) +{ + server_conf_t *conf = NULL; + server_connection_t *todel = NULL; + + if (conn == NULL) { + goto out; + } + + conf = this->private; + + pthread_mutex_lock (&conf->mutex); + { + conn->ref--; + + if (!conn->ref) { + list_del_init (&conn->list); + todel = conn; + } + } + pthread_mutex_unlock (&conf->mutex); + + if (todel) { + server_connection_destroy (this, todel); + } + +out: + return; +} diff --git a/xlators/protocol/legacy/server/src/server-helpers.h b/xlators/protocol/legacy/server/src/server-helpers.h new file mode 100644 index 00000000000..cbf57ae44d1 --- /dev/null +++ b/xlators/protocol/legacy/server/src/server-helpers.h @@ -0,0 +1,72 @@ +/* + Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef __SERVER_HELPERS_H__ +#define __SERVER_HELPERS_H__ + +#define CALL_STATE(frame) ((server_state_t *)frame->root->state) + +#define BOUND_XL(frame) ((xlator_t *) CALL_STATE(frame)->bound_xl) + +#define TRANSPORT_FROM_FRAME(frame) ((transport_t *) CALL_STATE(frame)->trans) + +#define SERVER_CONNECTION(frame) \ + ((server_connection_t *) TRANSPORT_FROM_FRAME(frame)->xl_private) + +#define SERVER_CONF(frame) \ + ((server_conf_t *)TRANSPORT_FROM_FRAME(frame)->xl->private) + +#define TRANSPORT_FROM_XLATOR(this) ((((server_conf_t *)this->private))->trans) + +#define INODE_LRU_LIMIT(this) \ + (((server_conf_t *)(this->private))->inode_lru_limit) + +#define IS_ROOT_INODE(inode) (inode == inode->table->root) + +#define IS_NOT_ROOT(pathlen) ((pathlen > 2)? 1 : 0) + +char * +stat_to_str (struct iatt *stbuf); + +call_frame_t * +server_copy_frame (call_frame_t *frame); + +void free_state (server_state_t *state); + +void server_loc_wipe (loc_t *loc); + +int32_t +gf_add_locker (struct _lock_table *table, const char *volume, + loc_t *loc, + fd_t *fd, + pid_t pid); + +int32_t +gf_del_locker (struct _lock_table *table, const char *volume, + loc_t *loc, + fd_t *fd, + pid_t pid); + +int32_t +gf_direntry_to_bin (dir_entry_t *head, char *bufferp); + +void +server_print_request (call_frame_t *frame); + +#endif /* __SERVER_HELPERS_H__ */ diff --git a/xlators/protocol/legacy/server/src/server-mem-types.h b/xlators/protocol/legacy/server/src/server-mem-types.h new file mode 100644 index 00000000000..86877d79dac --- /dev/null +++ b/xlators/protocol/legacy/server/src/server-mem-types.h @@ -0,0 +1,39 @@ +/* + Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#ifndef __AFR_MEM_TYPES_H__ +#define __AFR_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_server_mem_types_ { + gf_server_mt_dir_entry_t = gf_common_mt_end + 1, + gf_server_mt_volfile_ctx, + gf_server_mt_server_state_t, + gf_server_mt_server_conf_t, + gf_server_mt_locker, + gf_server_mt_lock_table, + gf_server_mt_char, + gf_server_mt_server_connection_t, + gf_server_mt_resolve_comp, + gf_server_mt_end +}; +#endif + diff --git a/xlators/protocol/legacy/server/src/server-protocol.c b/xlators/protocol/legacy/server/src/server-protocol.c new file mode 100644 index 00000000000..de7d2ac0b5a --- /dev/null +++ b/xlators/protocol/legacy/server/src/server-protocol.c @@ -0,0 +1,6577 @@ +/* + Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is GF_FREE software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif +#include <time.h> +#include <sys/uio.h> +#include <sys/resource.h> + +#include <libgen.h> + +#include "transport.h" +#include "fnmatch.h" +#include "xlator.h" +#include "protocol.h" +#include "server-protocol.h" +#include "server-helpers.h" +#include "call-stub.h" +#include "defaults.h" +#include "list.h" +#include "dict.h" +#include "compat.h" +#include "compat-errno.h" +#include "statedump.h" +#include "md5.h" + + +void +print_caller (char *str, int size, call_frame_t *frame) +{ + int filled = 0; + server_state_t *state = NULL; + transport_t *trans = NULL; + + state = CALL_STATE (frame); + trans = state->trans; + + filled += snprintf (str + filled, size - filled, + " Callid=%"PRId64", Client=%s", + frame->root->unique, + trans->peerinfo.identifier); + + return; +} + + +void +server_print_resolve (char *str, int size, server_resolve_t *resolve) +{ + int filled = 0; + + if (!resolve) { + snprintf (str, size, "<nul>"); + return; + } + + filled += snprintf (str + filled, size - filled, + " Resolve={"); + if (resolve->fd_no != -1) + filled += snprintf (str + filled, size - filled, + "fd=%"PRId64",", (uint64_t) resolve->fd_no); + if (resolve->ino) + filled += snprintf (str + filled, size - filled, + "ino=%"PRIu64",", (uint64_t) resolve->ino); + if (resolve->par) + filled += snprintf (str + filled, size - filled, + "par=%"PRIu64",", (uint64_t) resolve->par); + if (resolve->gen) + filled += snprintf (str + filled, size - filled, + "gen=%"PRIu64",", (uint64_t) resolve->gen); + if (resolve->bname) + filled += snprintf (str + filled, size - filled, + "bname=%s,", resolve->bname); + if (resolve->path) + filled += snprintf (str + filled, size - filled, + "path=%s", resolve->path); + + filled += snprintf (str + filled, size - filled, "}"); +} + + +void +server_print_loc (char *str, int size, loc_t *loc) +{ + int filled = 0; + + if (!loc) { + snprintf (str, size, "<nul>"); + return; + } + + filled += 
snprintf (str + filled, size - filled, + " Loc={"); + + if (loc->path) + filled += snprintf (str + filled, size - filled, + "path=%s,", loc->path); + if (loc->inode) + filled += snprintf (str + filled, size - filled, + "inode=%p,", loc->inode); + if (loc->parent) + filled += snprintf (str + filled, size - filled, + "parent=%p", loc->parent); + + filled += snprintf (str + filled, size - filled, "}"); +} + + +void +server_print_params (char *str, int size, server_state_t *state) +{ + int filled = 0; + + filled += snprintf (str + filled, size - filled, + " Params={"); + + if (state->fd) + filled += snprintf (str + filled, size - filled, + "fd=%p,", state->fd); + if (state->valid) + filled += snprintf (str + filled, size - filled, + "valid=%d,", state->valid); + if (state->flags) + filled += snprintf (str + filled, size - filled, + "flags=%d,", state->flags); + if (state->wbflags) + filled += snprintf (str + filled, size - filled, + "wbflags=%d,", state->wbflags); + if (state->size) + filled += snprintf (str + filled, size - filled, + "size=%Zu,", state->size); + if (state->offset) + filled += snprintf (str + filled, size - filled, + "offset=%"PRId64",", state->offset); + if (state->cmd) + filled += snprintf (str + filled, size - filled, + "cmd=%d,", state->cmd); + if (state->type) + filled += snprintf (str + filled, size - filled, + "type=%d,", state->type); + if (state->name) + filled += snprintf (str + filled, size - filled, + "name=%s,", state->name); + if (state->mask) + filled += snprintf (str + filled, size - filled, + "mask=%d,", state->mask); + if (state->volume) + filled += snprintf (str + filled, size - filled, + "volume=%s,", state->volume); + + filled += snprintf (str + filled, size - filled, + "bound_xl=%s}", state->bound_xl->name); +} + + +int +server_resolve_is_empty (server_resolve_t *resolve) +{ + if (resolve->fd_no != -1) + return 0; + + if (resolve->ino != 0) + return 0; + + if (resolve->gen != 0) + return 0; + + if (resolve->par != 0) + return 0; + 
+ if (resolve->path != 0) + return 0; + + if (resolve->bname != 0) + return 0; + + return 1; +} + + +void +server_print_reply (call_frame_t *frame, int op_ret, int op_errno) +{ + server_conf_t *conf = NULL; + server_state_t *state = NULL; + xlator_t *this = NULL; + char caller[512]; + char fdstr[32]; + char *op = "UNKNOWN"; + + this = frame->this; + conf = this->private; + + if (!conf->trace) + return; + + state = CALL_STATE (frame); + + print_caller (caller, 256, frame); + + switch (frame->root->type) { + case GF_OP_TYPE_FOP_REQUEST: + case GF_OP_TYPE_FOP_REPLY: + op = gf_fop_list[frame->root->op]; + break; + case GF_OP_TYPE_MOP_REQUEST: + case GF_OP_TYPE_MOP_REPLY: + op = gf_mop_list[frame->root->op]; + break; + case GF_OP_TYPE_CBK_REQUEST: + case GF_OP_TYPE_CBK_REPLY: + op = gf_cbk_list[frame->root->op]; + break; + } + + fdstr[0] = '\0'; + if (state->fd) + snprintf (fdstr, 32, " fd=%p", state->fd); + + gf_log (this->name, GF_LOG_NORMAL, + "%s%s => (%d, %d)%s", + op, caller, op_ret, op_errno, fdstr); +} + + +void +server_print_request (call_frame_t *frame) +{ + server_conf_t *conf = NULL; + xlator_t *this = NULL; + server_state_t *state = NULL; + char resolve_vars[256]; + char resolve2_vars[256]; + char loc_vars[256]; + char loc2_vars[256]; + char other_vars[512]; + char caller[512]; + char *op = "UNKNOWN"; + + this = frame->this; + conf = this->private; + + state = CALL_STATE (frame); + + if (!conf->trace) + return; + + memset (resolve_vars, '\0', 256); + memset (resolve2_vars, '\0', 256); + memset (loc_vars, '\0', 256); + memset (loc2_vars, '\0', 256); + memset (other_vars, '\0', 256); + + print_caller (caller, 256, frame); + + if (!server_resolve_is_empty (&state->resolve)) { + server_print_resolve (resolve_vars, 256, &state->resolve); + server_print_loc (loc_vars, 256, &state->loc); + } + + if (!server_resolve_is_empty (&state->resolve2)) { + server_print_resolve (resolve2_vars, 256, &state->resolve2); + server_print_loc (loc2_vars, 256, &state->loc2); + } + 
+ server_print_params (other_vars, 512, state); + + switch (frame->root->type) { + case GF_OP_TYPE_FOP_REQUEST: + case GF_OP_TYPE_FOP_REPLY: + op = gf_fop_list[frame->root->op]; + break; + case GF_OP_TYPE_MOP_REQUEST: + case GF_OP_TYPE_MOP_REPLY: + op = gf_mop_list[frame->root->op]; + break; + case GF_OP_TYPE_CBK_REQUEST: + case GF_OP_TYPE_CBK_REPLY: + op = gf_cbk_list[frame->root->op]; + break; + } + + gf_log (this->name, GF_LOG_NORMAL, + "%s%s%s%s%s%s%s", + gf_fop_list[frame->root->op], caller, + resolve_vars, loc_vars, resolve2_vars, loc2_vars, other_vars); +} + + +static void +protocol_server_reply (call_frame_t *frame, int type, int op, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iovec *vector, int count, + struct iobref *iobref) +{ + server_state_t *state = NULL; + xlator_t *bound_xl = NULL; + transport_t *trans = NULL; + int ret = 0; + + xlator_t *this = NULL; + + bound_xl = BOUND_XL (frame); + state = CALL_STATE (frame); + trans = state->trans; + this = frame->this; + + hdr->callid = hton64 (frame->root->unique); + hdr->type = hton32 (type); + hdr->op = hton32 (op); + + server_print_reply (frame, ntoh32 (hdr->rsp.op_ret), + gf_error_to_errno (ntoh32 (hdr->rsp.op_errno))); + + ret = transport_submit (trans, (char *)hdr, hdrlen, vector, + count, iobref); + if (ret < 0) { + gf_log ("protocol/server", GF_LOG_ERROR, + "frame %"PRId64": failed to submit. 
op= %d, type= %d", + frame->root->unique, op, type); + } + + STACK_DESTROY (frame->root); + + if (state) + free_state (state); + +} + + +/* + * server_lk_cbk - lk callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @lock: + * + * not for external reference + */ +int +server_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct flock *lock) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_lk_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + gf_flock_from_flock (&rsp->flock, lock); + } else if (op_errno != ENOSYS) { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_TRACE, + "%"PRId64": LK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_LK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +int +server_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_inodelk_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + conn = SERVER_CONNECTION(frame); + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + if (state->flock.l_type == F_UNLCK) + gf_del_locker (conn->ltable, state->volume, + &state->loc, NULL, frame->root->pid); + else + gf_add_locker (conn->ltable, state->volume, + &state->loc, NULL, frame->root->pid); + } else if (op_errno != ENOSYS) { + gf_log (this->name, GF_LOG_TRACE, + "%"PRId64": INODELK %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_INODELK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +int +server_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_finodelk_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + conn = SERVER_CONNECTION(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + state = CALL_STATE(frame); + + if (op_ret >= 0) { + if (state->flock.l_type == F_UNLCK) + gf_del_locker (conn->ltable, state->volume, + NULL, state->fd, + frame->root->pid); + else + gf_add_locker (conn->ltable, state->volume, + NULL, state->fd, + frame->root->pid); + } else if (op_errno != ENOSYS) { + gf_log (this->name, GF_LOG_TRACE, + "%"PRId64": FINODELK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FINODELK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_entrylk_cbk - + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @lock: + * + * not for external reference + */ +int +server_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_entrylk_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + conn = SERVER_CONNECTION(frame); + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + if (state->cmd == ENTRYLK_UNLOCK) + gf_del_locker (conn->ltable, state->volume, + &state->loc, NULL, frame->root->pid); + else + gf_add_locker (conn->ltable, state->volume, + &state->loc, NULL, frame->root->pid); + } else if (op_errno != ENOSYS) { + gf_log (this->name, GF_LOG_TRACE, + "%"PRId64": INODELK %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_ENTRYLK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +int +server_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_fentrylk_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + conn = SERVER_CONNECTION(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + state = CALL_STATE(frame); + if (state->cmd == ENTRYLK_UNLOCK) + gf_del_locker (conn->ltable, state->volume, + NULL, state->fd, frame->root->pid); + else + gf_add_locker (conn->ltable, state->volume, + NULL, state->fd, frame->root->pid); + } else if (op_errno != ENOSYS) { + gf_log (this->name, GF_LOG_TRACE, + "%"PRId64": FENTRYLK %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FENTRYLK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_access_cbk - access callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int +server_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_access_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_ACCESS, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_rmdir_cbk - rmdir callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int +server_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_rmdir_rsp_t *rsp = NULL; + server_state_t *state = NULL; + int32_t gf_errno = 0; + size_t hdrlen = 0; + inode_t *parent = NULL; + + state = CALL_STATE(frame); + + if (op_ret == 0) { + inode_unlink (state->loc.inode, state->loc.parent, + state->loc.name); + parent = inode_parent (state->loc.inode, 0, NULL); + if (parent) + inode_unref (parent); + else + inode_forget (state->loc.inode, 0); + } else { + gf_log (this->name, GF_LOG_TRACE, + "%"PRId64": RMDIR %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + gf_stat_from_iatt (&rsp->preparent, preparent); + gf_stat_from_iatt (&rsp->postparent, postparent); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_RMDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_mkdir_cbk - mkdir callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int +server_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_mkdir_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + inode_t *link_inode = NULL; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + gf_stat_from_iatt (&rsp->stat, stbuf); + gf_stat_from_iatt (&rsp->preparent, preparent); + gf_stat_from_iatt (&rsp->postparent, postparent); + + link_inode = inode_link (inode, state->loc.parent, + state->loc.name, stbuf); + inode_lookup (link_inode); + inode_unref (link_inode); + } else { + gf_log (this->name, GF_LOG_TRACE, + "%"PRId64": MKDIR %s ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + op_ret, strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_MKDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_mknod_cbk - mknod callback for server protocol + * 
@frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int +server_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_mknod_rsp_t *rsp = NULL; + server_state_t *state = NULL; + int32_t gf_errno = 0; + size_t hdrlen = 0; + inode_t *link_inode = NULL; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + gf_stat_from_iatt (&rsp->stat, stbuf); + gf_stat_from_iatt (&rsp->preparent, preparent); + gf_stat_from_iatt (&rsp->postparent, postparent); + + link_inode = inode_link (inode, state->loc.parent, + state->loc.name, stbuf); + inode_lookup (link_inode); + inode_unref (link_inode); + } else { + gf_log (this->name, GF_LOG_TRACE, + "%"PRId64": MKNOD %s ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + op_ret, strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_MKNOD, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_fsyncdir_cbk - fsyncdir callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int +server_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fsyncdir_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + if (op_ret < 0) { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_TRACE, + "%"PRId64": FSYNCDIR %"PRId64" 
(%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FSYNCDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + + +/* + * server_readdir_cbk - getdents callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int +server_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_readdir_rsp_t *rsp = NULL; + size_t hdrlen = 0; + size_t buf_size = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + if (op_ret > 0) + buf_size = gf_dirent_serialize (entries, NULL, 0); + + hdrlen = gf_hdr_len (rsp, buf_size); + hdr = gf_hdr_new (rsp, buf_size); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret > 0) { + rsp->size = hton32 (buf_size); + gf_dirent_serialize (entries, rsp->buf, buf_size); + } else { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_TRACE, + "%"PRId64": READDIR %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_READDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_releasedir_cbk - releasedir callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int +server_releasedir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_cbk_releasedir_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_CBK_REPLY, GF_CBK_RELEASEDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_opendir_cbk - opendir callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * @fd: file descriptor structure of opened directory + * + * not for external reference + */ +int +server_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_opendir_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + uint64_t fd_no = 0; + + conn = SERVER_CONNECTION (frame); + + state = CALL_STATE (frame); + + if (op_ret >= 0) { + fd_bind (fd); + + fd_no = gf_fd_unused_get (conn->fdtable, fd); + fd_ref (fd); // on behalf of the client + } else { + gf_log (this->name, GF_LOG_TRACE, + "%"PRId64": OPENDIR %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + rsp->fd = hton64 (fd_no); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_OPENDIR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_statfs_cbk - statfs callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * @buf: + * + * not for external reference + */ +int +server_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *buf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_statfs_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE (frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + gf_statfs_from_statfs (&rsp->statfs, buf); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_STATFS, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_removexattr_cbk - removexattr callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int +server_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_removexattr_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE (frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + 
gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_REMOVEXATTR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_getxattr_cbk - getxattr callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * @value: + * + * not for external reference + */ +int +server_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_getxattr_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t len = 0; + int32_t gf_errno = 0; + int32_t ret = -1; + + state = CALL_STATE (frame); + + if (op_ret >= 0) { + len = dict_serialized_length (dict); + if (len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to get serialized length of " + "reply dict", + state->loc.path, state->resolve.ino); + op_ret = -1; + op_errno = EINVAL; + len = 0; + } + } + + hdrlen = gf_hdr_len (rsp, len + 1); + hdr = gf_hdr_new (rsp, len + 1); + rsp = gf_param (hdr); + + if (op_ret >= 0) { + ret = dict_serialize (dict, rsp->dict); + if (len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to serialize reply dict", + state->loc.path, state->resolve.ino); + op_ret = -1; + op_errno = -ret; + } + } + rsp->dict_len = hton32 (len); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_GETXATTR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +int +server_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fgetxattr_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t len = 0; + int32_t gf_errno = 0; + 
int32_t ret = -1; + + state = CALL_STATE (frame); + + if (op_ret >= 0) { + len = dict_serialized_length (dict); + if (len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to get serialized length of " + "reply dict", + state->loc.path, state->resolve.ino); + op_ret = -1; + op_errno = EINVAL; + len = 0; + } + } + + hdrlen = gf_hdr_len (rsp, len + 1); + hdr = gf_hdr_new (rsp, len + 1); + rsp = gf_param (hdr); + + if (op_ret >= 0) { + ret = dict_serialize (dict, rsp->dict); + if (len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to serialize reply dict", + state->loc.path, state->resolve.ino); + op_ret = -1; + op_errno = -ret; + } + } + rsp->dict_len = hton32 (len); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FGETXATTR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_setxattr_cbk - setxattr callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int +server_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_setxattr_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE (frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_SETXATTR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +int +server_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fsetxattr_rsp_t *rsp = NULL; + 
server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FSETXATTR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_rename_cbk - rename callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int +server_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_rename_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + stbuf->ia_ino = state->loc.inode->ino; + stbuf->ia_type = state->loc.inode->ia_type; + + gf_log (state->bound_xl->name, GF_LOG_TRACE, + "%"PRId64": RENAME_CBK (%"PRId64") %"PRId64"/%s " + "==> %"PRId64"/%s", + frame->root->unique, state->loc.inode->ino, + state->loc.parent->ino, state->loc.name, + state->loc2.parent->ino, state->loc2.name); + + inode_rename (state->itable, + state->loc.parent, state->loc.name, + state->loc2.parent, state->loc2.name, + state->loc.inode, stbuf); + gf_stat_from_iatt (&rsp->stat, stbuf); + + gf_stat_from_iatt (&rsp->preoldparent, preoldparent); + gf_stat_from_iatt (&rsp->postoldparent, postoldparent); + + gf_stat_from_iatt (&rsp->prenewparent, 
prenewparent); + gf_stat_from_iatt (&rsp->postnewparent, postnewparent); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_RENAME, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_unlink_cbk - unlink callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int +server_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_unlink_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + inode_t *parent = NULL; + + state = CALL_STATE(frame); + + if (op_ret == 0) { + gf_log (state->bound_xl->name, GF_LOG_TRACE, + "%"PRId64": UNLINK_CBK %"PRId64"/%s (%"PRId64")", + frame->root->unique, state->loc.parent->ino, + state->loc.name, state->loc.inode->ino); + + inode_unlink (state->loc.inode, state->loc.parent, + state->loc.name); + + parent = inode_parent (state->loc.inode, 0, NULL); + if (parent) + inode_unref (parent); + else + inode_forget (state->loc.inode, 0); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": UNLINK %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + gf_stat_from_iatt (&rsp->preparent, preparent); + gf_stat_from_iatt (&rsp->postparent, postparent); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_UNLINK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_symlink_cbk - symlink callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for external reference + */ +int +server_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_symlink_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + inode_t *link_inode = NULL; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno)); + + if (op_ret >= 0) { + gf_stat_from_iatt (&rsp->stat, stbuf); + gf_stat_from_iatt (&rsp->preparent, preparent); + gf_stat_from_iatt (&rsp->postparent, postparent); + + link_inode = inode_link (inode, state->loc.parent, + state->loc.name, stbuf); + inode_lookup (link_inode); + inode_unref (link_inode); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": SYMLINK %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_SYMLINK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_link_cbk - link callback for server protocol + * @frame: call frame + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int +server_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, struct iatt *preparent, + struct iatt *postparent) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_link_rsp_t *rsp = NULL; + server_state_t *state = NULL; + int32_t gf_errno = 0; + size_t hdrlen = 0; + inode_t *link_inode = NULL; + + state = CALL_STATE(frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + stbuf->ia_ino = state->loc.inode->ino; + + gf_stat_from_iatt (&rsp->stat, stbuf); + gf_stat_from_iatt (&rsp->preparent, preparent); + gf_stat_from_iatt (&rsp->postparent, postparent); + + gf_log (state->bound_xl->name, GF_LOG_TRACE, + "%"PRId64": LINK (%"PRId64") %"PRId64"/%s ==> %"PRId64"/%s", + frame->root->unique, inode->ino, + state->loc2.parent->ino, + state->loc2.name, state->loc.parent->ino, + state->loc.name); + + link_inode = inode_link (inode, state->loc2.parent, + state->loc2.name, stbuf); + inode_unref (link_inode); + } else { + gf_log (state->bound_xl->name, GF_LOG_DEBUG, + "%"PRId64": LINK (%"PRId64") %"PRId64"/%s ==> %"PRId64"/%s " + " ==> %"PRId32" (%s)", + frame->root->unique, state->resolve2.ino, + state->resolve2.par, + state->resolve2.bname, state->resolve.par, + state->resolve.bname, + op_ret, strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_LINK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * 
server_truncate_cbk - truncate callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int +server_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_truncate_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE (frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + gf_stat_from_iatt (&rsp->prestat, prebuf); + gf_stat_from_iatt (&rsp->poststat, postbuf); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": TRUNCATE %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_TRUNCATE, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_fstat_cbk - fstat callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int +server_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fstat_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + gf_stat_from_iatt (&rsp->stat, stbuf); + } else { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FSTAT %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FSTAT, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_ftruncate_cbk - ftruncate callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int +server_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_ftruncate_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret == 0) { + gf_stat_from_iatt (&rsp->prestat, prebuf); + gf_stat_from_iatt (&rsp->poststat, postbuf); + } else { + state = CALL_STATE (frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FTRUNCATE %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FTRUNCATE, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_flush_cbk - flush callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int +server_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_flush_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + if (op_ret < 0) { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FLUSH %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FLUSH, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_fsync_cbk - fsync callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int +server_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fsync_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + if (op_ret < 0) { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FSYNC %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + gf_stat_from_iatt (&(rsp->prestat), prebuf); + gf_stat_from_iatt (&(rsp->poststat), postbuf); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FSYNC, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_release_cbk - rleease callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int +server_release_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + gf_hdr_common_t *hdr = NULL; + gf_cbk_release_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_CBK_REPLY, GF_CBK_RELEASE, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_writev_cbk - writev callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ + +int +server_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_write_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno)); + + if (op_ret >= 0) { + gf_stat_from_iatt 
(&rsp->prestat, prebuf); + gf_stat_from_iatt (&rsp->poststat, postbuf); + } else { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": WRITEV %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_WRITE, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_readv_cbk - readv callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @vector: + * @count: + * + * not for external reference + */ +int +server_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iovec *vector, int32_t count, + struct iatt *stbuf, struct iobref *iobref) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_read_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + gf_stat_from_iatt (&rsp->stat, stbuf); + } else { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": READV %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_READ, + hdr, hdrlen, vector, count, iobref); + + return 0; +} + + +/* + * server_open_cbk - open callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @fd: + * + * not for external reference + */ +int +server_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_open_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + uint64_t fd_no = 0; + + conn = SERVER_CONNECTION (frame); + + state = CALL_STATE (frame); + + if (op_ret >= 0) { + fd_bind (fd); + + fd_no = gf_fd_unused_get (conn->fdtable, fd); + fd_ref (fd); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": OPEN %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + rsp->fd = hton64 (fd_no); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_OPEN, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_create_cbk - create callback for server + * @frame: call frame + * @cookie: + * @this: translator structure + * @op_ret: + * @op_errno: + * @fd: file descriptor + * @inode: inode structure + * @stbuf: struct iatt of created file + * + * not for external reference + */ +int +server_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + fd_t *fd, inode_t *inode, struct iatt *stbuf, + struct iatt *preparent, struct iatt *postparent) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + gf_fop_create_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + uint64_t fd_no = 0; + inode_t *link_inode = NULL; + + conn = SERVER_CONNECTION (frame); + + state = CALL_STATE (frame); + + if (op_ret >= 0) { + gf_log (state->bound_xl->name, GF_LOG_TRACE, + "%"PRId64": CREATE %"PRId64"/%s (%"PRId64")", + frame->root->unique, state->loc.parent->ino, + state->loc.name, stbuf->ia_ino); + + link_inode = inode_link (inode, state->loc.parent, + state->loc.name, stbuf); + + if (link_inode != inode) { + gf_log (this->name, GF_LOG_DEBUG, + "create(%s) inode (ptr=%p, ino=%"PRId64", " + "gen=%"PRId64") found conflict (ptr=%p, " + "ino=%"PRId64", gen=%"PRId64")", + state->loc.path, inode, inode->ino, + inode->generation, link_inode, + link_inode->ino, link_inode->generation); + + /* + VERY racy code (if used anywhere else) + -- don't do this without understanding + */ + + inode_unref (fd->inode); + fd->inode = inode_ref (link_inode); + } + + inode_lookup 
(link_inode); + inode_unref (link_inode); + + fd_bind (fd); + + fd_no = gf_fd_unused_get (conn->fdtable, fd); + fd_ref (fd); + + if ((fd_no < 0) || (fd == 0)) { + op_ret = fd_no; + op_errno = errno; + } + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": CREATE %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + rsp->fd = hton64 (fd_no); + + if (op_ret >= 0) { + gf_stat_from_iatt (&rsp->stat, stbuf); + gf_stat_from_iatt (&rsp->preparent, preparent); + gf_stat_from_iatt (&rsp->postparent, postparent); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_CREATE, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_readlink_cbk - readlink callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @buf: + * + * not for external reference + */ +int +server_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, const char *buf, + struct iatt *sbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_readlink_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + size_t linklen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE(frame); + + if (op_ret >= 0) { + linklen = strlen (buf) + 1; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": READLINK %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + hdrlen = gf_hdr_len (rsp, linklen); + hdr = gf_hdr_new (rsp, linklen); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno)); + + if (op_ret >= 0) { + gf_stat_from_iatt (&(rsp->buf), sbuf); + strcpy (rsp->path, buf); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_READLINK, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_stat_cbk - stat callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int +server_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_stat_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE (frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno)); + + if (op_ret == 0) { + gf_stat_from_iatt (&rsp->stat, stbuf); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": STAT %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_STAT, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_setattr_cbk - setattr callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ + +int +server_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *statpre, struct iatt *statpost) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_setattr_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE (frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno)); + + if (op_ret == 0) { + gf_stat_from_iatt (&rsp->statpre, statpre); + gf_stat_from_iatt (&rsp->statpost, statpost); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": SETATTR %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_SETATTR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * server_setattr_cbk - setattr callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @stbuf: + * + * not for external reference + */ +int +server_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *statpre, struct iatt *statpost) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_fsetattr_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + state = CALL_STATE (frame); + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno_to_error (op_errno)); + + if (op_ret == 0) { + gf_stat_from_iatt (&rsp->statpre, statpre); + gf_stat_from_iatt (&rsp->statpost, statpost); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FSETATTR %"PRId64" (%"PRId64") ==> " + "%"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? 
state->fd->inode->ino : 0, + op_ret, strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FSETATTR, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +/* + * server_lookup_cbk - lookup callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @inode: + * @stbuf: + * + * not for external reference + */ +int +server_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct iatt *stbuf, dict_t *dict, + struct iatt *postparent) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_lookup_rsp_t *rsp = NULL; + server_state_t *state = NULL; + inode_t *root_inode = NULL; + int32_t dict_len = 0; + size_t hdrlen = 0; + int32_t gf_errno = 0; + int32_t ret = -1; + inode_t *link_inode = NULL; + loc_t fresh_loc = {0,}; + + state = CALL_STATE(frame); + + if (state->is_revalidate == 1 && op_ret == -1) { + state->is_revalidate = 2; + loc_copy (&fresh_loc, &state->loc); + inode_unref (fresh_loc.inode); + fresh_loc.inode = inode_new (state->itable); + + STACK_WIND (frame, server_lookup_cbk, + BOUND_XL (frame), BOUND_XL (frame)->fops->lookup, + &fresh_loc, state->dict); + + loc_wipe (&fresh_loc); + return 0; + } + + if (dict) { + dict_len = dict_serialized_length (dict); + if (dict_len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to get serialized " + "length of reply dict", + state->loc.path, state->loc.inode->ino); + op_ret = -1; + op_errno = EINVAL; + dict_len = 0; + } + } + + hdrlen = gf_hdr_len (rsp, dict_len); + hdr = gf_hdr_new (rsp, dict_len); + rsp = gf_param (hdr); + + if ((op_ret >= 0) && dict) { + ret = dict_serialize (dict, rsp->dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to serialize reply dict", + state->loc.path, state->loc.inode->ino); + op_ret = -1; + op_errno = -ret; + dict_len = 0; + } + } + rsp->dict_len = hton32 (dict_len); + + hdr->rsp.op_ret = hton32 
(op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (postparent) + gf_stat_from_iatt (&rsp->postparent, postparent); + + if (op_ret == 0) { + root_inode = BOUND_XL(frame)->itable->root; + if (inode == root_inode) { + /* we just looked up root ("/") */ + stbuf->ia_ino = 1; + if (inode->ia_type == 0) + inode->ia_type = stbuf->ia_type; + } + + gf_stat_from_iatt (&rsp->stat, stbuf); + + if (inode->ino != 1) { + link_inode = inode_link (inode, state->loc.parent, + state->loc.name, stbuf); + inode_lookup (link_inode); + inode_unref (link_inode); + } + } else { + if (state->is_revalidate && op_errno == ENOENT) { + if (state->loc.inode->ino != 1) { + inode_unlink (state->loc.inode, + state->loc.parent, + state->loc.name); + } + } + + gf_log (this->name, + (op_errno == ENOENT ? GF_LOG_TRACE : GF_LOG_DEBUG), + "%"PRId64": LOOKUP %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_LOOKUP, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +int +server_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_xattrop_rsp_t *rsp = NULL; + server_state_t *state = NULL; + size_t hdrlen = 0; + int32_t len = 0; + int32_t gf_errno = 0; + int32_t ret = -1; + + state = CALL_STATE (frame); + + if (op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": XATTROP %s (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->loc.path, + state->loc.inode ? 
state->loc.inode->ino : 0, + op_ret, strerror (op_errno)); + } + + if ((op_ret >= 0) && dict) { + len = dict_serialized_length (dict); + if (len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to get serialized length" + " for reply dict", + state->loc.path, state->loc.inode->ino); + op_ret = -1; + op_errno = EINVAL; + len = 0; + } + } + + hdrlen = gf_hdr_len (rsp, len + 1); + hdr = gf_hdr_new (rsp, len + 1); + rsp = gf_param (hdr); + + if ((op_ret >= 0) && dict) { + ret = dict_serialize (dict, rsp->dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s (%"PRId64"): failed to serialize reply dict", + state->loc.path, state->loc.inode->ino); + op_ret = -1; + op_errno = -ret; + len = 0; + } + } + rsp->dict_len = hton32 (len); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_XATTROP, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +int +server_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_xattrop_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t len = 0; + int32_t gf_errno = 0; + int32_t ret = -1; + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if (op_ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "%"PRId64": FXATTROP %"PRId64" (%"PRId64") ==> %"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + if ((op_ret >= 0) && dict) { + len = dict_serialized_length (dict); + if (len < 0) { + gf_log (this->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): failed to get " + "serialized length for reply dict", + state->resolve.fd_no, state->fd->inode->ino); + op_ret = -1; + op_errno = EINVAL; + len = 0; + } + } + + hdrlen = gf_hdr_len (rsp, len + 1); + hdr = gf_hdr_new (rsp, len + 1); + rsp = gf_param (hdr); + + if ((op_ret >= 0) && dict) { + ret = dict_serialize (dict, rsp->dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): failed to " + "serialize reply dict", + state->resolve.fd_no, state->fd->inode->ino); + op_ret = -1; + op_errno = -ret; + len = 0; + } + } + rsp->dict_len = hton32 (len); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_FXATTROP, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + + +int +server_lookup_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + if (!state->loc.inode) + state->loc.inode = inode_new (state->itable); + else + state->is_revalidate = 1; + + STACK_WIND (frame, server_lookup_cbk, + bound_xl, bound_xl->fops->lookup, + &state->loc, state->dict); + + return 0; +err: + server_lookup_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL, NULL, NULL); + + return 0; +} + + +int +server_lookup (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_lookup_req_t *req = NULL; + server_state_t *state = NULL; + int32_t ret = -1; + size_t pathlen = 0; + size_t baselen = 0; + size_t dictlen = 0; + dict_t *xattr_req = NULL; + char *req_dictbuf = NULL; + + req = gf_param (hdr); + + state = 
CALL_STATE (frame); + + pathlen = STRLEN_0 (req->path); + dictlen = ntoh32 (req->dictlen); + + /* NOTE: lookup() uses req->ino only to identify if a lookup() + * is requested for 'root' or not + */ + state->resolve.ino = ntoh64 (req->ino); + if (state->resolve.ino != 1) + state->resolve.ino = 0; + + state->resolve.type = RESOLVE_DONTCARE; + state->resolve.par = ntoh64 (req->par); + state->resolve.gen = ntoh64 (req->gen); + state->resolve.path = gf_strdup (req->path); + + if (IS_NOT_ROOT (pathlen)) { + state->resolve.bname = gf_strdup (req->bname + pathlen); + baselen = STRLEN_0 (state->resolve.bname); + } + + if (dictlen) { + /* Unserialize the dictionary */ + req_dictbuf = memdup (req->dict + pathlen + baselen, dictlen); + + xattr_req = dict_new (); + + ret = dict_unserialize (req_dictbuf, dictlen, &xattr_req); + if (ret < 0) { + gf_log (bound_xl->name, GF_LOG_ERROR, + "%"PRId64": %s (%"PRId64"): failed to " + "unserialize req-buffer to dictionary", + frame->root->unique, state->resolve.path, + state->resolve.ino); + GF_FREE (req_dictbuf); + goto err; + } + + xattr_req->extra_free = req_dictbuf; + state->dict = xattr_req; + } + + resolve_and_resume (frame, server_lookup_resume); + + return 0; +err: + if (xattr_req) + dict_unref (xattr_req); + + server_lookup_cbk (frame, NULL, frame->this, -1, EINVAL, NULL, NULL, + NULL, NULL); + return 0; +} + + +/* + * server_forget - forget function for server protocol + * + * not for external reference + */ +int +server_forget (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_log ("forget", GF_LOG_CRITICAL, "function not implemented"); + return 0; +} + + +int +server_stat_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_stat_cbk, + bound_xl, bound_xl->fops->stat, &state->loc); + return 0; +err: + server_stat_cbk (frame, 
NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL); + return 0; +} + + +int +server_stat (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_stat_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + { + state->resolve.type = RESOLVE_MUST; + state->resolve.ino = ntoh64 (req->ino); + state->resolve.gen = ntoh64 (req->gen); + state->resolve.path = gf_strdup (req->path); + } + + resolve_and_resume (frame, server_stat_resume); + + return 0; +} + + +int +server_setattr_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_setattr_cbk, + bound_xl, bound_xl->fops->setattr, + &state->loc, &state->stbuf, state->valid); + return 0; +err: + server_setattr_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL); + + return 0; +} + + +int +server_setattr (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_setattr_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.ino = ntoh64 (req->ino); + state->resolve.gen = ntoh64 (req->gen); + state->resolve.path = gf_strdup (req->path); + + gf_stat_to_iatt (&req->stbuf, &state->stbuf); + state->valid = ntoh32 (req->valid); + + resolve_and_resume (frame, server_setattr_resume); + + return 0; +} + + +int +server_fsetattr_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_fsetattr_cbk, + bound_xl, bound_xl->fops->fsetattr, + state->fd, &state->stbuf, state->valid); + return 0; +err: + server_fsetattr_cbk (frame, 
NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL); + + return 0; +} + + +int +server_fsetattr (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_fsetattr_req_t *req = NULL; + server_state_t *state = NULL; + + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + + gf_stat_to_iatt (&req->stbuf, &state->stbuf); + state->valid = ntoh32 (req->valid); + + resolve_and_resume (frame, server_fsetattr_resume); + + return 0; +} + + +int +server_readlink_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_readlink_cbk, + bound_xl, bound_xl->fops->readlink, + &state->loc, state->size); + return 0; +err: + server_readlink_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL); + return 0; +} + + +int +server_readlink (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_readlink_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.ino = ntoh64 (req->ino); + state->resolve.gen = ntoh64 (req->gen); + state->resolve.path = gf_strdup (req->path); + + state->size = ntoh32 (req->size); + + resolve_and_resume (frame, server_readlink_resume); + + return 0; +} + + +int +server_create_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + state->loc.inode = inode_new (state->itable); + + state->fd = fd_create (state->loc.inode, frame->root->pid); + state->fd->flags = state->flags; + + STACK_WIND (frame, server_create_cbk, + bound_xl, 
bound_xl->fops->create, + &(state->loc), state->flags, state->mode, state->fd); + + return 0; +err: + server_create_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL, NULL, + NULL, NULL); + return 0; +} + + +int +server_create (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_create_req_t *req = NULL; + server_state_t *state = NULL; + int pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + pathlen = STRLEN_0 (req->path); + + state->resolve.type = RESOLVE_NOT; + state->resolve.par = ntoh64 (req->par); + state->resolve.gen = ntoh64 (req->gen); + state->resolve.path = gf_strdup (req->path); + state->resolve.bname = gf_strdup (req->bname + pathlen); + state->mode = ntoh32 (req->mode); + state->flags = gf_flags_to_flags (ntoh32 (req->flags)); + + resolve_and_resume (frame, server_create_resume); + + return 0; +} + + +int +server_open_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + state->fd = fd_create (state->loc.inode, frame->root->pid); + state->fd->flags = state->flags; + + STACK_WIND (frame, server_open_cbk, + bound_xl, bound_xl->fops->open, + &state->loc, state->flags, state->fd, 0); + + return 0; +err: + server_open_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL); + return 0; +} + + +int +server_open (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_open_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.ino = ntoh64 (req->ino); + state->resolve.gen = ntoh64 (req->gen); + state->resolve.path = gf_strdup (req->path); + + state->flags = gf_flags_to_flags (ntoh32 (req->flags)); + + resolve_and_resume (frame, 
server_open_resume); + + return 0; +} + + +int +server_readv_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_readv_cbk, + bound_xl, bound_xl->fops->readv, + state->fd, state->size, state->offset); + + return 0; +err: + server_readv_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, 0, NULL, NULL); + return 0; +} + + +int +server_readv (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_read_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + state->size = ntoh32 (req->size); + state->offset = ntoh64 (req->offset); + + resolve_and_resume (frame, server_readv_resume); + + return 0; +} + + +int +server_writev_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + struct iovec iov = {0, }; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + iov.iov_len = state->size; + + if (state->iobuf) { + iov.iov_base = state->iobuf->ptr; + } + + STACK_WIND (frame, server_writev_cbk, + bound_xl, bound_xl->fops->writev, + state->fd, &iov, 1, state->offset, state->iobref); + + return 0; +err: + server_writev_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL); + return 0; +} + + +int +server_writev (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_write_req_t *req = NULL; + server_state_t *state = NULL; + struct iobref *iobref = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + state->offset = ntoh64 (req->offset); + state->size = ntoh32 
(req->size); + + if (iobuf) { + iobref = iobref_new (); + iobref_add (iobref, iobuf); + + state->iobuf = iobuf; + state->iobref = iobref; + } + + resolve_and_resume (frame, server_writev_resume); + + return 0; +} + + +int +server_release (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_cbk_release_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION (frame); + state = CALL_STATE (frame); + req = gf_param (hdr); + + state->resolve.fd_no = ntoh64 (req->fd); + + gf_fd_put (conn->fdtable, state->resolve.fd_no); + + server_release_cbk (frame, NULL, frame->this, 0, 0); + + return 0; +} + + +int +server_fsync_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_fsync_cbk, + bound_xl, bound_xl->fops->fsync, + state->fd, state->flags); + return 0; +err: + server_fsync_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL); + + return 0; +} + + +int +server_fsync (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_fsync_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + state->flags = ntoh32 (req->data); + + resolve_and_resume (frame, server_fsync_resume); + + return 0; +} + + + +int +server_flush_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_flush_cbk, + bound_xl, bound_xl->fops->flush, state->fd); + return 0; +err: + server_flush_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno); + + return 0; +} 
+ + +int +server_flush (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_fsync_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + + resolve_and_resume (frame, server_flush_resume); + + return 0; +} + + + +int +server_ftruncate_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_ftruncate_cbk, + bound_xl, bound_xl->fops->ftruncate, + state->fd, state->offset); + return 0; +err: + server_ftruncate_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL); + + return 0; +} + + +int +server_ftruncate (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_ftruncate_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + state->offset = ntoh64 (req->offset); + + resolve_and_resume (frame, server_ftruncate_resume); + + return 0; +} + + +int +server_fstat_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_fstat_cbk, + bound_xl, bound_xl->fops->fstat, + state->fd); + return 0; +err: + server_fstat_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL); + return 0; +} + + +int +server_fstat (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_fstat_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + 
state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + + resolve_and_resume (frame, server_fstat_resume); + + return 0; +} + + +int +server_truncate_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_truncate_cbk, + bound_xl, bound_xl->fops->truncate, + &state->loc, state->offset); + return 0; +err: + server_truncate_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL); + return 0; +} + + + +int +server_truncate (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_truncate_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.path = gf_strdup (req->path); + state->resolve.ino = ntoh64 (req->ino); + state->resolve.gen = ntoh64 (req->gen); + state->offset = ntoh64 (req->offset); + + resolve_and_resume (frame, server_truncate_resume); + + return 0; +} + + +int +server_unlink_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_unlink_cbk, + bound_xl, bound_xl->fops->unlink, + &state->loc); + return 0; +err: + server_unlink_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL); + return 0; +} + + +int +server_unlink (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_unlink_req_t *req = NULL; + server_state_t *state = NULL; + int pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + pathlen = STRLEN_0 (req->path); + + state->resolve.type = RESOLVE_MUST; + state->resolve.par = ntoh64 (req->par); + state->resolve.gen = ntoh64 (req->gen); 
+ state->resolve.path = gf_strdup (req->path); + state->resolve.bname = gf_strdup (req->bname + pathlen); + + resolve_and_resume (frame, server_unlink_resume); + + return 0; +} + + +int +server_setxattr_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_setxattr_cbk, + bound_xl, bound_xl->fops->setxattr, + &state->loc, state->dict, state->flags); + return 0; +err: + server_setxattr_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno); + + return 0; +} + + +int +server_setxattr (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_setxattr_req_t *req = NULL; + server_state_t *state = NULL; + dict_t *dict = NULL; + int32_t ret = -1; + size_t dict_len = 0; + char *req_dictbuf = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + dict_len = ntoh32 (req->dict_len); + + state->resolve.type = RESOLVE_MUST; + state->resolve.path = gf_strdup (req->path + dict_len); + state->resolve.ino = ntoh64 (req->ino); + state->resolve.gen = ntoh64 (req->gen); + state->flags = ntoh32 (req->flags); + + if (dict_len) { + req_dictbuf = memdup (req->dict, dict_len); + + dict = dict_new (); + + ret = dict_unserialize (req_dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (bound_xl->name, GF_LOG_ERROR, + "%"PRId64": %s (%"PRId64"): failed to " + "unserialize request buffer to dictionary", + frame->root->unique, state->loc.path, + state->resolve.ino); + GF_FREE (req_dictbuf); + goto err; + } + + dict->extra_free = req_dictbuf; + state->dict = dict; + } + + resolve_and_resume (frame, server_setxattr_resume); + + return 0; +err: + if (dict) + dict_unref (dict); + + server_setxattr_cbk (frame, NULL, frame->this, -1, EINVAL); + + return 0; + +} + + +int +server_fsetxattr_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = 
NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_setxattr_cbk, + bound_xl, bound_xl->fops->fsetxattr, + state->fd, state->dict, state->flags); + return 0; +err: + server_fsetxattr_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno); + + return 0; +} + + +int +server_fsetxattr (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_fsetxattr_req_t *req = NULL; + server_state_t *state = NULL; + dict_t *dict = NULL; + int32_t ret = -1; + size_t dict_len = 0; + char *req_dictbuf = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + dict_len = ntoh32 (req->dict_len); + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + state->flags = ntoh32 (req->flags); + + if (dict_len) { + req_dictbuf = memdup (req->dict, dict_len); + + dict = dict_new (); + + ret = dict_unserialize (req_dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (bound_xl->name, GF_LOG_ERROR, + "%"PRId64": %s (%"PRId64"): failed to " + "unserialize request buffer to dictionary", + frame->root->unique, state->loc.path, + state->resolve.ino); + GF_FREE (req_dictbuf); + goto err; + } + + dict->extra_free = req_dictbuf; + state->dict = dict; + } + + resolve_and_resume (frame, server_fsetxattr_resume); + + return 0; +err: + if (dict) + dict_unref (dict); + + server_setxattr_cbk (frame, NULL, frame->this, -1, EINVAL); + + return 0; +} + + +int +server_fxattrop_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_fxattrop_cbk, + bound_xl, bound_xl->fops->fxattrop, + state->fd, state->flags, state->dict); + return 0; +err: + server_fxattrop_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL); + return 0; +} + + +int +server_fxattrop 
(call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_fxattrop_req_t *req = NULL; + dict_t *dict = NULL; + server_state_t *state = NULL; + size_t dict_len = 0; + char *req_dictbuf = NULL; + int32_t ret = -1; + + req = gf_param (hdr); + state = CALL_STATE(frame); + + dict_len = ntoh32 (req->dict_len); + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + + state->resolve.ino = ntoh64 (req->ino); + state->resolve.gen = ntoh64 (req->gen); + state->flags = ntoh32 (req->flags); + + if (dict_len) { + /* Unserialize the dictionary */ + req_dictbuf = memdup (req->dict, dict_len); + + dict = dict_new (); + + ret = dict_unserialize (req_dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (bound_xl->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): failed to unserialize " + "request buffer to dictionary", + state->resolve.fd_no, state->fd->inode->ino); + GF_FREE (req_dictbuf); + goto fail; + } + dict->extra_free = req_dictbuf; + state->dict = dict; + dict = NULL; + } + + resolve_and_resume (frame, server_fxattrop_resume); + + return 0; + +fail: + if (dict) + dict_unref (dict); + + server_fxattrop_cbk (frame, NULL, frame->this, -1, EINVAL, NULL); + return 0; +} + + +int +server_xattrop_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_xattrop_cbk, + bound_xl, bound_xl->fops->xattrop, + &state->loc, state->flags, state->dict); + return 0; +err: + server_xattrop_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL); + return 0; +} + + +int +server_xattrop (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_xattrop_req_t *req = NULL; + dict_t *dict = NULL; + server_state_t *state = NULL; + size_t dict_len = 0; + char *req_dictbuf = NULL; + int32_t 
ret = -1; + + req = gf_param (hdr); + state = CALL_STATE(frame); + + dict_len = ntoh32 (req->dict_len); + + state->resolve.type = RESOLVE_MUST; + state->resolve.path = gf_strdup (req->path + dict_len); + state->resolve.ino = ntoh64 (req->ino); + state->resolve.gen = ntoh64 (req->gen); + state->flags = ntoh32 (req->flags); + + if (dict_len) { + /* Unserialize the dictionary */ + req_dictbuf = memdup (req->dict, dict_len); + + dict = dict_new (); + + ret = dict_unserialize (req_dictbuf, dict_len, &dict); + if (ret < 0) { + gf_log (bound_xl->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): failed to unserialize " + "request buffer to dictionary", + state->resolve.fd_no, state->fd->inode->ino); + GF_FREE (req_dictbuf); + goto fail; + } + dict->extra_free = req_dictbuf; + state->dict = dict; + dict = NULL; + } + + resolve_and_resume (frame, server_xattrop_resume); + + return 0; + +fail: + if (dict) + dict_unref (dict); + + server_xattrop_cbk (frame, NULL, frame->this, -1, EINVAL, NULL); + return 0; +} + + +int +server_getxattr_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_getxattr_cbk, + bound_xl, bound_xl->fops->getxattr, + &state->loc, state->name); + return 0; +err: + server_getxattr_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL); + return 0; +} + + +int +server_getxattr (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_getxattr_req_t *req = NULL; + server_state_t *state = NULL; + size_t namelen = 0; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + pathlen = STRLEN_0 (req->path); + + state->resolve.type = RESOLVE_MUST; + state->resolve.path = gf_strdup (req->path); + state->resolve.ino = ntoh64 (req->ino); + state->resolve.gen = ntoh64 (req->gen); + + namelen = ntoh32 (req->namelen); 
+ if (namelen) + state->name = gf_strdup (req->name + pathlen); + + resolve_and_resume (frame, server_getxattr_resume); + + return 0; +} + + +int +server_fgetxattr_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_fgetxattr_cbk, + bound_xl, bound_xl->fops->fgetxattr, + state->fd, state->name); + return 0; +err: + server_fgetxattr_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL); + return 0; +} + + +int +server_fgetxattr (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_fgetxattr_req_t *req = NULL; + server_state_t *state = NULL; + size_t namelen = 0; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + + namelen = ntoh32 (req->namelen); + if (namelen) + state->name = gf_strdup (req->name); + + resolve_and_resume (frame, server_fgetxattr_resume); + + return 0; +} + + +int +server_removexattr_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_removexattr_cbk, + bound_xl, bound_xl->fops->removexattr, + &state->loc, state->name); + return 0; +err: + server_removexattr_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno); + return 0; +} + + +int +server_removexattr (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_removexattr_req_t *req = NULL; + server_state_t *state = NULL; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE (frame); + pathlen = STRLEN_0 (req->path); + + state->resolve.type = RESOLVE_MUST; + state->resolve.path = gf_strdup (req->path); + state->resolve.ino = ntoh64 
(req->ino); + state->resolve.gen = ntoh64 (req->gen); + state->name = gf_strdup (req->name + pathlen); + + resolve_and_resume (frame, server_removexattr_resume); + + return 0; +} + + +int +server_statfs_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret !=0) + goto err; + + STACK_WIND (frame, server_statfs_cbk, + bound_xl, bound_xl->fops->statfs, + &state->loc); + return 0; + +err: + server_statfs_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL); + return 0; +} + + +int +server_statfs (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_statfs_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.ino = ntoh64 (req->ino); + if (!state->resolve.ino) + state->resolve.ino = 1; + state->resolve.gen = ntoh64 (req->gen); + state->resolve.path = gf_strdup (req->path); + + resolve_and_resume (frame, server_statfs_resume); + + return 0; +} + + +int +server_opendir_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + state->fd = fd_create (state->loc.inode, frame->root->pid); + + STACK_WIND (frame, server_opendir_cbk, + bound_xl, bound_xl->fops->opendir, + &state->loc, state->fd); + return 0; +err: + server_opendir_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL); + return 0; +} + + +int +server_opendir (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_opendir_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.path = gf_strdup (req->path); + 
state->resolve.ino = ntoh64 (req->ino); + state->resolve.gen = ntoh64 (req->gen); + + resolve_and_resume (frame, server_opendir_resume); + + return 0; +} + + +int +server_releasedir (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_cbk_releasedir_req_t *req = NULL; + server_connection_t *conn = NULL; + uint64_t fd_no = 0; + + conn = SERVER_CONNECTION (frame); + + req = gf_param (hdr); + + fd_no = ntoh64 (req->fd); + + gf_fd_put (conn->fdtable, fd_no); + + server_releasedir_cbk (frame, NULL, frame->this, 0, 0); + + return 0; +} + +/* + * server_readdirp_cbk - getdents callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * + * not for external reference + */ +int +server_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_readdirp_rsp_t *rsp = NULL; + size_t hdrlen = 0; + size_t buf_size = 0; + int32_t gf_errno = 0; + server_state_t *state = NULL; + + if (op_ret > 0) + buf_size = gf_dirent_serialize (entries, NULL, 0); + + hdrlen = gf_hdr_len (rsp, buf_size); + hdr = gf_hdr_new (rsp, buf_size); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret > 0) { + rsp->size = hton32 (buf_size); + gf_dirent_serialize (entries, rsp->buf, buf_size); + } else { + state = CALL_STATE(frame); + + gf_log (this->name, GF_LOG_TRACE, + "%"PRId64": READDIRP %"PRId64" (%"PRId64") ==>" + "%"PRId32" (%s)", + frame->root->unique, state->resolve.fd_no, + state->fd ? 
state->fd->inode->ino : 0, op_ret, + strerror (op_errno)); + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_READDIRP, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +int +server_readdirp_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_readdirp_cbk, bound_xl, + bound_xl->fops->readdirp, state->fd, state->size, + state->offset); + + return 0; +err: + server_readdirp_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL); + return 0; +} + +/* + * server_readdirp - readdirp function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int +server_readdirp (call_frame_t *frame, xlator_t *bound_xl, gf_hdr_common_t *hdr, + size_t hdrlen, struct iobuf *iobuf) +{ + gf_fop_readdirp_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + state->size = ntoh32 (req->size); + state->offset = ntoh64 (req->offset); + + resolve_and_resume (frame, server_readdirp_resume); + + return 0; +} + + +int + server_readdir_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_readdir_cbk, + bound_xl, + bound_xl->fops->readdir, + state->fd, state->size, state->offset); + + return 0; +err: + server_readdir_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL); + return 0; +} +/* + * server_readdir - readdir function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference 
+ */ +int +server_readdir (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_readdir_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + state->size = ntoh32 (req->size); + state->offset = ntoh64 (req->offset); + + resolve_and_resume (frame, server_readdir_resume); + + return 0; +} + +int +server_fsyncdir_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_fsyncdir_cbk, + bound_xl, + bound_xl->fops->fsyncdir, + state->fd, state->flags); + return 0; + +err: + server_fsyncdir_cbk (frame, NULL, frame->this, + state->resolve.op_ret, + state->resolve.op_errno); + return 0; +} + +/* + * server_fsyncdir - fsyncdir function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ +int +server_fsyncdir (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_fsyncdir_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION (frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = ntoh64 (req->fd); + state->flags = ntoh32 (req->data); + + resolve_and_resume (frame, server_fsyncdir_resume); + + return 0; +} + + +int +server_mknod_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + state->loc.inode = inode_new (state->itable); + + STACK_WIND (frame, server_mknod_cbk, + bound_xl, 
bound_xl->fops->mknod, + &(state->loc), state->mode, state->dev); + + return 0; +err: + server_mknod_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL, NULL, NULL); + return 0; +} + + + +int +server_mknod (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_mknod_req_t *req = NULL; + server_state_t *state = NULL; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE (frame); + pathlen = STRLEN_0 (req->path); + + state->resolve.type = RESOLVE_NOT; + state->resolve.par = ntoh64 (req->par); + state->resolve.gen = ntoh64 (req->gen); + state->resolve.path = gf_strdup (req->path); + state->resolve.bname = gf_strdup (req->bname + pathlen); + + state->mode = ntoh32 (req->mode); + state->dev = ntoh64 (req->dev); + + resolve_and_resume (frame, server_mknod_resume); + + return 0; +} + + +int +server_mkdir_resume (call_frame_t *frame, xlator_t *bound_xl) + +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + state->loc.inode = inode_new (state->itable); + + STACK_WIND (frame, server_mkdir_cbk, + bound_xl, bound_xl->fops->mkdir, + &(state->loc), state->mode); + + return 0; +err: + server_mkdir_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL, NULL, NULL); + return 0; +} + + +int +server_mkdir (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_mkdir_req_t *req = NULL; + server_state_t *state = NULL; + size_t pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE (frame); + pathlen = STRLEN_0 (req->path); + + state->resolve.type = RESOLVE_NOT; + state->resolve.par = ntoh64 (req->par); + state->resolve.gen = ntoh64 (req->gen); + state->resolve.path = gf_strdup (req->path); + state->resolve.bname = gf_strdup (req->bname + pathlen); + + state->mode = ntoh32 (req->mode); + + 
resolve_and_resume (frame, server_mkdir_resume); + + return 0; +} + + +int +server_rmdir_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_rmdir_cbk, + bound_xl, bound_xl->fops->rmdir, &state->loc); + return 0; +err: + server_rmdir_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL); + return 0; +} + +int +server_rmdir (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_rmdir_req_t *req = NULL; + server_state_t *state = NULL; + int pathlen = 0; + + req = gf_param (hdr); + state = CALL_STATE (frame); + pathlen = STRLEN_0 (req->path); + + state->resolve.type = RESOLVE_MUST; + state->resolve.par = ntoh64 (req->par); + state->resolve.gen = ntoh64 (req->gen); + state->resolve.path = gf_strdup (req->path); + state->resolve.bname = gf_strdup (req->bname + pathlen); + + resolve_and_resume (frame, server_rmdir_resume); + + return 0; +} + + +int +server_inodelk_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_inodelk_cbk, + bound_xl, bound_xl->fops->inodelk, + state->volume, &state->loc, state->cmd, &state->flock); + return 0; +err: + server_inodelk_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno); + return 0; +} + + +int +server_inodelk (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_inodelk_req_t *req = NULL; + server_state_t *state = NULL; + size_t pathlen = 0; + size_t vollen = 0; + int cmd = 0; + + req = gf_param (hdr); + state = CALL_STATE (frame); + pathlen = STRLEN_0 (req->path); + vollen = STRLEN_0 (req->volume + pathlen); + + state->resolve.type = RESOLVE_EXACT; + 
state->resolve.ino = ntoh64 (req->ino); + state->resolve.gen = ntoh64 (req->gen); + state->resolve.path = gf_strdup (req->path); + + cmd = ntoh32 (req->cmd); + switch (cmd) { + case GF_LK_GETLK: + state->cmd = F_GETLK; + break; + case GF_LK_SETLK: + state->cmd = F_SETLK; + break; + case GF_LK_SETLKW: + state->cmd = F_SETLKW; + break; + } + + state->type = ntoh32 (req->type); + state->volume = gf_strdup (req->volume + pathlen); + + gf_flock_to_flock (&req->flock, &state->flock); + + switch (state->type) { + case GF_LK_F_RDLCK: + state->flock.l_type = F_RDLCK; + break; + case GF_LK_F_WRLCK: + state->flock.l_type = F_WRLCK; + break; + case GF_LK_F_UNLCK: + state->flock.l_type = F_UNLCK; + break; + } + + resolve_and_resume (frame, server_inodelk_resume); + + return 0; +} + +int +server_finodelk_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_finodelk_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->finodelk, + state->volume, state->fd, state->cmd, &state->flock); + + return 0; +err: + server_finodelk_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno); + + return 0; +} + +int +server_finodelk (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_finodelk_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION(frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + + state->resolve.type = RESOLVE_EXACT; + state->volume = gf_strdup (req->volume); + state->resolve.fd_no = ntoh64 (req->fd); + state->cmd = ntoh32 (req->cmd); + + switch (state->cmd) { + case GF_LK_GETLK: + state->cmd = F_GETLK; + break; + case GF_LK_SETLK: + state->cmd = F_SETLK; + break; + case GF_LK_SETLKW: + state->cmd = F_SETLKW; + break; + } + + state->type = ntoh32 (req->type); + + gf_flock_to_flock 
(&req->flock, &state->flock); + + switch (state->type) { + case GF_LK_F_RDLCK: + state->flock.l_type = F_RDLCK; + break; + case GF_LK_F_WRLCK: + state->flock.l_type = F_WRLCK; + break; + case GF_LK_F_UNLCK: + state->flock.l_type = F_UNLCK; + break; + } + + resolve_and_resume (frame, server_finodelk_resume); + + return 0; +} + + +int +server_entrylk_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_entrylk_cbk, + bound_xl, bound_xl->fops->entrylk, + state->volume, &state->loc, state->name, + state->cmd, state->type); + return 0; +err: + server_entrylk_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno); + return 0; +} + + +int +server_entrylk (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_entrylk_req_t *req = NULL; + server_state_t *state = NULL; + size_t pathlen = 0; + size_t namelen = 0; + size_t vollen = 0; + + req = gf_param (hdr); + state = CALL_STATE (frame); + pathlen = STRLEN_0 (req->path); + namelen = ntoh64 (req->namelen); + vollen = STRLEN_0(req->volume + pathlen + namelen); + + state->resolve.type = RESOLVE_EXACT; + state->resolve.path = gf_strdup (req->path); + state->resolve.ino = ntoh64 (req->ino); + state->resolve.gen = ntoh64 (req->gen); + + if (namelen) + state->name = gf_strdup (req->name + pathlen); + state->volume = gf_strdup (req->volume + pathlen + namelen); + + state->cmd = ntoh32 (req->cmd); + state->type = ntoh32 (req->type); + + resolve_and_resume (frame, server_entrylk_resume); + + return 0; +} + +int +server_fentrylk_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_fentrylk_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->fentrylk, + state->volume, 
state->fd, state->name, + state->cmd, state->type); + + return 0; +err: + server_fentrylk_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno); + return 0; +} + +int +server_fentrylk (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_fentrylk_req_t *req = NULL; + server_state_t *state = NULL; + size_t namelen = 0; + size_t vollen = 0; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION (frame); + + req = gf_param (hdr); + state = CALL_STATE(frame); + vollen = STRLEN_0(req->volume + namelen); + + state->resolve.type = RESOLVE_EXACT; + state->resolve.fd_no = ntoh64 (req->fd); + state->cmd = ntoh32 (req->cmd); + state->type = ntoh32 (req->type); + namelen = ntoh64 (req->namelen); + if (namelen) + state->name = req->name; + state->volume = gf_strdup (req->volume + namelen); + + + resolve_and_resume (frame, server_finodelk_resume); + + return 0; +} + + +int +server_access_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_access_cbk, + bound_xl, bound_xl->fops->access, + &state->loc, state->mask); + return 0; +err: + server_access_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno); + return 0; +} + + +int +server_access (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_access_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MUST; + state->resolve.ino = hton64 (req->ino); + state->resolve.gen = hton64 (req->gen); + state->resolve.path = gf_strdup (req->path); + + state->mask = ntoh32 (req->mask); + + resolve_and_resume (frame, server_access_resume); + + return 0; +} + + +int +server_symlink_resume (call_frame_t *frame, xlator_t *bound_xl) +{ 
+ server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + state->loc.inode = inode_new (state->itable); + + STACK_WIND (frame, server_symlink_cbk, + bound_xl, bound_xl->fops->symlink, + state->name, &state->loc); + + return 0; +err: + server_symlink_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL, NULL, NULL); + return 0; +} + + + +int +server_symlink (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + server_state_t *state = NULL; + gf_fop_symlink_req_t *req = NULL; + size_t pathlen = 0; + size_t baselen = 0; + + req = gf_param (hdr); + state = CALL_STATE (frame); + pathlen = STRLEN_0 (req->path); + baselen = STRLEN_0 (req->bname + pathlen); + + state->resolve.type = RESOLVE_NOT; + state->resolve.par = ntoh64 (req->par); + state->resolve.gen = ntoh64 (req->gen); + state->resolve.path = gf_strdup (req->path); + state->resolve.bname = gf_strdup (req->bname + pathlen); + state->name = gf_strdup (req->linkname + pathlen + baselen); + + resolve_and_resume (frame, server_symlink_resume); + + return 0; +} + + +int +server_link_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + int op_ret = 0; + int op_errno = 0; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) { + op_ret = state->resolve.op_ret; + op_errno = state->resolve.op_errno; + goto err; + } + + if (state->resolve2.op_ret != 0) { + op_ret = state->resolve2.op_ret; + op_errno = state->resolve2.op_errno; + goto err; + } + + state->loc2.inode = inode_ref (state->loc.inode); + + STACK_WIND (frame, server_link_cbk, + bound_xl, bound_xl->fops->link, + &state->loc, &state->loc2); + return 0; +err: + server_link_cbk (frame, NULL, frame->this, op_ret, op_errno, + NULL, NULL, NULL, NULL); + return 0; +} + + +int +server_link (call_frame_t *frame, xlator_t *this, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf 
*iobuf) +{ + gf_fop_link_req_t *req = NULL; + server_state_t *state = NULL; + size_t oldpathlen = 0; + size_t newpathlen = 0; + size_t newbaselen = 0; + + req = gf_param (hdr); + state = CALL_STATE (frame); + oldpathlen = STRLEN_0 (req->oldpath); + newpathlen = STRLEN_0 (req->newpath + oldpathlen); + newbaselen = STRLEN_0 (req->newbname + oldpathlen + newpathlen); + + state->resolve.type = RESOLVE_MUST; + state->resolve.path = gf_strdup (req->oldpath); + state->resolve.ino = ntoh64 (req->oldino); + state->resolve.gen = ntoh64 (req->oldgen); + + state->resolve2.type = RESOLVE_NOT; + state->resolve2.path = gf_strdup (req->newpath + oldpathlen); + state->resolve2.bname = gf_strdup (req->newbname + oldpathlen + newpathlen); + state->resolve2.par = ntoh64 (req->newpar); + state->resolve2.gen = ntoh64 (req->newgen); + + resolve_and_resume (frame, server_link_resume); + + return 0; +} + + +int +server_rename_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + int op_ret = 0; + int op_errno = 0; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) { + op_ret = state->resolve.op_ret; + op_errno = state->resolve.op_errno; + goto err; + } + + if (state->resolve2.op_ret != 0) { + op_ret = state->resolve2.op_ret; + op_errno = state->resolve2.op_errno; + goto err; + } + + STACK_WIND (frame, server_rename_cbk, + bound_xl, bound_xl->fops->rename, + &state->loc, &state->loc2); + return 0; +err: + server_rename_cbk (frame, NULL, frame->this, op_ret, op_errno, + NULL, NULL, NULL, NULL, NULL); + return 0; +} + + +int +server_rename (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_rename_req_t *req = NULL; + server_state_t *state = NULL; + size_t oldpathlen = 0; + size_t oldbaselen = 0; + size_t newpathlen = 0; + size_t newbaselen = 0; + + req = gf_param (hdr); + + state = CALL_STATE (frame); + oldpathlen = STRLEN_0 (req->oldpath); + oldbaselen = STRLEN_0 (req->oldbname + 
oldpathlen); + newpathlen = STRLEN_0 (req->newpath + oldpathlen + oldbaselen); + newbaselen = STRLEN_0 (req->newbname + oldpathlen + + oldbaselen + newpathlen); + + state->resolve.type = RESOLVE_MUST; + state->resolve.path = gf_strdup (req->oldpath); + state->resolve.bname = gf_strdup (req->oldbname + oldpathlen); + state->resolve.par = ntoh64 (req->oldpar); + state->resolve.gen = ntoh64 (req->oldgen); + + state->resolve2.type = RESOLVE_MAY; + state->resolve2.path = gf_strdup (req->newpath + oldpathlen + oldbaselen); + state->resolve2.bname = gf_strdup (req->newbname + oldpathlen + oldbaselen + + newpathlen); + state->resolve2.par = ntoh64 (req->newpar); + state->resolve2.gen = ntoh64 (req->newgen); + + resolve_and_resume (frame, server_rename_resume); + + return 0; +} + +int +server_lk_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_lk_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->lk, + state->fd, state->cmd, &state->flock); + + return 0; + +err: + server_lk_cbk (frame, NULL, frame->this, + state->resolve.op_ret, + state->resolve.op_errno, + NULL); + return 0; +} + +/* + * server_lk - lk function for server protocol + * @frame: call frame + * @bound_xl: + * @params: parameter dictionary + * + * not for external reference + */ + +int +server_lk (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_lk_req_t *req = NULL; + server_state_t *state = NULL; + server_connection_t *conn = NULL; + + conn = SERVER_CONNECTION (frame); + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.fd_no = ntoh64 (req->fd); + state->cmd = ntoh32 (req->cmd); + state->type = ntoh32 (req->type); + + switch (state->cmd) { + case GF_LK_GETLK: + state->cmd = F_GETLK; + break; + case GF_LK_SETLK: + state->cmd = F_SETLK; + break; + case GF_LK_SETLKW: + state->cmd = 
F_SETLKW; + break; + } + + gf_flock_to_flock (&req->flock, &state->flock); + + switch (state->type) { + case GF_LK_F_RDLCK: + state->flock.l_type = F_RDLCK; + break; + case GF_LK_F_WRLCK: + state->flock.l_type = F_WRLCK; + break; + case GF_LK_F_UNLCK: + state->flock.l_type = F_UNLCK; + break; + default: + gf_log (bound_xl->name, GF_LOG_ERROR, + "fd - %"PRId64" (%"PRId64"): Unknown lock type: %"PRId32"!", + state->resolve.fd_no, state->fd->inode->ino, state->type); + break; + } + + + resolve_and_resume (frame, server_lk_resume); + + return 0; +} + +/* xxx_MOPS */ +int +_volfile_update_checksum (xlator_t *this, char *key, uint32_t checksum) +{ + server_conf_t *conf = NULL; + struct _volfile_ctx *temp_volfile = NULL; + + conf = this->private; + temp_volfile = conf->volfile; + + while (temp_volfile) { + if ((NULL == key) && (NULL == temp_volfile->key)) + break; + if ((NULL == key) || (NULL == temp_volfile->key)) { + temp_volfile = temp_volfile->next; + continue; + } + if (strcmp (temp_volfile->key, key) == 0) + break; + temp_volfile = temp_volfile->next; + } + + if (!temp_volfile) { + temp_volfile = GF_CALLOC (1, sizeof (struct _volfile_ctx), + gf_server_mt_volfile_ctx); + + temp_volfile->next = conf->volfile; + temp_volfile->key = (key)? 
gf_strdup (key): NULL; + temp_volfile->checksum = checksum; + + conf->volfile = temp_volfile; + goto out; + } + + if (temp_volfile->checksum != checksum) { + gf_log (this->name, GF_LOG_CRITICAL, + "the volume file got modified between earlier access " + "and now, this may lead to inconsistency between " + "clients, advised to remount client"); + temp_volfile->checksum = checksum; + } + + out: + return 0; +} + + +size_t +build_volfile_path (xlator_t *this, const char *key, char *path, + size_t path_len) +{ + int ret = -1; + int free_filename = 0; + int free_conf_dir = 0; + char *filename = NULL; + char *conf_dir = CONFDIR; + struct stat buf = {0,}; + data_t * conf_dir_data = NULL; + char data_key[256] = {0,}; + + /* Inform users that this option is changed now */ + ret = dict_get_str (this->options, "client-volume-filename", + &filename); + if (ret == 0) { + gf_log (this->name, GF_LOG_WARNING, + "option 'client-volume-filename' is changed to " + "'volume-filename.<key>' which now takes 'key' as an " + "option to choose/fetch different files from server. " + "Refer documentation or contact developers for more " + "info. Currently defaulting to given file '%s'", + filename); + } + + if (key && !filename) { + sprintf (data_key, "volume-filename.%s", key); + ret = dict_get_str (this->options, data_key, &filename); + + if (ret < 0) { + + conf_dir_data = dict_get (this->options, "conf-dir"); + if (conf_dir_data) { + /* Check whether the specified directory exists, + or directory specified is non standard */ + ret = stat (conf_dir_data->data, &buf); + if ((ret != 0) || !S_ISDIR (buf.st_mode)) { + gf_log (this->name, GF_LOG_ERROR, + "Directory '%s' doesn't" + "exist, exiting.", + conf_dir_data->data); + ret = -1; + goto out; + } + /* Make sure that conf-dir doesn't + * contain ".." 
in path + */ + if ((gf_strstr (conf_dir_data->data, + "/", "..")) == -1) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "%s: invalid conf_dir", + conf_dir_data->data); + goto out; + } + + /* Make sure that key doesn't + * contain "../" in path + */ + + if ((gf_strstr (key, "/", "..")) == -1) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "%s: invalid key", key); + goto out; + } + + conf_dir = gf_strdup (conf_dir_data->data); + free_conf_dir = 1; + } + + ret = gf_asprintf (&filename, "%s/%s.vol", + conf_dir, key); + if (-1 == ret) + goto out; + + free_filename = 1; + } + } + + if (!filename) { + ret = dict_get_str (this->options, + "volume-filename.default", &filename); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "no default volume filename given, " + "defaulting to %s", DEFAULT_VOLUME_FILE_PATH); + filename = DEFAULT_VOLUME_FILE_PATH; + } + } + + ret = -1; + + if ((filename) && (path_len > strlen (filename))) { + strcpy (path, filename); + ret = strlen (filename); + } + +out: + if (free_conf_dir) + GF_FREE (conf_dir); + + if (free_filename) + GF_FREE (filename); + + return ret; +} + +int +_validate_volfile_checksum (xlator_t *this, char *key, + uint32_t checksum) +{ + char filename[ZR_PATH_MAX] = {0,}; + server_conf_t *conf = NULL; + struct _volfile_ctx *temp_volfile = NULL; + int ret = 0; + uint32_t local_checksum = 0; + + conf = this->private; + temp_volfile = conf->volfile; + + if (!checksum) + goto out; + + if (!temp_volfile) { + ret = build_volfile_path (this, key, filename, + sizeof (filename)); + if (ret <= 0) + goto out; + ret = open (filename, O_RDONLY); + if (-1 == ret) { + ret = 0; + gf_log (this->name, GF_LOG_DEBUG, + "failed to open volume file (%s) : %s", + filename, strerror (errno)); + goto out; + } + get_checksum_for_file (ret, &local_checksum); + _volfile_update_checksum (this, key, local_checksum); + close (ret); + } + + temp_volfile = conf->volfile; + while (temp_volfile) { + if ((NULL == key) && (NULL == temp_volfile->key)) 
+ break; + if ((NULL == key) || (NULL == temp_volfile->key)) { + temp_volfile = temp_volfile->next; + continue; + } + if (strcmp (temp_volfile->key, key) == 0) + break; + temp_volfile = temp_volfile->next; + } + + if (!temp_volfile) + goto out; + + if ((temp_volfile->checksum) && + (checksum != temp_volfile->checksum)) + ret = -1; + +out: + return ret; +} + +/* Management Calls */ +/* + * mop_getspec - getspec function for server protocol + * @frame: call frame + * @bound_xl: + * @params: + * + */ +int +mop_getspec (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_hdr_common_t *_hdr = NULL; + gf_mop_getspec_rsp_t *rsp = NULL; + int32_t ret = -1; + int32_t op_errno = ENOENT; + int32_t gf_errno = 0; + int32_t spec_fd = -1; + size_t file_len = 0; + size_t _hdrlen = 0; + char filename[ZR_PATH_MAX] = {0,}; + struct stat stbuf = {0,}; + gf_mop_getspec_req_t *req = NULL; + uint32_t checksum = 0; + uint32_t flags = 0; + uint32_t keylen = 0; + char *key = NULL; + server_conf_t *conf = NULL; + + req = gf_param (hdr); + flags = ntoh32 (req->flags); + keylen = ntoh32 (req->keylen); + if (keylen) { + key = req->key; + } + + conf = frame->this->private; + + ret = build_volfile_path (frame->this, key, filename, + sizeof (filename)); + if (ret > 0) { + /* to allocate the proper buffer to hold the file data */ + ret = stat (filename, &stbuf); + if (ret < 0){ + gf_log (frame->this->name, GF_LOG_ERROR, + "Unable to stat %s (%s)", + filename, strerror (errno)); + goto fail; + } + + spec_fd = open (filename, O_RDONLY); + if (spec_fd < 0) { + gf_log (frame->this->name, GF_LOG_ERROR, + "Unable to open %s (%s)", + filename, strerror (errno)); + goto fail; + } + ret = 0; + file_len = stbuf.st_size; + if (conf->verify_volfile_checksum) { + get_checksum_for_file (spec_fd, &checksum); + _volfile_update_checksum (frame->this, key, checksum); + } + } else { + errno = ENOENT; + } + +fail: + op_errno = errno; + + _hdrlen = gf_hdr_len 
(rsp, file_len + 1); + _hdr = gf_hdr_new (rsp, file_len + 1); + rsp = gf_param (_hdr); + + _hdr->rsp.op_ret = hton32 (ret); + gf_errno = gf_errno_to_error (op_errno); + _hdr->rsp.op_errno = hton32 (gf_errno); + + if (file_len) { + ret = read (spec_fd, rsp->spec, file_len); + close (spec_fd); + } + protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_GETSPEC, + _hdr, _hdrlen, NULL, 0, NULL); + + return 0; +} + + +int +server_checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + uint8_t *fchecksum, uint8_t *dchecksum) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_checksum_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + hdrlen = gf_hdr_len (rsp, NAME_MAX + 1 + NAME_MAX + 1); + hdr = gf_hdr_new (rsp, NAME_MAX + 1 + NAME_MAX + 1); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + memcpy (rsp->fchecksum, fchecksum, NAME_MAX); + rsp->fchecksum[NAME_MAX] = '\0'; + memcpy (rsp->dchecksum + NAME_MAX, + dchecksum, NAME_MAX); + rsp->dchecksum[NAME_MAX + NAME_MAX] = '\0'; + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_CHECKSUM, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +int +server_checksum_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + int op_ret = 0; + int op_errno = 0; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) { + op_ret = state->resolve.op_ret; + op_errno = state->resolve.op_errno; + goto err; + } + + STACK_WIND (frame, server_checksum_cbk, + BOUND_XL(frame), + BOUND_XL(frame)->fops->checksum, + &state->loc, state->flags); + + return 0; +err: + server_checksum_cbk (frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL); + + return 0; +} + +int +server_checksum (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf 
*iobuf) +{ + gf_fop_checksum_req_t *req = NULL; + server_state_t *state = NULL; + + req = gf_param (hdr); + state = CALL_STATE (frame); + + state->resolve.type = RESOLVE_MAY; + state->resolve.path = gf_strdup (req->path); + state->resolve.gen = ntoh64 (req->gen); + state->resolve.ino = ntoh64 (req->ino); + state->flags = ntoh32 (req->flag); + + resolve_and_resume (frame, server_checksum_resume); + + return 0; +} + + +int +server_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + uint32_t weak_checksum, uint8_t *strong_checksum) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_rchecksum_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + hdrlen = gf_hdr_len (rsp, MD5_DIGEST_LEN + 1); + hdr = gf_hdr_new (rsp, MD5_DIGEST_LEN + 1); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + hdr->rsp.op_errno = hton32 (gf_errno); + + if (op_ret >= 0) { + rsp->weak_checksum = weak_checksum; + + memcpy (rsp->strong_checksum, + strong_checksum, MD5_DIGEST_LEN); + + rsp->strong_checksum[MD5_DIGEST_LEN] = '\0'; + } + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_RCHECKSUM, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +int +server_rchecksum_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + int op_ret = 0; + int op_errno = 0; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) { + op_ret = state->resolve.op_ret; + op_errno = state->resolve.op_errno; + goto err; + } + + STACK_WIND (frame, server_rchecksum_cbk, + bound_xl, + bound_xl->fops->rchecksum, + state->fd, state->offset, state->size); + + return 0; +err: + server_rchecksum_cbk (frame, NULL, frame->this, -1, EINVAL, 0, NULL); + + return 0; + +} + +int +server_rchecksum (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_fop_rchecksum_req_t *req = NULL; + server_state_t *state = 
/*
 * mop_getvolume - getvolume management function for server protocol
 * @frame: call frame (unused)
 * @bound_xl: (unused)
 * @hdr: request header (unused)
 * @hdrlen: length of the request header (unused)
 * @iobuf: (unused)
 *
 * Stub: ignores every argument, sends no reply and returns 0.
 */
int
mop_getvolume (call_frame_t *frame, xlator_t *bound_xl,
               gf_hdr_common_t *hdr, size_t hdrlen,
               struct iobuf *iobuf)
{
        return 0;
}
+ size_t req_dictlen = -1; + char *msg = NULL; + char *volfile_key = NULL; + uint32_t checksum = 0; + int32_t lru_limit = 1024; + + params = dict_new (); + reply = dict_new (); + + req = gf_param (req_hdr); + req_dictlen = ntoh32 (req->dict_len); + ret = dict_unserialize (req->buf, req_dictlen, ¶ms); + + config_params = dict_copy_with_ref (frame->this->options, NULL); + trans = TRANSPORT_FROM_FRAME(frame); + conf = SERVER_CONF(frame); + + if (ret < 0) { + ret = dict_set_str (reply, "ERROR", + "Internal error: failed to unserialize " + "request dictionary"); + if (ret < 0) + gf_log (bound_xl->name, GF_LOG_DEBUG, + "failed to set error msg \"%s\"", + "Internal error: failed to unserialize " + "request dictionary"); + + op_ret = -1; + op_errno = EINVAL; + goto fail; + } + + ret = dict_get_str (params, "process-uuid", &process_uuid); + if (ret < 0) { + ret = dict_set_str (reply, "ERROR", + "UUID not specified"); + if (ret < 0) + gf_log (bound_xl->name, GF_LOG_DEBUG, + "failed to set error msg"); + + op_ret = -1; + op_errno = EINVAL; + goto fail; + } + + + conn = server_connection_get (frame->this, process_uuid); + if (trans->xl_private != conn) + trans->xl_private = conn; + + ret = dict_get_str (params, "protocol-version", &version); + if (ret < 0) { + ret = dict_set_str (reply, "ERROR", + "No version number specified"); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_DEBUG, + "failed to set error msg"); + + op_ret = -1; + op_errno = EINVAL; + goto fail; + } + + ret = strcmp (version, GF_PROTOCOL_VERSION); + if (ret != 0) { + ret = gf_asprintf (&msg, "protocol version mismatch: client(%s) " + "- server(%s)", version, GF_PROTOCOL_VERSION); + if (-1 == ret) { + gf_log (trans->xl->name, GF_LOG_ERROR, + "gf_asprintf failed while setting up error msg"); + goto fail; + } + ret = dict_set_dynstr (reply, "ERROR", msg); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_DEBUG, + "failed to set error msg"); + + op_ret = -1; + op_errno = EINVAL; + goto fail; + } + + ret = 
dict_get_str (params, + "remote-subvolume", &name); + if (ret < 0) { + ret = dict_set_str (reply, "ERROR", + "No remote-subvolume option specified"); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_DEBUG, + "failed to set error msg"); + + op_ret = -1; + op_errno = EINVAL; + goto fail; + } + + xl = get_xlator_by_name (frame->this, name); + if (xl == NULL) { + ret = gf_asprintf (&msg, "remote-subvolume \"%s\" is not found", + name); + if (-1 == ret) { + gf_log (trans->xl->name, GF_LOG_ERROR, + "gf_asprintf failed while setting error msg"); + goto fail; + } + ret = dict_set_dynstr (reply, "ERROR", msg); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_DEBUG, + "failed to set error msg"); + + op_ret = -1; + op_errno = ENOENT; + goto fail; + } + + if (conf->verify_volfile_checksum) { + ret = dict_get_uint32 (params, "volfile-checksum", &checksum); + if (ret == 0) { + ret = dict_get_str (params, "volfile-key", + &volfile_key); + + ret = _validate_volfile_checksum (trans->xl, + volfile_key, + checksum); + if (-1 == ret) { + ret = dict_set_str (reply, "ERROR", + "volume-file checksum " + "varies from earlier " + "access"); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_DEBUG, + "failed to set error msg"); + + op_ret = -1; + op_errno = ESTALE; + goto fail; + } + } + } + + + peerinfo = &trans->peerinfo; + ret = dict_set_static_ptr (params, "peer-info", peerinfo); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_DEBUG, + "failed to set peer-info"); + ret = dict_set_str (params, "peer-info-name", peerinfo->identifier); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_DEBUG, + "failed to set peer-info-name"); + + if (conf->auth_modules == NULL) { + gf_log (trans->xl->name, GF_LOG_ERROR, + "Authentication module not initialized"); + } + + ret = gf_authenticate (params, config_params, + conf->auth_modules); + if (ret == AUTH_ACCEPT) { + gf_log (trans->xl->name, GF_LOG_INFO, + "accepted client from %s", + peerinfo->identifier); + op_ret = 0; + conn->bound_xl = xl; + ret = 
dict_set_str (reply, "ERROR", "Success"); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_DEBUG, + "failed to set error msg"); + } else { + gf_log (trans->xl->name, GF_LOG_ERROR, + "Cannot authenticate client from %s", + peerinfo->identifier); + op_ret = -1; + op_errno = EACCES; + ret = dict_set_str (reply, "ERROR", "Authentication failed"); + if (ret < 0) + gf_log (bound_xl->name, GF_LOG_DEBUG, + "failed to set error msg"); + + goto fail; + } + + if (conn->bound_xl == NULL) { + ret = dict_set_str (reply, "ERROR", + "Check volfile and handshake " + "options in protocol/client"); + if (ret < 0) + gf_log (trans->xl->name, GF_LOG_DEBUG, + "failed to set error msg"); + + op_ret = -1; + op_errno = EACCES; + goto fail; + } + + if ((conn->bound_xl != NULL) && + (ret >= 0) && + (conn->bound_xl->itable == NULL)) { + /* create inode table for this bound_xl, if one doesn't + already exist */ + lru_limit = INODE_LRU_LIMIT (frame->this); + + gf_log (trans->xl->name, GF_LOG_TRACE, + "creating inode table with lru_limit=%"PRId32", " + "xlator=%s", lru_limit, conn->bound_xl->name); + + conn->bound_xl->itable = + inode_table_new (lru_limit, + conn->bound_xl); + } + + ret = dict_set_str (reply, "process-uuid", + xl->ctx->process_uuid); + + ret = dict_set_uint64 (reply, "transport-ptr", + ((uint64_t) (long) trans)); + +fail: + dict_len = dict_serialized_length (reply); + if (dict_len < 0) { + gf_log (xl->name, GF_LOG_DEBUG, + "failed to get serialized length of reply dict"); + op_ret = -1; + op_errno = EINVAL; + dict_len = 0; + } + + rsp_hdr = gf_hdr_new (rsp, dict_len); + rsp_hdrlen = gf_hdr_len (rsp, dict_len); + rsp = gf_param (rsp_hdr); + + if (dict_len) { + ret = dict_serialize (reply, rsp->buf); + if (ret < 0) { + gf_log (xl->name, GF_LOG_DEBUG, + "failed to serialize reply dict"); + op_ret = -1; + op_errno = -ret; + } + } + rsp->dict_len = hton32 (dict_len); + + rsp_hdr->rsp.op_ret = hton32 (op_ret); + gf_errno = gf_errno_to_error (op_errno); + rsp_hdr->rsp.op_errno = hton32 
(gf_errno); + + protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_SETVOLUME, + rsp_hdr, rsp_hdrlen, NULL, 0, NULL); + + dict_unref (params); + dict_unref (reply); + dict_unref (config_params); + + return 0; +} + + +int +mop_ping (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_hdr_common_t *rsp_hdr = NULL; + gf_mop_ping_rsp_t *rsp = NULL; + size_t rsp_hdrlen = 0; + + rsp_hdrlen = gf_hdr_len (rsp, 0); + rsp_hdr = gf_hdr_new (rsp, 0); + + hdr->rsp.op_ret = 0; + + protocol_server_reply (frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_PING, + rsp_hdr, rsp_hdrlen, NULL, 0, NULL); + + return 0; +} + + +int +mop_log (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_mop_log_req_t * req = NULL; + char * msg = NULL; + uint32_t msglen = 0; + + transport_t * trans = NULL; + + trans = TRANSPORT_FROM_FRAME (frame); + + req = gf_param (hdr); + msglen = ntoh32 (req->msglen); + + if (msglen) + msg = req->msg; + + gf_log_from_client (msg, trans->peerinfo.identifier); + + return 0; +} + + +/* ENOSYS operations (for backword compatibility) */ +int +server_setdents (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_hdr_common_t *rsp_hdr = NULL; + gf_mop_ping_rsp_t *rsp = NULL; /* Using for NULL */ + size_t rsp_hdrlen = 0; + int32_t gf_errno = 0; + + rsp_hdrlen = gf_hdr_len (rsp, 0); + rsp_hdr = gf_hdr_new (rsp, 0); + + gf_errno = gf_errno_to_error (ENOSYS); + hdr->rsp.op_errno = hton32 (gf_errno); + hdr->rsp.op_ret = -1; + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_SETDENTS, + rsp_hdr, rsp_hdrlen, NULL, 0, NULL); + + return 0; +} + +/* */ +int +server_getdents (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_hdr_common_t *rsp_hdr = NULL; + gf_mop_ping_rsp_t *rsp = NULL; /* Using for NULL */ + size_t rsp_hdrlen = 0; + 
int32_t gf_errno = 0; + + rsp_hdrlen = gf_hdr_len (rsp, 0); + rsp_hdr = gf_hdr_new (rsp, 0); + + gf_errno = gf_errno_to_error (ENOSYS); + hdr->rsp.op_errno = hton32 (gf_errno); + hdr->rsp.op_ret = -1; + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_GETDENTS, + rsp_hdr, rsp_hdrlen, NULL, 0, NULL); + + return 0; +} + +/* */ +int +server_lock_notify (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_hdr_common_t *rsp_hdr = NULL; + gf_mop_ping_rsp_t *rsp = NULL; /* Using for NULL */ + size_t rsp_hdrlen = 0; + int32_t gf_errno = 0; + + rsp_hdrlen = gf_hdr_len (rsp, 0); + rsp_hdr = gf_hdr_new (rsp, 0); + + gf_errno = gf_errno_to_error (ENOSYS); + hdr->rsp.op_errno = hton32 (gf_errno); + hdr->rsp.op_ret = -1; + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_LOCK_NOTIFY, + rsp_hdr, rsp_hdrlen, NULL, 0, NULL); + + return 0; +} + +/* */ +int +server_lock_fnotify (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_hdr_common_t *rsp_hdr = NULL; + gf_mop_ping_rsp_t *rsp = NULL; /* Using for NULL */ + size_t rsp_hdrlen = 0; + int32_t gf_errno = 0; + + rsp_hdrlen = gf_hdr_len (rsp, 0); + rsp_hdr = gf_hdr_new (rsp, 0); + + gf_errno = gf_errno_to_error (ENOSYS); + hdr->rsp.op_errno = hton32 (gf_errno); + hdr->rsp.op_ret = -1; + + protocol_server_reply (frame, GF_OP_TYPE_FOP_REPLY, GF_PROTO_FOP_LOCK_FNOTIFY, + rsp_hdr, rsp_hdrlen, NULL, 0, NULL); + + return 0; +} + + +int +mop_stats (call_frame_t *frame, xlator_t *bound_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf) +{ + gf_hdr_common_t *rsp_hdr = NULL; + gf_mop_ping_rsp_t *rsp = NULL; /* Using for NULL */ + size_t rsp_hdrlen = 0; + int32_t gf_errno = 0; + + rsp_hdrlen = gf_hdr_len (rsp, 0); + rsp_hdr = gf_hdr_new (rsp, 0); + + gf_errno = gf_errno_to_error (ENOSYS); + hdr->rsp.op_errno = hton32 (gf_errno); + hdr->rsp.op_ret = -1; + + protocol_server_reply 
(frame, GF_OP_TYPE_MOP_REPLY, GF_MOP_STATS, + rsp_hdr, rsp_hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * unknown_op_cbk - This function is called when a opcode for unknown + * type is called. Helps to keep the backward/forward + * compatiblity + * @frame: call frame + * @type: + * @opcode: + * + */ + +int +unknown_op_cbk (call_frame_t *frame, int32_t type, int32_t opcode) +{ + gf_hdr_common_t *hdr = NULL; + gf_fop_flush_rsp_t *rsp = NULL; + size_t hdrlen = 0; + int32_t gf_errno = 0; + + hdrlen = gf_hdr_len (rsp, 0); + hdr = gf_hdr_new (rsp, 0); + rsp = gf_param (hdr); + + hdr->rsp.op_ret = hton32 (-1); + gf_errno = gf_errno_to_error (ENOSYS); + hdr->rsp.op_errno = hton32 (gf_errno); + + protocol_server_reply (frame, type, opcode, + hdr, hdrlen, NULL, 0, NULL); + + return 0; +} + +/* + * get_frame_for_transport - get call frame for specified transport object + * + * @trans: transport object + * + */ +static call_frame_t * +get_frame_for_transport (transport_t *trans) +{ + call_frame_t *frame = NULL; + call_pool_t *pool = NULL; + server_connection_t *conn = NULL; + server_state_t *state = NULL;; + + GF_VALIDATE_OR_GOTO("server", trans, out); + + if (trans->xl && trans->xl->ctx) + pool = trans->xl->ctx->pool; + GF_VALIDATE_OR_GOTO("server", pool, out); + + frame = create_frame (trans->xl, pool); + GF_VALIDATE_OR_GOTO("server", frame, out); + + state = GF_CALLOC (1, sizeof (*state), + gf_server_mt_server_state_t); + GF_VALIDATE_OR_GOTO("server", state, out); + + conn = trans->xl_private; + if (conn) { + if (conn->bound_xl) + state->itable = conn->bound_xl->itable; + state->bound_xl = conn->bound_xl; + } + + state->trans = transport_ref (trans); + state->resolve.fd_no = -1; + state->resolve2.fd_no = -1; + + frame->root->trans = conn; + frame->root->state = state; /* which socket */ + frame->root->unique = 0; /* which call */ + +out: + return frame; +} + + +int +server_decode_groups (call_frame_t *frame, gf_hdr_common_t *hdr) +{ + int i = 0; + + if ((!frame) || 
(!hdr)) + return 0; + + frame->root->ngrps = ntoh32 (hdr->req.ngrps); + if (frame->root->ngrps == 0) + return 0; + + if (frame->root->ngrps > GF_REQUEST_MAXGROUPS) + return -1; + + for (; i < frame->root->ngrps; ++i) + frame->root->groups[i] = ntoh32 (hdr->req.groups[i]); + + return 0; +} + + +/* + * get_frame_for_call - create a frame into the capable of + * generating and replying the reply packet by itself. + * By making a call with this frame, the last UNWIND + * function will have all needed state from its + * frame_t->root to send reply. + * @trans: + * @blk: + * @params: + * + * not for external reference + */ +static call_frame_t * +get_frame_for_call (transport_t *trans, gf_hdr_common_t *hdr) +{ + call_frame_t *frame = NULL; + + frame = get_frame_for_transport (trans); + + frame->root->op = ntoh32 (hdr->op); + frame->root->type = ntoh32 (hdr->type); + + frame->root->uid = ntoh32 (hdr->req.uid); + frame->root->unique = ntoh64 (hdr->callid); /* which call */ + frame->root->gid = ntoh32 (hdr->req.gid); + frame->root->pid = ntoh32 (hdr->req.pid); + frame->root->lk_owner = ntoh64 (hdr->req.lk_owner); + server_decode_groups (frame, hdr); + + return frame; +} + +/* + * prototype of operations function for each of mop and + * fop at server protocol level + * + * @frame: call frame pointer + * @bound_xl: the xlator that this frame is bound to + * @params: parameters dictionary + * + * to be used by protocol interpret, _not_ for exterenal reference + */ +typedef int32_t (*gf_op_t) (call_frame_t *frame, xlator_t *bould_xl, + gf_hdr_common_t *hdr, size_t hdrlen, + struct iobuf *iobuf); + + +static gf_op_t gf_fops[] = { + [GF_PROTO_FOP_STAT] = server_stat, + [GF_PROTO_FOP_READLINK] = server_readlink, + [GF_PROTO_FOP_MKNOD] = server_mknod, + [GF_PROTO_FOP_MKDIR] = server_mkdir, + [GF_PROTO_FOP_UNLINK] = server_unlink, + [GF_PROTO_FOP_RMDIR] = server_rmdir, + [GF_PROTO_FOP_SYMLINK] = server_symlink, + [GF_PROTO_FOP_RENAME] = server_rename, + [GF_PROTO_FOP_LINK] = 
server_link, + [GF_PROTO_FOP_TRUNCATE] = server_truncate, + [GF_PROTO_FOP_OPEN] = server_open, + [GF_PROTO_FOP_READ] = server_readv, + [GF_PROTO_FOP_WRITE] = server_writev, + [GF_PROTO_FOP_STATFS] = server_statfs, + [GF_PROTO_FOP_FLUSH] = server_flush, + [GF_PROTO_FOP_FSYNC] = server_fsync, + [GF_PROTO_FOP_SETXATTR] = server_setxattr, + [GF_PROTO_FOP_GETXATTR] = server_getxattr, + [GF_PROTO_FOP_FGETXATTR] = server_fgetxattr, + [GF_PROTO_FOP_FSETXATTR] = server_fsetxattr, + [GF_PROTO_FOP_REMOVEXATTR] = server_removexattr, + [GF_PROTO_FOP_OPENDIR] = server_opendir, + [GF_PROTO_FOP_FSYNCDIR] = server_fsyncdir, + [GF_PROTO_FOP_ACCESS] = server_access, + [GF_PROTO_FOP_CREATE] = server_create, + [GF_PROTO_FOP_FTRUNCATE] = server_ftruncate, + [GF_PROTO_FOP_FSTAT] = server_fstat, + [GF_PROTO_FOP_LK] = server_lk, + [GF_PROTO_FOP_LOOKUP] = server_lookup, + [GF_PROTO_FOP_READDIR] = server_readdir, + [GF_PROTO_FOP_READDIRP] = server_readdirp, + [GF_PROTO_FOP_INODELK] = server_inodelk, + [GF_PROTO_FOP_FINODELK] = server_finodelk, + [GF_PROTO_FOP_ENTRYLK] = server_entrylk, + [GF_PROTO_FOP_FENTRYLK] = server_fentrylk, + [GF_PROTO_FOP_CHECKSUM] = server_checksum, + [GF_PROTO_FOP_RCHECKSUM] = server_rchecksum, + [GF_PROTO_FOP_XATTROP] = server_xattrop, + [GF_PROTO_FOP_FXATTROP] = server_fxattrop, + [GF_PROTO_FOP_SETATTR] = server_setattr, + [GF_PROTO_FOP_FSETATTR] = server_fsetattr, + [GF_PROTO_FOP_SETDENTS] = server_setdents, + [GF_PROTO_FOP_GETDENTS] = server_getdents, + [GF_PROTO_FOP_LOCK_NOTIFY] = server_lock_notify, + [GF_PROTO_FOP_LOCK_FNOTIFY] = server_lock_fnotify, +}; + + + +static gf_op_t gf_mops[] = { + [GF_MOP_SETVOLUME] = mop_setvolume, + [GF_MOP_GETVOLUME] = mop_getvolume, + [GF_MOP_GETSPEC] = mop_getspec, + [GF_MOP_PING] = mop_ping, + [GF_MOP_LOG] = mop_log, + [GF_MOP_STATS] = mop_stats, +}; + +static gf_op_t gf_cbks[] = { + [GF_CBK_FORGET] = server_forget, + [GF_CBK_RELEASE] = server_release, + [GF_CBK_RELEASEDIR] = server_releasedir +}; + +int 
+protocol_server_interpret (xlator_t *this, transport_t *trans, + char *hdr_p, size_t hdrlen, struct iobuf *iobuf) +{ + server_connection_t *conn = NULL; + gf_hdr_common_t *hdr = NULL; + xlator_t *bound_xl = NULL; + call_frame_t *frame = NULL; + peer_info_t *peerinfo = NULL; + int32_t type = -1; + int32_t op = -1; + int32_t ret = -1; + + hdr = (gf_hdr_common_t *)hdr_p; + type = ntoh32 (hdr->type); + op = ntoh32 (hdr->op); + + conn = trans->xl_private; + if (conn) + bound_xl = conn->bound_xl; + + peerinfo = &trans->peerinfo; + switch (type) { + case GF_OP_TYPE_FOP_REQUEST: + if ((op < 0) || (op >= GF_PROTO_FOP_MAXVALUE)) { + gf_log (this->name, GF_LOG_ERROR, + "invalid fop %"PRId32" from client %s", + op, peerinfo->identifier); + break; + } + if (bound_xl == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "Received fop %"PRId32" before " + "authentication.", op); + break; + } + frame = get_frame_for_call (trans, hdr); + frame->op = op; + ret = gf_fops[op] (frame, bound_xl, hdr, hdrlen, iobuf); + break; + + case GF_OP_TYPE_MOP_REQUEST: + if ((op < 0) || (op >= GF_MOP_MAXVALUE)) { + gf_log (this->name, GF_LOG_ERROR, + "invalid mop %"PRId32" from client %s", + op, peerinfo->identifier); + break; + } + frame = get_frame_for_call (trans, hdr); + frame->op = op; + ret = gf_mops[op] (frame, bound_xl, hdr, hdrlen, iobuf); + break; + + case GF_OP_TYPE_CBK_REQUEST: + if ((op < 0) || (op >= GF_CBK_MAXVALUE)) { + gf_log (this->name, GF_LOG_ERROR, + "invalid cbk %"PRId32" from client %s", + op, peerinfo->identifier); + break; + } + if (bound_xl == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "Received cbk %d before authentication.", op); + break; + } + + frame = get_frame_for_call (trans, hdr); + ret = gf_cbks[op] (frame, bound_xl, hdr, hdrlen, iobuf); + break; + + default: + break; + } + + return ret; +} + + +/* + * server_nop_cbk - nop callback for server protocol + * @frame: call frame + * @cookie: + * @this: + * @op_ret: return value + * @op_errno: errno + * + * not for 
external reference + */ +int +server_nop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if (state) + free_state (state); + STACK_DESTROY (frame->root); + return 0; +} + +/* + * server_fd - fdtable dump function for server protocol + * @this: + * + */ +int +server_fd (xlator_t *this) +{ + server_conf_t *conf = NULL; + server_connection_t *trav = NULL; + char key[GF_DUMP_MAX_BUF_LEN]; + int i = 1; + int ret = -1; + + if (!this) + return -1; + + conf = this->private; + if (!conf) { + gf_log (this->name, GF_LOG_WARNING, + "conf null in xlator"); + return -1; + } + + gf_proc_dump_add_section("xlator.protocol.server.conn"); + + ret = pthread_mutex_trylock (&conf->mutex); + if (ret) { + gf_log("", GF_LOG_WARNING, "Unable to dump fdtable" + " errno: %d", errno); + return -1; + } + + list_for_each_entry (trav, &conf->conns, list) { + if (trav->id) { + gf_proc_dump_build_key(key, + "xlator.protocol.server.conn", + "%d.id", i); + gf_proc_dump_write(key, "%s", trav->id); + } + + gf_proc_dump_build_key(key,"xlator.protocol.server.conn", + "%d.ref",i) + gf_proc_dump_write(key, "%d", trav->ref); + if (trav->bound_xl) { + gf_proc_dump_build_key(key, + "xlator.protocol.server.conn", + "%d.bound_xl", i); + gf_proc_dump_write(key, "%s", trav->bound_xl->name); + } + + gf_proc_dump_build_key(key, + "xlator.protocol.server.conn", + "%d.id", i); + fdtable_dump(trav->fdtable,key); + i++; + } + pthread_mutex_unlock (&conf->mutex); + + + return 0; + } + +int +server_priv (xlator_t *this) +{ + return 0; +} + +int +server_inode (xlator_t *this) +{ + server_conf_t *conf = NULL; + server_connection_t *trav = NULL; + char key[GF_DUMP_MAX_BUF_LEN]; + int i = 1; + int ret = -1; + + if (!this) + return -1; + + conf = this->private; + if (!conf) { + gf_log (this->name, GF_LOG_WARNING, + "conf null in xlator"); + return -1; + } + + ret = pthread_mutex_trylock (&conf->mutex); + if (ret) { 
+ gf_log("", GF_LOG_WARNING, "Unable to dump itable" + " errno: %d", errno); + return -1; + } + + list_for_each_entry (trav, &conf->conns, list) { + if (trav->bound_xl && trav->bound_xl->itable) { + gf_proc_dump_build_key(key, + "xlator.protocol.server.conn", + "%d.bound_xl.%s", + i, trav->bound_xl->name); + inode_table_dump(trav->bound_xl->itable,key); + i++; + } + } + pthread_mutex_unlock (&conf->mutex); + + + return 0; +} + + +static void +get_auth_types (dict_t *this, char *key, data_t *value, void *data) +{ + dict_t *auth_dict = NULL; + char *saveptr = NULL; + char *tmp = NULL; + char *key_cpy = NULL; + int32_t ret = -1; + + auth_dict = data; + key_cpy = gf_strdup (key); + GF_VALIDATE_OR_GOTO("server", key_cpy, out); + + tmp = strtok_r (key_cpy, ".", &saveptr); + ret = strcmp (tmp, "auth"); + if (ret == 0) { + tmp = strtok_r (NULL, ".", &saveptr); + if (strcmp (tmp, "ip") == 0) { + /* TODO: backward compatibility, remove when + newer versions are available */ + tmp = "addr"; + gf_log ("server", GF_LOG_WARNING, + "assuming 'auth.ip' to be 'auth.addr'"); + } + ret = dict_set_dynptr (auth_dict, tmp, NULL, 0); + if (ret < 0) { + gf_log ("server", GF_LOG_DEBUG, + "failed to dict_set_dynptr"); + } + } + + GF_FREE (key_cpy); +out: + return; +} + + +int +validate_auth_options (xlator_t *this, dict_t *dict) +{ + int ret = -1; + int error = 0; + xlator_list_t *trav = NULL; + data_pair_t *pair = NULL; + char *saveptr = NULL; + char *tmp = NULL; + char *key_cpy = NULL; + + trav = this->children; + while (trav) { + error = -1; + for (pair = dict->members_list; pair; pair = pair->next) { + key_cpy = gf_strdup (pair->key); + tmp = strtok_r (key_cpy, ".", &saveptr); + ret = strcmp (tmp, "auth"); + if (ret == 0) { + /* for module type */ + tmp = strtok_r (NULL, ".", &saveptr); + /* for volume name */ + tmp = strtok_r (NULL, ".", &saveptr); + } + + if (strcmp (tmp, trav->xlator->name) == 0) { + error = 0; + GF_FREE (key_cpy); + break; + } + GF_FREE (key_cpy); + } + if (-1 == 
error) { + gf_log (this->name, GF_LOG_ERROR, + "volume '%s' defined as subvolume, but no " + "authentication defined for the same", + trav->xlator->name); + break; + } + trav = trav->next; + } + + return error; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_server_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + " failed"); + return ret; + } + + return ret; +} + + +/* + * init - called during server protocol initialization + * + * @this: + * + */ +int +init (xlator_t *this) +{ + int32_t ret = -1; + transport_t *trans = NULL; + server_conf_t *conf = NULL; + data_t *data = NULL; + data_t *trace = NULL; + + if (this->children == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "protocol/server should have subvolume"); + goto out; + } + + trans = transport_load (this->options, this); + if (trans == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "failed to load transport"); + goto out; + } + + ret = transport_listen (trans); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "failed to bind/listen on socket"); + goto out; + } + + conf = GF_CALLOC (1, sizeof (server_conf_t), + gf_server_mt_server_conf_t); + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + INIT_LIST_HEAD (&conf->conns); + pthread_mutex_init (&conf->mutex, NULL); + + conf->trans = trans; + + conf->auth_modules = dict_new (); + GF_VALIDATE_OR_GOTO(this->name, conf->auth_modules, out); + + dict_foreach (this->options, get_auth_types, + conf->auth_modules); + ret = validate_auth_options (this, this->options); + if (ret == -1) { + /* logging already done in validate_auth_options function. 
*/ + goto out; + } + + ret = gf_auth_init (this, conf->auth_modules); + if (ret) { + dict_unref (conf->auth_modules); + goto out; + } + + this->private = conf; + + ret = dict_get_int32 (this->options, "inode-lru-limit", + &conf->inode_lru_limit); + if (ret < 0) { + conf->inode_lru_limit = 1024; + } + + ret = dict_get_int32 (this->options, "limits.transaction-size", + &conf->max_block_size); + if (ret < 0) { + gf_log (this->name, GF_LOG_TRACE, + "defaulting limits.transaction-size to %d", + DEFAULT_BLOCK_SIZE); + conf->max_block_size = DEFAULT_BLOCK_SIZE; + } + + conf->verify_volfile_checksum = 1; + data = dict_get (this->options, "verify-volfile-checksum"); + if (data) { + ret = gf_string2boolean(data->data, + &conf->verify_volfile_checksum); + if (ret != 0) { + gf_log (this->name, GF_LOG_DEBUG, + "wrong value for verify-volfile-checksum"); + conf->verify_volfile_checksum = 1; + } + } + + trace = dict_get (this->options, "trace"); + if (trace) { + if (gf_string2boolean (trace->data, + &conf->trace) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "'trace' takes on only boolean values."); + return -1; + } + } + +#ifndef GF_DARWIN_HOST_OS + { + struct rlimit lim; + + lim.rlim_cur = 1048576; + lim.rlim_max = 1048576; + + if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { + gf_log (this->name, GF_LOG_WARNING, + "WARNING: Failed to set 'ulimit -n 1M': %s", + strerror(errno)); + lim.rlim_cur = 65536; + lim.rlim_max = 65536; + + if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to set max open fd to 64k: %s", + strerror(errno)); + } else { + gf_log (this->name, GF_LOG_TRACE, + "max open fd set to 64k"); + } + } + } +#endif + this->graph->top = this; + + ret = 0; +out: + return ret; +} + + + +int +protocol_server_pollin (xlator_t *this, transport_t *trans) +{ + char *hdr = NULL; + size_t hdrlen = 0; + int ret = -1; + struct iobuf *iobuf = NULL; + + + ret = transport_receive (trans, &hdr, &hdrlen, &iobuf); + + if (ret == 0) + ret = 
protocol_server_interpret (this, trans, hdr, + hdrlen, iobuf); + + /* TODO: use mem-pool */ + GF_FREE (hdr); + + return ret; +} + + +/* + * fini - finish function for server protocol, called before + * unloading server protocol. + * + * @this: + * + */ +void +fini (xlator_t *this) +{ + server_conf_t *conf = this->private; + + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (conf->auth_modules) { + dict_unref (conf->auth_modules); + } + + GF_FREE (conf); + this->private = NULL; +out: + return; +} + +/* + * server_protocol_notify - notify function for server protocol + * @this: + * @trans: + * @event: + * + */ +int +notify (xlator_t *this, int32_t event, void *data, ...) +{ + int ret = 0; + transport_t *trans = data; + peer_info_t *peerinfo = NULL; + peer_info_t *myinfo = NULL; + + if (trans != NULL) { + peerinfo = &(trans->peerinfo); + myinfo = &(trans->myinfo); + } + + switch (event) { + case GF_EVENT_POLLIN: + ret = protocol_server_pollin (this, trans); + break; + case GF_EVENT_POLLERR: + { + gf_log (trans->xl->name, GF_LOG_INFO, "%s disconnected", + peerinfo->identifier); + + ret = -1; + transport_disconnect (trans); + if (trans->xl_private == NULL) { + gf_log (this->name, GF_LOG_DEBUG, + "POLLERR received on (%s) even before " + "handshake with (%s) is successful", + myinfo->identifier, peerinfo->identifier); + } else { + /* + * FIXME: shouldn't we check for return value? + * what should be done if cleanup fails? 
+ */ + server_connection_cleanup (this, trans->xl_private); + } + } + break; + + case GF_EVENT_TRANSPORT_CLEANUP: + { + if (trans->xl_private) { + server_connection_put (this, trans->xl_private); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "transport (%s) cleaned up even before " + "handshake with (%s) is successful", + myinfo->identifier, peerinfo->identifier); + } + } + break; + + default: + default_notify (this, event, data); + break; + } + + return ret; +} + + +struct xlator_fops fops = { +}; + +struct xlator_cbks cbks = { +}; + +struct xlator_dumpops dumpops = { + .inode = server_inode, + .priv = server_priv, + .fd = server_fd, +}; + + +struct volume_options options[] = { + { .key = {"transport-type"}, + .value = {"tcp", "socket", "ib-verbs", "unix", "ib-sdp", + "tcp/server", "ib-verbs/server"}, + .type = GF_OPTION_TYPE_STR + }, + { .key = {"volume-filename.*"}, + .type = GF_OPTION_TYPE_PATH, + }, + { .key = {"inode-lru-limit"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = (1 * GF_UNIT_MB) + }, + { .key = {"client-volume-filename"}, + .type = GF_OPTION_TYPE_PATH + }, + { .key = {"verify-volfile-checksum"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"trace"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"conf-dir"}, + .type = GF_OPTION_TYPE_PATH, + }, + + { .key = {NULL} }, +}; diff --git a/xlators/protocol/legacy/server/src/server-protocol.h b/xlators/protocol/legacy/server/src/server-protocol.h new file mode 100644 index 00000000000..0c21000596f --- /dev/null +++ b/xlators/protocol/legacy/server/src/server-protocol.h @@ -0,0 +1,198 @@ +/* + Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _SERVER_PROTOCOL_H_ +#define _SERVER_PROTOCOL_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <pthread.h> + +#include "glusterfs.h" +#include "xlator.h" +#include "logging.h" +#include "call-stub.h" +#include "fd.h" +#include "byte-order.h" +#include "server-mem-types.h" +#include "authenticate.h" +#include "transport.h" + +#define DEFAULT_BLOCK_SIZE 4194304 /* 4MB */ +#define DEFAULT_VOLUME_FILE_PATH CONFDIR "/glusterfs.vol" + +typedef struct _server_state server_state_t; + +struct _locker { + struct list_head lockers; + char *volume; + loc_t loc; + fd_t *fd; + pid_t pid; +}; + +struct _lock_table { + struct list_head file_lockers; + struct list_head dir_lockers; + gf_lock_t lock; + size_t count; +}; + + +/* private structure per connection (transport object) + * used as transport_t->xl_private + */ +struct _server_connection { + struct list_head list; + char *id; + int ref; + int active_transports; + pthread_mutex_t lock; + char disconnected; + fdtable_t *fdtable; + struct _lock_table *ltable; + xlator_t *bound_xl; +}; + +typedef struct _server_connection server_connection_t; + + +server_connection_t * +server_connection_get (xlator_t *this, const char *id); + +void +server_connection_put (xlator_t *this, server_connection_t *conn); + +int +server_connection_destroy (xlator_t *this, server_connection_t *conn); + +int +server_connection_cleanup (xlator_t *this, server_connection_t *conn); + +int +server_nop_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno); + + +struct _volfile_ctx { + struct 
_volfile_ctx *next; + char *key; + uint32_t checksum; +}; + +typedef struct { + struct _volfile_ctx *volfile; + + dict_t *auth_modules; + transport_t *trans; + int32_t max_block_size; + int32_t inode_lru_limit; + pthread_mutex_t mutex; + struct list_head conns; + gf_boolean_t verify_volfile_checksum; + gf_boolean_t trace; +} server_conf_t; + + +typedef enum { + RESOLVE_MUST = 1, + RESOLVE_NOT, + RESOLVE_MAY, + RESOLVE_DONTCARE, + RESOLVE_EXACT +} server_resolve_type_t; + + +struct resolve_comp { + char *basename; + ino_t ino; + uint64_t gen; + inode_t *inode; +}; + +typedef struct { + server_resolve_type_t type; + uint64_t fd_no; + ino_t ino; + uint64_t gen; + ino_t par; + char *path; + char *bname; + char *resolved; + int op_ret; + int op_errno; + loc_t deep_loc; + struct resolve_comp *components; + int comp_count; +} server_resolve_t; + + +typedef int (*server_resume_fn_t) (call_frame_t *frame, xlator_t *bound_xl); + +int +resolve_and_resume (call_frame_t *frame, server_resume_fn_t fn); + +struct _server_state { + transport_t *trans; + xlator_t *bound_xl; + inode_table_t *itable; + + server_resume_fn_t resume_fn; + + loc_t loc; + loc_t loc2; + server_resolve_t resolve; + server_resolve_t resolve2; + + /* used within resolve_and_resume */ + loc_t *loc_now; + server_resolve_t *resolve_now; + + struct iatt stbuf; + int valid; + + fd_t *fd; + int flags; + int wbflags; + struct iobuf *iobuf; + struct iobref *iobref; + + size_t size; + off_t offset; + mode_t mode; + dev_t dev; + size_t nr_count; + int cmd; + int type; + char *name; + int name_len; + + int mask; + char is_revalidate; + dict_t *dict; + struct flock flock; + const char *volume; + dir_entry_t *entry; +}; + + +#endif diff --git a/xlators/protocol/legacy/server/src/server-resolve.c b/xlators/protocol/legacy/server/src/server-resolve.c new file mode 100644 index 00000000000..f0da94384eb --- /dev/null +++ b/xlators/protocol/legacy/server/src/server-resolve.c @@ -0,0 +1,656 @@ +/* + Copyright (c) 2009 Gluster, 
Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "server-protocol.h" +#include "server-helpers.h" + +#include "compat-errno.h" + +int +server_resolve_all (call_frame_t *frame); +int +resolve_entry_simple (call_frame_t *frame); +int +resolve_inode_simple (call_frame_t *frame); +int +resolve_path_simple (call_frame_t *frame); + +int +component_count (const char *path) +{ + int count = 0; + const char *trav = NULL; + + trav = path; + + for (trav = path; *trav; trav++) { + if (*trav == '/') + count++; + } + + return count + 2; +} + + +int +prepare_components (call_frame_t *frame) +{ + server_state_t *state = NULL; + xlator_t *this = NULL; + server_resolve_t *resolve = NULL; + char *resolved = NULL; + int count = 0; + struct resolve_comp *components = NULL; + int i = 0; + char *trav = NULL; + + + state = CALL_STATE (frame); + this = frame->this; + resolve = state->resolve_now; + + resolved = gf_strdup (resolve->path); + resolve->resolved = resolved; + + count = component_count (resolve->path); + components = GF_CALLOC (sizeof (*components), count, + gf_server_mt_resolve_comp); + resolve->components = components; + + components[0].basename = ""; + components[0].ino = 1; + components[0].gen = 0; + components[0].inode = state->itable->root; + + i = 1; + for 
(trav = resolved; *trav; trav++) { + if (*trav == '/') { + components[i].basename = trav + 1; + *trav = 0; + i++; + } + } + + return 0; +} + + +int +resolve_loc_touchup (call_frame_t *frame) +{ + server_state_t *state = NULL; + server_resolve_t *resolve = NULL; + loc_t *loc = NULL; + char *path = NULL; + int ret = 0; + + state = CALL_STATE (frame); + + resolve = state->resolve_now; + loc = state->loc_now; + + if (!loc->path) { + if (loc->parent && resolve->bname) { + ret = inode_path (loc->parent, resolve->bname, &path); + } else if (loc->inode) { + ret = inode_path (loc->inode, NULL, &path); + } + + if (!path) + path = gf_strdup (resolve->path); + + loc->path = path; + } + + loc->name = strrchr (loc->path, '/'); + if (loc->name) + loc->name++; + + if (!loc->parent && loc->inode) { + loc->parent = inode_parent (loc->inode, 0, NULL); + } + + return 0; +} + + +int +resolve_deep_continue (call_frame_t *frame) +{ + server_state_t *state = NULL; + xlator_t *this = NULL; + server_resolve_t *resolve = NULL; + int ret = 0; + + state = CALL_STATE (frame); + this = frame->this; + resolve = state->resolve_now; + + resolve->op_ret = 0; + resolve->op_errno = 0; + + if (resolve->par) + ret = resolve_entry_simple (frame); + else if (resolve->ino) + ret = resolve_inode_simple (frame); + else if (resolve->path) + ret = resolve_path_simple (frame); + + resolve_loc_touchup (frame); + + server_resolve_all (frame); + + return 0; +} + + +int +resolve_deep_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, struct iatt *buf, + dict_t *xattr, struct iatt *postparent) +{ + server_state_t *state = NULL; + server_resolve_t *resolve = NULL; + struct resolve_comp *components = NULL; + int i = 0; + inode_t *link_inode = NULL; + + state = CALL_STATE (frame); + resolve = state->resolve_now; + components = resolve->components; + + i = (long) cookie; + + if (op_ret == -1) { + goto get_out_of_here; + } + + if (i != 0) { + /* no linking for root inode 
*/ + link_inode = inode_link (inode, resolve->deep_loc.parent, + resolve->deep_loc.name, buf); + inode_lookup (link_inode); + components[i].inode = link_inode; + link_inode = NULL; + } + + loc_wipe (&resolve->deep_loc); + + i++; /* next component */ + + if (!components[i].basename) { + /* all components of the path are resolved */ + goto get_out_of_here; + } + + /* join the current component with the path resolved until now */ + *(components[i].basename - 1) = '/'; + + resolve->deep_loc.path = gf_strdup (resolve->resolved); + resolve->deep_loc.parent = inode_ref (components[i-1].inode); + resolve->deep_loc.inode = inode_new (state->itable); + resolve->deep_loc.name = components[i].basename; + + STACK_WIND_COOKIE (frame, resolve_deep_cbk, (void *) (long) i, + BOUND_XL (frame), BOUND_XL (frame)->fops->lookup, + &resolve->deep_loc, NULL); + return 0; + +get_out_of_here: + resolve_deep_continue (frame); + return 0; +} + + +int +resolve_path_deep (call_frame_t *frame) +{ + server_state_t *state = NULL; + xlator_t *this = NULL; + server_resolve_t *resolve = NULL; + int i = 0; + + state = CALL_STATE (frame); + this = frame->this; + resolve = state->resolve_now; + + gf_log (BOUND_XL (frame)->name, GF_LOG_DEBUG, + "RESOLVE %s() seeking deep resolution of %s", + gf_fop_list[frame->root->op], resolve->path); + + prepare_components (frame); + + /* start from the root */ + resolve->deep_loc.inode = state->itable->root; + resolve->deep_loc.path = gf_strdup ("/"); + resolve->deep_loc.name = ""; + + STACK_WIND_COOKIE (frame, resolve_deep_cbk, (void *) (long) i, + BOUND_XL (frame), BOUND_XL (frame)->fops->lookup, + &resolve->deep_loc, NULL); + return 0; +} + + +int +resolve_path_simple (call_frame_t *frame) +{ + server_state_t *state = NULL; + xlator_t *this = NULL; + server_resolve_t *resolve = NULL; + struct resolve_comp *components = NULL; + int ret = -1; + int par_idx = 0; + int ino_idx = 0; + int i = 0; + + state = CALL_STATE (frame); + this = frame->this; + resolve = 
state->resolve_now; + components = resolve->components; + + if (!components) { + resolve->op_ret = -1; + resolve->op_errno = ENOENT; + goto out; + } + + for (i = 0; components[i].basename; i++) { + par_idx = ino_idx; + ino_idx = i; + } + + if (!components[par_idx].inode) { + resolve->op_ret = -1; + resolve->op_errno = ENOENT; + goto out; + } + + if (!components[ino_idx].inode && + (resolve->type == RESOLVE_MUST || resolve->type == RESOLVE_EXACT)) { + resolve->op_ret = -1; + resolve->op_errno = ENOENT; + goto out; + } + + if (components[ino_idx].inode && resolve->type == RESOLVE_NOT) { + resolve->op_ret = -1; + resolve->op_errno = EEXIST; + goto out; + } + + if (components[ino_idx].inode) + state->loc_now->inode = inode_ref (components[ino_idx].inode); + state->loc_now->parent = inode_ref (components[par_idx].inode); + + ret = 0; + +out: + return ret; +} + +/* + Check if the requirements are fulfilled by entries in the inode cache itself + Return value: + <= 0 - simple resolution was decisive and complete (either success or failure) + > 0 - indecisive, need to perform deep resolution +*/ + +int +resolve_entry_simple (call_frame_t *frame) +{ + server_state_t *state = NULL; + xlator_t *this = NULL; + server_resolve_t *resolve = NULL; + inode_t *parent = NULL; + inode_t *inode = NULL; + int ret = 0; + + state = CALL_STATE (frame); + this = frame->this; + resolve = state->resolve_now; + + parent = inode_get (state->itable, resolve->par, 0); + if (!parent) { + /* simple resolution is indecisive. 
need to perform + deep resolution */ + resolve->op_ret = -1; + resolve->op_errno = ENOENT; + ret = 1; + + inode = inode_grep (state->itable, parent, resolve->bname); + if (inode != NULL) { + gf_log (this->name, GF_LOG_DEBUG, "%"PRId64": inode " + "(pointer:%p ino: %"PRIu64") present but parent" + " is NULL for path (%s)", frame->root->unique, + inode, inode->ino, resolve->path); + inode_unref (inode); + } + goto out; + } + + if (parent->ino != 1 && parent->generation != resolve->gen) { + /* simple resolution is decisive - request was for a + stale handle */ + resolve->op_ret = -1; + resolve->op_errno = ENOENT; + ret = -1; + goto out; + } + + /* expected @parent was found from the inode cache */ + state->loc_now->parent = inode_ref (parent); + + inode = inode_grep (state->itable, parent, resolve->bname); + if (!inode) { + switch (resolve->type) { + case RESOLVE_DONTCARE: + case RESOLVE_NOT: + ret = 0; + break; + case RESOLVE_MAY: + ret = 1; + break; + default: + resolve->op_ret = -1; + resolve->op_errno = ENOENT; + ret = 1; + break; + } + + goto out; + } + + if (resolve->type == RESOLVE_NOT) { + gf_log (this->name, GF_LOG_DEBUG, "inode (pointer: %p ino:%" + PRIu64") found for path (%s) while type is RESOLVE_NOT", + inode, inode->ino, resolve->path); + resolve->op_ret = -1; + resolve->op_errno = EEXIST; + ret = -1; + goto out; + } + + ret = 0; + + state->loc_now->inode = inode_ref (inode); + +out: + if (parent) + inode_unref (parent); + + if (inode) + inode_unref (inode); + + return ret; +} + + +int +server_resolve_entry (call_frame_t *frame) +{ + server_state_t *state = NULL; + xlator_t *this = NULL; + server_resolve_t *resolve = NULL; + int ret = 0; + loc_t *loc = NULL; + + state = CALL_STATE (frame); + this = frame->this; + resolve = state->resolve_now; + loc = state->loc_now; + + ret = resolve_entry_simple (frame); + + if (ret > 0) { + loc_wipe (loc); + resolve_path_deep (frame); + return 0; + } + + if (ret == 0) + resolve_loc_touchup (frame); + + 
server_resolve_all (frame); + + return 0; +} + + +int +resolve_inode_simple (call_frame_t *frame) +{ + server_state_t *state = NULL; + xlator_t *this = NULL; + server_resolve_t *resolve = NULL; + inode_t *inode = NULL; + int ret = 0; + + state = CALL_STATE (frame); + this = frame->this; + resolve = state->resolve_now; + + if (resolve->type == RESOLVE_EXACT) { + inode = inode_get (state->itable, resolve->ino, resolve->gen); + } else { + inode = inode_get (state->itable, resolve->ino, 0); + } + + if (!inode) { + resolve->op_ret = -1; + resolve->op_errno = ENOENT; + ret = 1; + goto out; + } + + if (inode->ino != 1 && inode->generation != resolve->gen) { + resolve->op_ret = -1; + resolve->op_errno = ENOENT; + ret = -1; + goto out; + } + + ret = 0; + + state->loc_now->inode = inode_ref (inode); + +out: + if (inode) + inode_unref (inode); + + return ret; +} + + +int +server_resolve_inode (call_frame_t *frame) +{ + server_state_t *state = NULL; + xlator_t *this = NULL; + server_resolve_t *resolve = NULL; + int ret = 0; + loc_t *loc = NULL; + + state = CALL_STATE (frame); + this = frame->this; + resolve = state->resolve_now; + loc = state->loc_now; + + ret = resolve_inode_simple (frame); + + if (ret > 0) { + loc_wipe (loc); + resolve_path_deep (frame); + return 0; + } + + if (ret == 0) + resolve_loc_touchup (frame); + + server_resolve_all (frame); + + return 0; +} + + +int +server_resolve_fd (call_frame_t *frame) +{ + server_state_t *state = NULL; + xlator_t *this = NULL; + server_resolve_t *resolve = NULL; + server_connection_t *conn = NULL; + uint64_t fd_no = -1; + + state = CALL_STATE (frame); + this = frame->this; + resolve = state->resolve_now; + conn = SERVER_CONNECTION (frame); + + fd_no = resolve->fd_no; + + state->fd = gf_fd_fdptr_get (conn->fdtable, fd_no); + + if (!state->fd) { + resolve->op_ret = -1; + resolve->op_errno = EBADFD; + } + + server_resolve_all (frame); + + return 0; +} + + +int +server_resolve (call_frame_t *frame) +{ + server_state_t *state = 
NULL; + xlator_t *this = NULL; + server_resolve_t *resolve = NULL; + + state = CALL_STATE (frame); + this = frame->this; + resolve = state->resolve_now; + + if (resolve->fd_no != -1) { + + server_resolve_fd (frame); + + } else if (resolve->par) { + + server_resolve_entry (frame); + + } else if (resolve->ino) { + + server_resolve_inode (frame); + + } else if (resolve->path) { + + resolve_path_deep (frame); + + } else { + + resolve->op_ret = -1; + resolve->op_errno = EINVAL; + + server_resolve_all (frame); + } + + return 0; +} + + +int +server_resolve_done (call_frame_t *frame) +{ + server_state_t *state = NULL; + xlator_t *bound_xl = NULL; + + state = CALL_STATE (frame); + bound_xl = BOUND_XL (frame); + + server_print_request (frame); + + state->resume_fn (frame, bound_xl); + + return 0; +} + + +/* + * This function is called multiple times, once per resolving one location/fd. + * state->resolve_now is used to decide which location/fd is to be resolved now + */ +int +server_resolve_all (call_frame_t *frame) +{ + server_state_t *state = NULL; + xlator_t *this = NULL; + + this = frame->this; + state = CALL_STATE (frame); + + if (state->resolve_now == NULL) { + + state->resolve_now = &state->resolve; + state->loc_now = &state->loc; + + server_resolve (frame); + + } else if (state->resolve_now == &state->resolve) { + + state->resolve_now = &state->resolve2; + state->loc_now = &state->loc2; + + server_resolve (frame); + + } else if (state->resolve_now == &state->resolve2) { + + server_resolve_done (frame); + + } else { + gf_log (this->name, GF_LOG_ERROR, + "Invalid pointer for state->resolve_now"); + } + + return 0; +} + + +int +resolve_and_resume (call_frame_t *frame, server_resume_fn_t fn) +{ + server_state_t *state = NULL; + xlator_t *this = NULL; + + state = CALL_STATE (frame); + state->resume_fn = fn; + + this = frame->this; + + server_resolve_all (frame); + + return 0; +} diff --git a/xlators/protocol/legacy/transport/Makefile.am 
b/xlators/protocol/legacy/transport/Makefile.am new file mode 100644 index 00000000000..e2f97437c12 --- /dev/null +++ b/xlators/protocol/legacy/transport/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = socket $(IBVERBS_SUBDIR) + +CLEANFILES = diff --git a/xlators/protocol/legacy/transport/ib-verbs/Makefile.am b/xlators/protocol/legacy/transport/ib-verbs/Makefile.am new file mode 100644 index 00000000000..f963effea22 --- /dev/null +++ b/xlators/protocol/legacy/transport/ib-verbs/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = src
\ No newline at end of file diff --git a/xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am b/xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am new file mode 100644 index 00000000000..3db7aff9871 --- /dev/null +++ b/xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am @@ -0,0 +1,19 @@ +# TODO : need to change transportdir + +transport_LTLIBRARIES = ib-verbs.la +transportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/transport + +ib_verbs_la_LDFLAGS = -module -avoidversion + +ib_verbs_la_SOURCES = ib-verbs.c name.c +ib_verbs_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + -libverbs $(top_builddir)/xlators/protocol/legacy/lib/src/libgfproto.la + +noinst_HEADERS = ib-verbs.h name.h ib-verbs-mem-types.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \ + -I$(top_srcdir)/xlators/protocol/legacy/transport/ib-verbs \ + -I$(top_srcdir)/xlators/protocol/legacy/lib/src + +CLEANFILES = *~ diff --git a/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h b/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h new file mode 100644 index 00000000000..bac559646fc --- /dev/null +++ b/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h @@ -0,0 +1,39 @@ + +/* + Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#ifndef __IB_VERBS_MEM_TYPES_H__ +#define __IB_VERBS_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_ib_verbs_mem_types_ { + gf_ibv_mt_ib_verbs_private_t = gf_common_mt_end + 1, + gf_ibv_mt_ib_verbs_ioq_t, + gf_ibv_mt_transport_t, + gf_ibv_mt_ib_verbs_local_t, + gf_ibv_mt_ib_verbs_post_t, + gf_ibv_mt_char, + gf_ibv_mt_qpent, + gf_ibv_mt_ib_verbs_device_t, + gf_ibv_mt_end +}; +#endif + diff --git a/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c b/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c new file mode 100644 index 00000000000..c14be17e6e8 --- /dev/null +++ b/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c @@ -0,0 +1,2616 @@ +/* + Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "dict.h" +#include "glusterfs.h" +#include "transport.h" +#include "protocol.h" +#include "logging.h" +#include "xlator.h" +#include "name.h" +#include "ib-verbs.h" +#include <signal.h> + +int32_t +gf_resolve_ip6 (const char *hostname, + uint16_t port, + int family, + void **dnscache, + struct addrinfo **addr_info); + +static uint16_t +ib_verbs_get_local_lid (struct ibv_context *context, + int32_t port) +{ + struct ibv_port_attr attr; + + if (ibv_query_port (context, port, &attr)) + return 0; + + return attr.lid; +} + +static const char * +get_port_state_str(enum ibv_port_state pstate) +{ + switch (pstate) { + case IBV_PORT_DOWN: return "PORT_DOWN"; + case IBV_PORT_INIT: return "PORT_INIT"; + case IBV_PORT_ARMED: return "PORT_ARMED"; + case IBV_PORT_ACTIVE: return "PORT_ACTIVE"; + case IBV_PORT_ACTIVE_DEFER: return "PORT_ACTIVE_DEFER"; + default: return "invalid state"; + } +} + +static int32_t +ib_check_active_port (struct ibv_context *ctx, uint8_t port) +{ + struct ibv_port_attr port_attr; + + int32_t ret = 0; + const char *state_str = NULL; + + if (!ctx) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "Error in supplied context"); + return -1; + } + + ret = ibv_query_port (ctx, port, &port_attr); + + if (ret) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "Failed to query port %u properties", port); + return -1; + } + + state_str = get_port_state_str (port_attr.state); + gf_log ("transport/ib-verbs", GF_LOG_TRACE, + "Infiniband PORT: (%u) STATE: (%s)", + port, state_str); + + if (port_attr.state == IBV_PORT_ACTIVE) + return 0; + + return -1; +} + +static int32_t +ib_get_active_port (struct ibv_context *ib_ctx) +{ + struct ibv_device_attr ib_device_attr; + + int32_t ret = -1; + uint8_t ib_port = 0; + + if (!ib_ctx) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "Error in supplied context"); + return -1; + } + if (ibv_query_device (ib_ctx, &ib_device_attr)) { + gf_log 
("transport/ib-verbs", GF_LOG_ERROR, + "Failed to query device properties"); + return -1; + } + + for (ib_port = 1; ib_port <= ib_device_attr.phys_port_cnt; ++ib_port) { + ret = ib_check_active_port (ib_ctx, ib_port); + if (ret == 0) + return ib_port; + + gf_log ("transport/ib-verbs", GF_LOG_TRACE, + "Port:(%u) not active", ib_port); + continue; + } + return ret; +} + + + +static void +ib_verbs_put_post (ib_verbs_queue_t *queue, + ib_verbs_post_t *post) +{ + pthread_mutex_lock (&queue->lock); + if (post->prev) { + queue->active_count--; + post->prev->next = post->next; + } + if (post->next) + post->next->prev = post->prev; + post->prev = &queue->passive_posts; + post->next = post->prev->next; + post->prev->next = post; + post->next->prev = post; + queue->passive_count++; + pthread_mutex_unlock (&queue->lock); +} + + +static ib_verbs_post_t * +ib_verbs_new_post (ib_verbs_device_t *device, int32_t len) +{ + ib_verbs_post_t *post; + + post = (ib_verbs_post_t *) GF_CALLOC (1, sizeof (*post), + gf_ibv_mt_ib_verbs_post_t); + if (!post) + return NULL; + + post->buf_size = len; + + post->buf = valloc (len); + if (!post->buf) { + GF_FREE (post); + return NULL; + } + + post->mr = ibv_reg_mr (device->pd, + post->buf, + post->buf_size, + IBV_ACCESS_LOCAL_WRITE); + if (!post->mr) { + free (post->buf); + GF_FREE (post); + return NULL; + } + + return post; +} + + +static ib_verbs_post_t * +ib_verbs_get_post (ib_verbs_queue_t *queue) +{ + ib_verbs_post_t *post; + + pthread_mutex_lock (&queue->lock); + { + post = queue->passive_posts.next; + if (post == &queue->passive_posts) + post = NULL; + + if (post) { + if (post->prev) + post->prev->next = post->next; + if (post->next) + post->next->prev = post->prev; + post->prev = &queue->active_posts; + post->next = post->prev->next; + post->prev->next = post; + post->next->prev = post; + post->reused++; + queue->active_count++; + } + } + pthread_mutex_unlock (&queue->lock); + + return post; +} + +void +ib_verbs_destroy_post 
(ib_verbs_post_t *post) +{ + ibv_dereg_mr (post->mr); + free (post->buf); + GF_FREE (post); +} + + +static int32_t +__ib_verbs_quota_get (ib_verbs_peer_t *peer) +{ + int32_t ret = -1; + ib_verbs_private_t *priv = peer->trans->private; + + if (priv->connected && peer->quota > 0) { + ret = peer->quota--; + } + + return ret; +} + +/* + static int32_t + ib_verbs_quota_get (ib_verbs_peer_t *peer) + { + int32_t ret = -1; + ib_verbs_private_t *priv = peer->trans->private; + + pthread_mutex_lock (&priv->write_mutex); + { + ret = __ib_verbs_quota_get (peer); + } + pthread_mutex_unlock (&priv->write_mutex); + + return ret; + } +*/ + +static void +__ib_verbs_ioq_entry_free (ib_verbs_ioq_t *entry) +{ + list_del_init (&entry->list); + if (entry->iobref) + iobref_unref (entry->iobref); + + /* TODO: use mem-pool */ + GF_FREE (entry->buf); + + /* TODO: use mem-pool */ + GF_FREE (entry); +} + + +static void +__ib_verbs_ioq_flush (ib_verbs_peer_t *peer) +{ + ib_verbs_ioq_t *entry = NULL, *dummy = NULL; + + list_for_each_entry_safe (entry, dummy, &peer->ioq, list) { + __ib_verbs_ioq_entry_free (entry); + } +} + + +static int32_t +__ib_verbs_disconnect (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + int32_t ret = 0; + + if (priv->connected || priv->tcp_connected) { + fcntl (priv->sock, F_SETFL, O_NONBLOCK); + if (shutdown (priv->sock, SHUT_RDWR) != 0) { + gf_log ("transport/ib-verbs", + GF_LOG_DEBUG, + "shutdown () - error: %s", + strerror (errno)); + ret = -errno; + priv->tcp_connected = 0; + } + } + + return ret; +} + + +static int32_t +ib_verbs_post_send (struct ibv_qp *qp, + ib_verbs_post_t *post, + int32_t len) +{ + struct ibv_sge list = { + .addr = (unsigned long) post->buf, + .length = len, + .lkey = post->mr->lkey + }; + + struct ibv_send_wr wr = { + .wr_id = (unsigned long) post, + .sg_list = &list, + .num_sge = 1, + .opcode = IBV_WR_SEND, + .send_flags = IBV_SEND_SIGNALED, + }, *bad_wr; + + if (!qp) + return -1; + + return ibv_post_send (qp, &wr, 
&bad_wr); +} + + +static int32_t +__ib_verbs_ioq_churn_entry (ib_verbs_peer_t *peer, ib_verbs_ioq_t *entry) +{ + int32_t ret = 0, quota = 0; + ib_verbs_private_t *priv = peer->trans->private; + ib_verbs_device_t *device = priv->device; + ib_verbs_options_t *options = &priv->options; + ib_verbs_post_t *post = NULL; + int32_t len = 0; + + quota = __ib_verbs_quota_get (peer); + if (quota > 0) { + post = ib_verbs_get_post (&device->sendq); + if (!post) + post = ib_verbs_new_post (device, + (options->send_size + 2048)); + + len = iov_length ((const struct iovec *)&entry->vector, + entry->count); + if (len >= (options->send_size + 2048)) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "increase value of option 'transport.ib-verbs." + "work-request-send-size' (given=> %"PRId64") " + "to send bigger (%d) messages", + (options->send_size + 2048), len); + return -1; + } + + iov_unload (post->buf, + (const struct iovec *)&entry->vector, + entry->count); + + ret = ib_verbs_post_send (peer->qp, post, len); + if (!ret) { + __ib_verbs_ioq_entry_free (entry); + ret = len; + } else { + gf_log ("transport/ib-verbs", GF_LOG_DEBUG, + "ibv_post_send failed with ret = %d", ret); + ib_verbs_put_post (&device->sendq, post); + __ib_verbs_disconnect (peer->trans); + ret = -1; + } + } + + return ret; +} + + +static int32_t +__ib_verbs_ioq_churn (ib_verbs_peer_t *peer) +{ + ib_verbs_ioq_t *entry = NULL; + int32_t ret = 0; + + while (!list_empty (&peer->ioq)) + { + /* pick next entry */ + entry = peer->ioq_next; + + ret = __ib_verbs_ioq_churn_entry (peer, entry); + + if (ret <= 0) + break; + } + + /* + list_for_each_entry_safe (entry, dummy, &peer->ioq, list) { + ret = __ib_verbs_ioq_churn_entry (peer, entry); + if (ret <= 0) { + break; + } + } + */ + + return ret; +} + +static int32_t +__ib_verbs_quota_put (ib_verbs_peer_t *peer) +{ + int32_t ret; + + peer->quota++; + ret = peer->quota; + + if (!list_empty (&peer->ioq)) { + ret = __ib_verbs_ioq_churn (peer); + } + + return ret; +} + + 
+static int32_t +ib_verbs_quota_put (ib_verbs_peer_t *peer) +{ + int32_t ret; + ib_verbs_private_t *priv = peer->trans->private; + + pthread_mutex_lock (&priv->write_mutex); + { + ret = __ib_verbs_quota_put (peer); + } + pthread_mutex_unlock (&priv->write_mutex); + + return ret; +} + + +static int32_t +ib_verbs_post_recv (struct ibv_srq *srq, + ib_verbs_post_t *post) +{ + struct ibv_sge list = { + .addr = (unsigned long) post->buf, + .length = post->buf_size, + .lkey = post->mr->lkey + }; + + struct ibv_recv_wr wr = { + .wr_id = (unsigned long) post, + .sg_list = &list, + .num_sge = 1, + }, *bad_wr; + + return ibv_post_srq_recv (srq, &wr, &bad_wr); +} + + +static int32_t +ib_verbs_writev (transport_t *this, + ib_verbs_ioq_t *entry) +{ + int32_t ret = 0, need_append = 1; + ib_verbs_private_t *priv = this->private; + ib_verbs_peer_t *peer = NULL; + + pthread_mutex_lock (&priv->write_mutex); + { + if (!priv->connected) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "ib-verbs is not connected to post a " + "send request"); + ret = -1; + goto unlock; + } + + peer = &priv->peer; + if (list_empty (&peer->ioq)) { + ret = __ib_verbs_ioq_churn_entry (peer, entry); + if (ret != 0) { + need_append = 0; + } + } + + if (need_append) { + list_add_tail (&entry->list, &peer->ioq); + } + } +unlock: + pthread_mutex_unlock (&priv->write_mutex); + return ret; +} + + +static ib_verbs_ioq_t * +ib_verbs_ioq_new (char *buf, int len, struct iovec *vector, + int count, struct iobref *iobref) +{ + ib_verbs_ioq_t *entry = NULL; + + /* TODO: use mem-pool */ + entry = GF_CALLOC (1, sizeof (*entry), gf_ibv_mt_ib_verbs_ioq_t); + + assert (count <= (MAX_IOVEC-2)); + + entry->header.colonO[0] = ':'; + entry->header.colonO[1] = 'O'; + entry->header.colonO[2] = '\0'; + entry->header.version = 42; + entry->header.size1 = hton32 (len); + entry->header.size2 = hton32 (iov_length (vector, count)); + + entry->vector[0].iov_base = &entry->header; + entry->vector[0].iov_len = sizeof (entry->header); + 
entry->count++; + + entry->vector[1].iov_base = buf; + entry->vector[1].iov_len = len; + entry->count++; + + if (vector && count) + { + memcpy (&entry->vector[2], vector, sizeof (*vector) * count); + entry->count += count; + } + + if (iobref) + entry->iobref = iobref_ref (iobref); + + entry->buf = buf; + + INIT_LIST_HEAD (&entry->list); + + return entry; +} + + +static int32_t +ib_verbs_submit (transport_t *this, char *buf, int32_t len, + struct iovec *vector, int count, struct iobref *iobref) +{ + int32_t ret = 0; + ib_verbs_ioq_t *entry = NULL; + + entry = ib_verbs_ioq_new (buf, len, vector, count, iobref); + ret = ib_verbs_writev (this, entry); + + if (ret > 0) { + ret = 0; + } + + return ret; +} + +static int +ib_verbs_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p, + struct iobuf **iobuf_p) +{ + ib_verbs_private_t *priv = this->private; + /* TODO: return error if !priv->connected, check with locks */ + /* TODO: boundry checks for data_ptr/offset */ + char *copy_from = NULL; + ib_verbs_header_t *header = NULL; + uint32_t size1, size2, data_len = 0; + char *hdr = NULL; + struct iobuf *iobuf = NULL; + int32_t ret = 0; + + pthread_mutex_lock (&priv->recv_mutex); + { +/* + while (!priv->data_ptr) + pthread_cond_wait (&priv->recv_cond, &priv->recv_mutex); +*/ + + copy_from = priv->data_ptr + priv->data_offset; + + priv->data_ptr = NULL; + data_len = priv->data_len; + pthread_cond_broadcast (&priv->recv_cond); + } + pthread_mutex_unlock (&priv->recv_mutex); + + header = (ib_verbs_header_t *)copy_from; + if (strcmp (header->colonO, ":O")) { + gf_log ("transport/ib-verbs", GF_LOG_DEBUG, + "%s: corrupt header received", this->xl->name); + ret = -1; + goto err; + } + + size1 = ntoh32 (header->size1); + size2 = ntoh32 (header->size2); + + if (data_len != (size1 + size2 + sizeof (*header))) { + gf_log ("transport/ib-verbs", GF_LOG_DEBUG, + "%s: sizeof data read from transport is not equal " + "to the size specified in the header", + this->xl->name); + ret = -1; 
+ goto err; + } + + copy_from += sizeof (*header); + + if (size1) { + hdr = GF_CALLOC (1, size1, gf_ibv_mt_char); + if (!hdr) { + gf_log (this->xl->name, GF_LOG_ERROR, + "unable to allocate header for peer %s", + this->peerinfo.identifier); + ret = -ENOMEM; + goto err; + } + memcpy (hdr, copy_from, size1); + copy_from += size1; + *hdr_p = hdr; + } + *hdrlen_p = size1; + + if (size2) { + iobuf = iobuf_get (this->xl->ctx->iobuf_pool); + if (!iobuf) { + gf_log (this->xl->name, GF_LOG_ERROR, + "unable to allocate IO buffer for peer %s", + this->peerinfo.identifier); + ret = -ENOMEM; + goto err; + } + memcpy (iobuf->ptr, copy_from, size2); + *iobuf_p = iobuf; + } + +err: + return ret; +} + + +static void +ib_verbs_destroy_cq (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + ib_verbs_device_t *device = priv->device; + + if (device->recv_cq) + ibv_destroy_cq (device->recv_cq); + device->recv_cq = NULL; + + if (device->send_cq) + ibv_destroy_cq (device->send_cq); + device->send_cq = NULL; + + return; +} + + +static int32_t +ib_verbs_create_cq (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + ib_verbs_options_t *options = &priv->options; + ib_verbs_device_t *device = priv->device; + int32_t ret = 0; + + device->recv_cq = ibv_create_cq (priv->device->context, + options->recv_count * 2, + device, + device->recv_chan, + 0); + if (!device->recv_cq) { + gf_log ("transport/ib-verbs", + GF_LOG_ERROR, + "%s: creation of CQ failed", + this->xl->name); + ret = -1; + } else if (ibv_req_notify_cq (device->recv_cq, 0)) { + gf_log ("transport/ib-verbs", + GF_LOG_ERROR, + "%s: ibv_req_notify_cq on CQ failed", + this->xl->name); + ret = -1; + } + + do { + /* TODO: make send_cq size dynamically adaptive */ + device->send_cq = ibv_create_cq (priv->device->context, + options->send_count * 1024, + device, + device->send_chan, + 0); + if (!device->send_cq) { + gf_log ("transport/ib-verbs", + GF_LOG_ERROR, + "%s: creation of send_cq failed", + 
this->xl->name); + ret = -1; + break; + } + + if (ibv_req_notify_cq (device->send_cq, 0)) { + gf_log ("transport/ib-verbs", + GF_LOG_ERROR, + "%s: ibv_req_notify_cq on send_cq failed", + this->xl->name); + ret = -1; + break; + } + } while (0); + + if (ret != 0) + ib_verbs_destroy_cq (this); + + return ret; +} + + +static void +ib_verbs_register_peer (ib_verbs_device_t *device, + int32_t qp_num, + ib_verbs_peer_t *peer) +{ + struct _qpent *ent; + ib_verbs_qpreg_t *qpreg = &device->qpreg; + int32_t hash = qp_num % 42; + + pthread_mutex_lock (&qpreg->lock); + ent = qpreg->ents[hash].next; + while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num)) + ent = ent->next; + if (ent->qp_num == qp_num) { + pthread_mutex_unlock (&qpreg->lock); + return; + } + ent = (struct _qpent *) GF_CALLOC (1, sizeof (*ent), gf_ibv_mt_qpent); + ERR_ABORT (ent); + /* TODO: ref reg->peer */ + ent->peer = peer; + ent->next = &qpreg->ents[hash]; + ent->prev = ent->next->prev; + ent->next->prev = ent; + ent->prev->next = ent; + ent->qp_num = qp_num; + qpreg->count++; + pthread_mutex_unlock (&qpreg->lock); +} + + +static void +ib_verbs_unregister_peer (ib_verbs_device_t *device, + int32_t qp_num) +{ + struct _qpent *ent; + ib_verbs_qpreg_t *qpreg = &device->qpreg; + int32_t hash = qp_num % 42; + + pthread_mutex_lock (&qpreg->lock); + ent = qpreg->ents[hash].next; + while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num)) + ent = ent->next; + if (ent->qp_num != qp_num) { + pthread_mutex_unlock (&qpreg->lock); + return; + } + ent->prev->next = ent->next; + ent->next->prev = ent->prev; + /* TODO: unref reg->peer */ + GF_FREE (ent); + qpreg->count--; + pthread_mutex_unlock (&qpreg->lock); +} + + +static ib_verbs_peer_t * +__ib_verbs_lookup_peer (ib_verbs_device_t *device, int32_t qp_num) +{ + struct _qpent *ent = NULL; + ib_verbs_peer_t *peer = NULL; + ib_verbs_qpreg_t *qpreg = NULL; + int32_t hash = 0; + + qpreg = &device->qpreg; + hash = qp_num % 42; + ent = qpreg->ents[hash].next; + 
while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num)) + ent = ent->next; + + if (ent != &qpreg->ents[hash]) { + peer = ent->peer; + } + + return peer; +} + +/* +static ib_verbs_peer_t * +ib_verbs_lookup_peer (ib_verbs_device_t *device, + int32_t qp_num) +{ + ib_verbs_qpreg_t *qpreg = NULL; + ib_verbs_peer_t *peer = NULL; + + qpreg = &device->qpreg; + pthread_mutex_lock (&qpreg->lock); + { + peer = __ib_verbs_lookup_peer (device, qp_num); + } + pthread_mutex_unlock (&qpreg->lock); + + return peer; +} +*/ + + +static void +__ib_verbs_destroy_qp (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + + if (priv->peer.qp) { + ib_verbs_unregister_peer (priv->device, priv->peer.qp->qp_num); + ibv_destroy_qp (priv->peer.qp); + } + priv->peer.qp = NULL; + + return; +} + + +static int32_t +ib_verbs_create_qp (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + ib_verbs_options_t *options = &priv->options; + ib_verbs_device_t *device = priv->device; + int32_t ret = 0; + ib_verbs_peer_t *peer; + + peer = &priv->peer; + struct ibv_qp_init_attr init_attr = { + .send_cq = device->send_cq, + .recv_cq = device->recv_cq, + .srq = device->srq, + .cap = { + .max_send_wr = peer->send_count, + .max_recv_wr = peer->recv_count, + .max_send_sge = 1, + .max_recv_sge = 1 + }, + .qp_type = IBV_QPT_RC + }; + + struct ibv_qp_attr attr = { + .qp_state = IBV_QPS_INIT, + .pkey_index = 0, + .port_num = options->port, + .qp_access_flags = 0 + }; + + peer->qp = ibv_create_qp (device->pd, &init_attr); + if (!peer->qp) { + gf_log ("transport/ib-verbs", + GF_LOG_CRITICAL, + "%s: could not create QP", + this->xl->name); + ret = -1; + goto out; + } else if (ibv_modify_qp (peer->qp, &attr, + IBV_QP_STATE | + IBV_QP_PKEY_INDEX | + IBV_QP_PORT | + IBV_QP_ACCESS_FLAGS)) { + gf_log ("transport/ib-verbs", + GF_LOG_ERROR, + "%s: failed to modify QP to INIT state", + this->xl->name); + ret = -1; + goto out; + } + + peer->local_lid = ib_verbs_get_local_lid (device->context, + 
options->port); + peer->local_qpn = peer->qp->qp_num; + peer->local_psn = lrand48 () & 0xffffff; + + ib_verbs_register_peer (device, peer->qp->qp_num, peer); + +out: + if (ret == -1) + __ib_verbs_destroy_qp (this); + + return ret; +} + + +static void +ib_verbs_destroy_posts (transport_t *this) +{ + +} + + +static int32_t +__ib_verbs_create_posts (transport_t *this, + int32_t count, + int32_t size, + ib_verbs_queue_t *q) +{ + int32_t i; + int32_t ret = 0; + ib_verbs_private_t *priv = this->private; + ib_verbs_device_t *device = priv->device; + + for (i=0 ; i<count ; i++) { + ib_verbs_post_t *post; + + post = ib_verbs_new_post (device, size + 2048); + if (!post) { + gf_log ("transport/ib-verbs", + GF_LOG_ERROR, + "%s: post creation failed", + this->xl->name); + ret = -1; + break; + } + + ib_verbs_put_post (q, post); + } + return ret; +} + + +static int32_t +ib_verbs_create_posts (transport_t *this) +{ + int32_t i, ret; + ib_verbs_post_t *post = NULL; + ib_verbs_private_t *priv = this->private; + ib_verbs_options_t *options = &priv->options; + ib_verbs_device_t *device = priv->device; + + ret = __ib_verbs_create_posts (this, options->send_count, + options->send_size, + &device->sendq); + if (!ret) + ret = __ib_verbs_create_posts (this, options->recv_count, + options->recv_size, + &device->recvq); + + if (!ret) { + for (i=0 ; i<options->recv_count ; i++) { + post = ib_verbs_get_post (&device->recvq); + if (ib_verbs_post_recv (device->srq, post) != 0) { + ret = -1; + break; + } + } + } + + if (ret) + ib_verbs_destroy_posts (this); + + return ret; +} + + +static int32_t +ib_verbs_connect_qp (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + ib_verbs_options_t *options = &priv->options; + struct ibv_qp_attr attr = { + .qp_state = IBV_QPS_RTR, + .path_mtu = options->mtu, + .dest_qp_num = priv->peer.remote_qpn, + .rq_psn = priv->peer.remote_psn, + .max_dest_rd_atomic = 1, + .min_rnr_timer = 12, + .ah_attr = { + .is_global = 0, + .dlid = 
priv->peer.remote_lid, + .sl = 0, + .src_path_bits = 0, + .port_num = options->port + } + }; + if (ibv_modify_qp (priv->peer.qp, &attr, + IBV_QP_STATE | + IBV_QP_AV | + IBV_QP_PATH_MTU | + IBV_QP_DEST_QPN | + IBV_QP_RQ_PSN | + IBV_QP_MAX_DEST_RD_ATOMIC | + IBV_QP_MIN_RNR_TIMER)) { + gf_log ("transport/ib-verbs", + GF_LOG_CRITICAL, + "Failed to modify QP to RTR\n"); + return -1; + } + + /* TODO: make timeout and retry_cnt configurable from options */ + attr.qp_state = IBV_QPS_RTS; + attr.timeout = 14; + attr.retry_cnt = 7; + attr.rnr_retry = 7; + attr.sq_psn = priv->peer.local_psn; + attr.max_rd_atomic = 1; + if (ibv_modify_qp (priv->peer.qp, &attr, + IBV_QP_STATE | + IBV_QP_TIMEOUT | + IBV_QP_RETRY_CNT | + IBV_QP_RNR_RETRY | + IBV_QP_SQ_PSN | + IBV_QP_MAX_QP_RD_ATOMIC)) { + gf_log ("transport/ib-verbs", + GF_LOG_CRITICAL, + "Failed to modify QP to RTS\n"); + return -1; + } + + return 0; +} + +static int32_t +__ib_verbs_teardown (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + + __ib_verbs_destroy_qp (this); + + if (!list_empty (&priv->peer.ioq)) { + __ib_verbs_ioq_flush (&priv->peer); + } + + /* TODO: decrement cq size */ + return 0; +} + +/* + * return value: + * 0 = success (completed) + * -1 = error + * > 0 = incomplete + */ + +static int +__tcp_rwv (transport_t *this, struct iovec *vector, int count, + struct iovec **pending_vector, int *pending_count, + int write) +{ + ib_verbs_private_t *priv = NULL; + int sock = -1; + int ret = -1; + struct iovec *opvector = vector; + int opcount = count; + int moved = 0; + + priv = this->private; + sock = priv->sock; + + while (opcount) + { + if (write) + { + ret = writev (sock, opvector, opcount); + + if (ret == 0 || (ret == -1 && errno == EAGAIN)) + { + /* done for now */ + break; + } + } + else + { + ret = readv (sock, opvector, opcount); + + if (ret == -1 && errno == EAGAIN) + { + /* done for now */ + break; + } + } + + if (ret == 0) + { + gf_log (this->xl->name, GF_LOG_DEBUG, + "EOF from peer %s", 
this->peerinfo.identifier); + opcount = -1; + errno = ENOTCONN; + break; + } + + if (ret == -1) + { + if (errno == EINTR) + continue; + + gf_log (this->xl->name, GF_LOG_DEBUG, + "%s failed (%s)", write ? "writev" : "readv", + strerror (errno)); + if (write && !priv->connected && + (errno == ECONNREFUSED)) + gf_log (this->xl->name, GF_LOG_ERROR, + "possible mismatch of 'transport-type'" + " in protocol server and client. " + "check volume file"); + opcount = -1; + break; + } + + moved = 0; + + while (moved < ret) + { + if ((ret - moved) >= opvector[0].iov_len) + { + moved += opvector[0].iov_len; + opvector++; + opcount--; + } + else + { + opvector[0].iov_len -= (ret - moved); + opvector[0].iov_base += (ret - moved); + moved += (ret - moved); + } + while (opcount && !opvector[0].iov_len) + { + opvector++; + opcount--; + } + } + } + + if (pending_vector) + *pending_vector = opvector; + + if (pending_count) + *pending_count = opcount; + + return opcount; +} + + +static int +__tcp_readv (transport_t *this, struct iovec *vector, int count, + struct iovec **pending_vector, int *pending_count) +{ + int ret = -1; + + ret = __tcp_rwv (this, vector, count, + pending_vector, pending_count, 0); + + return ret; +} + + +static int +__tcp_writev (transport_t *this, struct iovec *vector, int count, + struct iovec **pending_vector, int *pending_count) +{ + int ret = -1; + ib_verbs_private_t *priv = this->private; + + ret = __tcp_rwv (this, vector, count, pending_vector, + pending_count, 1); + + if (ret > 0) { + /* TODO: Avoid multiple calls when socket is already + registered for POLLOUT */ + priv->idx = event_select_on (this->xl->ctx->event_pool, + priv->sock, priv->idx, -1, 1); + } else if (ret == 0) { + priv->idx = event_select_on (this->xl->ctx->event_pool, + priv->sock, + priv->idx, -1, 0); + } + + return ret; +} + + +static void * +ib_verbs_recv_completion_proc (void *data) +{ + struct ibv_comp_channel *chan = data; + ib_verbs_private_t *priv = NULL; + ib_verbs_device_t 
*device; + ib_verbs_post_t *post; + ib_verbs_peer_t *peer; + struct ibv_cq *event_cq; + struct ibv_wc wc; + void *event_ctx; + int32_t ret = 0; + + + while (1) { + ret = ibv_get_cq_event (chan, &event_cq, &event_ctx); + if (ret) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "ibv_get_cq_event failed, terminating recv " + "thread %d (%d)", ret, errno); + continue; + } + + device = event_ctx; + + ret = ibv_req_notify_cq (event_cq, 0); + if (ret) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "ibv_req_notify_cq on %s failed, terminating " + "recv thread: %d (%d)", + device->device_name, ret, errno); + continue; + } + + device = (ib_verbs_device_t *) event_ctx; + + while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) { + post = (ib_verbs_post_t *) (long) wc.wr_id; + + pthread_mutex_lock (&device->qpreg.lock); + { + peer = __ib_verbs_lookup_peer (device, + wc.qp_num); + + /* + * keep a refcount on transport so that it + * doesnot get freed because of some error + * indicated by wc.status till we are done + * with usage of peer and thereby that of trans. 
+ */ + if (peer != NULL) { + transport_ref (peer->trans); + } + } + pthread_mutex_unlock (&device->qpreg.lock); + + if (wc.status != IBV_WC_SUCCESS) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "recv work request on `%s' returned " + "error (%d)", + device->device_name, + wc.status); + if (peer) { + transport_unref (peer->trans); + transport_disconnect (peer->trans); + } + + if (post) { + ib_verbs_post_recv (device->srq, post); + } + continue; + } + + if (peer) { + priv = peer->trans->private; + + pthread_mutex_lock (&priv->recv_mutex); + { + while (priv->data_ptr) + pthread_cond_wait (&priv->recv_cond, + &priv->recv_mutex); + + priv->data_ptr = post->buf; + priv->data_offset = 0; + priv->data_len = wc.byte_len; + + /*pthread_cond_broadcast (&priv->recv_cond);*/ + } + pthread_mutex_unlock (&priv->recv_mutex); + + if ((ret = xlator_notify (peer->trans->xl, GF_EVENT_POLLIN, + peer->trans, NULL)) == -1) { + gf_log ("transport/ib-verbs", + GF_LOG_DEBUG, + "pollin notification to %s " + "failed, disconnecting " + "transport", + peer->trans->xl->name); + transport_disconnect (peer->trans); + } + + transport_unref (peer->trans); + } else { + gf_log ("transport/ib-verbs", + GF_LOG_DEBUG, + "could not lookup peer for qp_num: %d", + wc.qp_num); + } + ib_verbs_post_recv (device->srq, post); + } + + if (ret < 0) { + gf_log ("transport/ib-verbs", + GF_LOG_ERROR, + "ibv_poll_cq on `%s' returned error " + "(ret = %d, errno = %d)", + device->device_name, ret, errno); + continue; + } + ibv_ack_cq_events (event_cq, 1); + } + return NULL; +} + + +static void * +ib_verbs_send_completion_proc (void *data) +{ + struct ibv_comp_channel *chan = data; + ib_verbs_post_t *post; + ib_verbs_peer_t *peer; + struct ibv_cq *event_cq; + void *event_ctx; + ib_verbs_device_t *device; + struct ibv_wc wc; + int32_t ret; + + while (1) { + ret = ibv_get_cq_event (chan, &event_cq, &event_ctx); + if (ret) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "ibv_get_cq_event on failed, terminating " + 
"send thread: %d (%d)", ret, errno); + continue; + } + + device = event_ctx; + + ret = ibv_req_notify_cq (event_cq, 0); + if (ret) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "ibv_req_notify_cq on %s failed, terminating " + "send thread: %d (%d)", + device->device_name, ret, errno); + continue; + } + + while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) { + post = (ib_verbs_post_t *) (long) wc.wr_id; + + pthread_mutex_lock (&device->qpreg.lock); + { + peer = __ib_verbs_lookup_peer (device, + wc.qp_num); + + /* + * keep a refcount on transport so that it + * doesnot get freed because of some error + * indicated by wc.status till we are done + * with usage of peer and thereby that of trans. + */ + if (peer != NULL) { + transport_ref (peer->trans); + } + } + pthread_mutex_unlock (&device->qpreg.lock); + + if (wc.status != IBV_WC_SUCCESS) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "send work request on `%s' returned " + "error wc.status = %d, wc.vendor_err " + "= %d, post->buf = %p, wc.byte_len = " + "%d, post->reused = %d", + device->device_name, wc.status, + wc.vendor_err, + post->buf, wc.byte_len, post->reused); + if (wc.status == IBV_WC_RETRY_EXC_ERR) + gf_log ("ib-verbs", GF_LOG_ERROR, + "connection between client and" + " server not working. check by" + " running 'ibv_srq_pingpong'. " + "also make sure subnet manager" + " is running (eg: 'opensm'), " + "or check if ib-verbs port is " + "valid (or active) by running " + " 'ibv_devinfo'. 
contact " + "Gluster Support Team if " + "the problem persists."); + if (peer) + transport_disconnect (peer->trans); + } + + if (post) { + ib_verbs_put_post (&device->sendq, post); + } + + if (peer) { + int quota_ret = ib_verbs_quota_put (peer); + if (quota_ret < 0) { + gf_log ("ib-verbs", GF_LOG_DEBUG, + "failed to send message"); + + } + + transport_unref (peer->trans); + } else { + gf_log ("transport/ib-verbs", GF_LOG_DEBUG, + "could not lookup peer for qp_num: %d", + wc.qp_num); + } + } + + if (ret < 0) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "ibv_poll_cq on `%s' returned error (ret = %d," + " errno = %d)", + device->device_name, ret, errno); + continue; + } + ibv_ack_cq_events (event_cq, 1); + } + + return NULL; +} + +static void +ib_verbs_options_init (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + ib_verbs_options_t *options = &priv->options; + int32_t mtu; + data_t *temp; + + /* TODO: validate arguments from options below */ + + options->send_size = this->xl->ctx->page_size * 4; /* 512 KB */ + options->recv_size = this->xl->ctx->page_size * 4; /* 512 KB */ + options->send_count = 32; + options->recv_count = 32; + + temp = dict_get (this->xl->options, + "transport.ib-verbs.work-request-send-count"); + if (temp) + options->send_count = data_to_int32 (temp); + + temp = dict_get (this->xl->options, + "transport.ib-verbs.work-request-recv-count"); + if (temp) + options->recv_count = data_to_int32 (temp); + + options->port = 0; + temp = dict_get (this->xl->options, + "transport.ib-verbs.port"); + if (temp) + options->port = data_to_uint64 (temp); + + options->mtu = mtu = IBV_MTU_2048; + temp = dict_get (this->xl->options, + "transport.ib-verbs.mtu"); + if (temp) + mtu = data_to_int32 (temp); + switch (mtu) { + case 256: options->mtu = IBV_MTU_256; + break; + case 512: options->mtu = IBV_MTU_512; + break; + case 1024: options->mtu = IBV_MTU_1024; + break; + case 2048: options->mtu = IBV_MTU_2048; + break; + case 4096: options->mtu = 
IBV_MTU_4096; + break; + default: + if (temp) + gf_log ("transport/ib-verbs", GF_LOG_WARNING, + "%s: unrecognized MTU value '%s', defaulting " + "to '2048'", this->xl->name, + data_to_str (temp)); + else + gf_log ("transport/ib-verbs", GF_LOG_TRACE, + "%s: defaulting MTU to '2048'", + this->xl->name); + options->mtu = IBV_MTU_2048; + break; + } + + temp = dict_get (this->xl->options, + "transport.ib-verbs.device-name"); + if (temp) + options->device_name = gf_strdup (temp->data); + + return; +} + +static void +ib_verbs_queue_init (ib_verbs_queue_t *queue) +{ + pthread_mutex_init (&queue->lock, NULL); + + queue->active_posts.next = &queue->active_posts; + queue->active_posts.prev = &queue->active_posts; + queue->passive_posts.next = &queue->passive_posts; + queue->passive_posts.prev = &queue->passive_posts; +} + + +static ib_verbs_device_t * +ib_verbs_get_device (transport_t *this, + struct ibv_context *ibctx) +{ + glusterfs_ctx_t *ctx = this->xl->ctx; + ib_verbs_private_t *priv = this->private; + ib_verbs_options_t *options = &priv->options; + char *device_name = priv->options.device_name; + uint32_t port = priv->options.port; + + uint8_t active_port = 0; + int32_t ret = 0; + int32_t i = 0; + + ib_verbs_device_t *trav; + + trav = ctx->ib; + while (trav) { + if ((!strcmp (trav->device_name, device_name)) && + (trav->port == port)) + break; + trav = trav->next; + } + + if (!trav) { + + trav = GF_CALLOC (1, sizeof (*trav), + gf_ibv_mt_ib_verbs_device_t); + ERR_ABORT (trav); + priv->device = trav; + + trav->context = ibctx; + + ret = ib_get_active_port (trav->context); + + if (ret < 0) { + if (!port) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "Failed to find any active ports and " + "none specified in volume file," + " exiting"); + return NULL; + } + } + + active_port = ret; + + if (port) { + ret = ib_check_active_port (trav->context, port); + if (ret < 0) { + gf_log ("transport/ib-verbs", GF_LOG_WARNING, + "On device %s: provided port:%u is " + "found to be 
offline, continuing to " + "use the same port", device_name, port); + } + } else { + priv->options.port = active_port; + port = active_port; + gf_log ("transport/ib-verbs", GF_LOG_TRACE, + "Port unspecified in volume file using active " + "port: %u", port); + } + + trav->device_name = gf_strdup (device_name); + trav->port = port; + + trav->next = ctx->ib; + ctx->ib = trav; + + trav->send_chan = ibv_create_comp_channel (trav->context); + if (!trav->send_chan) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "%s: could not create send completion channel", + device_name); + /* TODO: cleanup current mess */ + return NULL; + } + + trav->recv_chan = ibv_create_comp_channel (trav->context); + if (!trav->recv_chan) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "could not create recv completion channel"); + /* TODO: cleanup current mess */ + return NULL; + } + + if (ib_verbs_create_cq (this) < 0) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "%s: could not create CQ", + this->xl->name); + return NULL; + } + + /* protection domain */ + trav->pd = ibv_alloc_pd (trav->context); + + if (!trav->pd) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "%s: could not allocate protection domain", + this->xl->name); + return NULL; + } + + struct ibv_srq_init_attr attr = { + .attr = { + .max_wr = options->recv_count, + .max_sge = 1 + } + }; + trav->srq = ibv_create_srq (trav->pd, &attr); + + if (!trav->srq) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "%s: could not create SRQ", + this->xl->name); + return NULL; + } + + /* queue init */ + ib_verbs_queue_init (&trav->sendq); + ib_verbs_queue_init (&trav->recvq); + + if (ib_verbs_create_posts (this) < 0) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "%s: could not allocate posts", + this->xl->name); + return NULL; + } + + /* completion threads */ + ret = pthread_create (&trav->send_thread, + NULL, + ib_verbs_send_completion_proc, + trav->send_chan); + if (ret) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "could not 
create send completion thread"); + return NULL; + } + ret = pthread_create (&trav->recv_thread, + NULL, + ib_verbs_recv_completion_proc, + trav->recv_chan); + if (ret) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "could not create recv completion thread"); + return NULL; + } + + /* qpreg */ + pthread_mutex_init (&trav->qpreg.lock, NULL); + for (i=0; i<42; i++) { + trav->qpreg.ents[i].next = &trav->qpreg.ents[i]; + trav->qpreg.ents[i].prev = &trav->qpreg.ents[i]; + } + } + return trav; +} + +static int32_t +ib_verbs_init (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + ib_verbs_options_t *options = &priv->options; + struct ibv_device **dev_list; + struct ibv_context *ib_ctx = NULL; + int32_t ret = 0; + + ib_verbs_options_init (this); + + { + dev_list = ibv_get_device_list (NULL); + + if (!dev_list) { + gf_log ("transport/ib-verbs", + GF_LOG_CRITICAL, + "Failed to get IB devices"); + ret = -1; + goto cleanup; + } + + if (!*dev_list) { + gf_log ("transport/ib-verbs", + GF_LOG_CRITICAL, + "No IB devices found"); + ret = -1; + goto cleanup; + } + + if (!options->device_name) { + if (*dev_list) { + options->device_name = + gf_strdup (ibv_get_device_name (*dev_list)); + } else { + gf_log ("transport/ib-verbs", GF_LOG_CRITICAL, + "IB device list is empty. 
Check for " + "'ib_uverbs' module"); + return -1; + goto cleanup; + } + } + + while (*dev_list) { + if (!strcmp (ibv_get_device_name (*dev_list), + options->device_name)) { + ib_ctx = ibv_open_device (*dev_list); + + if (!ib_ctx) { + gf_log ("transport/ib-verbs", + GF_LOG_ERROR, + "Failed to get infiniband" + "device context"); + ret = -1; + goto cleanup; + } + break; + } + ++dev_list; + } + + priv->device = ib_verbs_get_device (this, ib_ctx); + + if (!priv->device) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "could not create ib_verbs device for %s", + priv->device->device_name); + ret = -1; + goto cleanup; + } + } + + priv->peer.trans = this; + INIT_LIST_HEAD (&priv->peer.ioq); + + pthread_mutex_init (&priv->read_mutex, NULL); + pthread_mutex_init (&priv->write_mutex, NULL); + pthread_mutex_init (&priv->recv_mutex, NULL); + pthread_cond_init (&priv->recv_cond, NULL); + +cleanup: + if (-1 == ret) { + if (ib_ctx) + ibv_close_device (ib_ctx); + } + + if (dev_list) + ibv_free_device_list (dev_list); + + return ret; +} + + +static int32_t +ib_verbs_disconnect (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + int32_t ret = 0; + + pthread_mutex_lock (&priv->write_mutex); + { + ret = __ib_verbs_disconnect (this); + } + pthread_mutex_unlock (&priv->write_mutex); + + return ret; +} + + +static int32_t +__tcp_connect_finish (int fd) +{ + int ret = -1; + int optval = 0; + socklen_t optlen = sizeof (int); + + ret = getsockopt (fd, SOL_SOCKET, SO_ERROR, + (void *)&optval, &optlen); + + if (ret == 0 && optval) + { + errno = optval; + ret = -1; + } + + return ret; +} + +static inline void +ib_verbs_fill_handshake_data (char *buf, struct ib_verbs_nbio *nbio, + ib_verbs_private_t *priv) +{ + sprintf (buf, + "QP1:RECV_BLKSIZE=%08x:SEND_BLKSIZE=%08x\n" + "QP1:LID=%04x:QPN=%06x:PSN=%06x\n", + priv->peer.recv_size, + priv->peer.send_size, + priv->peer.local_lid, + priv->peer.local_qpn, + priv->peer.local_psn); + + nbio->vector.iov_base = buf; + 
nbio->vector.iov_len = strlen (buf) + 1; + nbio->count = 1; + return; +} + +static inline void +ib_verbs_fill_handshake_ack (char *buf, struct ib_verbs_nbio *nbio) +{ + sprintf (buf, "DONE\n"); + nbio->vector.iov_base = buf; + nbio->vector.iov_len = strlen (buf) + 1; + nbio->count = 1; + return; +} + +static int +ib_verbs_handshake_pollin (transport_t *this) +{ + int ret = 0; + ib_verbs_private_t *priv = this->private; + char *buf = priv->handshake.incoming.buf; + int32_t recv_buf_size, send_buf_size; + socklen_t sock_len; + + if (priv->handshake.incoming.state == IB_VERBS_HANDSHAKE_COMPLETE) { + return -1; + } + + pthread_mutex_lock (&priv->write_mutex); + { + while (priv->handshake.incoming.state != IB_VERBS_HANDSHAKE_COMPLETE) + { + switch (priv->handshake.incoming.state) + { + case IB_VERBS_HANDSHAKE_START: + buf = priv->handshake.incoming.buf = GF_CALLOC (1, 256, gf_ibv_mt_char); + ib_verbs_fill_handshake_data (buf, &priv->handshake.incoming, priv); + buf[0] = 0; + priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_RECEIVING_DATA; + break; + + case IB_VERBS_HANDSHAKE_RECEIVING_DATA: + ret = __tcp_readv (this, + &priv->handshake.incoming.vector, + priv->handshake.incoming.count, + &priv->handshake.incoming.pending_vector, + &priv->handshake.incoming.pending_count); + if (ret == -1) { + goto unlock; + } + + if (ret > 0) { + gf_log (this->xl->name, GF_LOG_TRACE, + "partial header read on NB socket. 
continue later"); + goto unlock; + } + + if (!ret) { + priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_RECEIVED_DATA; + } + break; + + case IB_VERBS_HANDSHAKE_RECEIVED_DATA: + ret = sscanf (buf, + "QP1:RECV_BLKSIZE=%08x:SEND_BLKSIZE=%08x\n" + "QP1:LID=%04x:QPN=%06x:PSN=%06x\n", + &recv_buf_size, + &send_buf_size, + &priv->peer.remote_lid, + &priv->peer.remote_qpn, + &priv->peer.remote_psn); + + if ((ret != 5) && (strncmp (buf, "QP1:", 4))) { + gf_log ("transport/ib-verbs", + GF_LOG_CRITICAL, + "%s: remote-host(%s)'s " + "transport type is different", + this->xl->name, + this->peerinfo.identifier); + ret = -1; + goto unlock; + } + + if (recv_buf_size < priv->peer.recv_size) + priv->peer.recv_size = recv_buf_size; + if (send_buf_size < priv->peer.send_size) + priv->peer.send_size = send_buf_size; + + gf_log ("transport/ib-verbs", GF_LOG_TRACE, + "%s: transacted recv_size=%d " + "send_size=%d", + this->xl->name, priv->peer.recv_size, + priv->peer.send_size); + + priv->peer.quota = priv->peer.send_count; + + if (ib_verbs_connect_qp (this)) { + gf_log ("transport/ib-verbs", + GF_LOG_ERROR, + "%s: failed to connect with " + "remote QP", this->xl->name); + ret = -1; + goto unlock; + } + ib_verbs_fill_handshake_ack (buf, &priv->handshake.incoming); + buf[0] = 0; + priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_RECEIVING_ACK; + break; + + case IB_VERBS_HANDSHAKE_RECEIVING_ACK: + ret = __tcp_readv (this, + &priv->handshake.incoming.vector, + priv->handshake.incoming.count, + &priv->handshake.incoming.pending_vector, + &priv->handshake.incoming.pending_count); + if (ret == -1) { + goto unlock; + } + + if (ret > 0) { + gf_log (this->xl->name, GF_LOG_TRACE, + "partial header read on NB " + "socket. 
continue later"); + goto unlock; + } + + if (!ret) { + priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_RECEIVED_ACK; + } + break; + + case IB_VERBS_HANDSHAKE_RECEIVED_ACK: + if (strncmp (buf, "DONE", 4)) { + gf_log ("transport/ib-verbs", + GF_LOG_DEBUG, + "%s: handshake-3 did not " + "return 'DONE' (%s)", + this->xl->name, buf); + ret = -1; + goto unlock; + } + ret = 0; + priv->connected = 1; + sock_len = sizeof (struct sockaddr_storage); + getpeername (priv->sock, + (struct sockaddr *) &this->peerinfo.sockaddr, + &sock_len); + + GF_FREE (priv->handshake.incoming.buf); + priv->handshake.incoming.buf = NULL; + priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_COMPLETE; + } + } + } +unlock: + pthread_mutex_unlock (&priv->write_mutex); + + if (ret == -1) { + transport_disconnect (this); + } else { + ret = 0; + } + + if (!ret && priv->connected) { + ret = xlator_notify (this->xl, GF_EVENT_CHILD_UP, this); + } + + return ret; +} + +static int +ib_verbs_handshake_pollout (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + char *buf = priv->handshake.outgoing.buf; + int32_t ret = 0; + + if (priv->handshake.outgoing.state == IB_VERBS_HANDSHAKE_COMPLETE) { + return 0; + } + + pthread_mutex_unlock (&priv->write_mutex); + { + while (priv->handshake.outgoing.state != IB_VERBS_HANDSHAKE_COMPLETE) + { + switch (priv->handshake.outgoing.state) + { + case IB_VERBS_HANDSHAKE_START: + buf = priv->handshake.outgoing.buf = GF_CALLOC (1, 256, gf_ibv_mt_char); + ib_verbs_fill_handshake_data (buf, &priv->handshake.outgoing, priv); + priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_SENDING_DATA; + break; + + case IB_VERBS_HANDSHAKE_SENDING_DATA: + ret = __tcp_writev (this, + &priv->handshake.outgoing.vector, + priv->handshake.outgoing.count, + &priv->handshake.outgoing.pending_vector, + &priv->handshake.outgoing.pending_count); + if (ret == -1) { + goto unlock; + } + + if (ret > 0) { + gf_log (this->xl->name, GF_LOG_TRACE, + "partial header read on NB socket. 
continue later"); + goto unlock; + } + + if (!ret) { + priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_SENT_DATA; + } + break; + + case IB_VERBS_HANDSHAKE_SENT_DATA: + ib_verbs_fill_handshake_ack (buf, &priv->handshake.outgoing); + priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_SENDING_ACK; + break; + + case IB_VERBS_HANDSHAKE_SENDING_ACK: + ret = __tcp_writev (this, + &priv->handshake.outgoing.vector, + priv->handshake.outgoing.count, + &priv->handshake.outgoing.pending_vector, + &priv->handshake.outgoing.pending_count); + + if (ret == -1) { + goto unlock; + } + + if (ret > 0) { + gf_log (this->xl->name, GF_LOG_TRACE, + "partial header read on NB " + "socket. continue later"); + goto unlock; + } + + if (!ret) { + GF_FREE (priv->handshake.outgoing.buf); + priv->handshake.outgoing.buf = NULL; + priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_COMPLETE; + } + break; + } + } + } +unlock: + pthread_mutex_unlock (&priv->write_mutex); + + if (ret == -1) { + transport_disconnect (this); + } else { + ret = 0; + } + + return ret; +} + +static int +ib_verbs_handshake_pollerr (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + int32_t ret = 0; + char need_unref = 0; + + gf_log ("transport/ib-verbs", GF_LOG_DEBUG, + "%s: peer disconnected, cleaning up", + this->xl->name); + + pthread_mutex_lock (&priv->write_mutex); + { + __ib_verbs_teardown (this); + + if (priv->sock != -1) { + event_unregister (this->xl->ctx->event_pool, + priv->sock, priv->idx); + need_unref = 1; + + if (close (priv->sock) != 0) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "close () - error: %s", + strerror (errno)); + ret = -errno; + } + priv->tcp_connected = priv->connected = 0; + priv->sock = -1; + } + + if (priv->handshake.incoming.buf) { + GF_FREE (priv->handshake.incoming.buf); + priv->handshake.incoming.buf = NULL; + } + + priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_START; + + if (priv->handshake.outgoing.buf) { + GF_FREE (priv->handshake.outgoing.buf); + 
priv->handshake.outgoing.buf = NULL; + } + + priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_START; + } + pthread_mutex_unlock (&priv->write_mutex); + + xlator_notify (this->xl, GF_EVENT_POLLERR, this, NULL); + + if (need_unref) + transport_unref (this); + + return 0; +} + + +static int +tcp_connect_finish (transport_t *this) +{ + ib_verbs_private_t *priv = this->private; + int error = 0, ret = 0; + + pthread_mutex_lock (&priv->write_mutex); + { + ret = __tcp_connect_finish (priv->sock); + + if (!ret) { + this->myinfo.sockaddr_len = + sizeof (this->myinfo.sockaddr); + ret = getsockname (priv->sock, + (struct sockaddr *)&this->myinfo.sockaddr, + &this->myinfo.sockaddr_len); + if (ret == -1) + { + gf_log (this->xl->name, GF_LOG_ERROR, + "getsockname on new client-socket %d " + "failed (%s)", + priv->sock, strerror (errno)); + close (priv->sock); + error = 1; + goto unlock; + } + + get_transport_identifiers (this); + priv->tcp_connected = 1; + } + + if (ret == -1 && errno != EINPROGRESS) { + gf_log (this->xl->name, GF_LOG_ERROR, + "tcp connect to %s failed (%s)", + this->peerinfo.identifier, strerror (errno)); + error = 1; + } + } +unlock: + pthread_mutex_unlock (&priv->write_mutex); + + if (error) { + transport_disconnect (this); + } + + return ret; +} + +static int +ib_verbs_event_handler (int fd, int idx, void *data, + int poll_in, int poll_out, int poll_err) +{ + transport_t *this = data; + ib_verbs_private_t *priv = this->private; + ib_verbs_options_t *options = NULL; + int ret = 0; + + if (!priv->tcp_connected) { + ret = tcp_connect_finish (this); + if (priv->tcp_connected) { + options = &priv->options; + + priv->peer.send_count = options->send_count; + priv->peer.recv_count = options->recv_count; + priv->peer.send_size = options->send_size; + priv->peer.recv_size = options->recv_size; + + if ((ret = ib_verbs_create_qp (this)) < 0) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "%s: could not create QP", + this->xl->name); + transport_disconnect (this); + 
} + } + } + + if (!ret && poll_out && priv->tcp_connected) { + ret = ib_verbs_handshake_pollout (this); + } + + if (!ret && poll_in && priv->tcp_connected) { + if (priv->handshake.incoming.state == IB_VERBS_HANDSHAKE_COMPLETE) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "%s: pollin received on tcp socket (peer: %s) " + "after handshake is complete", + this->xl->name, this->peerinfo.identifier); + ib_verbs_handshake_pollerr (this); + return 0; + } + ret = ib_verbs_handshake_pollin (this); + } + + if (ret < 0 || poll_err) { + ret = ib_verbs_handshake_pollerr (this); + } + + return 0; +} + +static int +__tcp_nonblock (int fd) +{ + int flags = 0; + int ret = -1; + + flags = fcntl (fd, F_GETFL); + + if (flags != -1) + ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK); + + return ret; +} + +static int32_t +ib_verbs_connect (struct transport *this) +{ + dict_t *options = this->xl->options; + + ib_verbs_private_t *priv = this->private; + + int32_t ret = 0; + gf_boolean_t non_blocking = 1; + struct sockaddr_storage sockaddr; + socklen_t sockaddr_len = 0; + + if (priv->connected) { + return 0; + } + + if (dict_get (options, "non-blocking-io")) { + char *nb_connect = data_to_str (dict_get (this->xl->options, + "non-blocking-io")); + + if (gf_string2boolean (nb_connect, &non_blocking) == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "'non-blocking-io' takes only boolean " + "options, not taking any action"); + non_blocking = 1; + } + } + + ret = ibverbs_client_get_remote_sockaddr (this, (struct sockaddr *)&sockaddr, + &sockaddr_len); + if (ret != 0) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "cannot get remote address to connect"); + return ret; + } + + pthread_mutex_lock (&priv->write_mutex); + { + if (priv->sock != -1) { + ret = 0; + goto unlock; + } + + priv->sock = socket (((struct sockaddr *)&sockaddr)->sa_family, + SOCK_STREAM, 0); + + if (priv->sock == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "socket () - error: %s", strerror (errno)); + ret = -errno; + goto 
unlock; + } + + gf_log (this->xl->name, GF_LOG_TRACE, + "socket fd = %d", priv->sock); + + memcpy (&this->peerinfo.sockaddr, &sockaddr, sockaddr_len); + this->peerinfo.sockaddr_len = sockaddr_len; + + ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = + ((struct sockaddr *)&this->peerinfo.sockaddr)->sa_family; + + if (non_blocking) + { + ret = __tcp_nonblock (priv->sock); + + if (ret == -1) + { + gf_log (this->xl->name, GF_LOG_ERROR, + "could not set socket %d to non " + "blocking mode (%s)", + priv->sock, strerror (errno)); + close (priv->sock); + priv->sock = -1; + goto unlock; + } + } + + ret = client_bind (this, + (struct sockaddr *)&this->myinfo.sockaddr, + &this->myinfo.sockaddr_len, priv->sock); + if (ret == -1) + { + gf_log (this->xl->name, GF_LOG_WARNING, + "client bind failed: %s", strerror (errno)); + close (priv->sock); + priv->sock = -1; + goto unlock; + } + + ret = connect (priv->sock, + (struct sockaddr *)&this->peerinfo.sockaddr, + this->peerinfo.sockaddr_len); + if (ret == -1 && errno != EINPROGRESS) + { + gf_log (this->xl->name, GF_LOG_ERROR, + "connection attempt failed (%s)", + strerror (errno)); + close (priv->sock); + priv->sock = -1; + goto unlock; + } + + priv->tcp_connected = priv->connected = 0; + + transport_ref (this); + + priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_START; + priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_START; + + priv->idx = event_register (this->xl->ctx->event_pool, + priv->sock, ib_verbs_event_handler, + this, 1, 1); + } +unlock: + pthread_mutex_unlock (&priv->write_mutex); + + return ret; +} + +static int +ib_verbs_server_event_handler (int fd, int idx, void *data, + int poll_in, int poll_out, int poll_err) +{ + int32_t main_sock = -1; + transport_t *this, *trans = data; + ib_verbs_private_t *priv = NULL; + ib_verbs_private_t *trans_priv = (ib_verbs_private_t *) trans->private; + ib_verbs_options_t *options = NULL; + + if (!poll_in) + return 0; + + this = GF_CALLOC (1, sizeof (transport_t), + 
gf_ibv_mt_transport_t); + ERR_ABORT (this); + priv = GF_CALLOC (1, sizeof (ib_verbs_private_t), + gf_ibv_mt_ib_verbs_private_t); + ERR_ABORT (priv); + this->private = priv; + /* Copy all the ib_verbs related values in priv, from trans_priv + as other than QP, all the values remain same */ + priv->device = trans_priv->device; + priv->options = trans_priv->options; + options = &priv->options; + + this->ops = trans->ops; + this->xl = trans->xl; + this->init = trans->init; + this->fini = trans->fini; + + memcpy (&this->myinfo.sockaddr, &trans->myinfo.sockaddr, + trans->myinfo.sockaddr_len); + this->myinfo.sockaddr_len = trans->myinfo.sockaddr_len; + + main_sock = (trans_priv)->sock; + this->peerinfo.sockaddr_len = sizeof (this->peerinfo.sockaddr); + priv->sock = accept (main_sock, + (struct sockaddr *)&this->peerinfo.sockaddr, + &this->peerinfo.sockaddr_len); + if (priv->sock == -1) { + gf_log ("ib-verbs/server", GF_LOG_ERROR, + "accept() failed: %s", + strerror (errno)); + GF_FREE (this->private); + GF_FREE (this); + return -1; + } + + priv->peer.trans = this; + transport_ref (this); + + get_transport_identifiers (this); + + priv->tcp_connected = 1; + priv->handshake.incoming.state = IB_VERBS_HANDSHAKE_START; + priv->handshake.outgoing.state = IB_VERBS_HANDSHAKE_START; + + priv->peer.send_count = options->send_count; + priv->peer.recv_count = options->recv_count; + priv->peer.send_size = options->send_size; + priv->peer.recv_size = options->recv_size; + INIT_LIST_HEAD (&priv->peer.ioq); + + if (ib_verbs_create_qp (this) < 0) { + gf_log ("transport/ib-verbs", GF_LOG_ERROR, + "%s: could not create QP", + this->xl->name); + transport_disconnect (this); + return -1; + } + + priv->idx = event_register (this->xl->ctx->event_pool, priv->sock, + ib_verbs_event_handler, this, 1, 1); + + pthread_mutex_init (&priv->read_mutex, NULL); + pthread_mutex_init (&priv->write_mutex, NULL); + pthread_mutex_init (&priv->recv_mutex, NULL); + /* pthread_cond_init (&priv->recv_cond, NULL); 
*/ + + return 0; +} + +static int32_t +ib_verbs_listen (transport_t *this) +{ + struct sockaddr_storage sockaddr; + socklen_t sockaddr_len; + ib_verbs_private_t *priv = this->private; + int opt = 1, ret = 0; + char service[NI_MAXSERV], host[NI_MAXHOST]; + + memset (&sockaddr, 0, sizeof (sockaddr)); + ret = ibverbs_server_get_local_sockaddr (this, + (struct sockaddr *)&sockaddr, + &sockaddr_len); + if (ret != 0) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "cannot find network address of server to bind to"); + goto err; + } + + priv->sock = socket (((struct sockaddr *)&sockaddr)->sa_family, + SOCK_STREAM, 0); + if (priv->sock == -1) { + gf_log ("ib-verbs/server", GF_LOG_CRITICAL, + "init: failed to create socket, error: %s", + strerror (errno)); + GF_FREE (this->private); + ret = -1; + goto err; + } + + memcpy (&this->myinfo.sockaddr, &sockaddr, sockaddr_len); + this->myinfo.sockaddr_len = sockaddr_len; + + ret = getnameinfo ((struct sockaddr *)&this->myinfo.sockaddr, + this->myinfo.sockaddr_len, + host, sizeof (host), + service, sizeof (service), + NI_NUMERICHOST); + if (ret != 0) { + gf_log (this->xl->name, GF_LOG_ERROR, + "getnameinfo failed (%s)", gai_strerror (ret)); + goto err; + } + sprintf (this->myinfo.identifier, "%s:%s", host, service); + + setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof (opt)); + if (bind (priv->sock, + (struct sockaddr *)&sockaddr, + sockaddr_len) != 0) { + ret = -1; + gf_log ("ib-verbs/server", GF_LOG_ERROR, + "init: failed to bind to socket for %s (%s)", + this->myinfo.identifier, strerror (errno)); + goto err; + } + + if (listen (priv->sock, 10) != 0) { + gf_log ("ib-verbs/server", GF_LOG_ERROR, + "init: listen () failed on socket for %s (%s)", + this->myinfo.identifier, strerror (errno)); + ret = -1; + goto err; + } + + /* Register the main socket */ + priv->idx = event_register (this->xl->ctx->event_pool, priv->sock, + ib_verbs_server_event_handler, + transport_ref (this), 1, 0); + +err: + return ret; +} + +struct 
transport_ops tops = { + .receive = ib_verbs_receive, + .submit = ib_verbs_submit, + .connect = ib_verbs_connect, + .disconnect = ib_verbs_disconnect, + .listen = ib_verbs_listen, +}; + +int32_t +init (transport_t *this) +{ + ib_verbs_private_t *priv = GF_CALLOC (1, sizeof (*priv), + gf_ibv_mt_ib_verbs_private_t); + this->private = priv; + priv->sock = -1; + + if (ib_verbs_init (this)) { + gf_log (this->xl->name, GF_LOG_ERROR, + "Failed to initialize IB Device"); + return -1; + } + + return 0; +} + +void +fini (struct transport *this) +{ + /* TODO: verify this function does graceful finish */ + ib_verbs_private_t *priv = this->private; + this->private = NULL; + + pthread_mutex_destroy (&priv->recv_mutex); + pthread_mutex_destroy (&priv->write_mutex); + pthread_mutex_destroy (&priv->read_mutex); + /* pthread_cond_destroy (&priv->recv_cond); */ + + gf_log (this->xl->name, GF_LOG_TRACE, + "called fini on transport: %p", + this); + GF_FREE (priv); + return; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_common_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } + + return ret; +} + +/* TODO: expand each option */ +struct volume_options options[] = { + { .key = {"transport.ib-verbs.port", + "ib-verbs-port"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 4, + .description = "check the option by 'ibv_devinfo'" + }, + { .key = {"transport.ib-verbs.mtu", + "ib-verbs-mtu"}, + .type = GF_OPTION_TYPE_INT, + }, + { .key = {"transport.ib-verbs.device-name", + "ib-verbs-device-name"}, + .type = GF_OPTION_TYPE_ANY, + .description = "check by 'ibv_devinfo'" + }, + { .key = {"transport.ib-verbs.work-request-send-count", + "ib-verbs-work-request-send-count"}, + .type = GF_OPTION_TYPE_INT, + }, + { .key = {"transport.ib-verbs.work-request-recv-count", + "ib-verbs-work-request-recv-count"}, + .type = GF_OPTION_TYPE_INT, + 
}, + { .key = {"remote-port", + "transport.remote-port", + "transport.ib-verbs.remote-port"}, + .type = GF_OPTION_TYPE_INT + }, + { .key = {"transport.ib-verbs.listen-port", "listen-port"}, + .type = GF_OPTION_TYPE_INT + }, + { .key = {"transport.ib-verbs.connect-path", "connect-path"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"transport.ib-verbs.bind-path", "bind-path"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"transport.ib-verbs.listen-path", "listen-path"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"transport.address-family", + "address-family"}, + .value = {"inet", "inet6", "inet/inet6", "inet6/inet", + "unix", "inet-sdp" }, + .type = GF_OPTION_TYPE_STR + }, + { .key = {"transport.socket.lowlat"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {NULL} } +}; diff --git a/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h b/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h new file mode 100644 index 00000000000..c385b62e5cb --- /dev/null +++ b/xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h @@ -0,0 +1,220 @@ +/* + Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#ifndef _XPORT_IB_VERBS_H +#define _XPORT_IB_VERBS_H + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#ifndef MAX_IOVEC +#define MAX_IOVEC 16 +#endif /* MAX_IOVEC */ + +#include "xlator.h" +#include "event.h" +#include "ib-verbs-mem-types.h" + +#include <stdio.h> +#include <list.h> +#include <arpa/inet.h> +#include <infiniband/verbs.h> + +#define GF_DEFAULT_IBVERBS_LISTEN_PORT 6997 + +/* options per transport end point */ +struct _ib_verbs_options { + int32_t port; + char *device_name; + enum ibv_mtu mtu; + int32_t send_count; + int32_t recv_count; + uint64_t recv_size; + uint64_t send_size; +}; +typedef struct _ib_verbs_options ib_verbs_options_t; + + +struct _ib_verbs_header { + char colonO[3]; + uint32_t size1; + uint32_t size2; + char version; +} __attribute__((packed)); +typedef struct _ib_verbs_header ib_verbs_header_t; + +struct _ib_verbs_ioq { + union { + struct list_head list; + struct { + struct _ib_verbs_ioq *next; + struct _ib_verbs_ioq *prev; + }; + }; + ib_verbs_header_t header; + struct iovec vector[MAX_IOVEC]; + int count; + char *buf; + struct iobref *iobref; +}; +typedef struct _ib_verbs_ioq ib_verbs_ioq_t; + +/* represents one communication peer, two per transport_t */ +struct _ib_verbs_peer { + transport_t *trans; + struct ibv_qp *qp; + + int32_t recv_count; + int32_t send_count; + int32_t recv_size; + int32_t send_size; + + int32_t quota; + union { + struct list_head ioq; + struct { + ib_verbs_ioq_t *ioq_next; + ib_verbs_ioq_t *ioq_prev; + }; + }; + + /* QP attributes, needed to connect with remote QP */ + int32_t local_lid; + int32_t local_psn; + int32_t local_qpn; + int32_t remote_lid; + int32_t remote_psn; + int32_t remote_qpn; +}; +typedef struct _ib_verbs_peer ib_verbs_peer_t; + + +struct _ib_verbs_post { + struct _ib_verbs_post *next, *prev; + struct ibv_mr *mr; + char *buf; + int32_t buf_size; + char aux; + int32_t reused; + pthread_barrier_t wait; +}; +typedef struct _ib_verbs_post ib_verbs_post_t; + + 
+struct _ib_verbs_queue { + ib_verbs_post_t active_posts, passive_posts; + int32_t active_count, passive_count; + pthread_mutex_t lock; +}; +typedef struct _ib_verbs_queue ib_verbs_queue_t; + + +struct _ib_verbs_qpreg { + pthread_mutex_t lock; + int32_t count; + struct _qpent { + struct _qpent *next, *prev; + int32_t qp_num; + ib_verbs_peer_t *peer; + } ents[42]; +}; +typedef struct _ib_verbs_qpreg ib_verbs_qpreg_t; + +/* context per device, stored in global glusterfs_ctx_t->ib */ +struct _ib_verbs_device { + struct _ib_verbs_device *next; + const char *device_name; + struct ibv_context *context; + int32_t port; + struct ibv_pd *pd; + struct ibv_srq *srq; + ib_verbs_qpreg_t qpreg; + struct ibv_comp_channel *send_chan, *recv_chan; + struct ibv_cq *send_cq, *recv_cq; + ib_verbs_queue_t sendq, recvq; + pthread_t send_thread, recv_thread; +}; +typedef struct _ib_verbs_device ib_verbs_device_t; + +typedef enum { + IB_VERBS_HANDSHAKE_START = 0, + IB_VERBS_HANDSHAKE_SENDING_DATA, + IB_VERBS_HANDSHAKE_RECEIVING_DATA, + IB_VERBS_HANDSHAKE_SENT_DATA, + IB_VERBS_HANDSHAKE_RECEIVED_DATA, + IB_VERBS_HANDSHAKE_SENDING_ACK, + IB_VERBS_HANDSHAKE_RECEIVING_ACK, + IB_VERBS_HANDSHAKE_RECEIVED_ACK, + IB_VERBS_HANDSHAKE_COMPLETE, +} ib_verbs_handshake_state_t; + +struct ib_verbs_nbio { + int state; + char *buf; + int count; + struct iovec vector; + struct iovec *pending_vector; + int pending_count; +}; + + +struct _ib_verbs_private { + int32_t sock; + int32_t idx; + unsigned char connected; + unsigned char tcp_connected; + unsigned char ib_connected; + in_addr_t addr; + unsigned short port; + + /* IB Verbs Driver specific variables, pointers */ + ib_verbs_peer_t peer; + ib_verbs_device_t *device; + ib_verbs_options_t options; + + /* Used by trans->op->receive */ + char *data_ptr; + int32_t data_offset; + int32_t data_len; + + /* Mutex */ + pthread_mutex_t read_mutex; + pthread_mutex_t write_mutex; + pthread_barrier_t handshake_barrier; + char handshake_ret; + + pthread_mutex_t 
recv_mutex; + pthread_cond_t recv_cond; + + /* used during ib_verbs_handshake */ + struct { + struct ib_verbs_nbio incoming; + struct ib_verbs_nbio outgoing; + int state; + ib_verbs_header_t header; + char *buf; + size_t size; + } handshake; +}; +typedef struct _ib_verbs_private ib_verbs_private_t; + +#endif /* _XPORT_IB_VERBS_H */ diff --git a/xlators/protocol/legacy/transport/ib-verbs/src/name.c b/xlators/protocol/legacy/transport/ib-verbs/src/name.c new file mode 100644 index 00000000000..a3e18481458 --- /dev/null +++ b/xlators/protocol/legacy/transport/ib-verbs/src/name.c @@ -0,0 +1,712 @@ +/* + Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + +#include <sys/types.h> +#include <sys/socket.h> +#include <errno.h> +#include <netdb.h> +#include <string.h> + +#ifdef CLIENT_PORT_CEILING +#undef CLIENT_PORT_CEILING +#endif + +#define CLIENT_PORT_CEILING 1024 + +#ifndef AF_INET_SDP +#define AF_INET_SDP 27 +#endif + +#include "transport.h" +#include "ib-verbs.h" + +int32_t +gf_resolve_ip6 (const char *hostname, + uint16_t port, + int family, + void **dnscache, + struct addrinfo **addr_info); + +static int32_t +af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr, + socklen_t sockaddr_len, int ceiling) +{ + int32_t ret = -1; + /* struct sockaddr_in sin = {0, }; */ + uint16_t port = ceiling - 1; + + while (port) + { + switch (sockaddr->sa_family) + { + case AF_INET6: + ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons (port); + break; + + case AF_INET_SDP: + case AF_INET: + ((struct sockaddr_in *)sockaddr)->sin_port = htons (port); + break; + } + + ret = bind (fd, sockaddr, sockaddr_len); + + if (ret == 0) + break; + + if (ret == -1 && errno == EACCES) + break; + + port--; + } + + return ret; +} + +static int32_t +af_unix_client_bind (transport_t *this, + struct sockaddr *sockaddr, + socklen_t sockaddr_len, + int sock) +{ + data_t *path_data = NULL; + struct sockaddr_un *addr = NULL; + int32_t ret = -1; + + path_data = dict_get (this->xl->options, + "transport.ib-verbs.bind-path"); + if (path_data) { + char *path = data_to_str (path_data); + if (!path || strlen (path) > UNIX_PATH_MAX) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "transport.ib-verbs.bind-path not specfied " + "for unix socket, letting connect to assign " + "default value"); + goto err; + } + + addr = (struct sockaddr_un *) sockaddr; + strcpy (addr->sun_path, path); + ret = bind (sock, (struct sockaddr *)addr, sockaddr_len); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "cannot bind to unix-domain socket %d (%s)", + sock, strerror (errno)); + goto err; + } + } + +err: + return ret; +} + +static int32_t 
+client_fill_address_family (transport_t *this, struct sockaddr *sockaddr) +{ + data_t *address_family_data = NULL; + + address_family_data = dict_get (this->xl->options, + "transport.address-family"); + if (!address_family_data) { + data_t *remote_host_data = NULL, *connect_path_data = NULL; + remote_host_data = dict_get (this->xl->options, "remote-host"); + connect_path_data = dict_get (this->xl->options, + "transport.ib-verbs.connect-path"); + + if (!(remote_host_data || connect_path_data) || + (remote_host_data && connect_path_data)) { + gf_log (this->xl->name, GF_LOG_ERROR, + "address-family not specified and not able to " + "determine the same from other options " + "(remote-host:%s and connect-path:%s)", + data_to_str (remote_host_data), + data_to_str (connect_path_data)); + return -1; + } + + if (remote_host_data) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "address-family not specified, guessing it " + "to be inet/inet6"); + sockaddr->sa_family = AF_UNSPEC; + } else { + gf_log (this->xl->name, GF_LOG_DEBUG, + "address-family not specified, guessing it " + "to be unix"); + sockaddr->sa_family = AF_UNIX; + } + + } else { + char *address_family = data_to_str (address_family_data); + if (!strcasecmp (address_family, "unix")) { + sockaddr->sa_family = AF_UNIX; + } else if (!strcasecmp (address_family, "inet")) { + sockaddr->sa_family = AF_INET; + } else if (!strcasecmp (address_family, "inet6")) { + sockaddr->sa_family = AF_INET6; + } else if (!strcasecmp (address_family, "inet-sdp")) { + sockaddr->sa_family = AF_INET_SDP; + } else if (!strcasecmp (address_family, "inet/inet6") + || !strcasecmp (address_family, "inet6/inet")) { + sockaddr->sa_family = AF_UNSPEC; + } else { + gf_log (this->xl->name, GF_LOG_ERROR, + "unknown address-family (%s) specified", + address_family); + return -1; + } + } + + return 0; +} + +static int32_t +af_inet_client_get_remote_sockaddr (transport_t *this, + struct sockaddr *sockaddr, + socklen_t *sockaddr_len) +{ + dict_t *options = 
this->xl->options; + data_t *remote_host_data = NULL; + data_t *remote_port_data = NULL; + char *remote_host = NULL; + uint16_t remote_port = 0; + struct addrinfo *addr_info = NULL; + int32_t ret = 0; + + remote_host_data = dict_get (options, "remote-host"); + if (remote_host_data == NULL) + { + gf_log (this->xl->name, GF_LOG_ERROR, + "option remote-host missing in volume %s", + this->xl->name); + ret = -1; + goto err; + } + + remote_host = data_to_str (remote_host_data); + if (remote_host == NULL) + { + gf_log (this->xl->name, GF_LOG_ERROR, + "option remote-host has data NULL in volume %s", + this->xl->name); + ret = -1; + goto err; + } + + remote_port_data = dict_get (options, "remote-port"); + if (remote_port_data == NULL) + { + gf_log (this->xl->name, GF_LOG_DEBUG, + "option remote-port missing in volume %s. " + "Defaulting to %d", + this->xl->name, GF_DEFAULT_IBVERBS_LISTEN_PORT); + + remote_port = GF_DEFAULT_IBVERBS_LISTEN_PORT; + } + else + { + remote_port = data_to_uint16 (remote_port_data); + } + + if (remote_port == (uint16_t)-1) + { + gf_log (this->xl->name, GF_LOG_ERROR, + "option remote-port has invalid port in volume %s", + this->xl->name); + ret = -1; + goto err; + } + + /* TODO: gf_resolve is a blocking call. 
kick in some + non blocking dns techniques */ + ret = gf_resolve_ip6 (remote_host, remote_port, + sockaddr->sa_family, + &this->dnscache, &addr_info); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "DNS resolution failed on host %s", remote_host); + goto err; + } + + memcpy (sockaddr, addr_info->ai_addr, addr_info->ai_addrlen); + *sockaddr_len = addr_info->ai_addrlen; + +err: + return ret; +} + +static int32_t +af_unix_client_get_remote_sockaddr (transport_t *this, + struct sockaddr *sockaddr, + socklen_t *sockaddr_len) +{ + struct sockaddr_un *sockaddr_un = NULL; + char *connect_path = NULL; + data_t *connect_path_data = NULL; + int32_t ret = 0; + + connect_path_data = dict_get (this->xl->options, + "transport.ib-verbs.connect-path"); + if (!connect_path_data) { + gf_log (this->xl->name, GF_LOG_ERROR, + "option transport.ib-verbs.connect-path not " + "specified for address-family unix"); + ret = -1; + goto err; + } + + connect_path = data_to_str (connect_path_data); + if (!connect_path) { + gf_log (this->xl->name, GF_LOG_ERROR, + "connect-path is null-string"); + ret = -1; + goto err; + } + + if (strlen (connect_path) > UNIX_PATH_MAX) { + gf_log (this->xl->name, GF_LOG_ERROR, + "connect-path value length %"GF_PRI_SIZET" > " + "%d octets", strlen (connect_path), UNIX_PATH_MAX); + ret = -1; + goto err; + } + + gf_log (this->xl->name, + GF_LOG_DEBUG, + "using connect-path %s", connect_path); + sockaddr_un = (struct sockaddr_un *)sockaddr; + strcpy (sockaddr_un->sun_path, connect_path); + *sockaddr_len = sizeof (struct sockaddr_un); + +err: + return ret; +} + +static int32_t +af_unix_server_get_local_sockaddr (transport_t *this, + struct sockaddr *addr, + socklen_t *addr_len) +{ + data_t *listen_path_data = NULL; + char *listen_path = NULL; + int32_t ret = 0; + struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; + + + listen_path_data = dict_get (this->xl->options, + "transport.ib-verbs.listen-path"); + if (!listen_path_data) { + gf_log 
(this->xl->name, GF_LOG_ERROR, + "missing option listen-path"); + ret = -1; + goto err; + } + + listen_path = data_to_str (listen_path_data); + +#ifndef UNIX_PATH_MAX +#define UNIX_PATH_MAX 108 +#endif + + if (strlen (listen_path) > UNIX_PATH_MAX) { + gf_log (this->xl->name, GF_LOG_ERROR, + "option listen-path has value length %"GF_PRI_SIZET" > %d", + strlen (listen_path), UNIX_PATH_MAX); + ret = -1; + goto err; + } + + sunaddr->sun_family = AF_UNIX; + strcpy (sunaddr->sun_path, listen_path); + *addr_len = sizeof (struct sockaddr_un); + +err: + return ret; +} + +static int32_t +af_inet_server_get_local_sockaddr (transport_t *this, + struct sockaddr *addr, + socklen_t *addr_len) +{ + struct addrinfo hints, *res = 0; + data_t *listen_port_data = NULL, *listen_host_data = NULL; + uint16_t listen_port = -1; + char service[NI_MAXSERV], *listen_host = NULL; + dict_t *options = NULL; + int32_t ret = 0; + + options = this->xl->options; + + listen_port_data = dict_get (options, "transport.ib-verbs.listen-port"); + listen_host_data = dict_get (options, "transport.ib-verbs.bind-address"); + + if (listen_port_data) + { + listen_port = data_to_uint16 (listen_port_data); + } else { + if (addr->sa_family == AF_INET6) { + struct sockaddr_in6 *in = (struct sockaddr_in6 *) addr; + in->sin6_addr = in6addr_any; + in->sin6_port = htons(listen_port); + *addr_len = sizeof(struct sockaddr_in6); + goto out; + } else if (addr->sa_family == AF_INET) { + struct sockaddr_in *in = (struct sockaddr_in *) addr; + in->sin_addr.s_addr = htonl(INADDR_ANY); + in->sin_port = htons(listen_port); + *addr_len = sizeof(struct sockaddr_in); + goto out; + } + } + + if (listen_port == (uint16_t) -1) + listen_port = GF_DEFAULT_IBVERBS_LISTEN_PORT; + + + if (listen_host_data) + { + listen_host = data_to_str (listen_host_data); + } + + memset (service, 0, sizeof (service)); + sprintf (service, "%d", listen_port); + + memset (&hints, 0, sizeof (hints)); + hints.ai_family = addr->sa_family; + hints.ai_socktype = 
SOCK_STREAM; + hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE; + + ret = getaddrinfo(listen_host, service, &hints, &res); + if (ret != 0) { + gf_log (this->xl->name, + GF_LOG_ERROR, + "getaddrinfo failed for host %s, service %s (%s)", + listen_host, service, gai_strerror (ret)); + ret = -1; + goto out; + } + + memcpy (addr, res->ai_addr, res->ai_addrlen); + *addr_len = res->ai_addrlen; + + freeaddrinfo (res); + +out: + return ret; +} + +int32_t +client_bind (transport_t *this, + struct sockaddr *sockaddr, + socklen_t *sockaddr_len, + int sock) +{ + int ret = 0; + + *sockaddr_len = sizeof (struct sockaddr_in6); + switch (sockaddr->sa_family) + { + case AF_INET_SDP: + case AF_INET: + *sockaddr_len = sizeof (struct sockaddr_in); + + case AF_INET6: + ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr, + *sockaddr_len, + CLIENT_PORT_CEILING); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_WARNING, + "cannot bind inet socket (%d) to port " + "less than %d (%s)", + sock, CLIENT_PORT_CEILING, strerror (errno)); + ret = 0; + } + break; + + case AF_UNIX: + *sockaddr_len = sizeof (struct sockaddr_un); + ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr, + *sockaddr_len, sock); + break; + + default: + gf_log (this->xl->name, GF_LOG_ERROR, + "unknown address family %d", sockaddr->sa_family); + ret = -1; + break; + } + + return ret; +} + +int32_t +ibverbs_client_get_remote_sockaddr (transport_t *this, + struct sockaddr *sockaddr, + socklen_t *sockaddr_len) +{ + int32_t ret = 0; + char is_inet_sdp = 0; + + ret = client_fill_address_family (this, sockaddr); + if (ret) { + ret = -1; + goto err; + } + + switch (sockaddr->sa_family) + { + case AF_INET_SDP: + sockaddr->sa_family = AF_INET; + is_inet_sdp = 1; + + case AF_INET: + case AF_INET6: + case AF_UNSPEC: + ret = af_inet_client_get_remote_sockaddr (this, + sockaddr, + sockaddr_len); + + if (is_inet_sdp) { + sockaddr->sa_family = AF_INET_SDP; + } + + break; + + case AF_UNIX: + ret = 
af_unix_client_get_remote_sockaddr (this, + sockaddr, + sockaddr_len); + break; + + default: + gf_log (this->xl->name, GF_LOG_ERROR, + "unknown address-family %d", sockaddr->sa_family); + ret = -1; + } + +err: + return ret; +} + +int32_t +ibverbs_server_get_local_sockaddr (transport_t *this, + struct sockaddr *addr, + socklen_t *addr_len) +{ + data_t *address_family_data = NULL; + int32_t ret = 0; + char is_inet_sdp = 0; + + address_family_data = dict_get (this->xl->options, + "transport.address-family"); + if (address_family_data) { + char *address_family = NULL; + address_family = data_to_str (address_family_data); + + if (!strcasecmp (address_family, "inet")) { + addr->sa_family = AF_INET; + } else if (!strcasecmp (address_family, "inet6")) { + addr->sa_family = AF_INET6; + } else if (!strcasecmp (address_family, "inet-sdp")) { + addr->sa_family = AF_INET_SDP; + } else if (!strcasecmp (address_family, "unix")) { + addr->sa_family = AF_UNIX; + } else if (!strcasecmp (address_family, "inet/inet6") + || !strcasecmp (address_family, "inet6/inet")) { + addr->sa_family = AF_UNSPEC; + } else { + gf_log (this->xl->name, GF_LOG_ERROR, + "unknown address family (%s) specified", + address_family); + ret = -1; + goto err; + } + } else { + gf_log (this->xl->name, GF_LOG_DEBUG, + "option address-family not specified, defaulting " + "to inet/inet6"); + addr->sa_family = AF_UNSPEC; + } + + switch (addr->sa_family) + { + case AF_INET_SDP: + is_inet_sdp = 1; + addr->sa_family = AF_INET; + + case AF_INET: + case AF_INET6: + case AF_UNSPEC: + ret = af_inet_server_get_local_sockaddr (this, addr, addr_len); + if (is_inet_sdp && !ret) { + addr->sa_family = AF_INET_SDP; + } + break; + + case AF_UNIX: + ret = af_unix_server_get_local_sockaddr (this, addr, addr_len); + break; + } + +err: + return ret; +} + +int32_t +fill_inet6_inet_identifiers (transport_t *this, struct sockaddr_storage *addr, + int32_t addr_len, char *identifier) +{ + int32_t ret = 0, tmpaddr_len = 0; + char 
service[NI_MAXSERV], host[NI_MAXHOST]; + struct sockaddr_storage tmpaddr; + + memset (&tmpaddr, 0, sizeof (tmpaddr)); + tmpaddr = *addr; + tmpaddr_len = addr_len; + + if (((struct sockaddr *) &tmpaddr)->sa_family == AF_INET6) { + int32_t one_to_four, four_to_eight, twelve_to_sixteen; + int16_t eight_to_ten, ten_to_twelve; + + one_to_four = four_to_eight = twelve_to_sixteen = 0; + eight_to_ten = ten_to_twelve = 0; + + one_to_four = ((struct sockaddr_in6 *) + &tmpaddr)->sin6_addr.s6_addr32[0]; + four_to_eight = ((struct sockaddr_in6 *) + &tmpaddr)->sin6_addr.s6_addr32[1]; +#ifdef GF_SOLARIS_HOST_OS + eight_to_ten = S6_ADDR16(((struct sockaddr_in6 *) + &tmpaddr)->sin6_addr)[4]; +#else + eight_to_ten = ((struct sockaddr_in6 *) + &tmpaddr)->sin6_addr.s6_addr16[4]; +#endif + +#ifdef GF_SOLARIS_HOST_OS + ten_to_twelve = S6_ADDR16(((struct sockaddr_in6 *) + &tmpaddr)->sin6_addr)[5]; +#else + ten_to_twelve = ((struct sockaddr_in6 *) + &tmpaddr)->sin6_addr.s6_addr16[5]; +#endif + twelve_to_sixteen = ((struct sockaddr_in6 *) + &tmpaddr)->sin6_addr.s6_addr32[3]; + + /* ipv4 mapped ipv6 address has + bits 0-80: 0 + bits 80-96: 0xffff + bits 96-128: ipv4 address + */ + + if (one_to_four == 0 && + four_to_eight == 0 && + eight_to_ten == 0 && + ten_to_twelve == -1) { + struct sockaddr_in *in_ptr = (struct sockaddr_in *)&tmpaddr; + memset (&tmpaddr, 0, sizeof (tmpaddr)); + + in_ptr->sin_family = AF_INET; + in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port; + in_ptr->sin_addr.s_addr = twelve_to_sixteen; + tmpaddr_len = sizeof (*in_ptr); + } + } + + ret = getnameinfo ((struct sockaddr *) &tmpaddr, + tmpaddr_len, + host, sizeof (host), + service, sizeof (service), + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret != 0) { + gf_log (this->xl->name, + GF_LOG_ERROR, + "getnameinfo failed (%s)", gai_strerror (ret)); + } + + sprintf (identifier, "%s:%s", host, service); + + return ret; +} + +int32_t +get_transport_identifiers (transport_t *this) +{ + int32_t ret = 0; + char 
is_inet_sdp = 0; + + switch (((struct sockaddr *) &this->myinfo.sockaddr)->sa_family) + { + case AF_INET_SDP: + is_inet_sdp = 1; + ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET; + + case AF_INET: + case AF_INET6: + { + ret = fill_inet6_inet_identifiers (this, + &this->myinfo.sockaddr, + this->myinfo.sockaddr_len, + this->myinfo.identifier); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "can't fill inet/inet6 identifier for server"); + goto err; + } + + ret = fill_inet6_inet_identifiers (this, + &this->peerinfo.sockaddr, + this->peerinfo.sockaddr_len, + this->peerinfo.identifier); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "can't fill inet/inet6 identifier for client"); + goto err; + } + + if (is_inet_sdp) { + ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET_SDP; + } + } + break; + + case AF_UNIX: + { + struct sockaddr_un *sunaddr = NULL; + + sunaddr = (struct sockaddr_un *) &this->myinfo.sockaddr; + strcpy (this->myinfo.identifier, sunaddr->sun_path); + + sunaddr = (struct sockaddr_un *) &this->peerinfo.sockaddr; + strcpy (this->peerinfo.identifier, sunaddr->sun_path); + } + break; + + default: + gf_log (this->xl->name, GF_LOG_ERROR, + "unknown address family (%d)", + ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family); + ret = -1; + break; + } + +err: + return ret; +} diff --git a/xlators/protocol/legacy/transport/ib-verbs/src/name.h b/xlators/protocol/legacy/transport/ib-verbs/src/name.h new file mode 100644 index 00000000000..4f0f47711d2 --- /dev/null +++ b/xlators/protocol/legacy/transport/ib-verbs/src/name.h @@ -0,0 +1,47 @@ +/* + Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _IB_VERBS_NAME_H +#define _IB_VERBS_NAME_H + +#include <sys/socket.h> +#include <sys/un.h> + +#include "compat.h" + +int32_t +client_bind (transport_t *this, + struct sockaddr *sockaddr, + socklen_t *sockaddr_len, + int sock); + +int32_t +ibverbs_client_get_remote_sockaddr (transport_t *this, + struct sockaddr *sockaddr, + socklen_t *sockaddr_len); + +int32_t +ibverbs_server_get_local_sockaddr (transport_t *this, + struct sockaddr *addr, + socklen_t *addr_len); + +int32_t +get_transport_identifiers (transport_t *this); + +#endif /* _IB_VERBS_NAME_H */ diff --git a/xlators/protocol/legacy/transport/socket/Makefile.am b/xlators/protocol/legacy/transport/socket/Makefile.am new file mode 100644 index 00000000000..f963effea22 --- /dev/null +++ b/xlators/protocol/legacy/transport/socket/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = src
\ No newline at end of file diff --git a/xlators/protocol/legacy/transport/socket/src/Makefile.am b/xlators/protocol/legacy/transport/socket/src/Makefile.am new file mode 100644 index 00000000000..5952e18e97b --- /dev/null +++ b/xlators/protocol/legacy/transport/socket/src/Makefile.am @@ -0,0 +1,19 @@ +# TODO : change to proper transport dir + +transport_LTLIBRARIES = socket.la +transportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/transport + +socket_la_LDFLAGS = -module -avoidversion + +socket_la_SOURCES = socket.c name.c +socket_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(top_builddir)/xlators/protocol/legacy/lib/src/libgfproto.la + +noinst_HEADERS = socket.h name.h socket-mem-types.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \ + -I$(top_srcdir)/xlators/protocol/legacy/transport/socket/src \ + -I$(top_srcdir)/xlators/protocol/legacy/lib/src + +CLEANFILES = *~ diff --git a/xlators/protocol/legacy/transport/socket/src/name.c b/xlators/protocol/legacy/transport/socket/src/name.c new file mode 100644 index 00000000000..74abe4480a2 --- /dev/null +++ b/xlators/protocol/legacy/transport/socket/src/name.c @@ -0,0 +1,740 @@ +/* + Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see + <http://www.gnu.org/licenses/>. +*/ + +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <errno.h> +#include <netdb.h> +#include <string.h> + +#ifdef CLIENT_PORT_CEILING +#undef CLIENT_PORT_CEILING +#endif + +#define CLIENT_PORT_CEILING 1024 + +#ifndef AF_INET_SDP +#define AF_INET_SDP 27 +#endif + +static int gf_name_addr_enotspec_log; + +#include "transport.h" +#include "socket.h" + +int32_t +gf_resolve_ip6 (const char *hostname, + uint16_t port, + int family, + void **dnscache, + struct addrinfo **addr_info); + +static int32_t +af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr, + socklen_t sockaddr_len, int ceiling) +{ + int32_t ret = -1; + /* struct sockaddr_in sin = {0, }; */ + uint16_t port = ceiling - 1; + + while (port) + { + switch (sockaddr->sa_family) + { + case AF_INET6: + ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons (port); + break; + + case AF_INET_SDP: + case AF_INET: + ((struct sockaddr_in *)sockaddr)->sin_port = htons (port); + break; + } + + ret = bind (fd, sockaddr, sockaddr_len); + + if (ret == 0) + break; + + if (ret == -1 && errno == EACCES) + break; + + port--; + } + + return ret; +} + +static int32_t +af_unix_client_bind (transport_t *this, + struct sockaddr *sockaddr, + socklen_t sockaddr_len, + int sock) +{ + data_t *path_data = NULL; + struct sockaddr_un *addr = NULL; + int32_t ret = 0; + + path_data = dict_get (this->xl->options, "transport.socket.bind-path"); + if (path_data) { + char *path = data_to_str (path_data); + if (!path || strlen (path) > UNIX_PATH_MAX) { + gf_log (this->xl->name, GF_LOG_TRACE, + "bind-path not specfied for unix socket, " + "letting connect to assign default value"); + goto err; + } + + addr = (struct sockaddr_un *) sockaddr; + strcpy (addr->sun_path, path); + ret = bind (sock, (struct sockaddr *)addr, sockaddr_len); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "cannot bind to unix-domain socket %d (%s)", + sock, strerror 
(errno)); + goto err; + } + } else { + gf_log (this->xl->name, GF_LOG_TRACE, + "bind-path not specfied for unix socket, " + "letting connect to assign default value"); + } + +err: + return ret; +} + +int32_t +client_fill_address_family (transport_t *this, sa_family_t *sa_family) +{ + data_t *address_family_data = NULL; + int32_t ret = -1; + + if (sa_family == NULL) { + goto out; + } + + address_family_data = dict_get (this->xl->options, + "transport.address-family"); + if (!address_family_data) { + data_t *remote_host_data = NULL, *connect_path_data = NULL; + remote_host_data = dict_get (this->xl->options, "remote-host"); + connect_path_data = dict_get (this->xl->options, + "transport.socket.connect-path"); + + if (!(remote_host_data || connect_path_data) || + (remote_host_data && connect_path_data)) { + GF_LOG_OCCASIONALLY (gf_name_addr_enotspec_log, + this->xl->name, GF_LOG_ERROR, + "transport.address-family not specified and " + "not able to determine the " + "same from other options (remote-host:%s and " + "transport.unix.connect-path:%s)", + data_to_str (remote_host_data), + data_to_str (connect_path_data)); + goto out; + } + + if (remote_host_data) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "address-family not specified, guessing it " + "to be inet/inet6"); + *sa_family = AF_UNSPEC; + } else { + gf_log (this->xl->name, GF_LOG_DEBUG, + "address-family not specified, guessing it " + "to be unix"); + *sa_family = AF_UNIX; + } + + } else { + char *address_family = data_to_str (address_family_data); + if (!strcasecmp (address_family, "unix")) { + *sa_family = AF_UNIX; + } else if (!strcasecmp (address_family, "inet")) { + *sa_family = AF_INET; + } else if (!strcasecmp (address_family, "inet6")) { + *sa_family = AF_INET6; + } else if (!strcasecmp (address_family, "inet-sdp")) { + *sa_family = AF_INET_SDP; + } else if (!strcasecmp (address_family, "inet/inet6") + || !strcasecmp (address_family, "inet6/inet")) { + *sa_family = AF_UNSPEC; + } else { + gf_log 
(this->xl->name, GF_LOG_ERROR, + "unknown address-family (%s) specified", + address_family); + goto out; + } + } + + ret = 0; + +out: + return ret; +} + +static int32_t +af_inet_client_get_remote_sockaddr (transport_t *this, + struct sockaddr *sockaddr, + socklen_t *sockaddr_len) +{ + dict_t *options = this->xl->options; + data_t *remote_host_data = NULL; + data_t *remote_port_data = NULL; + char *remote_host = NULL; + uint16_t remote_port = 0; + struct addrinfo *addr_info = NULL; + int32_t ret = 0; + + remote_host_data = dict_get (options, "remote-host"); + if (remote_host_data == NULL) + { + gf_log (this->xl->name, GF_LOG_ERROR, + "option remote-host missing in volume %s", this->xl->name); + ret = -1; + goto err; + } + + remote_host = data_to_str (remote_host_data); + if (remote_host == NULL) + { + gf_log (this->xl->name, GF_LOG_ERROR, + "option remote-host has data NULL in volume %s", this->xl->name); + ret = -1; + goto err; + } + + remote_port_data = dict_get (options, "remote-port"); + if (remote_port_data == NULL) + { + gf_log (this->xl->name, GF_LOG_TRACE, + "option remote-port missing in volume %s. Defaulting to %d", + this->xl->name, GF_DEFAULT_SOCKET_LISTEN_PORT); + + remote_port = GF_DEFAULT_SOCKET_LISTEN_PORT; + } + else + { + remote_port = data_to_uint16 (remote_port_data); + } + + if (remote_port == (uint16_t)-1) + { + gf_log (this->xl->name, GF_LOG_ERROR, + "option remote-port has invalid port in volume %s", + this->xl->name); + ret = -1; + goto err; + } + + /* TODO: gf_resolve is a blocking call. 
kick in some + non blocking dns techniques */ + ret = gf_resolve_ip6 (remote_host, remote_port, + sockaddr->sa_family, &this->dnscache, &addr_info); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "DNS resolution failed on host %s", remote_host); + goto err; + } + + memcpy (sockaddr, addr_info->ai_addr, addr_info->ai_addrlen); + *sockaddr_len = addr_info->ai_addrlen; + +err: + return ret; +} + +static int32_t +af_unix_client_get_remote_sockaddr (transport_t *this, + struct sockaddr *sockaddr, + socklen_t *sockaddr_len) +{ + struct sockaddr_un *sockaddr_un = NULL; + char *connect_path = NULL; + data_t *connect_path_data = NULL; + int32_t ret = 0; + + connect_path_data = dict_get (this->xl->options, + "transport.socket.connect-path"); + if (!connect_path_data) { + gf_log (this->xl->name, GF_LOG_ERROR, + "option transport.unix.connect-path not specified for " + "address-family unix"); + ret = -1; + goto err; + } + + connect_path = data_to_str (connect_path_data); + if (!connect_path) { + gf_log (this->xl->name, GF_LOG_ERROR, + "transport.unix.connect-path is null-string"); + ret = -1; + goto err; + } + + if (strlen (connect_path) > UNIX_PATH_MAX) { + gf_log (this->xl->name, GF_LOG_ERROR, + "connect-path value length %"GF_PRI_SIZET" > %d octets", + strlen (connect_path), UNIX_PATH_MAX); + ret = -1; + goto err; + } + + gf_log (this->xl->name, GF_LOG_TRACE, + "using connect-path %s", connect_path); + sockaddr_un = (struct sockaddr_un *)sockaddr; + strcpy (sockaddr_un->sun_path, connect_path); + *sockaddr_len = sizeof (struct sockaddr_un); + +err: + return ret; +} + +static int32_t +af_unix_server_get_local_sockaddr (transport_t *this, + struct sockaddr *addr, + socklen_t *addr_len) +{ + data_t *listen_path_data = NULL; + char *listen_path = NULL; + int32_t ret = 0; + struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr; + + + listen_path_data = dict_get (this->xl->options, + "transport.socket.listen-path"); + if (!listen_path_data) { + gf_log 
(this->xl->name, GF_LOG_ERROR, + "missing option transport.socket.listen-path"); + ret = -1; + goto err; + } + + listen_path = data_to_str (listen_path_data); + +#ifndef UNIX_PATH_MAX +#define UNIX_PATH_MAX 108 +#endif + + if (strlen (listen_path) > UNIX_PATH_MAX) { + gf_log (this->xl->name, GF_LOG_ERROR, + "option transport.unix.listen-path has value length " + "%"GF_PRI_SIZET" > %d", + strlen (listen_path), UNIX_PATH_MAX); + ret = -1; + goto err; + } + + sunaddr->sun_family = AF_UNIX; + strcpy (sunaddr->sun_path, listen_path); + *addr_len = sizeof (struct sockaddr_un); + +err: + return ret; +} + +static int32_t +af_inet_server_get_local_sockaddr (transport_t *this, + struct sockaddr *addr, + socklen_t *addr_len) +{ + struct addrinfo hints, *res = 0; + data_t *listen_port_data = NULL, *listen_host_data = NULL; + uint16_t listen_port = -1; + char service[NI_MAXSERV], *listen_host = NULL; + dict_t *options = NULL; + int32_t ret = 0; + + options = this->xl->options; + + listen_port_data = dict_get (options, "transport.socket.listen-port"); + listen_host_data = dict_get (options, "transport.socket.bind-address"); + + if (listen_port_data) + { + listen_port = data_to_uint16 (listen_port_data); + } + + if (listen_port == (uint16_t) -1) + listen_port = GF_DEFAULT_SOCKET_LISTEN_PORT; + + + if (listen_host_data) + { + listen_host = data_to_str (listen_host_data); + } else { + if (addr->sa_family == AF_INET6) { + struct sockaddr_in6 *in = (struct sockaddr_in6 *) addr; + in->sin6_addr = in6addr_any; + in->sin6_port = htons(listen_port); + *addr_len = sizeof(struct sockaddr_in6); + goto out; + } else if (addr->sa_family == AF_INET) { + struct sockaddr_in *in = (struct sockaddr_in *) addr; + in->sin_addr.s_addr = htonl(INADDR_ANY); + in->sin_port = htons(listen_port); + *addr_len = sizeof(struct sockaddr_in); + goto out; + } + } + + memset (service, 0, sizeof (service)); + sprintf (service, "%d", listen_port); + + memset (&hints, 0, sizeof (hints)); + hints.ai_family = 
addr->sa_family; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE; + + ret = getaddrinfo(listen_host, service, &hints, &res); + if (ret != 0) { + gf_log (this->xl->name, GF_LOG_ERROR, + "getaddrinfo failed for host %s, service %s (%s)", + listen_host, service, gai_strerror (ret)); + ret = -1; + goto out; + } + + memcpy (addr, res->ai_addr, res->ai_addrlen); + *addr_len = res->ai_addrlen; + + freeaddrinfo (res); + +out: + return ret; +} + +int32_t +client_bind (transport_t *this, + struct sockaddr *sockaddr, + socklen_t *sockaddr_len, + int sock) +{ + int ret = 0; + + *sockaddr_len = sizeof (struct sockaddr_in6); + switch (sockaddr->sa_family) + { + case AF_INET_SDP: + case AF_INET: + *sockaddr_len = sizeof (struct sockaddr_in); + + case AF_INET6: + ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr, + *sockaddr_len, CLIENT_PORT_CEILING); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_WARNING, + "cannot bind inet socket (%d) to port less than %d (%s)", + sock, CLIENT_PORT_CEILING, strerror (errno)); + ret = 0; + } + break; + + case AF_UNIX: + *sockaddr_len = sizeof (struct sockaddr_un); + ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr, + *sockaddr_len, sock); + break; + + default: + gf_log (this->xl->name, GF_LOG_ERROR, + "unknown address family %d", sockaddr->sa_family); + ret = -1; + break; + } + + return ret; +} + +int32_t +socket_client_get_remote_sockaddr (transport_t *this, + struct sockaddr *sockaddr, + socklen_t *sockaddr_len, + sa_family_t *sa_family) +{ + int32_t ret = 0; + + if ((sockaddr == NULL) || (sockaddr_len == NULL) + || (sa_family == NULL)) { + ret = -1; + goto err; + } + + + ret = client_fill_address_family (this, &sockaddr->sa_family); + if (ret) { + ret = -1; + goto err; + } + + *sa_family = sockaddr->sa_family; + + switch (sockaddr->sa_family) + { + case AF_INET_SDP: + sockaddr->sa_family = AF_INET; + + case AF_INET: + case AF_INET6: + case AF_UNSPEC: + ret = 
af_inet_client_get_remote_sockaddr (this, sockaddr, + sockaddr_len); + break; + + case AF_UNIX: + ret = af_unix_client_get_remote_sockaddr (this, sockaddr, + sockaddr_len); + break; + + default: + gf_log (this->xl->name, GF_LOG_ERROR, + "unknown address-family %d", sockaddr->sa_family); + ret = -1; + } + + if (*sa_family == AF_UNSPEC) { + *sa_family = sockaddr->sa_family; + } + +err: + return ret; +} + + +int32_t +server_fill_address_family (transport_t *this, sa_family_t *sa_family) +{ + data_t *address_family_data = NULL; + int32_t ret = -1; + + if (sa_family == NULL) { + goto out; + } + + address_family_data = dict_get (this->xl->options, + "transport.address-family"); + if (address_family_data) { + char *address_family = NULL; + address_family = data_to_str (address_family_data); + + if (!strcasecmp (address_family, "inet")) { + *sa_family = AF_INET; + } else if (!strcasecmp (address_family, "inet6")) { + *sa_family = AF_INET6; + } else if (!strcasecmp (address_family, "inet-sdp")) { + *sa_family = AF_INET_SDP; + } else if (!strcasecmp (address_family, "unix")) { + *sa_family = AF_UNIX; + } else if (!strcasecmp (address_family, "inet/inet6") + || !strcasecmp (address_family, "inet6/inet")) { + *sa_family = AF_UNSPEC; + } else { + gf_log (this->xl->name, GF_LOG_ERROR, + "unknown address family (%s) specified", address_family); + goto out; + } + } else { + gf_log (this->xl->name, GF_LOG_DEBUG, + "option address-family not specified, defaulting to inet/inet6"); + *sa_family = AF_UNSPEC; + } + + ret = 0; +out: + return ret; +} + + +int32_t +socket_server_get_local_sockaddr (transport_t *this, struct sockaddr *addr, + socklen_t *addr_len, sa_family_t *sa_family) +{ + int32_t ret = -1; + + if ((addr == NULL) || (addr_len == NULL) || (sa_family == NULL)) { + goto err; + } + + ret = server_fill_address_family (this, &addr->sa_family); + if (ret == -1) { + goto err; + } + + *sa_family = addr->sa_family; + + switch (addr->sa_family) + { + case AF_INET_SDP: + 
addr->sa_family = AF_INET; + + case AF_INET: + case AF_INET6: + case AF_UNSPEC: + ret = af_inet_server_get_local_sockaddr (this, addr, addr_len); + break; + + case AF_UNIX: + ret = af_unix_server_get_local_sockaddr (this, addr, addr_len); + break; + } + + if (*sa_family == AF_UNSPEC) { + *sa_family = addr->sa_family; + } + +err: + return ret; +} + +int32_t +fill_inet6_inet_identifiers (transport_t *this, struct sockaddr_storage *addr, + int32_t addr_len, char *identifier) +{ + int32_t ret = 0, tmpaddr_len = 0; + char service[NI_MAXSERV], host[NI_MAXHOST]; + struct sockaddr_storage tmpaddr; + + memset (&tmpaddr, 0, sizeof (tmpaddr)); + tmpaddr = *addr; + tmpaddr_len = addr_len; + + if (((struct sockaddr *) &tmpaddr)->sa_family == AF_INET6) { + int32_t one_to_four, four_to_eight, twelve_to_sixteen; + int16_t eight_to_ten, ten_to_twelve; + + one_to_four = four_to_eight = twelve_to_sixteen = 0; + eight_to_ten = ten_to_twelve = 0; + + one_to_four = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr32[0]; + four_to_eight = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr32[1]; +#ifdef GF_SOLARIS_HOST_OS + eight_to_ten = S6_ADDR16(((struct sockaddr_in6 *) &tmpaddr)->sin6_addr)[4]; +#else + eight_to_ten = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr16[4]; +#endif + +#ifdef GF_SOLARIS_HOST_OS + ten_to_twelve = S6_ADDR16(((struct sockaddr_in6 *) &tmpaddr)->sin6_addr)[5]; +#else + ten_to_twelve = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr16[5]; +#endif + + twelve_to_sixteen = ((struct sockaddr_in6 *) &tmpaddr)->sin6_addr.s6_addr32[3]; + + /* ipv4 mapped ipv6 address has + bits 0-80: 0 + bits 80-96: 0xffff + bits 96-128: ipv4 address + */ + + if (one_to_four == 0 && + four_to_eight == 0 && + eight_to_ten == 0 && + ten_to_twelve == -1) { + struct sockaddr_in *in_ptr = (struct sockaddr_in *)&tmpaddr; + memset (&tmpaddr, 0, sizeof (tmpaddr)); + + in_ptr->sin_family = AF_INET; + in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port; + 
in_ptr->sin_addr.s_addr = twelve_to_sixteen; + tmpaddr_len = sizeof (*in_ptr); + } + } + + ret = getnameinfo ((struct sockaddr *) &tmpaddr, + tmpaddr_len, + host, sizeof (host), + service, sizeof (service), + NI_NUMERICHOST | NI_NUMERICSERV); + if (ret != 0) { + gf_log (this->xl->name, GF_LOG_ERROR, + "getnameinfo failed (%s)", gai_strerror (ret)); + } + + sprintf (identifier, "%s:%s", host, service); + + return ret; +} + +int32_t +get_transport_identifiers (transport_t *this) +{ + int32_t ret = 0; + char is_inet_sdp = 0; + + switch (((struct sockaddr *) &this->myinfo.sockaddr)->sa_family) + { + case AF_INET_SDP: + is_inet_sdp = 1; + ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET; + + case AF_INET: + case AF_INET6: + { + ret = fill_inet6_inet_identifiers (this, + &this->myinfo.sockaddr, + this->myinfo.sockaddr_len, + this->myinfo.identifier); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "cannot fill inet/inet6 identifier for server"); + goto err; + } + + ret = fill_inet6_inet_identifiers (this, + &this->peerinfo.sockaddr, + this->peerinfo.sockaddr_len, + this->peerinfo.identifier); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "cannot fill inet/inet6 identifier for client"); + goto err; + } + + if (is_inet_sdp) { + ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET_SDP; + } + } + break; + + case AF_UNIX: + { + struct sockaddr_un *sunaddr = NULL; + + sunaddr = (struct sockaddr_un *) &this->myinfo.sockaddr; + strcpy (this->myinfo.identifier, sunaddr->sun_path); + + sunaddr = (struct sockaddr_un *) &this->peerinfo.sockaddr; + strcpy (this->peerinfo.identifier, sunaddr->sun_path); + } + break; + + default: + gf_log (this->xl->name, GF_LOG_ERROR, + "unknown address family (%d)", + ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family); + ret = -1; + break; + } + +err: + return ret; +} 
diff --git a/xlators/protocol/legacy/transport/socket/src/name.h b/xlators/protocol/legacy/transport/socket/src/name.h new file mode 100644 index 00000000000..f50a7b7f4dd --- /dev/null +++ b/xlators/protocol/legacy/transport/socket/src/name.h @@ -0,0 +1,44 @@ +/* + Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _SOCKET_NAME_H +#define _SOCKET_NAME_H + +#include "compat.h" + +int32_t +client_bind (transport_t *this, + struct sockaddr *sockaddr, + socklen_t *sockaddr_len, + int sock); + +int32_t +socket_client_get_remote_sockaddr (transport_t *this, + struct sockaddr *sockaddr, + socklen_t *sockaddr_len, + sa_family_t *sa_family); + +int32_t +socket_server_get_local_sockaddr (transport_t *this, struct sockaddr *addr, + socklen_t *addr_len, sa_family_t *sa_family); + +int32_t +get_transport_identifiers (transport_t *this); + +#endif /* _SOCKET_NAME_H */ diff --git a/xlators/protocol/legacy/transport/socket/src/socket-mem-types.h b/xlators/protocol/legacy/transport/socket/src/socket-mem-types.h new file mode 100644 index 00000000000..f50f4a75de8 --- /dev/null +++ b/xlators/protocol/legacy/transport/socket/src/socket-mem-types.h @@ -0,0 +1,36 @@ + +/* + Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. 
+ + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#ifndef __SOCKET_MEM_TYPES_H__ +#define __SOCKET_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_socket_mem_types_ { + gf_socket_mt_socket_private_t = gf_common_mt_end + 1, + gf_socket_mt_ioq, + gf_socket_mt_transport_t, + gf_socket_mt_socket_local_t, + gf_socket_mt_char, + gf_socket_mt_end +}; +#endif + diff --git a/xlators/protocol/legacy/transport/socket/src/socket.c b/xlators/protocol/legacy/transport/socket/src/socket.c new file mode 100644 index 00000000000..fd3815d83fe --- /dev/null +++ b/xlators/protocol/legacy/transport/socket/src/socket.c @@ -0,0 +1,1640 @@ +/* + Copyright (c) 2008-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. 
+*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "socket.h" +#include "name.h" +#include "dict.h" +#include "transport.h" +#include "logging.h" +#include "xlator.h" +#include "byte-order.h" +#include "common-utils.h" +#include "compat-errno.h" + +#include <fcntl.h> +#include <errno.h> +#include <netinet/tcp.h> + + +#define GF_LOG_ERRNO(errno) ((errno == ENOTCONN) ? GF_LOG_DEBUG : GF_LOG_ERROR) +#define SA(ptr) ((struct sockaddr *)ptr) + +int socket_init (transport_t *this); + +/* + * return value: + * 0 = success (completed) + * -1 = error + * > 0 = incomplete + */ + +int +__socket_rwv (transport_t *this, struct iovec *vector, int count, + struct iovec **pending_vector, int *pending_count, + int write) +{ + socket_private_t *priv = NULL; + int sock = -1; + int ret = -1; + struct iovec *opvector = NULL; + int opcount = 0; + int moved = 0; + + priv = this->private; + sock = priv->sock; + + opvector = vector; + opcount = count; + + while (opcount) { + if (write) { + ret = writev (sock, opvector, opcount); + + if (ret == 0 || (ret == -1 && errno == EAGAIN)) { + /* done for now */ + break; + } + } else { + ret = readv (sock, opvector, opcount); + + if (ret == -1 && errno == EAGAIN) { + /* done for now */ + break; + } + } + + if (ret == 0) { + /* Mostly due to 'umount' in client */ + gf_log (this->xl->name, GF_LOG_TRACE, + "EOF from peer %s", this->peerinfo.identifier); + opcount = -1; + errno = ENOTCONN; + break; + } + + if (ret == -1) { + if (errno == EINTR) + continue; + + gf_log (this->xl->name, GF_LOG_TRACE, + "%s failed (%s)", write ? 
"writev" : "readv", + strerror (errno)); + opcount = -1; + break; + } + + moved = 0; + + while (moved < ret) { + if ((ret - moved) >= opvector[0].iov_len) { + moved += opvector[0].iov_len; + opvector++; + opcount--; + } else { + opvector[0].iov_len -= (ret - moved); + opvector[0].iov_base += (ret - moved); + moved += (ret - moved); + } + while (opcount && !opvector[0].iov_len) { + opvector++; + opcount--; + } + } + } + + if (pending_vector) + *pending_vector = opvector; + + if (pending_count) + *pending_count = opcount; + + return opcount; +} + + +int +__socket_readv (transport_t *this, struct iovec *vector, int count, + struct iovec **pending_vector, int *pending_count) +{ + int ret = -1; + + ret = __socket_rwv (this, vector, count, + pending_vector, pending_count, 0); + + return ret; +} + + +int +__socket_writev (transport_t *this, struct iovec *vector, int count, + struct iovec **pending_vector, int *pending_count) +{ + int ret = -1; + + ret = __socket_rwv (this, vector, count, + pending_vector, pending_count, 1); + + return ret; +} + + +int +__socket_disconnect (transport_t *this) +{ + socket_private_t *priv = NULL; + int ret = -1; + + priv = this->private; + + if (priv->sock != -1) { + ret = shutdown (priv->sock, SHUT_RDWR); + priv->connected = -1; + gf_log (this->xl->name, GF_LOG_TRACE, + "shutdown() returned %d. 
set connection state to -1", + ret); + } + + return ret; +} + + +int +__socket_server_bind (transport_t *this) +{ + socket_private_t *priv = NULL; + int ret = -1; + int opt = 1; + + priv = this->private; + + ret = setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR, + &opt, sizeof (opt)); + + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "setsockopt() for SO_REUSEADDR failed (%s)", + strerror (errno)); + } + + ret = bind (priv->sock, (struct sockaddr *)&this->myinfo.sockaddr, + this->myinfo.sockaddr_len); + + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "binding to %s failed: %s", + this->myinfo.identifier, strerror (errno)); + if (errno == EADDRINUSE) { + gf_log (this->xl->name, GF_LOG_ERROR, + "Port is already in use"); + } + } + + return ret; +} + + +int +__socket_nonblock (int fd) +{ + int flags = 0; + int ret = -1; + + flags = fcntl (fd, F_GETFL); + + if (flags != -1) + ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK); + + return ret; +} + + +int +__socket_nodelay (int fd) +{ + int on = 1; + int ret = -1; + + ret = setsockopt (fd, IPPROTO_TCP, TCP_NODELAY, + &on, sizeof (on)); + if (!ret) + gf_log ("", GF_LOG_TRACE, + "NODELAY enabled for socket %d", fd); + + return ret; +} + + +int +__socket_keepalive (int fd, int keepalive_intvl) +{ + int on = 1; + int ret = -1; + + ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof (on)); + if (ret == -1) + goto err; + + if (keepalive_intvl == GF_USE_DEFAULT_KEEPALIVE) + goto done; + +#ifndef GF_LINUX_HOST_OS + ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepalive_intvl, + sizeof (keepalive_intvl)); + if (ret == -1) + goto err; +#else + ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepalive_intvl, + sizeof (keepalive_intvl)); + if (ret == -1) + goto err; + + ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepalive_intvl, + sizeof (keepalive_intvl)); + if (ret == -1) + goto err; +#endif + +done: + gf_log ("", GF_LOG_TRACE, "Keep-alive enabled for socket %d, interval " + "%d", fd, 
keepalive_intvl); + +err: + return ret; +} + + +int +__socket_connect_finish (int fd) +{ + int ret = -1; + int optval = 0; + socklen_t optlen = sizeof (int); + + ret = getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&optval, &optlen); + + if (ret == 0 && optval) { + errno = optval; + ret = -1; + } + + return ret; +} + + +void +__socket_reset (transport_t *this) +{ + socket_private_t *priv = NULL; + + priv = this->private; + + /* TODO: use mem-pool on incoming data */ + + if (priv->incoming.hdr_p) + GF_FREE (priv->incoming.hdr_p); + + if (priv->incoming.iobuf) + iobuf_unref (priv->incoming.iobuf); + + memset (&priv->incoming, 0, sizeof (priv->incoming)); + + event_unregister (this->xl->ctx->event_pool, priv->sock, priv->idx); + close (priv->sock); + priv->sock = -1; + priv->idx = -1; + priv->connected = -1; +} + + +struct ioq * +__socket_ioq_new (transport_t *this, char *buf, int len, + struct iovec *vector, int count, struct iobref *iobref) +{ + socket_private_t *priv = NULL; + struct ioq *entry = NULL; + + priv = this->private; + + /* TODO: use mem-pool */ + entry = GF_CALLOC (1, sizeof (*entry), + gf_common_mt_ioq); + if (!entry) + return NULL; + + assert (count <= (MAX_IOVEC-2)); + + entry->header.colonO[0] = ':'; + entry->header.colonO[1] = 'O'; + entry->header.colonO[2] = '\0'; + entry->header.version = 42; + entry->header.size1 = hton32 (len); + entry->header.size2 = hton32 (iov_length (vector, count)); + + entry->vector[0].iov_base = &entry->header; + entry->vector[0].iov_len = sizeof (entry->header); + entry->count++; + + entry->vector[1].iov_base = buf; + entry->vector[1].iov_len = len; + entry->count++; + + if (vector && count) { + memcpy (&entry->vector[2], vector, sizeof (*vector) * count); + entry->count += count; + } + + entry->pending_vector = entry->vector; + entry->pending_count = entry->count; + + if (iobref) + entry->iobref = iobref_ref (iobref); + + entry->buf = buf; + + INIT_LIST_HEAD (&entry->list); + + return entry; +} + + +void 
+__socket_ioq_entry_free (struct ioq *entry) +{ + list_del_init (&entry->list); + if (entry->iobref) + iobref_unref (entry->iobref); + + /* TODO: use mem-pool */ + GF_FREE (entry->buf); + + /* TODO: use mem-pool */ + GF_FREE (entry); +} + + +void +__socket_ioq_flush (transport_t *this) +{ + socket_private_t *priv = NULL; + struct ioq *entry = NULL; + + priv = this->private; + + while (!list_empty (&priv->ioq)) { + entry = priv->ioq_next; + __socket_ioq_entry_free (entry); + } + + return; +} + + +int +__socket_ioq_churn_entry (transport_t *this, struct ioq *entry) +{ + int ret = -1; + + ret = __socket_writev (this, entry->pending_vector, + entry->pending_count, + &entry->pending_vector, + &entry->pending_count); + + if (ret == 0) { + /* current entry was completely written */ + assert (entry->pending_count == 0); + __socket_ioq_entry_free (entry); + } + + return ret; +} + + +int +__socket_ioq_churn (transport_t *this) +{ + socket_private_t *priv = NULL; + int ret = 0; + struct ioq *entry = NULL; + + priv = this->private; + + while (!list_empty (&priv->ioq)) { + /* pick next entry */ + entry = priv->ioq_next; + + ret = __socket_ioq_churn_entry (this, entry); + + if (ret != 0) + break; + } + + if (list_empty (&priv->ioq)) { + /* all pending writes done, not interested in POLLOUT */ + priv->idx = event_select_on (this->xl->ctx->event_pool, + priv->sock, priv->idx, -1, 0); + } + + return ret; +} + + +int +socket_event_poll_err (transport_t *this) +{ + socket_private_t *priv = NULL; + int ret = -1; + + priv = this->private; + + pthread_mutex_lock (&priv->lock); + { + __socket_ioq_flush (this); + __socket_reset (this); + } + pthread_mutex_unlock (&priv->lock); + + xlator_notify (this->xl, GF_EVENT_POLLERR, this); + + return ret; +} + + +int +socket_event_poll_out (transport_t *this) +{ + socket_private_t *priv = NULL; + int ret = -1; + + priv = this->private; + + pthread_mutex_lock (&priv->lock); + { + if (priv->connected == 1) { + ret = __socket_ioq_churn (this); + + if 
(ret == -1) { + __socket_disconnect (this); + } + } + } + pthread_mutex_unlock (&priv->lock); + + xlator_notify (this->xl, GF_EVENT_POLLOUT, this); + + return ret; +} + + +int +__socket_proto_validate_header (transport_t *this, + struct socket_header *header, + size_t *size1_p, size_t *size2_p) +{ + size_t size1 = 0; + size_t size2 = 0; + + if (strcmp (header->colonO, ":O")) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "socket header signature does not match :O (%x.%x.%x)", + header->colonO[0], header->colonO[1], + header->colonO[2]); + return -1; + } + + if (header->version != 42) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "socket header version does not match 42 != %d", + header->version); + return -1; + } + + size1 = ntoh32 (header->size1); + size2 = ntoh32 (header->size2); + + if (size1 <= 0 || size1 > 1048576) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "socket header has incorrect size1=%"GF_PRI_SIZET, + size1); + return -1; + } + + if (size2 > (131072)) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "socket header has incorrect size2=%"GF_PRI_SIZET, + size2); + return -1; + } + + if (size1_p) + *size1_p = size1; + + if (size2_p) + *size2_p = size2; + + return 0; +} + + + +/* socket protocol state machine */ + +int +__socket_proto_state_machine (transport_t *this) +{ + int ret = -1; + socket_private_t *priv = NULL; + size_t size1 = 0; + size_t size2 = 0; + int previous_state = -1; + struct socket_header *hdr = NULL; + struct iobuf *iobuf = NULL; + + + priv = this->private; + + while (priv->incoming.state != SOCKET_PROTO_STATE_COMPLETE) { + /* debug check against infinite loops */ + if (previous_state == priv->incoming.state) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "state did not change! 
(%d) breaking", + previous_state); + ret = -1; + goto unlock; + } + previous_state = priv->incoming.state; + + switch (priv->incoming.state) { + + case SOCKET_PROTO_STATE_NADA: + priv->incoming.pending_vector = + priv->incoming.vector; + + priv->incoming.pending_vector->iov_base = + &priv->incoming.header; + + priv->incoming.pending_vector->iov_len = + sizeof (struct socket_header); + + priv->incoming.state = + SOCKET_PROTO_STATE_HEADER_COMING; + break; + + case SOCKET_PROTO_STATE_HEADER_COMING: + + ret = __socket_readv (this, + priv->incoming.pending_vector, 1, + &priv->incoming.pending_vector, + NULL); + if (ret == 0) { + priv->incoming.state = + SOCKET_PROTO_STATE_HEADER_CAME; + break; + } + + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_TRACE, + "read (%s) in state %d (%s)", + strerror (errno), + SOCKET_PROTO_STATE_HEADER_COMING, + this->peerinfo.identifier); + goto unlock; + } + + if (ret > 0) { + gf_log (this->xl->name, GF_LOG_TRACE, + "partial header read on NB socket."); + goto unlock; + } + break; + + case SOCKET_PROTO_STATE_HEADER_CAME: + hdr = &priv->incoming.header; + ret = __socket_proto_validate_header (this, hdr, + &size1, &size2); + + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "socket header validate failed (%s). 
" + "possible mismatch of transport-type " + "between server and client volumes, " + "or version mismatch", + this->peerinfo.identifier); + goto unlock; + } + + priv->incoming.hdrlen = size1; + priv->incoming.buflen = size2; + + /* TODO: use mem-pool */ + priv->incoming.hdr_p = GF_MALLOC (size1, + gf_common_mt_char); + if (size2) { + /* TODO: sanity check size2 < page size + */ + iobuf = iobuf_get (this->xl->ctx->iobuf_pool); + if (!iobuf) { + gf_log (this->xl->name, GF_LOG_ERROR, + "unable to allocate IO buffer " + "for peer %s", + this->peerinfo.identifier); + ret = -ENOMEM; + goto unlock; + } + priv->incoming.iobuf = iobuf; + priv->incoming.buf_p = iobuf->ptr; + } + + priv->incoming.vector[0].iov_base = + priv->incoming.hdr_p; + + priv->incoming.vector[0].iov_len = size1; + + priv->incoming.vector[1].iov_base = + priv->incoming.buf_p; + + priv->incoming.vector[1].iov_len = size2; + priv->incoming.count = size2 ? 2 : 1; + + priv->incoming.pending_vector = + priv->incoming.vector; + + priv->incoming.pending_count = + priv->incoming.count; + + priv->incoming.state = + SOCKET_PROTO_STATE_DATA_COMING; + break; + + case SOCKET_PROTO_STATE_DATA_COMING: + + ret = __socket_readv (this, + priv->incoming.pending_vector, + priv->incoming.pending_count, + &priv->incoming.pending_vector, + &priv->incoming.pending_count); + if (ret == 0) { + priv->incoming.state = + SOCKET_PROTO_STATE_DATA_CAME; + break; + } + + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "read (%s) in state %d (%s)", + strerror (errno), + SOCKET_PROTO_STATE_DATA_COMING, + this->peerinfo.identifier); + goto unlock; + } + + if (ret > 0) { + gf_log (this->xl->name, GF_LOG_TRACE, + "partial data read on NB socket"); + goto unlock; + } + break; + + case SOCKET_PROTO_STATE_DATA_CAME: + memset (&priv->incoming.vector, 0, + sizeof (priv->incoming.vector)); + priv->incoming.pending_vector = NULL; + priv->incoming.pending_count = 0; + priv->incoming.state = SOCKET_PROTO_STATE_COMPLETE; + break; + + case 
SOCKET_PROTO_STATE_COMPLETE: + /* not reached */ + break; + + default: + gf_log (this->xl->name, GF_LOG_DEBUG, + "undefined state reached: %d", + priv->incoming.state); + goto unlock; + } + } +unlock: + + return ret; +} + + +int +socket_proto_state_machine (transport_t *this) +{ + socket_private_t *priv = NULL; + int ret = 0; + + priv = this->private; + + pthread_mutex_lock (&priv->lock); + { + ret = __socket_proto_state_machine (this); + } + pthread_mutex_unlock (&priv->lock); + + return ret; +} + + +int +socket_event_poll_in (transport_t *this) +{ + int ret = -1; + + ret = socket_proto_state_machine (this); + + /* call POLLIN on xlator even if complete block is not received, + just to keep the last_received timestamp ticking */ + + if (ret == 0) + ret = xlator_notify (this->xl, GF_EVENT_POLLIN, this); + + return ret; +} + + +int +socket_connect_finish (transport_t *this) +{ + int ret = -1; + socket_private_t *priv = NULL; + int event = -1; + char notify_xlator = 0; + + priv = this->private; + + pthread_mutex_lock (&priv->lock); + { + if (priv->connected) + goto unlock; + + ret = __socket_connect_finish (priv->sock); + + if (ret == -1 && errno == EINPROGRESS) + ret = 1; + + if (ret == -1 && errno != EINPROGRESS) { + if (!priv->connect_finish_log) { + gf_log (this->xl->name, GF_LOG_ERROR, + "connection to %s failed (%s)", + this->peerinfo.identifier, + strerror (errno)); + priv->connect_finish_log = 1; + } + __socket_disconnect (this); + notify_xlator = 1; + event = GF_EVENT_POLLERR; + goto unlock; + } + + if (ret == 0) { + notify_xlator = 1; + + this->myinfo.sockaddr_len = + sizeof (this->myinfo.sockaddr); + + ret = getsockname (priv->sock, + SA (&this->myinfo.sockaddr), + &this->myinfo.sockaddr_len); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "getsockname on (%d) failed (%s)", + priv->sock, strerror (errno)); + __socket_disconnect (this); + event = GF_EVENT_POLLERR; + goto unlock; + } + + priv->connected = 1; + priv->connect_finish_log = 0; + 
event = GF_EVENT_CHILD_UP; + get_transport_identifiers (this); + } + } +unlock: + pthread_mutex_unlock (&priv->lock); + + if (notify_xlator) + xlator_notify (this->xl, event, this); + + return 0; +} + + +int +socket_event_handler (int fd, int idx, void *data, + int poll_in, int poll_out, int poll_err) +{ + transport_t *this = NULL; + socket_private_t *priv = NULL; + int ret = 0; + + this = data; + priv = this->private; + + pthread_mutex_lock (&priv->lock); + { + priv->idx = idx; + } + pthread_mutex_unlock (&priv->lock); + + if (!priv->connected) { + ret = socket_connect_finish (this); + } + + if (!ret && poll_out) { + ret = socket_event_poll_out (this); + } + + if (!ret && poll_in) { + ret = socket_event_poll_in (this); + } + + if (ret < 0 || poll_err) { + socket_event_poll_err (this); + transport_unref (this); + } + + return 0; +} + + +int +socket_server_event_handler (int fd, int idx, void *data, + int poll_in, int poll_out, int poll_err) +{ + transport_t *this = NULL; + socket_private_t *priv = NULL; + int ret = 0; + int new_sock = -1; + transport_t *new_trans = NULL; + struct sockaddr_storage new_sockaddr = {0, }; + socklen_t addrlen = sizeof (new_sockaddr); + socket_private_t *new_priv = NULL; + glusterfs_ctx_t *ctx = NULL; + + this = data; + priv = this->private; + ctx = this->xl->ctx; + + pthread_mutex_lock (&priv->lock); + { + priv->idx = idx; + + if (poll_in) { + new_sock = accept (priv->sock, SA (&new_sockaddr), + &addrlen); + + if (new_sock == -1) + goto unlock; + + if (!priv->bio) { + ret = __socket_nonblock (new_sock); + + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "NBIO on %d failed (%s)", + new_sock, strerror (errno)); + close (new_sock); + goto unlock; + } + } + + if (priv->nodelay) { + ret = __socket_nodelay (new_sock); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "setsockopt() failed for " + "NODELAY (%s)", + strerror (errno)); + } + } + + if (priv->keepalive) { + ret = __socket_keepalive (new_sock, + 
priv->keepaliveintvl); + if (ret == -1) + gf_log (this->xl->name, GF_LOG_ERROR, + "Failed to set keep-alive: %s", + strerror (errno)); + } + + new_trans = GF_CALLOC (1, sizeof (*new_trans), + gf_common_mt_transport_t); + new_trans->xl = this->xl; + new_trans->fini = this->fini; + + memcpy (&new_trans->peerinfo.sockaddr, &new_sockaddr, + addrlen); + new_trans->peerinfo.sockaddr_len = addrlen; + + new_trans->myinfo.sockaddr_len = + sizeof (new_trans->myinfo.sockaddr); + + ret = getsockname (new_sock, + SA (&new_trans->myinfo.sockaddr), + &new_trans->myinfo.sockaddr_len); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "getsockname on %d failed (%s)", + new_sock, strerror (errno)); + close (new_sock); + goto unlock; + } + + get_transport_identifiers (new_trans); + socket_init (new_trans); + new_trans->ops = this->ops; + new_trans->init = this->init; + new_trans->fini = this->fini; + + new_priv = new_trans->private; + + pthread_mutex_lock (&new_priv->lock); + { + new_priv->sock = new_sock; + new_priv->connected = 1; + + transport_ref (new_trans); + new_priv->idx = + event_register (ctx->event_pool, + new_sock, + socket_event_handler, + new_trans, 1, 0); + + if (new_priv->idx == -1) + ret = -1; + } + pthread_mutex_unlock (&new_priv->lock); + } + } +unlock: + pthread_mutex_unlock (&priv->lock); + + return ret; +} + + +int +socket_disconnect (transport_t *this) +{ + socket_private_t *priv = NULL; + int ret = -1; + + priv = this->private; + + pthread_mutex_lock (&priv->lock); + { + ret = __socket_disconnect (this); + } + pthread_mutex_unlock (&priv->lock); + + return ret; +} + + +int +socket_connect (transport_t *this) +{ + int ret = -1; + int sock = -1; + socket_private_t *priv = NULL; + struct sockaddr_storage sockaddr = {0, }; + socklen_t sockaddr_len = 0; + glusterfs_ctx_t *ctx = NULL; + sa_family_t sa_family = {0, }; + + priv = this->private; + ctx = this->xl->ctx; + + if (!priv) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "connect() called on 
uninitialized transport"); + goto err; + } + + pthread_mutex_lock (&priv->lock); + { + sock = priv->sock; + } + pthread_mutex_unlock (&priv->lock); + + if (sock != -1) { + gf_log (this->xl->name, GF_LOG_TRACE, + "connect () called on transport already connected"); + ret = 0; + goto err; + } + + ret = socket_client_get_remote_sockaddr (this, SA (&sockaddr), + &sockaddr_len, &sa_family); + if (ret == -1) { + /* logged inside client_get_remote_sockaddr */ + goto err; + } + + pthread_mutex_lock (&priv->lock); + { + if (priv->sock != -1) { + gf_log (this->xl->name, GF_LOG_TRACE, + "connect() -- already connected"); + goto unlock; + } + + memcpy (&this->peerinfo.sockaddr, &sockaddr, sockaddr_len); + this->peerinfo.sockaddr_len = sockaddr_len; + + priv->sock = socket (sa_family, SOCK_STREAM, 0); + if (priv->sock == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "socket creation failed (%s)", + strerror (errno)); + goto unlock; + } + + /* Cant help if setting socket options fails. We can continue + * working nonetheless. 
+ */ + if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF, + &priv->windowsize, + sizeof (priv->windowsize)) < 0) { + gf_log (this->xl->name, GF_LOG_ERROR, + "setting receive window size failed: %d: %d: " + "%s", priv->sock, priv->windowsize, + strerror (errno)); + } + + if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF, + &priv->windowsize, + sizeof (priv->windowsize)) < 0) { + gf_log (this->xl->name, GF_LOG_ERROR, + "setting send window size failed: %d: %d: " + "%s", priv->sock, priv->windowsize, + strerror (errno)); + } + + + if (priv->nodelay && priv->lowlat) { + ret = __socket_nodelay (priv->sock); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "setsockopt() failed for NODELAY (%s)", + strerror (errno)); + } + } + + if (!priv->bio) { + ret = __socket_nonblock (priv->sock); + + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "NBIO on %d failed (%s)", + priv->sock, strerror (errno)); + close (priv->sock); + priv->sock = -1; + goto unlock; + } + } + + if (priv->keepalive) { + ret = __socket_keepalive (priv->sock, + priv->keepaliveintvl); + if (ret == -1) + gf_log (this->xl->name, GF_LOG_ERROR, + "Failed to set keep-alive: %s", + strerror (errno)); + } + + SA (&this->myinfo.sockaddr)->sa_family = + SA (&this->peerinfo.sockaddr)->sa_family; + + ret = client_bind (this, SA (&this->myinfo.sockaddr), + &this->myinfo.sockaddr_len, priv->sock); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_WARNING, + "client bind failed: %s", strerror (errno)); + close (priv->sock); + priv->sock = -1; + goto unlock; + } + + ret = connect (priv->sock, SA (&this->peerinfo.sockaddr), + this->peerinfo.sockaddr_len); + + if (ret == -1 && errno != EINPROGRESS) { + gf_log (this->xl->name, GF_LOG_ERROR, + "connection attempt failed (%s)", + strerror (errno)); + close (priv->sock); + priv->sock = -1; + goto unlock; + } + + priv->connected = 0; + + transport_ref (this); + + priv->idx = event_register (ctx->event_pool, priv->sock, + socket_event_handler, this, 1, 1); 
+ if (priv->idx == -1) + ret = -1; + } +unlock: + pthread_mutex_unlock (&priv->lock); + +err: + return ret; +} + + +int +socket_listen (transport_t *this) +{ + socket_private_t * priv = NULL; + int ret = -1; + int sock = -1; + struct sockaddr_storage sockaddr; + socklen_t sockaddr_len; + peer_info_t *myinfo = NULL; + glusterfs_ctx_t *ctx = NULL; + sa_family_t sa_family = {0, }; + + priv = this->private; + myinfo = &this->myinfo; + ctx = this->xl->ctx; + + pthread_mutex_lock (&priv->lock); + { + sock = priv->sock; + } + pthread_mutex_unlock (&priv->lock); + + if (sock != -1) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "alreading listening"); + return ret; + } + + ret = socket_server_get_local_sockaddr (this, SA (&sockaddr), + &sockaddr_len, &sa_family); + if (ret == -1) { + return ret; + } + + pthread_mutex_lock (&priv->lock); + { + if (priv->sock != -1) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "already listening"); + goto unlock; + } + + memcpy (&myinfo->sockaddr, &sockaddr, sockaddr_len); + myinfo->sockaddr_len = sockaddr_len; + + priv->sock = socket (sa_family, SOCK_STREAM, 0); + + if (priv->sock == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "socket creation failed (%s)", + strerror (errno)); + goto unlock; + } + + /* Cant help if setting socket options fails. We can continue + * working nonetheless. 
+ */ + if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF, + &priv->windowsize, + sizeof (priv->windowsize)) < 0) { + gf_log (this->xl->name, GF_LOG_ERROR, + "setting receive window size failed: %d: %d: " + "%s", priv->sock, priv->windowsize, + strerror (errno)); + } + + if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF, + &priv->windowsize, + sizeof (priv->windowsize)) < 0) { + gf_log (this->xl->name, GF_LOG_ERROR, + "setting send window size failed: %d: %d: " + "%s", priv->sock, priv->windowsize, + strerror (errno)); + } + + if (priv->nodelay) { + ret = __socket_nodelay (priv->sock); + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "setsockopt() failed for NODELAY (%s)", + strerror (errno)); + } + } + + if (!priv->bio) { + ret = __socket_nonblock (priv->sock); + + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "NBIO on %d failed (%s)", + priv->sock, strerror (errno)); + close (priv->sock); + priv->sock = -1; + goto unlock; + } + } + + ret = __socket_server_bind (this); + + if (ret == -1) { + /* logged inside __socket_server_bind() */ + close (priv->sock); + priv->sock = -1; + goto unlock; + } + + ret = listen (priv->sock, 10); + + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "could not set socket %d to listen mode (%s)", + priv->sock, strerror (errno)); + close (priv->sock); + priv->sock = -1; + goto unlock; + } + + transport_ref (this); + + priv->idx = event_register (ctx->event_pool, priv->sock, + socket_server_event_handler, + this, 1, 0); + + if (priv->idx == -1) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "could not register socket %d with events", + priv->sock); + ret = -1; + close (priv->sock); + priv->sock = -1; + goto unlock; + } + } +unlock: + pthread_mutex_unlock (&priv->lock); + + return ret; +} + + +int +socket_receive (transport_t *this, char **hdr_p, size_t *hdrlen_p, + struct iobuf **iobuf_p) +{ + socket_private_t *priv = NULL; + int ret = -1; + + priv = this->private; + + pthread_mutex_lock (&priv->lock); + { 
+ if (priv->connected != 1) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "socket not connected to receive"); + goto unlock; + } + + if (!hdr_p || !hdrlen_p || !iobuf_p) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "bad parameters %p %p %p", + hdr_p, hdrlen_p, iobuf_p); + goto unlock; + } + + if (priv->incoming.state == SOCKET_PROTO_STATE_COMPLETE) { + *hdr_p = priv->incoming.hdr_p; + *hdrlen_p = priv->incoming.hdrlen; + *iobuf_p = priv->incoming.iobuf; + + memset (&priv->incoming, 0, sizeof (priv->incoming)); + priv->incoming.state = SOCKET_PROTO_STATE_NADA; + + ret = 0; + } + } +unlock: + pthread_mutex_unlock (&priv->lock); + + return ret; +} + + +/* TODO: implement per transfer limit */ +int +socket_submit (transport_t *this, char *buf, int len, + struct iovec *vector, int count, + struct iobref *iobref) +{ + socket_private_t *priv = NULL; + int ret = -1; + char need_poll_out = 0; + char need_append = 1; + struct ioq *entry = NULL; + glusterfs_ctx_t *ctx = NULL; + + priv = this->private; + ctx = this->xl->ctx; + + pthread_mutex_lock (&priv->lock); + { + if (priv->connected != 1) { + if (!priv->submit_log && !priv->connect_finish_log) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "not connected (priv->connected = %d)", + priv->connected); + priv->submit_log = 1; + } + goto unlock; + } + + priv->submit_log = 0; + entry = __socket_ioq_new (this, buf, len, vector, count, iobref); + if (!entry) + goto unlock; + + if (list_empty (&priv->ioq)) { + ret = __socket_ioq_churn_entry (this, entry); + + if (ret == 0) + need_append = 0; + + if (ret > 0) + need_poll_out = 1; + } + + if (need_append) { + list_add_tail (&entry->list, &priv->ioq); + ret = 0; + } + + if (need_poll_out) { + /* first entry to wait. 
continue writing on POLLOUT */ + priv->idx = event_select_on (ctx->event_pool, + priv->sock, + priv->idx, -1, 1); + } + } +unlock: + pthread_mutex_unlock (&priv->lock); + + return ret; +} + + +struct transport_ops tops = { + .listen = socket_listen, + .connect = socket_connect, + .disconnect = socket_disconnect, + .submit = socket_submit, + .receive = socket_receive +}; + + +int +socket_init (transport_t *this) +{ + socket_private_t *priv = NULL; + gf_boolean_t tmp_bool = 0; + uint64_t windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE; + char *optstr = NULL; + uint32_t keepalive = 0; + + if (this->private) { + gf_log (this->xl->name, GF_LOG_DEBUG, + "double init attempted"); + return -1; + } + + priv = GF_CALLOC (1, sizeof (*priv), + gf_common_mt_socket_private_t); + if (!priv) { + gf_log (this->xl->name, GF_LOG_ERROR, + "calloc (1, %"GF_PRI_SIZET") returned NULL", + sizeof (*priv)); + return -1; + } + + pthread_mutex_init (&priv->lock, NULL); + + priv->sock = -1; + priv->idx = -1; + priv->connected = -1; + + INIT_LIST_HEAD (&priv->ioq); + + if (dict_get (this->xl->options, "non-blocking-io")) { + optstr = data_to_str (dict_get (this->xl->options, + "non-blocking-io")); + + if (gf_string2boolean (optstr, &tmp_bool) == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "'non-blocking-io' takes only boolean options," + " not taking any action"); + tmp_bool = 1; + } + priv->bio = 0; + if (!tmp_bool) { + priv->bio = 1; + gf_log (this->xl->name, GF_LOG_WARNING, + "disabling non-blocking IO"); + } + } + + optstr = NULL; + + // By default, we enable NODELAY + priv->nodelay = 1; + if (dict_get (this->xl->options, "transport.socket.nodelay")) { + optstr = data_to_str (dict_get (this->xl->options, + "transport.socket.nodelay")); + + if (gf_string2boolean (optstr, &tmp_bool) == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "'transport.socket.nodelay' takes only " + "boolean options, not taking any action"); + tmp_bool = 1; + } + if (!tmp_bool) { + priv->nodelay = 0; + gf_log 
(this->xl->name, GF_LOG_DEBUG, + "disabling nodelay"); + } + } + + + optstr = NULL; + if (dict_get_str (this->xl->options, "transport.window-size", + &optstr) == 0) { + if (gf_string2bytesize (optstr, &windowsize) != 0) { + gf_log (this->xl->name, GF_LOG_ERROR, + "invalid number format: %s", optstr); + return -1; + } + } + + optstr = NULL; + + if (dict_get_str (this->xl->options, "transport.socket.lowlat", + &optstr) == 0) { + priv->lowlat = 1; + } + + /* Enable Keep-alive by default. */ + priv->keepalive = 1; + priv->keepaliveintvl = GF_USE_DEFAULT_KEEPALIVE; + if (dict_get_str (this->xl->options, "transport.socket.keepalive", + &optstr) == 0) { + if (gf_string2boolean (optstr, &tmp_bool) == -1) { + gf_log (this->xl->name, GF_LOG_ERROR, + "'transport.socket.keepalive' takes only " + "boolean options, not taking any action"); + tmp_bool = 1; + } + + if (!tmp_bool) + priv->keepalive = 0; + + } + + if (dict_get_uint32 (this->xl->options, + "transport.socket.keepalive-interval", + &keepalive) == 0) { + priv->keepaliveintvl = keepalive; + } + + priv->windowsize = (int)windowsize; + this->private = priv; + + return 0; +} + + +void +fini (transport_t *this) +{ + socket_private_t *priv = this->private; + + gf_log (this->xl->name, GF_LOG_TRACE, + "transport %p destroyed", this); + + pthread_mutex_destroy (&priv->lock); + GF_FREE (priv); +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_common_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } + + return ret; +} + +int32_t +init (transport_t *this) +{ + int ret = -1; + + ret = socket_init (this); + + if (ret == -1) { + gf_log (this->xl->name, GF_LOG_DEBUG, "socket_init() failed"); + } + + return ret; +} + +struct volume_options options[] = { + { .key = {"remote-port", + "transport.remote-port", + "transport.socket.remote-port"}, + .type = GF_OPTION_TYPE_INT + }, + { 
.key = {"transport.socket.listen-port", "listen-port"}, + .type = GF_OPTION_TYPE_INT + }, + { .key = {"transport.socket.bind-address", "bind-address" }, + .type = GF_OPTION_TYPE_INTERNET_ADDRESS + }, + { .key = {"transport.socket.connect-path", "connect-path"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"transport.socket.bind-path", "bind-path"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = {"transport.socket.listen-path", "listen-path"}, + .type = GF_OPTION_TYPE_ANY + }, + { .key = { "transport.address-family", + "address-family" }, + .value = {"inet", "inet6", "inet/inet6", "inet6/inet", + "unix", "inet-sdp" }, + .type = GF_OPTION_TYPE_STR + }, + + { .key = {"non-blocking-io"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"transport.window-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = GF_MIN_SOCKET_WINDOW_SIZE, + .max = GF_MAX_SOCKET_WINDOW_SIZE, + }, + { .key = {"transport.socket.nodelay"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"transport.socket.lowlat"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"transport.socket.keepalive"}, + .type = GF_OPTION_TYPE_BOOL + }, + { .key = {"transport.socket.keepalive-interval"}, + .type = GF_OPTION_TYPE_INT + }, + { .key = {NULL} } +}; + diff --git a/xlators/protocol/legacy/transport/socket/src/socket.h b/xlators/protocol/legacy/transport/socket/src/socket.h new file mode 100644 index 00000000000..e02801a5b1c --- /dev/null +++ b/xlators/protocol/legacy/transport/socket/src/socket.h @@ -0,0 +1,129 @@ +/* + Copyright (c) 2006-2009 Gluster, Inc. <http://www.gluster.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. 
+ + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _SOCKET_H +#define _SOCKET_H + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "event.h" +#include "transport.h" +#include "logging.h" +#include "dict.h" +#include "mem-pool.h" +#include "socket-mem-types.h" + +#ifndef MAX_IOVEC +#define MAX_IOVEC 16 +#endif /* MAX_IOVEC */ + +#define GF_DEFAULT_SOCKET_LISTEN_PORT 6996 + +/* This is the size set through setsockopt for + * both the TCP receive window size and the + * send buffer size. + * Till the time iobuf size becomes configurable, this size is set to include + * two iobufs + the GlusterFS protocol headers. + * Linux allows us to over-ride the max values for the system. + * Should we over-ride them? Because if we set a value larger than the default + * setsockopt will fail. Having larger values might be beneficial for + * IB links. 
+ */ +#define GF_DEFAULT_SOCKET_WINDOW_SIZE (512 * GF_UNIT_KB) +#define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB) +#define GF_MIN_SOCKET_WINDOW_SIZE (128 * GF_UNIT_KB) + +#define GF_USE_DEFAULT_KEEPALIVE (-1) + +typedef enum { + SOCKET_PROTO_STATE_NADA = 0, + SOCKET_PROTO_STATE_HEADER_COMING, + SOCKET_PROTO_STATE_HEADER_CAME, + SOCKET_PROTO_STATE_DATA_COMING, + SOCKET_PROTO_STATE_DATA_CAME, + SOCKET_PROTO_STATE_COMPLETE, +} socket_proto_state_t; + +struct socket_header { + char colonO[3]; + uint32_t size1; + uint32_t size2; + char version; +} __attribute__((packed)); + + +struct ioq { + union { + struct list_head list; + struct { + struct ioq *next; + struct ioq *prev; + }; + }; + struct socket_header header; + struct iovec vector[MAX_IOVEC]; + int count; + struct iovec *pending_vector; + int pending_count; + char *buf; + struct iobref *iobref; +}; + + +typedef struct { + int32_t sock; + int32_t idx; + unsigned char connected; // -1 = not connected. 0 = in progress. 1 = connected + char bio; + char connect_finish_log; + char submit_log; + union { + struct list_head ioq; + struct { + struct ioq *ioq_next; + struct ioq *ioq_prev; + }; + }; + struct { + int state; + struct socket_header header; + char *hdr_p; + size_t hdrlen; + struct iobuf *iobuf; + char *buf_p; + size_t buflen; + struct iovec vector[2]; + int count; + struct iovec *pending_vector; + int pending_count; + } incoming; + pthread_mutex_t lock; + int windowsize; + char lowlat; + char nodelay; + int keepalive; + int keepaliveintvl; +} socket_private_t; + + +#endif |