summaryrefslogtreecommitdiffstats
path: root/xlators/storage/posix/src
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage/posix/src')
-rw-r--r--xlators/storage/posix/src/Makefile.am17
-rw-r--r--xlators/storage/posix/src/posix-aio.c569
-rw-r--r--xlators/storage/posix/src/posix-aio.h39
-rw-r--r--xlators/storage/posix/src/posix-handle.c744
-rw-r--r--xlators/storage/posix/src/posix-handle.h143
-rw-r--r--xlators/storage/posix/src/posix-helpers.c1391
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h21
-rw-r--r--xlators/storage/posix/src/posix.c4349
-rw-r--r--xlators/storage/posix/src/posix.h129
9 files changed, 5476 insertions, 1926 deletions
diff --git a/xlators/storage/posix/src/Makefile.am b/xlators/storage/posix/src/Makefile.am
index b8d1668ae..88efcc784 100644
--- a/xlators/storage/posix/src/Makefile.am
+++ b/xlators/storage/posix/src/Makefile.am
@@ -2,17 +2,18 @@
xlator_LTLIBRARIES = posix.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage
-posix_la_LDFLAGS = -module -avoidversion
+posix_la_LDFLAGS = -module -avoid-version
-posix_la_SOURCES = posix.c
-posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c posix-aio.c
+posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBAIO)
-noinst_HEADERS = posix.h posix-mem-types.h
+noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h posix-aio.h
-AM_CFLAGS = -fPIC -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
- -D$(GF_HOST_OS) -Wall -I$(top_srcdir)/libglusterfs/src -shared \
- -nostartfiles -I$(top_srcdir)/contrib/md5 -I$(top_srcdir)/rpc/xdr/src \
- -I$(top_srcdir)/rpc/rpc-lib/src $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c
new file mode 100644
index 000000000..c3bbddd67
--- /dev/null
+++ b/xlators/storage/posix/src/posix-aio.c
@@ -0,0 +1,569 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "glusterfs.h"
+#include "posix.h"
+#include <sys/uio.h>
+
+#ifdef HAVE_LIBAIO
+#include <libaio.h>
+
+
+void
+__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
+ off_t offset, size_t size)
+{
+ int odirect = 0;
+ int flags = 0;
+ int ret = 0;
+
+ odirect = pfd->odirect;
+
+ if ((fd->flags|opflags) & O_DIRECT) {
+ /* if instructed, use O_DIRECT always */
+ odirect = 1;
+ } else {
+ /* else use O_DIRECT when feasible */
+ if ((offset|size) & 0xfff)
+ odirect = 0;
+ else
+ odirect = 1;
+ }
+
+ if (!odirect && pfd->odirect) {
+ flags = fcntl (pfd->fd, F_GETFL);
+ ret = fcntl (pfd->fd, F_SETFL, (flags & (~O_DIRECT)));
+ pfd->odirect = 0;
+ }
+
+ if (odirect && !pfd->odirect) {
+ flags = fcntl (pfd->fd, F_GETFL);
+ ret = fcntl (pfd->fd, F_SETFL, (flags | O_DIRECT));
+ pfd->odirect = 1;
+ }
+
+ if (ret) {
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "fcntl() failed (%s). fd=%d flags=%d pfd->odirect=%d",
+ strerror (errno), pfd->fd, flags, pfd->odirect);
+ }
+}
+
+
+struct posix_aio_cb {
+ struct iocb iocb;
+ call_frame_t *frame;
+ struct iobuf *iobuf;
+ struct iobref *iobref;
+ struct iatt prebuf;
+ int fd;
+ int op;
+ off_t offset;
+};
+
+
+int
+posix_aio_readv_complete (struct posix_aio_cb *paiocb, int res, int res2)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iatt postbuf = {0,};
+ int _fd = -1;
+ int op_ret = -1;
+ int op_errno = 0;
+ struct iovec iov;
+ struct iobref *iobref = NULL;
+ int ret = 0;
+ off_t offset = 0;
+ struct posix_private * priv = NULL;
+
+
+ frame = paiocb->frame;
+ this = frame->this;
+ priv = this->private;
+ iobuf = paiocb->iobuf;
+ _fd = paiocb->fd;
+ offset = paiocb->offset;
+
+ if (res < 0) {
+ op_ret = -1;
+ op_errno = -res;
+ gf_log (this->name, GF_LOG_ERROR,
+ "readv(async) failed fd=%d,size=%lu,offset=%llu (%d/%s)",
+ _fd, paiocb->iocb.u.c.nbytes,
+ (unsigned long long) paiocb->offset,
+ res, strerror (op_errno));
+ goto out;
+ }
+
+ ret = posix_fdstat (this, _fd, &postbuf);
+ if (ret != 0) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "fstat failed on fd=%d: %s", _fd,
+ strerror (op_errno));
+ goto out;
+ }
+
+ op_ret = res;
+ op_errno = 0;
+
+ iobref = iobref_new ();
+ if (!iobref) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ iobref_add (iobref, iobuf);
+
+ iov.iov_base = iobuf_ptr (iobuf);
+ iov.iov_len = op_ret;
+
+
+ /* Hack to notify higher layers of EOF. */
+ if (!postbuf.ia_size || (offset + iov.iov_len) >= postbuf.ia_size)
+ op_errno = ENOENT;
+
+ LOCK (&priv->lock);
+ {
+ priv->read_value += op_ret;
+ }
+ UNLOCK (&priv->lock);
+
+out:
+ STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, &iov, 1,
+ &postbuf, iobref, NULL);
+ if (iobuf)
+ iobuf_unref (iobuf);
+ if (iobref)
+ iobref_unref (iobref);
+
+ GF_FREE (paiocb);
+
+ return 0;
+}
+
+
+int
+posix_aio_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int _fd = -1;
+ struct iobuf *iobuf = NULL;
+ struct posix_fd * pfd = NULL;
+ int ret = -1;
+ struct posix_aio_cb *paiocb = NULL;
+ struct posix_private *priv = NULL;
+ struct iocb *iocb = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ priv = this->private;
+
+ ret = posix_fd_ctx_get (fd, this, &pfd);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd is NULL from fd=%p", fd);
+ goto err;
+ }
+ _fd = pfd->fd;
+
+ if (!size) {
+ op_errno = EINVAL;
+ gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size);
+ goto err;
+ }
+
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_posix_mt_paiocb);
+ if (!paiocb) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+
+ paiocb->frame = frame;
+ paiocb->iobuf = iobuf;
+ paiocb->offset = offset;
+ paiocb->fd = _fd;
+ paiocb->op = GF_FOP_READ;
+
+ paiocb->iocb.data = paiocb;
+ paiocb->iocb.aio_fildes = _fd;
+ paiocb->iocb.aio_lio_opcode = IO_CMD_PREAD;
+ paiocb->iocb.aio_reqprio = 0;
+ paiocb->iocb.u.c.buf = iobuf_ptr (iobuf);
+ paiocb->iocb.u.c.nbytes = size;
+ paiocb->iocb.u.c.offset = offset;
+
+ iocb = &paiocb->iocb;
+
+ LOCK (&fd->lock);
+ {
+ __posix_fd_set_odirect (fd, pfd, flags, offset, size);
+
+ ret = io_submit (priv->ctxp, 1, &iocb);
+ }
+ UNLOCK (&fd->lock);
+
+ if (ret != 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "io_submit() returned %d", ret);
+ op_errno = -ret;
+ goto err;
+ }
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
+ if (iobuf)
+ iobuf_unref (iobuf);
+
+ if (paiocb)
+ GF_FREE (paiocb);
+
+ return 0;
+}
+
+
+int
+posix_aio_writev_complete (struct posix_aio_cb *paiocb, int res, int res2)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ struct iatt prebuf = {0,};
+ struct iatt postbuf = {0,};
+ int _fd = -1;
+ int op_ret = -1;
+ int op_errno = 0;
+ int ret = 0;
+ struct posix_private * priv = NULL;
+
+
+ frame = paiocb->frame;
+ this = frame->this;
+ priv = this->private;
+ prebuf = paiocb->prebuf;
+ _fd = paiocb->fd;
+
+ if (res < 0) {
+ op_ret = -1;
+ op_errno = -res;
+ gf_log (this->name, GF_LOG_ERROR,
+ "writev(async) failed fd=%d,offset=%llu (%d/%s)",
+ _fd, (unsigned long long) paiocb->offset, res,
+ strerror (op_errno));
+
+ goto out;
+ }
+
+ ret = posix_fdstat (this, _fd, &postbuf);
+ if (ret != 0) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "fstat failed on fd=%d: %s", _fd,
+ strerror (op_errno));
+ goto out;
+ }
+
+
+ op_ret = res;
+ op_errno = 0;
+
+ LOCK (&priv->lock);
+ {
+ priv->write_value += op_ret;
+ }
+ UNLOCK (&priv->lock);
+
+out:
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &prebuf, &postbuf,
+ NULL);
+
+ if (paiocb) {
+ if (paiocb->iobref)
+ iobref_unref (paiocb->iobref);
+ GF_FREE (paiocb);
+ }
+
+ return 0;
+}
+
+
+int
+posix_aio_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *iov, int count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int _fd = -1;
+ struct posix_fd * pfd = NULL;
+ int ret = -1;
+ struct posix_aio_cb *paiocb = NULL;
+ struct posix_private *priv = NULL;
+ struct iocb *iocb = NULL;
+
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ priv = this->private;
+
+ ret = posix_fd_ctx_get (fd, this, &pfd);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd is NULL from fd=%p", fd);
+ goto err;
+ }
+ _fd = pfd->fd;
+
+ paiocb = GF_CALLOC (1, sizeof (*paiocb), gf_posix_mt_paiocb);
+ if (!paiocb) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+
+ paiocb->frame = frame;
+ paiocb->offset = offset;
+ paiocb->fd = _fd;
+ paiocb->op = GF_FOP_WRITE;
+
+ paiocb->iocb.data = paiocb;
+ paiocb->iocb.aio_fildes = _fd;
+ paiocb->iobref = iobref_ref (iobref);
+ paiocb->iocb.aio_lio_opcode = IO_CMD_PWRITEV;
+ paiocb->iocb.aio_reqprio = 0;
+ paiocb->iocb.u.v.vec = iov;
+ paiocb->iocb.u.v.nr = count;
+ paiocb->iocb.u.v.offset = offset;
+
+ iocb = &paiocb->iocb;
+
+ ret = posix_fdstat (this, _fd, &paiocb->prebuf);
+ if (ret != 0) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "fstat failed on fd=%p: %s", fd,
+ strerror (op_errno));
+ goto err;
+ }
+
+
+ LOCK (&fd->lock);
+ {
+ __posix_fd_set_odirect (fd, pfd, flags, offset,
+ iov_length (iov, count));
+
+ ret = io_submit (priv->ctxp, 1, &iocb);
+ }
+ UNLOCK (&fd->lock);
+
+ if (ret != 1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "io_submit() returned %d", ret);
+ op_errno = -ret;
+ goto err;
+ }
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, 0, 0, 0);
+
+ if (paiocb) {
+ if (paiocb->iobref)
+ iobref_unref (paiocb->iobref);
+ GF_FREE (paiocb);
+ }
+
+ return 0;
+}
+
+
+void *
+posix_aio_thread (void *data)
+{
+ xlator_t *this = NULL;
+ struct posix_private *priv = NULL;
+ int ret = 0;
+ int i = 0;
+ struct io_event events[POSIX_AIO_MAX_NR_GETEVENTS];
+ struct io_event *event = NULL;
+ struct posix_aio_cb *paiocb = NULL;
+
+ this = data;
+ THIS = this;
+ priv = this->private;
+
+ for (;;) {
+ memset (&events[0], 0, sizeof (events));
+ ret = io_getevents (priv->ctxp, 1, POSIX_AIO_MAX_NR_GETEVENTS,
+ &events[0], NULL);
+ if (ret <= 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "io_getevents() returned %d", ret);
+ if (ret == -EINTR)
+ continue;
+ break;
+ }
+
+ for (i = 0; i < ret; i++) {
+ event = &events[i];
+
+ paiocb = event->data;
+
+ switch (paiocb->op) {
+ case GF_FOP_READ:
+ posix_aio_readv_complete (paiocb, event->res,
+ event->res2);
+ break;
+ case GF_FOP_WRITE:
+ posix_aio_writev_complete (paiocb, event->res,
+ event->res2);
+ break;
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "unknown op %d found in piocb",
+ paiocb->op);
+ break;
+ }
+ }
+ }
+
+ return NULL;
+}
+
+
+int
+posix_aio_init (xlator_t *this)
+{
+ struct posix_private *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ ret = io_setup (POSIX_AIO_MAX_NR_EVENTS, &priv->ctxp);
+ if ((ret == -1 && errno == ENOSYS) || ret == -ENOSYS) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Linux AIO not available at run-time."
+ " Continuing with synchronous IO");
+ ret = 0;
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "io_setup() failed. ret=%d, errno=%d",
+ ret, errno);
+ goto out;
+ }
+
+ ret = gf_thread_create (&priv->aiothread, NULL,
+ posix_aio_thread, this);
+ if (ret != 0) {
+ io_destroy (priv->ctxp);
+ goto out;
+ }
+
+ this->fops->readv = posix_aio_readv;
+ this->fops->writev = posix_aio_writev;
+out:
+ return ret;
+}
+
+
+int
+posix_aio_on (xlator_t *this)
+{
+ struct posix_private *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ if (!priv->aio_init_done) {
+ ret = posix_aio_init (this);
+ if (ret == 0)
+ priv->aio_capable = _gf_true;
+ else
+ priv->aio_capable = _gf_false;
+ priv->aio_init_done = _gf_true;
+ }
+
+ if (priv->aio_capable) {
+ this->fops->readv = posix_aio_readv;
+ this->fops->writev = posix_aio_writev;
+ }
+
+ return ret;
+}
+
+int
+posix_aio_off (xlator_t *this)
+{
+ this->fops->readv = posix_readv;
+ this->fops->writev = posix_writev;
+
+ return 0;
+}
+
+
+#else
+
+
+int
+posix_aio_on (xlator_t *this)
+{
+ gf_log (this->name, GF_LOG_INFO,
+ "Linux AIO not available at build-time."
+ " Continuing with synchronous IO");
+ return 0;
+}
+
+int
+posix_aio_off (xlator_t *this)
+{
+ gf_log (this->name, GF_LOG_INFO,
+ "Linux AIO not available at build-time."
+ " Continuing with synchronous IO");
+ return 0;
+}
+
+void
+__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
+ off_t offset, size_t size)
+{
+ xlator_t *this = THIS;
+ gf_log (this->name, GF_LOG_INFO,
+ "Linux AIO not available at build-time."
+ " Continuing with synchronous IO");
+ return;
+}
+#endif
diff --git a/xlators/storage/posix/src/posix-aio.h b/xlators/storage/posix/src/posix-aio.h
new file mode 100644
index 000000000..5bde71601
--- /dev/null
+++ b/xlators/storage/posix/src/posix-aio.h
@@ -0,0 +1,39 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _POSIX_AIO_H
+#define _POSIX_AIO_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "glusterfs.h"
+
+// Maximum number of concurrently submitted IO events. The heaviest load
+// GlusterFS has been able to handle had 60-80 concurrent calls
+#define POSIX_AIO_MAX_NR_EVENTS 256
+
+// Maximum number of completed IO operations to reap per getevents syscall
+#define POSIX_AIO_MAX_NR_GETEVENTS 16
+
+
+int posix_aio_on (xlator_t *this);
+int posix_aio_off (xlator_t *this);
+
+int posix_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+#endif /* !_POSIX_AIO_H */
diff --git a/xlators/storage/posix/src/posix-handle.c b/xlators/storage/posix/src/posix-handle.c
new file mode 100644
index 000000000..219a582c9
--- /dev/null
+++ b/xlators/storage/posix/src/posix-handle.c
@@ -0,0 +1,744 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <libgen.h>
+#ifdef GF_LINUX_HOST_OS
+#include <alloca.h>
+#endif
+
+#include "posix-handle.h"
+#include "posix.h"
+#include "xlator.h"
+#include "syscall.h"
+
+
+#define HANDLE_PFX ".glusterfs"
+#define TRASH_DIR "landfill"
+
+#define UUID0_STR "00000000-0000-0000-0000-000000000000"
+#define SLEN(str) (sizeof(str) - 1)
+
+
+int
+posix_handle_relpath (xlator_t *this, uuid_t gfid, const char *basename,
+ char *buf, size_t buflen)
+{
+ char *uuid_str = NULL;
+ int len = 0;
+
+ len = SLEN("../")
+ + SLEN("../")
+ + SLEN("00/")
+ + SLEN("00/")
+ + SLEN(UUID0_STR)
+ + 1 /* '\0' */
+ ;
+
+ if (basename) {
+ len += (strlen (basename) + 1);
+ }
+
+ if (buflen < len || !buf)
+ return len;
+
+ uuid_str = uuid_utoa (gfid);
+
+ if (basename) {
+ len = snprintf (buf, buflen, "../../%02x/%02x/%s/%s",
+ gfid[0], gfid[1], uuid_str, basename);
+ } else {
+ len = snprintf (buf, buflen, "../../%02x/%02x/%s",
+ gfid[0], gfid[1], uuid_str);
+ }
+
+ return len;
+}
+
+
+/*
+ TODO: explain how this pump fixes ELOOP
+*/
+int
+posix_handle_pump (xlator_t *this, char *buf, int len, int maxlen,
+ char *base_str, int base_len, int pfx_len)
+{
+ char linkname[512] = {0,}; /* "../../<gfid>/<NAME_MAX>" */
+ int ret = 0;
+ int blen = 0;
+ int link_len = 0;
+
+ /* is a directory's symlink-handle */
+ ret = readlink (base_str, linkname, 512);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "internal readlink failed on %s (%s)",
+ base_str, strerror (errno));
+ goto err;
+ }
+
+ if (ret < 512)
+ linkname[ret] = 0;
+
+ link_len = ret;
+
+ if ((ret == 8) && memcmp (linkname, "../../..", 8) == 0) {
+ if (strcmp (base_str, buf) == 0) {
+ strcpy (buf + pfx_len, "..");
+ }
+ goto out;
+ }
+
+ if (ret < 50 || ret >= 512) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "malformed internal link %s for %s",
+ linkname, base_str);
+ goto err;
+ }
+
+ if (memcmp (linkname, "../../", 6) != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "malformed internal link %s for %s",
+ linkname, base_str);
+ goto err;
+ }
+
+ if ((linkname[2] != '/') ||
+ (linkname[5] != '/') ||
+ (linkname[8] != '/') ||
+ (linkname[11] != '/') ||
+ (linkname[48] != '/')) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "malformed internal link %s for %s",
+ linkname, base_str);
+ goto err;
+ }
+
+ if ((linkname[20] != '-') ||
+ (linkname[25] != '-') ||
+ (linkname[30] != '-') ||
+ (linkname[35] != '-')) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "malformed internal link %s for %s",
+ linkname, base_str);
+ goto err;
+ }
+
+ blen = link_len - 48;
+ memmove (buf + base_len + blen, buf + base_len,
+ (strlen (buf) - base_len) + 1);
+
+ strncpy (base_str + pfx_len, linkname + 6, 42);
+
+ if (len + blen < maxlen)
+ strncpy (buf + pfx_len, linkname + 6, link_len - 6);
+out:
+ return len + blen;
+err:
+ return -1;
+}
+
+
+/*
+ posix_handle_path differs from posix_handle_gfid_path in the way that the
+ path filled in @buf by posix_handle_path will return type IA_IFDIR when
+ an lstat() is performed on it, whereas posix_handle_gfid_path returns path
+ to the handle symlink (typically used for the purpose of unlinking it).
+
+ posix_handle_path also guarantees immunity to ELOOP on the path returned by it
+*/
+
+int
+posix_handle_path (xlator_t *this, uuid_t gfid, const char *basename,
+ char *ubuf, size_t size)
+{
+ struct posix_private *priv = NULL;
+ char *uuid_str = NULL;
+ int len = 0;
+ int ret = -1;
+ struct stat stat;
+ char *base_str = NULL;
+ int base_len = 0;
+ int pfx_len;
+ int maxlen;
+ char *buf;
+
+ priv = this->private;
+
+ uuid_str = uuid_utoa (gfid);
+
+ if (ubuf) {
+ buf = ubuf;
+ maxlen = size;
+ } else {
+ maxlen = PATH_MAX;
+ buf = alloca (maxlen);
+ }
+
+ base_len = (priv->base_path_length + SLEN(HANDLE_PFX) + 45);
+ base_str = alloca (base_len + 1);
+ base_len = snprintf (base_str, base_len + 1, "%s/%s/%02x/%02x/%s",
+ priv->base_path, HANDLE_PFX, gfid[0], gfid[1],
+ uuid_str);
+
+ pfx_len = priv->base_path_length + 1 + SLEN(HANDLE_PFX) + 1;
+
+ if (basename) {
+ len = snprintf (buf, maxlen, "%s/%s", base_str, basename);
+ } else {
+ len = snprintf (buf, maxlen, "%s", base_str);
+ }
+
+ ret = lstat (base_str, &stat);
+
+ if (!(ret == 0 && S_ISLNK(stat.st_mode) && stat.st_nlink == 1))
+ goto out;
+
+ do {
+ errno = 0;
+ ret = posix_handle_pump (this, buf, len, maxlen,
+ base_str, base_len, pfx_len);
+ if (ret == -1)
+ break;
+
+ len = ret;
+
+ ret = lstat (buf, &stat);
+ } while ((ret == -1) && errno == ELOOP);
+
+out:
+ return len + 1;
+}
+
+
+int
+posix_handle_gfid_path (xlator_t *this, uuid_t gfid, const char *basename,
+ char *buf, size_t buflen)
+{
+ struct posix_private *priv = NULL;
+ char *uuid_str = NULL;
+ int len = 0;
+
+ priv = this->private;
+
+ len = priv->base_path_length /* option directory "/export" */
+ + SLEN("/")
+ + SLEN(HANDLE_PFX)
+ + SLEN("/")
+ + SLEN("00/")
+ + SLEN("00/")
+ + SLEN(UUID0_STR)
+ + 1 /* '\0' */
+ ;
+
+ if (basename) {
+ len += (strlen (basename) + 1);
+ } else {
+ len += 256; /* worst-case for directory's symlink-handle expansion */
+ }
+
+ if ((buflen < len) || !buf)
+ return len;
+
+ uuid_str = uuid_utoa (gfid);
+
+ if (__is_root_gfid (gfid)) {
+ if (basename) {
+ len = snprintf (buf, buflen, "%s/%s", priv->base_path,
+ basename);
+ } else {
+ strncpy (buf, priv->base_path, buflen);
+ }
+ goto out;
+ }
+
+ if (basename) {
+ len = snprintf (buf, buflen, "%s/%s/%02x/%02x/%s/%s", priv->base_path,
+ HANDLE_PFX, gfid[0], gfid[1], uuid_str, basename);
+ } else {
+ len = snprintf (buf, buflen, "%s/%s/%02x/%02x/%s", priv->base_path,
+ HANDLE_PFX, gfid[0], gfid[1], uuid_str);
+ }
+out:
+ return len;
+}
+
+
+int
+posix_handle_init (xlator_t *this)
+{
+ struct posix_private *priv = NULL;
+ char *handle_pfx = NULL;
+ int ret = 0;
+ int len = 0;
+ struct stat stbuf;
+ struct stat rootbuf;
+ struct stat exportbuf;
+ char *rootstr = NULL;
+ uuid_t gfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+
+ priv = this->private;
+
+ ret = stat (priv->base_path, &exportbuf);
+ if (ret || !S_ISDIR (exportbuf.st_mode)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Not a directory: %s", priv->base_path);
+ return -1;
+ }
+
+ handle_pfx = alloca (priv->base_path_length + 1 + strlen (HANDLE_PFX)
+ + 1);
+
+ sprintf (handle_pfx, "%s/%s", priv->base_path, HANDLE_PFX);
+
+ ret = stat (handle_pfx, &stbuf);
+ switch (ret) {
+ case -1:
+ if (errno == ENOENT) {
+ ret = mkdir (handle_pfx, 0600);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Creating directory %s failed: %s",
+ handle_pfx, strerror (errno));
+ return -1;
+ }
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Checking for %s failed: %s",
+ handle_pfx, strerror (errno));
+ return -1;
+ }
+ break;
+ case 0:
+ if (!S_ISDIR (stbuf.st_mode)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Not a directory: %s",
+ handle_pfx);
+ return -1;
+ }
+ break;
+ default:
+ break;
+ }
+
+ stat (handle_pfx, &priv->handledir);
+
+ len = posix_handle_path (this, gfid, NULL, NULL, 0);
+ rootstr = alloca (len);
+ posix_handle_path (this, gfid, NULL, rootstr, len);
+
+ ret = stat (rootstr, &rootbuf);
+ switch (ret) {
+ case -1:
+ if (errno != ENOENT) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: %s", priv->base_path,
+ strerror (errno));
+ return -1;
+ }
+
+ ret = posix_handle_mkdir_hashes (this, rootstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mkdir %s failed (%s)",
+ rootstr, strerror (errno));
+ return -1;
+ }
+
+ ret = symlink ("../../..", rootstr);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "symlink %s creation failed (%s)",
+ rootstr, strerror (errno));
+ return -1;
+ }
+ break;
+ case 0:
+ if ((exportbuf.st_ino == rootbuf.st_ino) &&
+ (exportbuf.st_dev == rootbuf.st_dev))
+ return 0;
+
+ gf_log (this->name, GF_LOG_ERROR,
+ "Different dirs %s (%lld/%lld) != %s (%lld/%lld)",
+ priv->base_path, (long long) exportbuf.st_ino,
+ (long long) exportbuf.st_dev, rootstr,
+ (long long) rootbuf.st_ino, (long long) rootbuf.st_dev);
+ return -1;
+
+ break;
+ }
+
+ return 0;
+}
+
+gf_boolean_t
+posix_does_old_trash_exists (char *old_trash)
+{
+ uuid_t gfid = {0};
+ gf_boolean_t exists = _gf_false;
+ struct stat stbuf = {0};
+ int ret = 0;
+
+ ret = lstat (old_trash, &stbuf);
+ if ((ret == 0) && S_ISDIR (stbuf.st_mode)) {
+ ret = sys_lgetxattr (old_trash, "trusted.gfid", gfid, 16);
+ if ((ret < 0) && (errno == ENODATA))
+ exists = _gf_true;
+ }
+ return exists;
+}
+
+int
+posix_handle_new_trash_init (xlator_t *this, char *trash)
+{
+ int ret = 0;
+ struct stat stbuf = {0};
+
+ ret = lstat (trash, &stbuf);
+ switch (ret) {
+ case -1:
+ if (errno == ENOENT) {
+ ret = mkdir (trash, 0755);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Creating directory %s failed: %s",
+ trash, strerror (errno));
+ }
+ } else {
+ gf_log (this->name, GF_LOG_ERROR, "Checking for %s "
+ "failed: %s", trash, strerror (errno));
+ }
+ break;
+ case 0:
+ if (!S_ISDIR (stbuf.st_mode)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Not a directory: %s", trash);
+ ret = -1;
+ }
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+int
+posix_mv_old_trash_into_new_trash (xlator_t *this, char *old, char *new)
+{
+ char dest_old[PATH_MAX] = {0};
+ int ret = 0;
+ uuid_t dest_name = {0};
+
+ if (!posix_does_old_trash_exists (old))
+ goto out;
+ uuid_generate (dest_name);
+ snprintf (dest_old, sizeof (dest_old), "%s/%s", new,
+ uuid_utoa (dest_name));
+ ret = rename (old, dest_old);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Not able to move "
+ "%s -> %s (%s)", old, dest_old, strerror (errno));
+ }
+out:
+ return ret;
+}
+
+int
+posix_handle_trash_init (xlator_t *this)
+{
+ int ret = -1;
+ struct posix_private *priv = NULL;
+ char old_trash[PATH_MAX] = {0};
+
+ priv = this->private;
+
+ priv->trash_path = GF_CALLOC (1, priv->base_path_length + strlen ("/")
+ + strlen (HANDLE_PFX) + strlen ("/")
+ + strlen (TRASH_DIR) + 1,
+ gf_posix_mt_trash_path);
+
+ if (!priv->trash_path)
+ goto out;
+
+ strncpy (priv->trash_path, priv->base_path, priv->base_path_length);
+ strcat (priv->trash_path, "/" HANDLE_PFX "/" TRASH_DIR);
+ ret = posix_handle_new_trash_init (this, priv->trash_path);
+ if (ret)
+ goto out;
+ snprintf (old_trash, sizeof (old_trash), "%s/.landfill",
+ priv->base_path);
+ ret = posix_mv_old_trash_into_new_trash (this, old_trash,
+ priv->trash_path);
+out:
+ return ret;
+}
+
+int
+posix_handle_mkdir_hashes (xlator_t *this, const char *newpath)
+{
+ char *duppath = NULL;
+ char *parpath = NULL;
+ int ret = 0;
+
+ duppath = strdupa (newpath);
+ parpath = dirname (duppath);
+ parpath = dirname (duppath);
+
+ ret = mkdir (parpath, 0700);
+ if (ret == -1 && errno != EEXIST) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error mkdir hash-1 %s (%s)",
+ parpath, strerror (errno));
+ return -1;
+ }
+
+ strcpy (duppath, newpath);
+ parpath = dirname (duppath);
+
+ ret = mkdir (parpath, 0700);
+ if (ret == -1 && errno != EEXIST) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "error mkdir hash-2 %s (%s)",
+ parpath, strerror (errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+
+int
+posix_handle_hard (xlator_t *this, const char *oldpath, uuid_t gfid, struct stat *oldbuf)
+{
+ char *newpath = NULL;
+ struct stat newbuf;
+ int ret = -1;
+
+
+ MAKE_HANDLE_PATH (newpath, this, gfid, NULL);
+
+ ret = lstat (newpath, &newbuf);
+ if (ret == -1 && errno != ENOENT) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: %s", newpath, strerror (errno));
+ return -1;
+ }
+
+ if (ret == -1 && errno == ENOENT) {
+ ret = posix_handle_mkdir_hashes (this, newpath);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mkdir %s failed (%s)",
+ newpath, strerror (errno));
+ return -1;
+ }
+
+#ifdef HAVE_LINKAT
+ /*
+ * Use linkat if the target may be a symlink to a directory
+ * or without an existing target. See comment about linkat()
+ * usage in posix_link() in posix.c for details
+ */
+ ret = linkat (AT_FDCWD, oldpath, AT_FDCWD, newpath, 0);
+#else
+ ret = link (oldpath, newpath);
+#endif
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "link %s -> %s failed (%s)",
+ oldpath, newpath, strerror (errno));
+ return -1;
+ }
+
+ ret = lstat (newpath, &newbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "lstat on %s failed (%s)",
+ newpath, strerror (errno));
+ return -1;
+ }
+ }
+
+ if (newbuf.st_ino != oldbuf->st_ino ||
+ newbuf.st_dev != oldbuf->st_dev) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mismatching ino/dev between file %s (%lld/%lld) "
+ "and handle %s (%lld/%lld)",
+ oldpath, (long long) oldbuf->st_ino, (long long) oldbuf->st_dev,
+ newpath, (long long) newbuf.st_ino, (long long) newbuf.st_dev);
+ ret = -1;
+ }
+
+ return ret;
+}
+
+
+int
+posix_handle_soft (xlator_t *this, const char *real_path, loc_t *loc,
+ uuid_t gfid, struct stat *oldbuf)
+{
+ char *oldpath = NULL;
+ char *newpath = NULL;
+ struct stat newbuf;
+ int ret = -1;
+
+
+ MAKE_HANDLE_PATH (newpath, this, gfid, NULL);
+ MAKE_HANDLE_RELPATH (oldpath, this, loc->pargfid, loc->name);
+
+
+ ret = lstat (newpath, &newbuf);
+ if (ret == -1 && errno != ENOENT) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: %s", newpath, strerror (errno));
+ return -1;
+ }
+
+ if (ret == -1 && errno == ENOENT) {
+ ret = posix_handle_mkdir_hashes (this, newpath);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mkdir %s failed (%s)",
+ newpath, strerror (errno));
+ return -1;
+ }
+
+ ret = symlink (oldpath, newpath);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "symlink %s -> %s failed (%s)",
+ oldpath, newpath, strerror (errno));
+ return -1;
+ }
+
+ ret = lstat (newpath, &newbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "stat on %s failed (%s)",
+ newpath, strerror (errno));
+ return -1;
+ }
+ }
+
+ ret = stat (real_path, &newbuf);
+ if (ret) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "stat on %s failed (%s)", newpath, strerror (errno));
+ return -1;
+ }
+
+ if (!oldbuf)
+ return ret;
+
+ if (newbuf.st_ino != oldbuf->st_ino ||
+ newbuf.st_dev != oldbuf->st_dev) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mismatching ino/dev between file %s (%lld/%lld) "
+ "and handle %s (%lld/%lld)",
+ oldpath, (long long) oldbuf->st_ino, (long long) oldbuf->st_dev,
+ newpath, (long long) newbuf.st_ino, (long long) newbuf.st_dev);
+ ret = -1;
+ }
+
+ return ret;
+}
+
+
+static int
+posix_handle_unset_gfid (xlator_t *this, uuid_t gfid)
+{
+ char *path = NULL;
+ int ret = 0;
+ struct stat stat;
+
+ MAKE_HANDLE_GFID_PATH (path, this, gfid, NULL);
+
+ ret = lstat (path, &stat);
+
+ if (ret == -1) {
+ if (errno != ENOENT) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: %s", path, strerror (errno));
+ }
+ goto out;
+ }
+
+ ret = unlink (path);
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "unlink %s failed (%s)", path, strerror (errno));
+ }
+
+out:
+ return ret;
+}
+
+
+int
+posix_handle_unset (xlator_t *this, uuid_t gfid, const char *basename)
+{
+ int ret;
+ struct iatt stat;
+ char *path = NULL;
+
+
+ if (!basename) {
+ ret = posix_handle_unset_gfid (this, gfid);
+ return ret;
+ }
+
+ MAKE_HANDLE_PATH (path, this, gfid, basename);
+
+ ret = posix_istat (this, gfid, basename, &stat);
+
+ if (ret == -1) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: %s", path, strerror (errno));
+ return -1;
+ }
+
+ ret = posix_handle_unset_gfid (this, stat.ia_gfid);
+
+ return ret;
+}
+
+
+int
+posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid,
+ char *real_path)
+{
+ int ret = -1;
+ struct stat stbuf = {0,};
+ char *newpath = NULL;
+
+ MAKE_HANDLE_PATH (newpath, this, gfid, NULL);
+ ret = lstat (newpath, &stbuf);
+ if (!ret) {
+#ifdef HAVE_LINKAT
+ /*
+ * Use linkat if the target may be a symlink to a directory
+ * or without an existing target. See comment about linkat()
+ * usage in posix_link() in posix.c for details
+ */
+ ret = linkat (AT_FDCWD, newpath, AT_FDCWD, real_path, 0);
+#else
+ ret = link (newpath, real_path);
+#endif
+ }
+
+ return ret;
+}
diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h
new file mode 100644
index 000000000..f1163b727
--- /dev/null
+++ b/xlators/storage/posix/src/posix-handle.h
@@ -0,0 +1,143 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _POSIX_HANDLE_H
+#define _POSIX_HANDLE_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <sys/types.h>
+#include "xlator.h"
+
+
+#define LOC_HAS_ABSPATH(loc) ((loc) && (loc->path) && (loc->path[0] == '/'))
+
+#define MAKE_REAL_PATH(var, this, path) do { \
+ var = alloca (strlen (path) + POSIX_BASE_PATH_LEN(this) + 2); \
+ strcpy (var, POSIX_BASE_PATH(this)); \
+ strcpy (&var[POSIX_BASE_PATH_LEN(this)], path); \
+ } while (0)
+
+
+#define MAKE_HANDLE_PATH(var, this, gfid, base) do { \
+ int __len; \
+ __len = posix_handle_path (this, gfid, base, NULL, 0); \
+ if (__len <= 0) \
+ break; \
+ var = alloca (__len); \
+ __len = posix_handle_path (this, gfid, base, var, __len); \
+ } while (0)
+
+
+#define MAKE_HANDLE_GFID_PATH(var, this, gfid, base) do { \
+ int __len = 0; \
+ __len = posix_handle_gfid_path (this, gfid, base, NULL, 0); \
+ if (__len <= 0) \
+ break; \
+ var = alloca (__len); \
+ __len = posix_handle_gfid_path (this, gfid, base, var, __len); \
+ } while (0)
+
+
+#define MAKE_HANDLE_RELPATH(var, this, gfid, base) do { \
+ int __len; \
+ __len = posix_handle_relpath (this, gfid, base, NULL, 0); \
+ if (__len <= 0) \
+ break; \
+ var = alloca (__len); \
+ __len = posix_handle_relpath (this, gfid, base, var, __len); \
+ } while (0)
+
+
+#define MAKE_INODE_HANDLE(rpath, this, loc, iatt_p) do { \
+ if (uuid_is_null (loc->gfid)) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "null gfid for path %s", loc->path); \
+ break; \
+ } \
+ if (LOC_HAS_ABSPATH (loc)) { \
+ MAKE_REAL_PATH (rpath, this, loc->path); \
+ op_ret = posix_pstat (this, loc->gfid, rpath, iatt_p); \
+ break; \
+ } \
+ errno = 0; \
+ op_ret = posix_istat (this, loc->gfid, NULL, iatt_p); \
+ if (errno != ELOOP) { \
+ MAKE_HANDLE_PATH (rpath, this, loc->gfid, NULL); \
+ break; \
+ } \
+ /* __ret == -1 && errno == ELOOP */ \
+ } while (0)
+
+
+#define MAKE_ENTRY_HANDLE(entp, parp, this, loc, ent_p) do { \
+ char *__parp; \
+ \
+ if (uuid_is_null (loc->pargfid) || !loc->name) { \
+ gf_log (this->name, GF_LOG_ERROR, \
+ "null pargfid/name for path %s", loc->path); \
+ break; \
+ } \
+ \
+ if (LOC_HAS_ABSPATH (loc)) { \
+ MAKE_REAL_PATH (entp, this, loc->path); \
+ __parp = strdupa (entp); \
+ parp = dirname (__parp); \
+ op_ret = posix_pstat (this, NULL, entp, ent_p); \
+ break; \
+ } \
+ errno = 0; \
+ op_ret = posix_istat (this, loc->pargfid, loc->name, ent_p); \
+ if (errno != ELOOP) { \
+ MAKE_HANDLE_PATH (parp, this, loc->pargfid, NULL); \
+ MAKE_HANDLE_PATH (entp, this, loc->pargfid, loc->name); \
+ break; \
+ } \
+ /* __ret == -1 && errno == ELOOP */ \
+ /* expand ELOOP */ \
+ } while (0)
+
+
+
+int
+posix_handle_path (xlator_t *this, uuid_t gfid, const char *basename, char *buf,
+ size_t len);
+int
+posix_handle_path_safe (xlator_t *this, uuid_t gfid, const char *basename,
+ char *buf, size_t len);
+
+int
+posix_handle_gfid_path (xlator_t *this, uuid_t gfid, const char *basename,
+ char *buf, size_t len);
+
+int
+posix_handle_hard (xlator_t *this, const char *path, uuid_t gfid,
+ struct stat *buf);
+
+
+int
+posix_handle_soft (xlator_t *this, const char *real_path, loc_t *loc,
+ uuid_t gfid, struct stat *buf);
+
+int
+posix_handle_unset (xlator_t *this, uuid_t gfid, const char *basename);
+
+int posix_handle_mkdir_hashes (xlator_t *this, const char *newpath);
+
+int posix_handle_init (xlator_t *this);
+
+int posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid,
+ char *real_path);
+
+int
+posix_handle_trash_init (xlator_t *this);
+#endif /* !_POSIX_HANDLE_H */
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
new file mode 100644
index 000000000..e295f8850
--- /dev/null
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -0,0 +1,1391 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#define __XOPEN_SOURCE 500
+
+#include <stdint.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <errno.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <ftw.h>
+#include <sys/stat.h>
+#include <signal.h>
+
+#ifndef GF_BSD_HOST_OS
+#include <alloca.h>
+#endif /* GF_BSD_HOST_OS */
+
+#include "glusterfs.h"
+#include "checksum.h"
+#include "dict.h"
+#include "logging.h"
+#include "posix.h"
+#include "xlator.h"
+#include "defaults.h"
+#include "common-utils.h"
+#include "compat-errno.h"
+#include "compat.h"
+#include "byte-order.h"
+#include "syscall.h"
+#include "statedump.h"
+#include "locking.h"
+#include "timer.h"
+#include "glusterfs3-xdr.h"
+#include "hashfn.h"
+#include "glusterfs-acl.h"
+#include <fnmatch.h>
+
+char *marker_xattrs[] = {"trusted.glusterfs.quota.*",
+ "trusted.glusterfs.*.xtime",
+ NULL};
+
+static char* posix_ignore_xattrs[] = {
+ "gfid-req",
+ GLUSTERFS_ENTRYLK_COUNT,
+ GLUSTERFS_INODELK_COUNT,
+ GLUSTERFS_POSIXLK_COUNT,
+ NULL
+};
+
+gf_boolean_t
+posix_special_xattr (char **pattern, char *key)
+{
+ int i = 0;
+ gf_boolean_t flag = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("posix", pattern, out);
+ GF_VALIDATE_OR_GOTO ("posix", key, out);
+
+ for (i = 0; pattern[i]; i++) {
+ if (!fnmatch (pattern[i], key, 0)) {
+ flag = _gf_true;
+ break;
+ }
+ }
+out:
+ return flag;
+}
+
+static gf_boolean_t
+posix_xattr_ignorable (char *key, posix_xattr_filler_t *filler)
+{
+ int i = 0;
+ gf_boolean_t ignore = _gf_false;
+
+ GF_ASSERT (key);
+ if (!key)
+ goto out;
+ for (i = 0; posix_ignore_xattrs[i]; i++) {
+ if (!strcmp (key, posix_ignore_xattrs[i])) {
+ ignore = _gf_true;
+ goto out;
+ }
+ }
+ if ((!strcmp (key, GF_CONTENT_KEY))
+ && (!IA_ISREG (filler->stbuf->ia_type)))
+ ignore = _gf_true;
+out:
+ return ignore;
+}
+
+static int
+_posix_xattr_get_set (dict_t *xattr_req,
+ char *key,
+ data_t *data,
+ void *xattrargs)
+{
+ posix_xattr_filler_t *filler = xattrargs;
+ char *value = NULL;
+ ssize_t xattr_size = -1;
+ int ret = -1;
+ char *databuf = NULL;
+ int _fd = -1;
+ loc_t *loc = NULL;
+ ssize_t req_size = 0;
+
+
+ if (posix_xattr_ignorable (key, filler))
+ goto out;
+ /* should size be put into the data_t ? */
+ if (!strcmp (key, GF_CONTENT_KEY)
+ && IA_ISREG (filler->stbuf->ia_type)) {
+
+ /* file content request */
+ req_size = data_to_uint64 (data);
+ if (req_size >= filler->stbuf->ia_size) {
+ _fd = open (filler->real_path, O_RDONLY);
+ if (_fd == -1) {
+ gf_log (filler->this->name, GF_LOG_ERROR,
+ "Opening file %s failed: %s",
+ filler->real_path, strerror (errno));
+ goto err;
+ }
+
+ databuf = GF_CALLOC (1, filler->stbuf->ia_size,
+ gf_posix_mt_char);
+ if (!databuf) {
+ goto err;
+ }
+
+ ret = read (_fd, databuf, filler->stbuf->ia_size);
+ if (ret == -1) {
+ gf_log (filler->this->name, GF_LOG_ERROR,
+ "Read on file %s failed: %s",
+ filler->real_path, strerror (errno));
+ goto err;
+ }
+
+ ret = close (_fd);
+ _fd = -1;
+ if (ret == -1) {
+ gf_log (filler->this->name, GF_LOG_ERROR,
+ "Close on file %s failed: %s",
+ filler->real_path, strerror (errno));
+ goto err;
+ }
+
+ ret = dict_set_bin (filler->xattr, key,
+ databuf, filler->stbuf->ia_size);
+ if (ret < 0) {
+ gf_log (filler->this->name, GF_LOG_ERROR,
+ "failed to set dict value. key: %s, path: %s",
+ key, filler->real_path);
+ goto err;
+ }
+
+ /* To avoid double free in cleanup below */
+ databuf = NULL;
+ err:
+ if (_fd != -1)
+ close (_fd);
+ GF_FREE (databuf);
+ }
+ } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
+ loc = filler->loc;
+ if (loc) {
+ ret = dict_set_uint32 (filler->xattr, key,
+ loc->inode->fd_count);
+ if (ret < 0)
+ gf_log (filler->this->name, GF_LOG_WARNING,
+ "Failed to set dictionary value for %s",
+ key);
+ }
+ } else {
+ xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0);
+
+ if (xattr_size > 0) {
+ value = GF_CALLOC (1, xattr_size + 1,
+ gf_posix_mt_char);
+ if (!value)
+ return -1;
+
+ xattr_size = sys_lgetxattr (filler->real_path, key, value,
+ xattr_size);
+ if (xattr_size <= 0) {
+ gf_log (filler->this->name, GF_LOG_WARNING,
+ "getxattr failed. path: %s, key: %s",
+ filler->real_path, key);
+ GF_FREE (value);
+ return -1;
+ }
+
+ value[xattr_size] = '\0';
+ ret = dict_set_bin (filler->xattr, key,
+ value, xattr_size);
+ if (ret < 0) {
+ gf_log (filler->this->name, GF_LOG_DEBUG,
+ "dict set failed. path: %s, key: %s",
+ filler->real_path, key);
+ GF_FREE (value);
+ }
+ }
+ }
+out:
+ return 0;
+}
+
+
+int
+posix_fill_gfid_path (xlator_t *this, const char *path, struct iatt *iatt)
+{
+ int ret = 0;
+ ssize_t size = 0;
+
+ if (!iatt)
+ return 0;
+
+ size = sys_lgetxattr (path, GFID_XATTR_KEY, iatt->ia_gfid, 16);
+ /* Return value of getxattr */
+ if ((size == 16) || (size == -1))
+ ret = 0;
+ else
+ ret = size;
+
+ return ret;
+}
+
+
+int
+posix_fill_gfid_fd (xlator_t *this, int fd, struct iatt *iatt)
+{
+ int ret = 0;
+ ssize_t size = 0;
+
+ if (!iatt)
+ return 0;
+
+ size = sys_fgetxattr (fd, GFID_XATTR_KEY, iatt->ia_gfid, 16);
+ /* Return value of getxattr */
+ if ((size == 16) || (size == -1))
+ ret = 0;
+ else
+ ret = size;
+
+ return ret;
+}
+
+void
+posix_fill_ino_from_gfid (xlator_t *this, struct iatt *buf)
+{
+ uint64_t temp_ino = 0;
+ int j = 0;
+ int i = 0;
+
+ /* consider least significant 8 bytes of value out of gfid */
+ if (uuid_is_null (buf->ia_gfid)) {
+ buf->ia_ino = -1;
+ goto out;
+ }
+ for (i = 15; i > (15 - 8); i--) {
+ temp_ino += (uint64_t)(buf->ia_gfid[i]) << j;
+ j += 8;
+ }
+ buf->ia_ino = temp_ino;
+out:
+ return;
+}
+
+int
+posix_fdstat (xlator_t *this, int fd, struct iatt *stbuf_p)
+{
+ int ret = 0;
+ struct stat fstatbuf = {0, };
+ struct iatt stbuf = {0, };
+
+ ret = fstat (fd, &fstatbuf);
+ if (ret == -1)
+ goto out;
+
+ if (fstatbuf.st_nlink && !S_ISDIR (fstatbuf.st_mode))
+ fstatbuf.st_nlink--;
+
+ iatt_from_stat (&stbuf, &fstatbuf);
+
+ ret = posix_fill_gfid_fd (this, fd, &stbuf);
+ if (ret)
+ gf_log_callingfn (this->name, GF_LOG_DEBUG, "failed to get gfid");
+
+ posix_fill_ino_from_gfid (this, &stbuf);
+
+ if (stbuf_p)
+ *stbuf_p = stbuf;
+
+out:
+ return ret;
+}
+
+
+int
+posix_istat (xlator_t *this, uuid_t gfid, const char *basename,
+ struct iatt *buf_p)
+{
+ char *real_path = NULL;
+ struct stat lstatbuf = {0, };
+ struct iatt stbuf = {0, };
+ int ret = 0;
+ struct posix_private *priv = NULL;
+
+
+ priv = this->private;
+
+ MAKE_HANDLE_PATH (real_path, this, gfid, basename);
+
+ ret = lstat (real_path, &lstatbuf);
+
+ if (ret != 0) {
+ if (ret == -1) {
+ if (errno != ENOENT && errno != ELOOP)
+ gf_log (this->name, GF_LOG_WARNING,
+ "lstat failed on %s (%s)",
+ real_path, strerror (errno));
+ } else {
+ // may be some backend filesystem issue
+ gf_log (this->name, GF_LOG_ERROR, "lstat failed on "
+ "%s and return value is %d instead of -1. "
+ "Please see dmesg output to check whether the "
+ "failure is due to backend filesystem issue",
+ real_path, ret);
+ ret = -1;
+ }
+ goto out;
+ }
+
+ if ((lstatbuf.st_ino == priv->handledir.st_ino) &&
+ (lstatbuf.st_dev == priv->handledir.st_dev)) {
+ errno = ENOENT;
+ return -1;
+ }
+
+ if (!S_ISDIR (lstatbuf.st_mode))
+ lstatbuf.st_nlink --;
+
+ iatt_from_stat (&stbuf, &lstatbuf);
+
+ if (basename)
+ posix_fill_gfid_path (this, real_path, &stbuf);
+ else
+ uuid_copy (stbuf.ia_gfid, gfid);
+
+ posix_fill_ino_from_gfid (this, &stbuf);
+
+ if (buf_p)
+ *buf_p = stbuf;
+out:
+ return ret;
+}
+
+
+
+int
+posix_pstat (xlator_t *this, uuid_t gfid, const char *path,
+ struct iatt *buf_p)
+{
+ struct stat lstatbuf = {0, };
+ struct iatt stbuf = {0, };
+ int ret = 0;
+ struct posix_private *priv = NULL;
+
+
+ priv = this->private;
+
+ ret = lstat (path, &lstatbuf);
+
+ if (ret != 0) {
+ if (ret == -1) {
+ if (errno != ENOENT)
+ gf_log (this->name, GF_LOG_WARNING,
+ "lstat failed on %s (%s)",
+ path, strerror (errno));
+ } else {
+ // may be some backend filesytem issue
+ gf_log (this->name, GF_LOG_ERROR, "lstat failed on "
+ "%s and return value is %d instead of -1. "
+ "Please see dmesg output to check whether the "
+ "failure is due to backend filesystem issue",
+ path, ret);
+ ret = -1;
+ }
+ goto out;
+ }
+
+ if ((lstatbuf.st_ino == priv->handledir.st_ino) &&
+ (lstatbuf.st_dev == priv->handledir.st_dev)) {
+ errno = ENOENT;
+ return -1;
+ }
+
+ if (!S_ISDIR (lstatbuf.st_mode))
+ lstatbuf.st_nlink --;
+
+ iatt_from_stat (&stbuf, &lstatbuf);
+
+ if (gfid && !uuid_is_null (gfid))
+ uuid_copy (stbuf.ia_gfid, gfid);
+ else
+ posix_fill_gfid_path (this, path, &stbuf);
+
+ posix_fill_ino_from_gfid (this, &stbuf);
+
+ if (buf_p)
+ *buf_p = stbuf;
+out:
+ return ret;
+}
+
+
+dict_t *
+posix_lookup_xattr_fill (xlator_t *this, const char *real_path, loc_t *loc,
+ dict_t *xattr_req, struct iatt *buf)
+{
+ dict_t *xattr = NULL;
+ posix_xattr_filler_t filler = {0, };
+
+ xattr = get_new_dict();
+ if (!xattr) {
+ goto out;
+ }
+
+ filler.this = this;
+ filler.real_path = real_path;
+ filler.xattr = xattr;
+ filler.stbuf = buf;
+ filler.loc = loc;
+
+ dict_foreach (xattr_req, _posix_xattr_get_set, &filler);
+out:
+ return xattr;
+}
+
+
+int
+posix_gfid_set (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
+{
+ void *uuid_req = NULL;
+ uuid_t uuid_curr;
+ int ret = 0;
+ ssize_t size = 0;
+ struct stat stat = {0, };
+
+
+ if (!xattr_req)
+ goto out;
+
+ if (sys_lstat (path, &stat) != 0)
+ goto out;
+
+ size = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16);
+ if (size == 16) {
+ ret = 0;
+ goto verify_handle;
+ }
+
+ ret = dict_get_ptr (xattr_req, "gfid-req", &uuid_req);
+ if (ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get the gfid from dict for %s",
+ loc->path);
+ goto out;
+ }
+
+ ret = sys_lsetxattr (path, GFID_XATTR_KEY, uuid_req, 16, XATTR_CREATE);
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "setting GFID on %s failed (%s)", path,
+ strerror (errno));
+ goto out;
+ }
+ uuid_copy (uuid_curr, uuid_req);
+
+verify_handle:
+ if (!S_ISDIR (stat.st_mode))
+ ret = posix_handle_hard (this, path, uuid_curr, &stat);
+ else
+ ret = posix_handle_soft (this, path, loc, uuid_curr, &stat);
+
+out:
+ return ret;
+}
+
+
+int
+posix_set_file_contents (xlator_t *this, const char *path, char *keyp,
+ data_t *value, int flags)
+{
+ char * key = NULL;
+ char real_path[PATH_MAX];
+ int32_t file_fd = -1;
+ int op_ret = 0;
+ int ret = -1;
+
+
+ /* XXX: does not handle assigning GFID to created files */
+ return -1;
+
+ key = &(keyp[15]);
+ sprintf (real_path, "%s/%s", path, key);
+
+ if (flags & XATTR_REPLACE) {
+ /* if file exists, replace it
+ * else, error out */
+ file_fd = open (real_path, O_TRUNC|O_WRONLY);
+
+ if (file_fd == -1) {
+ goto create;
+ }
+
+ if (value->len) {
+ ret = write (file_fd, value->data, value->len);
+ if (ret == -1) {
+ op_ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "write failed while doing setxattr "
+ "for key %s on path %s: %s",
+ key, real_path, strerror (errno));
+ goto out;
+ }
+
+ ret = close (file_fd);
+ if (ret == -1) {
+ op_ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "close failed on %s: %s",
+ real_path, strerror (errno));
+ goto out;
+ }
+ }
+
+ create: /* we know file doesn't exist, create it */
+
+ file_fd = open (real_path, O_CREAT|O_WRONLY, 0644);
+
+ if (file_fd == -1) {
+ op_ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to open file %s with O_CREAT: %s",
+ key, strerror (errno));
+ goto out;
+ }
+
+ ret = write (file_fd, value->data, value->len);
+ if (ret == -1) {
+ op_ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "write failed on %s while setxattr with "
+ "key %s: %s",
+ real_path, key, strerror (errno));
+ goto out;
+ }
+
+ ret = close (file_fd);
+ if (ret == -1) {
+ op_ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "close failed on %s while setxattr with "
+ "key %s: %s",
+ real_path, key, strerror (errno));
+ goto out;
+ }
+ }
+
+out:
+ return op_ret;
+}
+
+
+int
+posix_get_file_contents (xlator_t *this, uuid_t pargfid,
+ const char *name, char **contents)
+{
+ char *real_path = NULL;
+ int32_t file_fd = -1;
+ struct iatt stbuf = {0,};
+ int op_ret = 0;
+ int ret = -1;
+
+
+ MAKE_HANDLE_PATH (real_path, this, pargfid, name);
+
+ op_ret = posix_istat (this, pargfid, name, &stbuf);
+ if (op_ret == -1) {
+ op_ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s",
+ real_path, strerror (errno));
+ goto out;
+ }
+
+ file_fd = open (real_path, O_RDONLY);
+
+ if (file_fd == -1) {
+ op_ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR, "open failed on %s: %s",
+ real_path, strerror (errno));
+ goto out;
+ }
+
+ *contents = GF_CALLOC (stbuf.ia_size + 1, sizeof(char),
+ gf_posix_mt_char);
+ if (! *contents) {
+ op_ret = -errno;
+ goto out;
+ }
+
+ ret = read (file_fd, *contents, stbuf.ia_size);
+ if (ret <= 0) {
+ op_ret = -1;
+ gf_log (this->name, GF_LOG_ERROR, "read on %s failed: %s",
+ real_path, strerror (errno));
+ goto out;
+ }
+
+ *contents[stbuf.ia_size] = '\0';
+
+ op_ret = close (file_fd);
+ file_fd = -1;
+ if (op_ret == -1) {
+ op_ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR, "close on %s failed: %s",
+ real_path, strerror (errno));
+ goto out;
+ }
+
+out:
+ if (op_ret < 0) {
+ GF_FREE (*contents);
+ if (file_fd != -1)
+ close (file_fd);
+ }
+
+ return op_ret;
+}
+
+static int gf_xattr_enotsup_log;
+
+int
+posix_handle_pair (xlator_t *this, const char *real_path,
+ char *key, data_t *value, int flags)
+{
+ int sys_ret = -1;
+ int ret = 0;
+
+ if (ZR_FILE_CONTENT_REQUEST(key)) {
+ ret = posix_set_file_contents (this, real_path, key, value,
+ flags);
+ } else {
+ sys_ret = sys_lsetxattr (real_path, key, value->data,
+ value->len, flags);
+
+ if (sys_ret < 0) {
+ if (errno == ENOTSUP) {
+ GF_LOG_OCCASIONALLY(gf_xattr_enotsup_log,
+ this->name,GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting "
+ "brick with 'user_xattr' "
+ "flag)");
+ } else if (errno == ENOENT) {
+ if (!posix_special_xattr (marker_xattrs,
+ key)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setxattr on %s failed: %s",
+ real_path, strerror (errno));
+ }
+ } else {
+
+#ifdef GF_DARWIN_HOST_OS
+ gf_log (this->name,
+ ((errno == EINVAL) ?
+ GF_LOG_DEBUG : GF_LOG_ERROR),
+ "%s: key:%s error:%s",
+ real_path, key,
+ strerror (errno));
+#else /* ! DARWIN */
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: key:%s error:%s",
+ real_path, key,
+ strerror (errno));
+#endif /* DARWIN */
+ }
+
+ ret = -errno;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+int
+posix_fhandle_pair (xlator_t *this, int fd,
+ char *key, data_t *value, int flags)
+{
+ int sys_ret = -1;
+ int ret = 0;
+
+ sys_ret = sys_fsetxattr (fd, key, value->data,
+ value->len, flags);
+
+ if (sys_ret < 0) {
+ if (errno == ENOTSUP) {
+ GF_LOG_OCCASIONALLY(gf_xattr_enotsup_log,
+ this->name,GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting "
+ "brick with 'user_xattr' "
+ "flag)");
+ } else if (errno == ENOENT) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "fsetxattr on fd=%d failed: %s", fd,
+ strerror (errno));
+ } else {
+
+#ifdef GF_DARWIN_HOST_OS
+ gf_log (this->name,
+ ((errno == EINVAL) ?
+ GF_LOG_DEBUG : GF_LOG_ERROR),
+ "fd=%d: key:%s error:%s",
+ fd, key, strerror (errno));
+#else /* ! DARWIN */
+ gf_log (this->name, GF_LOG_ERROR,
+ "fd=%d: key:%s error:%s",
+ fd, key, strerror (errno));
+#endif /* DARWIN */
+ }
+
+ ret = -errno;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+
+static int
+janitor_walker (const char *fpath, const struct stat *sb,
+ int typeflag, struct FTW *ftwbuf)
+{
+ struct iatt stbuf = {0, };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ posix_pstat (this, NULL, fpath, &stbuf);
+ switch (sb->st_mode & S_IFMT) {
+ case S_IFREG:
+ case S_IFBLK:
+ case S_IFLNK:
+ case S_IFCHR:
+ case S_IFIFO:
+ case S_IFSOCK:
+ gf_log (THIS->name, GF_LOG_TRACE,
+ "unlinking %s", fpath);
+ unlink (fpath);
+ if (stbuf.ia_nlink == 1)
+ posix_handle_unset (this, stbuf.ia_gfid, NULL);
+ break;
+
+ case S_IFDIR:
+ if (ftwbuf->level) { /* don't remove top level dir */
+ gf_log (THIS->name, GF_LOG_TRACE,
+ "removing directory %s", fpath);
+
+ rmdir (fpath);
+ posix_handle_unset (this, stbuf.ia_gfid, NULL);
+ }
+ break;
+ }
+
+ return 0; /* 0 = FTW_CONTINUE */
+}
+
+
+static struct posix_fd *
+janitor_get_next_fd (xlator_t *this)
+{
+ struct posix_private *priv = NULL;
+ struct posix_fd *pfd = NULL;
+
+ struct timespec timeout;
+
+ priv = this->private;
+
+ pthread_mutex_lock (&priv->janitor_lock);
+ {
+ if (list_empty (&priv->janitor_fds)) {
+ time (&timeout.tv_sec);
+ timeout.tv_sec += priv->janitor_sleep_duration;
+ timeout.tv_nsec = 0;
+
+ pthread_cond_timedwait (&priv->janitor_cond,
+ &priv->janitor_lock,
+ &timeout);
+ goto unlock;
+ }
+
+ pfd = list_entry (priv->janitor_fds.next, struct posix_fd,
+ list);
+
+ list_del (priv->janitor_fds.next);
+ }
+unlock:
+ pthread_mutex_unlock (&priv->janitor_lock);
+
+ return pfd;
+}
+
+
+static void *
+posix_janitor_thread_proc (void *data)
+{
+ xlator_t * this = NULL;
+ struct posix_private *priv = NULL;
+ struct posix_fd *pfd;
+
+ time_t now;
+
+ this = data;
+ priv = this->private;
+
+ THIS = this;
+
+ while (1) {
+ time (&now);
+ if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "janitor cleaning out %s", priv->trash_path);
+
+ nftw (priv->trash_path,
+ janitor_walker,
+ 32,
+ FTW_DEPTH | FTW_PHYS);
+
+ priv->last_landfill_check = now;
+ }
+
+ pfd = janitor_get_next_fd (this);
+ if (pfd) {
+ if (pfd->dir == NULL) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "janitor: closing file fd=%d", pfd->fd);
+ close (pfd->fd);
+ } else {
+ gf_log (this->name, GF_LOG_TRACE,
+ "janitor: closing dir fd=%p", pfd->dir);
+ closedir (pfd->dir);
+ }
+
+ GF_FREE (pfd);
+ }
+ }
+
+ return NULL;
+}
+
+
+void
+posix_spawn_janitor_thread (xlator_t *this)
+{
+ struct posix_private *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ LOCK (&priv->lock);
+ {
+ if (!priv->janitor_present) {
+ ret = gf_thread_create (&priv->janitor, NULL,
+ posix_janitor_thread_proc, this);
+
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "spawning janitor thread failed: %s",
+ strerror (errno));
+ goto unlock;
+ }
+
+ priv->janitor_present = _gf_true;
+ }
+ }
+unlock:
+ UNLOCK (&priv->lock);
+}
+
+static int
+is_fresh_file (struct stat *stat)
+{
+ struct timeval tv;
+
+ gettimeofday (&tv, NULL);
+
+ if ((stat->st_ctime >= (tv.tv_sec - 1))
+ && (stat->st_ctime <= tv.tv_sec))
+ return 1;
+
+ return 0;
+}
+
+
+int
+posix_gfid_heal (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req)
+{
+ /* The purpose of this function is to prevent a race
+ where an inode creation FOP (like mkdir/mknod/create etc)
+ races with lookup in the following way:
+
+ {create thread} | {lookup thread}
+ |
+ t0
+ mkdir ("name") |
+ t1
+ | posix_gfid_set ("name", 2);
+ t2
+ posix_gfid_set ("name", 1); |
+ t3
+ lstat ("name"); | lstat ("name");
+
+ In the above case mkdir FOP would have resulted with GFID 2 while
+ it should have been GFID 1. It matters in the case where GFID would
+ have gotten set to 1 on other subvolumes of replciate/distribute
+
+ The "solution" here is that, if we detect lookup is attempting to
+ set a GFID on a file which is created very recently, but does not
+ yet have a GFID (i.e, between t1 and t2), then "fake" it as though
+ posix_gfid_heal was called at t0 instead.
+ */
+
+ uuid_t uuid_curr;
+ int ret = 0;
+ struct stat stat = {0, };
+
+ if (!xattr_req)
+ goto out;
+
+ if (sys_lstat (path, &stat) != 0)
+ goto out;
+
+ ret = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16);
+ if (ret != 16) {
+ if (is_fresh_file (&stat)) {
+ ret = -1;
+ errno = ENOENT;
+ goto out;
+ }
+ }
+
+ ret = posix_gfid_set (this, path, loc, xattr_req);
+out:
+ return ret;
+}
+
+
+int
+posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req)
+{
+ int ret = 0;
+ data_t *data = NULL;
+ struct stat stat = {0, };
+
+ if (!xattr_req)
+ goto out;
+
+ if (sys_lstat (path, &stat) != 0)
+ goto out;
+
+ data = dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR);
+ if (data) {
+ ret = sys_lsetxattr (path, POSIX_ACL_ACCESS_XATTR,
+ data->data, data->len, 0);
+ if (ret != 0)
+ goto out;
+ }
+
+ data = dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR);
+ if (data) {
+ ret = sys_lsetxattr (path, POSIX_ACL_DEFAULT_XATTR,
+ data->data, data->len, 0);
+ if (ret != 0)
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static int
+_handle_entry_create_keyvalue_pair (dict_t *d, char *k, data_t *v,
+ void *tmp)
+{
+ int ret = -1;
+ posix_xattr_filler_t *filler = NULL;
+
+ filler = tmp;
+
+ if (!strcmp (GFID_XATTR_KEY, k) ||
+ !strcmp ("gfid-req", k) ||
+ !strcmp (POSIX_ACL_DEFAULT_XATTR, k) ||
+ !strcmp (POSIX_ACL_ACCESS_XATTR, k) ||
+ ZR_FILE_CONTENT_REQUEST(k)) {
+ return 0;
+ }
+
+ ret = posix_handle_pair (filler->this, filler->real_path, k, v,
+ XATTR_CREATE);
+ if (ret < 0) {
+ errno = -ret;
+ return -1;
+ }
+ return 0;
+}
+
+int
+posix_entry_create_xattr_set (xlator_t *this, const char *path,
+ dict_t *dict)
+{
+ int ret = -1;
+
+ posix_xattr_filler_t filler = {0,};
+
+ if (!dict)
+ goto out;
+
+ filler.this = this;
+ filler.real_path = path;
+
+ ret = dict_foreach (dict, _handle_entry_create_keyvalue_pair, &filler);
+
+out:
+ return ret;
+}
+
+
+static int
+__posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd_p)
+{
+ uint64_t tmp_pfd = 0;
+ struct posix_fd *pfd = NULL;
+ int ret = -1;
+ char *real_path = NULL;
+ int _fd = -1;
+ DIR *dir = NULL;
+
+ ret = __fd_ctx_get (fd, this, &tmp_pfd);
+ if (ret == 0) {
+ pfd = (void *)(long) tmp_pfd;
+ ret = 0;
+ goto out;
+ }
+
+ if (!fd_is_anonymous(fd))
+ /* anonymous fd */
+ goto out;
+
+ MAKE_HANDLE_PATH (real_path, this, fd->inode->gfid, NULL);
+
+ pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
+ if (!pfd) {
+ goto out;
+ }
+ pfd->fd = -1;
+
+ if (fd->inode->ia_type == IA_IFDIR) {
+ dir = opendir (real_path);
+ if (!dir) {
+ GF_FREE (pfd);
+ pfd = NULL;
+ goto out;
+ }
+ _fd = dirfd (dir);
+ }
+
+ if (fd->inode->ia_type == IA_IFREG) {
+ _fd = open (real_path, O_RDWR|O_LARGEFILE);
+ if (_fd == -1) {
+ GF_FREE (pfd);
+ pfd = NULL;
+ goto out;
+ }
+ }
+
+ pfd->fd = _fd;
+ pfd->dir = dir;
+
+ ret = __fd_ctx_set (fd, this, (uint64_t) (long) pfd);
+ if (ret != 0) {
+ if (_fd != -1)
+ close (_fd);
+ if (dir)
+ closedir (dir);
+ GF_FREE (pfd);
+ pfd = NULL;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (pfd_p)
+ *pfd_p = pfd;
+ return ret;
+}
+
+
+int
+posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd)
+{
+ int ret;
+
+ LOCK (&fd->inode->lock);
+ {
+ ret = __posix_fd_ctx_get (fd, this, pfd);
+ }
+ UNLOCK (&fd->inode->lock);
+
+ return ret;
+}
+
+static void *
+posix_health_check_thread_proc (void *data)
+{
+ xlator_t *this = NULL;
+ struct posix_private *priv = NULL;
+ uint32_t interval = 0;
+ int ret = -1;
+ struct stat sb = {0, };
+
+ this = data;
+ priv = this->private;
+
+ /* prevent races when the interval is updated */
+ interval = priv->health_check_interval;
+ if (interval == 0)
+ goto out;
+
+ gf_log (this->name, GF_LOG_DEBUG, "health-check thread started, "
+ "interval = %d seconds", interval);
+
+ while (1) {
+ /* aborting sleep() is a request to exit this thread, sleep()
+ * will normally not return when cancelled */
+ ret = sleep (interval);
+ if (ret > 0)
+ break;
+
+ /* prevent thread errors while doing the health-check(s) */
+ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+
+ /* Do the health-check, it should be moved to its own function
+ * in case it gets more complex. */
+ ret = stat (priv->base_path, &sb);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "stat() on %s returned: %s", priv->base_path,
+ strerror (errno));
+ goto abort;
+ }
+
+ pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "health-check thread exiting");
+
+ LOCK (&priv->lock);
+ {
+ priv->health_check_active = _gf_false;
+ }
+ UNLOCK (&priv->lock);
+
+ return NULL;
+
+abort:
+ /* health-check failed */
+ gf_log (this->name, GF_LOG_EMERG, "health-check failed, going down");
+ xlator_notify (this->parents->xlator, GF_EVENT_CHILD_DOWN, this);
+
+ ret = sleep (30);
+ if (ret == 0) {
+ gf_log (this->name, GF_LOG_EMERG, "still alive! -> SIGTERM");
+ kill (getpid(), SIGTERM);
+ }
+
+ ret = sleep (30);
+ if (ret == 0) {
+ gf_log (this->name, GF_LOG_EMERG, "still alive! -> SIGKILL");
+ kill (getpid(), SIGKILL);
+ }
+
+ return NULL;
+}
+
+void
+posix_spawn_health_check_thread (xlator_t *xl)
+{
+ struct posix_private *priv = NULL;
+ int ret = -1;
+
+ priv = xl->private;
+
+ LOCK (&priv->lock);
+ {
+ /* cancel the running thread */
+ if (priv->health_check_active == _gf_true) {
+ pthread_cancel (priv->health_check);
+ priv->health_check_active = _gf_false;
+ }
+
+ /* prevent scheduling a check in a tight loop */
+ if (priv->health_check_interval == 0)
+ goto unlock;
+
+ ret = gf_thread_create (&priv->health_check, NULL,
+ posix_health_check_thread_proc, xl);
+ if (ret < 0) {
+ priv->health_check_interval = 0;
+ priv->health_check_active = _gf_false;
+ gf_log (xl->name, GF_LOG_ERROR,
+ "unable to setup health-check thread: %s",
+ strerror (errno));
+ goto unlock;
+ }
+
+ /* run the thread detached, resources will be freed on exit */
+ pthread_detach (priv->health_check);
+ priv->health_check_active = _gf_true;
+ }
+unlock:
+ UNLOCK (&priv->lock);
+}
+
+int
+posix_fsyncer_pick (xlator_t *this, struct list_head *head)
+{
+ struct posix_private *priv = NULL;
+ int count = 0;
+
+ priv = this->private;
+ pthread_mutex_lock (&priv->fsync_mutex);
+ {
+ while (list_empty (&priv->fsyncs))
+ pthread_cond_wait (&priv->fsync_cond,
+ &priv->fsync_mutex);
+
+ count = priv->fsync_queue_count;
+ priv->fsync_queue_count = 0;
+ list_splice_init (&priv->fsyncs, head);
+ }
+ pthread_mutex_unlock (&priv->fsync_mutex);
+
+ return count;
+}
+
+
+void
+posix_fsyncer_process (xlator_t *this, call_stub_t *stub, gf_boolean_t do_fsync)
+{
+ struct posix_fd *pfd = NULL;
+ int ret = -1;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ ret = posix_fd_ctx_get (stub->args.fd, this, &pfd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not get fdctx for fd(%s)",
+ uuid_utoa (stub->args.fd->inode->gfid));
+ call_unwind_error (stub, -1, EINVAL);
+ return;
+ }
+
+ if (do_fsync) {
+#ifdef HAVE_FDATASYNC
+ if (stub->args.datasync)
+ ret = fdatasync (pfd->fd);
+ else
+#endif
+ ret = fsync (pfd->fd);
+ } else {
+ ret = 0;
+ }
+
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "could not fstat fd(%s)",
+ uuid_utoa (stub->args.fd->inode->gfid));
+ call_unwind_error (stub, -1, errno);
+ return;
+ }
+
+ call_unwind_error (stub, 0, 0);
+}
+
+
+static void
+posix_fsyncer_syncfs (xlator_t *this, struct list_head *head)
+{
+ call_stub_t *stub = NULL;
+ struct posix_fd *pfd = NULL;
+ int ret = -1;
+
+ stub = list_entry (head->prev, call_stub_t, list);
+ ret = posix_fd_ctx_get (stub->args.fd, this, &pfd);
+ if (ret)
+ return;
+
+#ifdef GF_LINUX_HOST_OS
+ /* syncfs() is not "declared" in RHEL's glibc even though
+ the kernel has support.
+ */
+#include <sys/syscall.h>
+#include <unistd.h>
+#ifdef SYS_syncfs
+ syscall (SYS_syncfs, pfd->fd);
+#else
+ sync();
+#endif
+#else
+ sync();
+#endif
+}
+
+
+void *
+posix_fsyncer (void *d)
+{
+ xlator_t *this = d;
+ struct posix_private *priv = NULL;
+ call_stub_t *stub = NULL;
+ call_stub_t *tmp = NULL;
+ struct list_head list;
+ int count = 0;
+ gf_boolean_t do_fsync = _gf_true;
+
+ priv = this->private;
+
+ for (;;) {
+ INIT_LIST_HEAD (&list);
+
+ count = posix_fsyncer_pick (this, &list);
+
+ usleep (priv->batch_fsync_delay_usec);
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "picked %d fsyncs", count);
+
+ switch (priv->batch_fsync_mode) {
+ case BATCH_NONE:
+ case BATCH_REVERSE_FSYNC:
+ break;
+ case BATCH_SYNCFS:
+ case BATCH_SYNCFS_SINGLE_FSYNC:
+ case BATCH_SYNCFS_REVERSE_FSYNC:
+ posix_fsyncer_syncfs (this, &list);
+ break;
+ }
+
+ if (priv->batch_fsync_mode == BATCH_SYNCFS)
+ do_fsync = _gf_false;
+ else
+ do_fsync = _gf_true;
+
+ list_for_each_entry_safe_reverse (stub, tmp, &list, list) {
+ list_del_init (&stub->list);
+
+ posix_fsyncer_process (this, stub, do_fsync);
+
+ if (priv->batch_fsync_mode == BATCH_SYNCFS_SINGLE_FSYNC)
+ do_fsync = _gf_false;
+ }
+ }
+}
diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h
index 940e21cc7..81752c17e 100644
--- a/xlators/storage/posix/src/posix-mem-types.h
+++ b/xlators/storage/posix/src/posix-mem-types.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __POSIX_MEM_TYPES_H__
#define __POSIX_MEM_TYPES_H__
@@ -30,6 +20,7 @@ enum gf_posix_mem_types_ {
gf_posix_mt_int32_t,
gf_posix_mt_posix_dev_t,
gf_posix_mt_trash_path,
+ gf_posix_mt_paiocb,
gf_posix_mt_end
};
#endif
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index e3130f4f1..fb45c7a67 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -24,6 +14,7 @@
#define __XOPEN_SOURCE 500
+#include <openssl/md5.h>
#include <stdint.h>
#include <sys/time.h>
#include <sys/resource.h>
@@ -32,13 +23,18 @@
#include <pthread.h>
#include <ftw.h>
#include <sys/stat.h>
+#include <signal.h>
+#include <sys/uio.h>
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
#endif /* GF_BSD_HOST_OS */
+#ifdef HAVE_LINKAT
+#include <fcntl.h>
+#endif /* HAVE_LINKAT */
+
#include "glusterfs.h"
-#include "md5.h"
#include "checksum.h"
#include "dict.h"
#include "logging.h"
@@ -55,19 +51,23 @@
#include "timer.h"
#include "glusterfs3-xdr.h"
#include "hashfn.h"
+#include "posix-aio.h"
+#include "glusterfs-acl.h"
+extern char *marker_xattrs[];
+#define ALIGN_SIZE 4096
#undef HAVE_SET_FSID
#ifdef HAVE_SET_FSID
#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid;
-#define SET_FS_ID(uid, gid) do { \
+#define SET_FS_ID(uid, gid) do { \
old_fsuid = setfsuid (uid); \
old_fsgid = setfsgid (gid); \
} while (0)
-#define SET_TO_OLD_FS_ID() do { \
+#define SET_TO_OLD_FS_ID() do { \
setfsuid (old_fsuid); \
setfsgid (old_fsgid); \
} while (0)
@@ -80,398 +80,104 @@
#endif
-typedef struct {
- xlator_t *this;
- const char *real_path;
- dict_t *xattr;
- struct iatt *stbuf;
- loc_t *loc;
-} posix_xattr_filler_t;
-
int
posix_forget (xlator_t *this, inode_t *inode)
{
- uint64_t tmp_cache = 0;
- if (!inode_ctx_del (inode, this, &tmp_cache))
- dict_destroy ((dict_t *)(long)tmp_cache);
-
- return 0;
-}
-
-static void
-_posix_xattr_get_set (dict_t *xattr_req,
- char *key,
- data_t *data,
- void *xattrargs)
-{
- posix_xattr_filler_t *filler = xattrargs;
- char *value = NULL;
- ssize_t xattr_size = -1;
- int ret = -1;
- char *databuf = NULL;
- int _fd = -1;
- loc_t *loc = NULL;
- ssize_t req_size = 0;
-
-
- /* should size be put into the data_t ? */
- if (!strcmp (key, GF_CONTENT_KEY)
- && IA_ISREG (filler->stbuf->ia_type)) {
-
- /* file content request */
- req_size = data_to_uint64 (data);
- if (req_size >= filler->stbuf->ia_size) {
- _fd = open (filler->real_path, O_RDONLY);
-
- if (_fd == -1) {
- gf_log (filler->this->name, GF_LOG_ERROR,
- "Opening file %s failed: %s",
- filler->real_path, strerror (errno));
- goto err;
- }
-
- databuf = GF_CALLOC (1, filler->stbuf->ia_size,
- gf_posix_mt_char);
-
- if (!databuf) {
- gf_log (filler->this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto err;
- }
-
- ret = read (_fd, databuf, filler->stbuf->ia_size);
- if (ret == -1) {
- gf_log (filler->this->name, GF_LOG_ERROR,
- "Read on file %s failed: %s",
- filler->real_path, strerror (errno));
- goto err;
- }
-
- ret = close (_fd);
- _fd = -1;
- if (ret == -1) {
- gf_log (filler->this->name, GF_LOG_ERROR,
- "Close on file %s failed: %s",
- filler->real_path, strerror (errno));
- goto err;
- }
-
- ret = dict_set_bin (filler->xattr, key,
- databuf, filler->stbuf->ia_size);
- if (ret < 0) {
- goto err;
- }
-
- /* To avoid double free in cleanup below */
- databuf = NULL;
- err:
- if (_fd != -1)
- close (_fd);
- if (databuf)
- GF_FREE (databuf);
- }
- } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) {
- loc = filler->loc;
- if (!list_empty (&loc->inode->fd_list)) {
- ret = dict_set_uint32 (filler->xattr, key, 1);
- if (ret < 0)
- gf_log (filler->this->name, GF_LOG_WARNING,
- "Failed to set dictionary value for %s",
- key);
- } else {
- ret = dict_set_uint32 (filler->xattr, key, 0);
- if (ret < 0)
- gf_log (filler->this->name, GF_LOG_WARNING,
- "Failed to set dictionary value for %s",
- key);
- }
- } else {
- xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0);
-
- if (xattr_size > 0) {
- value = GF_CALLOC (1, xattr_size + 1,
- gf_posix_mt_char);
- if (!value)
- return;
-
- sys_lgetxattr (filler->real_path, key, value,
- xattr_size);
-
- value[xattr_size] = '\0';
- ret = dict_set_bin (filler->xattr, key,
- value, xattr_size);
- if (ret < 0)
- gf_log (filler->this->name, GF_LOG_DEBUG,
- "dict set failed. path: %s, key: %s",
- filler->real_path, key);
- }
- }
-}
-
-
-int
-posix_fill_gfid_path (xlator_t *this, const char *path, struct iatt *iatt)
-{
- int ret = 0;
-
- if (!iatt)
- return 0;
-
- ret = sys_lgetxattr (path, GFID_XATTR_KEY, iatt->ia_gfid, 16);
- /* Return value of getxattr */
- if (ret == 16)
- ret = 0;
-
- return ret;
-}
-
-
-int
-posix_fill_gfid_fd (xlator_t *this, int fd, struct iatt *iatt)
-{
- int ret = 0;
-
- if (!iatt)
- return 0;
-
- ret = sys_fgetxattr (fd, GFID_XATTR_KEY, iatt->ia_gfid, 16);
- /* Return value of getxattr */
- if (ret == 16)
- ret = 0;
-
- return ret;
-}
-
-
-int
-posix_lstat_with_gfid (xlator_t *this, const char *path, struct iatt *stbuf_p)
-{
- struct posix_private *priv = NULL;
- int ret = 0;
- struct stat lstatbuf = {0, };
- struct iatt stbuf = {0, };
-
- priv = this->private;
-
- ret = lstat (path, &lstatbuf);
- if (ret == -1)
- return -1;
-
- iatt_from_stat (&stbuf, &lstatbuf);
-
- ret = posix_fill_gfid_path (this, path, &stbuf);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG, "failed to get gfid");
-
- if (stbuf_p)
- *stbuf_p = stbuf;
-
- return ret;
-}
-
-
-int
-posix_fstat_with_gfid (xlator_t *this, int fd, struct iatt *stbuf_p)
-{
- struct posix_private *priv = NULL;
- int ret = 0;
- struct stat fstatbuf = {0, };
- struct iatt stbuf = {0, };
-
- priv = this->private;
-
- ret = fstat (fd, &fstatbuf);
- if (ret == -1)
- return -1;
-
- iatt_from_stat (&stbuf, &fstatbuf);
-
- ret = posix_fill_gfid_fd (this, fd, &stbuf);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG, "failed to set gfid");
-
- if (stbuf_p)
- *stbuf_p = stbuf;
-
- return ret;
-}
-
-
-dict_t *
-posix_lookup_xattr_fill (xlator_t *this, const char *real_path, loc_t *loc,
- dict_t *xattr_req, struct iatt *buf)
-{
- dict_t *xattr = NULL;
- posix_xattr_filler_t filler = {0, };
-
- xattr = get_new_dict();
- if (!xattr) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
-
- filler.this = this;
- filler.real_path = real_path;
- filler.xattr = xattr;
- filler.stbuf = buf;
- filler.loc = loc;
-
- dict_foreach (xattr_req, _posix_xattr_get_set, &filler);
-out:
- return xattr;
-}
-
-
-/*
- * If the parent directory of {real_path} has the setgid bit set,
- * then set {gid} to the gid of the parent. Otherwise,
- * leave {gid} unchanged.
- */
-
-int
-setgid_override (xlator_t *this, char *real_path, gid_t *gid)
-{
- char * tmp_path = NULL;
- char * parent_path = NULL;
- struct iatt parent_stbuf;
+ uint64_t tmp_cache = 0;
+ if (!inode_ctx_del (inode, this, &tmp_cache))
+ dict_destroy ((dict_t *)(long)tmp_cache);
- int op_ret = 0;
-
- tmp_path = gf_strdup (real_path);
- if (!tmp_path) {
- op_ret = -ENOMEM;
- gf_log ("[storage/posix]", GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
-
- parent_path = dirname (tmp_path);
-
- op_ret = posix_lstat_with_gfid (this, parent_path, &parent_stbuf);
-
- if (op_ret == -1) {
- op_ret = -errno;
- gf_log ("[storage/posix]", GF_LOG_ERROR,
- "lstat on parent directory (%s) failed: %s",
- parent_path, strerror (errno));
- goto out;
- }
-
- if (parent_stbuf.ia_prot.sgid) {
- /*
- Entries created inside a setgid directory
- should inherit the gid from the parent
- */
-
- *gid = parent_stbuf.ia_gid;
- }
-out:
-
- if (tmp_path)
- GF_FREE (tmp_path);
-
- return op_ret;
-}
-
-
-int
-posix_gfid_set (xlator_t *this, const char *path, dict_t *xattr_req)
-{
- void *uuid_req = NULL;
- uuid_t uuid_curr;
- int ret = 0;
- struct stat stat = {0, };
-
- if (!xattr_req)
- return 0;
-
- if (sys_lstat (path, &stat) != 0)
- return 0;
-
- ret = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16);
-
- if (ret == 16)
- return 0;
-
- ret = dict_get_ptr (xattr_req, "gfid-req", &uuid_req);
- if (ret)
- goto out;
-
- ret = sys_lsetxattr (path, GFID_XATTR_KEY, uuid_req, 16, XATTR_CREATE);
-
-out:
- return ret;
+ return 0;
}
+/* Regular fops */
int32_t
posix_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+ loc_t *loc, dict_t *xdata)
{
struct iatt buf = {0, };
- char * real_path = NULL;
int32_t op_ret = -1;
int32_t entry_ret = 0;
int32_t op_errno = 0;
dict_t * xattr = NULL;
- char * pathdup = NULL;
- char * parentpath = NULL;
+ char * real_path = NULL;
+ char * par_path = NULL;
struct iatt postparent = {0,};
+ int32_t gfidless = 0;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (loc->path, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ /* The Hidden directory should be for housekeeping purpose and it
+ should not get any gfid on it */
+ if (__is_root_gfid (loc->pargfid) &&
+ (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Lookup issued on %s, which is not permitted",
+ GF_HIDDEN_PATH);
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
+ }
- posix_gfid_set (this, real_path, xattr_req);
+ op_ret = dict_get_int32 (xdata, GF_GFIDLESS_LOOKUP, &gfidless);
+ op_ret = -1;
+ if (uuid_is_null (loc->pargfid)) {
+ /* nameless lookup */
+ MAKE_INODE_HANDLE (real_path, this, loc, &buf);
+ } else {
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf);
+
+ if (uuid_is_null (loc->inode->gfid)) {
+ posix_gfid_heal (this, real_path, loc, xdata);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this,
+ loc, &buf);
+ }
+ }
- op_ret = posix_lstat_with_gfid (this, real_path, &buf);
op_errno = errno;
if (op_ret == -1) {
- if (op_errno != ENOENT) {
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s",
- loc->path, strerror (op_errno));
- }
+ if (op_errno != ENOENT) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "lstat on %s failed: %s",
+ real_path, strerror (op_errno));
+ }
entry_ret = -1;
goto parent;
}
- if (xattr_req && (op_ret == 0)) {
- xattr = posix_lookup_xattr_fill (this, real_path, loc,
- xattr_req, &buf);
+ if (xdata && (op_ret == 0)) {
+ xattr = posix_lookup_xattr_fill (this, real_path, loc,
+ xdata, &buf);
}
parent:
- if (loc->parent) {
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
-
- parentpath = dirname (pathdup);
-
- op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
+ if (par_path) {
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ "post-operation lstat on parent %s failed: %s",
+ par_path, strerror (op_errno));
goto out;
}
}
op_ret = entry_ret;
out:
- if (pathdup)
- GF_FREE (pathdup);
-
if (xattr)
dict_ref (xattr);
+ if (!op_ret && !gfidless && uuid_is_null (buf.ia_gfid)) {
+ gf_log (this->name, GF_LOG_ERROR, "buf->ia_gfid is null for "
+ "%s", (real_path) ? real_path: "");
+ op_ret = -1;
+ op_errno = ENODATA;
+ }
STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
(loc)?loc->inode:NULL, &buf, xattr, &postparent);
@@ -483,15 +189,13 @@ out:
int32_t
-posix_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
+posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
struct iatt buf = {0,};
- char * real_path = NULL;
int32_t op_ret = -1;
int32_t op_errno = 0;
- struct posix_private *priv = NULL;
+ struct posix_private *priv = NULL;
+ char *real_path = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -503,22 +207,23 @@ posix_stat (call_frame_t *frame,
VALIDATE_OR_GOTO (priv, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
- op_ret = posix_lstat_with_gfid (this, real_path, &buf);
+ MAKE_INODE_HANDLE (real_path, this, loc, &buf);
+
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s", loc->path,
+ gf_log (this->name, (op_errno == ENOENT)?
+ GF_LOG_DEBUG:GF_LOG_ERROR,
+ "lstat on %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
op_ret = 0;
- out:
+out:
SET_TO_OLD_FS_ID();
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf);
+ STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, NULL);
return 0;
}
@@ -534,7 +239,7 @@ posix_do_chmod (xlator_t *this, const char *path, struct iatt *stbuf)
ret = sys_lstat (path, &stat);
if (ret != 0) {
gf_log (this->name, GF_LOG_WARNING,
- "%s (%s)", path, strerror (errno));
+ "lstat failed: %s (%s)", path, strerror (errno));
goto out;
}
@@ -547,6 +252,8 @@ posix_do_chmod (xlator_t *this, const char *path, struct iatt *stbuf)
/* in Linux symlinks are always in mode 0777 and no
such call as lchmod exists.
*/
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s (%s)", path, strerror (errno));
if (is_symlink) {
ret = 0;
goto out;
@@ -606,13 +313,15 @@ posix_do_utimes (xlator_t *this,
ret = lutimes (path, tv);
if ((ret == -1) && (errno == ENOSYS)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "%s (%s)", path, strerror (errno));
if (is_symlink) {
ret = 0;
goto out;
}
ret = utimes (path, tv);
- }
+ }
out:
return ret;
@@ -620,7 +329,7 @@ out:
int
posix_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *stbuf, int32_t valid)
+ loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
@@ -635,9 +344,8 @@ posix_setattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, &statpre);
- op_ret = posix_lstat_with_gfid (this, real_path, &statpre);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -691,7 +399,7 @@ posix_setattr (call_frame_t *frame, xlator_t *this,
}
}
- op_ret = posix_lstat_with_gfid (this, real_path, &statpost);
+ op_ret = posix_pstat (this, loc->gfid, real_path, &statpost);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -706,7 +414,7 @@ out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno,
- &statpre, &statpost);
+ &statpre, &statpost, NULL);
return 0;
}
@@ -748,20 +456,21 @@ posix_do_futimes (xlator_t *this,
int fd,
struct iatt *stbuf)
{
+ gf_log (this->name, GF_LOG_WARNING, "function not implemented fd(%d)", fd);
+
errno = ENOSYS;
return -1;
}
int
posix_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *stbuf, int32_t valid)
+ fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
struct iatt statpre = {0,};
struct iatt statpost = {0,};
struct posix_fd *pfd = NULL;
- uint64_t tmp_pfd = 0;
int32_t ret = -1;
DECLARE_OLD_FS_ID_VAR;
@@ -772,16 +481,15 @@ posix_fsetattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
op_errno = -ret;
gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
+ "pfd is NULL from fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
- op_ret = posix_fstat_with_gfid (this, pfd->fd, &statpre);
+ op_ret = posix_fdstat (this, pfd->fd, &statpre);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -836,7 +544,7 @@ posix_fsetattr (call_frame_t *frame, xlator_t *this,
}
}
- op_ret = posix_fstat_with_gfid (this, pfd->fd, &statpost);
+ op_ret = posix_fdstat (this, pfd->fd, &statpost);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -851,14 +559,297 @@ out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno,
- &statpre, &statpost);
+ &statpre, &statpost, NULL);
+
+ return 0;
+}
+
+static int32_t
+posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ off_t offset, size_t len, struct iatt *statpre,
+ struct iatt *statpost)
+{
+ struct posix_fd *pfd = NULL;
+ int32_t ret = -1;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ SET_FS_ID (frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (fd, out);
+
+ ret = posix_fd_ctx_get (fd, this, &pfd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ ret = posix_fdstat (this, pfd->fd, statpre);
+ if (ret == -1) {
+ ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "fallocate (fstat) failed on fd=%p: %s", fd,
+ strerror (errno));
+ goto out;
+ }
+
+ ret = sys_fallocate(pfd->fd, flags, offset, len);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
+
+ ret = posix_fdstat (this, pfd->fd, statpost);
+ if (ret == -1) {
+ ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "fallocate (fstat) failed on fd=%p: %s", fd,
+ strerror (errno));
+ goto out;
+ }
+
+out:
+ SET_TO_OLD_FS_ID ();
+
+ return ret;
+}
+
+char*
+_page_aligned_alloc (size_t size, char **aligned_buf)
+{
+ char *alloc_buf = NULL;
+ char *buf = NULL;
+
+ alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char);
+ if (!alloc_buf)
+ goto out;
+ /* page aligned buffer */
+ buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE);
+ *aligned_buf = buf;
+out:
+ return alloc_buf;
+}
+
+static int32_t
+_posix_do_zerofill(int fd, off_t offset, size_t len, int o_direct)
+{
+ size_t num_vect = 0;
+ int32_t num_loop = 1;
+ int32_t idx = 0;
+ int32_t op_ret = -1;
+ int32_t vect_size = VECTOR_SIZE;
+ size_t remain = 0;
+ size_t extra = 0;
+ struct iovec *vector = NULL;
+ char *iov_base = NULL;
+ char *alloc_buf = NULL;
+
+ if (len == 0)
+ return 0;
+ if (len < VECTOR_SIZE)
+ vect_size = len;
+
+ num_vect = len / (vect_size);
+ remain = len % vect_size ;
+ if (num_vect > MAX_NO_VECT) {
+ extra = num_vect % MAX_NO_VECT;
+ num_loop = num_vect / MAX_NO_VECT;
+ num_vect = MAX_NO_VECT;
+ }
+
+ vector = GF_CALLOC (num_vect, sizeof(struct iovec),
+ gf_common_mt_iovec);
+ if (!vector)
+ return -1;
+ if (o_direct) {
+ alloc_buf = _page_aligned_alloc(vect_size, &iov_base);
+ if (!alloc_buf) {
+ gf_log ("_posix_do_zerofill", GF_LOG_DEBUG,
+ "memory alloc failed, vect_size %d: %s",
+ vect_size, strerror(errno));
+ GF_FREE(vector);
+ return -1;
+ }
+ } else {
+ iov_base = GF_CALLOC (vect_size, sizeof(char),
+ gf_common_mt_char);
+ if (!iov_base) {
+ GF_FREE(vector);
+ return -1;
+ }
+ }
+
+ for (idx = 0; idx < num_vect; idx++) {
+ vector[idx].iov_base = iov_base;
+ vector[idx].iov_len = vect_size;
+ }
+ lseek(fd, offset, SEEK_SET);
+ for (idx = 0; idx < num_loop; idx++) {
+ op_ret = writev(fd, vector, num_vect);
+ if (op_ret < 0)
+ goto err;
+ }
+ if (extra) {
+ op_ret = writev(fd, vector, extra);
+ if (op_ret < 0)
+ goto err;
+ }
+ if (remain) {
+ vector[0].iov_len = remain;
+ op_ret = writev(fd, vector , 1);
+ if (op_ret < 0)
+ goto err;
+ }
+err:
+ if (o_direct)
+ GF_FREE(alloc_buf);
+ else
+ GF_FREE(iov_base);
+ GF_FREE(vector);
+ return op_ret;
+}
+
+static int32_t
+posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, struct iatt *statpre,
+ struct iatt *statpost)
+{
+ struct posix_fd *pfd = NULL;
+ int32_t ret = -1;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ SET_FS_ID (frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (fd, out);
+
+ ret = posix_fd_ctx_get (fd, this, &pfd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ ret = posix_fdstat (this, pfd->fd, statpre);
+ if (ret == -1) {
+ ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "pre-operation fstat failed on fd = %p: %s", fd,
+ strerror (errno));
+ goto out;
+ }
+ ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT);
+ if (ret < 0) {
+ ret = -errno;
+ gf_log(this->name, GF_LOG_ERROR,
+ "zerofill failed on fd %d length %ld %s",
+ pfd->fd, len, strerror(errno));
+ goto out;
+ }
+ if (pfd->flags & (O_SYNC|O_DSYNC)) {
+ ret = fsync (pfd->fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "fsync() in writev on fd %d failed: %s",
+ pfd->fd, strerror (errno));
+ ret = -errno;
+ goto out;
+ }
+ }
+
+ ret = posix_fdstat (this, pfd->fd, statpost);
+ if (ret == -1) {
+ ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "post operation fstat failed on fd=%p: %s", fd,
+ strerror (errno));
+ goto out;
+ }
+
+out:
+ SET_TO_OLD_FS_ID ();
+
+ return ret;
+}
+
+static int32_t
+_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ int32_t ret;
+ int32_t flags = 0;
+ struct iatt statpre = {0,};
+ struct iatt statpost = {0,};
+
+ if (keep_size)
+ flags = FALLOC_FL_KEEP_SIZE;
+
+ ret = posix_do_fallocate(frame, this, fd, flags, offset, len,
+ &statpre, &statpost);
+ if (ret < 0)
+ goto err;
+ STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int32_t ret;
+ int32_t flags = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE;
+ struct iatt statpre = {0,};
+ struct iatt statpost = {0,};
+
+ ret = posix_do_fallocate(frame, this, fd, flags, offset, len,
+ &statpre, &statpost);
+ if (ret < 0)
+ goto err;
+
+ STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL);
+ return 0;
+
+}
+
+static int32_t
+posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ struct iatt statpre = {0,};
+ struct iatt statpost = {0,};
+
+ ret = posix_do_zerofill(frame, this, fd, offset, len,
+ &statpre, &statpost);
+ if (ret < 0)
+ goto err;
+
+ STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL);
return 0;
+
}
int32_t
posix_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
+ loc_t *loc, fd_t *fd, dict_t *xdata)
{
char * real_path = NULL;
int32_t op_ret = -1;
@@ -875,73 +866,68 @@ posix_opendir (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (fd, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+ op_ret = -1;
dir = opendir (real_path);
if (dir == NULL) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"opendir failed on %s: %s",
- loc->path, strerror (op_errno));
+ real_path, strerror (op_errno));
goto out;
}
op_ret = dirfd (dir);
- if (op_ret < 0) {
+ if (op_ret < 0) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"dirfd() failed on %s: %s",
- loc->path, strerror (op_errno));
- goto out;
- }
+ real_path, strerror (op_errno));
+ goto out;
+ }
pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
if (!pfd) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
goto out;
}
pfd->dir = dir;
pfd->fd = dirfd (dir);
- pfd->path = gf_strdup (real_path);
- if (!pfd->path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
- }
- fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+ op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+ if (op_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set the fd context path=%s fd=%p",
+ real_path, fd);
op_ret = 0;
- out:
+out:
if (op_ret == -1) {
if (dir) {
closedir (dir);
dir = NULL;
}
if (pfd) {
- if (pfd->path)
- GF_FREE (pfd->path);
GF_FREE (pfd);
pfd = NULL;
}
}
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL);
return 0;
}
int32_t
posix_releasedir (xlator_t *this,
- fd_t *fd)
+ fd_t *fd)
{
struct posix_fd * pfd = NULL;
- uint64_t tmp_pfd = 0;
+ uint64_t tmp_pfd = 0;
int ret = 0;
struct posix_private *priv = NULL;
@@ -956,22 +942,15 @@ posix_releasedir (xlator_t *this,
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
+ pfd = (struct posix_fd *)(long)tmp_pfd;
if (!pfd->dir) {
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd->dir is NULL for fd=%p path=%s",
- fd, pfd->path ? pfd->path : "<NULL>");
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd->dir is NULL for fd=%p", fd);
goto out;
}
priv = this->private;
- if (!pfd->path) {
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd->path was NULL. fd=%p pfd=%p",
- fd, pfd);
- }
-
pthread_mutex_lock (&priv->janitor_lock);
{
INIT_LIST_HEAD (&pfd->list);
@@ -980,57 +959,52 @@ posix_releasedir (xlator_t *this,
}
pthread_mutex_unlock (&priv->janitor_lock);
- out:
+out:
return 0;
}
int32_t
posix_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size)
+ loc_t *loc, size_t size, dict_t *xdata)
{
char * dest = NULL;
int32_t op_ret = -1;
- int32_t lstat_ret = -1;
int32_t op_errno = 0;
char * real_path = NULL;
struct iatt stbuf = {0,};
DECLARE_OLD_FS_ID_VAR;
- VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (frame, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
dest = alloca (size + 1);
- MAKE_REAL_PATH (real_path, this, loc->path);
-
- op_ret = readlink (real_path, dest, size);
+ MAKE_INODE_HANDLE (real_path, this, loc, &stbuf);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "readlink on %s failed: %s", loc->path,
+ "lstat on %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
- dest[op_ret] = 0;
-
- lstat_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
- if (lstat_ret == -1) {
- op_ret = -1;
+ op_ret = readlink (real_path, dest, size);
+ if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s", loc->path,
+ "readlink on %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
- out:
+ dest[op_ret] = 0;
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf);
+ STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf, NULL);
return 0;
}
@@ -1038,20 +1012,20 @@ posix_readlink (call_frame_t *frame, xlator_t *this,
int
posix_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, dict_t *params)
+ loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
{
- int tmp_fd = 0;
+ int tmp_fd = 0;
int32_t op_ret = -1;
int32_t op_errno = 0;
char *real_path = 0;
+ char *par_path = 0;
struct iatt stbuf = { 0, };
char was_present = 1;
struct posix_private *priv = NULL;
gid_t gid = 0;
- char *pathdup = NULL;
struct iatt preparent = {0,};
struct iatt postparent = {0,};
- char *parentpath = NULL;
+ void * uuid_req = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -1062,93 +1036,131 @@ posix_mknod (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
gid = frame->root->gid;
- op_ret = setgid_override (this, real_path, &gid);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
-
SET_FS_ID (frame->root->uid, gid);
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
-
- parentpath = dirname (pathdup);
- op_ret = posix_lstat_with_gfid (this, parentpath, &preparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"pre-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ real_path, strerror (op_errno));
goto out;
}
+ if (preparent.ia_prot.sgid) {
+ gid = preparent.ia_gid;
+ }
+
+ /* Check if the 'gfid' already exists, because this mknod may be an
+ internal call from distribute for creating 'linkfile', and that
+ linkfile may be for a hardlinked file */
+ if (dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) {
+ dict_del (xdata, GLUSTERFS_INTERNAL_FOP_KEY);
+ op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "failed to get the gfid from dict for %s",
+ loc->path);
+ goto real_op;
+ }
+ op_ret = posix_create_link_if_gfid_exists (this, uuid_req,
+ real_path);
+ if (!op_ret)
+ goto post_op;
+ }
+
+real_op:
+#ifdef __NetBSD__
+ if (S_ISFIFO(mode))
+ op_ret = mkfifo (real_path, mode);
+ else
+#endif /* __NetBSD__ */
op_ret = mknod (real_path, mode, dev);
if (op_ret == -1) {
op_errno = errno;
- if ((op_errno == EINVAL) && S_ISREG (mode)) {
- /* Over Darwin, mknod with (S_IFREG|mode)
- doesn't work */
- tmp_fd = creat (real_path, mode);
- if (tmp_fd == -1)
- goto out;
- close (tmp_fd);
- } else {
+ if ((op_errno == EINVAL) && S_ISREG (mode)) {
+ /* Over Darwin, mknod with (S_IFREG|mode)
+ doesn't work */
+ tmp_fd = creat (real_path, mode);
+ if (tmp_fd == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "create failed on %s: %s",
+ real_path, strerror (errno));
+ goto out;
+ }
+ close (tmp_fd);
+ } else {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod on %s failed: %s", loc->path,
- strerror (op_errno));
- goto out;
- }
+ gf_log (this->name, GF_LOG_ERROR,
+ "mknod on %s failed: %s", real_path,
+ strerror (op_errno));
+ goto out;
+ }
}
- op_ret = posix_gfid_set (this, real_path, params);
+ op_ret = posix_gfid_set (this, real_path, loc, xdata);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting gfid on %s failed", real_path);
+ }
#ifndef HAVE_SET_FSID
op_ret = lchown (real_path, frame->root->uid, gid);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "lchown on %s failed: %s", loc->path,
+ "lchown on %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
#endif
- op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
+post_op:
+ op_ret = posix_acl_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting ACLs on %s failed (%s)", real_path,
+ strerror (errno));
+ }
+
+ op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting xattrs on %s failed (%s)", real_path,
+ strerror (errno));
+ }
+
+ op_ret = posix_pstat (this, NULL, real_path, &stbuf);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "mknod on %s failed: %s", loc->path,
+ "mknod on %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
- op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ "post-operation lstat on parent %s failed: %s",
+ par_path, strerror (op_errno));
goto out;
}
op_ret = 0;
- out:
- if (pathdup)
- GF_FREE (pathdup);
-
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
- (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent);
+ (loc)?loc->inode:NULL, &stbuf, &preparent,
+ &postparent, NULL);
if ((op_ret == -1) && (!was_present)) {
unlink (real_path);
@@ -1158,164 +1170,18 @@ posix_mknod (call_frame_t *frame, xlator_t *this,
}
-static int
-janitor_walker (const char *fpath, const struct stat *sb,
- int typeflag, struct FTW *ftwbuf)
-{
- switch (sb->st_mode & S_IFMT) {
- case S_IFREG:
- case S_IFBLK:
- case S_IFLNK:
- case S_IFCHR:
- case S_IFIFO:
- case S_IFSOCK:
- gf_log (THIS->name, GF_LOG_TRACE,
- "unlinking %s", fpath);
- unlink (fpath);
- break;
-
- case S_IFDIR:
- if (ftwbuf->level) { /* don't remove top level dir */
- gf_log (THIS->name, GF_LOG_TRACE,
- "removing directory %s", fpath);
-
- rmdir (fpath);
- }
- break;
- }
-
- return 0; /* 0 = FTW_CONTINUE */
-}
-
-
-static struct posix_fd *
-janitor_get_next_fd (xlator_t *this)
-{
- struct posix_private *priv = NULL;
- struct posix_fd *pfd = NULL;
-
- struct timespec timeout;
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->janitor_lock);
- {
- if (list_empty (&priv->janitor_fds)) {
- time (&timeout.tv_sec);
- timeout.tv_sec += priv->janitor_sleep_duration;
- timeout.tv_nsec = 0;
-
- pthread_cond_timedwait (&priv->janitor_cond,
- &priv->janitor_lock,
- &timeout);
- goto unlock;
- }
-
- pfd = list_entry (priv->janitor_fds.next, struct posix_fd,
- list);
-
- list_del (priv->janitor_fds.next);
- }
-unlock:
- pthread_mutex_unlock (&priv->janitor_lock);
-
- return pfd;
-}
-
-
-static void *
-posix_janitor_thread_proc (void *data)
-{
- xlator_t * this = NULL;
- struct posix_private *priv = NULL;
- struct posix_fd *pfd;
-
- time_t now;
-
- this = data;
- priv = this->private;
-
- THIS = this;
-
- while (1) {
- time (&now);
- if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
- gf_log (this->name, GF_LOG_TRACE,
- "janitor cleaning out /" GF_REPLICATE_TRASH_DIR);
-
- nftw (priv->trash_path,
- janitor_walker,
- 32,
- FTW_DEPTH | FTW_PHYS);
-
- priv->last_landfill_check = now;
- }
-
- pfd = janitor_get_next_fd (this);
- if (pfd) {
- if (pfd->dir == NULL) {
- gf_log (this->name, GF_LOG_TRACE,
- "janitor: closing file fd=%d", pfd->fd);
- close (pfd->fd);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "janitor: closing dir fd=%p", pfd->dir);
- closedir (pfd->dir);
- }
-
- if (pfd->path)
- GF_FREE (pfd->path);
-
- GF_FREE (pfd);
- }
- }
-
- return NULL;
-}
-
-
-static void
-posix_spawn_janitor_thread (xlator_t *this)
-{
- struct posix_private *priv = NULL;
- int ret = 0;
-
- priv = this->private;
-
- LOCK (&priv->lock);
- {
- if (!priv->janitor_present) {
- ret = pthread_create (&priv->janitor, NULL,
- posix_janitor_thread_proc, this);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "spawning janitor thread failed: %s",
- strerror (errno));
- goto unlock;
- }
-
- priv->janitor_present = _gf_true;
- }
- }
-unlock:
- UNLOCK (&priv->lock);
-}
-
-
int
posix_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *params)
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
char *real_path = NULL;
+ char *par_path = NULL;
struct iatt stbuf = {0, };
char was_present = 1;
struct posix_private *priv = NULL;
gid_t gid = 0;
- char *pathdup = NULL;
- char *parentpath = NULL;
struct iatt preparent = {0,};
struct iatt postparent = {0,};
@@ -1325,90 +1191,112 @@ posix_mkdir (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
+ /* The Hidden directory should be for housekeeping purpose and it
+ should not get created from a user request */
+ if (__is_root_gfid (loc->pargfid) &&
+ (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "mkdir issued on %s, which is not permitted",
+ GF_HIDDEN_PATH);
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
+ }
+
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL);
gid = frame->root->gid;
- op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
+ op_ret = posix_pstat (this, NULL, real_path, &stbuf);
if ((op_ret == -1) && (errno == ENOENT)) {
was_present = 0;
}
- op_ret = setgid_override (this, real_path, &gid);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
-
SET_FS_ID (frame->root->uid, gid);
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
-
- parentpath = dirname (pathdup);
- op_ret = posix_lstat_with_gfid (this, parentpath, &preparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ "pre-operation lstat on parent %s failed: %s",
+ par_path, strerror (op_errno));
goto out;
}
+ if (preparent.ia_prot.sgid) {
+ gid = preparent.ia_gid;
+ mode |= S_ISGID;
+ }
+
op_ret = mkdir (real_path, mode);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "mkdir of %s failed: %s", loc->path,
+ "mkdir of %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
- op_ret = posix_gfid_set (this, real_path, params);
+ op_ret = posix_gfid_set (this, real_path, loc, xdata);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting gfid on %s failed", real_path);
+ }
#ifndef HAVE_SET_FSID
op_ret = chown (real_path, frame->root->uid, gid);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "chown on %s failed: %s", loc->path,
+ "chown on %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
#endif
- op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
+ op_ret = posix_acl_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting ACLs on %s failed (%s)", real_path,
+ strerror (errno));
+ }
+
+ op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting xattrs on %s failed (%s)", real_path,
+ strerror (errno));
+ }
+
+ op_ret = posix_pstat (this, NULL, real_path, &stbuf);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "lstat on %s failed: %s", loc->path,
+ "lstat on %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
- op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ real_path, strerror (op_errno));
goto out;
}
op_ret = 0;
- out:
- if (pathdup)
- GF_FREE (pathdup);
-
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
- (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent);
+ (loc)?loc->inode:NULL, &stbuf, &preparent,
+ &postparent, NULL);
if ((op_ret == -1) && (!was_present)) {
unlink (real_path);
@@ -1420,17 +1308,17 @@ posix_mkdir (call_frame_t *frame, xlator_t *this,
int32_t
posix_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, int xflag, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- char *real_path = NULL;
- char *pathdup = NULL;
- char *parentpath = NULL;
- int32_t fd = -1;
- struct posix_private *priv = NULL;
- struct iatt preparent = {0,};
- struct iatt postparent = {0,};
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = NULL;
+ char *par_path = NULL;
+ int32_t fd = -1;
+ struct iatt stbuf = {0,};
+ struct posix_private *priv = NULL;
+ struct iatt preparent = {0,};
+ struct iatt postparent = {0,};
DECLARE_OLD_FS_ID_VAR;
@@ -1439,22 +1327,20 @@ posix_unlink (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
-
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
-
- parentpath = dirname (pathdup);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
- op_ret = posix_lstat_with_gfid (this, parentpath, &preparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ "pre-operation lstat on parent %s failed: %s",
+ par_path, strerror (op_errno));
goto out;
}
+ if (stbuf.ia_nlink == 1)
+ posix_handle_unset (this, stbuf.ia_gfid, NULL);
+
priv = this->private;
if (priv->background_unlink) {
if (IA_ISREG (loc->inode->ia_type)) {
@@ -1463,7 +1349,7 @@ posix_unlink (call_frame_t *frame, xlator_t *this,
op_ret = -1;
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "open of %s failed: %s", loc->path,
+ "open of %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
@@ -1474,30 +1360,27 @@ posix_unlink (call_frame_t *frame, xlator_t *this,
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "unlink of %s failed: %s", loc->path,
+ "unlink of %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
- op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ "post-operation lstat on parent %s failed: %s",
+ par_path, strerror (op_errno));
goto out;
}
op_ret = 0;
- out:
- if (pathdup)
- GF_FREE (pathdup);
-
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- &preparent, &postparent);
+ &preparent, &postparent, NULL);
if (fd != -1) {
close (fd);
@@ -1509,15 +1392,16 @@ posix_unlink (call_frame_t *frame, xlator_t *this,
int
posix_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags)
+ loc_t *loc, int flags, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
char * real_path = NULL;
- char * pathdup = NULL;
- char * parentpath = NULL;
+ char * par_path = NULL;
+ char * gfid_str = NULL;
struct iatt preparent = {0,};
struct iatt postparent = {0,};
+ struct iatt stbuf;
struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -1526,69 +1410,84 @@ posix_rmdir (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
- priv = this->private;
-
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
+ /* The Hidden directory should be for housekeeping purpose and it
+ should not get deleted from inside process */
+ if (__is_root_gfid (loc->pargfid) &&
+ (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "rmdir issued on %s, which is not permitted",
+ GF_HIDDEN_PATH);
+ op_errno = EPERM;
+ op_ret = -1;
+ goto out;
+ }
+
+ priv = this->private;
- parentpath = dirname (pathdup);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
- op_ret = posix_lstat_with_gfid (this, parentpath, &preparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ "pre-operation lstat on parent %s failed: %s",
+ par_path, strerror (op_errno));
goto out;
}
if (flags) {
- uint32_t hashval = 0;
- char *tmp_path = alloca (strlen (priv->trash_path) + 16);
+ gfid_str = uuid_utoa (stbuf.ia_gfid);
+ char *tmp_path = alloca (strlen (priv->trash_path) +
+ strlen ("/") +
+ strlen (gfid_str) + 1);
mkdir (priv->trash_path, 0755);
- hashval = gf_dm_hashfn (real_path, strlen (real_path));
- sprintf (tmp_path, "%s/%u", priv->trash_path, hashval);
+ sprintf (tmp_path, "%s/%s", priv->trash_path, gfid_str);
op_ret = rename (real_path, tmp_path);
} else {
op_ret = rmdir (real_path);
}
op_errno = errno;
- if (op_errno == EEXIST)
- /* Solaris sets errno = EEXIST instead of ENOTEMPTY */
- op_errno = ENOTEMPTY;
+ if (op_ret == 0) {
+ posix_handle_unset (this, stbuf.ia_gfid, NULL);
+ }
+
+ if (op_errno == EEXIST)
+ /* Solaris sets errno = EEXIST instead of ENOTEMPTY */
+ op_errno = ENOTEMPTY;
/* No need to log a common error as ENOTEMPTY */
if (op_ret == -1 && op_errno != ENOTEMPTY) {
gf_log (this->name, GF_LOG_ERROR,
- "rmdir of %s failed: %s", loc->path,
+ "rmdir of %s failed: %s", real_path,
strerror (op_errno));
}
- if (op_ret == -1)
+ if (op_ret == -1) {
+ gf_log (this->name,
+ (op_errno == ENOTEMPTY) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+ "%s on %s failed", (flags) ? "rename" : "rmdir",
+ real_path);
goto out;
+ }
- op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ par_path, strerror (op_errno));
goto out;
}
- out:
- if (pathdup)
- GF_FREE (pathdup);
-
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- &preparent, &postparent);
+ &preparent, &postparent, NULL);
return 0;
}
@@ -1596,17 +1495,16 @@ posix_rmdir (call_frame_t *frame, xlator_t *this,
int
posix_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc, dict_t *params)
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
char * real_path = 0;
+ char * par_path = 0;
struct iatt stbuf = { 0, };
struct posix_private *priv = NULL;
gid_t gid = 0;
- char was_present = 1;
- char *pathdup = NULL;
- char *parentpath = NULL;
+ char was_present = 1;
struct iatt preparent = {0,};
struct iatt postparent = {0,};
@@ -1620,48 +1518,44 @@ posix_symlink (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
- op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
if ((op_ret == -1) && (errno == ENOENT)){
was_present = 0;
}
- gid = frame->root->gid;
-
- op_ret = setgid_override (this, real_path, &gid);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
-
SET_FS_ID (frame->root->uid, gid);
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
- parentpath = dirname (pathdup);
+ gid = frame->root->gid;
- op_ret = posix_lstat_with_gfid (this, parentpath, &preparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ "pre-operation lstat on parent %s failed: %s",
+ par_path, strerror (op_errno));
goto out;
}
+ if (preparent.ia_prot.sgid) {
+ gid = preparent.ia_gid;
+ }
+
op_ret = symlink (linkname, real_path);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"symlink of %s --> %s failed: %s",
- loc->path, linkname, strerror (op_errno));
+ real_path, linkname, strerror (op_errno));
goto out;
}
- op_ret = posix_gfid_set (this, real_path, params);
+ op_ret = posix_gfid_set (this, real_path, loc, xdata);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting gfid on %s failed", real_path);
+ }
#ifndef HAVE_SET_FSID
op_ret = lchown (real_path, frame->root->uid, gid);
@@ -1669,38 +1563,51 @@ posix_symlink (call_frame_t *frame, xlator_t *this,
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"lchown failed on %s: %s",
- loc->path, strerror (op_errno));
+ real_path, strerror (op_errno));
goto out;
}
#endif
- op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
+
+ op_ret = posix_acl_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting ACLs on %s failed (%s)", real_path,
+ strerror (errno));
+ }
+
+ op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting xattrs on %s failed (%s)", real_path,
+ strerror (errno));
+ }
+
+ op_ret = posix_pstat (this, NULL, real_path, &stbuf);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"lstat failed on %s: %s",
- loc->path, strerror (op_errno));
+ real_path, strerror (op_errno));
goto out;
}
- op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ "post-operation lstat on parent %s failed: %s",
+ par_path, strerror (op_errno));
goto out;
}
-
- op_ret = 0;
- out:
- if (pathdup)
- GF_FREE (pathdup);
+ op_ret = 0;
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno,
- (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent);
+ (loc)?loc->inode:NULL, &stbuf, &preparent,
+ &postparent, NULL);
if ((op_ret == -1) && (!was_present)) {
unlink (real_path);
@@ -1712,23 +1619,26 @@ posix_symlink (call_frame_t *frame, xlator_t *this,
int
posix_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
char *real_oldpath = NULL;
char *real_newpath = NULL;
+ char *par_oldpath = NULL;
+ char *par_newpath = NULL;
struct iatt stbuf = {0, };
struct posix_private *priv = NULL;
- char was_present = 1;
- char *oldpathdup = NULL;
- char *oldparentpath = NULL;
- char *newpathdup = NULL;
- char *newparentpath = NULL;
+ char was_present = 1;
struct iatt preoldparent = {0, };
struct iatt postoldparent = {0, };
struct iatt prenewparent = {0, };
struct iatt postnewparent = {0, };
+ char olddirid[64];
+ char newdirid[64];
+ uuid_t victim = {0};
+ int was_dir = 0;
+ int nlink = 0;
DECLARE_OLD_FS_ID_VAR;
@@ -1741,53 +1651,84 @@ posix_rename (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_oldpath, this, oldloc->path);
- MAKE_REAL_PATH (real_newpath, this, newloc->path);
-
- oldpathdup = gf_strdup (real_oldpath);
- GF_VALIDATE_OR_GOTO (this->name, oldpathdup, out);
+ MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL);
+ MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf);
- oldparentpath = dirname (oldpathdup);
-
- op_ret = posix_lstat_with_gfid (this, oldparentpath, &preoldparent);
+ op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &preoldparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- oldloc->path, strerror (op_errno));
+ "pre-operation lstat on parent %s failed: %s",
+ par_oldpath, strerror (op_errno));
goto out;
}
- newpathdup = gf_strdup (real_newpath);
- GF_VALIDATE_OR_GOTO (this->name, newpathdup, out);
-
- newparentpath = dirname (newpathdup);
-
- op_ret = posix_lstat_with_gfid (this, newparentpath, &prenewparent);
+ op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &prenewparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"pre-operation lstat on parent of %s failed: %s",
- newloc->path, strerror (op_errno));
+ par_newpath, strerror (op_errno));
goto out;
}
- op_ret = posix_lstat_with_gfid (this, real_newpath, &stbuf);
+ op_ret = posix_pstat (this, NULL, real_newpath, &stbuf);
if ((op_ret == -1) && (errno == ENOENT)){
was_present = 0;
+ } else {
+ uuid_copy (victim, stbuf.ia_gfid);
+ if (IA_ISDIR (stbuf.ia_type))
+ was_dir = 1;
+ nlink = stbuf.ia_nlink;
+ }
+
+ if (was_present && IA_ISDIR(stbuf.ia_type) && !newloc->inode) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "found directory at %s while expecting ENOENT",
+ real_newpath);
+ op_ret = -1;
+ op_errno = EEXIST;
+ goto out;
+ }
+
+ if (was_present && IA_ISDIR(stbuf.ia_type) &&
+ uuid_compare (newloc->inode->gfid, stbuf.ia_gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "found directory %s at %s while renaming %s",
+ uuid_utoa_r (newloc->inode->gfid, olddirid),
+ real_newpath,
+ uuid_utoa_r (stbuf.ia_gfid, newdirid));
+ op_ret = -1;
+ op_errno = EEXIST;
+ goto out;
+ }
+
+ if (IA_ISDIR (oldloc->inode->ia_type)) {
+ posix_handle_unset (this, oldloc->inode->gfid, NULL);
}
op_ret = sys_rename (real_oldpath, real_newpath);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name,
- (op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR),
+ (op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR),
"rename of %s to %s failed: %s",
- oldloc->path, newloc->path, strerror (op_errno));
+ real_oldpath, real_newpath, strerror (op_errno));
goto out;
}
- op_ret = posix_lstat_with_gfid (this, real_newpath, &stbuf);
+ if (was_dir)
+ posix_handle_unset (this, victim, NULL);
+
+ if (was_present && !was_dir && nlink == 1)
+ posix_handle_unset (this, victim, NULL);
+
+ if (IA_ISDIR (oldloc->inode->ia_type)) {
+ posix_handle_soft (this, real_newpath, newloc,
+ oldloc->inode->gfid, NULL);
+ }
+
+ op_ret = posix_pstat (this, NULL, real_newpath, &stbuf);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -1796,43 +1737,37 @@ posix_rename (call_frame_t *frame, xlator_t *this,
goto out;
}
- op_ret = posix_lstat_with_gfid (this, oldparentpath, &postoldparent);
+ op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &postoldparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- oldloc->path, strerror (op_errno));
+ "post-operation lstat on parent %s failed: %s",
+ par_oldpath, strerror (op_errno));
goto out;
}
- op_ret = posix_lstat_with_gfid (this, newparentpath, &postnewparent);
+ op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postnewparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- newloc->path, strerror (op_errno));
+ "post-operation lstat on parent %s failed: %s",
+ par_newpath, strerror (op_errno));
goto out;
}
op_ret = 0;
- out:
- if (oldpathdup)
- GF_FREE (oldpathdup);
-
- if (newpathdup)
- GF_FREE (newpathdup);
-
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, &stbuf,
&preoldparent, &postoldparent,
- &prenewparent, &postnewparent);
+ &prenewparent, &postnewparent, NULL);
if ((op_ret == -1) && !was_present) {
unlink (real_newpath);
- }
+ }
return 0;
}
@@ -1840,17 +1775,16 @@ posix_rename (call_frame_t *frame, xlator_t *this,
int
posix_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc)
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
char *real_oldpath = 0;
char *real_newpath = 0;
+ char *par_newpath = 0;
struct iatt stbuf = {0, };
struct posix_private *priv = NULL;
char was_present = 1;
- char *newpathdup = NULL;
- char *newparentpath = NULL;
struct iatt preparent = {0,};
struct iatt postparent = {0,};
@@ -1865,40 +1799,42 @@ posix_link (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_oldpath, this, oldloc->path);
- MAKE_REAL_PATH (real_newpath, this, newloc->path);
+ MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf);
- op_ret = posix_lstat_with_gfid (this, real_newpath, &stbuf);
+ MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf);
if ((op_ret == -1) && (errno == ENOENT)) {
was_present = 0;
}
- newpathdup = gf_strdup (real_newpath);
- if (!newpathdup) {
- gf_log (this->name, GF_LOG_ERROR, "strdup failed");
- op_errno = ENOMEM;
- goto out;
- }
-
- newparentpath = dirname (newpathdup);
- op_ret = posix_lstat_with_gfid (this, newparentpath, &preparent);
+ op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &preparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s",
- newparentpath, strerror (op_errno));
+ par_newpath, strerror (op_errno));
goto out;
}
+#ifdef HAVE_LINKAT
+ /*
+ * On most systems (Linux being the notable exception), link(2)
+ * first resolves symlinks. If the target is a directory or
+ * is nonexistent, it will fail. linkat(2) operates on the
+ * symlink instead of its target when the AT_SYMLINK_FOLLOW
+ * flag is not supplied.
+ */
+ op_ret = linkat (AT_FDCWD, real_oldpath, AT_FDCWD, real_newpath, 0);
+#else
op_ret = link (real_oldpath, real_newpath);
+#endif
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"link %s to %s failed: %s",
- oldloc->path, newloc->path, strerror (op_errno));
+ real_oldpath, real_newpath, strerror (op_errno));
goto out;
}
- op_ret = posix_lstat_with_gfid (this, real_newpath, &stbuf);
+ op_ret = posix_pstat (this, NULL, real_newpath, &stbuf);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -1907,24 +1843,22 @@ posix_link (call_frame_t *frame, xlator_t *this,
goto out;
}
- op_ret = posix_lstat_with_gfid (this, newparentpath, &postparent);
+ op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s",
- newparentpath, strerror (op_errno));
+ par_newpath, strerror (op_errno));
goto out;
}
op_ret = 0;
- out:
- if (newpathdup)
- GF_FREE (newpathdup);
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno,
(oldloc)?oldloc->inode:NULL, &stbuf, &preparent,
- &postparent);
+ &postparent, NULL);
if ((op_ret == -1) && (!was_present)) {
unlink (real_newpath);
@@ -1935,10 +1869,8 @@ posix_link (call_frame_t *frame, xlator_t *this,
int32_t
-posix_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
+posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
@@ -1957,14 +1889,13 @@ posix_truncate (call_frame_t *frame,
VALIDATE_OR_GOTO (priv, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
- op_ret = posix_lstat_with_gfid (this, real_path, &prebuf);
+ MAKE_INODE_HANDLE (real_path, this, loc, &prebuf);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"pre-operation lstat on %s failed: %s",
- loc->path, strerror (op_errno));
+ real_path, strerror (op_errno));
goto out;
}
@@ -1973,11 +1904,11 @@ posix_truncate (call_frame_t *frame,
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"truncate on %s failed: %s",
- loc->path, strerror (op_errno));
+ real_path, strerror (op_errno));
goto out;
}
- op_ret = posix_lstat_with_gfid (this, real_path, &postbuf);
+ op_ret = posix_pstat (this, loc->gfid, real_path, &postbuf);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR, "lstat on %s failed: %s",
@@ -1987,34 +1918,33 @@ posix_truncate (call_frame_t *frame,
op_ret = 0;
- out:
+out:
SET_TO_OLD_FS_ID ();
STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- &prebuf, &postbuf);
+ &prebuf, &postbuf, NULL);
return 0;
}
-int32_t
+int
posix_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *params)
+ mode_t umask, fd_t *fd, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
int32_t _fd = -1;
int _flags = 0;
char * real_path = NULL;
+ char * par_path = NULL;
struct iatt stbuf = {0, };
struct posix_fd * pfd = NULL;
struct posix_private * priv = NULL;
- char was_present = 1;
+ char was_present = 1;
gid_t gid = 0;
- char *pathdup = NULL;
- char *parentpath = NULL;
struct iatt preparent = {0,};
struct iatt postparent = {0,};
@@ -2029,32 +1959,25 @@ posix_create (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf);
gid = frame->root->gid;
- op_ret = setgid_override (this, real_path, &gid);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
-
SET_FS_ID (frame->root->uid, gid);
- pathdup = gf_strdup (real_path);
- GF_VALIDATE_OR_GOTO (this->name, pathdup, out);
- parentpath = dirname (pathdup);
-
- op_ret = posix_lstat_with_gfid (this, parentpath, &preparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "pre-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ "pre-operation lstat on parent %s failed: %s",
+ par_path, strerror (op_errno));
goto out;
}
+ if (preparent.ia_prot.sgid) {
+ gid = preparent.ia_gid;
+ }
+
if (!flags) {
_flags = O_CREAT | O_RDWR | O_EXCL;
}
@@ -2062,7 +1985,7 @@ posix_create (call_frame_t *frame, xlator_t *this,
_flags = flags | O_CREAT;
}
- op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
+ op_ret = posix_pstat (this, NULL, real_path, &stbuf);
if ((op_ret == -1) && (errno == ENOENT)) {
was_present = 0;
}
@@ -2076,12 +1999,19 @@ posix_create (call_frame_t *frame, xlator_t *this,
op_errno = errno;
op_ret = -1;
gf_log (this->name, GF_LOG_ERROR,
- "open on %s failed: %s", loc->path,
+ "open on %s failed: %s", real_path,
strerror (op_errno));
goto out;
}
- op_ret = posix_gfid_set (this, real_path, params);
+ if (was_present)
+ goto fill_stat;
+
+ op_ret = posix_gfid_set (this, real_path, loc, xdata);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting gfid on %s failed", real_path);
+ }
#ifndef HAVE_SET_FSID
op_ret = chown (real_path, frame->root->uid, gid);
@@ -2089,11 +2019,26 @@ posix_create (call_frame_t *frame, xlator_t *this,
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
"chown on %s failed: %s",
- real_path, strerror (op_errno));
+ real_path, strerror (op_errno));
}
#endif
- op_ret = posix_fstat_with_gfid (this, _fd, &stbuf);
+ op_ret = posix_acl_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting ACLs on %s failed (%s)", real_path,
+ strerror (errno));
+ }
+
+ op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "setting xattrs on %s failed (%s)", real_path,
+ strerror (errno));
+ }
+
+fill_stat:
+ op_ret = posix_fdstat (this, _fd, &stbuf);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -2101,29 +2046,30 @@ posix_create (call_frame_t *frame, xlator_t *this,
goto out;
}
- op_ret = posix_lstat_with_gfid (this, parentpath, &postparent);
+ op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
- "post-operation lstat on parent of %s failed: %s",
- loc->path, strerror (op_errno));
+ "post-operation lstat on parent %s failed: %s",
+ par_path, strerror (op_errno));
goto out;
}
- op_ret = -1;
+ op_ret = -1;
pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
-
if (!pfd) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
goto out;
}
pfd->flags = flags;
pfd->fd = _fd;
- fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+ op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+ if (op_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set the fd context path=%s fd=%p",
+ real_path, fd);
LOCK (&priv->lock);
{
@@ -2133,9 +2079,7 @@ posix_create (call_frame_t *frame, xlator_t *this,
op_ret = 0;
- out:
- if (pathdup)
- GF_FREE (pathdup);
+out:
SET_TO_OLD_FS_ID ();
if ((-1 == op_ret) && (_fd != -1)) {
@@ -2148,14 +2092,14 @@ posix_create (call_frame_t *frame, xlator_t *this,
STACK_UNWIND_STRICT (create, frame, op_ret, op_errno,
fd, (loc)?loc->inode:NULL, &stbuf, &preparent,
- &postparent);
+ &postparent, xdata);
return 0;
}
int32_t
posix_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, fd_t *fd, int wbflags)
+ loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
@@ -2163,8 +2107,6 @@ posix_open (call_frame_t *frame, xlator_t *this,
int32_t _fd = -1;
struct posix_fd *pfd = NULL;
struct posix_private *priv = NULL;
- char was_present = 1;
- gid_t gid = 0;
struct iatt stbuf = {0, };
DECLARE_OLD_FS_ID_VAR;
@@ -2178,25 +2120,14 @@ posix_open (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, &stbuf);
- op_ret = setgid_override (this, real_path, &gid);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
- }
-
- SET_FS_ID (frame->root->uid, gid);
+ op_ret = -1;
+ SET_FS_ID (frame->root->uid, frame->root->gid);
if (priv->o_direct)
flags |= O_DIRECT;
- op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
- if ((op_ret == -1) && (errno == ENOENT)) {
- was_present = 0;
- }
-
_fd = open (real_path, flags, 0);
if (_fd == -1) {
op_ret = -1;
@@ -2207,43 +2138,19 @@ posix_open (call_frame_t *frame, xlator_t *this,
}
pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
-
if (!pfd) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
goto out;
}
pfd->flags = flags;
pfd->fd = _fd;
- if (wbflags == GF_OPEN_FSYNC)
- pfd->flushwrites = 1;
- fd_ctx_set (fd, this, (uint64_t)(long)pfd);
-
-#ifndef HAVE_SET_FSID
- if (flags & O_CREAT) {
- op_ret = chown (real_path, frame->root->uid, gid);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "chown on %s failed: %s",
- real_path, strerror (op_errno));
- goto out;
- }
- }
-#endif
-
- if (flags & O_CREAT) {
- op_ret = posix_lstat_with_gfid (this, real_path, &stbuf);
- if (op_ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "lstat on (%s) "
- "failed: %s", real_path, strerror (op_errno));
- goto out;
- }
- }
+ op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd);
+ if (op_ret)
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to set the fd context path=%s fd=%p",
+ real_path, fd);
LOCK (&priv->lock);
{
@@ -2253,7 +2160,7 @@ posix_open (call_frame_t *frame, xlator_t *this,
op_ret = 0;
- out:
+out:
if (op_ret == -1) {
if (_fd != -1) {
close (_fd);
@@ -2262,19 +2169,15 @@ posix_open (call_frame_t *frame, xlator_t *this,
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd);
+ STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL);
return 0;
}
-#define ALIGN_BUF(ptr,bound) ((void *)((unsigned long)(ptr + bound - 1) & \
- (unsigned long)(~(bound - 1))))
-
int
posix_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset)
+ fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
{
- uint64_t tmp_pfd = 0;
int32_t op_ret = -1;
int32_t op_errno = 0;
int _fd = -1;
@@ -2284,7 +2187,6 @@ posix_readv (call_frame_t *frame, xlator_t *this,
struct iovec vec = {0,};
struct posix_fd * pfd = NULL;
struct iatt stbuf = {0,};
- int align = 1;
int ret = -1;
VALIDATE_OR_GOTO (frame, out);
@@ -2295,29 +2197,23 @@ posix_readv (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd is NULL from fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
if (!size) {
op_errno = EINVAL;
- gf_log (this->name, GF_LOG_DEBUG, "size=%"GF_PRI_SIZET, size);
+ gf_log (this->name, GF_LOG_WARNING, "size=%"GF_PRI_SIZET, size);
goto out;
}
- if (pfd->flags & O_DIRECT) {
- align = 4096; /* align to page boundary */
- }
-
- iobuf = iobuf_get (this->ctx->iobuf_pool);
+ iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
if (!iobuf) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
@@ -2349,7 +2245,7 @@ posix_readv (call_frame_t *frame, xlator_t *this,
* we read from
*/
- op_ret = posix_fstat_with_gfid (this, _fd, &stbuf);
+ op_ret = posix_fdstat (this, _fd, &stbuf);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -2359,16 +2255,14 @@ posix_readv (call_frame_t *frame, xlator_t *this,
}
/* Hack to notify higher layers of EOF. */
- if (stbuf.ia_size == 0)
- op_errno = ENOENT;
- else if ((offset + vec.iov_len) == stbuf.ia_size)
+ if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size)
op_errno = ENOENT;
op_ret = vec.iov_len;
out:
STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- &vec, 1, &stbuf, iobref);
+ &vec, 1, &stbuf, iobref, NULL);
if (iobref)
iobref_unref (iobref);
@@ -2406,14 +2300,12 @@ err:
return op_ret;
}
-
int32_t
__posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
int odirect)
{
int32_t op_ret = 0;
int idx = 0;
- int align = 4096;
int max_buf_size = 0;
int retval = 0;
char *buf = NULL;
@@ -2429,7 +2321,7 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
max_buf_size = vector[idx].iov_len;
}
- alloc_buf = GF_MALLOC (1 * (max_buf_size + align), gf_posix_mt_char);
+ alloc_buf = _page_aligned_alloc (max_buf_size, &buf);
if (!alloc_buf) {
op_ret = -errno;
goto err;
@@ -2437,9 +2329,6 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
internal_off = startoff;
for (idx = 0; idx < count; idx++) {
- /* page aligned buffer */
- buf = ALIGN_BUF (alloc_buf, align);
-
memcpy (buf, vector[idx].iov_base, vector[idx].iov_len);
/* not sure whether writev works on O_DIRECT'd fd */
@@ -2454,17 +2343,58 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
}
err:
- if (alloc_buf)
- GF_FREE (alloc_buf);
+ GF_FREE (alloc_buf);
return op_ret;
}
+dict_t*
+_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
+{
+ dict_t *rsp_xdata = NULL;
+ int32_t ret = 0;
+ inode_t *inode = NULL;
+
+ if (fd)
+ inode = fd->inode;
+
+ if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid Args: "
+ "fd: %p inode: %p gfid:%s", fd, inode?inode:0,
+ inode?uuid_utoa(inode->gfid):"N/A");
+ goto out;
+ }
+
+ if (!xdata || !dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT))
+ goto out;
+
+ rsp_xdata = dict_new();
+ if (!rsp_xdata)
+ goto out;
+
+ ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_OPEN_FD_COUNT,
+ fd->inode->fd_count);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set "
+ "dictionary value for %s", uuid_utoa (fd->inode->gfid),
+ GLUSTERFS_OPEN_FD_COUNT);
+ }
+
+ ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND,
+ is_append);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set "
+ "dictionary value for %s", uuid_utoa (fd->inode->gfid),
+ GLUSTERFS_WRITE_IS_APPEND);
+ }
+out:
+ return rsp_xdata;
+}
int32_t
-posix_writev (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iovec *vector, int32_t count, off_t offset,
- struct iobref *iobref)
+posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
@@ -2474,8 +2404,9 @@ posix_writev (call_frame_t *frame, xlator_t *this,
struct iatt preop = {0,};
struct iatt postop = {0,};
int ret = -1;
-
- uint64_t tmp_pfd = 0;
+ dict_t *rsp_xdata = NULL;
+ int is_append = 0;
+ gf_boolean_t locked = _gf_false;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -2487,18 +2418,28 @@ posix_writev (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (priv, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd is NULL from fd=%p", fd);
op_errno = -ret;
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
_fd = pfd->fd;
- op_ret = posix_fstat_with_gfid (this, _fd, &preop);
+ if (xdata && dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
+ /* The write_is_append check and write must happen
+ atomically. Else another write can overtake this
+ write after the check and get written earlier.
+
+ So lock before preop-stat and unlock after write.
+ */
+ locked = _gf_true;
+ LOCK(&fd->inode->lock);
+ }
+
+ op_ret = posix_fdstat (this, _fd, &preop);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -2507,8 +2448,19 @@ posix_writev (call_frame_t *frame, xlator_t *this,
goto out;
}
+ if (locked) {
+ if (preop.ia_size == offset || (fd->flags & O_APPEND))
+ is_append = 1;
+ }
+
op_ret = __posix_writev (_fd, vector, count, offset,
(pfd->flags & O_DIRECT));
+
+ if (locked) {
+ UNLOCK (&fd->inode->lock);
+ locked = _gf_false;
+ }
+
if (op_ret < 0) {
op_errno = -op_ret;
op_ret = -1;
@@ -2524,38 +2476,53 @@ posix_writev (call_frame_t *frame, xlator_t *this,
UNLOCK (&priv->lock);
if (op_ret >= 0) {
+ rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append);
/* wiretv successful, we also need to get the stat of
* the file we wrote to
*/
- if (pfd->flushwrites) {
- /* NOTE: ignore the error, if one occurs at this
- * point */
- fsync (_fd);
+ if (flags & (O_SYNC|O_DSYNC)) {
+ ret = fsync (_fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "fsync() in writev on fd %d failed: %s",
+ _fd, strerror (errno));
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
}
- ret = posix_fstat_with_gfid (this, _fd, &postop);
+ ret = posix_fdstat (this, _fd, &postop);
if (ret == -1) {
- op_ret = -1;
+ op_ret = -1;
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"post-operation fstat failed on fd=%p: %s",
fd, strerror (op_errno));
goto out;
}
}
- out:
+out:
+
+ if (locked) {
+ UNLOCK (&fd->inode->lock);
+ locked = _gf_false;
+ }
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop);
+ STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop,
+ rsp_xdata);
+ if (rsp_xdata)
+ dict_unref (rsp_xdata);
return 0;
}
int32_t
posix_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc)
+ loc_t *loc, dict_t *xdata)
{
char * real_path = NULL;
int32_t op_ret = -1;
@@ -2568,7 +2535,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, out);
VALIDATE_OR_GOTO (this->private, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
priv = this->private;
@@ -2576,7 +2543,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
"statvfs failed on %s: %s",
real_path, strerror (op_errno));
goto out;
@@ -2593,68 +2560,65 @@ posix_statfs (call_frame_t *frame, xlator_t *this,
op_ret = 0;
- out:
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf);
+out:
+ STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, NULL);
return 0;
}
int32_t
posix_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
- struct posix_fd * pfd = NULL;
int ret = -1;
- uint64_t tmp_pfd = 0;
+ struct posix_fd *pfd = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"pfd is NULL on fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
op_ret = 0;
- out:
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno);
+out:
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL);
return 0;
}
int32_t
-posix_release (xlator_t *this,
- fd_t *fd)
+posix_release (xlator_t *this, fd_t *fd)
{
struct posix_private * priv = NULL;
struct posix_fd * pfd = NULL;
int ret = -1;
- uint64_t tmp_pfd = 0;
+ uint64_t tmp_pfd = 0;
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
priv = this->private;
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = fd_ctx_del (fd, this, &tmp_pfd);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"pfd is NULL from fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
+ pfd = (struct posix_fd *)(long)tmp_pfd;
if (pfd->dir) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"pfd->dir is %p (not NULL) for file fd=%p",
pfd->dir, fd);
}
@@ -2673,23 +2637,50 @@ posix_release (xlator_t *this,
}
UNLOCK (&priv->lock);
- out:
+out:
return 0;
}
+int
+posix_batch_fsync (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int datasync, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ pthread_mutex_lock (&priv->fsync_mutex);
+ {
+ list_add_tail (&stub->list, &priv->fsyncs);
+ priv->fsync_queue_count++;
+ pthread_cond_signal (&priv->fsync_cond);
+ }
+ pthread_mutex_unlock (&priv->fsync_mutex);
+
+ return 0;
+}
+
+
int32_t
posix_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync)
+ fd_t *fd, int32_t datasync, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
int _fd = -1;
struct posix_fd * pfd = NULL;
int ret = -1;
- uint64_t tmp_pfd = 0;
struct iatt preop = {0,};
struct iatt postop = {0,};
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -2705,21 +2696,26 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
goto out;
#endif
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ priv = this->private;
+ if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) {
+ posix_batch_fsync (frame, this, fd, datasync, xdata);
+ return 0;
+ }
+
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"pfd not found in fd's ctx");
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
_fd = pfd->fd;
- op_ret = posix_fstat_with_gfid (this, _fd, &preop);
+ op_ret = posix_fdstat (this, _fd, &preop);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"pre-operation fstat failed on fd=%p: %s", fd,
strerror (op_errno));
goto out;
@@ -2746,10 +2742,10 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
}
}
- op_ret = posix_fstat_with_gfid (this, _fd, &postop);
+ op_ret = posix_fdstat (this, _fd, &postop);
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"post-operation fstat failed on fd=%p: %s", fd,
strerror (op_errno));
goto out;
@@ -2757,155 +2753,37 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
op_ret = 0;
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop);
+ STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop,
+ NULL);
return 0;
}
static int gf_posix_xattr_enotsup_log;
-
-int
-set_file_contents (xlator_t *this, char *real_path,
- data_pair_t *trav, int flags)
-{
- char * key = NULL;
- char real_filepath[ZR_PATH_MAX] = {0,};
- int32_t file_fd = -1;
- int op_ret = 0;
- int ret = -1;
-
- key = &(trav->key[15]);
- sprintf (real_filepath, "%s/%s", real_path, key);
-
- if (flags & XATTR_REPLACE) {
- /* if file exists, replace it
- * else, error out */
- file_fd = open (real_filepath, O_TRUNC|O_WRONLY);
-
- if (file_fd == -1) {
- goto create;
- }
-
- if (trav->value->len) {
- ret = write (file_fd, trav->value->data,
- trav->value->len);
- if (ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "write failed while doing setxattr "
- "for key %s on path %s: %s",
- key, real_filepath, strerror (errno));
- goto out;
- }
-
- ret = close (file_fd);
- if (ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "close failed on %s: %s",
- real_filepath, strerror (errno));
- goto out;
- }
- }
-
- create: /* we know file doesn't exist, create it */
-
- file_fd = open (real_filepath, O_CREAT|O_WRONLY, 0644);
-
- if (file_fd == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to open file %s with O_CREAT: %s",
- key, strerror (errno));
- goto out;
- }
-
- ret = write (file_fd, trav->value->data, trav->value->len);
- if (ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "write failed on %s while setxattr with "
- "key %s: %s",
- real_filepath, key, strerror (errno));
- goto out;
- }
-
- ret = close (file_fd);
- if (ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR,
- "close failed on %s while setxattr with "
- "key %s: %s",
- real_filepath, key, strerror (errno));
- goto out;
- }
- }
-
- out:
- return op_ret;
-}
-
-int
-handle_pair (xlator_t *this, char *real_path,
- data_pair_t *trav, int flags)
+static int
+_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
+ void *tmp)
{
- int sys_ret = -1;
- int ret = 0;
+ posix_xattr_filler_t *filler = NULL;
- if (ZR_FILE_CONTENT_REQUEST(trav->key)) {
- ret = set_file_contents (this, real_path, trav, flags);
- } else {
- sys_ret = sys_lsetxattr (real_path, trav->key,
- trav->value->data,
- trav->value->len, flags);
+ filler = tmp;
- if (sys_ret < 0) {
- if (errno == ENOTSUP) {
- GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
- this->name,GF_LOG_WARNING,
- "Extended attributes not "
- "supported");
- } else if (errno == ENOENT) {
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr on %s failed: %s", real_path,
- strerror (errno));
- } else {
-
-#ifdef GF_DARWIN_HOST_OS
- gf_log (this->name,
- ((errno == EINVAL) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "%s: key:%s error:%s",
- real_path, trav->key,
- strerror (errno));
-#else /* ! DARWIN */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: key:%s error:%s",
- real_path, trav->key,
- strerror (errno));
-#endif /* DARWIN */
- }
-
- ret = -errno;
- goto out;
- }
- }
- out:
- return ret;
+ return posix_handle_pair (filler->this, filler->real_path, k, v,
+ filler->flags);
}
int32_t
posix_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int flags)
+ loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
char * real_path = NULL;
- data_pair_t * trav = NULL;
- int ret = -1;
+
+ posix_xattr_filler_t filler = {0,};
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -2915,99 +2793,72 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, out);
VALIDATE_OR_GOTO (dict, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+ op_ret = -1;
dict_del (dict, GFID_XATTR_KEY);
- trav = dict->members_list;
-
- while (trav) {
- ret = handle_pair (this, real_path, trav, flags);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- trav = trav->next;
- }
-
- op_ret = 0;
+ filler.real_path = real_path;
+ filler.this = this;
+ filler.flags = flags;
+ op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair,
+ &filler);
+ if (op_ret < 0)
+ op_errno = -op_ret;
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
return 0;
}
+
int
-get_file_contents (xlator_t *this, char *real_path,
- const char *name, char **contents)
+posix_xattr_get_real_filename (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *dict, dict_t *xdata)
{
- char real_filepath[ZR_PATH_MAX] = {0,};
- char * key = NULL;
- int32_t file_fd = -1;
- struct iatt stbuf = {0,};
- int op_ret = 0;
- int ret = -1;
-
- key = (char *) &(name[15]);
- sprintf (real_filepath, "%s/%s", real_path, key);
-
- op_ret = posix_lstat_with_gfid (this, real_filepath, &stbuf);
- if (op_ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s",
- real_filepath, strerror (errno));
- goto out;
- }
-
- file_fd = open (real_filepath, O_RDONLY);
-
- if (file_fd == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR, "open failed on %s: %s",
- real_filepath, strerror (errno));
- goto out;
- }
-
- *contents = GF_CALLOC (stbuf.ia_size + 1, sizeof(char),
- gf_posix_mt_char);
-
- if (! *contents) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
- goto out;
- }
+ char *real_path = NULL;
+ struct dirent *dirent = NULL;
+ DIR *fd = NULL;
+ const char *fname = NULL;
+ char *found = NULL;
+ int ret = -1;
+ int op_ret = -1;
+
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+
+ fd = opendir (real_path);
+ if (!fd)
+ return -errno;
+
+ fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY);
+
+ while ((dirent = readdir (fd))) {
+ if (strcasecmp (dirent->d_name, fname) == 0) {
+ found = gf_strdup (dirent->d_name);
+ if (!found) {
+ closedir (fd);
+ return -ENOMEM;
+ }
+ break;
+ }
+ }
- ret = read (file_fd, *contents, stbuf.ia_size);
- if (ret <= 0) {
- op_ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "read on %s failed: %s",
- real_filepath, strerror (errno));
- goto out;
- }
+ closedir (fd);
- *contents[stbuf.ia_size] = '\0';
+ if (!found)
+ return -ENOENT;
- op_ret = close (file_fd);
- file_fd = -1;
- if (op_ret == -1) {
- op_ret = -errno;
- gf_log (this->name, GF_LOG_ERROR, "close on %s failed: %s",
- real_filepath, strerror (errno));
- goto out;
- }
-
- out:
- if (op_ret < 0) {
- if (*contents)
- GF_FREE (*contents);
- if (file_fd != -1)
- close (file_fd);
- }
+ ret = dict_set_dynstr (dict, (char *)key, found);
+ if (ret) {
+ GF_FREE (found);
+ return -ENOMEM;
+ }
+ ret = strlen (found) + 1;
- return op_ret;
+ return ret;
}
/**
@@ -3017,22 +2868,25 @@ get_file_contents (xlator_t *this, char *real_path,
*/
int32_t
posix_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
- struct posix_private *priv = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t list_offset = 0;
- size_t size = 0;
- size_t remaining_size = 0;
- char key[1024] = {0,};
- char host_buf[1024] = {0,};
- char * value = NULL;
- char * list = NULL;
- char * real_path = NULL;
- dict_t * dict = NULL;
- char * file_contents = NULL;
- int ret = -1;
+ struct posix_private *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t list_offset = 0;
+ ssize_t size = 0;
+ size_t remaining_size = 0;
+ char key[4096] = {0,};
+ char host_buf[1024] = {0,};
+ char *value = NULL;
+ char *list = NULL;
+ char *real_path = NULL;
+ dict_t *dict = NULL;
+ char *file_contents = NULL;
+ int ret = -1;
+ char *path = NULL;
+ char *rpath = NULL;
+ char *dyn_rpath = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -3041,59 +2895,193 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (loc, out);
SET_FS_ID (frame->root->uid, frame->root->gid);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+ op_ret = -1;
priv = this->private;
if (loc->inode && IA_ISDIR(loc->inode->ia_type) && name &&
- ZR_FILE_CONTENT_REQUEST(name)) {
- ret = get_file_contents (this, real_path, name,
- &file_contents);
+ ZR_FILE_CONTENT_REQUEST(name)) {
+ ret = posix_get_file_contents (this, loc->gfid, &name[15],
+ &file_contents);
if (ret < 0) {
op_errno = -ret;
gf_log (this->name, GF_LOG_ERROR,
- "getting file contents failed: %s",
+ "getting file contents failed: %s",
strerror (op_errno));
goto out;
}
}
- /* Get the total size */
- dict = get_new_dict ();
+ dict = dict_new ();
if (!dict) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
+ op_errno = ENOMEM;
goto out;
}
+ if (loc->inode && name &&
+ (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY,
+ strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) {
+ ret = posix_xattr_get_real_filename (frame, this, loc,
+ name, dict, xdata);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ gf_log (this->name, (op_errno == ENOENT) ?
+ GF_LOG_DEBUG : GF_LOG_WARNING,
+ "Failed to get real filename (%s, %s): %s",
+ loc->path, name, strerror (op_errno));
+ goto out;
+ }
+
+ size = ret;
+ goto done;
+ }
+
if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) {
- if (!list_empty (&loc->inode->fd_list)) {
- ret = dict_set_uint32 (dict, (char *)name, 1);
+ if (!list_empty (&loc->inode->fd_list)) {
+ ret = dict_set_uint32 (dict, (char *)name, 1);
if (ret < 0)
gf_log (this->name, GF_LOG_WARNING,
"Failed to set dictionary value for %s",
name);
- } else {
- ret = dict_set_uint32 (dict, (char *)name, 0);
+ } else {
+ ret = dict_set_uint32 (dict, (char *)name, 0);
if (ret < 0)
gf_log (this->name, GF_LOG_WARNING,
"Failed to set dictionary value for %s",
name);
- }
+ }
+ goto done;
+ }
+ if (loc->inode && name &&
+ (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) {
+ if (LOC_HAS_ABSPATH (loc))
+ MAKE_REAL_PATH (rpath, this, loc->path);
+ else
+ rpath = real_path;
+
+ (void) snprintf (host_buf, 1024,
+ "<POSIX(%s):%s:%s>", priv->base_path,
+ ((priv->node_uuid_pathinfo
+ && !uuid_is_null(priv->glusterd_uuid))
+ ? uuid_utoa (priv->glusterd_uuid)
+ : priv->hostname),
+ rpath);
+
+ dyn_rpath = gf_strdup (host_buf);
+ if (!dyn_rpath) {
+ ret = -1;
+ goto done;
+ }
+ size = strlen (dyn_rpath) + 1;
+ ret = dict_set_dynstr (dict, GF_XATTR_PATHINFO_KEY,
+ dyn_rpath);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not set value (%s) in dictionary",
+ dyn_rpath);
+ GF_FREE (dyn_rpath);
+ }
+
goto done;
}
- if (loc->inode && IA_ISREG (loc->inode->ia_type) && name &&
- (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) {
- snprintf (host_buf, 1024, "%s:%s", priv->hostname,
- real_path);
- ret = dict_set_str (dict, GF_XATTR_PATHINFO_KEY,
- host_buf);
+
+ if (loc->inode && name &&
+ (strcmp (name, GF_XATTR_NODE_UUID_KEY) == 0)
+ && !uuid_is_null (priv->glusterd_uuid)) {
+ (void) snprintf (host_buf, 1024, "%s",
+ uuid_utoa (priv->glusterd_uuid));
+
+ dyn_rpath = gf_strdup (host_buf);
+ if (!dyn_rpath) {
+ ret = -1;
+ goto done;
+ }
+
+ size = strlen (dyn_rpath) + 1;
+ ret = dict_set_dynstr (dict, GF_XATTR_NODE_UUID_KEY,
+ dyn_rpath);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "could not set value (%s) in dictionary",
+ dyn_rpath);
+ GF_FREE (dyn_rpath);
+ }
+ goto done;
+ }
+
+ if (loc->inode && name &&
+ (strcmp (name, GFID_TO_PATH_KEY) == 0)) {
+ ret = inode_path (loc->inode, NULL, &path);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "%s: could not get "
+ "inode path", uuid_utoa (loc->inode->gfid));
+ goto done;
+ }
+
+ ret = dict_set_dynstr (dict, GFID_TO_PATH_KEY, path);
if (ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
"could not set value (%s) in dictionary",
host_buf);
+ GF_FREE (path);
}
goto done;
- }
+ }
+
+ if (name) {
+ strcpy (key, name);
+
+ size = sys_lgetxattr (real_path, key, NULL, 0);
+ if (size <= 0) {
+ op_errno = errno;
+ if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
+ this->name, GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting"
+ " brick with 'user_xattr' "
+ "flag)");
+ } else if (op_errno == ENOATTR ||
+ op_errno == ENODATA) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No such attribute:%s for file %s",
+ key, real_path);
+ } else {
+ gf_log (this->name, GF_LOG_ERROR,
+ "getxattr failed on %s: %s (%s)",
+ real_path, key, strerror (op_errno));
+ }
+
+ goto done;
+ }
+ value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char);
+ if (!value) {
+ op_ret = -1;
+ goto out;
+ }
+ size = sys_lgetxattr (real_path, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "getxattr failed on "
+ "%s: key = %s (%s)", real_path, key,
+ strerror (op_errno));
+ GF_FREE (value);
+ goto out;
+ }
+ value [size] = '\0';
+ op_ret = dict_set_dynptr (dict, key, value, size);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "dict set operation "
+ "on %s for the key %s failed.", real_path, key);
+ GF_FREE (value);
+ goto out;
+ }
+
+ goto done;
+ }
size = sys_llistxattr (real_path, NULL, 0);
if (size == -1) {
@@ -3102,11 +3090,13 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
this->name, GF_LOG_WARNING,
"Extended attributes not "
- "supported.");
+ "supported (try remounting"
+ " brick with 'user_xattr' "
+ "flag)");
}
else {
gf_log (this->name, GF_LOG_ERROR,
- "listxattr failed on %s: %s",
+ "listxattr failed on %s: %s",
real_path, strerror (op_errno));
}
goto out;
@@ -3118,7 +3108,6 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
list = alloca (size + 1);
if (!list) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
goto out;
}
@@ -3131,45 +3120,59 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
break;
strcpy (key, list + list_offset);
- op_ret = sys_lgetxattr (real_path, key, NULL, 0);
- if (op_ret == -1)
+ size = sys_lgetxattr (real_path, key, NULL, 0);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "getxattr failed on "
+ "%s: key = %s (%s)", real_path, key,
+ strerror (op_errno));
break;
+ }
- value = GF_CALLOC (op_ret + 1, sizeof(char),
+ value = GF_CALLOC (size + 1, sizeof(char),
gf_posix_mt_char);
if (!value) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
goto out;
}
- op_ret = sys_lgetxattr (real_path, key, value, op_ret);
- if (op_ret == -1) {
+ size = sys_lgetxattr (real_path, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
op_errno = errno;
-
+ gf_log (this->name, GF_LOG_ERROR, "getxattr failed on "
+ "%s: key = %s (%s)", real_path, key,
+ strerror (op_errno));
+ GF_FREE (value);
break;
}
- value [op_ret] = '\0';
- dict_set (dict, key, data_from_dynptr (value, op_ret));
+ value [size] = '\0';
+ op_ret = dict_set_dynptr (dict, key, value, size);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "dict set operation "
+ "on %s for the key %s failed.", real_path, key);
+ GF_FREE (value);
+ goto out;
+ }
remaining_size -= strlen (key) + 1;
list_offset += strlen (key) + 1;
} /* while (remaining_size > 0) */
- done:
+done:
op_ret = size;
if (dict) {
dict_del (dict, GFID_XATTR_KEY);
- dict_ref (dict);
}
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL);
if (dict)
dict_unref (dict);
@@ -3180,17 +3183,16 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
int32_t
posix_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name)
+ fd_t *fd, const char *name, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = ENOENT;
- uint64_t tmp_pfd = 0;
struct posix_fd * pfd = NULL;
int _fd = -1;
int32_t list_offset = 0;
- size_t size = 0;
+ ssize_t size = 0;
size_t remaining_size = 0;
- char key[1024] = {0,};
+ char key[4096] = {0,};
char * value = NULL;
char * list = NULL;
dict_t * dict = NULL;
@@ -3204,21 +3206,19 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
SET_FS_ID (frame->root->uid, frame->root->gid);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd is NULL from fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
_fd = pfd->fd;
/* Get the total size */
dict = get_new_dict ();
if (!dict) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
goto out;
}
@@ -3231,6 +3231,43 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
goto done;
}
+ if (name) {
+ strcpy (key, name);
+
+ size = sys_fgetxattr (_fd, key, NULL, 0);
+ if (size <= 0) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on "
+ "key %s (%s)", key, strerror (op_errno));
+ goto done;
+ }
+
+ value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char);
+ if (!value) {
+ op_ret = -1;
+ goto out;
+ }
+ size = sys_fgetxattr (_fd, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on "
+ "fd %p for the key %s (%s)", fd, key,
+ strerror (op_errno));
+ GF_FREE (value);
+ goto out;
+ }
+ value [size] = '\0';
+ op_ret = dict_set_dynptr (dict, key, value, size);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "dict set operation "
+ "on key %s failed", key);
+ GF_FREE (value);
+ goto out;
+ }
+ goto done;
+ }
+
size = sys_flistxattr (_fd, NULL, 0);
if (size == -1) {
op_errno = errno;
@@ -3238,11 +3275,12 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
this->name, GF_LOG_WARNING,
"Extended attributes not "
- "supported.");
+ "supported (try remounting "
+ "brick with 'user_xattr' flag)");
}
else {
gf_log (this->name, GF_LOG_ERROR,
- "listxattr failed on %p: %s",
+ "listxattr failed on %p: %s",
fd, strerror (op_errno));
}
goto out;
@@ -3254,7 +3292,6 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
list = alloca (size + 1);
if (!list) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
goto out;
}
@@ -3267,30 +3304,49 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
break;
strcpy (key, list + list_offset);
- op_ret = sys_fgetxattr (_fd, key, NULL, 0);
- if (op_ret == -1)
+ size = sys_fgetxattr (_fd, key, NULL, 0);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on "
+ "fd %p for the key %s (%s)", fd, key,
+ strerror (op_errno));
break;
+ }
- value = GF_CALLOC (op_ret + 1, sizeof(char),
+ value = GF_CALLOC (size + 1, sizeof(char),
gf_posix_mt_char);
if (!value) {
+ op_ret = -1;
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "Out of memory.");
goto out;
}
- op_ret = sys_fgetxattr (_fd, key, value, op_ret);
- if (op_ret == -1)
+ size = sys_fgetxattr (_fd, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on "
+ "the fd %p for the key %s (%s)", fd, key,
+ strerror (op_errno));
+ GF_FREE (value);
break;
+ }
- value [op_ret] = '\0';
- dict_set (dict, key, data_from_dynptr (value, op_ret));
+ value [size] = '\0';
+ op_ret = dict_set_dynptr (dict, key, value, size);
+ if (op_ret) {
+ gf_log (this->name, GF_LOG_ERROR, "dict set operation "
+ "failed on key %s", key);
+ GF_FREE (value);
+ goto out;
+ }
remaining_size -= strlen (key) + 1;
list_offset += strlen (key) + 1;
} /* while (remaining_size > 0) */
- done:
+done:
op_ret = size;
if (dict) {
@@ -3298,10 +3354,10 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
dict_ref (dict);
}
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict);
+ STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL);
if (dict)
dict_unref (dict);
@@ -3309,64 +3365,29 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this,
return 0;
}
-
-int
-fhandle_pair (xlator_t *this, int fd,
- data_pair_t *trav, int flags)
+static int
+_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
+ void *tmp)
{
- int sys_ret = -1;
- int ret = 0;
-
- sys_ret = sys_fsetxattr (fd, trav->key, trav->value->data,
- trav->value->len, flags);
-
- if (sys_ret < 0) {
- if (errno == ENOTSUP) {
- GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
- this->name,GF_LOG_WARNING,
- "Extended attributes not "
- "supported");
- } else if (errno == ENOENT) {
- gf_log (this->name, GF_LOG_ERROR,
- "fsetxattr on fd=%d failed: %s", fd,
- strerror (errno));
- } else {
-
-#ifdef GF_DARWIN_HOST_OS
- gf_log (this->name,
- ((errno == EINVAL) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "fd=%d: key:%s error:%s",
- fd, trav->key,
- strerror (errno));
-#else /* ! DARWIN */
- gf_log (this->name, GF_LOG_ERROR,
- "fd=%d: key:%s error:%s",
- fd, trav->key,
- strerror (errno));
-#endif /* DARWIN */
- }
-
- ret = -errno;
- goto out;
- }
+ posix_xattr_filler_t *filler = NULL;
-out:
- return ret;
-}
+ filler = tmp;
+ return posix_fhandle_pair (filler->this, filler->fd, k, v,
+ filler->flags);
+}
int32_t
posix_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *dict, int flags)
+ fd_t *fd, dict_t *dict, int flags, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
struct posix_fd * pfd = NULL;
- uint64_t tmp_pfd = 0;
int _fd = -1;
- data_pair_t * trav = NULL;
- int ret = -1;
+ int ret = -1;
+
+ posix_xattr_filler_t filler = {0,};
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -3376,108 +3397,190 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (fd, out);
VALIDATE_OR_GOTO (dict, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL from fd=%p", fd);
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd is NULL from fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
_fd = pfd->fd;
dict_del (dict, GFID_XATTR_KEY);
- trav = dict->members_list;
-
- while (trav) {
- ret = fhandle_pair (this, _fd, trav, flags);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- trav = trav->next;
- }
-
- op_ret = 0;
+ filler.fd = _fd;
+ filler.this = this;
+ filler.flags = flags;
+ op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair,
+ &filler);
+ if (op_ret < 0)
+ op_errno = -op_ret;
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL);
return 0;
}
+int
+_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data)
+{
+ int32_t op_ret = 0;
+ xlator_t *this = NULL;
+ posix_xattr_filler_t *filler = NULL;
+
+ filler = (posix_xattr_filler_t *) data;
+ this = filler->this;
+
+ op_ret = sys_lremovexattr (filler->real_path, key);
+ if (op_ret == -1) {
+ filler->op_errno = errno;
+ if (errno != ENOATTR && errno != EPERM)
+ gf_log (this->name, GF_LOG_ERROR,
+ "removexattr failed on %s (for %s): %s",
+ filler->real_path, key, strerror (errno));
+ }
+
+ return op_ret;
+}
+
int32_t
posix_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name)
+ loc_t *loc, const char *name, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
char * real_path = NULL;
+ posix_xattr_filler_t filler = {0,};
DECLARE_OLD_FS_ID_VAR;
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+
if (!strcmp (GFID_XATTR_KEY, name)) {
gf_log (this->name, GF_LOG_WARNING, "Remove xattr called"
- " on gfid for file %s", loc->path);
+ " on gfid for file %s", real_path);
+ op_ret = -1;
goto out;
}
- MAKE_REAL_PATH (real_path, this, loc->path);
SET_FS_ID (frame->root->uid, frame->root->gid);
+ /**
+ * sending an empty key name with xdata containing the
+ * list of key(s) to be removed implies "bulk remove request"
+ * for removexattr.
+ */
+ if (name && (strcmp (name, "") == 0) && xdata) {
+ filler.real_path = real_path;
+ filler.this = this;
+ op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler);
+ if (op_ret) {
+ op_errno = filler.op_errno;
+ }
+
+ goto out;
+ }
+
op_ret = sys_lremovexattr (real_path, name);
+ if (op_ret == -1) {
+ op_errno = errno;
+ if (op_errno != ENOATTR && op_errno != EPERM)
+ gf_log (this->name, GF_LOG_ERROR,
+ "removexattr on %s (for %s): %s", real_path,
+ name, strerror (op_errno));
+ goto out;
+ }
+
+ op_ret = 0;
+out:
+ SET_TO_OLD_FS_ID ();
+
+ STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int32_t
+posix_fremovexattr (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ struct posix_fd * pfd = NULL;
+ int _fd = -1;
+ int ret = -1;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ if (!strcmp (GFID_XATTR_KEY, name)) {
+ gf_log (this->name, GF_LOG_WARNING, "Remove xattr called"
+ " on gfid for file");
+ goto out;
+ }
+
+ ret = posix_fd_ctx_get (fd, this, &pfd);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+ _fd = pfd->fd;
+
+
+
+ SET_FS_ID (frame->root->uid, frame->root->gid);
+
+ op_ret = sys_fremovexattr (_fd, name);
if (op_ret == -1) {
op_errno = errno;
- if (op_errno != ENOATTR && op_errno != EPERM)
- gf_log (this->name, GF_LOG_ERROR,
- "removexattr on %s: %s", loc->path,
- strerror (op_errno));
+ if (op_errno != ENOATTR && op_errno != EPERM)
+ gf_log (this->name, GF_LOG_ERROR,
+ "fremovexattr (for %s): %s",
+ name, strerror (op_errno));
goto out;
}
op_ret = 0;
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, NULL);
return 0;
}
int32_t
posix_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int datasync)
+ fd_t *fd, int datasync, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
- struct posix_fd * pfd = NULL;
int ret = -1;
- uint64_t tmp_pfd = 0;
+ struct posix_fd *pfd = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
op_errno = -ret;
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"pfd is NULL, fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
op_ret = 0;
- out:
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno);
+out:
+ STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, NULL);
return 0;
}
@@ -3485,12 +3588,12 @@ posix_fsyncdir (call_frame_t *frame, xlator_t *this,
void
posix_print_xattr (dict_t *this,
- char *key,
- data_t *value,
- void *data)
+ char *key,
+ data_t *value,
+ void *data)
{
- gf_log ("posix", GF_LOG_DEBUG,
- "(key/val) = (%s/%d)", key, data_to_int32 (value));
+ gf_log ("posix", GF_LOG_DEBUG,
+ "(key/val) = (%s/%d)", key, data_to_int32 (value));
}
@@ -3504,200 +3607,259 @@ posix_print_xattr (dict_t *this,
static void
__add_array (int32_t *dest, int32_t *src, int count)
{
- int i = 0;
- for (i = 0; i < count; i++) {
- dest[i] = hton32 (ntoh32 (dest[i]) + ntoh32 (src[i]));
- }
+ int i = 0;
+ int32_t destval = 0;
+ for (i = 0; i < count; i++) {
+ destval = ntoh32 (dest[i]);
+ if (destval == 0xffffffff)
+ continue;
+ dest[i] = hton32 (destval + ntoh32 (src[i]));
+ }
}
+static void
+__or_array (int32_t *dest, int32_t *src, int count)
+{
+ int i = 0;
+ for (i = 0; i < count; i++) {
+ dest[i] = hton32 (ntoh32 (dest[i]) | ntoh32 (src[i]));
+ }
+}
-/**
- * xattrop - xattr operations - for internal use by GlusterFS
- * @optype: ADD_ARRAY:
- * dict should contain:
- * "key" ==> array of 32-bit numbers
- */
+static void
+__and_array (int32_t *dest, int32_t *src, int count)
+{
+ int i = 0;
+ for (i = 0; i < count; i++) {
+ dest[i] = hton32 (ntoh32 (dest[i]) & ntoh32 (src[i]));
+ }
+}
-int
-do_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
+static void
+__add_long_array (int64_t *dest, int64_t *src, int count)
{
- char *real_path = NULL;
- int32_t *array = NULL;
- int size = 0;
- int count = 0;
+ int i = 0;
+ for (i = 0; i < count; i++) {
+ dest[i] = hton64 (ntoh64 (dest[i]) + ntoh64 (src[i]));
+ }
+}
- int op_ret = 0;
- int op_errno = 0;
+static int
+_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
+ void *tmp)
+{
+ int size = 0;
+ int count = 0;
+ int op_ret = 0;
+ int op_errno = 0;
+ gf_xattrop_flags_t optype = 0;
+ char *array = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ posix_xattr_filler_t *filler = NULL;
+
+ filler = tmp;
+
+ optype = (gf_xattrop_flags_t)(filler->flags);
+ this = filler->this;
+ inode = filler->inode;
+
+ count = v->len;
+ array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char);
+
+ LOCK (&inode->lock);
+ {
+ if (filler->real_path) {
+ size = sys_lgetxattr (filler->real_path, k,
+ (char *)array, v->len);
+ } else {
+ size = sys_fgetxattr (filler->fd, k, (char *)array,
+ v->len);
+ }
- int ret = 0;
- int _fd = -1;
- uint64_t tmp_pfd = 0;
- struct posix_fd *pfd = NULL;
+ op_errno = errno;
+ if ((size == -1) && (op_errno != ENODATA) &&
+ (op_errno != ENOATTR)) {
+ if (op_errno == ENOTSUP) {
+ GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
+ this->name, GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported by filesystem");
+ } else if (op_errno != ENOENT ||
+ !posix_special_xattr (marker_xattrs,
+ k)) {
+ if (filler->real_path)
+ gf_log (this->name, GF_LOG_ERROR,
+ "getxattr failed on %s while doing "
+ "xattrop: Key:%s (%s)",
+ filler->real_path,
+ k, strerror (op_errno));
+ else
+ gf_log (this->name, GF_LOG_ERROR,
+ "fgetxattr failed on fd=%d while doing "
+ "xattrop: Key:%s (%s)",
+ filler->fd,
+ k, strerror (op_errno));
+ }
- data_pair_t *trav = NULL;
+ op_ret = -1;
+ goto unlock;
+ }
- char * path = NULL;
- inode_t * inode = NULL;
+ switch (optype) {
+
+ case GF_XATTROP_ADD_ARRAY:
+ __add_array ((int32_t *) array, (int32_t *) v->data,
+ v->len / 4);
+ break;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (xattr, out);
- VALIDATE_OR_GOTO (this, out);
+ case GF_XATTROP_ADD_ARRAY64:
+ __add_long_array ((int64_t *) array, (int64_t *) v->data,
+ v->len / 8);
+ break;
- trav = xattr->members_list;
+ case GF_XATTROP_OR_ARRAY:
+ __or_array ((int32_t *) array,
+ (int32_t *) v->data,
+ v->len / 4);
+ break;
- if (fd) {
- ret = fd_ctx_get (fd, this, &tmp_pfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get pfd from fd=%p",
- fd);
- op_ret = -1;
- op_errno = EBADFD;
- goto out;
- }
- pfd = (struct posix_fd *)(long)tmp_pfd;
- _fd = pfd->fd;
- }
+ case GF_XATTROP_AND_ARRAY:
+ __and_array ((int32_t *) array,
+ (int32_t *) v->data,
+ v->len / 4);
+ break;
- if (loc && loc->path)
- MAKE_REAL_PATH (real_path, this, loc->path);
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unknown xattrop type (%d) on %s. Please send "
+ "a bug report to gluster-devel@nongnu.org",
+ optype, filler->real_path);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unlock;
+ }
- if (loc) {
- path = gf_strdup (loc->path);
- inode = loc->inode;
- } else if (fd) {
- inode = fd->inode;
+ if (filler->real_path) {
+ size = sys_lsetxattr (filler->real_path, k, array,
+ v->len, 0);
+ } else {
+ size = sys_fsetxattr (filler->fd, k, (char *)array,
+ v->len, 0);
+ }
}
+unlock:
+ UNLOCK (&inode->lock);
- while (trav && inode) {
- count = trav->value->len / sizeof (int32_t);
- array = GF_CALLOC (count, sizeof (int32_t),
- gf_posix_mt_int32_t);
+ if (op_ret == -1)
+ goto out;
- LOCK (&inode->lock);
- {
- if (loc) {
- size = sys_lgetxattr (real_path, trav->key, (char *)array,
- trav->value->len);
- } else {
- size = sys_fgetxattr (_fd, trav->key, (char *)array,
- trav->value->len);
- }
+ op_errno = errno;
+ if (size == -1) {
+ if (filler->real_path)
+ gf_log (this->name, GF_LOG_ERROR,
+ "setxattr failed on %s while doing xattrop: "
+ "key=%s (%s)", filler->real_path,
+ k, strerror (op_errno));
+ else
+ gf_log (this->name, GF_LOG_ERROR,
+ "fsetxattr failed on fd=%d while doing xattrop: "
+ "key=%s (%s)", filler->fd,
+ k, strerror (op_errno));
- op_errno = errno;
- if ((size == -1) && (op_errno != ENODATA) &&
- (op_errno != ENOATTR)) {
- if (op_errno == ENOTSUP) {
- GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
- this->name,GF_LOG_WARNING,
- "Extended attributes not "
- "supported by filesystem");
- } else {
- if (loc)
- gf_log (this->name, GF_LOG_ERROR,
- "getxattr failed on %s while doing "
- "xattrop: %s", path,
- strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_ERROR,
- "fgetxattr failed on fd=%d while doing "
- "xattrop: %s", _fd,
- strerror (op_errno));
- }
+ op_ret = -1;
+ goto out;
+ } else {
+ size = dict_set_bin (d, k, array, v->len);
- op_ret = -1;
- goto unlock;
- }
+ if (size != 0) {
+ if (filler->real_path)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_set_bin failed (path=%s): "
+ "key=%s (%s)", filler->real_path,
+ k, strerror (-size));
+ else
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_set_bin failed (fd=%d): "
+ "key=%s (%s)", filler->fd,
+ k, strerror (-size));
- switch (optype) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ array = NULL;
+ }
- case GF_XATTROP_ADD_ARRAY:
- __add_array (array, (int32_t *) trav->value->data,
- trav->value->len / 4);
- break;
+ array = NULL;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "Unknown xattrop type (%d) on %s. Please send "
- "a bug report to gluster-devel@nongnu.org",
- optype, path);
- op_ret = -1;
- op_errno = EINVAL;
- goto unlock;
- }
+out:
+ return op_ret;
+}
- if (loc) {
- size = sys_lsetxattr (real_path, trav->key, array,
- trav->value->len, 0);
- } else {
- size = sys_fsetxattr (_fd, trav->key, (char *)array,
- trav->value->len, 0);
- }
- }
- unlock:
- UNLOCK (&inode->lock);
+/**
+ * xattrop - xattr operations - for internal use by GlusterFS
+ * @optype: ADD_ARRAY:
+ * dict should contain:
+ * "key" ==> array of 32-bit numbers
+ */
+
+int
+do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr)
+{
+ int op_ret = 0;
+ int op_errno = 0;
+ int _fd = -1;
+ char *real_path = NULL;
+ struct posix_fd *pfd = NULL;
+ inode_t *inode = NULL;
+ posix_xattr_filler_t filler = {0,};
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (xattr, out);
+ VALIDATE_OR_GOTO (this, out);
- if (op_ret == -1)
+ if (fd) {
+ op_ret = posix_fd_ctx_get (fd, this, &pfd);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "failed to get pfd from fd=%p",
+ fd);
+ op_errno = EBADFD;
goto out;
+ }
+ _fd = pfd->fd;
+ }
- op_errno = errno;
- if (size == -1) {
- if (loc)
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr failed on %s while doing xattrop: "
- "key=%s (%s)", path,
- trav->key, strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_ERROR,
- "fsetxattr failed on fd=%d while doing xattrop: "
- "key=%s (%s)", _fd,
- trav->key, strerror (op_errno));
+ if (loc && !uuid_is_null (loc->gfid))
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
- op_ret = -1;
- goto out;
- } else {
- size = dict_set_bin (xattr, trav->key, array,
- trav->value->len);
-
- if (size != 0) {
- if (loc)
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_bin failed (path=%s): "
- "key=%s (%s)", path,
- trav->key, strerror (-size));
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_bin failed (fd=%d): "
- "key=%s (%s)", _fd,
- trav->key, strerror (-size));
+ if (real_path) {
+ inode = loc->inode;
+ } else if (fd) {
+ inode = fd->inode;
+ }
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
- array = NULL;
- }
+ filler.this = this;
+ filler.fd = _fd;
+ filler.real_path = real_path;
+ filler.flags = (int)optype;
+ filler.inode = inode;
- array = NULL;
- trav = trav->next;
- }
+ op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair,
+ &filler);
out:
- if (array)
- GF_FREE (array);
- if (path)
- GF_FREE (path);
-
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr);
- return 0;
+ STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, NULL);
+ return 0;
}
int
posix_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr)
+ loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
do_xattrop (frame, this, loc, NULL, optype, xattr);
return 0;
@@ -3706,7 +3868,7 @@ posix_xattrop (call_frame_t *frame, xlator_t *this,
int
posix_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
+ fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
do_xattrop (frame, this, NULL, fd, optype, xattr);
return 0;
@@ -3715,7 +3877,7 @@ posix_fxattrop (call_frame_t *frame, xlator_t *this,
int
posix_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask)
+ loc_t *loc, int32_t mask, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
@@ -3728,28 +3890,28 @@ posix_access (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
- MAKE_REAL_PATH (real_path, this, loc->path);
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
op_ret = access (real_path, mask & 07);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR, "access failed on %s: %s",
- loc->path, strerror (op_errno));
+ real_path, strerror (op_errno));
goto out;
}
op_ret = 0;
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno);
+ STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, NULL);
return 0;
}
int32_t
posix_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset)
+ fd_t *fd, off_t offset, dict_t *xdata)
{
int32_t op_ret = -1;
int32_t op_errno = 0;
@@ -3758,7 +3920,6 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this,
struct iatt postop = {0,};
struct posix_fd *pfd = NULL;
int ret = -1;
- uint64_t tmp_pfd = 0;
struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -3771,18 +3932,17 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"pfd is NULL, fd=%p", fd);
op_errno = -ret;
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
_fd = pfd->fd;
- op_ret = posix_fstat_with_gfid (this, _fd, &preop);
+ op_ret = posix_fdstat (this, _fd, &preop);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -3795,13 +3955,13 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this,
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "ftruncate failed on fd=%p: %s",
- fd, strerror (errno));
+ gf_log (this->name, GF_LOG_ERROR,
+ "ftruncate failed on fd=%p (%"PRId64": %s",
+ fd, offset, strerror (errno));
goto out;
}
- op_ret = posix_fstat_with_gfid (this, _fd, &postop);
+ op_ret = posix_fdstat (this, _fd, &postop);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR,
@@ -3812,10 +3972,11 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this,
op_ret = 0;
- out:
+out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop, &postop);
+ STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop,
+ &postop, NULL);
return 0;
}
@@ -3823,16 +3984,15 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this,
int32_t
posix_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd)
+ fd_t *fd, dict_t *xdata)
{
int _fd = -1;
int32_t op_ret = -1;
int32_t op_errno = 0;
struct iatt buf = {0,};
struct posix_fd *pfd = NULL;
- uint64_t tmp_pfd = 0;
int ret = -1;
- struct posix_private *priv = NULL;
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -3844,18 +4004,17 @@ posix_fstat (call_frame_t *frame, xlator_t *this,
priv = this->private;
VALIDATE_OR_GOTO (priv, out);
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"pfd is NULL, fd=%p", fd);
op_errno = -ret;
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
_fd = pfd->fd;
- op_ret = posix_fstat_with_gfid (this, _fd, &buf);
+ op_ret = posix_fdstat (this, _fd, &buf);
if (op_ret == -1) {
op_errno = errno;
gf_log (this->name, GF_LOG_ERROR, "fstat failed on fd=%p: %s",
@@ -3868,7 +4027,7 @@ posix_fstat (call_frame_t *frame, xlator_t *this,
out:
SET_TO_OLD_FS_ID ();
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf);
+ STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf, NULL);
return 0;
}
@@ -3876,154 +4035,102 @@ static int gf_posix_lk_log;
int32_t
posix_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *lock)
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
struct gf_flock nullock = {0, };
- gf_posix_lk_log++;
-
- GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_ERROR,
- "\"features/locks\" translator is "
- "not loaded. You need to use it for proper "
+ GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
"functioning of your application.");
- STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock);
+ STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock, NULL);
return 0;
}
int32_t
posix_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- gf_log (this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is not loaded. "
- "You need to use it for proper functioning of GlusterFS");
+ GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
- STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS);
+ STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS, NULL);
return 0;
}
int32_t
posix_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock)
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- gf_log (this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is not loaded. "
- "You need to use it for proper functioning of GlusterFS");
+ GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
- STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS);
+ STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS, NULL);
return 0;
}
int32_t
posix_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- gf_log (this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is not loaded. "
- "You need to use it for proper functioning of GlusterFS");
+ GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
- STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS);
+ STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS, NULL);
return 0;
}
int32_t
posix_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
{
- gf_log (this->name, GF_LOG_CRITICAL,
- "\"features/locks\" translator is not loaded. "
- " You need to use it for proper functioning of GlusterFS");
+ GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
- STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS);
+ STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS, NULL);
return 0;
}
-int32_t
-posix_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off, int whichop)
+int
+posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
+ gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs)
{
- uint64_t tmp_pfd = 0;
- struct posix_fd *pfd = NULL;
- DIR *dir = NULL;
- int ret = -1;
- size_t filled = 0;
- int count = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- gf_dirent_t *this_entry = NULL;
- gf_dirent_t entries;
- struct dirent *entry = NULL;
- off_t in_case = -1;
+ off_t in_case = -1;
+ size_t filled = 0;
+ int count = 0;
+ char entrybuf[sizeof(struct dirent) + 256 + 8];
+ struct dirent *entry = NULL;
int32_t this_size = -1;
- char *real_path = NULL;
- int real_path_len = -1;
- char *entry_path = NULL;
- int entry_path_len = -1;
- struct posix_private *priv = NULL;
- struct iatt stbuf = {0, };
- char base_path[PATH_MAX] = {0,};
- gf_dirent_t *tmp_entry = NULL;
- struct stat statbuf = {0, };
- char hidden_path[PATH_MAX] = {0, };
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
-
- INIT_LIST_HEAD (&entries.list);
-
- priv = this->private;
-
- ret = fd_ctx_get (fd, this, &tmp_pfd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd is NULL, fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
- pfd = (struct posix_fd *)(long)tmp_pfd;
- if (!pfd->path) {
- op_errno = EBADFD;
- gf_log (this->name, GF_LOG_DEBUG,
- "pfd does not have path set (possibly file "
- "fd, fd=%p)", fd);
- goto out;
- }
-
- real_path = pfd->path;
- real_path_len = strlen (real_path);
-
- entry_path_len = real_path_len + NAME_MAX;
- entry_path = alloca (entry_path_len);
-
- strncpy(base_path, POSIX_BASE_PATH(this), sizeof(base_path));
- base_path[strlen(base_path)] = '/';
+ gf_dirent_t *this_entry = NULL;
+ uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+ struct stat stbuf = {0,};
+ char *hpath = NULL;
+ int len = 0;
+ int ret = 0;
- if (!entry_path) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- goto out;
+ if (skip_dirs) {
+ len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0);
+ hpath = alloca (len + 256); /* NAME_MAX */
+ posix_handle_path (this, fd->inode->gfid, NULL, hpath, len);
+ len = strlen (hpath);
+ hpath[len] = '/';
}
- strncpy (entry_path, real_path, entry_path_len);
- entry_path[real_path_len] = '/';
-
- dir = pfd->dir;
-
- if (!dir) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dir is NULL for fd=%p", fd);
- op_errno = EINVAL;
- goto out;
- }
-
-
if (!off) {
rewinddir (dir);
} else {
@@ -4034,40 +4141,56 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this,
in_case = telldir (dir);
if (in_case == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "telldir failed on dir=%p: %s",
+ gf_log (THIS->name, GF_LOG_ERROR,
+ "telldir failed on dir=%p: %s",
dir, strerror (errno));
goto out;
}
errno = 0;
- entry = readdir (dir);
+ entry = NULL;
+ readdir_r (dir, (struct dirent *)entrybuf, &entry);
if (!entry) {
if (errno == EBADF) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir failed on dir=%p: %s",
- dir, strerror (op_errno));
+ gf_log (THIS->name, GF_LOG_WARNING,
+ "readdir failed on dir=%p: %s",
+ dir, strerror (errno));
goto out;
}
break;
}
- if ((!strcmp(real_path, base_path))
- && (!strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)))
+#ifdef __NetBSD__
+ /*
+ * NetBSD with UFS1 backend uses backing files for
+ * extended attributes. They can be found in a
+ * .attribute file located at the root of the filesystem
+ * We hide it to glusterfs clients, since chaos will occur
+ * when the cluster/dht xlator decides to distribute
+ * exended attribute backing file accross storage servers.
+ */
+ if ((uuid_compare (fd->inode->gfid, rootgfid) == 0)
+ && (!strcmp(entry->d_name, ".attribute")))
+ continue;
+#endif /* __NetBSD__ */
+
+ if ((uuid_compare (fd->inode->gfid, rootgfid) == 0)
+ && (!strcmp (GF_HIDDEN_PATH, entry->d_name))) {
continue;
+ }
- if ((!strcmp (real_path, base_path))
- && (!strncmp (GF_HIDDEN_PATH, entry->d_name,
- strlen(GF_HIDDEN_PATH)))) {
- snprintf (hidden_path, PATH_MAX, "%s/%s", real_path,
- entry->d_name);
- ret = lstat (hidden_path, &statbuf);
- if (!ret && S_ISDIR (statbuf.st_mode))
+ if (skip_dirs) {
+ if (DT_ISDIR (entry->d_type)) {
continue;
+ } else if (hpath) {
+ strcpy (&hpath[len+1],entry->d_name);
+ ret = lstat (hpath, &stbuf);
+ if (!ret && S_ISDIR (stbuf.st_mode))
+ continue;
+ }
}
+
this_size = max (sizeof (gf_dirent_t),
sizeof (gfs3_dirplist))
+ strlen (entry->d_name) + 1;
@@ -4077,44 +4200,182 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this,
break;
}
- stbuf.ia_ino = entry->d_ino;
-
- entry->d_ino = stbuf.ia_ino;
-
this_entry = gf_dirent_for_name (entry->d_name);
if (!this_entry) {
- gf_log (this->name, GF_LOG_ERROR,
+ gf_log (THIS->name, GF_LOG_ERROR,
"could not create gf_dirent for entry %s: (%s)",
entry->d_name, strerror (errno));
goto out;
}
this_entry->d_off = telldir (dir);
this_entry->d_ino = entry->d_ino;
+ this_entry->d_type = entry->d_type;
- list_add_tail (&this_entry->list, &entries.list);
+ list_add_tail (&this_entry->list, &entries->list);
filled += this_size;
count ++;
}
- if (whichop == GF_FOP_READDIRP) {
- list_for_each_entry (tmp_entry, &entries.list, list) {
- strcpy (entry_path + real_path_len + 1,
- tmp_entry->d_name);
- posix_lstat_with_gfid (this, entry_path, &stbuf);
- tmp_entry->d_stat = stbuf;
+ if ((!readdir (dir) && (errno == 0)))
+ /* Indicate EOF */
+ errno = ENOENT;
+out:
+ return count;
+}
+
+dict_t *
+posix_entry_xattr_fill (xlator_t *this, inode_t *inode,
+ fd_t *fd, char *name, dict_t *dict,
+ struct iatt *stbuf)
+{
+ loc_t tmp_loc = {0,};
+ char *entry_path = NULL;
+
+ /* if we don't send the 'loc', open-fd-count be a problem. */
+ tmp_loc.inode = inode;
+
+ MAKE_HANDLE_PATH (entry_path, this, fd->inode->gfid, name);
+
+ return posix_lookup_xattr_fill (this, entry_path,
+ &tmp_loc, dict, stbuf);
+
+}
+
+
+int
+posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict)
+{
+ gf_dirent_t *entry = NULL;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+ char *hpath = NULL;
+ int len = 0;
+ struct iatt stbuf = {0, };
+ uuid_t gfid;
+
+ if (list_empty(&entries->list))
+ return 0;
+
+ itable = fd->inode->table;
+
+ len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0);
+ hpath = alloca (len + 256); /* NAME_MAX */
+ posix_handle_path (this, fd->inode->gfid, NULL, hpath, len);
+ len = strlen (hpath);
+ hpath[len] = '/';
+
+ list_for_each_entry (entry, &entries->list, list) {
+ memset (gfid, 0, 16);
+ inode = inode_grep (fd->inode->table, fd->inode,
+ entry->d_name);
+ if (inode)
+ uuid_copy (gfid, inode->gfid);
+
+ strcpy (&hpath[len+1], entry->d_name);
+
+ posix_pstat (this, gfid, hpath, &stbuf);
+
+ if (!inode)
+ inode = inode_find (itable, stbuf.ia_gfid);
+
+ if (!inode)
+ inode = inode_new (itable);
+
+ entry->inode = inode;
+
+ if (dict) {
+ entry->dict =
+ posix_entry_xattr_fill (this, entry->inode,
+ fd, entry->d_name,
+ dict, &stbuf);
+ dict_ref (entry->dict);
}
+
+ entry->d_stat = stbuf;
+ if (stbuf.ia_ino)
+ entry->d_ino = stbuf.ia_ino;
+ inode = NULL;
+ }
+
+ return 0;
+}
+
+
+int32_t
+posix_do_readdir (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t off, int whichop, dict_t *dict)
+{
+ struct posix_fd *pfd = NULL;
+ DIR *dir = NULL;
+ int ret = -1;
+ int count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ gf_dirent_t entries;
+ int32_t skip_dirs = 0;
+
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (fd, out);
+
+ INIT_LIST_HEAD (&entries.list);
+
+ ret = posix_fd_ctx_get (fd, this, &pfd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "pfd is NULL, fd=%p", fd);
+ op_errno = -ret;
+ goto out;
}
+
+ dir = pfd->dir;
+
+ if (!dir) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dir is NULL for fd=%p", fd);
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* When READDIR_FILTER option is set to on, we can filter out
+ * directory's entry from the entry->list.
+ */
+ ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs);
+
+ LOCK (&fd->lock);
+ {
+ /* posix_fill_readdir performs multiple separate individual
+ readdir() calls to fill up the buffer.
+
+ In case of NFS where the same anonymous FD is shared between
+ different applications, reading a common directory can
+ result in the anonymous fd getting re-used unsafely between
+ the two readdir requests (in two different io-threads).
+
+ It would also help, in the future, to replace the loop
+ around readdir() with a single large getdents() call.
+ */
+ count = posix_fill_readdir (fd, dir, off, size, &entries, this,
+ skip_dirs);
+ }
+ UNLOCK (&fd->lock);
+
+ /* pick ENOENT to indicate EOF */
+ op_errno = errno;
op_ret = count;
- errno = 0;
- if ((!readdir (dir) && (errno == 0)))
- op_errno = ENOENT;
- out:
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries);
+ if (whichop != GF_FOP_READDIRP)
+ goto out;
+
+ posix_readdirp_fill (this, fd, &entries, dict);
- gf_dirent_free (&entries);
+out:
+ STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL);
+
+ gf_dirent_free (&entries);
return 0;
}
@@ -4122,18 +4383,18 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this,
int32_t
posix_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
+ fd_t *fd, size_t size, off_t off, dict_t *xdata)
{
- posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR);
+ posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR, xdata);
return 0;
}
int32_t
posix_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off)
+ fd_t *fd, size_t size, off_t off, dict_t *dict)
{
- posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP);
+ posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP, dict);
return 0;
}
@@ -4142,30 +4403,24 @@ posix_priv (xlator_t *this)
{
struct posix_private *priv = NULL;
char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type,
- this->name);
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type,
+ this->name);
gf_proc_dump_add_section(key_prefix);
- if (!this)
+ if (!this)
return 0;
priv = this->private;
- if (!priv)
+ if (!priv)
return 0;
- gf_proc_dump_build_key(key, key_prefix, "base_path");
- gf_proc_dump_write(key,"%s", priv->base_path);
- gf_proc_dump_build_key(key, key_prefix, "base_path_length");
- gf_proc_dump_write(key,"%d", priv->base_path_length);
- gf_proc_dump_build_key(key, key_prefix, "max_read");
- gf_proc_dump_write(key,"%d", priv->read_value);
- gf_proc_dump_build_key(key, key_prefix, "max_write");
- gf_proc_dump_write(key,"%d", priv->write_value);
- gf_proc_dump_build_key(key, key_prefix, "nr_files");
- gf_proc_dump_write(key,"%ld", priv->nr_files);
+ gf_proc_dump_write("base_path","%s", priv->base_path);
+ gf_proc_dump_write("base_path_length","%d", priv->base_path_length);
+ gf_proc_dump_write("max_read","%d", priv->read_value);
+ gf_proc_dump_write("max_write","%d", priv->write_value);
+ gf_proc_dump_write("nr_files","%ld", priv->nr_files);
return 0;
}
@@ -4179,67 +4434,72 @@ posix_inode (xlator_t *this)
int32_t
posix_rchecksum (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, int32_t len)
+ fd_t *fd, off_t offset, int32_t len, dict_t *xdata)
{
- char *buf = NULL;
-
- int _fd = -1;
- uint64_t tmp_pfd = 0;
-
- struct posix_fd *pfd = NULL;
-
- int op_ret = -1;
- int op_errno = 0;
-
- int ret = 0;
-
- int32_t weak_checksum = 0;
- uint8_t strong_checksum[MD5_DIGEST_LEN];
+ char *alloc_buf = NULL;
+ char *buf = NULL;
+ int _fd = -1;
+ struct posix_fd *pfd = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+ int ret = 0;
+ int32_t weak_checksum = 0;
+ unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0};
+ struct posix_private *priv = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
- memset (strong_checksum, 0, MD5_DIGEST_LEN);
- buf = GF_CALLOC (1, len, gf_posix_mt_char);
+ priv = this->private;
+ memset (strong_checksum, 0, MD5_DIGEST_LENGTH);
- if (!buf) {
+ alloc_buf = _page_aligned_alloc (len, &buf);
+ if (!alloc_buf) {
op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
goto out;
}
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
+ gf_log (this->name, GF_LOG_WARNING,
"pfd is NULL, fd=%p", fd);
op_errno = -ret;
goto out;
}
- pfd = (struct posix_fd *)(long) tmp_pfd;
_fd = pfd->fd;
- ret = pread (_fd, buf, len, offset);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "pread of %d bytes returned %d (%s)",
- len, ret, strerror (errno));
+ LOCK (&fd->lock);
+ {
+ if (priv->aio_capable && priv->aio_init_done)
+ __posix_fd_set_odirect (fd, pfd, 0, offset, len);
+
+ ret = pread (_fd, buf, len, offset);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "pread of %d bytes returned %d (%s)",
+ len, ret, strerror (errno));
+
+ op_errno = errno;
+ }
- op_errno = errno;
- goto out;
}
+ UNLOCK (&fd->lock);
- weak_checksum = gf_rsync_weak_checksum (buf, len);
- gf_rsync_strong_checksum (buf, len, strong_checksum);
+ if (ret < 0)
+ goto out;
- GF_FREE (buf);
+ weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) len);
+ gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) len, (unsigned char *) strong_checksum);
op_ret = 0;
out:
STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno,
- weak_checksum, strong_checksum);
+ weak_checksum, strong_checksum, NULL);
+
+ GF_FREE (alloc_buf);
+
return 0;
}
@@ -4254,17 +4514,17 @@ notify (xlator_t *this,
...)
{
switch (event)
- {
- case GF_EVENT_PARENT_UP:
- {
- /* Tell the parent that posix xlator is up */
- default_notify (this, GF_EVENT_CHILD_UP, data);
- }
- break;
- default:
- /* */
- break;
- }
+ {
+ case GF_EVENT_PARENT_UP:
+ {
+ /* Tell the parent that posix xlator is up */
+ default_notify (this, GF_EVENT_CHILD_UP, data);
+ }
+ break;
+ default:
+ /* */
+ break;
+ }
return 0;
}
@@ -4277,31 +4537,133 @@ mem_acct_init (xlator_t *this)
return ret;
ret = xlator_mem_acct_init (this, gf_posix_mt_end + 1);
-
+
if (ret != 0) {
gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
+ "failed");
return ret;
}
return ret;
}
+static int
+posix_set_owner (xlator_t *this, uid_t uid, gid_t gid)
+{
+ struct posix_private *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ ret = sys_chown (priv->base_path, uid, gid);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "uid/gid for brick path %s, %s",
+ priv->base_path, strerror (errno));
+
+ return ret;
+}
+
+
+static int
+set_batch_fsync_mode (struct posix_private *priv, const char *str)
+{
+ if (strcmp (str, "none") == 0)
+ priv->batch_fsync_mode = BATCH_NONE;
+ else if (strcmp (str, "syncfs") == 0)
+ priv->batch_fsync_mode = BATCH_SYNCFS;
+ else if (strcmp (str, "syncfs-single-fsync") == 0)
+ priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC;
+ else if (strcmp (str, "syncfs-reverse-fsync") == 0)
+ priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC;
+ else if (strcmp (str, "reverse-fsync") == 0)
+ priv->batch_fsync_mode = BATCH_REVERSE_FSYNC;
+ else
+ return -1;
+
+ return 0;
+}
+
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ int ret = -1;
+ struct posix_private *priv = NULL;
+ uid_t uid = -1;
+ gid_t gid = -1;
+ char *batch_fsync_mode_str = NULL;
+
+ priv = this->private;
+
+ GF_OPTION_RECONF ("brick-uid", uid, options, uint32, out);
+ GF_OPTION_RECONF ("brick-gid", gid, options, uint32, out);
+ posix_set_owner (this, uid, gid);
+
+ GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec,
+ options, uint32, out);
+
+ GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str,
+ options, str, out);
+
+ if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s",
+ batch_fsync_mode_str);
+ goto out;
+ }
+
+ GF_OPTION_RECONF ("linux-aio", priv->aio_configured,
+ options, bool, out);
+
+ if (priv->aio_configured)
+ posix_aio_on (this);
+ else
+ posix_aio_off (this);
+
+ GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo,
+ options, bool, out);
+
+ if (priv->node_uuid_pathinfo &&
+ (uuid_is_null (priv->glusterd_uuid))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "glusterd uuid is NULL, pathinfo xattr would"
+ " fallback to <hostname>:<export>");
+ }
+
+ GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval,
+ options, uint32, out);
+ posix_spawn_health_check_thread (this);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+
/**
* init -
*/
int
init (xlator_t *this)
{
- struct posix_private *_private = NULL;
- data_t *dir_data = NULL;
- data_t *tmp_data = NULL;
- struct stat buf = {0,};
- gf_boolean_t tmp_bool = 0;
- int dict_ret = 0;
- int ret = 0;
- int op_ret = -1;
- int32_t janitor_sleep = 0;
+ struct posix_private *_private = NULL;
+ data_t *dir_data = NULL;
+ data_t *tmp_data = NULL;
+ struct stat buf = {0,};
+ gf_boolean_t tmp_bool = 0;
+ int dict_ret = 0;
+ int ret = 0;
+ int op_ret = -1;
+ ssize_t size = -1;
+ int32_t janitor_sleep = 0;
+ uuid_t old_uuid = {0,};
+ uuid_t dict_uuid = {0,};
+ uuid_t gfid = {0,};
+ uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+ char *guuid = NULL;
+ uid_t uid = -1;
+ gid_t gid = -1;
+ char *batch_fsync_mode_str;
dir_data = dict_get (this->options, "directory");
@@ -4312,10 +4674,10 @@ init (xlator_t *this)
goto out;
}
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling. Please check the volume file.");
- }
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Volume is dangling. Please check the volume file.");
+ }
if (!dir_data) {
gf_log (this->name, GF_LOG_CRITICAL,
@@ -4331,73 +4693,145 @@ init (xlator_t *this)
if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) {
gf_log (this->name, GF_LOG_ERROR,
"Directory '%s' doesn't exist, exiting.",
- dir_data->data);
+ dir_data->data);
ret = -1;
goto out;
}
-
/* Check for Extended attribute support, if not present, log it */
op_ret = sys_lsetxattr (dir_data->data,
- "trusted.glusterfs.test", "working", 8, 0);
- if (op_ret < 0) {
- tmp_data = dict_get (this->options,
- "mandate-attribute");
- if (tmp_data) {
- if (gf_string2boolean (tmp_data->data,
- &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "wrong option provided for key "
- "\"mandate-attribute\"");
- ret = -1;
- goto out;
- }
- if (!tmp_bool) {
- gf_log (this->name, GF_LOG_WARNING,
- "Extended attribute not supported, "
- "starting as per option");
- } else {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Extended attribute not supported, "
- "exiting.");
- ret = -1;
- goto out;
- }
- } else {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Extended attribute not supported, exiting.");
- ret = -1;
- goto out;
- }
+ "trusted.glusterfs.test", "working", 8, 0);
+ if (op_ret == 0) {
+ sys_lremovexattr (dir_data->data, "trusted.glusterfs.test");
+ } else {
+ tmp_data = dict_get (this->options,
+ "mandate-attribute");
+ if (tmp_data) {
+ if (gf_string2boolean (tmp_data->data,
+ &tmp_bool) == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "wrong option provided for key "
+ "\"mandate-attribute\"");
+ ret = -1;
+ goto out;
+ }
+ if (!tmp_bool) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "Extended attribute not supported, "
+ "starting as per option");
+ } else {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Extended attribute not supported, "
+ "exiting.");
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "Extended attribute not supported, exiting.");
+ ret = -1;
+ goto out;
+ }
}
- _private = GF_CALLOC (1, sizeof (*_private),
- gf_posix_mt_posix_private);
- if (!_private) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ tmp_data = dict_get (this->options, "volume-id");
+ if (tmp_data) {
+ op_ret = uuid_parse (tmp_data->data, dict_uuid);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "wrong volume-id (%s) set in volume file",
+ tmp_data->data);
+ ret = -1;
+ goto out;
+ }
+ size = sys_lgetxattr (dir_data->data,
+ "trusted.glusterfs.volume-id", old_uuid, 16);
+ if (size == 16) {
+ if (uuid_compare (old_uuid, dict_uuid)) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "mismatching volume-id (%s) received. "
+ "already is a part of volume %s ",
+ tmp_data->data, uuid_utoa (old_uuid));
+ ret = -1;
+ goto out;
+ }
+ } else if ((size == -1) && (errno == ENODATA)) {
+
+ gf_log (this->name, GF_LOG_ERROR,
+ "Extended attribute trusted.glusterfs."
+ "volume-id is absent");
+ ret = -1;
+ goto out;
+
+ } else if ((size == -1) && (errno != ENODATA)) {
+ /* Wrong 'volume-id' is set, it should be error */
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to fetch volume-id (%s)",
+ dir_data->data, strerror (errno));
+ ret = -1;
+ goto out;
+ } else {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "failed to fetch proper volume id from export");
+ goto out;
+ }
+ }
+
+ /* Now check if the export directory has some other 'gfid',
+ other than that of root '/' */
+ size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16);
+ if (size == 16) {
+ if (!__is_root_gfid (gfid)) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: gfid (%s) is not that of glusterfs '/' ",
+ dir_data->data, uuid_utoa (gfid));
+ ret = -1;
+ goto out;
+ }
+ } else if (size != -1) {
+ /* Wrong 'gfid' is set, it should be error */
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: wrong value set as gfid",
+ dir_data->data);
ret = -1;
goto out;
+ } else if ((size == -1) && (errno != ENODATA)) {
+ /* Wrong 'gfid' is set, it should be error */
+ gf_log (this->name, GF_LOG_WARNING,
+ "%s: failed to fetch gfid (%s)",
+ dir_data->data, strerror (errno));
+ ret = -1;
+ goto out;
+ } else {
+ /* First time volume, set the GFID */
+ size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid,
+ 16, XATTR_CREATE);
+ if (size) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid (%s)",
+ dir_data->data, strerror (errno));
+ ret = -1;
+ goto out;
+ }
}
- _private->base_path = gf_strdup (dir_data->data);
- _private->base_path_length = strlen (_private->base_path);
-
- _private->trash_path = GF_CALLOC (1, _private->base_path_length
- + strlen ("/")
- + strlen (GF_REPLICATE_TRASH_DIR)
- + 1,
- gf_posix_mt_trash_path);
+ size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR,
+ NULL, 0);
+ if ((size < 0) && (errno == ENOTSUP))
+ gf_log (this->name, GF_LOG_WARNING,
+ "Posix access control list is not supported.");
- if (!_private->trash_path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
+ ret = 0;
+ _private = GF_CALLOC (1, sizeof (*_private),
+ gf_posix_mt_posix_private);
+ if (!_private) {
ret = -1;
goto out;
}
- strncpy (_private->trash_path, _private->base_path, _private->base_path_length);
- strcat (_private->trash_path, "/" GF_REPLICATE_TRASH_DIR);
+ _private->base_path = gf_strdup (dir_data->data);
+ _private->base_path_length = strlen (_private->base_path);
LOCK_INIT (&_private->lock);
@@ -4406,7 +4840,6 @@ init (xlator_t *this)
_private->hostname = GF_CALLOC (256, sizeof (char),
gf_common_mt_char);
if (!_private->hostname) {
- gf_log (this->name, GF_LOG_ERROR, "not enough memory");
goto out;
}
ret = gethostname (_private->hostname, 256);
@@ -4419,63 +4852,85 @@ init (xlator_t *this)
_private->export_statfs = 1;
tmp_data = dict_get (this->options, "export-statfs-size");
if (tmp_data) {
- if (gf_string2boolean (tmp_data->data,
- &_private->export_statfs) == -1) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "'export-statfs-size' takes only boolean "
- "options");
- goto out;
- }
+ if (gf_string2boolean (tmp_data->data,
+ &_private->export_statfs) == -1) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "'export-statfs-size' takes only boolean "
+ "options");
+ goto out;
+ }
if (!_private->export_statfs)
gf_log (this->name, GF_LOG_DEBUG,
- "'statfs()' returns dummy size");
+ "'statfs()' returns dummy size");
}
_private->background_unlink = 0;
tmp_data = dict_get (this->options, "background-unlink");
if (tmp_data) {
- if (gf_string2boolean (tmp_data->data,
- &_private->background_unlink) == -1) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "'background-unlink' takes only boolean "
- "options");
- goto out;
- }
+ if (gf_string2boolean (tmp_data->data,
+ &_private->background_unlink) == -1) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "'background-unlink' takes only boolean "
+ "options");
+ goto out;
+ }
if (_private->background_unlink)
gf_log (this->name, GF_LOG_DEBUG,
- "unlinks will be performed in background");
+ "unlinks will be performed in background");
}
tmp_data = dict_get (this->options, "o-direct");
if (tmp_data) {
- if (gf_string2boolean (tmp_data->data,
- &_private->o_direct) == -1) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "wrong option provided for 'o-direct'");
- goto out;
- }
- if (_private->o_direct)
+ if (gf_string2boolean (tmp_data->data,
+ &_private->o_direct) == -1) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "wrong option provided for 'o-direct'");
+ goto out;
+ }
+ if (_private->o_direct)
gf_log (this->name, GF_LOG_DEBUG,
"o-direct mode is enabled (O_DIRECT "
- "for every open)");
+ "for every open)");
+ }
+
+ ret = dict_get_str (this->options, "glusterd-uuid", &guuid);
+ if (!ret) {
+ if (uuid_parse (guuid, _private->glusterd_uuid))
+ gf_log (this->name, GF_LOG_WARNING, "Cannot parse "
+ "glusterd (node) UUID, node-uuid xattr "
+ "request would return - \"No such attribute\"");
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG, "No glusterd (node) UUID "
+ "passed - node-uuid xattr request will return "
+ "\"No such attribute\"");
}
+ ret = 0;
_private->janitor_sleep_duration = 600;
- dict_ret = dict_get_int32 (this->options, "janitor-sleep-duration",
+ dict_ret = dict_get_int32 (this->options, "janitor-sleep-duration",
&janitor_sleep);
- if (dict_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Setting janitor sleep duration to %d.",
- janitor_sleep);
-
- _private->janitor_sleep_duration = janitor_sleep;
- }
+ if (dict_ret == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Setting janitor sleep duration to %d.",
+ janitor_sleep);
+ _private->janitor_sleep_duration = janitor_sleep;
+ }
+ /* performing open dir on brick dir locks the brick dir
+ * and prevents it from being unmounted
+ */
+ _private->mount_lock = opendir (dir_data->data);
+ if (!_private->mount_lock) {
+ ret = -1;
+ gf_log (this->name, GF_LOG_ERROR,
+ "Could not lock brick directory");
+ goto out;
+ }
#ifndef GF_DARWIN_HOST_OS
{
struct rlimit lim;
@@ -4484,20 +4939,20 @@ init (xlator_t *this)
if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
gf_log (this->name, GF_LOG_WARNING,
- "Failed to set 'ulimit -n "
- " 1048576': %s", strerror(errno));
+ "Failed to set 'ulimit -n "
+ " 1048576': %s", strerror(errno));
lim.rlim_cur = 65536;
lim.rlim_max = 65536;
if (setrlimit (RLIMIT_NOFILE, &lim) == -1) {
gf_log (this->name, GF_LOG_WARNING,
- "Failed to set maximum allowed open "
- "file descriptors to 64k: %s",
+ "Failed to set maximum allowed open "
+ "file descriptors to 64k: %s",
strerror(errno));
}
else {
- gf_log (this->name, GF_LOG_NORMAL,
- "Maximum allowed open file descriptors "
+ gf_log (this->name, GF_LOG_INFO,
+ "Maximum allowed open file descriptors "
"set to 65536");
}
}
@@ -4505,12 +4960,85 @@ init (xlator_t *this)
#endif
this->private = (void *)_private;
+ op_ret = posix_handle_init (this);
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Posix handle setup failed");
+ ret = -1;
+ goto out;
+ }
+
+ op_ret = posix_handle_trash_init (this);
+ if (op_ret < 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Posix landfill setup failed");
+ ret = -1;
+ goto out;
+ }
+
+ _private->aio_init_done = _gf_false;
+ _private->aio_capable = _gf_false;
+
+ GF_OPTION_INIT ("brick-uid", uid, uint32, out);
+ GF_OPTION_INIT ("brick-gid", gid, uint32, out);
+ posix_set_owner (this, uid, gid);
+
+ GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out);
+
+ if (_private->aio_configured) {
+ op_ret = posix_aio_on (this);
+
+ if (op_ret == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Posix AIO init failed");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ GF_OPTION_INIT ("node-uuid-pathinfo",
+ _private->node_uuid_pathinfo, bool, out);
+ if (_private->node_uuid_pathinfo &&
+ (uuid_is_null (_private->glusterd_uuid))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "glusterd uuid is NULL, pathinfo xattr would"
+ " fallback to <hostname>:<export>");
+ }
+
+ _private->health_check_active = _gf_false;
+ GF_OPTION_INIT ("health-check-interval",
+ _private->health_check_interval, uint32, out);
+ if (_private->health_check_interval)
+ posix_spawn_health_check_thread (this);
+
pthread_mutex_init (&_private->janitor_lock, NULL);
pthread_cond_init (&_private->janitor_cond, NULL);
INIT_LIST_HEAD (&_private->janitor_fds);
posix_spawn_janitor_thread (this);
- out:
+
+ pthread_mutex_init (&_private->fsync_mutex, NULL);
+ pthread_cond_init (&_private->fsync_cond, NULL);
+ INIT_LIST_HEAD (&_private->fsyncs);
+
+ ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "fsyncer thread"
+ " creation failed (%s)", strerror (errno));
+ goto out;
+ }
+
+ GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out);
+
+ if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s",
+ batch_fsync_mode_str);
+ goto out;
+ }
+
+ GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec,
+ uint32, out);
+out:
return ret;
}
@@ -4521,7 +5049,9 @@ fini (xlator_t *this)
if (!priv)
return;
this->private = NULL;
- sys_lremovexattr (priv->base_path, "trusted.glusterfs.test");
+ /*unlock brick dir*/
+ if (priv->mount_lock)
+ closedir (priv->mount_lock);
GF_FREE (priv);
return;
}
@@ -4558,42 +5088,105 @@ struct xlator_fops fops = {
.getxattr = posix_getxattr,
.fgetxattr = posix_fgetxattr,
.removexattr = posix_removexattr,
+ .fremovexattr = posix_fremovexattr,
.fsyncdir = posix_fsyncdir,
.access = posix_access,
.ftruncate = posix_ftruncate,
.fstat = posix_fstat,
.lk = posix_lk,
- .inodelk = posix_inodelk,
- .finodelk = posix_finodelk,
- .entrylk = posix_entrylk,
- .fentrylk = posix_fentrylk,
+ .inodelk = posix_inodelk,
+ .finodelk = posix_finodelk,
+ .entrylk = posix_entrylk,
+ .fentrylk = posix_fentrylk,
.rchecksum = posix_rchecksum,
- .xattrop = posix_xattrop,
- .fxattrop = posix_fxattrop,
+ .xattrop = posix_xattrop,
+ .fxattrop = posix_fxattrop,
.setattr = posix_setattr,
.fsetattr = posix_fsetattr,
+ .fallocate = _posix_fallocate,
+ .discard = posix_discard,
+ .zerofill = posix_zerofill,
};
struct xlator_cbks cbks = {
- .release = posix_release,
- .releasedir = posix_releasedir,
- .forget = posix_forget
+ .release = posix_release,
+ .releasedir = posix_releasedir,
+ .forget = posix_forget
};
struct volume_options options[] = {
- { .key = {"o-direct"},
- .type = GF_OPTION_TYPE_BOOL },
- { .key = {"directory"},
- .type = GF_OPTION_TYPE_PATH },
- { .key = {"hostname"},
- .type = GF_OPTION_TYPE_ANY },
- { .key = {"export-statfs-size"},
- .type = GF_OPTION_TYPE_BOOL },
- { .key = {"mandate-attribute"},
- .type = GF_OPTION_TYPE_BOOL },
+ { .key = {"o-direct"},
+ .type = GF_OPTION_TYPE_BOOL },
+ { .key = {"directory"},
+ .type = GF_OPTION_TYPE_PATH },
+ { .key = {"hostname"},
+ .type = GF_OPTION_TYPE_ANY },
+ { .key = {"export-statfs-size"},
+ .type = GF_OPTION_TYPE_BOOL },
+ { .key = {"mandate-attribute"},
+ .type = GF_OPTION_TYPE_BOOL },
{ .key = {"background-unlink"},
.type = GF_OPTION_TYPE_BOOL },
{ .key = {"janitor-sleep-duration"},
.type = GF_OPTION_TYPE_INT },
- { .key = {NULL} }
+ { .key = {"volume-id"},
+ .type = GF_OPTION_TYPE_ANY },
+ { .key = {"glusterd-uuid"},
+ .type = GF_OPTION_TYPE_STR },
+ {
+ .key = {"linux-aio"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Support for native Linux AIO"
+ },
+ {
+ .key = {"brick-uid"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Support for setting uid of brick's owner"
+ },
+ {
+ .key = {"brick-gid"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Support for setting gid of brick's owner"
+ },
+ { .key = {"node-uuid-pathinfo"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "return glusterd's node-uuid in pathinfo xattr"
+ " string instead of hostname"
+ },
+ {
+ .key = {"health-check-interval"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "30",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Interval in seconds for a filesystem health check, "
+ "set to 0 to disable"
+ },
+ { .key = {"batch-fsync-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "reverse-fsync",
+ .description = "Possible values:\n"
+ "\t- syncfs: Perform one syncfs() on behalf oa batch"
+ "of fsyncs.\n"
+ "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch"
+ " of fsyncs and one fsync() per batch.\n"
+ "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch"
+ " of fsyncs and fsync() each file in the batch in reverse order.\n"
+ " in reverse order.\n"
+ "\t- reverse-fsync: Perform fsync() of each file in the batch in"
+ " reverse order."
+ },
+ { .key = {"batch-fsync-delay-usec"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ .description = "Num of usecs to wait for aggregating fsync"
+ " requests",
+ },
+ { .key = {NULL} }
};
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index d7b56961a..3121db271 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _POSIX_H
#define _POSIX_H
@@ -52,7 +42,16 @@
#include "compat.h"
#include "timer.h"
#include "posix-mem-types.h"
+#include "posix-handle.h"
+#include "call-stub.h"
+
+#ifdef HAVE_LIBAIO
+#include <libaio.h>
+#include "posix-aio.h"
+#endif
+#define VECTOR_SIZE 64 * 1024 /* vector size 64KB*/
+#define MAX_NO_VECT 1024
/**
* posix_fd - internal structure common to file and directory fd's
*/
@@ -60,9 +59,8 @@
struct posix_fd {
int fd; /* fd returned by the kernel */
int32_t flags; /* flags for open/creat */
- char * path; /* used by setdents/getdents */
DIR * dir; /* handle returned by the kernel */
- int flushwrites;
+ int odirect;
struct list_head list; /* to add to the janitor list */
};
@@ -102,28 +100,109 @@ struct posix_private {
gf_boolean_t o_direct; /* always open files in O_DIRECT mode */
-/*
+/*
decide whether posix_unlink does open (file), unlink (file), close (fd)
instead of just unlink (file). with the former approach there is no lockout
of access to parent directory during removal of very large files for the
entire duration of freeing of data blocks.
-*/
+*/
gf_boolean_t background_unlink;
/* janitor thread which cleans up /.trash (created by replicate) */
pthread_t janitor;
gf_boolean_t janitor_present;
char * trash_path;
+/* lock for brick dir */
+ DIR *mount_lock;
+
+ struct stat handledir;
+
+/* uuid of glusterd that swapned the brick process */
+ uuid_t glusterd_uuid;
+
+ gf_boolean_t aio_configured;
+ gf_boolean_t aio_init_done;
+ gf_boolean_t aio_capable;
+#ifdef HAVE_LIBAIO
+ io_context_t ctxp;
+ pthread_t aiothread;
+#endif
+
+ /* node-uuid in pathinfo xattr */
+ gf_boolean_t node_uuid_pathinfo;
+
+ pthread_t fsyncer;
+ struct list_head fsyncs;
+ pthread_mutex_t fsync_mutex;
+ pthread_cond_t fsync_cond;
+ int fsync_queue_count;
+
+ enum {
+ BATCH_NONE = 0,
+ BATCH_SYNCFS,
+ BATCH_SYNCFS_SINGLE_FSYNC,
+ BATCH_REVERSE_FSYNC,
+ BATCH_SYNCFS_REVERSE_FSYNC
+ } batch_fsync_mode;
+
+ uint32_t batch_fsync_delay_usec;
+
+ /* seconds to sleep between health checks */
+ uint32_t health_check_interval;
+ pthread_t health_check;
+ gf_boolean_t health_check_active;
};
+typedef struct {
+ xlator_t *this;
+ const char *real_path;
+ dict_t *xattr;
+ struct iatt *stbuf;
+ loc_t *loc;
+ inode_t *inode; /* for all do_xattrop() key handling */
+ int fd;
+ int flags;
+ int32_t op_errno;
+} posix_xattr_filler_t;
+
+
#define POSIX_BASE_PATH(this) (((struct posix_private *)this->private)->base_path)
#define POSIX_BASE_PATH_LEN(this) (((struct posix_private *)this->private)->base_path_length)
-#define MAKE_REAL_PATH(var, this, path) do { \
- var = alloca (strlen (path) + POSIX_BASE_PATH_LEN(this) + 2); \
- strcpy (var, POSIX_BASE_PATH(this)); \
- strcpy (&var[POSIX_BASE_PATH_LEN(this)], path); \
- } while (0)
-
+/* Helper functions */
+int posix_gfid_set (xlator_t *this, const char *path, loc_t *loc,
+ dict_t *xattr_req);
+int posix_fdstat (xlator_t *this, int fd, struct iatt *stbuf_p);
+int posix_istat (xlator_t *this, uuid_t gfid, const char *basename,
+ struct iatt *iatt);
+int posix_pstat (xlator_t *this, uuid_t gfid, const char *real_path,
+ struct iatt *iatt);
+dict_t *posix_lookup_xattr_fill (xlator_t *this, const char *path,
+ loc_t *loc, dict_t *xattr, struct iatt *buf);
+int posix_handle_pair (xlator_t *this, const char *real_path, char *key,
+ data_t *value, int flags);
+int posix_fhandle_pair (xlator_t *this, int fd, char *key, data_t *value,
+ int flags);
+void posix_spawn_janitor_thread (xlator_t *this);
+int posix_get_file_contents (xlator_t *this, uuid_t pargfid,
+ const char *name, char **contents);
+int posix_set_file_contents (xlator_t *this, const char *path, char *key,
+ data_t *value, int flags);
+int posix_acl_xattr_set (xlator_t *this, const char *path, dict_t *xattr_req);
+int posix_gfid_heal (xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req);
+int posix_entry_create_xattr_set (xlator_t *this, const char *path,
+ dict_t *dict);
+
+int posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd);
+void posix_fill_ino_from_gfid (xlator_t *this, struct iatt *buf);
+
+gf_boolean_t posix_special_xattr (char **pattern, char *key);
+
+void
+__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
+ off_t offset, size_t size);
+void posix_spawn_health_check_thread (xlator_t *this);
+
+void *posix_fsyncer (void *);
#endif /* _POSIX_H */