diff options
author | Vikas Gorur <vikas@zresearch.com> | 2009-02-18 17:36:07 +0530 |
---|---|---|
committer | Vikas Gorur <vikas@zresearch.com> | 2009-02-18 17:36:07 +0530 |
commit | 77adf4cd648dce41f89469dd185deec6b6b53a0b (patch) | |
tree | 02e155a5753b398ee572b45793f889b538efab6b /xlators/cluster/unify | |
parent | f3b2e6580e5663292ee113c741343c8a43ee133f (diff) |
Added all files
Diffstat (limited to 'xlators/cluster/unify')
-rw-r--r-- | xlators/cluster/unify/Makefile.am | 3 | ||||
-rw-r--r-- | xlators/cluster/unify/src/Makefile.am | 16 | ||||
-rw-r--r-- | xlators/cluster/unify/src/unify-self-heal.c | 1225 | ||||
-rw-r--r-- | xlators/cluster/unify/src/unify.c | 4451 | ||||
-rw-r--r-- | xlators/cluster/unify/src/unify.h | 132 |
5 files changed, 5827 insertions, 0 deletions
diff --git a/xlators/cluster/unify/Makefile.am b/xlators/cluster/unify/Makefile.am new file mode 100644 index 00000000000..d471a3f9243 --- /dev/null +++ b/xlators/cluster/unify/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/cluster/unify/src/Makefile.am b/xlators/cluster/unify/src/Makefile.am new file mode 100644 index 00000000000..b9e6f63e9d7 --- /dev/null +++ b/xlators/cluster/unify/src/Makefile.am @@ -0,0 +1,16 @@ + +xlator_LTLIBRARIES = unify.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster + +unify_la_LDFLAGS = -module -avoidversion + +unify_la_SOURCES = unify.c unify-self-heal.c +unify_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = unify.h + +AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ + -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) + +CLEANFILES = + diff --git a/xlators/cluster/unify/src/unify-self-heal.c b/xlators/cluster/unify/src/unify-self-heal.c new file mode 100644 index 00000000000..4885dd91a35 --- /dev/null +++ b/xlators/cluster/unify/src/unify-self-heal.c @@ -0,0 +1,1225 @@ +/* + Copyright (c) 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +/** + * unify-self-heal.c : + * This file implements few functions which enables 'unify' translator + * to be consistent in its behaviour when + * > a node fails, + * > a node gets added, + * > a failed node comes back + * > a new namespace server is added (ie, an fresh namespace server). + * + * This functionality of 'unify' will enable glusterfs to support storage + * system failure, and maintain consistancy. This works both ways, ie, when + * an entry (either file or directory) is found on namespace server, and not + * on storage nodes, its created in storage nodes and vica-versa. + * + * The two fops, where it can be implemented are 'getdents ()' and 'lookup ()' + * + */ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "unify.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "common-utils.h" + +int32_t +unify_sh_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count); + +int32_t +unify_sh_ns_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count); + +int32_t +unify_bgsh_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count); + +int32_t +unify_bgsh_ns_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count); + +/** + * unify_local_wipe - free all the extra allocation of local->* here. + */ +static void +unify_local_wipe (unify_local_t *local) +{ + /* Free the strdup'd variables in the local structure */ + if (local->name) { + FREE (local->name); + } + + if (local->sh_struct) { + if (local->sh_struct->offset_list) + FREE (local->sh_struct->offset_list); + + if (local->sh_struct->entry_list) + FREE (local->sh_struct->entry_list); + + if (local->sh_struct->count_list) + FREE (local->sh_struct->count_list); + + FREE (local->sh_struct); + } + + loc_wipe (&local->loc1); + loc_wipe (&local->loc2); +} + +int32_t +unify_sh_setdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = -1; + unify_local_t *local = frame->local; + inode_t *inode = NULL; + dict_t *tmp_dict = NULL; + dir_entry_t *prev, *entry, *trav; + + LOCK (&frame->lock); + { + /* if local->call_count == 0, that means, setdents on + * storagenodes is still pending. + */ + if (local->call_count) + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + if (callcnt == 0) { + if (local->sh_struct->entry_list[0]) { + prev = entry = local->sh_struct->entry_list[0]; + if (!entry) + return 0; + trav = entry->next; + while (trav) { + prev->next = trav->next; + FREE (trav->name); + if (S_ISLNK (trav->buf.st_mode)) + FREE (trav->link); + FREE (trav); + trav = prev->next; + } + FREE (entry); + } + + if (!local->flags) { + if (local->sh_struct->count_list[0] >= + UNIFY_SELF_HEAL_GETDENTS_COUNT) { + /* count == size, that means, there are more entries + to read from */ + //local->call_count = 0; + local->sh_struct->offset_list[0] += + UNIFY_SELF_HEAL_GETDENTS_COUNT; + STACK_WIND (frame, + unify_sh_ns_getdents_cbk, + NS(this), + NS(this)->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + local->sh_struct->offset_list[0], + GF_GET_DIR_ONLY); + } + } else { + inode = local->loc1.inode; + fd_unref (local->fd); + tmp_dict = local->dict; + + unify_local_wipe (local); + + STACK_UNWIND (frame, local->op_ret, local->op_errno, + inode, &local->stbuf, local->dict); + if (tmp_dict) + dict_unref (local->dict); + } + } + + return 0; +} + + +int32_t +unify_sh_ns_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count) +{ + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + long index = 0; + unsigned long final = 0; + dir_entry_t *tmp = CALLOC (1, sizeof (dir_entry_t)); + + local->sh_struct->entry_list[0] = tmp; + local->sh_struct->count_list[0] = count; + if (entry) { + tmp->next = entry->next; + entry->next = NULL; + } + + if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) { + final = 1; + } + + LOCK (&frame->lock); + { + /* local->call_count will be '0' till now. make it 1 so, it + can be UNWIND'ed for the last call. */ + local->call_count = priv->child_count; + if (final) + local->flags = 1; + } + UNLOCK (&frame->lock); + + for (index = 0; index < priv->child_count; index++) + { + STACK_WIND_COOKIE (frame, + unify_sh_setdents_cbk, + (void *)index, + priv->xl_array[index], + priv->xl_array[index]->fops->setdents, + local->fd, GF_SET_DIR_ONLY, + local->sh_struct->entry_list[0], count); + } + + return 0; +} + +int32_t +unify_sh_ns_setdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = -1; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + long index = (long)cookie; + dir_entry_t *prev, *entry, *trav; + + LOCK (&frame->lock); + { + if (local->sh_struct->entry_list[index]) { + prev = entry = local->sh_struct->entry_list[index]; + trav = entry->next; + while (trav) { + prev->next = trav->next; + FREE (trav->name); + if (S_ISLNK (trav->buf.st_mode)) + FREE (trav->link); + FREE (trav); + trav = prev->next; + } + FREE (entry); + } + } + UNLOCK (&frame->lock); + + if (local->sh_struct->count_list[index] < + UNIFY_SELF_HEAL_GETDENTS_COUNT) { + LOCK (&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + } else { + /* count == size, that means, there are more entries + to read from */ + local->sh_struct->offset_list[index] += + UNIFY_SELF_HEAL_GETDENTS_COUNT; + STACK_WIND_COOKIE (frame, + unify_sh_getdents_cbk, + cookie, + priv->xl_array[index], + priv->xl_array[index]->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + local->sh_struct->offset_list[index], + GF_GET_ALL); + + gf_log (this->name, GF_LOG_DEBUG, + "readdir on (%s) with offset %"PRId64"", + priv->xl_array[index]->name, + local->sh_struct->offset_list[index]); + } + + if (!callcnt) { + /* All storage nodes have done unified setdents on NS node. + * Now, do getdents from NS and do setdents on storage nodes. + */ + + /* sh_struct->offset_list is no longer required for + storage nodes now */ + local->sh_struct->offset_list[0] = 0; /* reset */ + + STACK_WIND (frame, + unify_sh_ns_getdents_cbk, + NS(this), + NS(this)->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + 0, /* In this call, do send '0' as offset */ + GF_GET_DIR_ONLY); + } + + return 0; +} + + +/** + * unify_sh_getdents_cbk - + */ +int32_t +unify_sh_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count) +{ + int32_t callcnt = -1; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + long index = (long)cookie; + dir_entry_t *tmp = NULL; + + if (op_ret >= 0 && count > 0) { + /* There is some dentry found, just send the dentry to NS */ + tmp = CALLOC (1, sizeof (dir_entry_t)); + local->sh_struct->entry_list[index] = tmp; + local->sh_struct->count_list[index] = count; + if (entry) { + tmp->next = entry->next; + entry->next = NULL; + } + STACK_WIND_COOKIE (frame, + unify_sh_ns_setdents_cbk, + cookie, + NS(this), + NS(this)->fops->setdents, + local->fd, + GF_SET_IF_NOT_PRESENT, + local->sh_struct->entry_list[index], + count); + return 0; + } + + if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) { + LOCK (&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + } else { + /* count == size, that means, there are more entries + to read from */ + local->sh_struct->offset_list[index] += + UNIFY_SELF_HEAL_GETDENTS_COUNT; + STACK_WIND_COOKIE (frame, + unify_sh_getdents_cbk, + cookie, + priv->xl_array[index], + priv->xl_array[index]->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + local->sh_struct->offset_list[index], + GF_GET_ALL); + + gf_log (this->name, GF_LOG_DEBUG, + "readdir on (%s) with offset %"PRId64"", + priv->xl_array[index]->name, + local->sh_struct->offset_list[index]); + } + + if (!callcnt) { + /* All storage nodes have done unified setdents on NS node. + * Now, do getdents from NS and do setdents on storage nodes. + */ + + /* sh_struct->offset_list is no longer required for + storage nodes now */ + local->sh_struct->offset_list[0] = 0; /* reset */ + + STACK_WIND (frame, + unify_sh_ns_getdents_cbk, + NS(this), + NS(this)->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + 0, /* In this call, do send '0' as offset */ + GF_GET_DIR_ONLY); + } + + return 0; +} + +/** + * unify_sh_opendir_cbk - + * + * @cookie: + */ +int32_t +unify_sh_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + int32_t callcnt = 0; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int16_t index = 0; + inode_t *inode = NULL; + dict_t *tmp_dict = NULL; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret >= 0) { + local->op_ret = op_ret; + } else { + gf_log (this->name, GF_LOG_WARNING, "failed"); + local->failed = 1; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + local->call_count = priv->child_count + 1; + + if (!local->failed) { + /* send getdents() namespace after finishing + storage nodes */ + local->call_count--; + + fd_bind (fd); + + if (local->call_count) { + /* Used as the offset index. This list keeps + * track of offset sent to each node during + * STACK_WIND. + */ + local->sh_struct->offset_list = + calloc (priv->child_count, + sizeof (off_t)); + ERR_ABORT (local->sh_struct->offset_list); + + local->sh_struct->entry_list = + calloc (priv->child_count, + sizeof (dir_entry_t *)); + ERR_ABORT (local->sh_struct->entry_list); + + local->sh_struct->count_list = + calloc (priv->child_count, + sizeof (int)); + ERR_ABORT (local->sh_struct->count_list); + + /* Send getdents on all the fds */ + for (index = 0; + index < priv->child_count; index++) { + STACK_WIND_COOKIE (frame, + unify_sh_getdents_cbk, + (void *)(long)index, + priv->xl_array[index], + priv->xl_array[index]->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + 0, /* In this call, do send '0' as offset */ + GF_GET_ALL); + } + + /* did stack wind, so no need to unwind here */ + return 0; + } /* (local->call_count) */ + } /* (!local->failed) */ + + /* Opendir failed on one node. */ + inode = local->loc1.inode; + fd_unref (local->fd); + tmp_dict = local->dict; + + unify_local_wipe (local); + /* Only 'self-heal' failed, lookup() was successful. */ + local->op_ret = 0; + + /* This is lookup_cbk ()'s UNWIND. */ + STACK_UNWIND (frame, local->op_ret, local->op_errno, inode, + &local->stbuf, local->dict); + if (tmp_dict) + dict_unref (tmp_dict); + } + + return 0; +} + +/** + * gf_sh_checksum_cbk - + * + * @frame: frame used in lookup. get a copy of it, and use that copy. + * @this: pointer to unify xlator. + * @inode: pointer to inode, for which the consistency check is required. + * + */ +int32_t +unify_sh_checksum_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + uint8_t *file_checksum, + uint8_t *dir_checksum) +{ + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int16_t index = 0; + int32_t callcnt = 0; + inode_t *inode = NULL; + dict_t *tmp_dict = NULL; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret >= 0) { + if (NS(this) == (xlator_t *)cookie) { + memcpy (local->sh_struct->ns_file_checksum, + file_checksum, ZR_FILENAME_MAX); + memcpy (local->sh_struct->ns_dir_checksum, + dir_checksum, ZR_FILENAME_MAX); + } else { + if (local->entry_count == 0) { + /* Initialize the dir_checksum to be + * used for comparision with other + * storage nodes. Should be done for + * the first successful call *only*. + */ + /* Using 'entry_count' as a flag */ + local->entry_count = 1; + memcpy (local->sh_struct->dir_checksum, + dir_checksum, ZR_FILENAME_MAX); + } + + /* Reply from the storage nodes */ + for (index = 0; + index < ZR_FILENAME_MAX; index++) { + /* Files should be present in + only one node */ + local->sh_struct->file_checksum[index] ^= file_checksum[index]; + + /* directory structure should be + same accross */ + if (local->sh_struct->dir_checksum[index] != dir_checksum[index]) + local->failed = 1; + } + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + for (index = 0; index < ZR_FILENAME_MAX ; index++) { + if (local->sh_struct->file_checksum[index] != + local->sh_struct->ns_file_checksum[index]) { + local->failed = 1; + break; + } + if (local->sh_struct->dir_checksum[index] != + local->sh_struct->ns_dir_checksum[index]) { + local->failed = 1; + break; + } + } + + if (local->failed) { + /* Log it, it should be a rare event */ + gf_log (this->name, GF_LOG_WARNING, + "Self-heal triggered on directory %s", + local->loc1.path); + + /* Any self heal will be done at directory level */ + local->call_count = 0; + local->op_ret = -1; + local->failed = 0; + + local->fd = fd_create (local->loc1.inode, + frame->root->pid); + + local->call_count = priv->child_count + 1; + + for (index = 0; + index < (priv->child_count + 1); index++) { + STACK_WIND_COOKIE (frame, + unify_sh_opendir_cbk, + priv->xl_array[index]->name, + priv->xl_array[index], + priv->xl_array[index]->fops->opendir, + &local->loc1, + local->fd); + } + /* opendir can be done on the directory */ + return 0; + } + + /* no mismatch */ + inode = local->loc1.inode; + tmp_dict = local->dict; + + unify_local_wipe (local); + + /* This is lookup_cbk ()'s UNWIND. */ + STACK_UNWIND (frame, + local->op_ret, + local->op_errno, + inode, + &local->stbuf, + local->dict); + if (tmp_dict) + dict_unref (tmp_dict); + } + + return 0; +} + +/* Foreground self-heal part over */ + +/* Background self-heal part */ + +int32_t +unify_bgsh_setdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = -1; + unify_local_t *local = frame->local; + dir_entry_t *prev, *entry, *trav; + + LOCK (&frame->lock); + { + /* if local->call_count == 0, that means, setdents + on storagenodes is still pending. */ + if (local->call_count) + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + + if (callcnt == 0) { + if (local->sh_struct->entry_list[0]) { + prev = entry = local->sh_struct->entry_list[0]; + trav = entry->next; + while (trav) { + prev->next = trav->next; + FREE (trav->name); + if (S_ISLNK (trav->buf.st_mode)) + FREE (trav->link); + FREE (trav); + trav = prev->next; + } + FREE (entry); + } + + if (!local->flags) { + if (local->sh_struct->count_list[0] >= + UNIFY_SELF_HEAL_GETDENTS_COUNT) { + /* count == size, that means, there are more + entries to read from */ + //local->call_count = 0; + local->sh_struct->offset_list[0] += + UNIFY_SELF_HEAL_GETDENTS_COUNT; + STACK_WIND (frame, + unify_bgsh_ns_getdents_cbk, + NS(this), + NS(this)->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + local->sh_struct->offset_list[0], + GF_GET_DIR_ONLY); + } + } else { + fd_unref (local->fd); + unify_local_wipe (local); + STACK_DESTROY (frame->root); + } + } + + return 0; +} + + +int32_t +unify_bgsh_ns_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count) +{ + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + long index = 0; + unsigned long final = 0; + dir_entry_t *tmp = CALLOC (1, sizeof (dir_entry_t)); + + local->sh_struct->entry_list[0] = tmp; + local->sh_struct->count_list[0] = count; + if (entry) { + tmp->next = entry->next; + entry->next = NULL; + } + + if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) { + final = 1; + } + + LOCK (&frame->lock); + { + /* local->call_count will be '0' till now. make it 1 so, + it can be UNWIND'ed for the last call. */ + local->call_count = priv->child_count; + if (final) + local->flags = 1; + } + UNLOCK (&frame->lock); + + for (index = 0; index < priv->child_count; index++) + { + STACK_WIND_COOKIE (frame, + unify_bgsh_setdents_cbk, + (void *)index, + priv->xl_array[index], + priv->xl_array[index]->fops->setdents, + local->fd, GF_SET_DIR_ONLY, + local->sh_struct->entry_list[0], count); + } + + return 0; +} + +int32_t +unify_bgsh_ns_setdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = -1; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + long index = (long)cookie; + dir_entry_t *prev, *entry, *trav; + + if (local->sh_struct->entry_list[index]) { + prev = entry = local->sh_struct->entry_list[index]; + if (!entry) + return 0; + trav = entry->next; + while (trav) { + prev->next = trav->next; + FREE (trav->name); + if (S_ISLNK (trav->buf.st_mode)) + FREE (trav->link); + FREE (trav); + trav = prev->next; + } + FREE (entry); + } + + if (local->sh_struct->count_list[index] < + UNIFY_SELF_HEAL_GETDENTS_COUNT) { + LOCK (&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + } else { + /* count == size, that means, there are more entries + to read from */ + local->sh_struct->offset_list[index] += + UNIFY_SELF_HEAL_GETDENTS_COUNT; + STACK_WIND_COOKIE (frame, + unify_bgsh_getdents_cbk, + cookie, + priv->xl_array[index], + priv->xl_array[index]->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + local->sh_struct->offset_list[index], + GF_GET_ALL); + + gf_log (this->name, GF_LOG_DEBUG, + "readdir on (%s) with offset %"PRId64"", + priv->xl_array[index]->name, + local->sh_struct->offset_list[index]); + } + + if (!callcnt) { + /* All storage nodes have done unified setdents on NS node. + * Now, do getdents from NS and do setdents on storage nodes. + */ + + /* sh_struct->offset_list is no longer required for + storage nodes now */ + local->sh_struct->offset_list[0] = 0; /* reset */ + + STACK_WIND (frame, + unify_bgsh_ns_getdents_cbk, + NS(this), + NS(this)->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + 0, /* In this call, do send '0' as offset */ + GF_GET_DIR_ONLY); + } + + return 0; +} + + +/** + * unify_bgsh_getdents_cbk - + */ +int32_t +unify_bgsh_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count) +{ + int32_t callcnt = -1; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + long index = (long)cookie; + dir_entry_t *tmp = NULL; + + if (op_ret >= 0 && count > 0) { + /* There is some dentry found, just send the dentry to NS */ + tmp = CALLOC (1, sizeof (dir_entry_t)); + local->sh_struct->entry_list[index] = tmp; + local->sh_struct->count_list[index] = count; + if (entry) { + tmp->next = entry->next; + entry->next = NULL; + } + STACK_WIND_COOKIE (frame, + unify_bgsh_ns_setdents_cbk, + cookie, + NS(this), + NS(this)->fops->setdents, + local->fd, + GF_SET_IF_NOT_PRESENT, + local->sh_struct->entry_list[index], + count); + return 0; + } + + if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) { + LOCK (&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + } else { + /* count == size, that means, there are more entries to read from */ + local->sh_struct->offset_list[index] += + UNIFY_SELF_HEAL_GETDENTS_COUNT; + + STACK_WIND_COOKIE (frame, + unify_bgsh_getdents_cbk, + cookie, + priv->xl_array[index], + priv->xl_array[index]->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + local->sh_struct->offset_list[index], + GF_GET_ALL); + + gf_log (this->name, GF_LOG_DEBUG, + "readdir on (%s) with offset %"PRId64"", + priv->xl_array[index]->name, + local->sh_struct->offset_list[index]); + } + + if (!callcnt) { + /* All storage nodes have done unified setdents on NS node. + * Now, do getdents from NS and do setdents on storage nodes. + */ + + /* sh_struct->offset_list is no longer required for + storage nodes now */ + local->sh_struct->offset_list[0] = 0; /* reset */ + + STACK_WIND (frame, + unify_bgsh_ns_getdents_cbk, + NS(this), + NS(this)->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + 0, /* In this call, do send '0' as offset */ + GF_GET_DIR_ONLY); + } + + return 0; +} + +/** + * unify_bgsh_opendir_cbk - + * + * @cookie: + */ +int32_t +unify_bgsh_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int32_t callcnt = 0; + int16_t index = 0; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret >= 0) { + local->op_ret = op_ret; + } else { + local->failed = 1; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + local->call_count = priv->child_count + 1; + + if (!local->failed) { + /* send getdents() namespace after finishing + storage nodes */ + local->call_count--; + callcnt = local->call_count; + + fd_bind (fd); + + if (local->call_count) { + /* Used as the offset index. This list keeps + track of offset sent to each node during + STACK_WIND. */ + local->sh_struct->offset_list = + calloc (priv->child_count, + sizeof (off_t)); + ERR_ABORT (local->sh_struct->offset_list); + + local->sh_struct->entry_list = + calloc (priv->child_count, + sizeof (dir_entry_t *)); + ERR_ABORT (local->sh_struct->entry_list); + + local->sh_struct->count_list = + calloc (priv->child_count, + sizeof (int)); + ERR_ABORT (local->sh_struct->count_list); + + /* Send getdents on all the fds */ + for (index = 0; + index < priv->child_count; index++) { + STACK_WIND_COOKIE (frame, + unify_bgsh_getdents_cbk, + (void *)(long)index, + priv->xl_array[index], + priv->xl_array[index]->fops->getdents, + local->fd, + UNIFY_SELF_HEAL_GETDENTS_COUNT, + 0, /* In this call, do send '0' as offset */ + GF_GET_ALL); + } + /* did a stack wind, so no need to unwind here */ + return 0; + } /* (local->call_count) */ + } /* (!local->failed) */ + + /* Opendir failed on one node. */ + fd_unref (local->fd); + + unify_local_wipe (local); + STACK_DESTROY (frame->root); + } + + return 0; +} + +/** + * gf_bgsh_checksum_cbk - + * + * @frame: frame used in lookup. get a copy of it, and use that copy. + * @this: pointer to unify xlator. + * @inode: pointer to inode, for which the consistency check is required. + * + */ +int32_t +unify_bgsh_checksum_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + uint8_t *file_checksum, + uint8_t *dir_checksum) +{ + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int16_t index = 0; + int32_t callcnt = 0; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret >= 0) { + if (NS(this) == (xlator_t *)cookie) { + memcpy (local->sh_struct->ns_file_checksum, + file_checksum, ZR_FILENAME_MAX); + memcpy (local->sh_struct->ns_dir_checksum, + dir_checksum, ZR_FILENAME_MAX); + } else { + if (local->entry_count == 0) { + /* Initialize the dir_checksum to be + * used for comparision with other + * storage nodes. Should be done for + * the first successful call *only*. + */ + /* Using 'entry_count' as a flag */ + local->entry_count = 1; + memcpy (local->sh_struct->dir_checksum, + dir_checksum, ZR_FILENAME_MAX); + } + + /* Reply from the storage nodes */ + for (index = 0; + index < ZR_FILENAME_MAX; index++) { + /* Files should be present in only + one node */ + local->sh_struct->file_checksum[index] ^= file_checksum[index]; + + /* directory structure should be same + accross */ + if (local->sh_struct->dir_checksum[index] != dir_checksum[index]) + local->failed = 1; + } + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + for (index = 0; index < ZR_FILENAME_MAX ; index++) { + if (local->sh_struct->file_checksum[index] != + local->sh_struct->ns_file_checksum[index]) { + local->failed = 1; + break; + } + if (local->sh_struct->dir_checksum[index] != + local->sh_struct->ns_dir_checksum[index]) { + local->failed = 1; + break; + } + } + + if (local->failed) { + /* Log it, it should be a rare event */ + gf_log (this->name, GF_LOG_WARNING, + "Self-heal triggered on directory %s", + local->loc1.path); + + /* Any self heal will be done at the directory level */ + local->op_ret = -1; + local->failed = 0; + + local->fd = fd_create (local->loc1.inode, + frame->root->pid); + local->call_count = priv->child_count + 1; + + for (index = 0; + index < (priv->child_count + 1); index++) { + STACK_WIND_COOKIE (frame, + unify_bgsh_opendir_cbk, + priv->xl_array[index]->name, + priv->xl_array[index], + priv->xl_array[index]->fops->opendir, + &local->loc1, + local->fd); + } + + /* opendir can be done on the directory */ + return 0; + } + + /* no mismatch */ + unify_local_wipe (local); + STACK_DESTROY (frame->root); + } + + return 0; +} + +/* Background self-heal part over */ + + + + +/** + * zr_unify_self_heal - + * + * @frame: frame used in lookup. get a copy of it, and use that copy. + * @this: pointer to unify xlator. + * @inode: pointer to inode, for which the consistency check is required. + * + */ +int32_t +zr_unify_self_heal (call_frame_t *frame, + xlator_t *this, + unify_local_t *local) +{ + unify_private_t *priv = this->private; + call_frame_t *bg_frame = NULL; + unify_local_t *bg_local = NULL; + inode_t *tmp_inode = NULL; + dict_t *tmp_dict = NULL; + int16_t index = 0; + + if (local->inode_generation < priv->inode_generation) { + /* Any self heal will be done at the directory level */ + /* Update the inode's generation to the current generation + value. */ + local->inode_generation = priv->inode_generation; + inode_ctx_put (local->loc1.inode, this, + (uint64_t)(long)local->inode_generation); + + if (priv->self_heal == ZR_UNIFY_FG_SELF_HEAL) { + local->op_ret = 0; + local->failed = 0; + local->call_count = priv->child_count + 1; + local->sh_struct = + calloc (1, sizeof (struct unify_self_heal_struct)); + + /* +1 is for NS */ + for (index = 0; + index < (priv->child_count + 1); index++) { + STACK_WIND_COOKIE (frame, + unify_sh_checksum_cbk, + priv->xl_array[index], + priv->xl_array[index], + priv->xl_array[index]->fops->checksum, + &local->loc1, + 0); + } + + /* Self-heal in foreground, hence no need + to UNWIND here */ + return 0; + } + + /* Self Heal done in background */ + bg_frame = copy_frame (frame); + INIT_LOCAL (bg_frame, bg_local); + loc_copy (&bg_local->loc1, &local->loc1); + bg_local->op_ret = 0; + bg_local->failed = 0; + bg_local->call_count = priv->child_count + 1; + bg_local->sh_struct = + calloc (1, sizeof (struct unify_self_heal_struct)); + + /* +1 is for NS */ + for (index = 0; index < (priv->child_count + 1); index++) { + STACK_WIND_COOKIE (bg_frame, + unify_bgsh_checksum_cbk, + priv->xl_array[index], + priv->xl_array[index], + priv->xl_array[index]->fops->checksum, + &bg_local->loc1, + 0); + } + } + + /* generation number matches, self heal already done or + * self heal done in background: just do STACK_UNWIND + */ + tmp_inode = local->loc1.inode; + tmp_dict = local->dict; + + unify_local_wipe (local); + + /* This is lookup_cbk ()'s UNWIND. */ + STACK_UNWIND (frame, + local->op_ret, + local->op_errno, + tmp_inode, + &local->stbuf, + local->dict); + + if (tmp_dict) + dict_unref (tmp_dict); + + return 0; +} + diff --git a/xlators/cluster/unify/src/unify.c b/xlators/cluster/unify/src/unify.c new file mode 100644 index 00000000000..e2a5e14b191 --- /dev/null +++ b/xlators/cluster/unify/src/unify.c @@ -0,0 +1,4451 @@ +/* + Copyright (c) 2006, 2007, 2008, 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +/** + * xlators/cluster/unify: + * - This xlator is one of the main translator in GlusterFS, which + * actually does the clustering work of the file system. One need to + * understand that, unify assumes file to be existing in only one of + * the child node, and directories to be present on all the nodes. + * + * NOTE: + * Now, unify has support for global namespace, which is used to keep a + * global view of fs's namespace tree. The stat for directories are taken + * just from the namespace, where as for files, just 'st_ino' is taken from + * Namespace node, and other stat info is taken from the actual storage node. + * Also Namespace node helps to keep consistant inode for files across + * glusterfs (re-)mounts. + */ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "unify.h" +#include "dict.h" +#include "xlator.h" +#include "hashfn.h" +#include "logging.h" +#include "stack.h" +#include "defaults.h" +#include "common-utils.h" +#include <signal.h> +#include <libgen.h> +#include "compat-errno.h" +#include "compat.h" + +#define UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do { \ + if (!(_loc && _loc->inode)) { \ + STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); \ + return 0; \ + } \ +} while(0) + + +#define UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(_fd) do { \ + if (!(_fd && !fd_ctx_get (_fd, this, NULL))) { \ + STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \ + return 0; \ + } \ +} while(0) + +#define UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(_fd) do { \ + if (!_fd) { \ + STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \ + return 0; \ + } \ +} while(0) + +/** + * unify_local_wipe - free all the extra allocation of local->* here. + */ +static void +unify_local_wipe (unify_local_t *local) +{ + /* Free the strdup'd variables in the local structure */ + if (local->name) { + FREE (local->name); + } + loc_wipe (&local->loc1); + loc_wipe (&local->loc2); +} + + + +/* + * unify_normalize_stats - + */ +void +unify_normalize_stats (struct statvfs *buf, + unsigned long bsize, + unsigned long frsize) +{ + double factor; + + if (buf->f_bsize != bsize) { + factor = ((double) buf->f_bsize) / bsize; + buf->f_bsize = bsize; + buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree); + buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail); + } + + if (buf->f_frsize != frsize) { + factor = ((double) buf->f_frsize) / frsize; + buf->f_frsize = frsize; + buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks); + } +} + + +xlator_t * +unify_loc_subvol (loc_t *loc, xlator_t *this) +{ + unify_private_t *priv = NULL; + xlator_t *subvol = NULL; + int16_t *list = NULL; + long index = 0; + xlator_t *subvol_i = NULL; + int ret = 0; + uint64_t tmp_list = 0; + + priv = this->private; + subvol = NS (this); + + if (!S_ISDIR (loc->inode->st_mode)) { + ret = inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + if (!list) + goto out; + + for (index = 0; list[index] != -1; index++) { + subvol_i = priv->xl_array[list[index]]; + if (subvol_i != NS (this)) { + subvol = subvol_i; + break; + } + } + } +out: + return subvol; +} + + + +/** + * unify_statfs_cbk - + */ +int32_t +unify_statfs_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct statvfs *stbuf) +{ + int32_t callcnt = 0; + struct statvfs *dict_buf = NULL; + unsigned long bsize; + unsigned long frsize; + unify_local_t *local = (unify_local_t *)frame->local; + call_frame_t *prev_frame = cookie; + + LOCK (&frame->lock); + { + if (op_ret >= 0) { + /* when a call is successfull, add it to local->dict */ + dict_buf = &local->statvfs_buf; + + if (dict_buf->f_bsize != 0) { + bsize = max (dict_buf->f_bsize, + stbuf->f_bsize); + + frsize = max (dict_buf->f_frsize, + stbuf->f_frsize); + unify_normalize_stats(dict_buf, bsize, frsize); + unify_normalize_stats(stbuf, bsize, frsize); + } else { + dict_buf->f_bsize = stbuf->f_bsize; + dict_buf->f_frsize = stbuf->f_frsize; + } + + dict_buf->f_blocks += stbuf->f_blocks; + dict_buf->f_bfree += stbuf->f_bfree; + dict_buf->f_bavail += stbuf->f_bavail; + dict_buf->f_files += stbuf->f_files; + dict_buf->f_ffree += stbuf->f_ffree; + dict_buf->f_favail += stbuf->f_favail; + dict_buf->f_fsid = stbuf->f_fsid; + dict_buf->f_flag = stbuf->f_flag; + dict_buf->f_namemax = stbuf->f_namemax; + local->op_ret = op_ret; + } else { + /* fop on storage node has failed due to some error */ + if (op_errno != ENOTCONN) { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): %s", + prev_frame->this->name, + strerror (op_errno)); + } + local->op_errno = op_errno; + } + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->statvfs_buf); + } + + return 0; +} + +/** + * unify_statfs - + */ +int32_t +unify_statfs (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + unify_local_t *local = NULL; + xlator_list_t *trav = this->children; + + INIT_LOCAL (frame, local); + local->call_count = ((unify_private_t *)this->private)->child_count; + + while(trav) { + STACK_WIND (frame, + unify_statfs_cbk, + trav->xlator, + trav->xlator->fops->statfs, + loc); + trav = trav->next; + } + + return 0; +} + +/** + * unify_buf_cbk - + */ +int32_t +unify_buf_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int32_t callcnt = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "%s(): child(%s): path(%s): %s", + gf_fop_list[frame->root->op], + prev_frame->this->name, + (local->loc1.path)?local->loc1.path:"", + strerror (op_errno)); + + local->op_errno = op_errno; + if ((op_errno == ENOENT) && priv->optimist) + local->op_ret = 0; + } + + if (op_ret >= 0) { + local->op_ret = 0; + + if (NS (this) == prev_frame->this) { + local->st_ino = buf->st_ino; + /* If the entry is directory, get the stat + from NS node */ + if (S_ISDIR (buf->st_mode) || + !local->stbuf.st_blksize) { + local->stbuf = *buf; + } + } + + if ((!S_ISDIR (buf->st_mode)) && + (NS (this) != prev_frame->this)) { + /* If file, take the stat info from Storage + node. */ + local->stbuf = *buf; + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + /* If the inode number is not filled, operation should + fail */ + if (!local->st_ino) + local->op_ret = -1; + + local->stbuf.st_ino = local->st_ino; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + } + + return 0; +} + +#define check_if_dht_linkfile(s) ((s->st_mode & ~S_IFMT) == S_ISVTX) + +/** + * unify_lookup_cbk - + */ +int32_t +unify_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + int32_t callcnt = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + inode_t *tmp_inode = NULL; + dict_t *local_dict = NULL; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + if ((op_errno != ENOTCONN) && (op_errno != ENOENT)) { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + priv->xl_array[(long)cookie]->name, + local->loc1.path, strerror (op_errno)); + local->op_errno = op_errno; + local->failed = 1; + + } else if (local->revalidate && + !(priv->optimist && (op_errno == ENOENT))) { + + gf_log (this->name, + (op_errno == ENOTCONN) ? + GF_LOG_DEBUG:GF_LOG_ERROR, + "child(%s): path(%s): %s", + priv->xl_array[(long)cookie]->name, + local->loc1.path, strerror (op_errno)); + local->op_errno = op_errno; + local->failed = 1; + } + } + + if (op_ret == 0) { + local->op_ret = 0; + + if (check_if_dht_linkfile(buf)) { + gf_log (this->name, GF_LOG_CRITICAL, + "file %s may be DHT link file on %s, " + "make sure the backend is not shared " + "between unify and DHT", + local->loc1.path, + priv->xl_array[(long)cookie]->name); + } + + if (local->stbuf.st_mode && local->stbuf.st_blksize) { + /* make sure we already have a stbuf + stored in local->stbuf */ + if (S_ISDIR (local->stbuf.st_mode) && + !S_ISDIR (buf->st_mode)) { + gf_log (this->name, GF_LOG_CRITICAL, + "[CRITICAL] '%s' is directory " + "on namespace, non-directory " + "on node '%s', returning EIO", + local->loc1.path, + priv->xl_array[(long)cookie]->name); + local->return_eio = 1; + } + if (!S_ISDIR (local->stbuf.st_mode) && + S_ISDIR (buf->st_mode)) { + gf_log (this->name, GF_LOG_CRITICAL, + "[CRITICAL] '%s' is directory " + "on node '%s', non-directory " + "on namespace, returning EIO", + local->loc1.path, + priv->xl_array[(long)cookie]->name); + local->return_eio = 1; + } + } + + if (!local->revalidate && !S_ISDIR (buf->st_mode)) { + /* This is the first time lookup on file*/ + if (!local->list) { + /* list is not allocated, allocate + the max possible range */ + local->list = CALLOC (1, 2 * (priv->child_count + 2)); + if (!local->list) { + gf_log (this->name, + GF_LOG_CRITICAL, + "Not enough memory"); + STACK_UNWIND (frame, -1, + ENOMEM, inode, + NULL, NULL); + return 0; + } + } + /* update the index of the list */ + local->list [local->index++] = + (int16_t)(long)cookie; + } + + if ((!local->dict) && dict && + (priv->xl_array[(long)cookie] != NS(this))) { + local->dict = dict_ref (dict); + } + + /* index of NS node is == total child count */ + if (priv->child_count == (int16_t)(long)cookie) { + /* Take the inode number from namespace */ + local->st_ino = buf->st_ino; + if (S_ISDIR (buf->st_mode) || + !(local->stbuf.st_blksize)) { + local->stbuf = *buf; + } + } else if (!S_ISDIR (buf->st_mode)) { + /* If file, then get the stat from + storage node */ + local->stbuf = *buf; + } + + if (local->st_nlink < buf->st_nlink) { + local->st_nlink = buf->st_nlink; + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + local_dict = local->dict; + if (local->return_eio) { + gf_log (this->name, GF_LOG_CRITICAL, + "[CRITICAL] Unable to fix the path (%s) with " + "self-heal, try manual verification. " + "returning EIO.", local->loc1.path); + unify_local_wipe (local); + STACK_UNWIND (frame, -1, EIO, inode, NULL, NULL); + if (local_dict) { + dict_unref (local_dict); + } + return 0; + } + + if (!local->stbuf.st_blksize) { + /* Inode not present */ + local->op_ret = -1; + } else { + if (!local->revalidate && + !S_ISDIR (local->stbuf.st_mode)) { + /* If its a file, big array is useless, + allocate the smaller one */ + int16_t *list = NULL; + list = CALLOC (1, 2 * (local->index + 1)); + ERR_ABORT (list); + memcpy (list, local->list, 2 * local->index); + /* Make the end of the list as -1 */ + FREE (local->list); + local->list = list; + local->list [local->index] = -1; + /* Update the inode's ctx with proper array */ + /* TODO: log on failure */ + inode_ctx_put (local->loc1.inode, this, + (uint64_t)(long)local->list); + } + + if (S_ISDIR(local->loc1.inode->st_mode)) { + /* lookup is done for directory */ + if (local->failed && priv->self_heal) { + /* Triggering self-heal */ + /* means, self-heal required for this + inode */ + local->inode_generation = 0; + priv->inode_generation++; + } + } else { + local->stbuf.st_ino = local->st_ino; + } + + local->stbuf.st_nlink = local->st_nlink; + } + if (local->op_ret == -1) { + if (!local->revalidate && local->list) + FREE (local->list); + } + + if ((local->op_ret >= 0) && local->failed && + local->revalidate) { + /* Done revalidate, but it failed */ + if (op_errno != ENOTCONN) { + gf_log (this->name, GF_LOG_ERROR, + "Revalidate failed for path(%s): %s", + local->loc1.path, strerror (op_errno)); + } + local->op_ret = -1; + } + + if ((priv->self_heal && !priv->optimist) && + (!local->revalidate && (local->op_ret == 0) && + S_ISDIR(local->stbuf.st_mode))) { + /* Let the self heal be done here */ + zr_unify_self_heal (frame, this, local); + local_dict = NULL; + } else { + /* either no self heal, or op_ret == -1 (failure) */ + tmp_inode = local->loc1.inode; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + tmp_inode, &local->stbuf, local->dict); + } + if (local_dict) { + dict_unref (local_dict); + } + } + + return 0; +} + +/** + * unify_lookup - + */ +int32_t +unify_lookup (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *xattr_req) +{ + unify_local_t *local = NULL; + unify_private_t *priv = this->private; + int16_t *list = NULL; + long index = 0; + + if (!(loc && loc->inode)) { + gf_log (this->name, GF_LOG_ERROR, + "%s: Argument not right", loc?loc->path:"(null)"); + STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); + return 0; + } + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + if (local->loc1.path == NULL) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL, NULL); + return 0; + } + + if (!inode_ctx_get (loc->inode, this, NULL) && + loc->inode->st_mode && + !S_ISDIR (loc->inode->st_mode)) { + uint64_t tmp_list = 0; + /* check if revalidate or fresh lookup */ + inode_ctx_get (loc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + } + + if (local->list) { + list = local->list; + for (index = 0; list[index] != -1; index++); + if (index != 2) { + if (index < 2) { + gf_log (this->name, GF_LOG_ERROR, + "returning ESTALE for %s: file " + "count is %ld", loc->path, index); + /* Print where all the file is present */ + for (index = 0; + local->list[index] != -1; index++) { + gf_log (this->name, GF_LOG_ERROR, + "%s: found on %s", loc->path, + priv->xl_array[list[index]]->name); + } + unify_local_wipe (local); + STACK_UNWIND (frame, -1, ESTALE, + NULL, NULL, NULL); + return 0; + } else { + /* There are more than 2 presences */ + /* Just log and continue */ + gf_log (this->name, GF_LOG_ERROR, + "%s: file count is %ld", + loc->path, index); + /* Print where all the file is present */ + for (index = 0; + local->list[index] != -1; index++) { + gf_log (this->name, GF_LOG_ERROR, + "%s: found on %s", loc->path, + priv->xl_array[list[index]]->name); + } + } + } + + /* is revalidate */ + local->revalidate = 1; + + for (index = 0; list[index] != -1; index++) + local->call_count++; + + for (index = 0; list[index] != -1; index++) { + char need_break = (list[index+1] == -1); + STACK_WIND_COOKIE (frame, + unify_lookup_cbk, + (void *)(long)list[index], //cookie + priv->xl_array [list[index]], + priv->xl_array [list[index]]->fops->lookup, + loc, + xattr_req); + if (need_break) + break; + } + } else { + if (loc->inode->st_mode) { + if (inode_ctx_get (loc->inode, this, NULL)) { + inode_ctx_get (loc->inode, this, + &local->inode_generation); + } + } + /* This is first call, there is no list */ + /* call count should be all child + 1 namespace */ + local->call_count = priv->child_count + 1; + + for (index = 0; index <= priv->child_count; index++) { + STACK_WIND_COOKIE (frame, + unify_lookup_cbk, + (void *)index, //cookie + priv->xl_array[index], + priv->xl_array[index]->fops->lookup, + loc, + xattr_req); + } + } + + return 0; +} + +/** + * unify_stat - if directory, get the stat directly from NameSpace child. + * if file, check for a hint and send it only there (also to NS). + * if its a fresh stat, then do it on all the nodes. + * + * NOTE: for all the call, sending cookie as xlator pointer, which will be + * used in cbk. + */ +int32_t +unify_stat (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + unify_local_t *local = NULL; + unify_private_t *priv = this->private; + int16_t index = 0; + int16_t *list = NULL; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + if (local->loc1.path == NULL) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, NULL); + return 0; + } + local->st_ino = loc->inode->ino; + if (S_ISDIR (loc->inode->st_mode)) { + /* Directory */ + local->call_count = 1; + STACK_WIND (frame, unify_buf_cbk, NS(this), + NS(this)->fops->stat, loc); + } else { + /* File */ + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + for (index = 0; list[index] != -1; index++) + local->call_count++; + + for (index = 0; list[index] != -1; index++) { + char need_break = (list[index+1] == -1); + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->stat, + loc); + if (need_break) + break; + } + } + + return 0; +} + +/** + * unify_access_cbk - + */ +int32_t +unify_access_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +/** + * unify_access - Send request to only namespace, which has all the + * attributes set for the file. + */ +int32_t +unify_access (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t mask) +{ + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + STACK_WIND (frame, + unify_access_cbk, + NS(this), + NS(this)->fops->access, + loc, + mask); + + return 0; +} + +int32_t +unify_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + int32_t callcnt = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + inode_t *tmp_inode = NULL; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if ((op_ret == -1) && !(priv->optimist && + (op_errno == ENOENT || + op_errno == EEXIST))) { + /* TODO: Decrement the inode_generation of + * this->inode's parent inode, hence the missing + * directory is created properly by self-heal. + * Currently, there is no way to get the parent + * inode directly. + */ + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + priv->xl_array[(long)cookie]->name, + local->loc1.path, strerror (op_errno)); + if (op_errno != EEXIST) + local->failed = 1; + local->op_errno = op_errno; + } + + if (op_ret >= 0) + local->op_ret = 0; + + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (!local->failed) { + inode_ctx_put (local->loc1.inode, this, + priv->inode_generation); + } + + tmp_inode = local->loc1.inode; + unify_local_wipe (local); + + STACK_UNWIND (frame, local->op_ret, local->op_errno, + tmp_inode, &local->stbuf); + } + + return 0; +} + +/** + * unify_ns_mkdir_cbk - + */ +int32_t +unify_ns_mkdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + long index = 0; + + if (op_ret == -1) { + /* No need to send mkdir request to other servers, + * as namespace action failed + */ + gf_log (this->name, GF_LOG_ERROR, + "namespace: path(%s): %s", + local->name, strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, inode, NULL); + return 0; + } + + /* Create one inode for this entry */ + local->op_ret = 0; + local->stbuf = *buf; + + local->call_count = priv->child_count; + + /* Send mkdir request to all the nodes now */ + for (index = 0; index < priv->child_count; index++) { + STACK_WIND_COOKIE (frame, + unify_mkdir_cbk, + (void *)index, //cookie + priv->xl_array[index], + priv->xl_array[index]->fops->mkdir, + &local->loc1, + local->mode); + } + + return 0; +} + + +/** + * unify_mkdir - + */ +int32_t +unify_mkdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + unify_local_t *local = NULL; + + /* Initialization */ + INIT_LOCAL (frame, local); + local->mode = mode; + + loc_copy (&local->loc1, loc); + + if (local->loc1.path == NULL) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + STACK_WIND (frame, + unify_ns_mkdir_cbk, + NS(this), + NS(this)->fops->mkdir, + loc, + mode); + return 0; +} + +/** + * unify_rmdir_cbk - + */ +int32_t +unify_rmdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret == 0 || (priv->optimist && (op_errno == ENOENT))) + local->op_ret = 0; + if (op_ret == -1) + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno); + } + + return 0; +} + +/** + * unify_ns_rmdir_cbk - + */ +int32_t +unify_ns_rmdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int16_t index = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + if (op_ret == -1) { + /* No need to send rmdir request to other servers, + * as namespace action failed + */ + gf_log (this->name, + ((op_errno != ENOTEMPTY) ? + GF_LOG_ERROR : GF_LOG_DEBUG), + "namespace: path(%s): %s", + local->loc1.path, strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno); + return 0; + } + + local->call_count = priv->child_count; + + for (index = 0; index < priv->child_count; index++) { + STACK_WIND (frame, + unify_rmdir_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->rmdir, + &local->loc1); + } + + return 0; +} + +/** + * unify_rmdir - + */ +int32_t +unify_rmdir (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + unify_local_t *local = NULL; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + + loc_copy (&local->loc1, loc); + if (local->loc1.path == NULL) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM); + return 0; + } + + STACK_WIND (frame, + unify_ns_rmdir_cbk, + NS(this), + NS(this)->fops->rmdir, + loc); + + return 0; +} + +/** + * unify_open_cbk - + */ +int32_t +unify_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + int32_t callcnt = 0; + unify_local_t *local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret >= 0) { + local->op_ret = op_ret; + if (NS(this) != (xlator_t *)cookie) { + /* Store child node's ptr, used in + all the f*** / FileIO calls */ + fd_ctx_set (fd, this, (uint64_t)(long)cookie); + } + } + if (op_ret == -1) { + local->op_errno = op_errno; + local->failed = 1; + } + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if ((local->failed == 1) && (local->op_ret >= 0)) { + local->call_count = 1; + /* return -1 to user */ + local->op_ret = -1; + //local->op_errno = EIO; + + if (!fd_ctx_get (local->fd, this, NULL)) { + gf_log (this->name, GF_LOG_ERROR, + "Open success on child node, " + "failed on namespace"); + } else { + gf_log (this->name, GF_LOG_ERROR, + "Open success on namespace, " + "failed on child node"); + } + } + + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, + local->op_errno, local->fd); + } + + return 0; +} + +#ifdef GF_DARWIN_HOST_OS +/** + * unify_create_lookup_cbk - + */ +int32_t +unify_open_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + int32_t callcnt = 0; + int16_t index = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if ((op_ret == -1) && (op_errno != ENOENT)) { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + priv->xl_array[(long)cookie]->name, + local->loc1.path, strerror (op_errno)); + local->op_errno = op_errno; + } + + if (op_ret >= 0) { + local->op_ret = op_ret; + local->index++; + if (NS(this) == priv->xl_array[(long)cookie]) { + local->list[0] = (int16_t)(long)cookie; + } else { + local->list[1] = (int16_t)(long)cookie; + } + if (S_ISDIR (buf->st_mode)) + local->failed = 1; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + int16_t file_list[3] = {0,}; + local->op_ret = -1; + + file_list[0] = local->list[0]; + file_list[1] = local->list[1]; + file_list[2] = -1; + + if (local->index != 2) { + /* Lookup failed, can't do open */ + gf_log (this->name, GF_LOG_ERROR, + "%s: present on %d nodes", + local->name, local->index); + + if (local->index < 2) { + unify_local_wipe (local); + gf_log (this->name, GF_LOG_ERROR, + "returning as file found on less " + "than 2 nodes"); + STACK_UNWIND (frame, local->op_ret, + local->op_errno, local->fd); + return 0; + } + } + + if (local->failed) { + /* Open on directory, return EISDIR */ + unify_local_wipe (local); + STACK_UNWIND (frame, -1, EISDIR, local->fd); + return 0; + } + + /* Everything is perfect :) */ + local->call_count = 2; + + for (index = 0; file_list[index] != -1; index++) { + char need_break = (file_list[index+1] == -1); + STACK_WIND_COOKIE (frame, + unify_open_cbk, + priv->xl_array[file_list[index]], + priv->xl_array[file_list[index]], + priv->xl_array[file_list[index]]->fops->open, + &local->loc1, + local->flags, + local->fd); + if (need_break) + break; + } + } + + return 0; +} + + +int32_t +unify_open_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + const char *path) +{ + int16_t index = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + if (op_ret == -1) { + STACK_UNWIND (frame, -1, ENOENT); + return 0; + } + + if (path[0] == '/') { + local->name = strdup (path); + ERR_ABORT (local->name); + } else { + char *tmp_str = strdup (local->loc1.path); + char *tmp_base = dirname (tmp_str); + local->name = CALLOC (1, ZR_PATH_MAX); + strcpy (local->name, tmp_base); + strncat (local->name, "/", 1); + strcat (local->name, path); + FREE (tmp_str); + } + + local->list = CALLOC (1, sizeof (int16_t) * 3); + ERR_ABORT (local->list); + local->call_count = priv->child_count + 1; + local->op_ret = -1; + for (index = 0; index <= priv->child_count; index++) { + /* Send the lookup to all the nodes including namespace */ + STACK_WIND_COOKIE (frame, + unify_open_lookup_cbk, + (void *)(long)index, + priv->xl_array[index], + priv->xl_array[index]->fops->lookup, + &local->loc1, + NULL); + } + + return 0; +} +#endif /* GF_DARWIN_HOST_OS */ + +/** + * unify_open - + */ +int32_t +unify_open (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + fd_t *fd) +{ + unify_private_t *priv = this->private; + unify_local_t *local = NULL; + int16_t *list = NULL; + int16_t index = 0; + int16_t file_list[3] = {0,}; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Init */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + local->fd = fd; + local->flags = flags; + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + local->list = list; + file_list[0] = priv->child_count; /* Thats namespace */ + file_list[2] = -1; + for (index = 0; list[index] != -1; index++) { + local->call_count++; + if (list[index] != priv->child_count) + file_list[1] = list[index]; + } + + if (local->call_count != 2) { + /* If the lookup was done for file */ + gf_log (this->name, GF_LOG_ERROR, + "%s: entry_count is %d", + loc->path, local->call_count); + for (index = 0; local->list[index] != -1; index++) + gf_log (this->name, GF_LOG_ERROR, "%s: found on %s", + loc->path, priv->xl_array[list[index]]->name); + + if (local->call_count < 2) { + gf_log (this->name, GF_LOG_ERROR, + "returning EIO as file found on onlyone node"); + STACK_UNWIND (frame, -1, EIO, fd); + return 0; + } + } + +#ifdef GF_DARWIN_HOST_OS + /* Handle symlink here */ + if (S_ISLNK (loc->inode->st_mode)) { + /* Callcount doesn't matter here */ + STACK_WIND (frame, + unify_open_readlink_cbk, + NS(this), + NS(this)->fops->readlink, + loc, ZR_PATH_MAX); + return 0; + } +#endif /* GF_DARWIN_HOST_OS */ + + local->call_count = 2; + for (index = 0; file_list[index] != -1; index++) { + char need_break = (file_list[index+1] == -1); + STACK_WIND_COOKIE (frame, + unify_open_cbk, + priv->xl_array[file_list[index]], //cookie + priv->xl_array[file_list[index]], + priv->xl_array[file_list[index]]->fops->open, + loc, + flags, + fd); + if (need_break) + break; + } + + return 0; +} + + +int32_t +unify_create_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + unify_local_t *local = frame->local; + inode_t *inode = local->loc1.inode; + + unify_local_wipe (local); + + STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd, + inode, &local->stbuf); + + return 0; +} + +/** + * unify_create_open_cbk - + */ +int32_t +unify_create_open_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + int ret = 0; + int32_t callcnt = 0; + unify_local_t *local = frame->local; + inode_t *inode = NULL; + xlator_t *child = NULL; + uint64_t tmp_value = 0; + + LOCK (&frame->lock); + { + if (op_ret >= 0) { + local->op_ret = op_ret; + if (NS(this) != (xlator_t *)cookie) { + /* Store child node's ptr, used in all + the f*** / FileIO calls */ + /* TODO: log on failure */ + ret = fd_ctx_get (fd, this, &tmp_value); + cookie = (void *)(long)tmp_value; + } else { + /* NOTE: open successful on namespace. + * fd's ctx can be used to identify open + * failure on storage subvolume. cool + * ide ;) */ + local->failed = 0; + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + ((xlator_t *)cookie)->name, + local->loc1.path, strerror (op_errno)); + local->op_errno = op_errno; + local->failed = 1; + } + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed == 1 && (local->op_ret >= 0)) { + local->call_count = 1; + /* return -1 to user */ + local->op_ret = -1; + local->op_errno = EIO; + local->fd = fd; + local->call_count = 1; + + if (!fd_ctx_get (local->fd, this, &tmp_value)) { + child = (xlator_t *)(long)tmp_value; + + gf_log (this->name, GF_LOG_ERROR, + "Create success on child node, " + "failed on namespace"); + + STACK_WIND (frame, + unify_create_unlink_cbk, + child, + child->fops->unlink, + &local->loc1); + } else { + gf_log (this->name, GF_LOG_ERROR, + "Create success on namespace, " + "failed on child node"); + + STACK_WIND (frame, + unify_create_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + } + return 0; + } + inode = local->loc1.inode; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, fd, + inode, &local->stbuf); + } + return 0; +} + +/** + * unify_create_lookup_cbk - + */ +int32_t +unify_create_lookup_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf, + dict_t *dict) +{ + int32_t callcnt = 0; + int16_t index = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + priv->xl_array[(long)cookie]->name, + local->loc1.path, strerror (op_errno)); + local->op_errno = op_errno; + local->failed = 1; + } + + if (op_ret >= 0) { + local->op_ret = op_ret; + local->list[local->index++] = (int16_t)(long)cookie; + if (NS(this) == priv->xl_array[(long)cookie]) { + local->st_ino = buf->st_ino; + } else { + local->stbuf = *buf; + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + int16_t *list = local->list; + int16_t file_list[3] = {0,}; + local->op_ret = -1; + + local->list [local->index] = -1; + file_list[0] = list[0]; + file_list[1] = list[1]; + file_list[2] = -1; + + local->stbuf.st_ino = local->st_ino; + /* TODO: log on failure */ + inode_ctx_put (local->loc1.inode, this, + (uint64_t)(long)local->list); + + if (local->index != 2) { + /* Lookup failed, can't do open */ + gf_log (this->name, GF_LOG_ERROR, + "%s: present on %d nodes", + local->loc1.path, local->index); + file_list[0] = priv->child_count; + for (index = 0; list[index] != -1; index++) { + gf_log (this->name, GF_LOG_ERROR, + "%s: found on %s", local->loc1.path, + priv->xl_array[list[index]]->name); + if (list[index] != priv->child_count) + file_list[1] = list[index]; + } + + if (local->index < 2) { + unify_local_wipe (local); + gf_log (this->name, GF_LOG_ERROR, + "returning EIO as file found on " + "only one node"); + STACK_UNWIND (frame, -1, EIO, + local->fd, inode, NULL); + return 0; + } + } + /* Everything is perfect :) */ + local->call_count = 2; + + for (index = 0; file_list[index] != -1; index++) { + char need_break = (file_list[index+1] == -1); + STACK_WIND_COOKIE (frame, + unify_create_open_cbk, + priv->xl_array[file_list[index]], + priv->xl_array[file_list[index]], + priv->xl_array[file_list[index]]->fops->open, + &local->loc1, + local->flags, + local->fd); + if (need_break) + break; + } + } + + return 0; +} + + +/** + * unify_create_cbk - + */ +int32_t +unify_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + int ret = 0; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + inode_t *tmp_inode = NULL; + + if (op_ret == -1) { + /* send unlink () on Namespace */ + local->op_errno = op_errno; + local->op_ret = -1; + local->call_count = 1; + gf_log (this->name, GF_LOG_ERROR, + "create failed on %s (file %s, error %s), " + "sending unlink to namespace", + prev_frame->this->name, + local->loc1.path, strerror (op_errno)); + + STACK_WIND (frame, + unify_create_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + + return 0; + } + + if (op_ret >= 0) { + local->op_ret = op_ret; + local->stbuf = *buf; + /* Just inode number should be from NS node */ + local->stbuf.st_ino = local->st_ino; + + /* TODO: log on failure */ + ret = fd_ctx_set (fd, this, (uint64_t)(long)prev_frame->this); + } + + tmp_inode = local->loc1.inode; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd, + tmp_inode, &local->stbuf); + + return 0; +} + +/** + * unify_ns_create_cbk - + * + */ +int32_t +unify_ns_create_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd, + inode_t *inode, + struct stat *buf) +{ + struct sched_ops *sched_ops = NULL; + xlator_t *sched_xl = NULL; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int16_t *list = NULL; + int16_t index = 0; + + if (op_ret == -1) { + /* No need to send create request to other servers, as + namespace action failed. Handle exclusive create here. */ + if ((op_errno != EEXIST) || + ((op_errno == EEXIST) && + ((local->flags & O_EXCL) == O_EXCL))) { + /* If its just a create call without O_EXCL, + don't do this */ + gf_log (this->name, GF_LOG_ERROR, + "namespace: path(%s): %s", + local->loc1.path, strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); + return 0; + } + } + + if (op_ret >= 0) { + /* Get the inode number from the NS node */ + local->st_ino = buf->st_ino; + + local->op_ret = -1; + + /* Start the mapping list */ + list = CALLOC (1, sizeof (int16_t) * 3); + ERR_ABORT (list); + inode_ctx_put (inode, this, (uint64_t)(long)list); + list[0] = priv->child_count; + list[2] = -1; + + /* This means, file doesn't exist anywhere in the Filesystem */ + sched_ops = priv->sched_ops; + + /* Send create request to the scheduled node now */ + sched_xl = sched_ops->schedule (this, local->loc1.path); + if (sched_xl == NULL) + { + /* send unlink () on Namespace */ + local->op_errno = ENOTCONN; + local->op_ret = -1; + local->call_count = 1; + gf_log (this->name, GF_LOG_ERROR, + "no node online to schedule create:(file %s) " + "sending unlink to namespace", + (local->loc1.path)?local->loc1.path:""); + + STACK_WIND (frame, + unify_create_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + + return 0; + } + + for (index = 0; index < priv->child_count; index++) + if (sched_xl == priv->xl_array[index]) + break; + list[1] = index; + + STACK_WIND (frame, unify_create_cbk, + sched_xl, sched_xl->fops->create, + &local->loc1, local->flags, local->mode, fd); + } else { + /* File already exists, and there is no O_EXCL flag */ + + gf_log (this->name, GF_LOG_DEBUG, + "File(%s) already exists on namespace, sending " + "open instead", local->loc1.path); + + local->list = CALLOC (1, sizeof (int16_t) * 3); + ERR_ABORT (local->list); + local->call_count = priv->child_count + 1; + local->op_ret = -1; + for (index = 0; index <= priv->child_count; index++) { + /* Send lookup() to all nodes including namespace */ + STACK_WIND_COOKIE (frame, + unify_create_lookup_cbk, + (void *)(long)index, + priv->xl_array[index], + priv->xl_array[index]->fops->lookup, + &local->loc1, + NULL); + } + } + return 0; +} + +/** + * unify_create - create a file in global namespace first, so other + * clients can see them. Create the file in storage nodes in background. + */ +int32_t +unify_create (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flags, + mode_t mode, + fd_t *fd) +{ + unify_local_t *local = NULL; + + /* Initialization */ + INIT_LOCAL (frame, local); + local->mode = mode; + local->flags = flags; + local->fd = fd; + + loc_copy (&local->loc1, loc); + if (local->loc1.path == NULL) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, fd, loc->inode, NULL); + return 0; + } + + STACK_WIND (frame, + unify_ns_create_cbk, + NS(this), + NS(this)->fops->create, + loc, + flags | O_EXCL, + mode, + fd); + + return 0; +} + + +/** + * unify_opendir_cbk - + */ +int32_t +unify_opendir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + fd_t *fd) +{ + STACK_UNWIND (frame, op_ret, op_errno, fd); + + return 0; +} + +/** + * unify_opendir - + */ +int32_t +unify_opendir (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + fd_t *fd) +{ + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + STACK_WIND (frame, unify_opendir_cbk, + NS(this), NS(this)->fops->opendir, loc, fd); + + return 0; +} + + +/** + * unify_chmod - + */ +int32_t +unify_chmod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode) +{ + unify_local_t *local = NULL; + unify_private_t *priv = this->private; + int32_t index = 0; + int32_t callcnt = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + + loc_copy (&local->loc1, loc); + local->st_ino = loc->inode->ino; + + if (S_ISDIR (loc->inode->st_mode)) { + local->call_count = priv->child_count + 1; + + for (index = 0; index < (priv->child_count + 1); index++) { + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->chmod, + loc, mode); + } + } else { + inode_ctx_get (loc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + + for (index = 0; local->list[index] != -1; index++) { + local->call_count++; + callcnt++; + } + + for (index = 0; local->list[index] != -1; index++) { + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[local->list[index]], + priv->xl_array[local->list[index]]->fops->chmod, + loc, + mode); + if (!--callcnt) + break; + } + } + + return 0; +} + +/** + * unify_chown - + */ +int32_t +unify_chown (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + uid_t uid, + gid_t gid) +{ + unify_local_t *local = NULL; + unify_private_t *priv = this->private; + int32_t index = 0; + int32_t callcnt = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + local->st_ino = loc->inode->ino; + + if (S_ISDIR (loc->inode->st_mode)) { + local->call_count = priv->child_count + 1; + + for (index = 0; index < (priv->child_count + 1); index++) { + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->chown, + loc, uid, gid); + } + } else { + inode_ctx_get (loc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + + for (index = 0; local->list[index] != -1; index++) { + local->call_count++; + callcnt++; + } + + for (index = 0; local->list[index] != -1; index++) { + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[local->list[index]], + priv->xl_array[local->list[index]]->fops->chown, + loc, uid, gid); + if (!--callcnt) + break; + } + } + + return 0; +} + + +/** + * unify_truncate_cbk - + */ +int32_t +unify_truncate_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int32_t callcnt = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + prev_frame->this->name, + (local->loc1.path)?local->loc1.path:"", + strerror (op_errno)); + local->op_errno = op_errno; + if (!((op_errno == ENOENT) && priv->optimist)) + local->op_ret = -1; + } + + if (op_ret >= 0) { + if (NS (this) == prev_frame->this) { + local->st_ino = buf->st_ino; + /* If the entry is directory, get the + stat from NS node */ + if (S_ISDIR (buf->st_mode) || + !local->stbuf.st_blksize) { + local->stbuf = *buf; + } + } + + if ((!S_ISDIR (buf->st_mode)) && + (NS (this) != prev_frame->this)) { + /* If file, take the stat info from + Storage node. */ + local->stbuf = *buf; + } + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->st_ino) + local->stbuf.st_ino = local->st_ino; + else + local->op_ret = -1; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + } + + return 0; +} + +/** + * unify_truncate - + */ +int32_t +unify_truncate (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + off_t offset) +{ + unify_local_t *local = NULL; + unify_private_t *priv = this->private; + int32_t index = 0; + int32_t callcnt = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + local->st_ino = loc->inode->ino; + + if (S_ISDIR (loc->inode->st_mode)) { + local->call_count = 1; + + STACK_WIND (frame, + unify_buf_cbk, + NS(this), + NS(this)->fops->stat, + loc); + } else { + local->op_ret = 0; + inode_ctx_get (loc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + + for (index = 0; local->list[index] != -1; index++) { + local->call_count++; + callcnt++; + } + + /* Don't send truncate to NS node */ + STACK_WIND (frame, unify_truncate_cbk, NS(this), + NS(this)->fops->stat, loc); + callcnt--; + + for (index = 0; local->list[index] != -1; index++) { + if (NS(this) != priv->xl_array[local->list[index]]) { + STACK_WIND (frame, + unify_truncate_cbk, + priv->xl_array[local->list[index]], + priv->xl_array[local->list[index]]->fops->truncate, + loc, + offset); + if (!--callcnt) + break; + } + } + } + + return 0; +} + +/** + * unify_utimens - + */ +int32_t +unify_utimens (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + struct timespec tv[2]) +{ + unify_local_t *local = NULL; + unify_private_t *priv = this->private; + int32_t index = 0; + int32_t callcnt = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + local->st_ino = loc->inode->ino; + + if (S_ISDIR (loc->inode->st_mode)) { + local->call_count = priv->child_count + 1; + + for (index = 0; index < (priv->child_count + 1); index++) { + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->utimens, + loc, tv); + } + } else { + inode_ctx_get (loc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + + for (index = 0; local->list[index] != -1; index++) { + local->call_count++; + callcnt++; + } + + for (index = 0; local->list[index] != -1; index++) { + STACK_WIND (frame, + unify_buf_cbk, + priv->xl_array[local->list[index]], + priv->xl_array[local->list[index]]->fops->utimens, + loc, + tv); + if (!--callcnt) + break; + } + } + + return 0; +} + +/** + * unify_readlink_cbk - + */ +int32_t +unify_readlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + const char *path) +{ + STACK_UNWIND (frame, op_ret, op_errno, path); + return 0; +} + +/** + * unify_readlink - Read the link only from the storage node. + */ +int32_t +unify_readlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + size_t size) +{ + unify_private_t *priv = this->private; + int32_t entry_count = 0; + int16_t *list = NULL; + int16_t index = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + for (index = 0; list[index] != -1; index++) + entry_count++; + + if (entry_count >= 2) { + for (index = 0; list[index] != -1; index++) { + if (priv->xl_array[list[index]] != NS(this)) { + STACK_WIND (frame, + unify_readlink_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->readlink, + loc, + size); + break; + } + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "returning ENOENT, no softlink files found " + "on storage node"); + STACK_UNWIND (frame, -1, ENOENT, NULL); + } + + return 0; +} + + +/** + * unify_unlink_cbk - + */ +int32_t +unify_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret == 0 || ((op_errno == ENOENT) && priv->optimist)) + local->op_ret = 0; + if (op_ret == -1) + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno); + } + + return 0; +} + + +/** + * unify_unlink - + */ +int32_t +unify_unlink (call_frame_t *frame, + xlator_t *this, + loc_t *loc) +{ + unify_private_t *priv = this->private; + unify_local_t *local = NULL; + int16_t *list = NULL; + int16_t index = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + for (index = 0; list[index] != -1; index++) + local->call_count++; + + if (local->call_count) { + for (index = 0; list[index] != -1; index++) { + char need_break = (list[index+1] == -1); + STACK_WIND (frame, + unify_unlink_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->unlink, + loc); + if (need_break) + break; + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "%s: returning ENOENT", loc->path); + STACK_UNWIND (frame, -1, ENOENT); + } + + return 0; +} + + +/** + * unify_readv_cbk - + */ +int32_t +unify_readv_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iovec *vector, + int32_t count, + struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); + return 0; +} + +/** + * unify_readv - + */ +int32_t +unify_readv (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, + unify_readv_cbk, + child, + child->fops->readv, + fd, + size, + offset); + + + return 0; +} + +/** + * unify_writev_cbk - + */ +int32_t +unify_writev_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *stbuf) +{ + STACK_UNWIND (frame, op_ret, op_errno, stbuf); + return 0; +} + +/** + * unify_writev - + */ +int32_t +unify_writev (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + struct iovec *vector, + int32_t count, + off_t off) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, + unify_writev_cbk, + child, + child->fops->writev, + fd, + vector, + count, + off); + + return 0; +} + +/** + * unify_ftruncate - + */ +int32_t +unify_ftruncate (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + off_t offset) +{ + xlator_t *child = NULL; + unify_local_t *local = NULL; + uint64_t tmp_child = 0; + + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(fd); + + /* Initialization */ + INIT_LOCAL (frame, local); + local->op_ret = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + local->call_count = 2; + + STACK_WIND (frame, unify_truncate_cbk, + child, child->fops->ftruncate, + fd, offset); + + STACK_WIND (frame, unify_truncate_cbk, + NS(this), NS(this)->fops->fstat, + fd); + + return 0; +} + + +/** + * unify_fchmod - + */ +int32_t +unify_fchmod (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + mode_t mode) +{ + unify_local_t *local = NULL; + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd); + + /* Initialization */ + INIT_LOCAL (frame, local); + local->st_ino = fd->inode->ino; + + if (!fd_ctx_get (fd, this, &tmp_child)) { + /* If its set, then its file */ + child = (xlator_t *)(long)tmp_child; + + local->call_count = 2; + + STACK_WIND (frame, unify_buf_cbk, child, + child->fops->fchmod, fd, mode); + + STACK_WIND (frame, unify_buf_cbk, NS(this), + NS(this)->fops->fchmod, fd, mode); + + } else { + /* this is an directory */ + local->call_count = 1; + + STACK_WIND (frame, unify_buf_cbk, + NS(this), NS(this)->fops->fchmod, fd, mode); + } + + return 0; +} + +/** + * unify_fchown - + */ +int32_t +unify_fchown (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + uid_t uid, + gid_t gid) +{ + unify_local_t *local = NULL; + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd); + + /* Initialization */ + INIT_LOCAL (frame, local); + local->st_ino = fd->inode->ino; + + if (!fd_ctx_get (fd, this, &tmp_child)) { + /* If its set, then its file */ + child = (xlator_t *)(long)tmp_child; + + local->call_count = 2; + + STACK_WIND (frame, unify_buf_cbk, child, + child->fops->fchown, fd, uid, gid); + + STACK_WIND (frame, unify_buf_cbk, NS(this), + NS(this)->fops->fchown, fd, uid, gid); + } else { + local->call_count = 1; + + STACK_WIND (frame, unify_buf_cbk, + NS(this), NS(this)->fops->fchown, + fd, uid, gid); + } + + return 0; +} + +/** + * unify_flush_cbk - + */ +int32_t +unify_flush_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/** + * unify_flush - + */ +int32_t +unify_flush (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, unify_flush_cbk, child, + child->fops->flush, fd); + + return 0; +} + + +/** + * unify_fsync_cbk - + */ +int32_t +unify_fsync_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/** + * unify_fsync - + */ +int32_t +unify_fsync (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, unify_fsync_cbk, child, + child->fops->fsync, fd, flags); + + return 0; +} + +/** + * unify_fstat - Send fstat FOP to Namespace only if its directory, and to + * both namespace and the storage node if its a file. + */ +int32_t +unify_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd) +{ + unify_local_t *local = NULL; + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd); + + INIT_LOCAL (frame, local); + local->st_ino = fd->inode->ino; + + if (!fd_ctx_get (fd, this, &tmp_child)) { + /* If its set, then its file */ + child = (xlator_t *)(long)tmp_child; + local->call_count = 2; + + STACK_WIND (frame, unify_buf_cbk, child, + child->fops->fstat, fd); + + STACK_WIND (frame, unify_buf_cbk, NS(this), + NS(this)->fops->fstat, fd); + + } else { + /* this is an directory */ + local->call_count = 1; + STACK_WIND (frame, unify_buf_cbk, NS(this), + NS(this)->fops->fstat, fd); + } + + return 0; +} + +/** + * unify_getdents_cbk - + */ +int32_t +unify_getdents_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dir_entry_t *entry, + int32_t count) +{ + STACK_UNWIND (frame, op_ret, op_errno, entry, count); + return 0; +} + +/** + * unify_getdents - send the FOP request to all the nodes. + */ +int32_t +unify_getdents (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset, + int32_t flag) +{ + UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd); + + STACK_WIND (frame, unify_getdents_cbk, NS(this), + NS(this)->fops->getdents, fd, size, offset, flag); + + return 0; +} + + +/** + * unify_readdir_cbk - + */ +int32_t +unify_readdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + gf_dirent_t *buf) +{ + STACK_UNWIND (frame, op_ret, op_errno, buf); + + return 0; +} + +/** + * unify_readdir - send the FOP request to all the nodes. + */ +int32_t +unify_readdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + size_t size, + off_t offset) +{ + UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd); + + STACK_WIND (frame, unify_readdir_cbk, NS(this), + NS(this)->fops->readdir, fd, size, offset); + + return 0; +} + + +/** + * unify_fsyncdir_cbk - + */ +int32_t +unify_fsyncdir_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + + return 0; +} + +/** + * unify_fsyncdir - + */ +int32_t +unify_fsyncdir (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t flags) +{ + UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd); + + STACK_WIND (frame, unify_fsyncdir_cbk, + NS(this), NS(this)->fops->fsyncdir, fd, flags); + + return 0; +} + +/** + * unify_lk_cbk - UNWIND frame with the proper return arguments. + */ +int32_t +unify_lk_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct flock *lock) +{ + STACK_UNWIND (frame, op_ret, op_errno, lock); + return 0; +} + +/** + * unify_lk - Send it to all the storage nodes, (should be 1) which has file. + */ +int32_t +unify_lk (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t cmd, + struct flock *lock) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, unify_lk_cbk, child, + child->fops->lk, fd, cmd, lock); + + return 0; +} + + +int32_t +unify_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno); + +static int32_t +unify_setxattr_file_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + unify_private_t *private = this->private; + unify_local_t *local = frame->local; + xlator_t *sched_xl = NULL; + struct sched_ops *sched_ops = NULL; + + if (op_ret == -1) { + if (!ENOTSUP) + gf_log (this->name, GF_LOG_ERROR, + "setxattr with XATTR_CREATE on ns: " + "path(%s) key(%s): %s", + local->loc1.path, local->name, + strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno); + return 0; + } + + LOCK (&frame->lock); + { + local->failed = 0; + local->op_ret = 0; + local->op_errno = 0; + local->call_count = 1; + } + UNLOCK (&frame->lock); + + /* schedule XATTR_CREATE on one of the child node */ + sched_ops = private->sched_ops; + + /* Send create request to the scheduled node now */ + sched_xl = sched_ops->schedule (this, local->name); + if (!sched_xl) { + STACK_UNWIND (frame, -1, ENOTCONN); + return 0; + } + + STACK_WIND (frame, + unify_setxattr_cbk, + sched_xl, + sched_xl->fops->setxattr, + &local->loc1, + local->dict, + local->flags); + return 0; +} + +/** + * unify_setxattr_cbk - When all the child nodes return, UNWIND frame. + */ +int32_t +unify_setxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + dict_t *dict = NULL; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, (((op_errno == ENOENT) || + (op_errno == ENOTSUP))? + GF_LOG_DEBUG : GF_LOG_ERROR), + "child(%s): path(%s): %s", + prev_frame->this->name, + (local->loc1.path)?local->loc1.path:"", + strerror (op_errno)); + if (local->failed == -1) { + local->failed = 1; + } + local->op_errno = op_errno; + } else { + local->failed = 0; + local->op_ret = op_ret; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed && local->name && + ZR_FILE_CONTENT_REQUEST(local->name)) { + dict = get_new_dict (); + dict_set (dict, local->dict->members_list->key, + data_from_dynptr(NULL, 0)); + dict_ref (dict); + + local->call_count = 1; + + STACK_WIND (frame, + unify_setxattr_file_cbk, + NS(this), + NS(this)->fops->setxattr, + &local->loc1, + dict, + XATTR_CREATE); + + dict_unref (dict); + return 0; + } + + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno); + } + + return 0; +} + +/** + * unify_sexattr - This function should be sent to all the storage nodes, + * which contains the file, (excluding namespace). + */ +int32_t +unify_setxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + dict_t *dict, + int32_t flags) +{ + unify_private_t *priv = this->private; + unify_local_t *local = NULL; + int16_t *list = NULL; + int16_t index = 0; + int32_t call_count = 0; + uint64_t tmp_list = 0; + data_pair_t *trav = dict->members_list; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + local->failed = -1; + loc_copy (&local->loc1, loc); + + if (S_ISDIR (loc->inode->st_mode)) { + + if (trav && trav->key && ZR_FILE_CONTENT_REQUEST(trav->key)) { + /* direct the storage xlators to change file + content only if file exists */ + local->flags = flags; + local->dict = dict; + local->name = strdup (trav->key); + flags |= XATTR_REPLACE; + } + + local->call_count = priv->child_count; + for (index = 0; index < priv->child_count; index++) { + STACK_WIND (frame, + unify_setxattr_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->setxattr, + loc, dict, flags); + } + return 0; + } + + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + for (index = 0; list[index] != -1; index++) { + if (NS(this) != priv->xl_array[list[index]]) { + local->call_count++; + call_count++; + } + } + + if (local->call_count) { + for (index = 0; list[index] != -1; index++) { + if (priv->xl_array[list[index]] != NS(this)) { + STACK_WIND (frame, + unify_setxattr_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->setxattr, + loc, + dict, + flags); + if (!--call_count) + break; + } + } + return 0; + } + + /* No entry in storage nodes */ + gf_log (this->name, GF_LOG_DEBUG, + "returning ENOENT, file not found on storage node."); + STACK_UNWIND (frame, -1, ENOENT); + + return 0; +} + + +/** + * unify_getxattr_cbk - This function is called from only one child, so, no + * need of any lock or anything else, just send it to above layer + */ +int32_t +unify_getxattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + dict_t *value) +{ + int32_t callcnt = 0; + dict_t *local_value = NULL; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + local->op_errno = op_errno; + gf_log (this->name, + (((op_errno == ENOENT) || + (op_errno == ENODATA) || + (op_errno == ENOTSUP)) ? + GF_LOG_DEBUG : GF_LOG_ERROR), + "child(%s): path(%s): %s", + prev_frame->this->name, + (local->loc1.path)?local->loc1.path:"", + strerror (op_errno)); + } else { + if (!local->dict) + local->dict = dict_ref (value); + local->op_ret = op_ret; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + local_value = local->dict; + local->dict = NULL; + + STACK_UNWIND (frame, local->op_ret, local->op_errno, + local_value); + + if (local_value) + dict_unref (local_value); + } + + return 0; +} + + +/** + * unify_getxattr - This FOP is sent to only the storage node. + */ +int32_t +unify_getxattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + unify_private_t *priv = this->private; + int16_t *list = NULL; + int16_t index = 0; + int16_t count = 0; + unify_local_t *local = NULL; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + INIT_LOCAL (frame, local); + + if (S_ISDIR (loc->inode->st_mode)) { + local->call_count = priv->child_count; + for (index = 0; index < priv->child_count; index++) + STACK_WIND (frame, + unify_getxattr_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->getxattr, + loc, + name); + return 0; + } + + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + for (index = 0; list[index] != -1; index++) { + if (NS(this) != priv->xl_array[list[index]]) { + local->call_count++; + count++; + } + } + + if (count) { + for (index = 0; list[index] != -1; index++) { + if (priv->xl_array[list[index]] != NS(this)) { + STACK_WIND (frame, + unify_getxattr_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->getxattr, + loc, + name); + if (!--count) + break; + } + } + } else { + dict_t *tmp_dict = get_new_dict (); + gf_log (this->name, GF_LOG_DEBUG, + "%s: returning ENODATA, no file found on storage node", + loc->path); + STACK_UNWIND (frame, -1, ENODATA, tmp_dict); + dict_destroy (tmp_dict); + } + + return 0; +} + +/** + * unify_removexattr_cbk - Wait till all the child node returns the call + * and then UNWIND to above layer. + */ +int32_t +unify_removexattr_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret == -1) { + local->op_errno = op_errno; + if (op_errno != ENOTSUP) + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + prev_frame->this->name, + local->loc1.path, strerror (op_errno)); + } else { + local->op_ret = op_ret; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + STACK_UNWIND (frame, local->op_ret, local->op_errno); + } + + return 0; +} + +/** + * unify_removexattr - Send it to all the child nodes which has the files. + */ +int32_t +unify_removexattr (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + const char *name) +{ + unify_private_t *priv = this->private; + unify_local_t *local = NULL; + int16_t *list = NULL; + int16_t index = 0; + int32_t call_count = 0; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc); + + /* Initialization */ + INIT_LOCAL (frame, local); + + if (S_ISDIR (loc->inode->st_mode)) { + local->call_count = priv->child_count; + for (index = 0; index < priv->child_count; index++) + STACK_WIND (frame, + unify_removexattr_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->removexattr, + loc, + name); + + return 0; + } + + inode_ctx_get (loc->inode, this, &tmp_list); + list = (int16_t *)(long)tmp_list; + + for (index = 0; list[index] != -1; index++) { + if (NS(this) != priv->xl_array[list[index]]) { + local->call_count++; + call_count++; + } + } + + if (local->call_count) { + for (index = 0; list[index] != -1; index++) { + if (priv->xl_array[list[index]] != NS(this)) { + STACK_WIND (frame, + unify_removexattr_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->removexattr, + loc, + name); + if (!--call_count) + break; + } + } + return 0; + } + + gf_log (this->name, GF_LOG_DEBUG, + "%s: returning ENOENT, not found on storage node.", loc->path); + STACK_UNWIND (frame, -1, ENOENT); + + return 0; +} + + +int32_t +unify_mknod_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + unify_local_t *local = frame->local; + + if (op_ret == -1) + gf_log (this->name, GF_LOG_ERROR, + "%s: %s", local->loc1.path, strerror (op_errno)); + + unify_local_wipe (local); + /* No log required here as this -1 is for mknod call */ + STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL); + return 0; +} + +/** + * unify_mknod_cbk - + */ +int32_t +unify_mknod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + unify_local_t *local = frame->local; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "mknod failed on storage node, sending unlink to " + "namespace"); + local->op_errno = op_errno; + STACK_WIND (frame, + unify_mknod_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + return 0; + } + + local->stbuf = *buf; + local->stbuf.st_ino = local->st_ino; + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf); + return 0; +} + +/** + * unify_ns_mknod_cbk - + */ +int32_t +unify_ns_mknod_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + struct sched_ops *sched_ops = NULL; + xlator_t *sched_xl = NULL; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int16_t *list = NULL; + int16_t index = 0; + call_frame_t *prev_frame = cookie; + + if (op_ret == -1) { + /* No need to send mknod request to other servers, + * as namespace action failed + */ + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s): %s", + prev_frame->this->name, local->loc1.path, + strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; + } + + /* Create one inode for this entry */ + local->op_ret = 0; + local->stbuf = *buf; + local->st_ino = buf->st_ino; + + list = CALLOC (1, sizeof (int16_t) * 3); + ERR_ABORT (list); + list[0] = priv->child_count; + list[2] = -1; + inode_ctx_put (inode, this, (uint64_t)(long)list); + + sched_ops = priv->sched_ops; + + /* Send mknod request to scheduled node now */ + sched_xl = sched_ops->schedule (this, local->loc1.path); + if (!sched_xl) { + gf_log (this->name, GF_LOG_ERROR, + "mknod failed on storage node, no node online " + "at the moment, sending unlink to NS"); + local->op_errno = ENOTCONN; + STACK_WIND (frame, + unify_mknod_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + + return 0; + } + + for (index = 0; index < priv->child_count; index++) + if (sched_xl == priv->xl_array[index]) + break; + list[1] = index; + + STACK_WIND (frame, unify_mknod_cbk, + sched_xl, sched_xl->fops->mknod, + &local->loc1, local->mode, local->dev); + + return 0; +} + +/** + * unify_mknod - Create a device on namespace first, and later create on + * the storage node. + */ +int32_t +unify_mknod (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + mode_t mode, + dev_t rdev) +{ + unify_local_t *local = NULL; + + /* Initialization */ + INIT_LOCAL (frame, local); + local->mode = mode; + local->dev = rdev; + loc_copy (&local->loc1, loc); + if (local->loc1.path == NULL) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL); + return 0; + } + + STACK_WIND (frame, + unify_ns_mknod_cbk, + NS(this), + NS(this)->fops->mknod, + loc, + mode, + rdev); + + return 0; +} + +int32_t +unify_symlink_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + unify_local_t *local = frame->local; + if (op_ret == -1) + gf_log (this->name, GF_LOG_ERROR, + "%s: %s", local->loc1.path, strerror (op_errno)); + + unify_local_wipe (local); + STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL); + return 0; +} + +/** + * unify_symlink_cbk - + */ +int32_t +unify_symlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + unify_local_t *local = frame->local; + + if (op_ret == -1) { + /* Symlink on storage node failed, hence send unlink + to the NS node */ + local->op_errno = op_errno; + gf_log (this->name, GF_LOG_ERROR, + "symlink on storage node failed, sending unlink " + "to namespace"); + + STACK_WIND (frame, + unify_symlink_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + + return 0; + } + + local->stbuf = *buf; + local->stbuf.st_ino = local->st_ino; + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf); + + return 0; +} + +/** + * unify_ns_symlink_cbk - + */ +int32_t +unify_ns_symlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + + struct sched_ops *sched_ops = NULL; + xlator_t *sched_xl = NULL; + int16_t *list = NULL; + unify_local_t *local = frame->local; + unify_private_t *priv = this->private; + int16_t index = 0; + + if (op_ret == -1) { + /* No need to send symlink request to other servers, + * as namespace action failed + */ + gf_log (this->name, GF_LOG_ERROR, + "namespace: path(%s): %s", + local->loc1.path, strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, NULL, buf); + return 0; + } + + /* Create one inode for this entry */ + local->op_ret = 0; + local->st_ino = buf->st_ino; + + /* Start the mapping list */ + + list = CALLOC (1, sizeof (int16_t) * 3); + ERR_ABORT (list); + list[0] = priv->child_count; //namespace's index + list[2] = -1; + inode_ctx_put (inode, this, (uint64_t)(long)list); + + sched_ops = priv->sched_ops; + + /* Send symlink request to all the nodes now */ + sched_xl = sched_ops->schedule (this, local->loc1.path); + if (!sched_xl) { + /* Symlink on storage node failed, hence send unlink + to the NS node */ + local->op_errno = ENOTCONN; + gf_log (this->name, GF_LOG_ERROR, + "symlink on storage node failed, no node online, " + "sending unlink to namespace"); + + STACK_WIND (frame, + unify_symlink_unlink_cbk, + NS(this), + NS(this)->fops->unlink, + &local->loc1); + + return 0; + } + + for (index = 0; index < priv->child_count; index++) + if (sched_xl == priv->xl_array[index]) + break; + list[1] = index; + + STACK_WIND (frame, + unify_symlink_cbk, + sched_xl, + sched_xl->fops->symlink, + local->name, + &local->loc1); + + return 0; +} + +/** + * unify_symlink - + */ +int32_t +unify_symlink (call_frame_t *frame, + xlator_t *this, + const char *linkpath, + loc_t *loc) +{ + unify_local_t *local = NULL; + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, loc); + local->name = strdup (linkpath); + + if ((local->name == NULL) || + (local->loc1.path == NULL)) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL); + return 0; + } + + STACK_WIND (frame, + unify_ns_symlink_cbk, + NS(this), + NS(this)->fops->symlink, + linkpath, + loc); + + return 0; +} + + +int32_t +unify_rename_unlink_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno) +{ + int32_t callcnt = 0; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s -> %s): %s", + prev_frame->this->name, + local->loc1.path, local->loc2.path, + strerror (op_errno)); + + } + LOCK (&frame->lock); + { + callcnt = --local->call_count; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + local->stbuf.st_ino = local->st_ino; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, + &local->stbuf); + } + return 0; +} + +int32_t +unify_ns_rename_undo_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + unify_local_t *local = frame->local; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "namespace: path(%s -> %s): %s", + local->loc1.path, local->loc2.path, + strerror (op_errno)); + } + + local->stbuf.st_ino = local->st_ino; + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf); + return 0; +} + +int32_t +unify_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int32_t index = 0; + int32_t callcnt = 0; + int16_t *list = NULL; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + call_frame_t *prev_frame = cookie; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret >= 0) { + if (!S_ISDIR (buf->st_mode)) + local->stbuf = *buf; + local->op_ret = op_ret; + } else { + gf_log (this->name, GF_LOG_ERROR, + "child(%s): path(%s -> %s): %s", + prev_frame->this->name, + local->loc1.path, local->loc2.path, + strerror (op_errno)); + local->op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + local->stbuf.st_ino = local->st_ino; + if (S_ISDIR (local->loc1.inode->st_mode)) { + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf); + return 0; + } + + if (local->op_ret == -1) { + /* TODO: check this logic */ + + /* Rename failed in storage node, successful on NS, + * hence, rename back the entries in NS */ + /* NOTE: this will be done only if the destination + * doesn't exists, if the destination exists, the + * job of correcting NS is left to self-heal + */ + if (!local->index) { + loc_t tmp_oldloc = { + /* its actual 'newloc->path' */ + .path = local->loc2.path, + .inode = local->loc1.inode, + .parent = local->loc2.parent + }; + + loc_t tmp_newloc = { + /* Actual 'oldloc->path' */ + .path = local->loc1.path, + .parent = local->loc1.parent + }; + + gf_log (this->name, GF_LOG_ERROR, + "rename succussful on namespace, on " + "stroage node failed, reverting back"); + + STACK_WIND (frame, + unify_ns_rename_undo_cbk, + NS(this), + NS(this)->fops->rename, + &tmp_oldloc, + &tmp_newloc); + return 0; + } + } else { + /* Rename successful on storage nodes */ + + int32_t idx = 0; + int16_t *tmp_list = NULL; + uint64_t tmp_list_int64 = 0; + if (local->loc2.inode) { + inode_ctx_get (local->loc2.inode, + this, &tmp_list_int64); + list = (int16_t *)(long)tmp_list_int64; + + } + + if (list) { + for (index = 0; list[index] != -1; index++); + tmp_list = CALLOC (1, index * 2); + memcpy (tmp_list, list, index * 2); + + for (index = 0; list[index] != -1; index++) { + /* TODO: Check this logic. */ + /* If the destination file exists in + * the same storage node where we sent + * 'rename' call, no need to send + * unlink + */ + for (idx = 0; + local->list[idx] != -1; idx++) { + if (tmp_list[index] == local->list[idx]) { + tmp_list[index] = priv->child_count; + continue; + } + } + + if (NS(this) != priv->xl_array[tmp_list[index]]) { + local->call_count++; + callcnt++; + } + } + + if (local->call_count) { + if (callcnt > 1) + gf_log (this->name, + GF_LOG_ERROR, + "%s->%s: more (%d) " + "subvolumes have the " + "newloc entry", + local->loc1.path, + local->loc2.path, + callcnt); + + for (index=0; + tmp_list[index] != -1; index++) { + if (NS(this) != priv->xl_array[tmp_list[index]]) { + STACK_WIND (frame, + unify_rename_unlink_cbk, + priv->xl_array[tmp_list[index]], + priv->xl_array[tmp_list[index]]->fops->unlink, + &local->loc2); + if (!--callcnt) + break; + } + } + + FREE (tmp_list); + return 0; + } + if (tmp_list) + FREE (tmp_list); + } + } + + /* Need not send 'unlink' to storage node */ + unify_local_wipe (local); + STACK_UNWIND (frame, local->op_ret, + local->op_errno, &local->stbuf); + } + + return 0; +} + +int32_t +unify_ns_rename_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct stat *buf) +{ + int32_t index = 0; + int32_t callcnt = 0; + int16_t *list = NULL; + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + + if (op_ret == -1) { + /* Free local->new_inode */ + gf_log (this->name, GF_LOG_ERROR, + "namespace: path(%s -> %s): %s", + local->loc1.path, local->loc2.path, + strerror (op_errno)); + + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, buf); + return 0; + } + + local->stbuf = *buf; + local->st_ino = buf->st_ino; + + /* Everything is fine. */ + if (S_ISDIR (buf->st_mode)) { + local->call_count = priv->child_count; + for (index=0; index < priv->child_count; index++) { + STACK_WIND (frame, + unify_rename_cbk, + priv->xl_array[index], + priv->xl_array[index]->fops->rename, + &local->loc1, + &local->loc2); + } + + return 0; + } + + local->call_count = 0; + /* send rename */ + list = local->list; + for (index=0; list[index] != -1; index++) { + if (NS(this) != priv->xl_array[list[index]]) { + local->call_count++; + callcnt++; + } + } + + if (local->call_count) { + for (index=0; list[index] != -1; index++) { + if (NS(this) != priv->xl_array[list[index]]) { + STACK_WIND (frame, + unify_rename_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->rename, + &local->loc1, + &local->loc2); + if (!--callcnt) + break; + } + } + } else { + /* file doesn't seem to be present in storage nodes */ + gf_log (this->name, GF_LOG_CRITICAL, + "CRITICAL: source file not in storage node, " + "rename successful on namespace :O"); + unify_local_wipe (local); + STACK_UNWIND (frame, -1, EIO, NULL); + } + return 0; +} + + +/** + * unify_rename - One of the tricky function. The deadliest of all :O + */ +int32_t +unify_rename (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + unify_local_t *local = NULL; + uint64_t tmp_list = 0; + + /* Initialization */ + INIT_LOCAL (frame, local); + loc_copy (&local->loc1, oldloc); + loc_copy (&local->loc2, newloc); + + if ((local->loc1.path == NULL) || + (local->loc2.path == NULL)) { + gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O"); + STACK_UNWIND (frame, -1, ENOMEM, NULL); + return 0; + } + + inode_ctx_get (oldloc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + + STACK_WIND (frame, + unify_ns_rename_cbk, + NS(this), + NS(this)->fops->rename, + oldloc, + newloc); + return 0; +} + +/** + * unify_link_cbk - + */ +int32_t +unify_link_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + unify_local_t *local = frame->local; + + if (op_ret >= 0) + local->stbuf = *buf; + local->stbuf.st_ino = local->st_ino; + + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf); + + return 0; +} + +/** + * unify_ns_link_cbk - + */ +int32_t +unify_ns_link_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + inode_t *inode, + struct stat *buf) +{ + unify_private_t *priv = this->private; + unify_local_t *local = frame->local; + int16_t *list = local->list; + int16_t index = 0; + + if (op_ret == -1) { + /* No need to send link request to other servers, + * as namespace action failed + */ + gf_log (this->name, GF_LOG_ERROR, + "namespace: path(%s -> %s): %s", + local->loc1.path, local->loc2.path, + strerror (op_errno)); + unify_local_wipe (local); + STACK_UNWIND (frame, op_ret, op_errno, inode, buf); + return 0; + } + + /* Update inode for this entry */ + local->op_ret = 0; + local->st_ino = buf->st_ino; + + /* Send link request to the node now */ + for (index = 0; list[index] != -1; index++) { + char need_break = (list[index+1] == -1); + if (priv->xl_array[list[index]] != NS (this)) { + STACK_WIND (frame, + unify_link_cbk, + priv->xl_array[list[index]], + priv->xl_array[list[index]]->fops->link, + &local->loc1, + &local->loc2); + } + if (need_break) + break; + } + + return 0; +} + +/** + * unify_link - + */ +int32_t +unify_link (call_frame_t *frame, + xlator_t *this, + loc_t *oldloc, + loc_t *newloc) +{ + unify_local_t *local = NULL; + uint64_t tmp_list = 0; + + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc); + UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (newloc); + + /* Initialization */ + INIT_LOCAL (frame, local); + + loc_copy (&local->loc1, oldloc); + loc_copy (&local->loc2, newloc); + + inode_ctx_get (oldloc->inode, this, &tmp_list); + local->list = (int16_t *)(long)tmp_list; + + STACK_WIND (frame, + unify_ns_link_cbk, + NS(this), + NS(this)->fops->link, + oldloc, + newloc); + + return 0; +} + + +/** + * unify_checksum_cbk - + */ +int32_t +unify_checksum_cbk (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + uint8_t *fchecksum, + uint8_t *dchecksum) +{ + STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum); + + return 0; +} + +/** + * unify_checksum - + */ +int32_t +unify_checksum (call_frame_t *frame, + xlator_t *this, + loc_t *loc, + int32_t flag) +{ + STACK_WIND (frame, + unify_checksum_cbk, + NS(this), + NS(this)->fops->checksum, + loc, + flag); + + return 0; +} + + +/** + * unify_finodelk_cbk - + */ +int +unify_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/** + * unify_finodelk + */ +int +unify_finodelk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int cmd, struct flock *flock) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, unify_finodelk_cbk, + child, child->fops->finodelk, + fd, cmd, flock); + + return 0; +} + + + +/** + * unify_fentrylk_cbk - + */ +int +unify_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/** + * unify_fentrylk + */ +int +unify_fentrylk (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type) + +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, unify_fentrylk_cbk, + child, child->fops->fentrylk, + fd, basename, cmd, type); + + return 0; +} + + + +/** + * unify_fxattrop_cbk - + */ +int +unify_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr) +{ + STACK_UNWIND (frame, op_ret, op_errno, xattr); + return 0; +} + +/** + * unify_fxattrop + */ +int +unify_fxattrop (call_frame_t *frame, xlator_t *this, + fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr) +{ + UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd); + xlator_t *child = NULL; + uint64_t tmp_child = 0; + + fd_ctx_get (fd, this, &tmp_child); + child = (xlator_t *)(long)tmp_child; + + STACK_WIND (frame, unify_fxattrop_cbk, + child, child->fops->fxattrop, + fd, optype, xattr); + + return 0; +} + + +/** + * unify_inodelk_cbk - + */ +int +unify_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + + +/** + * unify_inodelk + */ +int +unify_inodelk (call_frame_t *frame, xlator_t *this, + loc_t *loc, int cmd, struct flock *flock) +{ + xlator_t *child = NULL; + + child = unify_loc_subvol (loc, this); + + STACK_WIND (frame, unify_inodelk_cbk, + child, child->fops->inodelk, + loc, cmd, flock); + + return 0; +} + + + +/** + * unify_entrylk_cbk - + */ +int +unify_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno) +{ + STACK_UNWIND (frame, op_ret, op_errno); + return 0; +} + +/** + * unify_entrylk + */ +int +unify_entrylk (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type) + +{ + xlator_t *child = NULL; + + child = unify_loc_subvol (loc, this); + + STACK_WIND (frame, unify_entrylk_cbk, + child, child->fops->entrylk, + loc, basename, cmd, type); + + return 0; +} + + + +/** + * unify_xattrop_cbk - + */ +int +unify_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr) +{ + STACK_UNWIND (frame, op_ret, op_errno, xattr); + return 0; +} + +/** + * unify_xattrop + */ +int +unify_xattrop (call_frame_t *frame, xlator_t *this, + loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr) +{ + xlator_t *child = NULL; + + child = unify_loc_subvol (loc, this); + + STACK_WIND (frame, unify_xattrop_cbk, + child, child->fops->xattrop, + loc, optype, xattr); + + return 0; +} + + +/** + * notify + */ +int32_t +notify (xlator_t *this, + int32_t event, + void *data, + ...) +{ + unify_private_t *priv = this->private; + struct sched_ops *sched = NULL; + + if (!priv) { + return 0; + } + + sched = priv->sched_ops; + if (!sched) { + gf_log (this->name, GF_LOG_CRITICAL, "No scheduler :O"); + raise (SIGTERM); + return 0; + } + if (priv->namespace == data) { + if (event == GF_EVENT_CHILD_UP) { + sched->notify (this, event, data); + } + return 0; + } + + switch (event) + { + case GF_EVENT_CHILD_UP: + { + /* Call scheduler's update () to enable it for scheduling */ + sched->notify (this, event, data); + + LOCK (&priv->lock); + { + /* Increment the inode's generation, which is + used for self_heal */ + ++priv->inode_generation; + ++priv->num_child_up; + } + UNLOCK (&priv->lock); + + if (!priv->is_up) { + default_notify (this, event, data); + priv->is_up = 1; + } + } + break; + case GF_EVENT_CHILD_DOWN: + { + /* Call scheduler's update () to disable the child node + * for scheduling + */ + sched->notify (this, event, data); + LOCK (&priv->lock); + { + --priv->num_child_up; + } + UNLOCK (&priv->lock); + + if (priv->num_child_up == 0) { + /* Send CHILD_DOWN to upper layer */ + default_notify (this, event, data); + priv->is_up = 0; + } + } + break; + + default: + { + default_notify (this, event, data); + } + break; + } + + return 0; +} + +/** + * init - This function is called first in the xlator, while initializing. + * All the config file options are checked and appropriate flags are set. + * + * @this - + */ +int32_t +init (xlator_t *this) +{ + int32_t ret = 0; + int32_t count = 0; + data_t *scheduler = NULL; + data_t *data = NULL; + xlator_t *ns_xl = NULL; + xlator_list_t *trav = NULL; + xlator_list_t *xlparent = NULL; + xlator_list_t *parent = NULL; + unify_private_t *_private = NULL; + + /* Check for number of child nodes, if there is no child nodes, exit */ + if (!this->children) { + gf_log (this->name, GF_LOG_ERROR, + "No child nodes specified. check \"subvolumes \" " + "option in volfile"); + return -1; + } + + if (!this->parents) { + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + /* Check for 'scheduler' in volume */ + scheduler = dict_get (this->options, "scheduler"); + if (!scheduler) { + gf_log (this->name, GF_LOG_ERROR, + "\"option scheduler <x>\" is missing in volfile"); + return -1; + } + + /* Setting "option namespace <node>" */ + data = dict_get (this->options, "namespace"); + if(!data) { + gf_log (this->name, GF_LOG_CRITICAL, + "namespace option not specified, Exiting"); + return -1; + } + /* Search namespace in the child node, if found, exit */ + trav = this->children; + while (trav) { + if (strcmp (trav->xlator->name, data->data) == 0) + break; + trav = trav->next; + } + if (trav) { + gf_log (this->name, GF_LOG_CRITICAL, + "namespace node used as a subvolume, Exiting"); + return -1; + } + + /* Search for the namespace node, if found, continue */ + ns_xl = this->next; + while (ns_xl) { + if (strcmp (ns_xl->name, data->data) == 0) + break; + ns_xl = ns_xl->next; + } + if (!ns_xl) { + gf_log (this->name, GF_LOG_CRITICAL, + "namespace node not found in volfile, Exiting"); + return -1; + } + + gf_log (this->name, GF_LOG_DEBUG, + "namespace node specified as %s", data->data); + + _private = CALLOC (1, sizeof (*_private)); + ERR_ABORT (_private); + _private->sched_ops = get_scheduler (this, scheduler->data); + if (!_private->sched_ops) { + gf_log (this->name, GF_LOG_CRITICAL, + "Error while loading scheduler. Exiting"); + FREE (_private); + return -1; + } + + if (ns_xl->parents) { + gf_log (this->name, GF_LOG_CRITICAL, + "Namespace node should not be a child of any other node. Exiting"); + FREE (_private); + return -1; + } + + _private->namespace = ns_xl; + + /* update _private structure */ + { + count = 0; + trav = this->children; + /* Get the number of child count */ + while (trav) { + count++; + trav = trav->next; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Child node count is %d", count); + + _private->child_count = count; + if (count == 1) { + /* TODO: Should I error out here? */ + gf_log (this->name, GF_LOG_CRITICAL, + "WARNING: You have defined only one " + "\"subvolumes\" for unify volume. It may not " + "be the desired config, review your volume " + "volfile. If this is how you are testing it," + " you may hit some performance penalty"); + } + + _private->xl_array = CALLOC (1, + sizeof (xlator_t) * (count + 1)); + ERR_ABORT (_private->xl_array); + + count = 0; + trav = this->children; + while (trav) { + _private->xl_array[count++] = trav->xlator; + trav = trav->next; + } + _private->xl_array[count] = _private->namespace; + + /* self-heal part, start with generation '1' */ + _private->inode_generation = 1; + /* Because, Foreground part is tested well */ + _private->self_heal = ZR_UNIFY_FG_SELF_HEAL; + data = dict_get (this->options, "self-heal"); + if (data) { + if (strcasecmp (data->data, "off") == 0) + _private->self_heal = ZR_UNIFY_SELF_HEAL_OFF; + + if (strcasecmp (data->data, "foreground") == 0) + _private->self_heal = ZR_UNIFY_FG_SELF_HEAL; + + if (strcasecmp (data->data, "background") == 0) + _private->self_heal = ZR_UNIFY_BG_SELF_HEAL; + } + + /* optimist - ask bulde for more about it */ + data = dict_get (this->options, "optimist"); + if (data) { + if (gf_string2boolean (data->data, + &_private->optimist) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "optimist excepts only boolean " + "options"); + } + } + + LOCK_INIT (&_private->lock); + } + + /* Now that everything is fine. */ + this->private = (void *)_private; + { + /* Initialize scheduler, if everything else is successful */ + ret = _private->sched_ops->init (this); + if (ret == -1) { + gf_log (this->name, GF_LOG_CRITICAL, + "Initializing scheduler failed, Exiting"); + FREE (_private); + return -1; + } + + ret = 0; + + /* This section is required because some fops may look + * for 'xl->parent' variable + */ + xlparent = CALLOC (1, sizeof (*xlparent)); + xlparent->xlator = this; + if (!ns_xl->parents) { + ns_xl->parents = xlparent; + } else { + parent = ns_xl->parents; + while (parent->next) + parent = parent->next; + parent->next = xlparent; + } + /* Initialize the namespace volume */ + if (!ns_xl->ready) { + ret = xlator_tree_init (ns_xl); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "initializing namespace node failed, " + "Exiting"); + FREE (_private); + return -1; + } + } + } + + /* Tell namespace node that init is done */ + ns_xl->notify (ns_xl, GF_EVENT_PARENT_UP, this); + + return 0; +} + +/** + * fini - Free all the allocated memory + */ +void +fini (xlator_t *this) +{ + unify_private_t *priv = this->private; + priv->sched_ops->fini (this); + this->private = NULL; + LOCK_DESTROY (&priv->lock); + FREE (priv->xl_array); + FREE (priv); + return; +} + + +struct xlator_fops fops = { + .stat = unify_stat, + .chmod = unify_chmod, + .readlink = unify_readlink, + .mknod = unify_mknod, + .mkdir = unify_mkdir, + .unlink = unify_unlink, + .rmdir = unify_rmdir, + .symlink = unify_symlink, + .rename = unify_rename, + .link = unify_link, + .chown = unify_chown, + .truncate = unify_truncate, + .create = unify_create, + .open = unify_open, + .readv = unify_readv, + .writev = unify_writev, + .statfs = unify_statfs, + .flush = unify_flush, + .fsync = unify_fsync, + .setxattr = unify_setxattr, + .getxattr = unify_getxattr, + .removexattr = unify_removexattr, + .opendir = unify_opendir, + .readdir = unify_readdir, + .fsyncdir = unify_fsyncdir, + .access = unify_access, + .ftruncate = unify_ftruncate, + .fstat = unify_fstat, + .lk = unify_lk, + .fchown = unify_fchown, + .fchmod = unify_fchmod, + .utimens = unify_utimens, + .lookup = unify_lookup, + .getdents = unify_getdents, + .checksum = unify_checksum, + .inodelk = unify_inodelk, + .finodelk = unify_finodelk, + .entrylk = unify_entrylk, + .fentrylk = unify_fentrylk, + .xattrop = unify_xattrop, + .fxattrop = unify_fxattrop +}; + +struct xlator_mops mops = { +}; + +struct xlator_cbks cbks = { +}; + +struct volume_options options[] = { + { .key = { "namespace" }, + .type = GF_OPTION_TYPE_XLATOR + }, + { .key = { "scheduler" }, + .value = { "alu", "rr", "random", "nufa", "switch" }, + .type = GF_OPTION_TYPE_STR + }, + { .key = {"self-heal"}, + .value = { "foreground", "background", "off" }, + .type = GF_OPTION_TYPE_STR + }, + /* TODO: remove it some time later */ + { .key = {"optimist"}, + .type = GF_OPTION_TYPE_BOOL + }, + + { .key = {NULL} }, +}; diff --git a/xlators/cluster/unify/src/unify.h b/xlators/cluster/unify/src/unify.h new file mode 100644 index 00000000000..bc18dc53f52 --- /dev/null +++ b/xlators/cluster/unify/src/unify.h @@ -0,0 +1,132 @@ +/* + Copyright (c) 2006, 2007, 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#ifndef _UNIFY_H +#define _UNIFY_H + +#include "scheduler.h" +#include "list.h" + +#define MAX_DIR_ENTRY_STRING (32 * 1024) + +#define ZR_UNIFY_SELF_HEAL_OFF 0 +#define ZR_UNIFY_FG_SELF_HEAL 1 +#define ZR_UNIFY_BG_SELF_HEAL 2 + +/* Sometimes one should use completely random numbers.. its good :p */ +#define UNIFY_SELF_HEAL_GETDENTS_COUNT 1024 + +#define NS(xl) (((unify_private_t *)xl->private)->namespace) + +/* This is used to allocate memory for local structure */ +#define INIT_LOCAL(fr, loc) \ +do { \ + loc = CALLOC (1, sizeof (unify_local_t)); \ + ERR_ABORT (loc); \ + if (!loc) { \ + STACK_UNWIND (fr, -1, ENOMEM); \ + return 0; \ + } \ + fr->local = loc; \ + loc->op_ret = -1; \ + loc->op_errno = ENOENT; \ +} while (0) + + + +struct unify_private { + /* Update this structure depending on requirement */ + void *scheduler; /* THIS SHOULD BE THE FIRST VARIABLE, + if xlator is using scheduler */ + struct sched_ops *sched_ops; /* Scheduler options */ + xlator_t *namespace; /* ptr to namespace xlator */ + xlator_t **xl_array; + gf_boolean_t optimist; + int16_t child_count; + int16_t num_child_up; + uint8_t self_heal; + uint8_t is_up; + uint64_t inode_generation; + gf_lock_t lock; +}; +typedef struct unify_private unify_private_t; + +struct unify_self_heal_struct { + uint8_t dir_checksum[ZR_FILENAME_MAX]; + uint8_t ns_dir_checksum[ZR_FILENAME_MAX]; + uint8_t file_checksum[ZR_FILENAME_MAX]; + uint8_t ns_file_checksum[ZR_FILENAME_MAX]; + off_t *offset_list; + int *count_list; + dir_entry_t **entry_list; +}; + + +struct _unify_local_t { + int32_t call_count; + int32_t op_ret; + int32_t op_errno; + mode_t mode; + off_t offset; + dev_t dev; + uid_t uid; + gid_t gid; + int32_t flags; + int32_t entry_count; + int32_t count; // dir_entry_t count; + fd_t *fd; + struct stat stbuf; + struct statvfs statvfs_buf; + struct timespec tv[2]; + char *name; + int32_t revalidate; + + ino_t st_ino; + nlink_t st_nlink; + + dict_t *dict; + + int16_t *list; + int16_t *new_list; /* Used only in case of rename */ + int16_t index; + + int32_t failed; + int32_t return_eio; /* Used in case of different st-mode + present for a given path */ + + uint64_t inode_generation; /* used to store the per directory + * inode_generation. Got from inode's ctx + * of directory inodes + */ + + struct unify_self_heal_struct *sh_struct; + loc_t loc1, loc2; +}; +typedef struct _unify_local_t unify_local_t; + +int32_t zr_unify_self_heal (call_frame_t *frame, + xlator_t *this, + unify_local_t *local); + +#endif /* _UNIFY_H */ |