diff options
Diffstat (limited to 'xlators/cluster/afr/src/afr.h')
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 523 |
1 files changed, 523 insertions, 0 deletions
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h new file mode 100644 index 00000000000..4cf6cdf9dfe --- /dev/null +++ b/xlators/cluster/afr/src/afr.h @@ -0,0 +1,523 @@ +/* + Copyright (c) 2008 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#ifndef __AFR_H__ +#define __AFR_H__ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "scheduler.h" +#include "call-stub.h" +#include "compat-errno.h" + + +typedef struct _afr_private { + gf_lock_t lock; /* to guard access to child_count, etc */ + unsigned int child_count; /* total number of children */ + + xlator_t **children; + + unsigned char *child_up; + + gf_boolean_t data_self_heal; /* on/off */ + gf_boolean_t metadata_self_heal; /* on/off */ + gf_boolean_t entry_self_heal; /* on/off */ + + + gf_boolean_t data_change_log; /* on/off */ + gf_boolean_t metadata_change_log; /* on/off */ + gf_boolean_t entry_change_log; /* on/off */ + + unsigned int read_child; /* read-subvolume */ + unsigned int favorite_child; /* subvolume to be preferred in resolving + split-brain cases */ + + unsigned int data_lock_server_count; + unsigned int metadata_lock_server_count; + unsigned int entry_lock_server_count; + + unsigned int wait_count; /* # of servers to wait for success */ +} afr_private_t; + +typedef struct { + /* array of stat's, one for each child */ + struct stat *buf; + + /* array of xattr's, one for each child */ + dict_t **xattr; + + /* array of errno's, one for each child */ + int *child_errno; + + int32_t **pending_matrix; + int32_t **delta_matrix; + + int *sources; + int source; + int active_source; + int active_sinks; + int *success; + + fd_t *healing_fd; + int op_failed; + + int file_has_holes; + blksize_t block_size; + off_t file_size; + off_t offset; + + loc_t parent_loc; + int (*completion_cbk) (call_frame_t *frame, xlator_t *this); + call_frame_t *sh_frame; +} afr_self_heal_t; + + +typedef enum { + AFR_DATA_TRANSACTION, /* truncate, write, ... */ + AFR_METADATA_TRANSACTION, /* chmod, chown, ... */ + AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */ + AFR_ENTRY_RENAME_TRANSACTION, /* rename */ + AFR_FLUSH_TRANSACTION, /* flush */ +} afr_transaction_type; + +typedef struct _afr_local { + unsigned int call_count; + unsigned int success_count; + unsigned int enoent_count; + + unsigned int need_metadata_self_heal; + unsigned int need_entry_self_heal; + unsigned int need_data_self_heal; + unsigned int govinda_gOvinda; + + unsigned int reval_child_index; + int32_t op_ret; + int32_t op_errno; + + int32_t *pending_array; + + loc_t loc; + loc_t newloc; + + fd_t *fd; + + glusterfs_fop_t fop; + + unsigned char *child_up; + int child_count; + + int32_t *child_errno; + + dict_t *xattr_req; + int open_fd_count; + /* + This struct contains the arguments for the "continuation" + (scheme-like) of fops + */ + + int op; + struct { + struct { + unsigned char buf_set; + struct statvfs buf; + } statfs; + + struct { + inode_t *inode; + struct stat buf; + dict_t *xattr; + } lookup; + + struct { + int32_t flags; + } open; + + struct { + int32_t cmd; + struct flock flock; + unsigned char *locked_nodes; + } lk; + + struct { + uint8_t *file_checksum; + uint8_t *dir_checksum; + } checksum; + + /* inode read */ + + struct { + int32_t mask; + int last_tried; /* index of the child we tried previously */ + } access; + + struct { + int last_tried; + ino_t ino; + } stat; + + struct { + int last_tried; + ino_t ino; + } fstat; + + struct { + size_t size; + int last_tried; + } readlink; + + struct { + const char *name; + int last_tried; + } getxattr; + + struct { + size_t size; + off_t offset; + int last_tried; + } readv; + + /* dir read */ + + struct { + int success_count; + int32_t op_ret; + int32_t op_errno; + } opendir; + + struct { + int32_t op_ret; + int32_t op_errno; + size_t size; + off_t offset; + + int last_tried; + } readdir; + + struct { + int32_t op_ret; + int32_t op_errno; + + size_t size; + off_t offset; + int32_t flag; + + int last_tried; + } getdents; + + /* inode write */ + + struct { + ino_t ino; + mode_t mode; + struct stat buf; + } chmod; + + struct { + ino_t ino; + mode_t mode; + struct stat buf; + } fchmod; + + struct { + ino_t ino; + uid_t uid; + gid_t gid; + struct stat buf; + } chown; + + struct { + ino_t ino; + uid_t uid; + gid_t gid; + struct stat buf; + } fchown; + + struct { + ino_t ino; + struct stat buf; + + int32_t op_ret; + + struct iovec *vector; + dict_t *refs; + int32_t count; + off_t offset; + } writev; + + struct { + ino_t ino; + off_t offset; + struct stat buf; + } truncate; + + struct { + ino_t ino; + off_t offset; + struct stat buf; + } ftruncate; + + struct { + ino_t ino; + struct timespec tv[2]; + struct stat buf; + } utimens; + + struct { + dict_t *dict; + int32_t flags; + } setxattr; + + struct { + const char *name; + } removexattr; + + /* dir write */ + + struct { + ino_t ino; + fd_t *fd; + int32_t flags; + mode_t mode; + inode_t *inode; + struct stat buf; + } create; + + struct { + ino_t ino; + dev_t dev; + mode_t mode; + inode_t *inode; + struct stat buf; + } mknod; + + struct { + ino_t ino; + int32_t mode; + inode_t *inode; + struct stat buf; + } mkdir; + + struct { + int32_t op_ret; + int32_t op_errno; + } unlink; + + struct { + int32_t op_ret; + int32_t op_errno; + } rmdir; + + struct { + ino_t ino; + struct stat buf; + } rename; + + struct { + ino_t ino; + inode_t *inode; + struct stat buf; + } link; + + struct { + ino_t ino; + inode_t *inode; + struct stat buf; + char *linkpath; + } symlink; + + struct { + int32_t flags; + dir_entry_t *entries; + int32_t count; + } setdents; + } cont; + + struct { + off_t start, len; + + unsigned char *locked_nodes; + int lock_count; + + const char *basename; + const char *new_basename; + + char *pending; + + loc_t parent_loc; + loc_t new_parent_loc; + + afr_transaction_type type; + + int success_count; + int erase_pending; + int failure_count; + + int last_tried; + int32_t *child_errno; + + call_frame_t *main_frame; + + int (*fop) (call_frame_t *frame, xlator_t *this); + + int (*done) (call_frame_t *frame, xlator_t *this); + + int (*resume) (call_frame_t *frame, xlator_t *this); + + int (*unwind) (call_frame_t *frame, xlator_t *this); + } transaction; + + afr_self_heal_t self_heal; +} afr_local_t; + +/* try alloc and if it fails, goto label */ +#define ALLOC_OR_GOTO(var, type, label) do { \ + var = CALLOC (sizeof (type), 1); \ + if (!var) { \ + gf_log (this->name, GF_LOG_ERROR, \ + "out of memory :("); \ + op_errno = ENOMEM; \ + goto label; \ + } \ + } while (0); + + +/* did a call fail due to a child failing? */ +#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \ + ((op_errno == ENOTCONN) || \ + (op_errno == EBADFD))) + +/* have we tried all children? */ +#define all_tried(i, count) ((i) == (count) - 1) + +void +afr_build_parent_loc (loc_t *parent, loc_t *child); + +int +afr_up_children_count (int child_count, unsigned char *child_up); + +int +afr_locked_nodes_count (unsigned char *locked_nodes, int child_count); + +int +afr_first_up_child (afr_private_t *priv); + +ino64_t +afr_itransform (ino64_t ino, int child_count, int child_index); + +int +afr_deitransform (ino64_t ino, int child_count); + +void +afr_local_cleanup (afr_local_t *local, xlator_t *this); + +int +afr_frame_return (call_frame_t *frame); + +#define AFR_STACK_UNWIND(frame, params ...) \ + do { \ + afr_local_t *__local = NULL; \ + xlator_t *__this = NULL; \ + __local = frame->local; \ + __this = frame->this; \ + frame->local = NULL; \ + STACK_UNWIND (frame, params); \ + afr_local_cleanup (__local, __this); \ + free (__local); \ +} while (0); + +#define AFR_STACK_DESTROY(frame) \ + do { \ + afr_local_t *__local = NULL; \ + xlator_t *__this = NULL; \ + __local = frame->local; \ + __this = frame->this; \ + frame->local = NULL; \ + STACK_DESTROY (frame->root); \ + afr_local_cleanup (__local, __this); \ + free (__local); \ +} while (0); + +/* allocate and return a string that is the basename of argument */ +static inline char * +AFR_BASENAME (const char *str) +{ + char *__tmp_str = NULL; + char *__basename_str = NULL; + __tmp_str = strdup (str); + __basename_str = strdup (basename (__tmp_str)); + FREE (__tmp_str); + return __basename_str; +} + +/* initialize local_t */ +static inline int +AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv) +{ + local->child_up = CALLOC (sizeof (*local->child_up), + priv->child_count); + if (!local->child_up) { + return -ENOMEM; + } + + memcpy (local->child_up, priv->child_up, + sizeof (*local->child_up) * priv->child_count); + + + local->call_count = afr_up_children_count (priv->child_count, local->child_up); + if (local->call_count == 0) + return -ENOTCONN; + + local->transaction.erase_pending = 1; + + local->op_ret = -1; + local->op_errno = EUCLEAN; + + return 0; +} + + +static inline int +afr_transaction_local_init (afr_local_t *local, afr_private_t *priv) +{ + local->child_errno = CALLOC (sizeof (*local->child_errno), + priv->child_count); + if (!local->child_errno) { + return -ENOMEM; + } + + local->pending_array = CALLOC (sizeof (*local->pending_array), + priv->child_count); + if (!local->pending_array) { + return -ENOMEM; + } + + local->transaction.locked_nodes = CALLOC (sizeof (*local->transaction.locked_nodes), + priv->child_count); + + local->transaction.child_errno = CALLOC (sizeof (*local->transaction.child_errno), + priv->child_count); + + return 0; +} + +#endif /* __AFR_H__ */ |