summaryrefslogtreecommitdiffstats
path: root/xlators/experimental/nsr-server/src/nsr-internal.h
diff options
context:
space:
mode:
authorJeff Darcy <jdarcy@redhat.com>2015-12-10 10:30:32 -0500
committerJeff Darcy <jdarcy@redhat.com>2016-02-11 15:46:26 -0800
commit55617ef037695f47ead1c1b753678402e1545f8c (patch)
tree7bc4dde5a51ae5c8ef3a45454e64ae5f5b7c5a86 /xlators/experimental/nsr-server/src/nsr-internal.h
parent320779d53ae013147d5e2556d2946c73e45734ab (diff)
NSR : nsr server code generation patch
The NSR-server with this patch, appoints the first node on every replica subvolume, as the leader for that subvolume. On receiving a 'write' fop, the leader first checks if there is quorum in the replica subvolume to proceeed. In case there isn't it fails with EROFS. If there is quorum, the leader forwards the fop to the followers. The followers on receiving the fop, perform the operation, and based on the success or failure of the outcome send a +ve or a -ve ack to the leader. The leader after receiving acks from the followers performs a quorum check of the acks, to see if it should even try to perform the fop. If quorum is not being met, and the leader's outcome wouldn't affect quorum, then it would send -ve ack to the client without even performing the fop. If quorum is being met, the leader will then try the fop on itself, and based on it's outcome perform a quorum check of all the acks received (this time, including it's own). Based on the result of the quorum check (irrespective of the outcome on the leader), a +ve or -ve ack is send back to the client. Change-Id: I860654b74c53e9b139b37dba43848e5504df6dce Signed-off-by: Avra Sengupta <asengupt@redhat.com> Reviewed-on: http://review.gluster.org/12705 Smoke: Gluster Build System <jenkins@build.gluster.com> Tested-by: Jeff Darcy <jdarcy@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Diffstat (limited to 'xlators/experimental/nsr-server/src/nsr-internal.h')
-rw-r--r--xlators/experimental/nsr-server/src/nsr-internal.h114
1 files changed, 114 insertions, 0 deletions
diff --git a/xlators/experimental/nsr-server/src/nsr-internal.h b/xlators/experimental/nsr-server/src/nsr-internal.h
new file mode 100644
index 00000000000..b8c7fc314b7
--- /dev/null
+++ b/xlators/experimental/nsr-server/src/nsr-internal.h
@@ -0,0 +1,114 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#define LEADER_XATTR "user.nsr.leader"
+#define SECOND_CHILD(xl) (xl->children->next->xlator)
+#define RECONCILER_PATH NSR_SCRIPT_PREFIX"/reconciler.py"
+#define CHANGELOG_ENTRY_SIZE 128
+
+enum {
+ gf_mt_nsr_private_t = gf_common_mt_end + 1,
+ gf_mt_nsr_fd_ctx_t,
+ gf_mt_nsr_inode_ctx_t,
+ gf_mt_nsr_dirty_t,
+ gf_mt_nsr_end
+};
+
+typedef enum nsr_recon_notify_ev_id_t {
+ NSR_RECON_SET_LEADER = 1,
+ NSR_RECON_ADD_CHILD = 2
+} nsr_recon_notify_ev_id_t;
+
+typedef struct _nsr_recon_notify_ev_s {
+ nsr_recon_notify_ev_id_t id;
+ uint32_t index; /* in case of add */
+ struct list_head list;
+} nsr_recon_notify_ev_t;
+
+typedef struct {
+ /*
+ * This is a hack to allow a non-leader to accept requests while the
+ * leader is down, and it only works for n=2. The way it works is that
+ * "config_leader" indicates the state from our options (via init or
+ * reconfigure) but "leader" is what the fop code actually looks at. If
+ * config_leader is true, then leader will *always* be true as well,
+ * giving that brick precedence. If config_leader is false, then
+ * leader will only be true if there is no connection to the other
+ * brick (tracked in nsr_notify).
+ *
+ * TBD: implement real leader election
+ */
+ gf_boolean_t config_leader;
+ gf_boolean_t leader;
+ uint8_t up_children;
+ uint8_t n_children;
+ char *vol_file;
+ uint32_t current_term;
+ uint32_t kid_state;
+ gf_lock_t dirty_lock;
+ struct list_head dirty_fds;
+ uint32_t index;
+ gf_lock_t index_lock;
+ double quorum_pct;
+ int term_fd;
+ long term_total;
+ long term_read;
+ /*
+ * This is a super-duper hack, but it will do for now. The reason it's
+ * a hack is that we pass this to dict_set_static_bin, so we don't have
+ * to mess around with allocating and freeing it on every single IPC
+ * request, but it's totally not thread-safe. On the other hand, there
+ * should only be one reconciliation thread running and calling these
+ * functions at a time, so maybe that doesn't matter.
+ *
+ * TBD: re-evaluate how to manage this
+ */
+ char term_buf[CHANGELOG_ENTRY_SIZE];
+} nsr_private_t;
+
+typedef struct {
+ call_stub_t *stub;
+ call_stub_t *qstub;
+ uint32_t call_count;
+ uint32_t successful_acks;
+ uint32_t successful_op_ret;
+ fd_t *fd;
+ struct list_head qlinks;
+} nsr_local_t;
+
+/*
+ * This should match whatever changelog returns on the pre-op for us to pass
+ * when we're ready for our post-op.
+ */
+typedef uint32_t log_id_t;
+
+typedef struct {
+ struct list_head links;
+ log_id_t id;
+} nsr_dirty_list_t;
+
+typedef struct {
+ fd_t *fd;
+ struct list_head dirty_list;
+ struct list_head fd_list;
+} nsr_fd_ctx_t;
+
+typedef struct {
+ gf_lock_t lock;
+ uint32_t active;
+ struct list_head aqueue;
+ uint32_t pending;
+ struct list_head pqueue;
+} nsr_inode_ctx_t;
+
+void nsr_start_reconciler (xlator_t *this);