NSR : nsr server code generation patch

With this patch, the NSR server appoints the first node on every replica subvolume as the leader for that subvolume. On receiving a 'write' fop, the leader first checks whether there is quorum in the replica subvolume to proceed. If there isn't, it fails with EROFS. If there is quorum, the leader forwards the fop to the followers. The followers, on receiving the fop, perform the operation and, based on the success or failure of the outcome, send a +ve or a -ve ack to the leader. The leader, after receiving acks from the followers, performs a quorum check of the acks to see if it should even try to perform the fop. If quorum is not being met, and the leader's outcome wouldn't affect quorum, then it sends a -ve ack to the client without even performing the fop. If quorum is being met, the leader will then try the fop on itself and, based on its outcome, perform a quorum check of all the acks received (this time including its own). Based on the result of the quorum check (irrespective of the outcome on the leader), a +ve or -ve ack is sent back to the client. Change-Id: I860654b74c53e9b139b37dba43848e5504df6dce Signed-off-by: Avra Sengupta <asengupt@redhat.com> Reviewed-on: http://review.gluster.org/12705 Smoke: Gluster Build System <jenkins@build.gluster.com> Tested-by: Jeff Darcy <jdarcy@redhat.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
author: Jeff Darcy <jdarcy@redhat.com> 2015-12-10 10:30:32 -0500
committer: Jeff Darcy <jdarcy@redhat.com> 2016-02-11 15:46:26 -0800
commit: 55617ef037695f47ead1c1b753678402e1545f8c (patch)
tree: 7bc4dde5a51ae5c8ef3a45454e64ae5f5b7c5a86 /xlators/experimental/nsr-server/src/nsr-internal.h
parent: 320779d53ae013147d5e2556d2946c73e45734ab (diff)
1 files changed, 114 insertions, 0 deletions
diff --git a/xlators/experimental/nsr-server/src/nsr-internal.h b/xlators/experimental/nsr-server/src/nsr-internal.h
new file mode 100644
index 00000000000..b8c7fc314b7
--- /dev/null
+++ b/xlators/experimental/nsr-server/src/nsr-internal.h
@@ -0,0 +1,114 @@
+/*
+   Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#define LEADER_XATTR            "user.nsr.leader" /* xattr key string */
+#define SECOND_CHILD(xl)        ((xl)->children->next->xlator) /* xlator of 2nd entry in xl's children list; arg parenthesized so expressions like &a[i] or p + 1 work */
+#define RECONCILER_PATH         NSR_SCRIPT_PREFIX"/reconciler.py" /* NSR_SCRIPT_PREFIX is defined outside this header */
+#define CHANGELOG_ENTRY_SIZE    128 /* bytes; sizes nsr_private_t.term_buf */
+
+enum {  /* presumably GlusterFS mem-accounting type IDs for NSR -- confirm */
+        gf_mt_nsr_private_t = gf_common_mt_end + 1, /* numbering starts right after gf_common_mt_end */
+        gf_mt_nsr_fd_ctx_t,
+        gf_mt_nsr_inode_ctx_t,
+        gf_mt_nsr_dirty_t,
+        gf_mt_nsr_end   /* last enumerator; presumably an end marker, not a real type */
+};
+
+typedef enum nsr_recon_notify_ev_id_t { /* event IDs; type of nsr_recon_notify_ev_t.id */
+        NSR_RECON_SET_LEADER = 1,
+        NSR_RECON_ADD_CHILD = 2 /* presumably the "add" case that uses the event's index -- confirm */
+} nsr_recon_notify_ev_id_t;
+
+typedef struct _nsr_recon_notify_ev_s { /* one reconciler notification event */
+        nsr_recon_notify_ev_id_t id;    /* which event this is */
+        uint32_t index; /* only used in case of add (presumably NSR_RECON_ADD_CHILD) */
+        struct list_head list;          /* list linkage; owning list not visible here */
+} nsr_recon_notify_ev_t;
+
+typedef struct {        /* NSR server state; presumably hung off xlator_t->private -- confirm */
+        /*
+         * This is a hack to allow a non-leader to accept requests while the
+         * leader is down, and it only works for n=2.  The way it works is that
+         * "config_leader" indicates the state from our options (via init or
+         * reconfigure) but "leader" is what the fop code actually looks at.  If
+         * config_leader is true, then leader will *always* be true as well,
+         * giving that brick precedence.  If config_leader is false, then
+         * leader will only be true if there is no connection to the other
+         * brick (tracked in nsr_notify).
+         *
+         * TBD: implement real leader election
+         */
+        gf_boolean_t            config_leader;  /* leader role as configured via options */
+        gf_boolean_t            leader;         /* effective role; what fop code checks */
+        uint8_t                 up_children;    /* presumably count of connected children */
+        uint8_t                 n_children;     /* presumably total number of children */
+        char                    *vol_file;
+        uint32_t                current_term;
+        uint32_t                kid_state;
+        gf_lock_t               dirty_lock;     /* presumably guards dirty_fds -- confirm */
+        struct list_head        dirty_fds;
+	uint32_t                index;
+	gf_lock_t               index_lock;     /* presumably guards index -- confirm */
+        double                  quorum_pct;     /* presumably quorum threshold as a percentage -- confirm units */
+        int                     term_fd;
+        long                    term_total;
+        long                    term_read;
+        /*
+         * This is a super-duper hack, but it will do for now.  The reason it's
+         * a hack is that we pass this to dict_set_static_bin, so we don't have
+         * to mess around with allocating and freeing it on every single IPC
+         * request, but it's totally not thread-safe.  On the other hand, there
+         * should only be one reconciliation thread running and calling these
+         * functions at a time, so maybe that doesn't matter.
+         *
+         * TBD: re-evaluate how to manage this
+         */
+        char                    term_buf[CHANGELOG_ENTRY_SIZE]; /* shared, unsynchronized buffer (see above) */
+} nsr_private_t;
+
+typedef struct {        /* presumably per-call (frame-local) state -- confirm */
+        call_stub_t             *stub;
+        call_stub_t             *qstub;
+        uint32_t                call_count;
+        uint32_t                successful_acks;        /* presumably +ve acks counted toward quorum */
+        uint32_t                successful_op_ret;
+        fd_t                    *fd;
+        struct list_head        qlinks;                 /* list linkage; presumably into an inode ctx queue -- confirm */
+} nsr_local_t;
+
+/*
+ * Handle for a changelog entry.  Its type should match whatever changelog
+ * returns on the pre-op, since we pass that value back for our post-op.
+ */
+typedef uint32_t log_id_t;
+
+typedef struct {        /* list node carrying one log_id_t */
+        struct list_head        links;  /* list linkage */
+        log_id_t                id;     /* ID carried by this node */
+} nsr_dirty_list_t;
+
+typedef struct {        /* presumably the per-fd context kept by NSR -- confirm */
+        fd_t                    *fd;
+        struct list_head        dirty_list;     /* presumably a list of nsr_dirty_list_t -- confirm */
+        struct list_head        fd_list;        /* presumably linkage into nsr_private_t.dirty_fds -- confirm */
+} nsr_fd_ctx_t;
+
+typedef struct {        /* presumably the per-inode context kept by NSR -- confirm */
+        gf_lock_t               lock;           /* presumably guards the fields below -- confirm */
+        uint32_t                active;         /* presumably number of entries on aqueue */
+        struct list_head        aqueue;
+        uint32_t                pending;        /* presumably number of entries on pqueue */
+        struct list_head        pqueue;
+} nsr_inode_ctx_t;
+
+void nsr_start_reconciler (xlator_t *this);     /* declaration only; presumably launches RECONCILER_PATH -- confirm */
author	Jeff Darcy <jdarcy@redhat.com>	2015-12-10 10:30:32 -0500
committer	Jeff Darcy <jdarcy@redhat.com>	2016-02-11 15:46:26 -0800
commit	55617ef037695f47ead1c1b753678402e1545f8c (patch)
tree	7bc4dde5a51ae5c8ef3a45454e64ae5f5b7c5a86 /xlators/experimental/nsr-server/src/nsr-internal.h
parent	320779d53ae013147d5e2556d2946c73e45734ab (diff)