summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/Makefile.am2
-rw-r--r--xlators/cluster/afr/src/afr-common.c734
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h3
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c4
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h1
-rw-r--r--xlators/cluster/afr/src/afr.c164
-rw-r--r--xlators/cluster/afr/src/afr.h30
-rw-r--r--xlators/cluster/aha/Makefile.am3
-rw-r--r--xlators/cluster/aha/src/Makefile.am18
-rw-r--r--xlators/cluster/aha/src/aha-fops.c952
-rw-r--r--xlators/cluster/aha/src/aha-fops.h360
-rw-r--r--xlators/cluster/aha/src/aha-helpers.c46
-rw-r--r--xlators/cluster/aha/src/aha-helpers.h23
-rw-r--r--xlators/cluster/aha/src/aha-mem-types.h22
-rw-r--r--xlators/cluster/aha/src/aha-retry.c524
-rw-r--r--xlators/cluster/aha/src/aha-retry.h12
-rw-r--r--xlators/cluster/aha/src/aha.c345
-rw-r--r--xlators/cluster/aha/src/aha.h46
-rw-r--r--xlators/cluster/dht/src/dht-common.c41
-rw-r--r--xlators/cluster/dht/src/dht-common.h6
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c53
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c10
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c13
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c2
-rw-r--r--xlators/cluster/dht/src/dht-shared.c32
-rw-r--r--xlators/cluster/dht/src/nufa.c10
-rw-r--r--xlators/cluster/dht/src/switch.c10
27 files changed, 3362 insertions, 104 deletions
diff --git a/xlators/cluster/Makefile.am b/xlators/cluster/Makefile.am
index 903fbb39f12..bce94bb8b3b 100644
--- a/xlators/cluster/Makefile.am
+++ b/xlators/cluster/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = stripe afr dht ec
+SUBDIRS = aha stripe afr dht ec
CLEANFILES =
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index c324c02c008..ebadba99a05 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -43,6 +43,13 @@
#include "afr-self-heald.h"
#include "afr-messages.h"
+#define CHILD_UP_STR "UP"
+#define CHILD_DOWN_STR "DOWN"
+#define CHILD_DISCONNECTED_STR "DOWN"
+
+static int32_t
+find_hybrid_children (xlator_t *this, unsigned char *fastest_children);
+
call_frame_t *
afr_copy_frame (call_frame_t *base)
{
@@ -1457,21 +1464,75 @@ afr_hash_child (afr_read_subvol_args_t *args, int32_t child_count, int hashmode)
sizeof(gfid_copy)) % child_count;
}
+/*
+ * afr_halo_read_subvol
+ *
+ * Given a array representing the readable children, this function will
+ * return which one of the readable children meet the halo hybrid criteria.
+ * In the event none are found, -1 is returned and another strategy will have
+ * to be used to figure out where the read should come from.
+ */
+int afr_halo_read_subvol (xlator_t *this, unsigned char *readable) {
+ afr_private_t *priv = NULL;
+ unsigned char *hybrid_children;
+ int32_t hybrid_cnt = 0;
+ int read_subvol = -1;
+ int i = 0;
+
+ priv = this->private;
+
+ /* Halo in-active or hybrid mode disabled, bail.... */
+ if (!priv->halo_enabled || !priv->halo_hybrid_mode)
+ return -1;
+
+ /* AFR Discovery edge case, if you are already pinned to a child
+ * which meets the latency threshold then go with this child for
+ * consistency purposes.
+ */
+ if (priv->read_child >= 0 && readable[priv->read_child] &&
+ priv->child_latency[priv->read_child] <=
+ AFR_HALO_HYBRID_LATENCY_MSEC) {
+ return priv->read_child;
+ }
+
+ hybrid_children = alloca0 (priv->child_count);
+ hybrid_cnt = find_hybrid_children (this, hybrid_children);
+ if (hybrid_cnt) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (readable[i] && hybrid_children[i]) {
+ read_subvol = i;
+ priv->read_child = read_subvol;
+ gf_log (this->name, GF_LOG_TRACE,
+ "Selected hybrid child %d for reads",
+ i);
+ break;
+ }
+ }
+ }
+
+ return read_subvol;
+}
+
int
afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
unsigned char *readable,
afr_read_subvol_args_t *args)
{
- int i = 0;
- int read_subvol = -1;
- afr_private_t *priv = NULL;
+ int i = 0;
+ int read_subvol = -1;
+ afr_private_t *priv = NULL;
afr_read_subvol_args_t local_args = {0,};
- priv = this->private;
+ priv = this->private;
+
+ /* Choose lowest latency child for reads */
+ read_subvol = afr_halo_read_subvol (this, readable);
+ if (read_subvol != -1)
+ return read_subvol;
- /* first preference - explicitly specified or local subvolume */
- if (priv->read_child >= 0 && readable[priv->read_child])
+ /* first preference - explicitly specified or local subvolume */
+ if (priv->read_child >= 0 && readable[priv->read_child])
return priv->read_child;
if (inode_is_linked (inode)) {
@@ -1497,7 +1558,6 @@ afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
return -1;
}
-
int
afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
unsigned char *readable, int *event_p,
@@ -2157,6 +2217,13 @@ afr_local_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
priv->children[child_index]->name);
priv->read_child = child_index;
+ } else if (priv->halo_enabled) {
+ if (priv->read_child < 0) {
+ priv->read_child = child_index;
+ } else if (priv->child_latency[child_index] <
+ priv->child_latency[priv->read_child]) {
+ priv->read_child = child_index;
+ }
}
out:
STACK_DESTROY(frame->root);
@@ -2348,7 +2415,6 @@ unwind:
return 0;
}
-
int
afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this)
{
@@ -2574,6 +2640,7 @@ afr_discover_do (call_frame_t *frame, xlator_t *this, int err)
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
int call_count = 0;
+ unsigned char *hybrid_children = NULL;
local = frame->local;
priv = this->private;
@@ -2584,8 +2651,19 @@ afr_discover_do (call_frame_t *frame, xlator_t *this, int err)
goto out;
}
- call_count = local->call_count = AFR_COUNT (local->child_up,
- priv->child_count);
+ hybrid_children = alloca0 (priv->child_count);
+ call_count = find_hybrid_children (this, hybrid_children);
+ if (call_count) {
+ for (i = 0; i < priv->child_count; i++)
+ local->child_up[i] = hybrid_children[i];
+ gf_log (this->name, GF_LOG_TRACE, "Selected %d hybrid "
+ "children for LOOKUPs", call_count);
+ } else {
+ hybrid_children = NULL;
+ call_count = AFR_COUNT (local->child_up, priv->child_count);
+ }
+
+ local->call_count = call_count;
ret = afr_lookup_xattr_req_prepare (local, this, local->xattr_req,
&local->loc);
@@ -2818,6 +2896,15 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
afr_read_subvol_get (loc->parent, this, NULL, NULL, &event,
AFR_DATA_TRANSACTION, NULL);
+ /* So this is the "secret" to why "Hybrid" halo works. Encoded in
+ * the cached inodes, we store what is effectively the "generational"
+ * state of the cluster along with a "packed" version of the extended
+ * attributes which determine which nodes are wise/fools. We can
+ * consult these cached values to figure out who we can trust, in the
+ * event the state of our cluster changes and we can no longer trust
+ * the cached info we "refresh" the inode (and hit all regions) to
+ * ensure we know which bricks we can safely read from.
+ */
if (event != local->event_generation)
afr_inode_refresh (frame, this, loc->parent, NULL,
afr_lookup_do);
@@ -3042,7 +3129,7 @@ afr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ call_count = afr_frame_return (frame);
if (call_count == 0)
AFR_STACK_UNWIND (flush, frame, local->op_ret,
@@ -4290,25 +4377,569 @@ __get_heard_from_all_status (xlator_t *this)
return heard_from_all;
}
+/*
+ * afr_cmp_child
+ *
+ * Passed to the qsort function to order a list of children by the latency
+ * and/or up/down states.
+ *
+ * Note: This isn't as simple as taking the latencies and calling it a
+ * a day. Children can be marked down, which overrides their latency
+ * signal. Having a lower-latency child available doesn't guarentee this
+ * child shall be marked up: we don't want to constantly be swapping
+ * slightly better bricks for others...this is jarring to clients and
+ * could cause all sorts of issues. Plus, the fail-over, max-replicas
+ * flags must all be honored which manage the up/down state of children.
+ *
+ * In short, the (as marked) up/down down state of the brick shall always
+ * take precedence when sorting by latency.
+ */
+static int
+_afr_cmp_child (const void *child1, const void *child2)
+{
+ struct afr_child *child11 = (struct afr_child *)child1;
+ struct afr_child *child22 = (struct afr_child *)child2;
+
+ /* If both children are _marked_ down they are equal */
+ if (!child11->child_up && !child22->child_up)
+ return 0;
+
+ /* Prefer child 2, child 1 is _marked_ down, child 2 is not */
+ if (!child11->child_up && child22->child_up)
+ return 1;
+
+ /* Prefer child 1, child 2 is _marked_ down, child 1 is not */
+ if (child11->child_up && !child22->child_up)
+ return -1;
+
+ if (child11->latency > child22->latency) {
+ return 1;
+ }
+ if (child11->latency == child22->latency) {
+ return 0;
+ }
+ return -1;
+}
+
+/*
+ * find_hybrid_children
+ *
+ * Given a char array representing our children (aka bricks within our AFR
+ * AFR "subvolume"), we'll mark this array with the children which are
+ * within the halo_hybrid_read_max_latency_sec or if none fit this condition,
+ * we'll pick the fastest two bricks.
+ *
+ * You might ask, why not just pick the quickest brick and be done with it?
+ * Well, being within our set is not suffcient to be chosen for the read,
+ * we must also be marked "readable", we still want to choose as many as
+ * we can within our local region to ensure we have somebody that is readable.
+ *
+ * To illustrate this, consider the case where a 1/2 bricks received a sync
+ * from some other writer, and the 2nd brick although faster wasn't present.
+ * In this case we'll want to use the slower brick to service the read.
+ *
+ * In short, this function just tells the caller which hybrid children,
+ * it gives no signal as to their readability, nor should it since this is
+ * handled later in the various flows (e.g. by afr_halo_read_subvol).
+ */
+static int32_t
+find_hybrid_children (xlator_t *this, unsigned char *hybrid_children)
+{
+ int32_t i = 0;
+ afr_private_t *priv = NULL;
+ struct afr_child *sorted_list = NULL;
+ uint32_t max_latency;
+ uint32_t limit = AFR_HALO_HYBRID_CHILD_LIMIT;
+
+ priv = this->private;
+
+ if (!priv->halo_enabled || !priv->halo_hybrid_mode)
+ return 0;
+
+ if (limit > priv->child_count)
+ limit = priv->child_count;
+
+ max_latency = priv->halo_hybrid_read_max_latency_msec;
+
+ sorted_list = alloca (sizeof (struct afr_child) * priv->child_count);
+
+ /* Find children meeting the latency threshold */
+ for (i = 0; i < priv->child_count; i++) {
+ sorted_list[i].idx = i;
+ sorted_list[i].child_up = priv->child_up[i];
+ sorted_list[i].latency = priv->child_latency[i];
+ }
+
+ /* QuickSort the children according to latency */
+ qsort (sorted_list, priv->child_count, sizeof (struct afr_child),
+ _afr_cmp_child);
+
+ i = 0;
+ while (i < priv->child_count && sorted_list[i].latency <= max_latency)
+ hybrid_children[sorted_list[i++].idx] = 1;
+
+ /* Found some candidates */
+ if (i != 0)
+ return i;
+
+ /* If no candidates can be found meeting the max_latency threshold
+ * then find the best of those we have to our limit.
+ */
+ for (i = 0; i < limit; i++)
+ hybrid_children[sorted_list[i].idx] = 1;
+
+ return i;
+}
+
+int
+find_best_down_child (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int32_t best_child = -1;
+ int64_t best_latency = INT64_MAX;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->child_up[i] &&
+ priv->child_latency[i] >= 0 &&
+ priv->child_latency[i] < best_latency) {
+ best_child = i;
+ best_latency = priv->child_latency[i];
+ }
+ }
+ if (best_child >= 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Found best down child (%d) "
+ "@ %ld ms latency", best_child, best_latency);
+ }
+ return best_child;
+}
+
+int
+find_worst_up_child (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int32_t worst_child = -1;
+ int64_t worst_latency = INT64_MIN;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] &&
+ priv->child_latency[i] >= 0 &&
+ priv->child_latency[i] >= worst_latency) {
+ worst_child = i;
+ worst_latency = priv->child_latency[i];
+ }
+ }
+ if (worst_child >= 0) {
+ gf_log (this->name, GF_LOG_DEBUG, "Found worst up child (%d)"
+ " @ %ld ms latency", worst_child, worst_latency);
+ }
+ return worst_child;
+}
+
+static const char *halo_state_str(int i)
+{
+ switch (i) {
+ case 0: return "DOWN";
+ case 1: return "UP";
+ }
+
+ return "unknown";
+}
+
+
+static void dump_halo_states (xlator_t *this) {
+ afr_private_t *priv = NULL;
+ int i = -1;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_latency[i] == AFR_CHILD_DOWN_LATENCY) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Child %d halo state: %s (N/A)",
+ i,
+ halo_state_str(priv->child_up[i]));
+ } else {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Child %d halo state: %s (%"PRIi64" ms)",
+ i,
+ halo_state_str(priv->child_up[i]),
+ priv->child_latency[i]);
+ }
+ }
+}
+
+static void
+_afr_handle_ping_event (xlator_t *this, xlator_t *child_xlator,
+ const int idx, const int64_t halo_max_latency_msec,
+ int32_t *event, int64_t *child_latency_msec,
+ gf_boolean_t child_halo_enabled)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int up_children = 0;
+ int best_down_child = 0;
+ uint64_t latency_samples = 0;
+
+ priv = this->private;
+
+ /* Base it off the _minimum_ latency we've ever seen */
+ *child_latency_msec = child_xlator->client_latency.min / 1000.0;
+ latency_samples = child_xlator->client_latency.count;
+ priv->child_latency[idx] = *child_latency_msec;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] == 1) {
+ up_children++;
+ }
+ }
+
+ /* Don't do anything until you have some minimum numbner of
+ * latency samples */
+ if (priv->halo_enabled == _gf_true && child_halo_enabled == _gf_false) {
+ gf_log (child_xlator->name, GF_LOG_INFO, "In-sufficient "
+ " number of latency samples (%" PRIu64
+ " < %d), halo in-active.",
+ latency_samples, priv->halo_min_samples);
+ }
+
+ gf_log (this->name, GF_LOG_DEBUG,
+ "ping: child %u (%s) latency %"PRIu64" ms (max %"PRIu64" ms)"
+ " up_count %d (min %d) enabled %s",
+ idx, child_xlator ? child_xlator->name : "<null>",
+ *child_latency_msec,
+ halo_max_latency_msec,
+ up_children,
+ priv->halo_min_replicas,
+ child_halo_enabled ? "true" : "false");
+
+ /*
+ * Case 1: This child's latency exceeds the maximum allowable
+ * for this halo.
+ */
+ if (child_halo_enabled &&
+ *child_latency_msec > halo_max_latency_msec &&
+ priv->child_up[idx] == 1 &&
+ up_children > priv->halo_min_replicas) {
+ if (find_worst_up_child (this) == idx) {
+ gf_log (child_xlator->name, GF_LOG_INFO,
+ "Child latency (%"PRIi64"ms) "
+ "exceeds halo threshold (%"PRIi64"), "
+ "marking child down, "
+ "min_replicas (%d) still "
+ "satisfied.",
+ *child_latency_msec,
+ halo_max_latency_msec,
+ priv->halo_min_replicas);
+ *event = GF_EVENT_CHILD_DOWN;
+ }
+ /*
+ * Case 2: Child latency is within halo and currently marked down,
+ * mark it up.
+ */
+ } else if ((child_halo_enabled == _gf_false ||
+ *child_latency_msec <= halo_max_latency_msec) &&
+ priv->child_up[idx] == 0) {
+ if (child_halo_enabled == _gf_false ||
+ up_children < priv->halo_max_replicas) {
+ gf_log (child_xlator->name, GF_LOG_INFO,
+ "Child latency (%ld ms) "
+ "below halo threshold (%ld) or halo is "
+ "disabled, marking child up.",
+ *child_latency_msec,
+ halo_max_latency_msec);
+ *event = GF_EVENT_CHILD_UP;
+ } else {
+ gf_log (child_xlator->name, GF_LOG_INFO,
+ "Not marking child %d up, "
+ "max replicas (%d) reached.", idx,
+ priv->halo_max_replicas);
+ }
+ /*
+ * Case 3: Child latency is within halo,and currently marked up,
+ * mark it down if it's the highest latency child and the
+ * number of up children is greater than halo_max_replicas.
+ * UNLESS you are an SHD in which case do nothing.
+ */
+ } else if ((child_halo_enabled == _gf_true &&
+ *child_latency_msec <= halo_max_latency_msec) &&
+ priv->child_up[idx] == 1) {
+ if (find_worst_up_child (this) == idx &&
+ up_children > priv->halo_max_replicas &&
+ !priv->shd.iamshd) {
+ gf_log (child_xlator->name, GF_LOG_INFO,
+ "Child latency (%"PRIi64"ms) "
+ "exceeds halo threshold (%"PRIi64"), "
+ "but halo_max_replicas (%d) exceeded, "
+ "marking child down.",
+ *child_latency_msec,
+ halo_max_latency_msec,
+ priv->halo_max_replicas);
+ *event = GF_EVENT_CHILD_DOWN;
+ }
+ }
+
+ if (*event != GF_EVENT_CHILD_PING &&
+ gf_log_get_loglevel () >= GF_LOG_DEBUG) {
+ gf_log (this->name, GF_LOG_DEBUG, "Initial halo states:");
+ dump_halo_states (this);
+ }
+}
+
+void
+_afr_handle_child_up_event (xlator_t *this, xlator_t *child_xlator,
+ const int idx, int64_t halo_max_latency_msec,
+ int32_t *event, int32_t *call_psh, int32_t *up_child,
+ gf_boolean_t child_halo_enabled)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int up_children = 0;
+ int worst_up_child = -1;
+ gf_boolean_t was_down = _gf_false;
+
+ priv = this->private;
+
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ /*
+ * Track the fact we did this, we may need to repeal this
+ * if we later decide to mark this brick down.
+ */
+ was_down = _gf_true;
+ priv->event_generation++;
+ }
+ priv->child_up[idx] = 1;
+
+ *call_psh = 1;
+ *up_child = idx;
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 1)
+ up_children++;
+
+ /*
+ * Handle the edge case where we exceed
+ * halo_min_replicas and we've got a child which is
+ * marked up as it was helping to satisfy the
+ * halo_min_replicas even though it's latency exceeds
+ * halo_max_latency_msec.
+ */
+ if (child_halo_enabled == _gf_true &&
+ up_children > priv->halo_min_replicas) {
+ worst_up_child = find_worst_up_child (this);
+ if (worst_up_child >= 0 &&
+ priv->child_latency[worst_up_child] >
+ halo_max_latency_msec) {
+ if (was_down == _gf_true)
+ priv->event_generation--;
+ *call_psh = 0;
+ priv->child_up[worst_up_child] = 0;
+ up_children--;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "Marking child %d down, "
+ "doesn't meet halo threshold "
+ "(%ld), and > "
+ "halo_min_replicas (%d)",
+ worst_up_child,
+ halo_max_latency_msec,
+ priv->halo_min_replicas);
+ goto out;
+ }
+ }
+ if (priv->halo_enabled &&
+ up_children > priv->halo_max_replicas &&
+ !priv->shd.iamshd) {
+ if (was_down == _gf_true)
+ priv->event_generation--;
+ *call_psh = 0;
+ worst_up_child = find_worst_up_child (this);
+ if (worst_up_child < 0) {
+ worst_up_child = idx;
+ }
+ priv->child_up[worst_up_child] = 0;
+ gf_log (this->name, GF_LOG_INFO,
+ "Marking child %d down, "
+ "up_children (%d) > "
+ "halo_max_replicas (%d)",
+ worst_up_child,
+ up_children,
+ priv->halo_max_replicas);
+ up_children--;
+ goto out;
+ }
+out:
+ if (up_children == 1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Subvolume '%s' came back up; "
+ "going online.",
+ child_xlator->name);
+ } else {
+ *event = GF_EVENT_CHILD_MODIFIED;
+ }
+
+ priv->last_event[idx] = *event;
+
+ if (gf_log_get_loglevel () >= GF_LOG_DEBUG) {
+ gf_log (this->name, GF_LOG_DEBUG, "New halo states:");
+ dump_halo_states (this);
+ }
+}
+
+void
+_afr_handle_child_down_event (xlator_t *this, xlator_t *child_xlator,
+ int idx, int64_t child_latency_msec,
+ int64_t halo_max_latency_msec, int32_t *event,
+ int32_t *call_psh, int32_t *up_child,
+ gf_boolean_t child_halo_enabled)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int up_children = 0;
+ int down_children = 0;
+ int best_down_child = -1;
+ gf_boolean_t swap_child = _gf_false;
+
+ priv = this->private;
+
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between than
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->event_generation++;
+ }
+
+ /*
+ * If this is an _actual_ CHILD_DOWN event, we
+ * want to set the child_latency to AFR_CHILD_DOWN_LATENCY to
+ * indicate the child is really disconnected.
+ */
+ if (child_latency_msec == AFR_CHILD_DOWN_LATENCY) {
+ priv->child_latency[idx] = AFR_CHILD_DOWN_LATENCY;
+ }
+ priv->child_up[idx] = 0;
+
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 1)
+ up_children++;
+
+ /*
+ * Handle the edge case where we need to find the
+ * next best child (to mark up) as marking this child
+ * down would cause us to fall below halo_min_replicas.
+ * We will also force the SHD to heal this child _now_
+ * as we want it to be up to date if we are going to
+ * begin using it synchronously.
+ */
+ best_down_child = find_best_down_child (this);
+ if (child_halo_enabled == _gf_true) {
+ if (up_children < priv->halo_min_replicas &&
+ priv->halo_failover_enabled == _gf_true)
+ swap_child = _gf_true;
+ else if (up_children < priv->halo_max_replicas &&
+ priv->child_latency[best_down_child] <=
+ halo_max_latency_msec &&
+ priv->halo_failover_enabled == _gf_true)
+ swap_child = _gf_true;
+ }
+
+ if (swap_child) {
+ if (best_down_child >= 0) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Swapping out child %d for "
+ "child %d to satisfy "
+ "halo_min_replicas (%d).",
+ idx, best_down_child,
+ priv->halo_min_replicas);
+ priv->child_up[best_down_child] = 1;
+ *call_psh = 1;
+ *up_child = best_down_child;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 0)
+ down_children++;
+ if (down_children == priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "All subvolumes are down. Going "
+ "offline until atleast one of them "
+ "comes back up.");
+ } else {
+ *event = GF_EVENT_CHILD_MODIFIED;
+ }
+ priv->last_event[idx] = *event;
+
+ if (gf_log_get_loglevel () >= GF_LOG_DEBUG) {
+ gf_log (this->name, GF_LOG_DEBUG, "New halo states:");
+ dump_halo_states (this);
+ }
+}
+
+int64_t
+_afr_get_halo_latency (xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int64_t halo_max_latency_msec = 0;
+
+ priv = this->private;
+
+ if (priv->shd.iamshd) {
+ halo_max_latency_msec = priv->shd.halo_max_latency_msec;
+ } else if (priv->nfsd.iamnfsd) {
+ halo_max_latency_msec =
+ priv->nfsd.halo_max_latency_msec;
+ } else {
+ halo_max_latency_msec = priv->halo_max_latency_msec;
+ }
+ gf_log (this->name, GF_LOG_DEBUG, "Using halo latency %ld",
+ halo_max_latency_msec);
+ return halo_max_latency_msec;
+}
+
+
int32_t
afr_notify (xlator_t *this, int32_t event,
void *data, void *data2)
{
+ xlator_t *child_xlator = NULL;
afr_private_t *priv = NULL;
int i = -1;
- int up_children = 0;
- int down_children = 0;
int propagate = 0;
int had_heard_from_all = 0;
int have_heard_from_all = 0;
int idx = -1;
int ret = -1;
int call_psh = 0;
+ int up_child = -1;
+ uint64_t latency_samples = 0;
dict_t *input = NULL;
dict_t *output = NULL;
gf_boolean_t had_quorum = _gf_false;
gf_boolean_t has_quorum = _gf_false;
+ int64_t halo_max_latency_msec = 0;
+ int64_t child_latency_msec = AFR_CHILD_DOWN_LATENCY;
+ gf_boolean_t child_halo_enabled = _gf_false;
+ child_xlator = (xlator_t *)data;
priv = this->private;
if (!priv)
@@ -4321,7 +4952,7 @@ afr_notify (xlator_t *this, int32_t event,
* O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
*/
priv->did_discovery = _gf_false;
-
+ latency_samples = child_xlator->client_latency.count;
/* parent xlators dont need to know about every child_up, child_down
* because of afr ha. If all subvolumes go down, child_down has
@@ -4332,7 +4963,7 @@ afr_notify (xlator_t *this, int32_t event,
* subsequent revalidate lookup happens on all the dht's subvolumes
* which triggers afr self-heals if any.
*/
- idx = find_child_index (this, data);
+ idx = find_child_index (this, child_xlator);
if (idx < 0) {
gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_CHILD_UP,
"Received child_up from invalid subvolume");
@@ -4341,6 +4972,28 @@ afr_notify (xlator_t *this, int32_t event,
had_quorum = priv->quorum_count && afr_has_quorum (priv->child_up,
this);
+
+ if (!priv->halo_enabled ||
+ latency_samples < priv->halo_min_samples) {
+ child_halo_enabled = _gf_false;
+ halo_max_latency_msec = INT64_MAX;
+ } else {
+ child_halo_enabled = _gf_true;
+ halo_max_latency_msec = _afr_get_halo_latency (this);
+ }
+
+ if (event == GF_EVENT_CHILD_PING) {
+ /* Calculates the child latency and sets event
+ */
+ LOCK (&priv->lock);
+ {
+ _afr_handle_ping_event (this, child_xlator, idx,
+ halo_max_latency_msec, &event,
+ &child_latency_msec, child_halo_enabled);
+ }
+ UNLOCK (&priv->lock);
+ }
+
if (event == GF_EVENT_TRANSLATOR_OP) {
LOCK (&priv->lock);
{
@@ -4367,52 +5020,16 @@ afr_notify (xlator_t *this, int32_t event,
propagate = 1;
break;
case GF_EVENT_CHILD_UP:
- /*
- * This only really counts if the child was never up
- * (value = -1) or had been down (value = 0). See
- * comment at GF_EVENT_CHILD_DOWN for a more detailed
- * explanation.
- */
- if (priv->child_up[idx] != 1) {
- priv->event_generation++;
- }
- priv->child_up[idx] = 1;
-
- call_psh = 1;
- up_children = __afr_get_up_children_count (priv);
- if (up_children == 1) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- AFR_MSG_SUBVOL_UP,
- "Subvolume '%s' came back up; "
- "going online.", ((xlator_t *)data)->name);
- } else {
- event = GF_EVENT_CHILD_MODIFIED;
- }
-
- priv->last_event[idx] = event;
-
+ _afr_handle_child_up_event (this, child_xlator,
+ idx, halo_max_latency_msec, &event, &call_psh,
+ &up_child, child_halo_enabled);
break;
case GF_EVENT_CHILD_DOWN:
- if (priv->child_up[idx] == 1) {
- priv->event_generation++;
- }
- priv->child_up[idx] = 0;
-
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i] == 0)
- down_children++;
- if (down_children == priv->child_count) {
- gf_msg (this->name, GF_LOG_ERROR, 0,
- AFR_MSG_ALL_SUBVOLS_DOWN,
- "All subvolumes are down. Going offline "
- "until atleast one of them comes back up.");
- } else {
- event = GF_EVENT_SOME_CHILD_DOWN;
- }
-
- priv->last_event[idx] = event;
-
+ _afr_handle_child_down_event (this, child_xlator, idx,
+ child_latency_msec, halo_max_latency_msec,
+ &event, &call_psh, &up_child,
+ child_halo_enabled);
break;
case GF_EVENT_CHILD_CONNECTING:
@@ -4439,7 +5056,6 @@ afr_notify (xlator_t *this, int32_t event,
had come up, propagate CHILD_UP, but only this time
*/
event = GF_EVENT_CHILD_DOWN;
- up_children = __afr_get_up_children_count (priv);
for (i = 0; i < priv->child_count; i++) {
if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
event = GF_EVENT_CHILD_UP;
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 7f7962013d7..c7d6261b110 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -46,7 +46,8 @@ enum gf_afr_mem_types_ {
gf_afr_mt_spbc_timeout_t,
gf_afr_mt_spb_status_t,
gf_afr_mt_empty_brick_t,
- gf_afr_mt_end
+ gf_afr_mt_child_latency_t,
+ gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 1b3b1ca0af1..9c12e433097 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -371,7 +371,7 @@ afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd,
{
afr_private_t *priv = NULL;
off_t off = 0;
- size_t block = 128 * 1024;
+ size_t block = 0;
int type = AFR_SELFHEAL_DATA_FULL;
int ret = -1;
call_frame_t *iter_frame = NULL;
@@ -383,6 +383,8 @@ afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd,
healed_sinks[ARBITER_BRICK_INDEX] = 0;
}
+ block = 128 * 1024 * priv->data_self_heal_window_size;
+
type = afr_data_self_heal_type_get (priv, healed_sinks, source,
replies);
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index c6ac5ebfd1b..4ac1d32f58a 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -58,6 +58,7 @@ typedef struct {
eh_t **statistics;
uint32_t max_threads;
uint32_t wait_qlength;
+ uint32_t halo_max_latency_msec;
} afr_self_heald_t;
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 6f4783c9213..ae9b28c7fb4 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -176,6 +176,42 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("data-self-heal-algorithm",
priv->data_self_heal_algorithm, options, str, out);
+ GF_OPTION_RECONF ("halo-enabled",
+ priv->halo_enabled, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("halo-failover-enabled",
+ priv->halo_failover_enabled, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("halo-shd-max-latency",
+ priv->shd.halo_max_latency_msec, options, uint32,
+ out);
+
+ GF_OPTION_RECONF ("halo-nfsd-max-latency",
+ priv->nfsd.halo_max_latency_msec, options, uint32,
+ out);
+
+ GF_OPTION_RECONF ("halo-max-latency", priv->halo_max_latency_msec,
+ options, uint32, out);
+
+ GF_OPTION_RECONF ("halo-hybrid-mode",
+ priv->halo_hybrid_mode, options, bool,
+ out);
+
+ GF_OPTION_RECONF ("halo-hybrid-read-max-latency",
+ priv->halo_hybrid_read_max_latency_msec, options,
+ uint32, out);
+
+ GF_OPTION_RECONF ("halo-max-replicas", priv->halo_max_replicas, options,
+ uint32, out);
+
+ GF_OPTION_RECONF ("halo-min-replicas", priv->halo_min_replicas, options,
+ uint32, out);
+
+ GF_OPTION_RECONF ("halo-min-samples", priv->halo_min_samples, options,
+ uint32, out);
+
GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode,
@@ -396,6 +432,35 @@ init (xlator_t *this)
GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
+ GF_OPTION_INIT ("halo-hybrid-mode",
+ priv->halo_hybrid_mode, bool, out);
+
+ GF_OPTION_INIT ("halo-hybrid-read-max-latency",
+ priv->halo_hybrid_read_max_latency_msec, uint32,
+ out);
+
+ GF_OPTION_INIT ("halo-enabled",
+ priv->halo_enabled, bool, out);
+
+ GF_OPTION_INIT ("halo-failover-enabled",
+ priv->halo_failover_enabled, bool, out);
+
+ GF_OPTION_INIT ("halo-shd-max-latency", priv->shd.halo_max_latency_msec,
+ uint32, out);
+ GF_OPTION_INIT ("halo-max-latency", priv->halo_max_latency_msec,
+ uint32, out);
+ GF_OPTION_INIT ("halo-max-replicas", priv->halo_max_replicas, uint32,
+ out);
+ GF_OPTION_INIT ("halo-min-replicas", priv->halo_min_replicas, uint32,
+ out);
+ GF_OPTION_INIT ("halo-min-samples", priv->halo_min_samples, uint32,
+ out);
+
+ GF_OPTION_INIT ("halo-nfsd-max-latency",
+ priv->nfsd.halo_max_latency_msec, uint32, out);
+
+ GF_OPTION_INIT ("iam-nfs-daemon", priv->nfsd.iamnfsd, bool, out);
+
GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
@@ -445,17 +510,24 @@ init (xlator_t *this)
priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
gf_afr_mt_char);
- if (!priv->child_up) {
+
+ priv->child_latency = GF_CALLOC (sizeof (*priv->child_latency),
+ child_count,
+ gf_afr_mt_child_latency_t);
+
+ if (!priv->child_up || !priv->child_latency) {
ret = -ENOMEM;
goto out;
}
- for (i = 0; i < child_count; i++)
+ for (i = 0; i < child_count; i++) {
+ priv->child_latency[i] = 0.0;
priv->child_up[i] = -1; /* start with unknown state.
this initialization needed
for afr_notify() to work
reliably
*/
+ }
priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
gf_afr_mt_xlator_t);
@@ -663,6 +735,85 @@ struct volume_options options[] = {
"jobs that can perform parallel heals in the "
"background."
},
+ { .key = {"halo-shd-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "99999",
+ .description = "Maximum latency for shd halo replication in msec."
+ },
+ { .key = {"halo-enabled"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "False",
+ .description = "Enable Halo (geo) replication mode."
+ },
+ { .key = {"halo-failover-enabled"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "False",
+ .description = "Enable x-halo failover: will allow failover "
+ "to bricks outside the client or daemons' halo "
+ "in an attempt to satisfy halo-min-replicas."
+ },
+ { .key = {"halo-nfsd-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "5",
+ .description = "Maximum latency for nfsd halo replication in msec."
+ },
+ { .key = {"halo-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "5",
+ .description = "Maximum latency for halo replication in msec."
+ },
+ { .key = {"halo-hybrid-mode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enable hybrid sync mounts. When enabled, halo will "
+ "do write FOPs synchronously, and read FOPs will be "
+ "services in-region if the inode is clean/consistent."
+ "If no bricks can be found below "
+ "halo-hybrid-max-read-latency then the best 2 shall "
+ "be selected. This option can be used in "
+ "conjunction with all other halo options."
+ },
+ { .key = {"halo-hybrid-read-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "8",
+ .description = "Maximum latency hybrid mode will use to select "
+ "children for read FOPs. Don't tune this unless "
+ "you really know what you are doing (i.e. you've "
+ "read/understand the associated source code)."
+ },
+ { .key = {"halo-max-replicas"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "99999",
+ .description = "The maximum number of halo replicas; replicas"
+ " beyond this value will be written asynchronously"
+ "via the SHD."
+ },
+ { .key = {"halo-min-replicas"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "2",
+ .description = "The minimum number of halo replicas, before adding "
+ "out of region replicas."
+ },
+ { .key = {"halo-min-samples"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "3",
+ .description = "The minimum number of halo latency samples, before "
+ "we start forming the halos."
+ },
{ .key = {"heal-wait-queue-length"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
@@ -802,6 +953,13 @@ struct volume_options options[] = {
"translator is running as part of self-heal-daemon "
"or not."
},
+ { .key = {"iam-nfs-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of an NFS daemon "
+ "or not."
+ },
{ .key = {"quorum-type"},
.type = GF_OPTION_TYPE_STR,
.value = { "none", "auto", "fixed"},
@@ -866,7 +1024,7 @@ struct volume_options options[] = {
},
{ .key = {"heal-timeout"},
.type = GF_OPTION_TYPE_INT,
- .min = 60,
+ .min = 5,
.max = INT_MAX,
.default_value = "600",
.description = "time interval for checking the need to self-heal "
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 70c3e349743..aa19f1eeb37 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -30,6 +30,9 @@
#define AFR_DIRTY_DEFAULT AFR_XATTR_PREFIX ".dirty"
#define AFR_DIRTY (((afr_private_t *) (THIS->private))->afr_dirty)
+#define AFR_CHILD_DOWN_LATENCY INT64_MAX /* Latency for down children */
+#define AFR_HALO_HYBRID_CHILD_LIMIT 2 /* Examine bricks <= 10 msec */
+#define AFR_HALO_HYBRID_LATENCY_MSEC 10.0 /* Examine bricks <= 10 msec */
#define AFR_LOCKEE_COUNT_MAX 3
#define AFR_DOM_COUNT_MAX 3
#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
@@ -72,6 +75,17 @@ typedef enum {
AFR_FAV_CHILD_POLICY_MAX,
} afr_favorite_child_policy;
+struct afr_nfsd {
+ gf_boolean_t iamnfsd;
+ uint32_t halo_max_latency_msec;
+};
+
+struct afr_child {
+ uint32_t idx;
+ int64_t latency;
+ unsigned char child_up;
+};
+
typedef struct _afr_private {
gf_lock_t lock; /* to guard access to child_count, etc */
unsigned int child_count; /* total number of children */
@@ -83,6 +97,7 @@ typedef struct _afr_private {
inode_t *root_inode;
unsigned char *child_up;
+ int64_t *child_latency;
unsigned char *local;
char **pending_key;
@@ -153,8 +168,19 @@ typedef struct _afr_private {
gf_boolean_t ensure_durability;
char *sh_domain;
char *afr_dirty;
-
- afr_self_heald_t shd;
+ gf_boolean_t halo_enabled;
+
+ /* Halo geo-replication tunables */
+ gf_boolean_t halo_failover_enabled;
+ gf_boolean_t halo_hybrid_mode;
+ uint32_t halo_hybrid_read_max_latency_msec;
+ uint32_t halo_max_latency_msec;
+ uint32_t halo_max_replicas;
+ uint32_t halo_min_replicas;
+ uint32_t halo_min_samples;
+
+ afr_self_heald_t shd;
+ struct afr_nfsd nfsd;
gf_boolean_t consistent_metadata;
uint64_t spb_choice_timeout;
diff --git a/xlators/cluster/aha/Makefile.am b/xlators/cluster/aha/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/cluster/aha/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/cluster/aha/src/Makefile.am b/xlators/cluster/aha/src/Makefile.am
new file mode 100644
index 00000000000..006db127d28
--- /dev/null
+++ b/xlators/cluster/aha/src/Makefile.am
@@ -0,0 +1,18 @@
+
+xlator_LTLIBRARIES = aha.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+
+aha_la_LDFLAGS = -module -avoid-version
+
+aha_la_SOURCES = aha.c aha-fops.c aha-helpers.c aha-retry.c
+aha_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = aha-mem-types.h aha.h aha-helpers.h aha.h aha-retry.h aha-fops.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/cluster/aha/src/aha-fops.c b/xlators/cluster/aha/src/aha-fops.c
new file mode 100644
index 00000000000..3b2ca641de2
--- /dev/null
+++ b/xlators/cluster/aha/src/aha-fops.c
@@ -0,0 +1,952 @@
+#include "aha-fops.h"
+
+static void
+__save_fop (struct aha_fop *fop, struct aha_conf *conf)
+{
+ list_add_tail (&fop->list, &conf->failed);
+}
+
+void
+save_fop (struct aha_fop *fop, struct aha_conf *conf)
+{
+ LOCK (&conf->lock);
+ {
+ __save_fop (fop, conf);
+ }
+ UNLOCK (&conf->lock);
+}
+
+#define AHA_HANDLE_FOP(frame, type, cbk, obj, fn, args ...) \
+ do { \
+ struct aha_fop *fop = aha_fop_new (); \
+ if (!fop) { \
+ gf_log (GF_AHA, GF_LOG_CRITICAL, \
+ "Allocation failed, terminating " \
+ "to prevent a hung mount."); \
+ assert (0); \
+ } \
+ fop->stub = fop_##type##_stub (frame, aha_##type, \
+ args); \
+ fop->frame = frame; \
+ frame->local = fop; \
+ STACK_WIND (frame, cbk, obj, fn, args); \
+ } while (0) \
+
+/*
+ * AHA_HANDLE_FOP_CBK
+ *
+ * 1) If the error returned is ENOTCONN *and* the timer that waits
+ * for the server to come back has not expired, store the fop to retry later.
+ * 2) If the timer waiting for the server has expired, just unwind.
+ * 3) If the error returned is something other than ENOTCONN, just unwind.
+ *
+ */
+#define AHA_HANDLE_FOP_CBK(type, frame, args ...) \
+ do { \
+ struct aha_conf *conf = frame->this->private; \
+ struct aha_fop *fop = frame->local; \
+ if (op_ret != 0 && op_errno == ENOTCONN && \
+ !aha_is_timer_expired (conf)) { \
+ gf_log (GF_AHA, GF_LOG_WARNING, \
+ "Got ENOTCONN from client, storing " \
+ "to retry later!"); \
+ save_fop (fop, conf); \
+ } else { \
+ AHA_DESTROY_LOCAL (frame); \
+ STACK_UNWIND_STRICT (type, frame, args); \
+ } \
+ } while (0) \
+
+int
+aha_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ AHA_HANDLE_FOP_CBK (lookup, frame, op_ret, op_errno, inode,
+ buf, xdata, postparent);
+ return 0;
+}
+
+
+int
+aha_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, lookup, aha_lookup_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lookup,
+ loc, xdata);
+ return 0;
+}
+
+
+int
+aha_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+aha_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, stat, aha_stat_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->stat,
+ loc, xdata);
+ return 0;
+}
+
+
+int
+aha_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (setattr, frame, op_ret, op_errno, preop,
+ postop, xdata);
+ return 0;
+}
+
+
+int
+aha_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, setattr, aha_setattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+}
+
+
+int
+aha_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (fsetattr, frame, op_ret, op_errno, preop,
+ postop, xdata);
+ return 0;
+}
+
+int
+aha_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, fsetattr, aha_fsetattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
+}
+
+
+int
+aha_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (truncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
+}
+
+
+int
+aha_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, truncate, aha_truncate_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
+}
+
+
+int
+aha_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (ftruncate, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
+}
+
+
+int
+aha_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, ftruncate, aha_ftruncate_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->ftruncate,
+ fd, offset, xdata);
+ return 0;
+}
+
+
+int
+aha_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (access, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+aha_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t mask, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, access, aha_access_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->access,
+ loc, mask, xdata);
+ return 0;
+}
+
+
+int
+aha_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ const char *path, struct iatt *sbuf, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (readlink, frame, op_ret, op_errno,
+ path, sbuf, xdata);
+ return 0;
+}
+
+
+int
+aha_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ size_t size, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, readlink, aha_readlink_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->readlink,
+ loc, size, xdata);
+ return 0;
+}
+
+
+int
+aha_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (mknod, frame, op_ret, op_errno,
+ inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+aha_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, mknod, aha_mknod_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->mknod,
+ loc, mode, rdev, umask, xdata);
+ return 0;
+}
+
+
+int
+aha_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (mkdir, frame, op_ret, op_errno,
+ inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+aha_mkdir (call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, mkdir, aha_mkdir_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->mkdir,
+ loc, mode, umask, xdata);
+ return 0;
+}
+
+
+int
+aha_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (unlink, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+aha_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, unlink, aha_unlink_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->unlink,
+ loc, xflag, xdata);
+ return 0;
+}
+
+
+int
+aha_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (rmdir, frame, op_ret, op_errno,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+aha_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, rmdir, aha_rmdir_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->rmdir,
+ loc, flags, xdata);
+ return 0;
+}
+
+
+int
+aha_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+aha_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, symlink, aha_symlink_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->symlink,
+ linkpath, loc, umask, xdata);
+ return 0;
+}
+
+
+int
+aha_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (rename, frame, op_ret, op_errno, buf,
+ preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+
+int
+aha_rename (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, rename, aha_rename_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+
+int
+aha_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (link, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+aha_link (call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, link, aha_link_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+
+int
+aha_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+
+int
+aha_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, create, aha_create_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+}
+
+
+int
+aha_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+
+int
+aha_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, open, aha_open_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
+}
+
+int
+aha_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (readv, frame, op_ret, op_errno,
+ vector, count, stbuf, iobref, xdata);
+ return 0;
+}
+
+int
+aha_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, uint32_t flags,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, readv, aha_readv_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
+}
+
+
+int
+aha_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (writev, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
+}
+
+int
+aha_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count,
+ off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, writev, aha_writev_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
+ fd, vector, count, off, flags, iobref, xdata);
+ return 0;
+}
+
+
+int
+aha_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (flush, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+aha_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, flush, aha_flush_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->flush,
+ fd, xdata);
+ return 0;
+}
+
+
+int
+aha_fsync_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (fsync, frame, op_ret, op_errno,
+ prebuf, postbuf, xdata);
+ return 0;
+}
+
+
+int
+aha_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, fsync, aha_fsync_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsync,
+ fd, flags, xdata);
+ return 0;
+}
+
+
+int
+aha_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+
+int
+aha_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, fstat, aha_fstat_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fstat,
+ fd, xdata);
+ return 0;
+}
+
+
+int
+aha_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+
+int
+aha_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, opendir, aha_opendir_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->opendir,
+ loc, fd, xdata);
+ return 0;
+}
+
+int
+aha_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+aha_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, fsyncdir, aha_fsyncdir_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsyncdir,
+ fd, flags, xdata);
+ return 0;
+}
+
+
+int
+aha_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+
+int
+aha_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, statfs, aha_statfs_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->statfs,
+ loc, xdata);
+ return 0;
+}
+
+
+
+int
+aha_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+aha_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, setxattr, aha_setxattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+}
+
+
+int
+aha_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+aha_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, getxattr, aha_getxattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+}
+
+int
+aha_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+aha_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, fsetxattr, aha_fsetxattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetxattr,
+ fd, dict, flags, xdata);
+ return 0;
+}
+
+
+int
+aha_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+aha_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, fgetxattr, aha_fgetxattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fgetxattr,
+ fd, name, xdata);
+ return 0;
+}
+
+
+int
+aha_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+aha_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, xattrop, aha_xattrop_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->xattrop,
+ loc, flags, dict, xdata);
+ return 0;
+}
+
+
+int
+aha_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+
+int
+aha_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, fxattrop, aha_fxattrop_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fxattrop,
+ fd, flags, dict, xdata);
+ return 0;
+}
+
+
+int
+aha_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+aha_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, removexattr, aha_removexattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->removexattr,
+ loc, name, xdata);
+ return 0;
+}
+
+int
+aha_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+aha_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, fremovexattr, aha_fremovexattr_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fremovexattr,
+ fd, name, xdata);
+ return 0;
+}
+
+
+int
+aha_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
+}
+
+
+int
+aha_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, lk, aha_lk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->lk,
+ fd, cmd, lock, xdata);
+ return 0;
+}
+
+
+int
+aha_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+aha_inodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, inodelk, aha_inodelk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->inodelk,
+ volume, loc, cmd, lock, xdata);
+ return 0;
+}
+
+
+int
+aha_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+aha_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, finodelk, aha_finodelk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->finodelk,
+ volume, fd, cmd, lock, xdata);
+ return 0;
+}
+
+
+int
+aha_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+aha_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, entrylk, aha_entrylk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
+}
+
+
+int
+aha_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+
+int
+aha_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, fentrylk, aha_fentrylk_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
+}
+
+int
+aha_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (readdir, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+
+int
+aha_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata)
+{
+ AHA_HANDLE_FOP (frame, readdir, aha_readdir_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->readdir,
+ fd, size, off, xdata);
+ return 0;
+}
+
+
+int
+aha_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ AHA_HANDLE_FOP_CBK (readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+
+int
+aha_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict)
+{
+ AHA_HANDLE_FOP (frame, readdirp, aha_readdirp_cbk,
+ FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->readdirp,
+ fd, size, off, dict);
+ return 0;
+}
diff --git a/xlators/cluster/aha/src/aha-fops.h b/xlators/cluster/aha/src/aha-fops.h
new file mode 100644
index 00000000000..b1fb9d38a80
--- /dev/null
+++ b/xlators/cluster/aha/src/aha-fops.h
@@ -0,0 +1,360 @@
+#ifndef _AHA_FOPS_H
+#define _AHA_FOPS_H
+
+#include "aha.h"
+#include "aha-helpers.h"
+
+/* FOP functions */
+int
+aha_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+aha_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+aha_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int
+aha_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int
+aha_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata);
+
+int
+aha_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata);
+
+int
+aha_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata);
+
+int
+aha_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata);
+
+int
+aha_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
+
+int
+aha_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+int
+aha_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int
+aha_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
+
+int
+aha_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata);
+
+int
+aha_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int
+aha_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int
+aha_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int
+aha_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int
+aha_readv (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, size_t size, off_t offset, uint32_t flags,
+ dict_t *xdata);
+
+int
+aha_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+int
+aha_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int
+aha_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata);
+
+int
+aha_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int
+aha_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
+
+int
+aha_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata);
+
+int
+aha_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+aha_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int
+aha_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int
+aha_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+int
+aha_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int
+aha_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int
+aha_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int
+aha_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int
+aha_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int
+aha_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata);
+
+int
+aha_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata);
+
+int
+aha_finodelk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata);
+
+int
+aha_entrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+int
+aha_fentrylk (call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+int
+aha_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int
+aha_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict);
+
+/* Callback functions */
+
+int
+aha_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent);
+
+int
+aha_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+int
+aha_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata);
+
+int
+aha_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata);
+
+int
+aha_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+
+int
+aha_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+
+int
+aha_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+
+int
+aha_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ const char *path, struct iatt *sbuf, dict_t *xdata);
+
+
+int
+aha_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+
+int
+aha_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+aha_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int
+aha_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+int
+aha_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+int
+aha_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata);
+
+int
+aha_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+int
+aha_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+int
+aha_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata);
+int
+aha_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata);
+
+int
+aha_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata);
+int
+aha_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int
+aha_fsync_cbk (call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+int
+aha_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+int
+aha_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata);
+int
+aha_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+int
+aha_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata);
+int
+aha_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+int
+aha_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int
+aha_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int
+aha_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int
+aha_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int
+aha_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int
+aha_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+int
+aha_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int
+aha_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata);
+
+int
+aha_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+int
+aha_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int
+aha_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+int
+aha_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+int
+aha_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+int
+aha_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+
+#endif /* _AHA_FOPS_H */
diff --git a/xlators/cluster/aha/src/aha-helpers.c b/xlators/cluster/aha/src/aha-helpers.c
new file mode 100644
index 00000000000..e3b713688d3
--- /dev/null
+++ b/xlators/cluster/aha/src/aha-helpers.c
@@ -0,0 +1,46 @@
+#include "aha-helpers.h"
+
+struct aha_conf *aha_conf_new ()
+{
+ struct aha_conf *conf = NULL;
+
+ conf = GF_CALLOC (1, sizeof (*conf), gf_aha_mt_conf);
+ if (!conf)
+ goto err;
+
+ INIT_LIST_HEAD (&conf->failed);
+
+ LOCK_INIT (&conf->lock);
+err:
+ return conf;
+}
+
+void aha_conf_destroy (struct aha_conf *conf)
+{
+ LOCK_DESTROY (&conf->lock);
+ GF_FREE (conf);
+}
+
+struct aha_fop *aha_fop_new ()
+{
+ struct aha_fop *fop = NULL;
+
+ fop = GF_CALLOC (1, sizeof (*fop), gf_aha_mt_fop);
+ if (!fop)
+ goto err;
+
+ INIT_LIST_HEAD (&fop->list);
+
+err:
+ return fop;
+}
+
+void aha_fop_destroy (struct aha_fop *fop)
+{
+ if (!fop)
+ return;
+
+ call_stub_destroy (fop->stub);
+ fop->stub = NULL;
+ GF_FREE (fop);
+}
diff --git a/xlators/cluster/aha/src/aha-helpers.h b/xlators/cluster/aha/src/aha-helpers.h
new file mode 100644
index 00000000000..d9cf9b3295d
--- /dev/null
+++ b/xlators/cluster/aha/src/aha-helpers.h
@@ -0,0 +1,23 @@
+#ifndef _AHA_HELPERS_H
+#define _AHA_HELPERS_H
+
+#include "aha.h"
+
+#define GF_AHA "aha"
+
+struct aha_conf *aha_conf_new ();
+
+void aha_conf_destroy (struct aha_conf *conf);
+
+struct aha_fop *aha_fop_new ();
+
+void aha_fop_destroy (struct aha_fop *fop);
+
+#define AHA_DESTROY_LOCAL(frame) \
+ do { \
+ struct aha_fop *fop = frame->local; \
+ aha_fop_destroy (fop); \
+ frame->local = NULL; \
+ } while (0) \
+
+#endif /* _AHA_HELPERS_H */
diff --git a/xlators/cluster/aha/src/aha-mem-types.h b/xlators/cluster/aha/src/aha-mem-types.h
new file mode 100644
index 00000000000..117dda27e8b
--- /dev/null
+++ b/xlators/cluster/aha/src/aha-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __AHA_MEM_TYPES_H__
+#define __AHA_MEM_TYPES_H__
+
+#include "mem-types.h"
+
+enum gf_aha_mem_types_ {
+ gf_aha_mt_begin_t = gf_common_mt_end + 1,
+ gf_aha_mt_conf,
+ gf_aha_mt_fop,
+ gf_aha_mt_end
+};
+#endif
diff --git a/xlators/cluster/aha/src/aha-retry.c b/xlators/cluster/aha/src/aha-retry.c
new file mode 100644
index 00000000000..8810f913f42
--- /dev/null
+++ b/xlators/cluster/aha/src/aha-retry.c
@@ -0,0 +1,524 @@
+#include "aha.h"
+#include "aha-helpers.h"
+#include "aha-retry.h"
+#include "aha-fops.h"
+
+/*
+ * AHA_RETRY_FOP:
+ *
+ * - We STACK_WIND the fop using the arguments in the call_stub.
+ * We use STACK_WIND because we need a *new* frame, since we already
+ * exhausted the existing frame with the original STACK_WIND.
+ *
+ * - After STACK_WIND completes, we can destroy this frame's local (which
+ * should be struct aha_fop *). The frame itself will get destroyed higher in
+ * the xlator graph, since its still part of the call stack.
+ */
+#define AHA_RETRY_FOP(fop, type, args ...) \
+ do { \
+ call_stub_t *stub = fop->stub; \
+ call_frame_t *frame = fop->frame; \
+ xlator_t *this = frame->this; \
+ STACK_WIND (frame, aha_##type##_cbk, this, \
+ this->fops->type, args); \
+ AHA_DESTROY_LOCAL (frame); \
+ } while (0) \
+
+#define AHA_UNWIND_FOP(fop, type) \
+ do { \
+ call_frame_t *frame = fop->frame; \
+ AHA_DESTROY_LOCAL (frame); \
+ default_##type##_failure_cbk (frame, ETIMEDOUT); \
+ } while (0) \
+
+void
+__aha_retry_force_unwind_fops (struct aha_conf *conf)
+{
+ struct aha_fop *fop = NULL;
+ struct aha_fop *tmp = NULL;
+ size_t ndrained = 0;
+
+ /*
+ * Drain the queue. After we finish the loop, the list
+ * must be empty.
+ */
+ list_for_each_entry_safe (fop, tmp, &conf->failed, list) {
+ list_del (&fop->list);
+ aha_force_unwind_fop (fop);
+ ndrained++;
+ }
+
+ gf_log (GF_AHA, GF_LOG_WARNING,
+ "Force-unwound %"GF_PRI_SIZET" fops!", ndrained);
+
+ assert (list_empty (&conf->failed));
+}
+
+void
+aha_force_unwind_fops (struct aha_conf *conf)
+{
+ LOCK (&conf->lock);
+ {
+ __aha_retry_force_unwind_fops (conf);
+ }
+ UNLOCK (&conf->lock);
+}
+
+void
+__aha_retry_failed_fops (struct aha_conf *conf)
+{
+ struct aha_fop *fop = NULL;
+ struct aha_fop *tmp = NULL;
+ size_t ndrained = 0;
+
+ /*
+ * Skip if the child is not up
+ */
+ if (!conf->child_up) {
+ gf_log (GF_AHA, GF_LOG_WARNING,
+ "Waiting for child to come up before retrying.");
+ return;
+ }
+
+ /*
+ * Skip if the the queue is empty.
+ */
+ if (list_empty (&conf->failed)) {
+ gf_log (GF_AHA, GF_LOG_WARNING, "No FOPs to retry.");
+ }
+
+ /*
+ * Drain the queue. After we finish the loop, the list
+ * must be empty.
+ */
+ list_for_each_entry_safe (fop, tmp, &conf->failed, list) {
+ list_del (&fop->list);
+ aha_retry_fop (fop);
+ ndrained++;
+ }
+
+ gf_log (GF_AHA, GF_LOG_WARNING,
+ "Drained %"GF_PRI_SIZET" fops!", ndrained);
+
+ assert (list_empty (&conf->failed));
+}
+
+
+void
+aha_retry_failed_fops (struct aha_conf *conf)
+{
+ LOCK (&conf->lock);
+ {
+ __aha_retry_failed_fops (conf);
+ }
+ UNLOCK (&conf->lock);
+}
+
+void aha_retry_fop (struct aha_fop *fop)
+{
+ call_stub_t *stub = fop->stub;
+
+ switch (stub->fop) {
+ case GF_FOP_OPEN:
+ AHA_RETRY_FOP (fop, open, &stub->args.loc, stub->args.flags,
+ stub->args.fd, stub->args.xdata);
+ break;
+
+ case GF_FOP_CREATE:
+ AHA_RETRY_FOP (fop, create, &stub->args.loc, stub->args.flags,
+ stub->args.mode, stub->args.umask,
+ stub->args.fd,
+ stub->args.xdata);
+ break;
+
+ case GF_FOP_STAT:
+ AHA_RETRY_FOP (fop, stat, &stub->args.loc, stub->args.xdata);
+ break;
+
+ case GF_FOP_READLINK:
+ AHA_RETRY_FOP (fop, readlink, &stub->args.loc,
+ stub->args.size, stub->args.xdata);
+ break;
+
+ case GF_FOP_MKNOD:
+ AHA_RETRY_FOP (fop, mknod, &stub->args.loc, stub->args.mode,
+ stub->args.rdev, stub->args.umask,
+ stub->args.xdata);
+ break;
+
+ case GF_FOP_MKDIR:
+ AHA_RETRY_FOP (fop, mkdir, &stub->args.loc, stub->args.mode,
+ stub->args.umask, stub->args.xdata);
+ break;
+
+ case GF_FOP_UNLINK:
+ AHA_RETRY_FOP (fop, unlink, &stub->args.loc, stub->args.xflag,
+ stub->args.xdata);
+ break;
+
+ case GF_FOP_RMDIR:
+ AHA_RETRY_FOP (fop, rmdir, &stub->args.loc,
+ stub->args.flags, stub->args.xdata);
+ break;
+
+ case GF_FOP_SYMLINK:
+ AHA_RETRY_FOP (fop, symlink, stub->args.linkname,
+ &stub->args.loc, stub->args.umask,
+ stub->args.xdata);
+ break;
+
+ case GF_FOP_RENAME:
+ AHA_RETRY_FOP (fop, rename, &stub->args.loc,
+ &stub->args.loc2, stub->args.xdata);
+ break;
+
+ case GF_FOP_LINK:
+ AHA_RETRY_FOP (fop, link, &stub->args.loc,
+ &stub->args.loc2, stub->args.xdata);
+ break;
+
+ case GF_FOP_TRUNCATE:
+ AHA_RETRY_FOP (fop, truncate, &stub->args.loc,
+ stub->args.offset, stub->args.xdata);
+ break;
+
+ case GF_FOP_READ:
+ AHA_RETRY_FOP (fop, readv, stub->args.fd, stub->args.size,
+ stub->args.offset, stub->args.flags,
+ stub->args.xdata);
+ break;
+
+ case GF_FOP_WRITE:
+ AHA_RETRY_FOP (fop, writev, stub->args.fd, stub->args.vector,
+ stub->args.count, stub->args.offset,
+ stub->args.flags, stub->args.iobref,
+ stub->args.xdata);
+ break;
+
+ case GF_FOP_STATFS:
+ AHA_RETRY_FOP (fop, statfs, &stub->args.loc, stub->args.xdata);
+ break;
+
+ case GF_FOP_FLUSH:
+ AHA_RETRY_FOP (fop, flush, stub->args.fd, stub->args.xdata);
+ break;
+
+ case GF_FOP_FSYNC:
+ AHA_RETRY_FOP (fop, fsync, stub->args.fd, stub->args.datasync,
+ stub->args.xdata);
+ break;
+
+ case GF_FOP_SETXATTR:
+ AHA_RETRY_FOP (fop, setxattr, &stub->args.loc, stub->args.xattr,
+ stub->args.flags, stub->args.xdata);
+ break;
+
+ case GF_FOP_GETXATTR:
+ AHA_RETRY_FOP (fop, getxattr, &stub->args.loc,
+ stub->args.name, stub->args.xdata);
+ break;
+
+ case GF_FOP_FSETXATTR:
+ AHA_RETRY_FOP (fop, fsetxattr, stub->args.fd,
+ stub->args.xattr, stub->args.flags,
+ stub->args.xdata);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ AHA_RETRY_FOP (fop, fgetxattr, stub->args.fd,
+ stub->args.name, stub->args.xdata);
+ break;
+
+ case GF_FOP_REMOVEXATTR:
+ AHA_RETRY_FOP (fop, removexattr, &stub->args.loc,
+ stub->args.name, stub->args.xdata);
+ break;
+
+ case GF_FOP_FREMOVEXATTR:
+ AHA_RETRY_FOP (fop, fremovexattr, stub->args.fd,
+ stub->args.name, stub->args.xdata);
+ break;
+
+ case GF_FOP_OPENDIR:
+ AHA_RETRY_FOP (fop, opendir, &stub->args.loc,
+ stub->args.fd, stub->args.xdata);
+ break;
+
+ case GF_FOP_FSYNCDIR:
+ AHA_RETRY_FOP (fop, fsyncdir, stub->args.fd,
+ stub->args.datasync, stub->args.xdata);
+ break;
+
+ case GF_FOP_ACCESS:
+ AHA_RETRY_FOP (fop, access, &stub->args.loc,
+ stub->args.mask, stub->args.xdata);
+ break;
+
+ case GF_FOP_FTRUNCATE:
+ AHA_RETRY_FOP (fop, ftruncate, stub->args.fd,
+ stub->args.offset, stub->args.xdata);
+ break;
+
+ case GF_FOP_FSTAT:
+ AHA_RETRY_FOP (fop, fstat, stub->args.fd, stub->args.xdata);
+ break;
+
+ case GF_FOP_LK:
+ AHA_RETRY_FOP (fop, lk, stub->args.fd, stub->args.cmd,
+ &stub->args.lock, stub->args.xdata);
+ break;
+
+ case GF_FOP_INODELK:
+ AHA_RETRY_FOP (fop, inodelk, stub->args.volume,
+ &stub->args.loc, stub->args.cmd,
+ &stub->args.lock, stub->args.xdata);
+ break;
+
+ case GF_FOP_FINODELK:
+ AHA_RETRY_FOP (fop, finodelk, stub->args.volume,
+ stub->args.fd, stub->args.cmd,
+ &stub->args.lock, stub->args.xdata);
+ break;
+
+ case GF_FOP_ENTRYLK:
+ AHA_RETRY_FOP (fop, entrylk, stub->args.volume, &stub->args.loc,
+ stub->args.name, stub->args.entrylkcmd,
+ stub->args.entrylktype, stub->args.xdata);
+ break;
+
+ case GF_FOP_FENTRYLK:
+ AHA_RETRY_FOP (fop, fentrylk, stub->args.volume, stub->args.fd,
+ stub->args.name, stub->args.entrylkcmd,
+ stub->args.entrylktype, stub->args.xdata);
+ break;
+
+ case GF_FOP_LOOKUP:
+ AHA_RETRY_FOP (fop, lookup, &stub->args.loc, stub->args.xdata);
+ break;
+
+ case GF_FOP_READDIR:
+ AHA_RETRY_FOP (fop, readdir, stub->args.fd, stub->args.size,
+ stub->args.offset, stub->args.xdata);
+ break;
+
+ case GF_FOP_READDIRP:
+ AHA_RETRY_FOP (fop, readdirp, stub->args.fd, stub->args.size,
+ stub->args.offset, stub->args.xdata);
+ break;
+
+ case GF_FOP_XATTROP:
+ AHA_RETRY_FOP (fop, xattrop, &stub->args.loc, stub->args.optype,
+ stub->args.xattr, stub->args.xdata);
+ break;
+
+ case GF_FOP_FXATTROP:
+ AHA_RETRY_FOP (fop, fxattrop, stub->args.fd, stub->args.optype,
+ stub->args.xattr, stub->args.xdata);
+ break;
+
+ case GF_FOP_SETATTR:
+ AHA_RETRY_FOP (fop, setattr, &stub->args.loc, &stub->args.stat,
+ stub->args.valid, stub->args.xdata);
+ break;
+
+ case GF_FOP_FSETATTR:
+ AHA_RETRY_FOP (fop, fsetattr, stub->args.fd, &stub->args.stat,
+ stub->args.valid, stub->args.xdata);
+ break;
+
+ default:
+ /* Some fops are not implemented yet:
+ *
+ * GF_FOP_NULL
+ * GF_FOP_RCHECKSUM
+ * GF_FOP_FORGET
+ * GF_FOP_RELEASE
+ * GF_FOP_RELEASEDIR
+ * GF_FOP_GETSPEC
+ * GF_FOP_FALLOCATE
+ * GF_FOP_DISCARD
+ * GF_FOP_ZEROFILL
+ * GF_FOP_MAXVALUE
+ *
+ */
+ gf_log (GF_AHA, GF_LOG_CRITICAL, "Got unexpected FOP %s",
+ gf_fop_list[stub->fop]);
+ assert (0);
+ break;
+ }
+}
+
+void
+aha_force_unwind_fop (struct aha_fop *fop)
+{
+ call_stub_t *stub = fop->stub;
+
+ switch (stub->fop) {
+ case GF_FOP_OPEN:
+ AHA_UNWIND_FOP (fop, open);
+ break;
+
+ case GF_FOP_CREATE:
+ AHA_UNWIND_FOP (fop, create);
+ break;
+
+ case GF_FOP_STAT:
+ AHA_UNWIND_FOP (fop, stat);
+ break;
+
+ case GF_FOP_READLINK:
+ AHA_UNWIND_FOP (fop, readlink);
+ break;
+
+ case GF_FOP_MKNOD:
+ AHA_UNWIND_FOP (fop, mknod);
+ break;
+
+ case GF_FOP_MKDIR:
+ AHA_UNWIND_FOP (fop, mkdir);
+ break;
+
+ case GF_FOP_UNLINK:
+ AHA_UNWIND_FOP (fop, unlink);
+ break;
+
+ case GF_FOP_RMDIR:
+ AHA_UNWIND_FOP (fop, rmdir);
+ break;
+
+ case GF_FOP_SYMLINK:
+ AHA_UNWIND_FOP (fop, symlink);
+ break;
+
+ case GF_FOP_RENAME:
+ AHA_UNWIND_FOP (fop, rename);
+ break;
+
+ case GF_FOP_LINK:
+ AHA_UNWIND_FOP (fop, link);
+ break;
+
+ case GF_FOP_TRUNCATE:
+ AHA_UNWIND_FOP (fop, truncate);
+ break;
+
+ case GF_FOP_READ:
+ AHA_UNWIND_FOP (fop, readv);
+ break;
+
+ case GF_FOP_WRITE:
+ AHA_UNWIND_FOP (fop, writev);
+ break;
+
+ case GF_FOP_STATFS:
+ AHA_UNWIND_FOP (fop, statfs);
+ break;
+
+ case GF_FOP_FLUSH:
+ AHA_UNWIND_FOP (fop, flush);
+ break;
+
+ case GF_FOP_FSYNC:
+ AHA_UNWIND_FOP (fop, fsync);
+ break;
+
+ case GF_FOP_SETXATTR:
+ AHA_UNWIND_FOP (fop, setxattr);
+ break;
+
+ case GF_FOP_GETXATTR:
+ AHA_UNWIND_FOP (fop, getxattr);
+ break;
+
+ case GF_FOP_FSETXATTR:
+ AHA_UNWIND_FOP (fop, fsetxattr);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ AHA_UNWIND_FOP (fop, fgetxattr);
+ break;
+
+ case GF_FOP_REMOVEXATTR:
+ AHA_UNWIND_FOP (fop, removexattr);
+ break;
+
+ case GF_FOP_FREMOVEXATTR:
+ AHA_UNWIND_FOP (fop, fremovexattr);
+ break;
+
+ case GF_FOP_OPENDIR:
+ AHA_UNWIND_FOP (fop, opendir);
+ break;
+
+ case GF_FOP_FSYNCDIR:
+ AHA_UNWIND_FOP (fop, fsyncdir);
+ break;
+
+ case GF_FOP_ACCESS:
+ AHA_UNWIND_FOP (fop, access);
+ break;
+
+ case GF_FOP_FTRUNCATE:
+ AHA_UNWIND_FOP (fop, ftruncate);
+ break;
+
+ case GF_FOP_FSTAT:
+ AHA_UNWIND_FOP (fop, fstat);
+ break;
+
+ case GF_FOP_LK:
+ AHA_UNWIND_FOP (fop, lk);
+ break;
+
+ case GF_FOP_INODELK:
+ AHA_UNWIND_FOP (fop, inodelk);
+ break;
+
+ case GF_FOP_FINODELK:
+ AHA_UNWIND_FOP (fop, finodelk);
+ break;
+
+ case GF_FOP_ENTRYLK:
+ AHA_UNWIND_FOP (fop, entrylk);
+ break;
+
+ case GF_FOP_FENTRYLK:
+ AHA_UNWIND_FOP (fop, fentrylk);
+ break;
+
+ case GF_FOP_LOOKUP:
+ AHA_UNWIND_FOP (fop, lookup);
+ break;
+
+ case GF_FOP_READDIR:
+ AHA_UNWIND_FOP (fop, readdir);
+ break;
+
+ case GF_FOP_READDIRP:
+ AHA_UNWIND_FOP (fop, readdirp);
+ break;
+
+ case GF_FOP_XATTROP:
+ AHA_UNWIND_FOP (fop, xattrop);
+ break;
+
+ case GF_FOP_FXATTROP:
+ AHA_UNWIND_FOP (fop, fxattrop);
+ break;
+
+ case GF_FOP_SETATTR:
+ AHA_UNWIND_FOP (fop, setattr);
+ break;
+
+ case GF_FOP_FSETATTR:
+ AHA_UNWIND_FOP (fop, fsetattr);
+ break;
+
+ default:
+ /* Some fops are not implemented yet,
+ * and this would never happen cause we wouldn't
+ * queue them (see the assert statement in aha_retry_fop())
+ */
+ break;
+ }
+}
diff --git a/xlators/cluster/aha/src/aha-retry.h b/xlators/cluster/aha/src/aha-retry.h
new file mode 100644
index 00000000000..5c8f56bca97
--- /dev/null
+++ b/xlators/cluster/aha/src/aha-retry.h
@@ -0,0 +1,12 @@
+#ifndef _AHA_RETRY_H
+#define _AHA_RETRY_H
+
+void aha_retry_failed_fops (struct aha_conf *conf);
+
+void aha_retry_fop (struct aha_fop *fop);
+
+void aha_force_unwind_fops (struct aha_conf *conf);
+
+void aha_force_unwind_fop (struct aha_fop *fop);
+
+#endif /* _AHA_RETRY_H */
diff --git a/xlators/cluster/aha/src/aha.c b/xlators/cluster/aha/src/aha.c
new file mode 100644
index 00000000000..2135e47f37f
--- /dev/null
+++ b/xlators/cluster/aha/src/aha.c
@@ -0,0 +1,345 @@
+#include "aha-helpers.h"
+#include "aha-retry.h"
+#include "aha-fops.h"
+#include "aha.h"
+
+#include "syncop.h"
+
+
+int
+retry_failed_fops_cbk (int ret, call_frame_t *frame, void *arg)
+{
+ /* Nothing to do here ... */
+ return 0;
+}
+
+int
+retry_failed_fops (void *arg)
+{
+ xlator_t *this = NULL;
+
+ struct aha_conf *conf = NULL;
+
+ this = arg;
+ conf = this->private;
+
+ aha_retry_failed_fops (conf);
+
+ return 0;
+}
+
+void
+dispatch_fop_queue_drain (xlator_t *this)
+{
+ struct syncenv *env = NULL;
+ int ret = 0;
+
+ env = this->ctx->env;
+
+ ret = synctask_new (env, retry_failed_fops,
+ retry_failed_fops_cbk, NULL, this);
+ if (ret != 0) {
+ gf_log (GF_AHA, GF_LOG_CRITICAL,
+ "Failed to dispatch synctask "
+ "to drain fop queue!");
+ }
+}
+
+inline void
+__aha_set_timer_status (struct aha_conf *conf, gf_boolean_t expired)
+{
+ conf->timer_expired = expired;
+}
+
+inline gf_boolean_t
+__aha_is_timer_expired (struct aha_conf *conf)
+{
+ return conf->timer_expired;
+}
+
+gf_boolean_t
+aha_is_timer_expired (struct aha_conf *conf)
+{
+ gf_boolean_t expired = _gf_false;
+
+ LOCK (&conf->lock);
+ {
+ expired = __aha_is_timer_expired (conf);
+ }
+ UNLOCK (&conf->lock);
+
+ return expired;
+}
+
+void
+aha_child_down_timer_expired (void *data)
+{
+ struct aha_conf *conf = NULL;
+
+ conf = data;
+
+ gf_log (GF_AHA, GF_LOG_INFO, "Timer expired!");
+
+ LOCK (&conf->lock);
+ {
+ __aha_set_timer_status (conf, _gf_true);
+ }
+ UNLOCK (&conf->lock);
+
+ aha_force_unwind_fops ((struct aha_conf *)data);
+}
+
+void
+__aha_start_timer (struct aha_conf *conf)
+{
+ struct timespec child_down_timeout = {
+ .tv_sec = conf->server_wait_timeout,
+ .tv_nsec = 0
+ };
+
+ __aha_set_timer_status (conf, _gf_false);
+
+ conf->timer = gf_timer_call_after (conf->this->ctx, child_down_timeout,
+ aha_child_down_timer_expired, conf);
+ if (!conf->timer) {
+ gf_log (GF_AHA, GF_LOG_CRITICAL, "Failed to start the timer!");
+ }
+
+ gf_log (GF_AHA, GF_LOG_INFO,
+ "Registered timer for %lu seconds.",
+ conf->server_wait_timeout);
+}
+
+void
+__aha_cancel_timer (struct aha_conf *conf)
+{
+ if (!conf->timer)
+ goto out;
+
+ gf_timer_call_cancel (conf->this->ctx, conf->timer);
+ conf->timer = NULL;
+ gf_log (GF_AHA, GF_LOG_INFO, "Timer cancelled!");
+out:
+ return;
+}
+
+void
+__aha_update_child_status (struct aha_conf *conf, int status)
+{
+ conf->child_up = status;
+}
+
+void
+aha_handle_child_up (xlator_t *this)
+{
+ struct aha_conf *conf = this->private;
+
+ LOCK (&conf->lock);
+ {
+ __aha_update_child_status (
+ conf, AHA_CHILD_STATUS_UP); /* Mark the child as up */
+ __aha_set_timer_status (
+ conf, _gf_false); /* Timer is no longer expired */
+ __aha_cancel_timer (conf); /* Cancel the timer */
+ }
+ UNLOCK (&conf->lock);
+}
+
+void
+aha_handle_child_down (xlator_t *this)
+{
+ struct aha_conf *conf = this->private;
+
+ LOCK (&conf->lock);
+ {
+ __aha_update_child_status (conf, AHA_CHILD_STATUS_DOWN);
+ __aha_set_timer_status (conf, _gf_true);
+ __aha_start_timer (conf);
+ }
+ UNLOCK (&conf->lock);
+}
+
+int32_t
+notify (xlator_t *this, int32_t event, void *data, ...)
+{
+ switch (event) {
+ case GF_EVENT_CHILD_DOWN:
+ gf_log (this->name, GF_LOG_WARNING, "Got child-down event!");
+ aha_handle_child_down (this);
+ break;
+ case GF_EVENT_CHILD_UP:
+ gf_log (this->name, GF_LOG_WARNING, "Got child-up event!");
+ aha_handle_child_up (this);
+ dispatch_fop_queue_drain (this);
+ break;
+ default:
+ break;
+ }
+
+ default_notify (this, event, data);
+
+ return 0;
+}
+
+int32_t
+aha_priv_dump (xlator_t *this)
+{
+ return 0;
+}
+
+int32_t
+mem_acct_init (xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init (this, gf_aha_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "Memory accounting init failed!");
+ return ret;
+ }
+
+ return ret;
+}
+
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+ struct aha_conf *conf = NULL;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF ("server-wait-timeout-seconds",
+ conf->server_wait_timeout,
+ options, size_uint64, err);
+
+ return 0;
+err:
+ return -1;
+}
+
+int
+aha_init_options (xlator_t *this)
+{
+ struct aha_conf *conf = NULL;
+
+ conf = this->private;
+
+ GF_OPTION_INIT ("server-wait-timeout-seconds",
+ conf->server_wait_timeout,
+ size_uint64, err);
+
+ return 0;
+err:
+ return -1;
+}
+
+
+int
+init (xlator_t *this)
+{
+ int ret = 0;
+ struct aha_conf *conf = NULL;
+
+ conf = aha_conf_new ();
+ if (!conf) {
+ ret = -(ENOMEM);
+ goto err;
+ }
+
+ conf->this = this;
+ this->private = conf;
+
+ aha_init_options (this);
+
+ /* init() completed successfully */
+ goto done;
+err:
+ gf_log (GF_AHA, GF_LOG_ERROR,
+ "init() failed, please see "
+ "logs for details.");
+
+ /* Free all allocated memory */
+ aha_conf_destroy (conf);
+done:
+ return ret;
+}
+
+void
+fini (xlator_t *this)
+{
+ struct aha_conf *conf = this->private;
+
+ aha_conf_destroy (conf);
+
+ this->private = NULL;
+}
+
+struct xlator_dumpops dumpops = {
+ .priv = aha_priv_dump,
+};
+
+struct xlator_fops cbks;
+
+struct xlator_fops fops = {
+ .lookup = aha_lookup,
+ .stat = aha_stat,
+ .readlink = aha_readlink,
+ .mknod = aha_mknod,
+ .mkdir = aha_mkdir,
+ .unlink = aha_unlink,
+ .rmdir = aha_rmdir,
+ .symlink = aha_symlink,
+ .rename = aha_rename,
+ .link = aha_link,
+ .truncate = aha_truncate,
+ .create = aha_create,
+ .open = aha_open,
+ .readv = aha_readv,
+ .writev = aha_writev,
+ .statfs = aha_statfs,
+ .flush = aha_flush,
+ .fsync = aha_fsync,
+ .setxattr = aha_setxattr,
+ .getxattr = aha_getxattr,
+ .removexattr = aha_removexattr,
+ .fsetxattr = aha_fsetxattr,
+ .fgetxattr = aha_fgetxattr,
+ .fremovexattr = aha_fremovexattr,
+ .opendir = aha_opendir,
+ .readdir = aha_readdir,
+ .readdirp = aha_readdirp,
+ .fsyncdir = aha_fsyncdir,
+ .access = aha_access,
+ .ftruncate = aha_ftruncate,
+ .fstat = aha_fstat,
+ .lk = aha_lk,
+ .lookup_cbk = aha_lookup_cbk,
+ .xattrop = aha_xattrop,
+ .fxattrop = aha_fxattrop,
+ .inodelk = aha_inodelk,
+ .finodelk = aha_finodelk,
+ .entrylk = aha_entrylk,
+ .fentrylk = aha_fentrylk,
+ .setattr = aha_setattr,
+ .fsetattr = aha_fsetattr,
+};
+
+struct volume_options options[] = {
+ { .key = {"server-wait-timeout-seconds"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 10,
+ .max = 20 * 60,
+ .default_value = TOSTRING (120),
+ .description = "Specifies the number of seconds the "
+ "AHA translator will wait "
+ "for a CHILD_UP event before "
+ "force-unwinding the frames it has "
+ "currently stored for retry."
+ },
+ { .key = {NULL} }
+};
diff --git a/xlators/cluster/aha/src/aha.h b/xlators/cluster/aha/src/aha.h
new file mode 100644
index 00000000000..3dbf3199776
--- /dev/null
+++ b/xlators/cluster/aha/src/aha.h
@@ -0,0 +1,46 @@
+#ifndef _AHA_H
+#define _AHA_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "xlator.h"
+#include "statedump.h"
+#include "call-stub.h"
+#include "defaults.h"
+#include "list.h"
+#include "timer.h"
+
+#include "aha-mem-types.h"
+
+/* new() and destroy() functions for all structs can be found in
+ * aha-helpers.c
+ */
+struct aha_conf {
+ xlator_t *this;
+ uint8_t child_up;
+ gf_lock_t lock;
+ struct list_head failed;
+ gf_timer_t *timer;
+ gf_boolean_t timer_expired;
+ uint64_t server_wait_timeout;
+};
+
+struct aha_fop {
+ call_stub_t *stub; /* Only used to store function arguments */
+ call_frame_t *frame; /* Frame corresponding to this fop */
+ uint64_t tries;
+ struct list_head list;
+};
+
+enum {
+ AHA_CHILD_STATUS_DOWN = 0,
+ AHA_CHILD_STATUS_UP = 1,
+ AHA_CHILD_STATUS_MAX
+};
+
+gf_boolean_t aha_is_timer_expired (struct aha_conf *conf);
+
+#endif
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index a9714b02b79..a97d03bb055 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -5559,6 +5559,7 @@ dht_mknod_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
{
dht_local_t *local = NULL;
xlator_t *avail_subvol = NULL;
+ int op_errno = 0;
local = frame->local;
@@ -5571,9 +5572,15 @@ dht_mknod_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
subvol, subvol->fops->mknod, loc, mode,
rdev, umask, params);
} else {
- avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
-
- if (avail_subvol != subvol) {
+ /* This will return NULL if all subvolumes are full
+ * and/or no subvolume needs the min_free_disk limit
+ */
+ avail_subvol = dht_free_disk_available_subvol (this, subvol,
+ local);
+ if (!avail_subvol) {
+ op_errno = ENOSPC;
+ goto err;
+ } else if (avail_subvol != subvol) {
local->params = dict_ref (params);
local->rdev = rdev;
local->mode = mode;
@@ -5603,6 +5610,8 @@ dht_mknod_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
}
out:
return 0;
+err:
+ return op_errno;
}
int32_t
@@ -6242,8 +6251,12 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
}
}
- dht_mknod_wind_to_avail_subvol (frame, this, subvol, loc, rdev, mode,
- umask, params);
+ op_errno = dht_mknod_wind_to_avail_subvol (frame, this, subvol, loc,
+ rdev, mode, umask,
+ params);
+ if (op_errno != 0) {
+ goto err;
+ }
done:
return 0;
@@ -6738,6 +6751,7 @@ dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
{
dht_local_t *local = NULL;
xlator_t *avail_subvol = NULL;
+ int op_errno = 0;
local = frame->local;
@@ -6752,8 +6766,10 @@ dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
} else {
avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
-
- if (avail_subvol != subvol) {
+ if (!avail_subvol) {
+ op_errno = ENOSPC;
+ goto err;
+ } else if (avail_subvol != subvol) {
local->params = dict_ref (params);
local->flags = flags;
local->mode = mode;
@@ -6780,6 +6796,10 @@ dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
}
out:
return 0;
+err:
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return op_errno;
}
int
@@ -6882,9 +6902,10 @@ dht_create_do (call_frame_t *frame)
goto err;
}
- dht_create_wind_to_avail_subvol (frame, this, subvol, &local->loc,
- local->flags, local->mode,
- local->umask, local->fd, local->params);
+ dht_create_wind_to_avail_subvol (frame, this, subvol,
+ &local->loc, local->flags,
+ local->mode, local->umask,
+ local->fd, local->params);
return 0;
err:
local->refresh_layout_unlock (frame, this, -1, 1);
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 9e9ca712417..613a9d39816 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -300,6 +300,7 @@ struct dht_du {
uint64_t avail_space;
uint32_t log;
uint32_t chunks;
+ gf_boolean_t is_full;
};
typedef struct dht_du dht_du_t;
@@ -484,6 +485,7 @@ struct dht_conf {
dht_du_t *du_stats;
double min_free_disk;
double min_free_inodes;
+ gf_boolean_t min_free_strict_mode;
char disk_unit;
int32_t refresh_interval;
gf_boolean_t unhashed_sticky_bit;
@@ -549,6 +551,10 @@ struct dht_conf {
gf_boolean_t lock_migration_enabled;
gf_lock_t lock;
+
+ /* du stats */
+ uint32_t du_refresh_interval_sec;
+ gf_lock_t du_refresh_lock;
};
typedef struct dht_conf dht_conf_t;
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 1eb9e63c531..1b20dabc61f 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -153,19 +153,25 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
struct timeval tv = {0,};
+ struct timeval cmp_tv = {0,};
loc_t tmp_loc = {0,};
conf = this->private;
+ /* Somebody else is already refreshing the statfs info */
+ if (TRY_LOCK (&conf->du_refresh_lock) != 0)
+ return 0;
+
gettimeofday (&tv, NULL);
+ cmp_tv = conf->last_stat_fetch;
+ cmp_tv.tv_sec += conf->du_refresh_interval_sec;
+
/* make it root gfid, should be enough to get the proper
info back */
tmp_loc.gfid[15] = 1;
- if (tv.tv_sec > (conf->refresh_interval
- + conf->last_stat_fetch.tv_sec)) {
-
+ if (timercmp (&tv, &cmp_tv, >)) {
statfs_frame = copy_frame (frame);
if (!statfs_frame) {
goto err;
@@ -200,14 +206,18 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
&tmp_loc, statfs_local->params);
}
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
+ conf->last_stat_fetch = tv;
}
- return 0;
+ ret = 0;
+ goto out;
err:
if (statfs_frame)
DHT_STACK_DESTROY (statfs_frame);
- return -1;
+ ret = -1;
+out:
+ UNLOCK (&conf->du_refresh_lock);
+ return ret;
}
@@ -223,8 +233,13 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
conf = this->private;
/* Check for values above specified percent or free disk */
- LOCK (&conf->subvolume_lock);
- {
+ if (TRY_LOCK (&conf->subvolume_lock) != 0) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ return conf->du_stats[i].is_full;
+ }
+ }
+ } else {
for (i = 0; i < conf->subvolume_cnt; i++) {
if (subvol == conf->subvolumes[i]) {
if (conf->disk_unit == 'p') {
@@ -248,7 +263,15 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
}
}
}
- }
+
+ /* i will be less than subvolume_cnt if either of
+ * these booleans are true */
+ is_subvol_filled = (
+ subvol_filled_space || subvol_filled_inodes);
+ if (is_subvol_filled) {
+ conf->du_stats[i].is_full = is_subvol_filled;
+ }
+ }
UNLOCK (&conf->subvolume_lock);
if (subvol_filled_space && conf->subvolume_status[i]) {
@@ -273,8 +296,6 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
}
}
- is_subvol_filled = (subvol_filled_space || subvol_filled_inodes);
-
return is_subvol_filled;
}
@@ -309,15 +330,8 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
LOCK (&conf->subvolume_lock);
{
- avail_subvol = dht_subvol_with_free_space_inodes(this, subvol,
+ avail_subvol = dht_subvol_maxspace_nonzeroinode(this, subvol,
layout);
- if(!avail_subvol)
- {
- avail_subvol = dht_subvol_maxspace_nonzeroinode(this,
- subvol,
- layout);
- }
-
}
UNLOCK (&conf->subvolume_lock);
out:
@@ -325,7 +339,6 @@ out:
gf_msg_debug (this->name, 0,
"No subvolume has enough free space \
and/or inodes to create");
- avail_subvol = subvol;
}
if (layout)
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index 8abf0d59b88..ac0f0e186fa 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -104,10 +104,15 @@ dht_open (call_frame_t *frame, xlator_t *this,
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ conf = this->private;
+
+ if (conf->min_free_strict_mode == _gf_true)
+ dht_get_du_info (frame, this, loc);
local = dht_local_init (frame, loc, fd, GF_FOP_OPEN);
if (!local) {
@@ -121,6 +126,11 @@ dht_open (call_frame_t *frame, xlator_t *this,
"no cached subvolume for fd=%p", fd);
op_errno = EINVAL;
goto err;
+ } else if (conf->min_free_strict_mode == _gf_true &&
+ dht_is_subvol_filled (this, subvol) == _gf_true &&
+ flags & O_APPEND) {
+ op_errno = ENOSPC;
+ goto err;
}
local->rebalance.flags = flags;
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index 112685b659e..7420461da76 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -161,11 +161,16 @@ dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
+ loc_t *nil_loc = {0,};
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ conf = this->private;
+
+
local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE);
if (!local) {
@@ -173,15 +178,21 @@ dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto err;
}
+ if (conf->min_free_strict_mode == _gf_true)
+ dht_get_du_info (frame, this, nil_loc);
+
subvol = local->cached_subvol;
if (!subvol) {
gf_msg_debug (this->name, 0,
"no cached subvolume for fd=%p", fd);
op_errno = EINVAL;
goto err;
+ } else if (conf->min_free_strict_mode == _gf_true &&
+ dht_is_subvol_filled (this, subvol) == _gf_true) {
+ op_errno = ENOSPC;
+ goto err;
}
-
local->rebalance.vector = iov_dup (vector, count);
local->rebalance.offset = off;
local->rebalance.count = count;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index a478f06b2a9..ffd8bac9e4f 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -20,7 +20,7 @@
#define GF_DISK_SECTOR_SIZE 512
#define DHT_REBALANCE_PID 4242 /* Change it if required */
-#define DHT_REBALANCE_BLKSIZE (128 * 1024)
+#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */
#define MAX_MIGRATE_QUEUE_COUNT 500
#define MIN_MIGRATE_QUEUE_COUNT 200
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 5c810f0dc77..ccbf66b626d 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -443,6 +443,8 @@ dht_reconfigure (xlator_t *this, dict_t *options)
conf->disk_unit = 0;
if (conf->min_free_disk < 100.0)
conf->disk_unit = 'p';
+ GF_OPTION_RECONF ("min-free-strict-mode", conf->min_free_strict_mode,
+ options, bool, out);
GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
percent, out);
@@ -499,6 +501,9 @@ dht_reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("use-readdirp", conf->use_readdirp, options,
bool, out);
+
+ GF_OPTION_RECONF ("du-refresh-interval-sec",
+ conf->du_refresh_interval_sec, options, uint32, out);
ret = 0;
out:
return ret;
@@ -720,7 +725,10 @@ dht_init (xlator_t *this)
GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
- err);
+ err);
+
+ GF_OPTION_INIT ("min-free-strict-mode", conf->min_free_strict_mode,
+ bool, err);
GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
err);
@@ -738,6 +746,11 @@ dht_init (xlator_t *this)
GF_OPTION_INIT ("lock-migration", conf->lock_migration_enabled,
bool, err);
+ GF_OPTION_INIT ("du-refresh-interval-sec",
+ conf->du_refresh_interval_sec, uint32, err);
+
+ LOCK_INIT (&conf->du_refresh_lock);
+
if (defrag) {
defrag->lock_migration_enabled = conf->lock_migration_enabled;
@@ -907,6 +920,14 @@ struct volume_options options[] = {
"process starts balancing out the cluster, and logs will appear "
"in log files",
},
+ { .key = {"min-free-strict-mode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "When enabled, will reject in-flight writes or "
+ "append operations to files when the target subvolume falls "
+ "below min-free-(disk|inodes). When disabled, these are allowed "
+ "through and only new files will be affected.",
+ },
{ .key = {"min-free-inodes"},
.type = GF_OPTION_TYPE_PERCENT,
.default_value = "5%",
@@ -1089,5 +1110,14 @@ struct volume_options options[] = {
" associated with a file during rebalance"
},
+ { .key = {"du-refresh-interval-sec"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "60",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Specifies how many seconds before subvolume statfs "
+ "info is re-validated."
+ },
+
{ .key = {NULL} },
};
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 56e17d6e884..996faffa37f 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -325,7 +325,10 @@ nufa_create (call_frame_t *frame, xlator_t *this,
local);
}
- if (subvol != avail_subvol) {
+ if (!avail_subvol) {
+ op_errno = ENOSPC;
+ goto err;
+ } else if (subvol != avail_subvol) {
/* create a link file instead of actual file */
local->params = dict_ref (params);
local->mode = mode;
@@ -430,7 +433,10 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
local);
}
- if (avail_subvol != subvol) {
+ if (!avail_subvol) {
+ op_errno = ENOSPC;
+ goto err;
+ } else if (avail_subvol != subvol) {
/* Create linkfile first */
local->params = dict_ref (params);
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index f1e9a399442..8b14ac99b8f 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -440,7 +440,10 @@ switch_create (call_frame_t *frame, xlator_t *this,
local);
}
- if (subvol != avail_subvol) {
+ if (!avail_subvol) {
+ op_errno = ENOSPC;
+ goto err;
+ } else if (subvol != avail_subvol) {
/* create a link file instead of actual file */
local->mode = mode;
local->flags = flags;
@@ -540,7 +543,10 @@ switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
local);
}
- if (avail_subvol != subvol) {
+ if (!avail_subvol) {
+ op_errno = ENOSPC;
+ goto err;
+ } else if (avail_subvol != subvol) {
/* Create linkfile first */
local->params = dict_ref (params);