summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 137
-rw-r--r-- xlators/cluster/afr/src/afr-self-heald.c 4
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.c 14
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.h 2
-rw-r--r-- xlators/cluster/afr/src/afr.c 2
-rw-r--r-- xlators/cluster/afr/src/afr.h 26
6 files changed, 173 insertions, 12 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index e60d5315dbe..8b10e263974 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2993,8 +2993,12 @@ afr_ta_id_file_check (void *opaque)
priv = this->private;
ret = afr_fill_ta_loc (this, &loc);
- if (ret)
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.",
+ loc.name);
goto out;
+ }
ret = syncop_lookup (priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
&stbuf, 0, 0, 0);
@@ -6748,3 +6752,134 @@ afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode)
}
return ret;
}
+
+/*
+ * Check whether the caller is running inside a synctask whose frame carries
+ * an lk-owner built from the @this pointer.
+ *
+ * Returns _gf_false when synctask_get() reports no current task, or when the
+ * lk-owner stored in the task's frame root differs from the one derived from
+ * @this; returns _gf_true otherwise.
+ */
+gf_boolean_t
+afr_ta_is_fop_called_from_synctask (xlator_t *this)
+{
+        gf_lkowner_t     expected_owner = {0,};
+        struct synctask *current        = synctask_get ();
+
+        if (current == NULL)
+                return _gf_false;
+
+        /* Build an lk-owner from the xlator pointer to compare against. */
+        set_lk_owner_from_ptr (&expected_owner, (void *)this);
+
+        return is_same_lkowner (&expected_owner,
+                                &current->frame->root->lk_owner)
+                       ? _gf_true
+                       : _gf_false;
+}
+
+/*
+ * Take the two thin-arbiter inodelks used around a post-op on @loc: first
+ * AFR_TA_DOM_NOTIFY, then AFR_TA_DOM_MODIFY. Both are write locks sent to
+ * priv->children[THIN_ARBITER_BRICK_INDEX].
+ *
+ * AFR_TA_DOM_NOTIFY:
+ *   - when priv->shd.iamshd is set, a blocking F_SETLKW with l_start = 0 and
+ *     l_len = 0 is issued;
+ *   - otherwise a non-blocking F_SETLK on a single byte (l_len = 1) is
+ *     issued. The offset is priv->ta_notify_dom_lock_offset when that is
+ *     non-zero, else it is derived from a freshly generated gfid.
+ *   A result of -EAGAIN is retried; any other error aborts. On success the
+ *   offset that was locked is stored in priv->ta_notify_dom_lock_offset.
+ *
+ * AFR_TA_DOM_MODIFY: blocking F_SETLKW with l_start = 0 and l_len = 0.
+ *
+ * Must be called from a synctask (asserted). Note: at any given time, only
+ * one instance of this function must be in progress.
+ *
+ * Returns 0 when both locks are held. Returns the negative error of the
+ * failing lock request otherwise; if AFR_TA_DOM_MODIFY fails, the
+ * AFR_TA_DOM_NOTIFY lock taken here is released again before returning.
+ */
+int
+afr_ta_post_op_lock (xlator_t *this, loc_t *loc)
+{
+        int              ret        = 0;
+        int              unlock_ret = 0;
+        uuid_t           gfid       = {0,};
+        afr_private_t   *priv       = this->private;
+        gf_boolean_t     locked     = _gf_false;
+        struct gf_flock  flock1     = {0, };
+        struct gf_flock  flock2     = {0, };
+        int32_t          cmd        = 0;
+
+        GF_ASSERT (afr_ta_is_fop_called_from_synctask (this));
+        flock1.l_type = F_WRLCK;
+
+        while (!locked) {
+                if (priv->shd.iamshd) {
+                        cmd = F_SETLKW;
+                        flock1.l_start = 0;
+                        flock1.l_len = 0;
+                } else {
+                        cmd = F_SETLK;
+                        if (priv->ta_notify_dom_lock_offset) {
+                                /* Reuse the offset remembered from an earlier
+                                 * successful lock. */
+                                flock1.l_start =
+                                        priv->ta_notify_dom_lock_offset;
+                        } else {
+                                /* No remembered offset: pick one from a new
+                                 * gfid and force it non-negative. */
+                                gf_uuid_generate (gfid);
+                                flock1.l_start = gfid_to_ino (gfid);
+                                if (flock1.l_start < 0)
+                                        flock1.l_start = -flock1.l_start;
+                        }
+                        flock1.l_len = 1;
+                }
+                ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                                      AFR_TA_DOM_NOTIFY, loc, cmd, &flock1,
+                                      NULL, NULL);
+                if (!ret) {
+                        locked = _gf_true;
+                        priv->ta_notify_dom_lock_offset = flock1.l_start;
+                } else if (ret == -EAGAIN) {
+                        continue;
+                } else {
+                        gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                AFR_MSG_THIN_ARB, "Failed to get "
+                                "AFR_TA_DOM_NOTIFY lock on %s.", loc->name);
+                        goto out;
+                }
+        }
+
+        /* The loop above only falls through with AFR_TA_DOM_NOTIFY held. */
+        flock2.l_type = F_WRLCK;
+        flock2.l_start = 0;
+        flock2.l_len = 0;
+        ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                              AFR_TA_DOM_MODIFY, loc, F_SETLKW, &flock2,
+                              NULL, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+                        "Failed to get AFR_TA_DOM_MODIFY lock.");
+                /* Roll back AFR_TA_DOM_NOTIFY. The unlock result goes into
+                 * its own variable so that 'ret' keeps the
+                 * AFR_TA_DOM_MODIFY failure: a successful rollback must not
+                 * turn this call into a success, because the caller would
+                 * then proceed without holding the modify lock. */
+                flock1.l_type = F_UNLCK;
+                unlock_ret = syncop_inodelk (
+                                priv->children[THIN_ARBITER_BRICK_INDEX],
+                                AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock1,
+                                NULL, NULL);
+                if (unlock_ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, -unlock_ret,
+                                AFR_MSG_THIN_ARB,
+                                "Failed to unlock AFR_TA_DOM_NOTIFY lock.");
+                }
+        }
+out:
+        return ret;
+}
+
+/*
+ * Release the thin-arbiter inodelks on @loc after a post-op.
+ *
+ * AFR_TA_DOM_MODIFY is always unlocked (F_SETLK with F_UNLCK, l_start = 0,
+ * l_len = 0). AFR_TA_DOM_NOTIFY is unlocked with the same request only when
+ * priv->shd.iamshd is set.
+ *
+ * Must be called from a synctask (asserted).
+ *
+ * Returns 0 on success, or the negative error returned by the unlock request
+ * that failed. If unlocking AFR_TA_DOM_MODIFY fails, AFR_TA_DOM_NOTIFY is
+ * not attempted.
+ */
+int
+afr_ta_post_op_unlock (xlator_t *this, loc_t *loc)
+{
+        int              rc         = 0;
+        struct gf_flock  unlock_req = {0, };
+        afr_private_t   *priv       = this->private;
+
+        GF_ASSERT (afr_ta_is_fop_called_from_synctask (this));
+
+        unlock_req.l_type  = F_UNLCK;
+        unlock_req.l_start = 0;
+        unlock_req.l_len   = 0;
+
+        rc = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                             AFR_TA_DOM_MODIFY, loc, F_SETLK, &unlock_req,
+                             NULL, NULL);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, -rc, AFR_MSG_THIN_ARB,
+                        "Failed to unlock AFR_TA_DOM_MODIFY lock.");
+                return rc;
+        }
+
+        if (!priv->shd.iamshd) {
+                /* Mounts (clients) keep the AFR_TA_DOM_NOTIFY lock after the
+                 * post-op because they use it as a notification mechanism.
+                 * When shd requests a lock on TA during heal, the clients
+                 * get a lock-contention upcall and only then release
+                 * AFR_TA_DOM_NOTIFY, once their in-flight I/O is done. */
+                return rc;
+        }
+
+        rc = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                             AFR_TA_DOM_NOTIFY, loc, F_SETLK, &unlock_req,
+                             NULL, NULL);
+        if (rc != 0) {
+                gf_msg (this->name, GF_LOG_ERROR, -rc, AFR_MSG_THIN_ARB,
+                        "Failed to unlock AFR_TA_DOM_NOTIFY lock.");
+        }
+
+        return rc;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 4667be63220..1f7ae7bb43d 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -631,7 +631,7 @@ afr_shd_ta_set_xattrs (xlator_t *this, loc_t *loc, dict_t **xdata,
flock.l_len = 0;
ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
- THIN_ARBITER_DOM1, loc, F_SETLKW, &flock,
+ AFR_TA_DOM_NOTIFY, loc, F_SETLKW, &flock,
NULL, NULL);
if (ret)
goto out;
@@ -644,7 +644,7 @@ afr_shd_ta_set_xattrs (xlator_t *this, loc_t *loc, dict_t **xdata,
flock.l_type = F_UNLCK;
syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
- THIN_ARBITER_DOM1, loc, F_SETLKW, &flock, NULL, NULL);
+ AFR_TA_DOM_NOTIFY, loc, F_SETLKW, &flock, NULL, NULL);
out:
if (xattr)
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 77398519ba0..9c587db0562 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -915,8 +915,10 @@ afr_fill_ta_loc (xlator_t *this, loc_t *loc)
loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
gf_uuid_copy (loc->gfid, priv->ta_gfid);
loc->inode = inode_new (loc->parent->table);
- if (!loc->inode)
+ if (!loc->inode) {
+ loc_wipe(loc);
return -ENOMEM;
+ }
return 0;
}
@@ -943,8 +945,12 @@ afr_changelog_thin_arbiter_post_op (xlator_t *this, afr_local_t *local)
GF_ASSERT (failed_count == 1);
ret = afr_fill_ta_loc (this, &loc);
- if (ret)
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.",
+ loc.name);
goto out;
+ }
xattr = dict_new ();
if (!xattr) {
@@ -965,7 +971,7 @@ afr_changelog_thin_arbiter_post_op (xlator_t *this, afr_local_t *local)
/*TODO: Convert to two domain locking. */
ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
- THIN_ARBITER_DOM1, &loc, F_SETLKW, &flock,
+ AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock,
NULL, NULL);
if (ret)
goto out;
@@ -987,7 +993,7 @@ afr_changelog_thin_arbiter_post_op (xlator_t *this, afr_local_t *local)
}
flock.l_type = F_UNLCK;
syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
- THIN_ARBITER_DOM1, &loc, F_SETLKW, &flock, NULL, NULL);
+ AFR_TA_DOM_NOTIFY, &loc, F_SETLK, &flock, NULL, NULL);
out:
if (xattr)
dict_unref (xattr);
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 629f6dd557c..fe0f45f2f93 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -61,6 +61,4 @@ afr_lock (call_frame_t *frame, xlator_t *this);
void
afr_delayed_changelog_wake_up_cbk (void *data);
-int
-afr_fill_ta_loc (xlator_t *this, loc_t *loc);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 0e86e33d03b..dac714d5d2c 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -410,6 +410,8 @@ init (xlator_t *this)
if (thin_arbiter && strlen(thin_arbiter) > 0) {
priv->thin_arbiter_count = 1;
priv->child_count--;
+ priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
+ priv->ta_notify_dom_lock_offset = 0;
}
INIT_LIST_HEAD (&priv->healing);
INIT_LIST_HEAD (&priv->heal_waiting);
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 68087e0ea20..d32ff5f8d85 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -36,7 +36,8 @@
#define ARBITER_BRICK_INDEX 2
#define THIN_ARBITER_BRICK_INDEX 2
-#define THIN_ARBITER_DOM1 "afr.ta.domain-1"
+#define AFR_TA_DOM_NOTIFY "afr.ta.dom-notify"
+#define AFR_TA_DOM_MODIFY "afr.ta.dom-modify"
#define AFR_HALO_MAX_LATENCY 99999
typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
@@ -73,6 +74,12 @@ typedef enum {
AFR_FAV_CHILD_POLICY_MAX,
} afr_favorite_child_policy;
+/* Symbolic child indices: -1 stands for "unknown", followed by 0 and 1. */
+typedef enum {
+        AFR_CHILD_UNKNOWN = -1,
+        AFR_CHILD_ZERO    = 0,
+        AFR_CHILD_ONE     = 1,
+} afr_child_index;
+
struct afr_nfsd {
gf_boolean_t iamnfsd;
uint32_t halo_max_latency_msec;
@@ -83,12 +90,16 @@ typedef struct _afr_private {
unsigned int child_count; /* total number of children */
unsigned int arbiter_count; /*subset of child_count.
Has to be 0 or 1.*/
- unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
xlator_t **children;
inode_t *root_inode;
- uuid_t ta_gfid; /*For thin arbiter.*/
+
+ /* For thin-arbiter. */
+ unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
+ uuid_t ta_gfid;
+ int ta_bad_child_index;
+ off_t ta_notify_dom_lock_offset;
unsigned char *child_up;
int64_t *child_latency;
@@ -1229,4 +1240,13 @@ afr_write_subvol_reset (call_frame_t *frame, xlator_t *this);
int
afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode);
+
+int
+afr_fill_ta_loc (xlator_t *this, loc_t *loc);
+
+int
+afr_ta_post_op_lock (xlator_t *this, loc_t *loc);
+
+int
+afr_ta_post_op_unlock (xlator_t *this, loc_t *loc);
#endif /* __AFR_H__ */