From 9ec821f9e67848b3527f6b7dc776cb9ba440610b Mon Sep 17 00:00:00 2001
From: Ravishankar N
Date: Thu, 16 Aug 2018 17:28:54 +0530
Subject: afr: common thin-arbiter functions

...that can be used by client and self-heal daemon, namely:

afr_ta_post_op_lock()
afr_ta_post_op_unlock()

Note: These are not yet consumed. They will be used in the write txn
changes patch which will introduce 2 domain locking.

updates: bz#1579788
Change-Id: I636d50f8fde00736665060e8f9ee4510d5f38795
Signed-off-by: Ravishankar N
---
 xlators/cluster/afr/src/afr-common.c      | 152 +++++++++++++++++++++++++++++-
 xlators/cluster/afr/src/afr-self-heald.c  |   4 +-
 xlators/cluster/afr/src/afr-transaction.c |  14 ++-
 xlators/cluster/afr/src/afr-transaction.h |   2 -
 xlators/cluster/afr/src/afr.c             |   2 +
 xlators/cluster/afr/src/afr.h             |  26 +++++-
 6 files changed, 188 insertions(+), 12 deletions(-)

(limited to 'xlators')

diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index e60d5315dbe..8b10e263974 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2993,8 +2993,12 @@ afr_ta_id_file_check (void *opaque)
         priv = this->private;
 
         ret = afr_fill_ta_loc (this, &loc);
-        if (ret)
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+                        "Failed to populate thin-arbiter loc for: %s.",
+                        loc.name);
                 goto out;
+        }
 
         ret = syncop_lookup (priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
                              &stbuf, 0, 0, 0);
@@ -6748,3 +6752,149 @@ afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode)
         }
         return ret;
 }
+
+/* Returns _gf_true only when called from inside a synctask whose frame's
+ * lk-owner equals the one derived from @this (set_lk_owner_from_ptr() and
+ * is_same_lkowner()); returns _gf_false otherwise. */
+gf_boolean_t
+afr_ta_is_fop_called_from_synctask (xlator_t *this)
+{
+        struct synctask *task = NULL;
+        gf_lkowner_t tmp_owner = {0,};
+
+        task = synctask_get ();
+        if (!task)
+                return _gf_false;
+
+        set_lk_owner_from_ptr(&tmp_owner, (void *)this);
+
+        if (!is_same_lkowner (&tmp_owner, &task->frame->root->lk_owner))
+                return _gf_false;
+
+        return _gf_true;
+}
+
+/* Take the two thin-arbiter post-op locks on @loc, in this order:
+ *   1. AFR_TA_DOM_NOTIFY - shd blocks (F_SETLKW) on the whole range; a
+ *      client takes a 1-byte non-blocking (F_SETLK) lock at the offset
+ *      cached in priv->ta_notify_dom_lock_offset, or at a freshly
+ *      generated one when nothing is cached, retrying on -EAGAIN.
+ *   2. AFR_TA_DOM_MODIFY - blocking, whole range.
+ * Returns 0 with both locks held, or a negative errno on failure; if only
+ * the MODIFY lock fails, the NOTIFY lock is released again (best effort).
+ *
+ * Note: At any given time, only one instance of this function must
+ * be in progress. */
+int
+afr_ta_post_op_lock (xlator_t *this, loc_t *loc)
+{
+        int ret = 0;
+        uuid_t gfid = {0,};
+        afr_private_t *priv = this->private;
+        gf_boolean_t locked = _gf_false;
+        struct gf_flock flock1 = {0, };
+        struct gf_flock flock2 = {0, };
+        int32_t cmd = 0;
+
+        GF_ASSERT (afr_ta_is_fop_called_from_synctask (this));
+        flock1.l_type = F_WRLCK;
+
+        while (!locked) {
+                if (priv->shd.iamshd) {
+                        cmd = F_SETLKW;
+                        flock1.l_start = 0;
+                        flock1.l_len = 0;
+
+                } else {
+                        cmd = F_SETLK;
+                        if (priv->ta_notify_dom_lock_offset) {
+                                flock1.l_start =
+                                        priv->ta_notify_dom_lock_offset;
+                        } else {
+                                gf_uuid_generate (gfid);
+                                flock1.l_start = gfid_to_ino (gfid);
+                                if (flock1.l_start < 0)
+                                        flock1.l_start = -flock1.l_start;
+                        }
+                        flock1.l_len = 1;
+                }
+                ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                                      AFR_TA_DOM_NOTIFY, loc, cmd, &flock1,
+                                      NULL, NULL);
+                if (!ret) {
+                        locked = _gf_true;
+                        priv->ta_notify_dom_lock_offset = flock1.l_start;
+                } else if (ret == -EAGAIN) {
+                        continue;
+                } else {
+                        gf_msg (this->name, GF_LOG_ERROR, -ret,
+                                AFR_MSG_THIN_ARB, "Failed to get "
+                                "AFR_TA_DOM_NOTIFY lock on %s.", loc->name);
+                        goto out;
+                }
+        }
+
+        flock2.l_type = F_WRLCK;
+        flock2.l_start = 0;
+        flock2.l_len = 0;
+        ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                              AFR_TA_DOM_MODIFY, loc, F_SETLKW, &flock2,
+                              NULL, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+                        "Failed to get AFR_TA_DOM_MODIFY lock.");
+                /* Drop the AFR_TA_DOM_NOTIFY lock taken above. Its result is
+                 * deliberately not stored in ret, so that the caller sees
+                 * the AFR_TA_DOM_MODIFY failure and not a spurious 0. */
+                flock1.l_type = F_UNLCK;
+                syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                                AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock1,
+                                NULL, NULL);
+        }
+out:
+        return ret;
+}
+
+/* Release the locks taken by afr_ta_post_op_lock() on @loc: always the
+ * AFR_TA_DOM_MODIFY lock, and the AFR_TA_DOM_NOTIFY lock only when running
+ * as shd. Returns 0 on success or the negative errno of the failed unlock. */
+int
+afr_ta_post_op_unlock (xlator_t *this, loc_t *loc)
+{
+        afr_private_t *priv = this->private;
+        struct gf_flock flock = {0, };
+        int ret = 0;
+
+        GF_ASSERT (afr_ta_is_fop_called_from_synctask (this));
+        flock.l_type = F_UNLCK;
+        flock.l_start = 0;
+        flock.l_len = 0;
+
+        ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                              AFR_TA_DOM_MODIFY, loc, F_SETLK, &flock, NULL,
+                              NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+                        "Failed to unlock AFR_TA_DOM_MODIFY lock.");
+                goto out;
+        }
+
+        if (!priv->shd.iamshd)
+                /* Mounts (clients) will not release the AFR_TA_DOM_NOTIFY lock
+                 * in post-op as they use it as a notification mechanism. When
+                 * shd sends a lock request on TA during heal, the clients will
+                 * receive a lock-contention upcall notification upon which they
+                 * will release the AFR_TA_DOM_NOTIFY lock after completing the
+                 * in flight I/O.*/
+                goto out;
+
+        ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
+                              AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock,
+                              NULL, NULL);
+        if (ret) {
+                gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+                        "Failed to unlock AFR_TA_DOM_NOTIFY lock.");
+        }
+out:
+        return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 4667be63220..1f7ae7bb43d 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -631,7 +631,7 @@ afr_shd_ta_set_xattrs (xlator_t *this, loc_t *loc, dict_t **xdata,
         flock.l_len = 0;
 
         ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
-                              THIN_ARBITER_DOM1, loc, F_SETLKW, &flock,
+                              AFR_TA_DOM_NOTIFY, loc, F_SETLKW, &flock,
                               NULL, NULL);
         if (ret)
                 goto out;
@@ -644,7 +644,7 @@ afr_shd_ta_set_xattrs (xlator_t *this, loc_t *loc, dict_t **xdata,
 
         flock.l_type = F_UNLCK;
         syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX],
-                        THIN_ARBITER_DOM1, loc, F_SETLKW, &flock, NULL, NULL);
+                        AFR_TA_DOM_NOTIFY, loc, F_SETLKW, &flock, NULL, NULL);
 
 out:
         if (xattr)
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 77398519ba0..9c587db0562 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -915,8 +915,10 @@ afr_fill_ta_loc
(xlator_t *this, loc_t *loc) loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX]; gf_uuid_copy (loc->gfid, priv->ta_gfid); loc->inode = inode_new (loc->parent->table); - if (!loc->inode) + if (!loc->inode) { + loc_wipe(loc); return -ENOMEM; + } return 0; } @@ -943,8 +945,12 @@ afr_changelog_thin_arbiter_post_op (xlator_t *this, afr_local_t *local) GF_ASSERT (failed_count == 1); ret = afr_fill_ta_loc (this, &loc); - if (ret) + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB, + "Failed to populate thin-arbiter loc for: %s.", + loc.name); goto out; + } xattr = dict_new (); if (!xattr) { @@ -965,7 +971,7 @@ afr_changelog_thin_arbiter_post_op (xlator_t *this, afr_local_t *local) /*TODO: Convert to two domain locking. */ ret = syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - THIN_ARBITER_DOM1, &loc, F_SETLKW, &flock, + AFR_TA_DOM_NOTIFY, &loc, F_SETLKW, &flock, NULL, NULL); if (ret) goto out; @@ -987,7 +993,7 @@ afr_changelog_thin_arbiter_post_op (xlator_t *this, afr_local_t *local) } flock.l_type = F_UNLCK; syncop_inodelk (priv->children[THIN_ARBITER_BRICK_INDEX], - THIN_ARBITER_DOM1, &loc, F_SETLKW, &flock, NULL, NULL); + AFR_TA_DOM_NOTIFY, &loc, F_SETLK, &flock, NULL, NULL); out: if (xattr) dict_unref (xattr); diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index 629f6dd557c..fe0f45f2f93 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -61,6 +61,4 @@ afr_lock (call_frame_t *frame, xlator_t *this); void afr_delayed_changelog_wake_up_cbk (void *data); -int -afr_fill_ta_loc (xlator_t *this, loc_t *loc); #endif /* __TRANSACTION_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 0e86e33d03b..dac714d5d2c 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -410,6 +410,8 @@ init (xlator_t *this) if (thin_arbiter && strlen(thin_arbiter) > 0) { priv->thin_arbiter_count 
= 1; priv->child_count--; + priv->ta_bad_child_index = AFR_CHILD_UNKNOWN; + priv->ta_notify_dom_lock_offset = 0; } INIT_LIST_HEAD (&priv->healing); INIT_LIST_HEAD (&priv->heal_waiting); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 68087e0ea20..d32ff5f8d85 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -36,7 +36,8 @@ #define ARBITER_BRICK_INDEX 2 #define THIN_ARBITER_BRICK_INDEX 2 -#define THIN_ARBITER_DOM1 "afr.ta.domain-1" +#define AFR_TA_DOM_NOTIFY "afr.ta.dom-notify" +#define AFR_TA_DOM_MODIFY "afr.ta.dom-modify" #define AFR_HALO_MAX_LATENCY 99999 typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this); @@ -73,6 +74,12 @@ typedef enum { AFR_FAV_CHILD_POLICY_MAX, } afr_favorite_child_policy; +typedef enum { + AFR_CHILD_UNKNOWN = -1, /* sentinel; init() assigns it to priv->ta_bad_child_index when a thin-arbiter is configured */ + AFR_CHILD_ZERO, /* implicitly 0 */ + AFR_CHILD_ONE, /* implicitly 1 */ +} afr_child_index; + struct afr_nfsd { gf_boolean_t iamnfsd; uint32_t halo_max_latency_msec; @@ -83,12 +90,16 @@ typedef struct _afr_private { unsigned int child_count; /* total number of children */ unsigned int arbiter_count; /*subset of child_count. Has to be 0 or 1.*/ - unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/ xlator_t **children; inode_t *root_inode; - uuid_t ta_gfid; /*For thin arbiter.*/ + + /* For thin-arbiter. */ + unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/ + uuid_t ta_gfid; + int ta_bad_child_index; /* set to AFR_CHILD_UNKNOWN by init() */ + off_t ta_notify_dom_lock_offset; /* l_start of the AFR_TA_DOM_NOTIFY lock last acquired by afr_ta_post_op_lock(); set to 0 by init() */ unsigned char *child_up; int64_t *child_latency; @@ -1229,4 +1240,13 @@ afr_write_subvol_reset (call_frame_t *frame, xlator_t *this); int afr_set_inode_local (xlator_t *this, afr_local_t *local, inode_t *inode); + +int +afr_fill_ta_loc (xlator_t *this, loc_t *loc); + +int +afr_ta_post_op_lock (xlator_t *this, loc_t *loc); + +int +afr_ta_post_op_unlock (xlator_t *this, loc_t *loc); #endif /* __AFR_H__ */ -- cgit