summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Krutika Dhananjay <kdhananj@redhat.com> 2015-06-24 08:02:51 +0530
committer Vijay Bellur <vbellur@redhat.com> 2015-06-24 17:11:00 -0700
commit 4c751009f668910a734cd400c10e8b3bdd6360a1 (patch)
tree 9fa6f4e5b679073f6b4d61f4114d56b7ba4c4652
parent 2f0d36d16c241365760aaa6d857b7a4d438e1042 (diff)
cluster/afr: Pick gfid from poststat during fresh lookup for read child calculation
Change-Id: I12c1e4f67f4ec4affbe13d7daf871044a8a2a12e
BUG: 1235216
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/11373
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
-rw-r--r-- libglusterfs/src/inode.c 22
-rw-r--r-- libglusterfs/src/inode.h 3
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 68
-rw-r--r-- xlators/cluster/afr/src/afr-dir-read.c 7
-rw-r--r-- xlators/cluster/afr/src/afr-dir-write.c 6
-rw-r--r-- xlators/cluster/afr/src/afr-inode-write.c 5
-rw-r--r-- xlators/cluster/afr/src/afr-read-txn.c 4
-rw-r--r-- xlators/cluster/afr/src/afr.h 19
8 files changed, 93 insertions, 41 deletions
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index f0bb5238d56..f3dd48653df 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -2169,6 +2169,28 @@ inode_ctx_reset0 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
return ret;
}
+int
+inode_is_linked (inode_t *inode)
+{
+ int ret = 0;
+ inode_table_t *table = NULL;
+
+ if (!inode) {
+ gf_msg_callingfn (THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INODE_NOT_FOUND, "inode not found");
+ return 0;
+ }
+
+ table = inode->table;
+
+ pthread_mutex_lock (&table->lock);
+ {
+ ret = __is_inode_hashed (inode);
+ }
+ pthread_mutex_unlock (&table->lock);
+
+ return ret;
+}
void
inode_dump (inode_t *inode, char *prefix)
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
index 83646ebda5a..3a7aa063920 100644
--- a/libglusterfs/src/inode.h
+++ b/libglusterfs/src/inode.h
@@ -264,4 +264,7 @@ inode_table_set_lru_limit (inode_table_t *table, uint32_t lru_limit);
void
inode_ctx_merge (fd_t *fd, inode_t *inode, inode_t *linked_inode);
+int
+inode_is_linked (inode_t *inode);
+
#endif /* _INODE_H */
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 2bbf0e3570e..f644c9dc200 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -982,7 +982,7 @@ out:
int
-afr_hash_child (inode_t *inode, int32_t child_count, int hashmode)
+afr_hash_child (afr_read_subvol_args_t *args, int32_t child_count, int hashmode)
{
uuid_t gfid_copy = {0,};
pid_t pid;
@@ -991,11 +991,9 @@ afr_hash_child (inode_t *inode, int32_t child_count, int hashmode)
return -1;
}
- if (inode) {
- gf_uuid_copy (gfid_copy, inode->gfid);
- }
+ gf_uuid_copy (gfid_copy, args->gfid);
- if (hashmode > 1 && inode->ia_type != IA_IFDIR) {
+ if ((hashmode > 1) && (args->ia_type != IA_IFDIR)) {
/*
* Why getpid? Because it's one of the cheapest calls
* available - faster than gethostname etc. - and returns a
@@ -1016,32 +1014,41 @@ afr_hash_child (inode_t *inode, int32_t child_count, int hashmode)
int
afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
- unsigned char *readable)
+ unsigned char *readable,
+ afr_read_subvol_args_t *args)
{
- afr_private_t *priv = NULL;
- int read_subvol = -1;
- int i = 0;
+ int i = 0;
+ int read_subvol = -1;
+ afr_private_t *priv = NULL;
+ afr_read_subvol_args_t local_args = {0,};
priv = this->private;
/* first preference - explicitly specified or local subvolume */
if (priv->read_child >= 0 && readable[priv->read_child])
- return priv->read_child;
+ return priv->read_child;
+
+ if (inode_is_linked (inode)) {
+ gf_uuid_copy (local_args.gfid, inode->gfid);
+ local_args.ia_type = inode->ia_type;
+ } else if (args) {
+ local_args = *args;
+ }
/* second preference - use hashed mode */
- read_subvol = afr_hash_child (inode, priv->child_count,
- priv->hash_mode);
+ read_subvol = afr_hash_child (&local_args, priv->child_count,
+ priv->hash_mode);
if (read_subvol >= 0 && readable[read_subvol])
- return read_subvol;
+ return read_subvol;
for (i = 0; i < priv->child_count; i++) {
- if (readable[i])
- return i;
+ if (readable[i])
+ return i;
}
- /* no readable subvolumes, either split brain or all subvols down */
+ /* no readable subvolumes, either split brain or all subvols down */
- return -1;
+ return -1;
}
@@ -1064,7 +1071,8 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
int
afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
- int *event_p, afr_transaction_type type)
+ int *event_p, afr_transaction_type type,
+ afr_read_subvol_args_t *args)
{
afr_private_t *priv = NULL;
unsigned char *data_readable = NULL;
@@ -1091,10 +1099,10 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
if (AFR_COUNT (intersection, priv->child_count) > 0)
subvol = afr_read_subvol_select_by_policy (inode, this,
- intersection);
+ intersection, args);
else
subvol = afr_read_subvol_select_by_policy (inode, this,
- readable);
+ readable, args);
if (subvol_p)
*subvol_p = subvol;
if (event_p)
@@ -1408,7 +1416,8 @@ afr_get_parent_read_subvol (xlator_t *this, inode_t *parent,
priv = this->private;
if (parent)
- par_read_subvol = afr_data_subvol_get (parent, this, 0, 0);
+ par_read_subvol = afr_data_subvol_get (parent, this, 0, 0,
+ NULL);
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid)
@@ -1457,6 +1466,8 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
gf_boolean_t can_interpret = _gf_true;
inode_t *parent = NULL;
int spb_choice = -1;
+ ia_type_t ia_type = IA_INVAL;
+ afr_read_subvol_args_t args = {0,};
priv = this->private;
local = frame->local;
@@ -1504,6 +1515,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
if (read_subvol == -1 || !readable[read_subvol]) {
read_subvol = i;
gf_uuid_copy (read_gfid, replies[i].poststat.ia_gfid);
+ ia_type = replies[i].poststat.ia_type;
local->op_ret = 0;
}
}
@@ -1549,14 +1561,16 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
a response from all the UP subvolumes and all of them resolved
to the same GFID
*/
+ gf_uuid_copy (args.gfid, read_gfid);
+ args.ia_type = ia_type;
if (afr_replies_interpret (frame, this, local->inode)) {
read_subvol = afr_data_subvol_get (local->inode, this,
- 0, 0);
+ 0, 0, &args);
afr_inode_read_subvol_reset (local->inode, this);
goto cant_interpret;
} else {
read_subvol = afr_data_subvol_get (local->inode, this,
- 0, 0);
+ 0, 0, &args);
}
} else {
cant_interpret:
@@ -1974,7 +1988,7 @@ afr_discover_done (call_frame_t *frame, xlator_t *this)
afr_replies_interpret (frame, this, local->inode);
- read_subvol = afr_data_subvol_get (local->inode, this, 0, 0);
+ read_subvol = afr_data_subvol_get (local->inode, this, 0, 0, NULL);
if (read_subvol == -1) {
gf_log (this->name, GF_LOG_WARNING, "no read subvols for %s",
local->loc.path);
@@ -2137,7 +2151,7 @@ afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req
}
afr_read_subvol_get (loc->inode, this, NULL, &event,
- AFR_DATA_TRANSACTION);
+ AFR_DATA_TRANSACTION, NULL);
if (event != local->event_generation)
afr_inode_refresh (frame, this, loc->inode, afr_discover_do);
@@ -2283,7 +2297,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
}
afr_read_subvol_get (loc->parent, this, NULL, &event,
- AFR_DATA_TRANSACTION);
+ AFR_DATA_TRANSACTION, NULL);
if (event != local->event_generation)
afr_inode_refresh (frame, this, loc->parent, afr_lookup_do);
@@ -2603,7 +2617,7 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
- read_subvol = afr_data_subvol_get (local->inode, this, 0, 0);
+ read_subvol = afr_data_subvol_get (local->inode, this, 0, 0, NULL);
LOCK (&frame->lock);
{
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 1237b261b14..fae5661d021 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -148,7 +148,12 @@ afr_validate_read_subvol (inode_t *inode, xlator_t *this, int par_read_subvol)
if (!priv->consistent_metadata)
return 0;
- entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0);
+ /* For an inode fetched through readdirp which is yet to be linked,
+ * inode ctx would not be initialised (yet). So this function returns
+ * -1 above due to gen being 0, which is why it is OK to pass NULL for
+ * read_subvol_args here.
+ */
+ entry_read_subvol = afr_data_subvol_get (inode, this, 0, 0, NULL);
if (entry_read_subvol != par_read_subvol)
return -1;
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 05b4cc02923..8f525bee7e1 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -90,14 +90,14 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
if (local->inode) {
afr_replies_interpret (frame, this, local->inode);
inode_read_subvol = afr_data_subvol_get (local->inode, this,
- NULL, NULL);
+ NULL, NULL, NULL);
}
if (local->parent)
parent_read_subvol = afr_data_subvol_get (local->parent, this,
- NULL, NULL);
+ NULL, NULL, NULL);
if (local->parent2)
parent2_read_subvol = afr_data_subvol_get (local->parent2, this,
- NULL, NULL);
+ NULL, NULL, NULL);
local->op_ret = -1;
local->op_errno = afr_final_errno (local, priv);
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 931785c2c5b..866294ea20e 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -48,10 +48,11 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
if (local->inode) {
if (local->transaction.type == AFR_METADATA_TRANSACTION)
read_subvol = afr_metadata_subvol_get (local->inode, this,
- NULL, NULL);
+ NULL, NULL,
+ NULL);
else
read_subvol = afr_data_subvol_get (local->inode, this,
- NULL, NULL);
+ NULL, NULL, NULL);
}
local->op_ret = -1;
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 0ec1d912675..6121108872f 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -90,7 +90,7 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
memcpy (local->readable, local->child_up, priv->child_count);
read_subvol = afr_read_subvol_select_by_policy (inode, this,
- local->readable);
+ local->readable, NULL);
if (read_subvol == -1)
AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn);
@@ -232,7 +232,7 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
goto refresh;
read_subvol = afr_read_subvol_select_by_policy (inode, this,
- local->readable);
+ local->readable, NULL);
if (read_subvol < 0 || read_subvol > priv->child_count) {
gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 3d28ac22ab1..0bb184c78ae 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -750,6 +750,11 @@ typedef struct afr_spbc_timeout {
int spb_child_index;
} afr_spbc_timeout_t;
+typedef struct afr_read_subvol_args {
+ ia_type_t ia_type;
+ uuid_t gfid;
+} afr_read_subvol_args_t;
+
/* did a call fail due to a child failing? */
#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
((op_errno == ENOTCONN) || \
@@ -782,7 +787,8 @@ afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this);
int
afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this,
- unsigned char *readable);
+ unsigned char *readable,
+ afr_read_subvol_args_t *args);
int
afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
@@ -790,13 +796,14 @@ afr_inode_read_subvol_type_get (inode_t *inode, xlator_t *this,
int type);
int
afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
- int *event_p, afr_transaction_type type);
+ int *event_p, afr_transaction_type type,
+ afr_read_subvol_args_t *args);
-#define afr_data_subvol_get(i, t, s, e) \
- afr_read_subvol_get(i, t, s, e, AFR_DATA_TRANSACTION)
+#define afr_data_subvol_get(i, t, s, e, a) \
+ afr_read_subvol_get(i, t, s, e, AFR_DATA_TRANSACTION, a)
-#define afr_metadata_subvol_get(i, t, s, e) \
- afr_read_subvol_get(i, t, s, e, AFR_METADATA_TRANSACTION)
+#define afr_metadata_subvol_get(i, t, s, e, a) \
+ afr_read_subvol_get(i, t, s, e, AFR_METADATA_TRANSACTION, a)
int
afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,