summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Ravishankar N <ravishankar@redhat.com> 2015-06-25 00:22:41 +0530
committer Vijay Bellur <vbellur@redhat.com> 2015-06-26 19:30:33 -0700
commit 0104b5869d89ac58cb13e10417626455c8ba2143 (patch)
tree 5db665e349e5787e308487f8474652cd346df93a
parent b335fbe14e0afbec3cf50409707f3c8df5d5c01d (diff)
afr: Block fops when file is in split-brain
For directories, block metadata FOPS. For non-directories, block data and metadata FOPS. Do not block entry FOPS.

Change-Id: Id7f656f4a513b9d33c457dd7f2d58028dbef8e61
BUG: 1235007
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/11371
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
-rwxr-xr-x tests/bugs/glusterfs/bug-873962.t 8
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 58
-rw-r--r-- xlators/cluster/afr/src/afr-read-txn.c 22
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.c 7
-rw-r--r-- xlators/cluster/afr/src/afr.h 3
5 files changed, 80 insertions, 18 deletions
diff --git a/tests/bugs/glusterfs/bug-873962.t b/tests/bugs/glusterfs/bug-873962.t
index 492d0285497..7faa9998159 100755
--- a/tests/bugs/glusterfs/bug-873962.t
+++ b/tests/bugs/glusterfs/bug-873962.t
@@ -65,8 +65,8 @@ TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $
#Files are in split-brain, so open should fail
TEST ! cat $M0/a;
TEST ! cat $M1/a;
-TEST cat $M0/b;
-TEST cat $M1/b;
+TEST ! cat $M0/b;
+TEST ! cat $M1/b;
#Reset split-brain status
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/a;
@@ -92,8 +92,8 @@ TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $
#Files are in split-brain, so open should fail
TEST ! cat $M0/c
TEST ! cat $M1/c
-TEST cat $M0/d
-TEST cat $M1/d
+TEST ! cat $M0/d
+TEST ! cat $M1/d
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/c
TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/d
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index f644c9dc200..b42772fda09 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -336,6 +336,58 @@ out:
}
int
+afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type)
+{
+ /* Decide whether a fop of transaction `type` may proceed on `inode`,
+  * and optionally report which children are readable for it.
+  *
+  * frame    - only frame->local is read here (for local->child_up).
+  * inode    - inode whose per-child data/metadata readability is queried.
+  * this     - xlator; this->private supplies child_count.
+  * readable - optional output array of priv->child_count bytes; filled
+  *            only for AFR_DATA_TRANSACTION and AFR_METADATA_TRANSACTION.
+  * event_p  - optional output; receives the event generation on success.
+  * type     - AFR transaction type of the fop being checked.
+  *
+  * Returns 0 when the fop is allowed and -EIO when it must be blocked
+  * or when afr_inode_read_subvol_get() fails.
+  *
+  * NOTE(review): the failure value is -EIO, never -1, so a caller that
+  * only compares the result against -1 will not see the failure through
+  * the return value. On every -EIO path, and on the entry-transaction
+  * early return, neither `readable` nor `*event_p` is written.
+  */
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+ unsigned char *data = alloca0 (priv->child_count); /* presumably zero-filled stack storage - confirm alloca0 */
+ unsigned char *metadata = alloca0 (priv->child_count);
+ int data_count = 0;
+ int metadata_count = 0;
+ int event_generation = 0;
+ int ret = 0;
+
+ /* We don't care about split-brains for entry transactions. */
+ if (type == AFR_ENTRY_TRANSACTION || type == AFR_ENTRY_RENAME_TRANSACTION)
+ return 0;
+
+ ret = afr_inode_read_subvol_get (inode, this, data, metadata,
+ &event_generation);
+ if (ret == -1)
+ return -EIO;
+
+ data_count = AFR_COUNT (data, priv->child_count); /* presumably the number of set entries, i.e. readable children - confirm AFR_COUNT */
+ metadata_count = AFR_COUNT (metadata, priv->child_count);
+
+ if (inode->ia_type == IA_IFDIR) {
+ /* For directories, allow even if it is in data split-brain. */
+ if (type == AFR_METADATA_TRANSACTION) {
+ if (!metadata_count)
+ return -EIO;
+ }
+ } else {
+ /* For files, abort in case of data/metadata split-brain. */
+ if (!data_count || !metadata_count) /* applies to any non-entry type: either count being zero blocks the fop */
+ return -EIO;
+ }
+
+ if (type == AFR_METADATA_TRANSACTION && readable)
+ memcpy (readable, metadata, priv->child_count * sizeof *metadata);
+ if (type == AFR_DATA_TRANSACTION && readable) {
+ if (!data_count) /* Only reachable for directories: a non-directory
+                   * with a zero data count already returned -EIO
+                   * above. Fall back to local->child_up as the
+                   * readable set. */
+ memcpy (readable, local->child_up,
+ priv->child_count * sizeof *readable);
+ else
+ memcpy (readable, data, priv->child_count * sizeof *data);
+ }
+ if (event_p)
+ *event_p = event_generation;
+ return 0;
+}
+
+int
afr_inode_split_brain_choice_get (inode_t *inode, xlator_t *this,
int *spb_choice)
{
@@ -593,6 +645,8 @@ afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies,
for (i = 0; i < priv->child_count; i++) {
if (data_accused[i])
continue;
+ if ((priv->arbiter_count == 1) && (i == ARBITER_BRICK_INDEX))
+ continue;
if (replies[i].poststat.ia_size < maxsize)
data_accused[i] = 1;
}
@@ -1677,6 +1731,10 @@ afr_local_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
* the slowest local subvolume is far preferable to a remote one.
*/
if (is_local) {
+ /* Don't set arbiter as read child. */
+ if ((priv->arbiter_count == 1) &&
+ (child_index == ARBITER_BRICK_INDEX))
+ goto out;
gf_log (this->name, GF_LOG_INFO,
"selecting local read_child %s",
priv->children[child_index]->name);
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 6121108872f..6e545497108 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -52,6 +52,9 @@ afr_read_txn_next_subvol (call_frame_t *frame, xlator_t *this)
local->op_ret = ret; \
local->op_errno = errnum; \
read_subvol = index; \
+ gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN,\
+ "Failing %s on gfid %s: split-brain observed.",\
+ gf_fop_list[local->op], uuid_utoa (inode->gfid));\
goto label; \
} while (0)
@@ -59,7 +62,6 @@ int
afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
{
afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
int read_subvol = 0;
int event_generation = 0;
inode_t *inode = NULL;
@@ -68,27 +70,19 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
local = frame->local;
inode = local->inode;
- priv = frame->this->private;
if (err)
AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, -err, -1, readfn);
- ret = afr_inode_read_subvol_type_get (inode, this, local->readable,
- &event_generation,
- local->transaction.type);
+ ret = afr_inode_get_readable (frame, inode, this, local->readable,
+ &event_generation,
+ local->transaction.type);
if (ret == -1 || !event_generation)
/* Even after refresh, we don't have a good
read subvolume. Time to bail */
AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn);
- /* For directories in split-brain, we need to allow all fops
- * except (f)getxattr and access. */
- if (!AFR_COUNT(local->readable, priv->child_count) &&
- local->transaction.type == AFR_DATA_TRANSACTION &&
- inode->ia_type == IA_IFDIR)
- memcpy (local->readable, local->child_up, priv->child_count);
-
read_subvol = afr_read_subvol_select_by_policy (inode, this,
local->readable, NULL);
if (read_subvol == -1)
@@ -237,8 +231,8 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
if (read_subvol < 0 || read_subvol > priv->child_count) {
gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,
"Unreadable subvolume %d found with event generation "
- "%d. (Possible split-brain)",
- read_subvol, event_generation);
+ "%d for gfid %s. (Possible split-brain)",
+ read_subvol, event_generation, uuid_utoa(inode->gfid));
goto refresh;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index a2023884465..b27cfedaddb 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1967,6 +1967,13 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
if (ret < 0)
goto out;
+ ret = afr_inode_get_readable (frame, local->inode, this, 0, 0, type);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN,
+ "Failing %s on gfid %s: split-brain observed.",
+ gf_fop_list[local->op], uuid_utoa (local->inode->gfid));
+ goto out;
+ }
afr_transaction_eager_lock_init (local, this);
if (local->fd && local->transaction.eager_lock_on)
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 0bb184c78ae..c8e1a5f0008 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -761,6 +761,9 @@ typedef struct afr_read_subvol_args {
(op_errno == EBADFD)))
int
+afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type);
+int
afr_inode_read_subvol_get (inode_t *inode, xlator_t *this,
unsigned char *data_subvols,
unsigned char *metadata_subvols,