diff options
| author | Shreyas Siravara <sshreyas@fb.com> | 2015-10-23 15:52:22 -0700 |
|---|---|---|
| committer | Shreyas Siravara <sshreyas@fb.com> | 2017-08-30 01:18:39 +0000 |
| commit | d5bc1267359cf78a5e5d65bd70f51e41239f5e0e (patch) | |
| tree | 03d78d1c085cb71030641c5db6f4464a4d2a4020 | |
| parent | 476a5c3fb72ec4c1c791c0fcf9be3c95fcc398c6 (diff) | |
Make a DHT subvolume go read-only when a subvolume crashes
Summary:
When subvolumes crash, users get messages like "No such file or directory" or
"I/O Error" when doing operations that are cluster-wide, i.e., operations that
touch the subvolume that has crashed. These include operations like mkdir() and
rmdir() which are cluster-wide, as well as reads/writes/creates that hash to
the dead subvolume.
DHT does the right thing by disallowing operations to the subvolume -- it is
effectively putting the subvolume in "read-only" mode to protect data, but it
does not return the correct error. As a result, users of the filesystem think
that the data is gone (in the case of "No such file or directory", or worse a
blanket error that means nothing in the case of EIO). DHT sets the errno to
ENOENT, which makes sense in the context of DHT (no subvolume entry, hence
ENOENT), but the error it should bubble up to the user is EROFS, since it is
putting the system in read-only mode.
This diff changes the returned error code to EROFS so that users get a clearer
indication of what is going on.
Test Plan: Tested by downing a subvolume and checking error codes. Also ran
other prove tests to make sure they pass.
Change-Id: I20ad6fe31dbd66536db2a69246771ffad0140db3
Reviewers: rwareing, dph, moox
Reviewed-on: https://review.gluster.org/17952
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shreyas Siravara <sshreyas@fb.com>
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 8 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 2 | ||||
| -rw-r--r-- | xlators/nfs/server/src/nfs3-helpers.c | 7 |
4 files changed, 13 insertions, 6 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 333ea888395..6a9b20d4443 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -5145,7 +5145,7 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno) local->call_count = AFR_COUNT (local->child_up, priv->child_count); if (local->call_count == 0) { gf_msg (THIS->name, GF_LOG_INFO, 0, - AFR_MSG_ALL_SUBVOLS_DOWN, "no subvolumes up"); + AFR_MSG_ALL_SUBVOLS_DOWN, "no bricks up"); if (op_errno) *op_errno = ENOTCONN; goto out; diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 81f7a8a40e6..45632b04845 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -6193,7 +6193,7 @@ dht_mknod (call_frame_t *frame, xlator_t *this, gf_msg_debug (this->name, 0, "no subvolume in layout for path=%s", loc->path); - op_errno = EIO; + op_errno = NO_SUBVOL_HASH_ERRNO; goto err; } @@ -6603,7 +6603,7 @@ dht_link (call_frame_t *frame, xlator_t *this, gf_msg_debug (this->name, 0, "no subvolume in layout for path=%s", newloc->path); - op_errno = EIO; + op_errno = NO_SUBVOL_HASH_ERRNO; goto err; } @@ -7105,7 +7105,7 @@ dht_create (call_frame_t *frame, xlator_t *this, "no subvolume in layout for path=%s", loc->path); - op_errno = EIO; + op_errno = NO_SUBVOL_HASH_ERRNO; goto err; } @@ -7628,7 +7628,7 @@ dht_mkdir (call_frame_t *frame, xlator_t *this, gf_msg_debug (this->name, 0, "hashed subvol not found for %s", loc->path); - local->op_errno = EIO; + local->op_errno = NO_SUBVOL_HASH_ERRNO; goto err; } diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 613a9d39816..fa973f294fb 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -609,6 +609,8 @@ typedef struct dht_fd_ctx { } dht_fd_ctx_t; +#define NO_SUBVOL_HASH_ERRNO EROFS + #define ENTRY_MISSING(op_ret, op_errno) 
(op_ret == -1 && op_errno == ENOENT) #define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0) diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c index 1edc2ac4429..64bd08a3fc7 100644 --- a/xlators/nfs/server/src/nfs3-helpers.c +++ b/xlators/nfs/server/src/nfs3-helpers.c @@ -239,7 +239,12 @@ nfs3_errno_to_nfsstat3 (int errnum) break; case ENOTCONN: - stat = NFS3ERR_IO; + /* If connections to bricks cannot be established, + * the filesystem is effectively in read-only mode + * to protect data. E.g., when all bricks in a subvolume + * crash. + */ + stat = NFS3ERR_ROFS; break; case EDQUOT: |
