diff options
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 193 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-messages.h | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-open.c | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-read-txn.c | 6 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 6 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 26 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 25 |
9 files changed, 229 insertions, 41 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 9b2c0d7caea..dec667fd460 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -43,6 +43,20 @@ #include "afr-self-heald.h" #include "afr-messages.h" +gf_boolean_t +afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, + int32_t *op_errno) +{ + if (priv->consistent_io && local->call_count != priv->child_count) { + gf_msg (THIS->name, GF_LOG_INFO, 0, + AFR_MSG_SUBVOLS_DOWN, "All subvolumes are not up"); + if (op_errno) + *op_errno = ENOTCONN; + return _gf_false; + } + return _gf_true; +} + call_frame_t * afr_copy_frame (call_frame_t *base) { @@ -1555,6 +1569,100 @@ afr_remove_eager_lock_stub (afr_local_t *local) UNLOCK (&local->fd->lock); } +static gf_boolean_t +afr_entrylk_is_unlock (entrylk_cmd cmd) +{ + if (ENTRYLK_UNLOCK == cmd) + return _gf_true; + return _gf_false; +} + +static gf_boolean_t +afr_inodelk_is_unlock (int32_t cmd, struct gf_flock *flock) +{ + switch (cmd) { + case F_SETLKW: + case F_SETLK: + if (F_UNLCK == flock->l_type) + return _gf_true; + break; + default: + return _gf_false; + } + return _gf_false; +} + +static gf_boolean_t +afr_lk_is_unlock (int32_t cmd, struct gf_flock *flock) +{ + switch (cmd) { + case F_RESLK_UNLCK: + return _gf_true; + break; + +#if F_SETLKW != F_SETLKW64 + case F_SETLKW64: +#endif + case F_SETLKW: + +#if F_SETLK != F_SETLK64 + case F_SETLK64: +#endif + case F_SETLK: + if (F_UNLCK == flock->l_type) + return _gf_true; + break; + default: + return _gf_false; + } + return _gf_false; +} + +void +afr_handle_inconsistent_fop (call_frame_t *frame, int32_t *op_ret, + int32_t *op_errno) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + if (!frame || !frame->this || !frame->local || !frame->this->private) + return; + + if (*op_ret < 0) + return; + + /* Failing inodelk/entrylk/lk here is not a good idea because we + * need to cleanup the locks on the other bricks if 
we choose to fail + * the fop here. The brick may go down just after unwind happens as well + * so anyways the fop will fail when the next fop is sent so leaving + * it like this for now.*/ + local = frame->local; + switch (local->op) { + case GF_FOP_LOOKUP: + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + case GF_FOP_LK: + return; + default: + break; + } + + priv = frame->this->private; + if (!priv->consistent_io) + return; + + if (local->event_generation && + (local->event_generation != priv->event_generation)) + goto inconsistent; + + return; +inconsistent: + *op_ret = -1; + *op_errno = ENOTCONN; +} + void afr_local_cleanup (afr_local_t *local, xlator_t *this) { @@ -2997,10 +3105,9 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) if (!local) goto out; - if (!local->call_count) { - op_errno = ENOTCONN; + local->op = GF_FOP_FLUSH; + if (!afr_is_consistent_io_possible (local, this->private, &op_errno)) goto out; - } local->fd = fd_ref(fd); @@ -3126,11 +3233,9 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, if (!local) goto out; - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; + local->op = GF_FOP_FSYNC; + if (!afr_is_consistent_io_possible (local, priv, &op_errno)) goto out; - } local->fd = fd_ref (fd); @@ -3140,6 +3245,7 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, local->inode = inode_ref (fd->inode); + call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND_COOKIE (frame, afr_fsync_cbk, @@ -3210,12 +3316,11 @@ afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, if (!local) goto out; - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; + local->op = GF_FOP_FSYNCDIR; + if (!afr_is_consistent_io_possible (local, priv, &op_errno)) goto out; - } + call_count = local->call_count; for (i = 0; i < 
priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND (frame, afr_fsyncdir_cbk, @@ -3506,6 +3611,11 @@ afr_inodelk (call_frame_t *frame, xlator_t *this, if (!local) goto out; + local->op = GF_FOP_INODELK; + if (!afr_inodelk_is_unlock (cmd, flock) && + !afr_is_consistent_io_possible (local, this->private, &op_errno)) + goto out; + loc_copy (&local->loc, loc); local->cont.inodelk.volume = gf_strdup (volume); if (!local->cont.inodelk.volume) { @@ -3589,12 +3699,23 @@ afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, if (!local) goto out; - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; - goto out; - } + local->op = GF_FOP_FINODELK; + if (!afr_inodelk_is_unlock (cmd, flock) && + !afr_is_consistent_io_possible (local, this->private, &op_errno)) + goto out; + local->cont.inodelk.volume = gf_strdup (volume); + if (!local->cont.inodelk.volume) { + op_errno = ENOMEM; + goto out; + } + + local->fd = fd_ref (fd); + local->cont.inodelk.cmd = cmd; + local->cont.inodelk.flock = *flock; + if (xdata) + local->xdata_req = dict_ref (xdata); + call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND (frame, afr_finodelk_cbk, @@ -3610,7 +3731,6 @@ afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, return 0; out: AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL); - return 0; } @@ -3642,7 +3762,6 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } - int afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, const char *basename, entrylk_cmd cmd, @@ -3660,12 +3779,13 @@ afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, if (!local) goto out; - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; - goto out; - } + local->op = GF_FOP_ENTRYLK; + if (!afr_entrylk_is_unlock (cmd) && + !afr_is_consistent_io_possible (local, priv, 
&op_errno)) + goto out; + local->cont.entrylk.cmd = cmd; + call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND (frame, afr_entrylk_cbk, @@ -3733,12 +3853,13 @@ afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, if (!local) goto out; - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; - goto out; - } + local->op = GF_FOP_FENTRYLK; + if (!afr_entrylk_is_unlock (cmd) && + !afr_is_consistent_io_possible (local, priv, &op_errno)) + goto out; + local->cont.entrylk.cmd = cmd; + call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND (frame, afr_fentrylk_cbk, @@ -3823,6 +3944,10 @@ afr_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (!local) goto out; + local->op = GF_FOP_STATFS; + if (!afr_is_consistent_io_possible (local, priv, &op_errno)) + goto out; + if (priv->arbiter_count == 1 && local->child_up[ARBITER_BRICK_INDEX]) local->call_count--; call_count = local->call_count; @@ -3963,7 +4088,6 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } - int afr_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) @@ -3979,6 +4103,11 @@ afr_lk (call_frame_t *frame, xlator_t *this, if (!local) goto out; + local->op = GF_FOP_LK; + if (!afr_lk_is_unlock (cmd, flock) && + !afr_is_consistent_io_possible (local, priv, &op_errno)) + goto out; + local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count, sizeof (*local->cont.lk.locked_nodes), gf_afr_mt_char); @@ -4311,7 +4440,7 @@ afr_notify (xlator_t *this, int32_t event, down_children++; if (down_children == priv->child_count) { gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_ALL_SUBVOLS_DOWN, + AFR_MSG_SUBVOLS_DOWN, "All subvolumes are down. 
Going offline " "until atleast one of them comes back up."); } else { @@ -4399,7 +4528,6 @@ out: return ret; } - int afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno) { @@ -4422,11 +4550,12 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno) local->call_count = AFR_COUNT (local->child_up, priv->child_count); if (local->call_count == 0) { gf_msg (THIS->name, GF_LOG_INFO, 0, - AFR_MSG_ALL_SUBVOLS_DOWN, "no subvolumes up"); + AFR_MSG_SUBVOLS_DOWN, "no subvolumes up"); if (op_errno) *op_errno = ENOTCONN; goto out; } + local->event_generation = priv->event_generation; local->read_attempted = GF_CALLOC (priv->child_count, sizeof (char), diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 2260e5dac26..4e29171482a 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -88,6 +88,10 @@ afr_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) if (!local) goto out; + local->op = GF_FOP_OPENDIR; + if (!afr_is_consistent_io_possible (local, priv, &op_errno)) + goto out; + fd_ctx = afr_fd_ctx_get (fd, this); if (!fd_ctx) goto out; diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index c2a5f526c08..718ba318cfe 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -1622,7 +1622,7 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) if (!call_count) { gf_msg (this->name, GF_LOG_INFO, 0, - AFR_MSG_ALL_SUBVOLS_DOWN, + AFR_MSG_SUBVOLS_DOWN, "All bricks are down, aborting."); afr_unlock (frame, this); goto out; diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h index c7af18d0f25..5fb81c696d8 100644 --- a/xlators/cluster/afr/src/afr-messages.h +++ b/xlators/cluster/afr/src/afr-messages.h @@ -93,11 +93,11 @@ /*! * @messageid 108006 - * @diagnosis All bricks of a replica set are down. 
Data residing in that + * @diagnosis Bricks of a replica set are down. Data residing in that * replica cannot be accessed until one of the bricks come back up. * @recommendedaction Ensure that the bricks are up. */ -#define AFR_MSG_ALL_SUBVOLS_DOWN (GLFS_COMP_BASE_AFR + 6) +#define AFR_MSG_SUBVOLS_DOWN (GLFS_COMP_BASE_AFR + 6) /*! diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index 059d3f9bd71..7a628350c34 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -130,12 +130,16 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, if (!local) goto out; + local->op = GF_FOP_OPEN; fd_ctx = afr_fd_ctx_get (fd, this); if (!fd_ctx) { op_errno = ENOMEM; goto out; } + if (!afr_is_consistent_io_possible (local, priv, &op_errno)) + goto out; + local->fd = fd_ref (fd); local->fd_ctx = fd_ctx; fd_ctx->flags = flags; diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 74749f029c8..cb81af42510 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -217,6 +217,12 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode, goto read; } + if (!afr_is_consistent_io_possible (local, priv, &local->op_errno)) { + local->op_ret = -1; + read_subvol = -1; + goto read; + } + local->transaction.type = type; ret = afr_inode_read_subvol_get (inode, this, data, metadata, &event_generation); diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 6130ad76543..64a42d9fc7e 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -207,6 +207,7 @@ afr_transaction_detach_fop_frame (call_frame_t *frame) local = frame->local; + afr_handle_inconsistent_fop (frame, &local->op_ret, &local->op_errno); LOCK (&frame->lock); { fop_frame = local->transaction.main_frame; @@ -2238,6 +2239,11 @@ afr_transaction
(call_frame_t *frame, xlator_t *this, afr_transaction_type type) local->transaction.resume = afr_transaction_resume; local->transaction.type = type; + if (!afr_is_consistent_io_possible (local, priv, &ret)) { + ret = -ret; /*op_errno to ret conversion*/ + goto out; + } + ret = afr_transaction_local_init (local, this); if (ret < 0) goto out; diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index da62564e93a..48beaf24a6e 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -132,6 +132,7 @@ reconfigure (xlator_t *this, dict_t *options) int index = -1; char *qtype = NULL; char *fav_child_policy = NULL; + gf_boolean_t consistent_io = _gf_false; priv = this->private; @@ -258,6 +259,11 @@ reconfigure (xlator_t *this, dict_t *options) priv->did_discovery = _gf_false; + GF_OPTION_RECONF ("consistent-io", consistent_io, options, bool, out); + if (priv->quorum_count != 0) + consistent_io = _gf_false; + priv->consistent_io = consistent_io; + ret = 0; out: return ret; @@ -494,6 +500,10 @@ init (xlator_t *this) GF_OPTION_INIT ("quorum-reads", priv->quorum_reads, bool, out); GF_OPTION_INIT ("consistent-metadata", priv->consistent_metadata, bool, out); + GF_OPTION_INIT ("consistent-io", priv->consistent_io, bool, out); + + if (priv->quorum_count != 0) + priv->consistent_io = _gf_false; priv->wait_count = 1; @@ -594,14 +604,11 @@ fini (xlator_t *this) struct xlator_fops fops = { .lookup = afr_lookup, - .open = afr_open, .lk = afr_lk, .flush = afr_flush, .statfs = afr_statfs, .fsync = afr_fsync, .fsyncdir = afr_fsyncdir, - .xattrop = afr_xattrop, - .fxattrop = afr_fxattrop, .inodelk = afr_inodelk, .finodelk = afr_finodelk, .entrylk = afr_entrylk, @@ -629,9 +636,14 @@ struct xlator_fops fops = { .fallocate = afr_fallocate, .discard = afr_discard, .zerofill = afr_zerofill, + .xattrop = afr_xattrop, + .fxattrop = afr_fxattrop, - /* dir read */ + /*inode open*/ .opendir = afr_opendir, + .open = afr_open, + + /* dir read */ .readdir 
= afr_readdir, .readdirp = afr_readdirp, @@ -986,5 +998,11 @@ struct volume_options options[] = { " with identical mtime and size in more than half the " "number of bricks in the replica.", }, + { .key = {"consistent-io"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .description = "If this option is enabled, i/o will fail even if " + "one of the bricks is down in the replicas", + }, { .key = {NULL} }, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 29008287e6d..983f07fcce9 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -152,6 +152,7 @@ typedef struct _afr_private { gf_boolean_t use_afr_in_pump; char *locking_scheme; gf_boolean_t esh_granular; + gf_boolean_t consistent_io; } afr_private_t; @@ -663,6 +664,10 @@ typedef struct _afr_local { } inodelk; struct { + entrylk_cmd cmd; + } entrylk; + + struct { off_t offset; gf_seek_what_t what; } seek; @@ -965,16 +970,25 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this); int afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd); -#define AFR_STACK_UNWIND(fop, frame, params ...) 
\ +#define AFR_STACK_UNWIND(fop, frame, op_ret, op_errno, params ...)\ do { \ afr_local_t *__local = NULL; \ xlator_t *__this = NULL; \ + int32_t __op_ret = 0; \ + int32_t __op_errno = 0; \ + \ + __op_ret = op_ret; \ + __op_errno = op_errno; \ if (frame) { \ __local = frame->local; \ __this = frame->this; \ + afr_handle_inconsistent_fop (frame, &__op_ret,\ + &__op_errno);\ frame->local = NULL; \ } \ - STACK_UNWIND_STRICT (fop, frame, params); \ + \ + STACK_UNWIND_STRICT (fop, frame, __op_ret, \ + __op_errno, params); \ if (__local) { \ afr_local_cleanup (__local, __this); \ mem_put (__local); \ @@ -1160,4 +1174,11 @@ afr_get_msg_id (char *op_type); int afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local, inode_t *inode); + +gf_boolean_t +afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, + int32_t *op_errno); +void +afr_handle_inconsistent_fop (call_frame_t *frame, int32_t *op_ret, + int32_t *op_errno); #endif /* __AFR_H__ */ |