diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2016-08-16 16:04:37 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2016-08-22 13:55:42 -0700 |
commit | 413594ed647400f1b39e05d4f1b12ad846e48800 (patch) | |
tree | fd7072bc983754156eb66ff2348c7ac99773c9e2 /xlators/cluster/afr/src/afr-common.c | |
parent | eddada59f7ad3cf21463a558a5f62591f4b72c68 (diff) |
cluster/afr: Give option to do consistent-io
Problem:
When tiering/rebalance does migrations and afr with 2-way replica is in the
picture, migration can read stale data if the source brick goes down, and write
it to the destination. After this, deletion of the file leads to permanent loss
of data after migration.
Fix:
Rebalance/tiering should migrate only when the data is definitely not stale. So
introduce an option in afr called consistent-io which will be enabled in
migration daemons.
BUG: 1306398
Change-Id: I750f65091cc70a3ed4bf3c12f83d0949af43920a
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/13425
Reviewed-by: Anuradha Talur <atalur@redhat.com>
Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Diffstat (limited to 'xlators/cluster/afr/src/afr-common.c')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 193 |
1 files changed, 161 insertions, 32 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 9b2c0d7caea..dec667fd460 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -43,6 +43,20 @@ #include "afr-self-heald.h" #include "afr-messages.h" +gf_boolean_t +afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, + int32_t *op_errno) +{ + if (priv->consistent_io && local->call_count != priv->child_count) { + gf_msg (THIS->name, GF_LOG_INFO, 0, + AFR_MSG_SUBVOLS_DOWN, "All subvolumes are not up"); + if (op_errno) + *op_errno = ENOTCONN; + return _gf_false; + } + return _gf_true; +} + call_frame_t * afr_copy_frame (call_frame_t *base) { @@ -1555,6 +1569,100 @@ afr_remove_eager_lock_stub (afr_local_t *local) UNLOCK (&local->fd->lock); } +static gf_boolean_t +afr_entrylk_is_unlock (entrylk_cmd cmd) +{ + if (ENTRYLK_UNLOCK == cmd) + return _gf_true; + return _gf_false; +} + +static gf_boolean_t +afr_inodelk_is_unlock (int32_t cmd, struct gf_flock *flock) +{ + switch (cmd) { + case F_SETLKW: + case F_SETLK: + if (F_UNLCK == flock->l_type) + return _gf_true; + break; + default: + return _gf_false; + } + return _gf_false; +} + +static gf_boolean_t +afr_lk_is_unlock (int32_t cmd, struct gf_flock *flock) +{ + switch (cmd) { + case F_RESLK_UNLCK: + return _gf_true; + break; + +#if F_SETLKW != F_SETLKW64 + case F_SETLKW64: +#endif + case F_SETLKW: + +#if F_SETLK != F_SETLK64 + case F_SETLK64: +#endif + case F_SETLK: + if (F_UNLCK == flock->l_type) + return _gf_true; + break; + default: + return _gf_false; + } + return _gf_false; +} + +void +afr_handle_inconsistent_fop (call_frame_t *frame, int32_t *op_ret, + int32_t *op_errno) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + if (!frame || !frame->this || !frame->local || !frame->this->private) + return; + + if (*op_ret < 0) + return; + + /* Failing inodelk/entrylk/lk here is not a good idea because we + * need to cleanup the locks on the other bricks if 
we choose to fail + * the fop here. The brick may go down just after unwind happens as well + * so anyways the fop will fail when the next fop is sent so leaving + * it like this for now.*/ + local = frame->local; + switch (local->op) { + case GF_FOP_LOOKUP: + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + case GF_FOP_LK: + return; + default: + break; + } + + priv = frame->this->private; + if (!priv->consistent_io) + return; + + if (local->event_generation && + (local->event_generation != priv->event_generation)) + goto inconsistent; + + return; +inconsistent: + *op_ret = -1; + *op_errno = ENOTCONN; +} + void afr_local_cleanup (afr_local_t *local, xlator_t *this) { @@ -2997,10 +3105,9 @@ afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) if (!local) goto out; - if (!local->call_count) { - op_errno = ENOTCONN; + local->op = GF_FOP_FLUSH; + if (!afr_is_consistent_io_possible (local, this->private, &op_errno)) goto out; - } local->fd = fd_ref(fd); @@ -3126,11 +3233,9 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, if (!local) goto out; - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; + local->op = GF_FOP_FSYNC; + if (!afr_is_consistent_io_possible (local, priv, &op_errno)) goto out; - } local->fd = fd_ref (fd); @@ -3140,6 +3245,7 @@ afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, local->inode = inode_ref (fd->inode); + call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND_COOKIE (frame, afr_fsync_cbk, @@ -3210,12 +3316,11 @@ afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, if (!local) goto out; - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; + local->op = GF_FOP_FSYNCDIR; + if (!afr_is_consistent_io_possible (local, priv, &op_errno)) goto out; - } + call_count = local->call_count; for (i = 0; i < 
priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND (frame, afr_fsyncdir_cbk, @@ -3506,6 +3611,11 @@ afr_inodelk (call_frame_t *frame, xlator_t *this, if (!local) goto out; + local->op = GF_FOP_INODELK; + if (!afr_inodelk_is_unlock (cmd, flock) && + !afr_is_consistent_io_possible (local, this->private, &op_errno)) + goto out; + loc_copy (&local->loc, loc); local->cont.inodelk.volume = gf_strdup (volume); if (!local->cont.inodelk.volume) { @@ -3589,12 +3699,23 @@ afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, if (!local) goto out; - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; - goto out; - } + local->op = GF_FOP_FINODELK; + if (!afr_inodelk_is_unlock (cmd, flock) && + !afr_is_consistent_io_possible (local, this->private, &op_errno)) + goto out; + local->cont.inodelk.volume = gf_strdup (volume); + if (!local->cont.inodelk.volume) { + op_errno = ENOMEM; + goto out; + } + + local->fd = fd_ref (fd); + local->cont.inodelk.cmd = cmd; + local->cont.inodelk.flock = *flock; + if (xdata) + local->xdata_req = dict_ref (xdata); + call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND (frame, afr_finodelk_cbk, @@ -3610,7 +3731,6 @@ afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, return 0; out: AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL); - return 0; } @@ -3642,7 +3762,6 @@ afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } - int afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, const char *basename, entrylk_cmd cmd, @@ -3660,12 +3779,13 @@ afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, if (!local) goto out; - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; - goto out; - } + local->op = GF_FOP_ENTRYLK; + if (!afr_entrylk_is_unlock (cmd) && + !afr_is_consistent_io_possible (local, priv, 
&op_errno)) + goto out; + local->cont.entrylk.cmd = cmd; + call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND (frame, afr_entrylk_cbk, @@ -3733,12 +3853,13 @@ afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, if (!local) goto out; - call_count = local->call_count; - if (!call_count) { - op_errno = ENOTCONN; - goto out; - } + local->op = GF_FOP_FENTRYLK; + if (!afr_entrylk_is_unlock (cmd) && + !afr_is_consistent_io_possible (local, priv, &op_errno)) + goto out; + local->cont.entrylk.cmd = cmd; + call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { STACK_WIND (frame, afr_fentrylk_cbk, @@ -3823,6 +3944,10 @@ afr_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (!local) goto out; + local->op = GF_FOP_STATFS; + if (!afr_is_consistent_io_possible (local, priv, &op_errno)) + goto out; + if (priv->arbiter_count == 1 && local->child_up[ARBITER_BRICK_INDEX]) local->call_count--; call_count = local->call_count; @@ -3963,7 +4088,6 @@ afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } - int afr_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) @@ -3979,6 +4103,11 @@ afr_lk (call_frame_t *frame, xlator_t *this, if (!local) goto out; + local->op = GF_FOP_LK; + if (!afr_lk_is_unlock (cmd, flock) && + !afr_is_consistent_io_possible (local, priv, &op_errno)) + goto out; + local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count, sizeof (*local->cont.lk.locked_nodes), gf_afr_mt_char); @@ -4311,7 +4440,7 @@ afr_notify (xlator_t *this, int32_t event, down_children++; if (down_children == priv->child_count) { gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_ALL_SUBVOLS_DOWN, + AFR_MSG_SUBVOLS_DOWN, "All subvolumes are down. 
Going offline " "until atleast one of them comes back up."); } else { @@ -4399,7 +4528,6 @@ out: return ret; } - int afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno) { @@ -4422,11 +4550,12 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno) local->call_count = AFR_COUNT (local->child_up, priv->child_count); if (local->call_count == 0) { gf_msg (THIS->name, GF_LOG_INFO, 0, - AFR_MSG_ALL_SUBVOLS_DOWN, "no subvolumes up"); + AFR_MSG_SUBVOLS_DOWN, "no subvolumes up"); if (op_errno) *op_errno = ENOTCONN; goto out; } + local->event_generation = priv->event_generation; local->read_attempted = GF_CALLOC (priv->child_count, sizeof (char), |