diff options
-rw-r--r-- | xlators/cluster/afr/src/afr-dir-write.c | 187 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-inode-read.c | 122 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 187 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 22 |
4 files changed, 205 insertions, 313 deletions
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index faaf75e45..5c6c14736 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -71,41 +71,6 @@ afr_build_parent_loc (loc_t *parent, loc_t *child) parent->ino = parent->inode->ino; } - -afr_inode_ctx_t * -afr_get_inode_ctx (xlator_t *this, inode_t *inode) -{ - afr_inode_ctx_t * inode_ctx = NULL; - uint64_t ctx; - - int ret = 0; - - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx); - - if (ret < 0) { - inode_ctx = CALLOC (1, sizeof (afr_inode_ctx_t)); - - ret = __inode_ctx_put (inode, this, - (uint64_t)(long) inode_ctx); - - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "could not set inode ctx"); - FREE (inode_ctx); - inode_ctx = NULL; - } - } else { - inode_ctx = (afr_inode_ctx_t *)(long) ctx; - } - } - UNLOCK (&inode->lock); - - return inode_ctx; -} - - /* {{{ create */ int @@ -144,8 +109,6 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, afr_local_t * local = NULL; afr_private_t * priv = NULL; - afr_inode_ctx_t * inode_ctx = NULL; - int call_count = -1; int child_index = -1; @@ -169,26 +132,22 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, priv->child_count, child_index); - inode_ctx = afr_get_inode_ctx (this, inode); - - if (inode_ctx) { - if (priv->read_child >= 0) { - inode_ctx->read_child = priv->read_child; - } else { - inode_ctx->read_child = local->read_child_index; - } + if (priv->read_child >= 0) { + afr_set_read_child (this, inode, + priv->read_child); + } else { + afr_set_read_child (this, inode, + local->read_child_index); } } if (child_index == local->read_child_index) { - inode_ctx = afr_get_inode_ctx (this, inode); - - if (inode_ctx) { - if (priv->read_child >= 0) { - inode_ctx->read_child = priv->read_child; - } else { - inode_ctx->read_child = local->read_child_index; - } + if (priv->read_child >= 0) { + afr_set_read_child (this, inode, + priv->read_child); + } else { + afr_set_read_child (this, inode, + local->read_child_index); } } @@ -379,8 +338,6 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, afr_local_t * local = NULL; afr_private_t * priv = NULL; - afr_inode_ctx_t * inode_ctx = NULL; - int call_count = -1; int child_index = -1; @@ -404,26 +361,22 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, priv->child_count, child_index); - inode_ctx = afr_get_inode_ctx (this, inode); - - if (inode_ctx) { - if (priv->read_child >= 0) { - inode_ctx->read_child = priv->read_child; - } else { - inode_ctx->read_child = local->read_child_index; - } + if (priv->read_child >= 0) { + afr_set_read_child (this, inode, + priv->read_child); + } else { + afr_set_read_child (this, inode, + local->read_child_index); } } if (child_index == local->read_child_index) { - inode_ctx = afr_get_inode_ctx (this, inode); - - if (inode_ctx) { - if (priv->read_child >= 0) { - inode_ctx->read_child = priv->read_child; - } else { - inode_ctx->read_child = local->read_child_index; - } + if (priv->read_child >= 0) { + afr_set_read_child (this, inode, + priv->read_child); + } else { + afr_set_read_child (this, inode, + local->read_child_index); } } @@ -610,8 +563,6 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, afr_local_t * local = NULL; afr_private_t * priv = NULL; - afr_inode_ctx_t * inode_ctx = NULL; - int call_count = -1; int child_index = -1; @@ -633,27 +584,23 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->cont.mkdir.buf.st_ino = afr_itransform (buf->st_ino, priv->child_count, child_index); - - inode_ctx = afr_get_inode_ctx (this, inode); - - if (inode_ctx) { - if (priv->read_child >= 0) { - inode_ctx->read_child = priv->read_child; - } else { - inode_ctx->read_child = local->read_child_index; - } + + if (priv->read_child >= 0) { + afr_set_read_child (this, inode, + priv->read_child); + } else { + afr_set_read_child (this, inode, + local->read_child_index); } } if (child_index == local->read_child_index) { - inode_ctx = afr_get_inode_ctx (this, inode); - - if (inode_ctx) { - if (priv->read_child >= 0) { - inode_ctx->read_child = priv->read_child; - } else { - inode_ctx->read_child = local->read_child_index; - } + if (priv->read_child >= 0) { + afr_set_read_child (this, inode, + priv->read_child); + } else { + afr_set_read_child (this, inode, + local->read_child_index); } } @@ -843,8 +790,6 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, afr_local_t * local = NULL; afr_private_t * priv = NULL; - afr_inode_ctx_t * inode_ctx = NULL; - int call_count = -1; int child_index = -1; @@ -867,26 +812,22 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, afr_itransform (buf->st_ino, priv->child_count, child_index); - inode_ctx = afr_get_inode_ctx (this, inode); - - if (inode_ctx) { - if (priv->read_child >= 0) { - inode_ctx->read_child = priv->read_child; - } else { - inode_ctx->read_child = local->read_child_index; - } + if (priv->read_child >= 0) { + afr_set_read_child (this, inode, + priv->read_child); + } else { + afr_set_read_child (this, inode, + local->read_child_index); } } if (child_index == local->read_child_index) { - inode_ctx = afr_get_inode_ctx (this, inode); - - if (inode_ctx) { - if (priv->read_child >= 0) { - inode_ctx->read_child = priv->read_child; - } else { - inode_ctx->read_child = local->read_child_index; - } + if (priv->read_child >= 0) { + afr_set_read_child (this, inode, + priv->read_child); + } else { + afr_set_read_child (this, inode, + local->read_child_index); } } @@ -1074,8 +1015,6 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, afr_local_t * local = NULL; afr_private_t * priv = NULL; - afr_inode_ctx_t * inode_ctx = NULL; - int call_count = -1; int child_index = -1; @@ -1098,26 +1037,22 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, afr_itransform (buf->st_ino, priv->child_count, child_index); - inode_ctx = afr_get_inode_ctx (this, inode); - - if (inode_ctx) { - if (priv->read_child >= 0) { - inode_ctx->read_child = priv->read_child; - } else { - inode_ctx->read_child = local->read_child_index; - } + if (priv->read_child >= 0) { + afr_set_read_child (this, inode, + priv->read_child); + } else { + afr_set_read_child (this, inode, + local->read_child_index); } } if (child_index == local->read_child_index) { - inode_ctx = afr_get_inode_ctx (this, inode); - - if (inode_ctx) { - if (priv->read_child >= 0) { - inode_ctx->read_child = priv->read_child; - } else { - inode_ctx->read_child = local->read_child_index; - } + if (priv->read_child >= 0) { + afr_set_read_child (this, inode, + priv->read_child); + } else { + afr_set_read_child (this, inode, + local->read_child_index); } } diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 97b429049..c7a7e9987 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -119,9 +119,8 @@ afr_access (call_frame_t *frame, xlator_t *this, int call_child = 0; afr_local_t *local = NULL; - afr_inode_ctx_t * inode_ctx = NULL; - uint64_t ctx; - int ret = 0; + int32_t read_child = -1; + int32_t op_ret = -1; int32_t op_errno = 0; @@ -137,19 +136,10 @@ afr_access (call_frame_t *frame, xlator_t *this, ALLOC_OR_GOTO (local, afr_local_t, out); - ret = inode_ctx_get (loc->inode, this, - &ctx); - if (ret < 0) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_ERROR, - "inode ctx not set!"); - goto out; - } - - inode_ctx = (afr_inode_ctx_t *)(long) ctx; - - if (inode_ctx->read_child >= 0) { - call_child = inode_ctx->read_child; + read_child = afr_read_child (this, loc->inode); + + if (read_child >= 0) { + call_child = read_child; local->cont.access.last_tried = -1; @@ -249,10 +239,7 @@ afr_stat (call_frame_t *frame, xlator_t *this, afr_local_t * local = NULL; xlator_t ** children = NULL; - afr_inode_ctx_t * inode_ctx = NULL; - uint64_t ctx; - int ret = 0; - + int32_t read_child = -1; int call_child = 0; int32_t op_ret = -1; @@ -271,19 +258,10 @@ afr_stat (call_frame_t *frame, xlator_t *this, frame->local = local; - ret = inode_ctx_get (loc->inode, this, - &ctx); - if (ret < 0) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_ERROR, - "inode ctx not set!"); - goto out; - } - - inode_ctx = (afr_inode_ctx_t *)(long) ctx; + read_child = afr_read_child (this, loc->inode); - if (inode_ctx->read_child >= 0) { - call_child = inode_ctx->read_child; + if (read_child >= 0) { + call_child = read_child; local->cont.stat.last_tried = -1; @@ -386,10 +364,7 @@ afr_fstat (call_frame_t *frame, xlator_t *this, xlator_t ** children = NULL; int call_child = 0; - - afr_inode_ctx_t * inode_ctx = NULL; - uint64_t ctx; - int ret = 0; + int32_t read_child = -1; int32_t op_ret = -1; int32_t op_errno = 0; @@ -410,20 +385,10 @@ afr_fstat (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (fd->inode, out); - ret = inode_ctx_get (fd->inode, this, - &ctx); - - if (ret < 0) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_ERROR, - "inode ctx not set!"); - goto out; - } - - inode_ctx = (afr_inode_ctx_t *)(long) ctx; + read_child = afr_read_child (this, fd->inode); - if (inode_ctx->read_child >= 0) { - call_child = inode_ctx->read_child; + if (read_child >= 0) { + call_child = read_child; local->cont.fstat.last_tried = -1; } else { @@ -521,9 +486,7 @@ afr_readlink (call_frame_t *frame, xlator_t *this, int call_child = 0; afr_local_t *local = NULL; - afr_inode_ctx_t * inode_ctx = NULL; - uint64_t ctx; - int ret = 0; + int32_t read_child = -1; int32_t op_ret = -1; int32_t op_errno = 0; @@ -541,19 +504,10 @@ afr_readlink (call_frame_t *frame, xlator_t *this, frame->local = local; - ret = inode_ctx_get (loc->inode, this, - &ctx); - if (ret < 0) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_ERROR, - "inode ctx not set!"); - goto out; - } - - inode_ctx = (afr_inode_ctx_t *)(long) ctx; + read_child = afr_read_child (this, loc->inode); - if (inode_ctx->read_child >= 0) { - call_child = inode_ctx->read_child; + if (read_child >= 0) { + call_child = read_child; local->cont.readlink.last_tried = -1; @@ -652,9 +606,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, int call_child = 0; afr_local_t * local = NULL; - afr_inode_ctx_t * inode_ctx = NULL; - uint64_t ctx; - int ret = 0; + int read_child = -1; int32_t op_ret = -1; int32_t op_errno = 0; @@ -671,19 +623,10 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, ALLOC_OR_GOTO (local, afr_local_t, out); frame->local = local; - ret = inode_ctx_get (loc->inode, this, &ctx); - - if (ret < 0) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_ERROR, - "inode ctx not set!"); - goto out; - } - - inode_ctx = (afr_inode_ctx_t *)(long) ctx; + read_child = afr_read_child (this, loc->inode); - if (inode_ctx->read_child >= 0) { - call_child = inode_ctx->read_child; + if (read_child >= 0) { + call_child = read_child; local->cont.getxattr.last_tried = -1; } else { @@ -804,10 +747,7 @@ afr_readv (call_frame_t *frame, xlator_t *this, afr_local_t * local = NULL; xlator_t ** children = NULL; - afr_inode_ctx_t * inode_ctx = NULL; - uint64_t ctx; - int ret = 0; - + int32_t read_child = -1; int call_child = 0; int32_t op_ret = -1; @@ -825,20 +765,10 @@ afr_readv (call_frame_t *frame, xlator_t *this, frame->local = local; - ret = inode_ctx_get (fd->inode, this, - &ctx); - - if (ret < 0) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_ERROR, - "inode ctx not set!"); - goto out; - } - - inode_ctx = (afr_inode_ctx_t *)(long) ctx; + read_child = afr_read_child (this, fd->inode); - if (inode_ctx->read_child >= 0) { - call_child = inode_ctx->read_child; + if (read_child >= 0) { + call_child = read_child; /* if read fails from the read child, we try diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index e3526087a..76e28cc4c 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -53,6 +53,100 @@ #include "afr-self-heal.h" +uint64_t +afr_is_split_brain (xlator_t *this, inode_t *inode) +{ + int ret = 0; + + uint64_t ctx = 0; + uint64_t split_brain = 0; + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx); + + if (ret < 0) + goto unlock; + + split_brain = ctx & 0xFFFFFFFF00000000ULL; + } +unlock: + UNLOCK (&inode->lock); + + return split_brain; +} + + +void +afr_set_split_brain (xlator_t *this, inode_t *inode, int32_t split_brain) +{ + uint64_t ctx = 0; + int ret = 0; + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx); + + if (ret < 0) { + ctx = 0; + } + + ctx = (0x00000000FFFFFFFFULL & ctx) + | (split_brain & 0xFFFFFFFF00000000ULL); + + __inode_ctx_put (inode, this, ctx); + } + UNLOCK (&inode->lock); +} + + +uint64_t +afr_read_child (xlator_t *this, inode_t *inode) +{ + int ret = 0; + + uint64_t ctx = 0; + uint64_t read_child = 0; + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx); + + if (ret < 0) + goto unlock; + + read_child = ctx & 0x00000000FFFFFFFFULL; + } +unlock: + UNLOCK (&inode->lock); + + return read_child; +} + + +void +afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child) +{ + uint64_t ctx = 0; + int ret = 0; + + LOCK (&inode->lock); + { + ret = __inode_ctx_get (inode, this, &ctx); + + if (ret < 0) { + ctx = 0; + } + + ctx = (0xFFFFFFFF00000000ULL & ctx) + | (0x00000000FFFFFFFFULL & read_child); + + __inode_ctx_put (inode, this, ctx); + } + UNLOCK (&inode->lock); +} + + /** * afr_local_cleanup - cleanup everything in frame->local */ @@ -306,24 +400,17 @@ afr_self_heal_cbk (call_frame_t *frame, xlator_t *this) afr_local_t *local = NULL; int ret = -1; - afr_inode_ctx_t * inode_ctx = NULL; - uint64_t ctx; - local = frame->local; - ret = inode_ctx_get (local->cont.lookup.inode, this, &ctx); - - inode_ctx = (afr_inode_ctx_t *)(long) ctx; - if (local->govinda_gOvinda) { - inode_ctx->split_brain = 1; + afr_set_split_brain (this, local->cont.lookup.inode, 1); if (ret < 0) { local->op_ret = -1; local->op_errno = -ret; } } else { - inode_ctx->split_brain = 0; + afr_set_split_brain (this, local->cont.lookup.inode, 0); } AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, @@ -349,9 +436,6 @@ afr_lookup_cbk (call_frame_t *frame, void *cookie, uint32_t open_fd_count = 0; int ret = 0; - afr_inode_ctx_t * inode_ctx = NULL; - uint64_t ctx; - child_index = (long) cookie; priv = this->private; @@ -412,16 +496,15 @@ afr_lookup_cbk (call_frame_t *frame, void *cookie, lookup_buf->st_ino = afr_itransform (buf->st_ino, priv->child_count, child_index); - - ret = inode_ctx_get (local->cont.lookup.inode, this, - &ctx); - - inode_ctx = (afr_inode_ctx_t *)(long) ctx; - + if (priv->read_child >= 0) { - inode_ctx->read_child = priv->read_child; + afr_set_read_child (this, + local->cont.lookup.inode, + priv->read_child); } else { - inode_ctx->read_child = child_index; + afr_set_read_child (this, + local->cont.lookup.inode, + child_index); } } else { @@ -446,15 +529,14 @@ afr_lookup_cbk (call_frame_t *frame, void *cookie, priv->child_count, child_index); - ret = inode_ctx_get (local->cont.lookup.inode, this, - &ctx); - - inode_ctx = (afr_inode_ctx_t *)(long) ctx; - if (priv->read_child >= 0) { - inode_ctx->read_child = priv->read_child; + afr_set_read_child (this, + local->cont.lookup.inode, + priv->read_child); } else { - inode_ctx->read_child = local->read_child_index; + afr_set_read_child (this, + local->cont.lookup.inode, + local->read_child_index); } } @@ -505,12 +587,8 @@ unlock: if (local->success_count) { /* check for split-brain case in previous lookup */ - ret = inode_ctx_get (local->cont.lookup.inode, this, - &ctx); - - inode_ctx = (afr_inode_ctx_t *)(long) ctx; - - if (inode_ctx->split_brain) + if (afr_is_split_brain (this, + local->cont.lookup.inode)) local->need_data_self_heal = 1; } @@ -550,9 +628,6 @@ afr_lookup (call_frame_t *frame, xlator_t *this, int32_t op_errno = 0; - afr_inode_ctx_t *inode_ctx = NULL; - uint64_t ctx; - priv = this->private; ALLOC_OR_GOTO (local, afr_local_t, out); @@ -563,30 +638,6 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_copy (&local->loc, loc); - ret = inode_ctx_get (loc->inode, this, &ctx); - inode_ctx = (afr_inode_ctx_t *)(long) ctx; - - if (ret < 0) { - inode_ctx = CALLOC (1, sizeof (afr_inode_ctx_t)); - - if (!inode_ctx) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, - "out of memory :("); - goto out; - } - - ret = inode_ctx_put (loc->inode, this, - (uint64_t)(long) inode_ctx); - - if (ret < 0) { - op_errno = EINVAL; - gf_log (this->name, GF_LOG_ERROR, - "could not set inode ctx"); - goto out; - } - } - LOCK (&priv->read_child_lock); { local->read_child_index = (++priv->read_child_rr) @@ -708,9 +759,6 @@ afr_open (call_frame_t *frame, xlator_t *this, afr_private_t * priv = NULL; afr_local_t * local = NULL; - afr_inode_ctx_t * inode_ctx = NULL; - uint64_t ctx; - int i = 0; int ret = -1; @@ -726,18 +774,7 @@ afr_open (call_frame_t *frame, xlator_t *this, priv = this->private; - ret = inode_ctx_get (loc->inode, this, &ctx); - - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "inode ctx not set!"); - op_errno = EINVAL; - goto out; - } - - inode_ctx = (afr_inode_ctx_t *)(long) ctx; - - if (inode_ctx->split_brain) { + if (afr_is_split_brain (this, loc->inode)) { /* self-heal failed */ gf_log (this->name, GF_LOG_WARNING, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index a447b74f4..ac1a5f6ad 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -394,22 +394,6 @@ typedef struct _afr_local { } afr_local_t; -typedef struct { - /* - split-brain situation in which afr - can do nothing - */ - gf_boolean_t split_brain; - - /* - subvolume from which all reads should - happen for this inode - */ - int read_child; - -} afr_inode_ctx_t; - - /* try alloc and if it fails, goto label */ #define ALLOC_OR_GOTO(var, type, label) do { \ var = CALLOC (sizeof (type), 1); \ @@ -430,6 +414,12 @@ typedef struct { /* have we tried all children? */ #define all_tried(i, count) ((i) == (count) - 1) +uint64_t +afr_read_child (xlator_t *this, inode_t *inode); + +void +afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child); + void afr_build_parent_loc (loc_t *parent, loc_t *child); |