From ab99355f55ce8a55d5c9b339ced830723be5f213 Mon Sep 17 00:00:00 2001 From: Vikas Gorur Date: Tue, 1 Dec 2009 06:11:04 +0000 Subject: cluster/afr: Set the self-heal "source" as read subvolume even when not doing self-heal. This patch sets the read-subvolume equal to the self-heal "source" even if we're not doing self-heal (because someone else is already doing it). Signed-off-by: Vikas Gorur Signed-off-by: Anand V. Avati BUG: 320 (Improve self-heal performance) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=320 --- xlators/cluster/afr/src/afr-self-heal-common.c | 15 +++- xlators/cluster/afr/src/afr-self-heal-data.c | 32 +++++++++ xlators/cluster/afr/src/afr-self-heal.h | 3 + xlators/cluster/afr/src/afr.c | 95 +++++++++++++++++++------- xlators/cluster/afr/src/afr.h | 1 + 5 files changed, 117 insertions(+), 29 deletions(-) diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index a8160f4e2..327cab032 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -364,10 +364,13 @@ afr_sh_mark_if_size_differs (afr_self_heal_t *sh, int child_count) for (i = 0; i < child_count; i++) { for (j = 0; j < child_count; j++) { + if (!sh->buf) + break; + if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[j]) && (pending_matrix[i][j] == 0) && (pending_matrix[j][i] == 0)) { - + pending_matrix[i][j] = 1; pending_matrix[j][i] = 1; @@ -387,7 +390,7 @@ afr_sh_mark_biggest_fool_as_source (afr_self_heal_t *sh, { int i = 0; int biggest = 0; - + for (i = 0; i < child_count; i++) { if (characters[i].type == AFR_NODE_FOOL) { biggest = i; @@ -398,7 +401,10 @@ afr_sh_mark_biggest_fool_as_source (afr_self_heal_t *sh, for (i = 0; i < child_count; i++) { if (characters[i].type != AFR_NODE_FOOL) continue; - + + if (!sh->buf) + break; + if (SIZE_GREATER (&sh->buf[i], &sh->buf[biggest])) { biggest = i; } @@ -417,6 +423,9 @@ afr_sh_mark_biggest_as_source (afr_self_heal_t *sh, int 
child_count) int i; for (i = 0; i < child_count; i++) { + if (!sh->buf) + break; + if (SIZE_GREATER (&sh->buf[i], &sh->buf[biggest])) { biggest = i; } diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 66a424902..eb0ee9b20 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -741,6 +741,38 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this) } +int +afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr) +{ + afr_self_heal_t *sh = NULL; + afr_private_t *priv = NULL; + + int nsources = 0; + int source = 0; + int i = 0; + + sh = &local->self_heal; + priv = this->private; + + sh->pending_matrix = CALLOC (sizeof (int32_t *), priv->child_count); + for (i = 0; i < priv->child_count; i++) { + sh->pending_matrix[i] = CALLOC (sizeof (int32_t), + priv->child_count); + } + sh->sources = CALLOC (priv->child_count, sizeof (*sh->sources)); + + afr_sh_build_pending_matrix (priv, sh->pending_matrix, xattr, + priv->child_count, AFR_DATA_TRANSACTION); + + nsources = afr_sh_mark_sources (sh, priv->child_count, + AFR_SELF_HEAL_DATA); + + source = afr_sh_select_source (sh->sources, priv->child_count); + + return source; +} + + int afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 3c8b1333e..1c2743a48 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -45,6 +45,9 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this); int afr_self_heal_metadata (call_frame_t *frame, xlator_t *this); +int +afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr); + int afr_self_heal (call_frame_t *frame, xlator_t *this); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 98eaeb6dd..9448dc4e6 100644 
--- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -332,8 +332,20 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) FREE (local->child_up); { /* lookup */ - if (local->cont.lookup.xattr) - dict_unref (local->cont.lookup.xattr); + if (local->cont.lookup.xattrs) { + for (i = 0; i < priv->child_count; i++) { + if (local->cont.lookup.xattrs[i]) { + dict_unref (local->cont.lookup.xattrs[i]); + local->cont.lookup.xattrs[i] = NULL; + } + } + FREE (local->cont.lookup.xattrs); + local->cont.lookup.xattrs = NULL; + } + + if (local->cont.lookup.xattr) { + dict_unref (local->cont.lookup.xattr); + } } { /* getxattr */ @@ -559,6 +571,9 @@ afr_lookup_self_heal_check (afr_local_t *local, struct stat *buf, static void afr_lookup_done (call_frame_t *frame, xlator_t *this, struct stat *lookup_buf) { + int unwind = 1; + int source = -1; + afr_local_t *local = NULL; local = frame->local; @@ -597,25 +612,44 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this, struct stat *lookup_buf) if ((local->self_heal.need_metadata_self_heal || local->self_heal.need_data_self_heal || local->self_heal.need_entry_self_heal) - && (!local->open_fd_count && - !local->inodelk_count && - !local->entrylk_count) && ((!local->cont.lookup.is_revalidate) || (local->op_ret != -1))) { - if (!local->cont.lookup.inode->st_mode) { - /* fix for RT #602 */ - local->cont.lookup.inode->st_mode = - lookup_buf->st_mode; - } + if (local->open_fd_count + || local->inodelk_count + || local->entrylk_count) { - local->self_heal.background = _gf_true; - local->self_heal.mode = local->cont.lookup.buf.st_mode; - local->self_heal.unwind = afr_self_heal_lookup_unwind; + /* Someone else is doing self-heal on this file. 
+ So just make a best effort to set the read-subvolume + and return */ - afr_self_heal (frame, this); + if (S_ISREG (local->cont.lookup.inode->st_mode)) { + source = afr_self_heal_get_source (this, local, local->cont.lookup.xattrs); - } else { + if (source >= 0) { + afr_set_read_child (this, + local->cont.lookup.inode, + source); + } + } + } else { + if (!local->cont.lookup.inode->st_mode) { + /* fix for RT #602 */ + local->cont.lookup.inode->st_mode = + lookup_buf->st_mode; + } + + local->self_heal.background = _gf_true; + local->self_heal.mode = local->cont.lookup.buf.st_mode; + local->self_heal.unwind = afr_self_heal_lookup_unwind; + + unwind = 0; + + afr_self_heal (frame, this); + } + } + + if (unwind) { AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, local->cont.lookup.inode, @@ -705,9 +739,10 @@ afr_fresh_lookup_cbk (call_frame_t *frame, void *cookie, if (local->op_errno != ESTALE) local->op_ret = op_ret; - local->cont.lookup.inode = inode; - local->cont.lookup.xattr = dict_ref (xattr); - local->cont.lookup.postparent = *postparent; + local->cont.lookup.inode = inode; + local->cont.lookup.xattr = dict_ref (xattr); + local->cont.lookup.xattrs[child_index] = dict_ref (xattr); + local->cont.lookup.postparent = *postparent; *lookup_buf = *buf; @@ -735,9 +770,11 @@ afr_fresh_lookup_cbk (call_frame_t *frame, void *cookie, if (local->cont.lookup.xattr) dict_unref (local->cont.lookup.xattr); - local->cont.lookup.inode = inode; local->cont.lookup.xattr = dict_ref (xattr); - local->cont.lookup.postparent = *postparent; + + local->cont.lookup.inode = inode; + local->cont.lookup.xattrs[child_index] = dict_ref (xattr); + local->cont.lookup.postparent = *postparent; *lookup_buf = *buf; @@ -830,9 +867,10 @@ afr_revalidate_lookup_cbk (call_frame_t *frame, void *cookie, if (local->op_errno != ESTALE) local->op_ret = op_ret; - local->cont.lookup.inode = inode; - local->cont.lookup.xattr = dict_ref (xattr); - local->cont.lookup.postparent = *postparent; + 
local->cont.lookup.inode = inode; + local->cont.lookup.xattr = dict_ref (xattr); + local->cont.lookup.xattrs[child_index] = dict_ref (xattr); + local->cont.lookup.postparent = *postparent; *lookup_buf = *buf; @@ -859,12 +897,14 @@ afr_revalidate_lookup_cbk (call_frame_t *frame, void *cookie, lookup has succeeded on the read child. So use its inode number */ + if (local->cont.lookup.xattr) dict_unref (local->cont.lookup.xattr); - local->cont.lookup.inode = inode; - local->cont.lookup.xattr = dict_ref (xattr); - local->cont.lookup.postparent = *postparent; + local->cont.lookup.inode = inode; + local->cont.lookup.xattr = dict_ref (xattr); + local->cont.lookup.xattrs[child_index] = dict_ref (xattr); + local->cont.lookup.postparent = *postparent; *lookup_buf = *buf; @@ -950,6 +990,9 @@ afr_lookup (call_frame_t *frame, xlator_t *this, local->child_up = memdup (priv->child_up, priv->child_count); + local->cont.lookup.xattrs = CALLOC (priv->child_count, + sizeof (*local->cont.lookup.xattr)); + local->call_count = afr_up_children_count (priv->child_count, local->child_up); call_count = local->call_count; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 09f76351a..bf07a89d7 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -257,6 +257,7 @@ typedef struct _afr_local { uint64_t gen; ino_t parent_ino; dict_t *xattr; + dict_t **xattrs; gf_boolean_t is_revalidate; } lookup; -- cgit