| author | Ravishankar N <ravishankar@redhat.com> | 2016-01-18 12:16:31 +0000 |
|---|---|---|
| committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2016-02-17 01:49:28 -0800 |
| commit | 45301bcd97825206f7f19b25a4ad722e7dc13cc6 | |
| tree | 841a3321def4d18d615daf30ae24c4b80eaa3a0e | |
| parent | d132a4704d9b1df55c4c4e56a8389078b80897bd | |
cli/ afr: op_ret for index heal launch
Backport of http://review.gluster.org/#/c/13303/
Problem:
If index heal is launched when some of the bricks are down, the glustershd of
that node sends a -1 op_ret to glusterd, which eventually propagates it to the
CLI. Also, glusterd sometimes sends an err_str and sometimes does not
(depending on whether the failure happens in the brick-op phase or the
commit-op phase), so the message that gets displayed varies from case to case:
"Launching heal operation to perform index self heal on volume testvol has been
unsuccessful"
                (OR)
"Commit failed on <host>. Please check log file for details."
Fix:
1. Modify afr_xl_op() to return -1 if index healing fails on at least one
brick.
2. Ignore glusterd's error string in gf_cli_heal_volume_cbk() and print a more
meaningful message.
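
As an aside, here is a minimal, self-contained sketch of the op_ret aggregation behind fix 1. launch_index_heal, brick_up, and spawn_index_healer are hypothetical stand-ins for the real afr private state and helpers; the actual change to afr_xl_op() is in the diff below:

```c
#include <stdio.h>

/* Hypothetical stand-in for afr_shd_index_healer_spawn(). */
static void
spawn_index_healer (int child)
{
        printf ("started index self-heal on brick %d\n", child);
}

/* Sketch of fix 1: start with op_ret = 0 and flip it to -1 as soon as
 * heal cannot be launched on some brick, instead of the old logic that
 * started at -1 and reset it to 0 on the first success (which hid
 * per-brick failures whenever any other brick succeeded). */
static int
launch_index_heal (int child_count, const int *brick_up)
{
        int op_ret = 0;
        int i      = 0;

        for (i = 0; i < child_count; i++) {
                if (!brick_up[i]) {
                        op_ret = -1;   /* remember the failure, keep going */
                        continue;
                }
                spawn_index_healer (i);
        }
        return op_ret;   /* -1 if at least one brick could not heal */
}

int
main (void)
{
        int brick_up[] = {1, 0, 1};   /* brick 1 is down */

        /* Expect -1: one brick failed even though the other two healed. */
        return launch_index_heal (3, brick_up) == -1 ? 0 : 1;
}
```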
The patch also fixes a bug in glusterfs_handle_translator_op() where, if we
encounter an error in the notify of one xlator, we break out of the loop
instead of sending the notify to the remaining xlators.
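
A minimal sketch of the corrected loop pattern follows; notify_all and notify_fn_t are hypothetical simplifications of the real XLATOR_NOTIFY machinery, and the actual change is in the glusterfsd-mgmt.c hunk of the diff below:

```c
#include <stddef.h>

/* Hypothetical simplification of XLATOR_NOTIFY: returns 0 on success,
 * non-zero when the xlator fails to handle the event. */
typedef int (*notify_fn_t) (void *xlator);

/* Sketch of the glusterfs_handle_translator_op() fix: record a notify
 * failure in op_ret and keep iterating, instead of breaking out of the
 * loop and silently skipping the remaining xlators. */
static int
notify_all (void **xlators, size_t count, notify_fn_t notify)
{
        int    op_ret = 0;
        size_t i      = 0;

        for (i = 0; i < count; i++) {
                if (notify (xlators[i]))
                        op_ret = -1;   /* capture, but do not break */
        }
        return op_ret;   /* -1 if any xlator failed to handle the event */
}
```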
Change-Id: I957f6c4b4d0a45453ffd5488e425cab5a3e0acca
BUG: 1306922
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/13435
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
| -rw-r--r-- | cli/src/cli-rpc-ops.c | 11 |
|---|---|---|
| -rw-r--r-- | glusterfsd/src/glusterfsd-mgmt.c | 6 |
| -rw-r--r-- | tests/basic/afr/arbiter.t | 2 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 5 |

4 files changed, 13 insertions, 11 deletions
```diff
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index b4fbd29c1f1..416b1e09539 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -8475,13 +8475,10 @@ gf_cli_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
         }
 
         if (rsp.op_ret) {
-                if (strcmp (rsp.op_errstr, "")) {
-                        cli_err ("%s", rsp.op_errstr);
-                } else {
-                        cli_err ("%s%s on volume %s has been unsuccessful",
-                                 operation, heal_op_str, volname);
-                }
-
+                cli_err ("%s%s on volume %s has been unsuccessful on "
+                         "bricks that are down. Please check if all brick "
+                         "processes are running.",
+                         operation, heal_op_str, volname);
                 ret = rsp.op_ret;
                 goto out;
         } else {
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 877adba2938..da801779079 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -563,6 +563,7 @@ int
 glusterfs_handle_translator_op (rpcsvc_request_t *req)
 {
         int32_t                  ret     = -1;
+        int32_t                  op_ret  = 0;
         gd1_mgmt_brick_op_req    xlator_req = {0,};
         dict_t                   *input    = NULL;
         xlator_t                 *xlator = NULL;
@@ -632,9 +633,12 @@ glusterfs_handle_translator_op (rpcsvc_request_t *req)
                 ret = dict_get_str (input, key, &xname);
                 xlator = xlator_search_by_name (any, xname);
                 XLATOR_NOTIFY (xlator, GF_EVENT_TRANSLATOR_OP, input, output);
+                /* If notify fails for an xlator we need to capture it but
+                 * continue with the loop. */
                 if (ret)
-                        break;
+                        op_ret = -1;
         }
+        ret = op_ret;
 out:
         glusterfs_xlator_op_response_send (req, ret, "", output);
         if (input)
diff --git a/tests/basic/afr/arbiter.t b/tests/basic/afr/arbiter.t
index f06fdb1c49f..be8f676d1ec 100644
--- a/tests/basic/afr/arbiter.t
+++ b/tests/basic/afr/arbiter.t
@@ -49,7 +49,7 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon on
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
-TEST $CLI volume heal $V0
+$CLI volume heal $V0
 EXPECT_WITHIN $HEAL_TIMEOUT '1' echo $(count_sh_entries $B0/$V0"1")
 EXPECT_WITHIN $HEAL_TIMEOUT '1' echo $(count_sh_entries $B0/$V0"2")
 
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 61b8b01afb4..9b8b8e85f2b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -1058,7 +1058,7 @@ afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
                 goto out;
         switch (op) {
         case GF_SHD_OP_HEAL_INDEX:
-		op_ret = -1;
+		op_ret = 0;
 
 		for (i = 0; i < priv->child_count; i++) {
 			healer = &shd->index_healers[i];
@@ -1067,10 +1067,12 @@ afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
 			if (!priv->child_up[i]) {
 				ret = dict_set_str (output, key,
 						    "Brick is not connected");
+                                op_ret = -1;
 			} else if (AFR_COUNT (priv->child_up,
 					      priv->child_count) < 2) {
 				ret = dict_set_str (output, key,
 						    "< 2 bricks in replica are up");
+                                op_ret = -1;
 			} else if (!afr_shd_is_subvol_local (this, healer->subvol)) {
 				ret = dict_set_str (output, key,
 						    "Brick is remote");
@@ -1078,7 +1080,6 @@ afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
 				ret = dict_set_str (output, key,
 						    "Started self-heal");
 				afr_shd_index_healer_spawn (this, i);
-				op_ret = 0;
 			}
 		}
                 break;
```
