diff options
| author | Jeff Darcy <jdarcy@redhat.com> | 2014-05-07 19:31:30 +0000 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2015-05-10 06:17:22 -0700 | 
| commit | 4eaaf5188fe24a4707dc2cf2934525083cf8e64f (patch) | |
| tree | 119e440d7ba0bbd85a929294915ef54085b74ffb /xlators/mgmt/glusterd/src | |
| parent | 4b7914384e2613e5ec7c618071cb89187ed6f870 (diff) | |
dht: make lookup-unhashed=auto do something actually useful
The key concept here is to determine whether a directory is "clean" by
comparing its last-known-good topology to the current one for the
volume.  These are stored as "commit hashes" on the directory and the
volume root, respectively.  The volume's commit hash changes whenever a
brick is added or removed and a fix-layout is then done.  A directory's
commit hash changes only when a full rebalance (not just fix-layout)
is done on it.  If all bricks are present and have a directory
commit hash that matches the volume commit hash, then we can assume
that every file is in its "proper" place. Therefore, if we look for
a file in that proper place and don't find it, we can assume it's not
on any other subvolume and *safely* skip the global (broadcast to all)
lookup.
Change-Id: Id6ce4593ba1f7daffa74cfab591cb45960629ae3
BUG: 1219637
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Signed-off-by: Shyam <srangana@redhat.com>
Reviewed-on: http://review.gluster.org/7702
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Tested-by: NetBSD Build System
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src')
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 5 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 105 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-rebalance.c | 7 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 1 | 
4 files changed, 87 insertions, 31 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 75c7926e49e..f2e7d3a9d3e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -2022,6 +2022,8 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)          dict_t                  *bricks_dict   = NULL;          char                    *brick_tmpstr  = NULL;          int                      start_remove  = 0; +        uint32_t                 commit_hash   = 0; +          this = THIS;          GF_ASSERT (this); @@ -2287,6 +2289,9 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)                  break;          }          if (!force && need_rebalance) { +                if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) { +                        volinfo->rebal.commit_hash = commit_hash; +                }                  /* perform the rebalance operations */                  ret = glusterd_handle_defrag_start                          (volinfo, err_str, sizeof (err_str), diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index d90b392546a..969548b7600 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -3404,6 +3404,36 @@ out:  }  int +gd_set_commit_hash (dict_t *dict) +{ +        struct timeval          tv; +        uint32_t                hash; + +        /* +         * We need a commit hash that won't conflict with others we might have +         * set, or zero which is the implicit value if we never have.  Using +         * seconds<<3 like this ensures that we'll only get a collision if two +         * consecutive rebalances are separated by exactly 2^29 seconds - about +         * 17 years - and even then there's only a 1/8 chance of a collision in +         * the low order bits.  
It's far more likely that this code will have +         * changed completely by then.  If not, call me in 2031. +         * +         * P.S. Time zone changes?  Yeah, right. +         */ +        gettimeofday (&tv, NULL); +        hash = tv.tv_sec << 3; + +        /* +         * Make sure at least one of those low-order bits is set.  The extra +         * shifting is because not all machines have sub-millisecond time +         * resolution. +         */ +        hash |= 1 << ((tv.tv_usec >> 10) % 3); + +        return dict_set_uint32 (dict, "commit-hash", hash); +} + +int  glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)  {          int                     ret = -1; @@ -3415,6 +3445,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)          uint32_t                status_cmd = GF_CLI_STATUS_NONE;          char                    *errstr = NULL;          xlator_t                *this = NULL; +        gf_boolean_t            do_common = _gf_false;          GF_ASSERT (req); @@ -3503,12 +3534,6 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)                          }                          break; -                case GD_OP_SYNC_VOLUME: -                        { -                                dict_copy (dict, req_dict); -                                break; -                        } -                  case GD_OP_REMOVE_BRICK:                          {                                  dict_t *dict = ctx; @@ -3525,6 +3550,10 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)                                  if (ret)                                          goto out; +                                if (gd_set_commit_hash(dict) != 0) { +                                        goto out; +                                } +                                  dict_destroy (req_dict);                                  req_dict = dict_ref (dict);              
            } @@ -3544,8 +3573,10 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)                                          dict_copy (dict, req_dict);                                          break;                                  } +                                do_common = _gf_true;                          } -                        /*fall-through*/ +                        break; +                  case GD_OP_DELETE_VOLUME:                  case GD_OP_START_VOLUME:                  case GD_OP_STOP_VOLUME: @@ -3555,7 +3586,6 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)                  case GD_OP_LOG_ROTATE:                  case GD_OP_QUOTA:                  case GD_OP_PROFILE_VOLUME: -                case GD_OP_REBALANCE:                  case GD_OP_HEAL_VOLUME:                  case GD_OP_STATEDUMP_VOLUME:                  case GD_OP_CLEARLOCKS_VOLUME: @@ -3563,49 +3593,62 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)                  case GD_OP_BARRIER:                  case GD_OP_BITROT:                          { -                                ret = dict_get_str (dict, "volname", &volname); -                                if (ret) { -                                        gf_log (this->name, GF_LOG_CRITICAL, -                                                "volname is not present in " -                                                "operation ctx"); -                                        goto out; -                                } - -                                if (strcasecmp (volname, "all")) { -                                        ret = glusterd_dict_set_volid (dict, -                                                                       volname, -                                                                     op_errstr); -                                        if (ret) -                                                goto out; -         
                       } -                                dict_copy (dict, req_dict); +                                do_common = _gf_true;                          }                          break; -                case GD_OP_COPY_FILE: +                case GD_OP_REBALANCE:                          { -                                dict_copy (dict, req_dict); -                                break; +                                if (gd_set_commit_hash(dict) != 0) { +                                        goto out; +                                } +                                do_common = _gf_true;                          } +                        break; +                case GD_OP_SYNC_VOLUME: +                case GD_OP_COPY_FILE:                  case GD_OP_SYS_EXEC:                          {                                  dict_copy (dict, req_dict); -                                break;                          } +                        break;                  case GD_OP_GANESHA:                          {                                  dict_copy (dict, req_dict); -                                break;                          } +                        break;                  default:                          break;          } -        *req = req_dict; -        ret = 0; +        /* +         * This has been moved out of the switch so that multiple ops with +         * other special needs can all "fall through" to it. 
+         */ +        if (do_common) { +                ret = dict_get_str (dict, "volname", &volname); +                if (ret) { +                        gf_log (this->name, GF_LOG_CRITICAL, +                                "volname is not present in " +                                "operation ctx"); +                        goto out; +                } + +                if (strcasecmp (volname, "all")) { +                        ret = glusterd_dict_set_volid (dict, +                                                       volname, +                                                     op_errstr); +                        if (ret) +                                goto out; +                } +                dict_copy (dict, req_dict); +        } + +       *req = req_dict; +       ret = 0;  out:          return ret; diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 1566f54695f..9111c07b8fc 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -284,6 +284,9 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,          runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);          runner_add_arg (&runner, "--xlator-option");          runner_argprintf (&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID)); +        runner_add_arg (&runner, "--xlator-option"); +        runner_argprintf (&runner, "*dht.commit-hash=%u", +                          volinfo->rebal.commit_hash);          runner_add_arg (&runner, "--socket-file");          runner_argprintf (&runner, "%s",sockfile);          runner_add_arg (&runner, "--pid-file"); @@ -725,6 +728,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)          char                    *task_id_str = NULL;          dict_t                  *ctx = NULL;          xlator_t                *this = NULL; +        uint32_t                commit_hash;          
this = THIS;          GF_ASSERT (this); @@ -813,6 +817,9 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)                          glusterd_store_perform_node_state_store (volinfo);                          break;                  } +                if (dict_get_uint32 (dict, "commit-hash", &commit_hash) == 0) { +                        volinfo->rebal.commit_hash = commit_hash; +                }                  ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg),                                                      cmd, NULL, GD_OP_REBALANCE);                  break; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 335513b4934..384b6f4186e 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -286,6 +286,7 @@ struct glusterd_rebalance_ {          glusterd_op_t            op;          dict_t                  *dict; /* Dict to store misc information                                          * like list of bricks being removed */ +        uint32_t                 commit_hash;  };  typedef struct glusterd_rebalance_ glusterd_rebalance_t;  | 
