diff options
author | Jeff Darcy <jdarcy@redhat.com> | 2014-05-07 19:31:30 +0000 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2015-05-10 06:17:22 -0700 |
commit | 4eaaf5188fe24a4707dc2cf2934525083cf8e64f (patch) | |
tree | 119e440d7ba0bbd85a929294915ef54085b74ffb /xlators/mgmt/glusterd/src/glusterd-op-sm.c | |
parent | 4b7914384e2613e5ec7c618071cb89187ed6f870 (diff) |
dht: make lookup-unhashed=auto do something actually useful
The key concept here is to determine whether a directory is "clean" by
comparing its last-known-good topology to the current one for the
volume. These are stored as "commit hashes" on the directory and the
volume root respectively. The volume's commit hash changes whenever a
brick is added or removed and a fix-layout is done. A directory's
commit hash changes only when a full rebalance (not just fix-layout)
is done on it. If all bricks are present and have a directory
commit hash that matches the volume commit hash, then we can assume
that every file is in its "proper" place. Therefore, if we look for
a file in that proper place and don't find it, we can assume it's not
on any other subvolume and *safely* skip the global (broadcast to all)
lookup.
Change-Id: Id6ce4593ba1f7daffa74cfab591cb45960629ae3
BUG: 1219637
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Signed-off-by: Shyam <srangana@redhat.com>
Reviewed-on: http://review.gluster.org/7702
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Tested-by: NetBSD Build System
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-op-sm.c')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-op-sm.c | 105 |
1 files changed, 74 insertions, 31 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index d90b392546a..969548b7600 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -3404,6 +3404,36 @@ out: } int +gd_set_commit_hash (dict_t *dict) +{ + struct timeval tv; + uint32_t hash; + + /* + * We need a commit hash that won't conflict with others we might have + * set, or zero which is the implicit value if we never have. Using + * seconds<<3 like this ensures that we'll only get a collision if two + * consecutive rebalances are separated by exactly 2^29 seconds - about + * 17 years - and even then there's only a 1/8 chance of a collision in + * the low order bits. It's far more likely that this code will have + * changed completely by then. If not, call me in 2031. + * + * P.S. Time zone changes? Yeah, right. + */ + gettimeofday (&tv, NULL); + hash = tv.tv_sec << 3; + + /* + * Make sure at least one of those low-order bits is set. The extra + * shifting is because not all machines have sub-millisecond time + * resolution. 
+ */ + hash |= 1 << ((tv.tv_usec >> 10) % 3); + + return dict_set_uint32 (dict, "commit-hash", hash); +} + +int glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) { int ret = -1; @@ -3415,6 +3445,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) uint32_t status_cmd = GF_CLI_STATUS_NONE; char *errstr = NULL; xlator_t *this = NULL; + gf_boolean_t do_common = _gf_false; GF_ASSERT (req); @@ -3503,12 +3534,6 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) } break; - case GD_OP_SYNC_VOLUME: - { - dict_copy (dict, req_dict); - break; - } - case GD_OP_REMOVE_BRICK: { dict_t *dict = ctx; @@ -3525,6 +3550,10 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) if (ret) goto out; + if (gd_set_commit_hash(dict) != 0) { + goto out; + } + dict_destroy (req_dict); req_dict = dict_ref (dict); } @@ -3544,8 +3573,10 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) dict_copy (dict, req_dict); break; } + do_common = _gf_true; } - /*fall-through*/ + break; + case GD_OP_DELETE_VOLUME: case GD_OP_START_VOLUME: case GD_OP_STOP_VOLUME: @@ -3555,7 +3586,6 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) case GD_OP_LOG_ROTATE: case GD_OP_QUOTA: case GD_OP_PROFILE_VOLUME: - case GD_OP_REBALANCE: case GD_OP_HEAL_VOLUME: case GD_OP_STATEDUMP_VOLUME: case GD_OP_CLEARLOCKS_VOLUME: @@ -3563,49 +3593,62 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) case GD_OP_BARRIER: case GD_OP_BITROT: { - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log (this->name, GF_LOG_CRITICAL, - "volname is not present in " - "operation ctx"); - goto out; - } - - if (strcasecmp (volname, "all")) { - ret = glusterd_dict_set_volid (dict, - volname, - op_errstr); - if (ret) - goto out; - } - dict_copy (dict, req_dict); + do_common = _gf_true; } break; - case GD_OP_COPY_FILE: + case GD_OP_REBALANCE: { - 
dict_copy (dict, req_dict); - break; + if (gd_set_commit_hash(dict) != 0) { + goto out; + } + do_common = _gf_true; } + break; + case GD_OP_SYNC_VOLUME: + case GD_OP_COPY_FILE: case GD_OP_SYS_EXEC: { dict_copy (dict, req_dict); - break; } + break; case GD_OP_GANESHA: { dict_copy (dict, req_dict); - break; } + break; default: break; } - *req = req_dict; - ret = 0; + /* + * This has been moved out of the switch so that multiple ops with + * other special needs can all "fall through" to it. + */ + if (do_common) { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "volname is not present in " + "operation ctx"); + goto out; + } + + if (strcasecmp (volname, "all")) { + ret = glusterd_dict_set_volid (dict, + volname, + op_errstr); + if (ret) + goto out; + } + dict_copy (dict, req_dict); + } + + *req = req_dict; + ret = 0; out: return ret; |