summaryrefslogtreecommitdiffstats
path: root/xlators/mgmt/glusterd/src/glusterd-op-sm.c
diff options
context:
space:
mode:
author Jeff Darcy <jdarcy@redhat.com> 2014-05-07 19:31:30 +0000
committer Vijay Bellur <vbellur@redhat.com> 2015-05-10 06:17:22 -0700
commit 4eaaf5188fe24a4707dc2cf2934525083cf8e64f (patch)
tree 119e440d7ba0bbd85a929294915ef54085b74ffb /xlators/mgmt/glusterd/src/glusterd-op-sm.c
parent 4b7914384e2613e5ec7c618071cb89187ed6f870 (diff)
dht: make lookup-unhashed=auto do something actually useful
The key concept here is to determine whether a directory is "clean" by comparing its last-known-good topology to the current one for the volume. These are stored as "commit hashes" on the directory and the volume root respectively. The volume's commit hash changes whenever a brick is added or removed, and a fix-layout is done. A directory's commit hash changes only when a full rebalance (not just fix-layout) is done on it.

If all bricks are present and have a directory commit hash that matches the volume commit hash, then we can assume that every file is in its "proper" place. Therefore, if we look for a file in that proper place and don't find it, we can assume it's not on any other subvolume and *safely* skip the global (broadcast to all) lookup.

Change-Id: Id6ce4593ba1f7daffa74cfab591cb45960629ae3
BUG: 1219637
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Signed-off-by: Shyam <srangana@redhat.com>
Reviewed-on: http://review.gluster.org/7702
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Tested-by: NetBSD Build System
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-op-sm.c')
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-op-sm.c 105
1 files changed, 74 insertions, 31 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index d90b392546a..969548b7600 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -3404,6 +3404,36 @@ out:
}
int
+gd_set_commit_hash (dict_t *dict)
+{
+ struct timeval tv;
+ uint32_t hash;
+
+ /*
+ * Generate a time-derived, non-zero 32-bit value and store it in
+ * dict under the key "commit-hash". Returns whatever
+ * dict_set_uint32 returns.
+ *
+ * We need a commit hash that won't conflict with others we might have
+ * set, or zero which is the implicit value if we never have. Using
+ * seconds<<3 like this ensures that we'll only get a collision if two
+ * rebalances are separated by a multiple of 2^29 seconds - about
+ * 17 years - and even then the low-order bits set below must match as
+ * well (roughly a 1/3 chance, since exactly one of three bits is set).
+ * It's far more likely that this code will have changed completely by
+ * then. If not, call me in 2031.
+ *
+ * P.S. Time zone changes? Yeah, right.
+ */
+ gettimeofday (&tv, NULL);
+
+ /*
+ * Convert to uint32_t before shifting: left-shifting a signed time_t
+ * past its range is undefined behaviour where time_t is 32 bits wide.
+ * Unsigned arithmetic simply wraps, which is the truncation we want.
+ */
+ hash = (uint32_t) tv.tv_sec << 3;
+
+ /*
+ * Make sure at least one of those low-order bits is set, so the
+ * result can never be zero. The extra shifting (usec>>10 is roughly
+ * milliseconds) is because not all machines have sub-millisecond
+ * time resolution.
+ */
+ hash |= 1 << ((tv.tv_usec >> 10) % 3);
+
+ return dict_set_uint32 (dict, "commit-hash", hash);
+}
+
+int
glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
{
int ret = -1;
@@ -3415,6 +3445,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
uint32_t status_cmd = GF_CLI_STATUS_NONE;
char *errstr = NULL;
xlator_t *this = NULL;
+ gf_boolean_t do_common = _gf_false;
GF_ASSERT (req);
@@ -3503,12 +3534,6 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
}
break;
- case GD_OP_SYNC_VOLUME:
- {
- dict_copy (dict, req_dict);
- break;
- }
-
case GD_OP_REMOVE_BRICK:
{
dict_t *dict = ctx;
@@ -3525,6 +3550,10 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
if (ret)
goto out;
+ if (gd_set_commit_hash(dict) != 0) {
+ goto out;
+ }
+
dict_destroy (req_dict);
req_dict = dict_ref (dict);
}
@@ -3544,8 +3573,10 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
dict_copy (dict, req_dict);
break;
}
+ do_common = _gf_true;
}
- /*fall-through*/
+ break;
+
case GD_OP_DELETE_VOLUME:
case GD_OP_START_VOLUME:
case GD_OP_STOP_VOLUME:
@@ -3555,7 +3586,6 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
case GD_OP_LOG_ROTATE:
case GD_OP_QUOTA:
case GD_OP_PROFILE_VOLUME:
- case GD_OP_REBALANCE:
case GD_OP_HEAL_VOLUME:
case GD_OP_STATEDUMP_VOLUME:
case GD_OP_CLEARLOCKS_VOLUME:
@@ -3563,49 +3593,62 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
case GD_OP_BARRIER:
case GD_OP_BITROT:
{
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "volname is not present in "
- "operation ctx");
- goto out;
- }
-
- if (strcasecmp (volname, "all")) {
- ret = glusterd_dict_set_volid (dict,
- volname,
- op_errstr);
- if (ret)
- goto out;
- }
- dict_copy (dict, req_dict);
+ do_common = _gf_true;
}
break;
- case GD_OP_COPY_FILE:
+ case GD_OP_REBALANCE:
{
- dict_copy (dict, req_dict);
- break;
+ if (gd_set_commit_hash(dict) != 0) {
+ goto out;
+ }
+ do_common = _gf_true;
}
+ break;
+ case GD_OP_SYNC_VOLUME:
+ case GD_OP_COPY_FILE:
case GD_OP_SYS_EXEC:
{
dict_copy (dict, req_dict);
- break;
}
+ break;
case GD_OP_GANESHA:
{
dict_copy (dict, req_dict);
- break;
}
+ break;
default:
break;
}
- *req = req_dict;
- ret = 0;
+ /*
+ * This has been moved out of the switch so that multiple ops with
+ * other special needs can all "fall through" to it.
+ */
+ if (do_common) {
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "volname is not present in "
+ "operation ctx");
+ goto out;
+ }
+
+ if (strcasecmp (volname, "all")) {
+ ret = glusterd_dict_set_volid (dict,
+ volname,
+ op_errstr);
+ if (ret)
+ goto out;
+ }
+ dict_copy (dict, req_dict);
+ }
+
+ *req = req_dict;
+ ret = 0;
out:
return ret;