summary | refs | log | tree | commit | diff | stats
path: root/xlators/mgmt/glusterd
diff options
context:
space:
mode:
author: Jeff Darcy <jdarcy@redhat.com> 2014-05-07 19:31:30 +0000
committer: Vijay Bellur <vbellur@redhat.com> 2015-05-10 06:17:22 -0700
commit: 4eaaf5188fe24a4707dc2cf2934525083cf8e64f (patch)
tree: 119e440d7ba0bbd85a929294915ef54085b74ffb /xlators/mgmt/glusterd
parent: 4b7914384e2613e5ec7c618071cb89187ed6f870 (diff)
dht: make lookup-unhashed=auto do something actually useful
The key concept here is to determine whether a directory is "clean" by comparing its last-known-good topology to the current one for the volume. These are stored as "commit hashes" on the directory and the volume root respectively. The volume's commit hash changes whenever a brick is added or removed, and a fix-layout is done. A directory's commit hash changes only when a full rebalance (not just fix-layout) is done on it. If all bricks are present and have a directory commit hash that matches the volume commit hash, then we can assume that every file is in its "proper" place. Therefore, if we look for a file in that proper place and don't find it, we can assume it's not on any other subvolume and *safely* skip the global (broadcast to all) lookup. Change-Id: Id6ce4593ba1f7daffa74cfab591cb45960629ae3 BUG: 1219637 Signed-off-by: Jeff Darcy <jdarcy@redhat.com> Signed-off-by: Shyam <srangana@redhat.com> Reviewed-on: http://review.gluster.org/7702 Tested-by: Gluster Build System <jenkins@build.gluster.com> Tested-by: NetBSD Build System Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd')
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-brick-ops.c 5
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-op-sm.c 105
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-rebalance.c 7
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd.h 1
4 files changed, 87 insertions, 31 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index 75c7926e49e..f2e7d3a9d3e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -2022,6 +2022,8 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
dict_t *bricks_dict = NULL;
char *brick_tmpstr = NULL;
int start_remove = 0;
+ uint32_t commit_hash = 0;
+
this = THIS;
GF_ASSERT (this);
@@ -2287,6 +2289,9 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
break;
}
if (!force && need_rebalance) {
+ if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+ volinfo->rebal.commit_hash = commit_hash;
+ }
/* perform the rebalance operations */
ret = glusterd_handle_defrag_start
(volinfo, err_str, sizeof (err_str),
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index d90b392546a..969548b7600 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -3404,6 +3404,36 @@ out:
}
int
+gd_set_commit_hash (dict_t *dict)
+{
+ struct timeval tv;
+ uint32_t hash;
+
+ /*
+ * Store a time-derived, non-zero "commit-hash" value in dict and
+ * return whatever dict_set_uint32() returns.
+ *
+ * The hash must not conflict with others we might have set, nor with
+ * zero, which is the implicit value if we never have. Using
+ * seconds<<3 means the upper bits repeat only if two rebalances are
+ * separated by exactly 2^29 seconds - about 17 years - and even then
+ * the low-order bits (one of three patterns, below) must match too.
+ * NOTE(review): the return value of gettimeofday() is not checked.
+ */
+ gettimeofday (&tv, NULL);
+ hash = tv.tv_sec << 3;
+
+ /*
+ * Set exactly one of the three low-order bits so the hash is never
+ * zero. The usec value is first shifted right by 10 because not all
+ * machines have sub-millisecond time resolution.
+ */
+ hash |= 1 << ((tv.tv_usec >> 10) % 3);
+
+ return dict_set_uint32 (dict, "commit-hash", hash);
+}
+
+int
glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
{
int ret = -1;
@@ -3415,6 +3445,7 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
uint32_t status_cmd = GF_CLI_STATUS_NONE;
char *errstr = NULL;
xlator_t *this = NULL;
+ gf_boolean_t do_common = _gf_false;
GF_ASSERT (req);
@@ -3503,12 +3534,6 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
}
break;
- case GD_OP_SYNC_VOLUME:
- {
- dict_copy (dict, req_dict);
- break;
- }
-
case GD_OP_REMOVE_BRICK:
{
dict_t *dict = ctx;
@@ -3525,6 +3550,10 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
if (ret)
goto out;
+ if (gd_set_commit_hash(dict) != 0) {
+ goto out;
+ }
+
dict_destroy (req_dict);
req_dict = dict_ref (dict);
}
@@ -3544,8 +3573,10 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
dict_copy (dict, req_dict);
break;
}
+ do_common = _gf_true;
}
- /*fall-through*/
+ break;
+
case GD_OP_DELETE_VOLUME:
case GD_OP_START_VOLUME:
case GD_OP_STOP_VOLUME:
@@ -3555,7 +3586,6 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
case GD_OP_LOG_ROTATE:
case GD_OP_QUOTA:
case GD_OP_PROFILE_VOLUME:
- case GD_OP_REBALANCE:
case GD_OP_HEAL_VOLUME:
case GD_OP_STATEDUMP_VOLUME:
case GD_OP_CLEARLOCKS_VOLUME:
@@ -3563,49 +3593,62 @@ glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx)
case GD_OP_BARRIER:
case GD_OP_BITROT:
{
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "volname is not present in "
- "operation ctx");
- goto out;
- }
-
- if (strcasecmp (volname, "all")) {
- ret = glusterd_dict_set_volid (dict,
- volname,
- op_errstr);
- if (ret)
- goto out;
- }
- dict_copy (dict, req_dict);
+ do_common = _gf_true;
}
break;
- case GD_OP_COPY_FILE:
+ case GD_OP_REBALANCE:
{
- dict_copy (dict, req_dict);
- break;
+ if (gd_set_commit_hash(dict) != 0) {
+ goto out;
+ }
+ do_common = _gf_true;
}
+ break;
+ case GD_OP_SYNC_VOLUME:
+ case GD_OP_COPY_FILE:
case GD_OP_SYS_EXEC:
{
dict_copy (dict, req_dict);
- break;
}
+ break;
case GD_OP_GANESHA:
{
dict_copy (dict, req_dict);
- break;
}
+ break;
default:
break;
}
- *req = req_dict;
- ret = 0;
+ /*
+ * This has been moved out of the switch so that multiple ops with
+ * other special needs can all "fall through" to it.
+ */
+ if (do_common) {
+ ret = dict_get_str (dict, "volname", &volname);
+ if (ret) {
+ gf_log (this->name, GF_LOG_CRITICAL,
+ "volname is not present in "
+ "operation ctx");
+ goto out;
+ }
+
+ if (strcasecmp (volname, "all")) {
+ ret = glusterd_dict_set_volid (dict,
+ volname,
+ op_errstr);
+ if (ret)
+ goto out;
+ }
+ dict_copy (dict, req_dict);
+ }
+
+ *req = req_dict;
+ ret = 0;
out:
return ret;
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 1566f54695f..9111c07b8fc 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -284,6 +284,9 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);
runner_add_arg (&runner, "--xlator-option");
runner_argprintf (&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID));
+ runner_add_arg (&runner, "--xlator-option");
+ runner_argprintf (&runner, "*dht.commit-hash=%u",
+ volinfo->rebal.commit_hash);
runner_add_arg (&runner, "--socket-file");
runner_argprintf (&runner, "%s",sockfile);
runner_add_arg (&runner, "--pid-file");
@@ -725,6 +728,7 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
char *task_id_str = NULL;
dict_t *ctx = NULL;
xlator_t *this = NULL;
+ uint32_t commit_hash;
this = THIS;
GF_ASSERT (this);
@@ -813,6 +817,9 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
glusterd_store_perform_node_state_store (volinfo);
break;
}
+ if (dict_get_uint32 (dict, "commit-hash", &commit_hash) == 0) {
+ volinfo->rebal.commit_hash = commit_hash;
+ }
ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg),
cmd, NULL, GD_OP_REBALANCE);
break;
diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h
index 335513b4934..384b6f4186e 100644
--- a/xlators/mgmt/glusterd/src/glusterd.h
+++ b/xlators/mgmt/glusterd/src/glusterd.h
@@ -286,6 +286,7 @@ struct glusterd_rebalance_ {
glusterd_op_t op;
dict_t *dict; /* Dict to store misc information
* like list of bricks being removed */
+ uint32_t commit_hash;
};
typedef struct glusterd_rebalance_ glusterd_rebalance_t;