diff options
author | Kaushal M <kaushal@redhat.com> | 2013-08-12 10:43:52 +0530 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2013-09-29 21:39:38 -0700 |
commit | 9b286d5937e7c78fd17185e9afe25e809153a265 (patch) | |
tree | 3495dc5594d43d677555cb292d722a9790de55e2 | |
parent | 623d232d29bbed71349334988054a5bd205b1a39 (diff) |
glusterd: Calculate volume op-versions only on set/reset
The volume op-versions are calculated during a volume set/reset, reading a
volume from disk and importing a volume during probe or volume sync. The
calculation of the volume op-version depends on the clusters op-version as some
features are enabled automatically depending on the clusters op-version. We
also don't store the volume op-versions persistently and don't export the
volume op-versions during sync. Due to this, there can occur cases which will
lead to inconsistencies in volumes in different peers. One such case is below,
Consider, a cluster made up 3 peers P1, P2 and P3, operating at op-version N.
The cluster has two volumes V1 and V2, which have volume op-versions N (since
volume op-version cannot be greater than cluster op-version). We have,
Cluster-op-version = N
V1 op-version = N
V2 op-version = N
A set operation on V1 causes the clusters op-version to be bumped up to N+1.
Assume that there exist some features that are automatically enabled on
op-version N+1. The op-version of V2 remains at N as no operation has been
performed on it. So,
Cluster op-version = N+1
V1 op-version = N+1
V2 op-version = N
Now, we probe a new peer P4. On the new peer we will have the following
op-versions,
Cluster op-version = N+1
V1 op-version = N+1
V2 op-version = N+1
This happens because we don't send volume op-versions during the sync after
probe. P4 will freshly calculate the op-version of V2 (assuming features have
been auto enabled due to the cluster op-version being N+1) as N+1.
Another case is when glusterd on a peer restarts. Assume P3 was restarted,
glusterd will recalculate the volume op-versions during the restore state.
Again, op-version of V2 will be calculated as N+1 assuming auto enabled
features. This will lead to inconsistency in the volume representation in
memory and on disk, as glusterd will assume the volume contains auto enabled
features, but the volfiles don't contain them as they were not regenrated.
These kind of issues can be solved by calculating the volume op-version only
when features are enabled and disabled (ie. during volume set/reset),
persisting the volume-op-versions and exporting/importing them.
Change-Id: I52de0668c92628622e85f4588fb28829a7231132
BUG: 1005043
Signed-off-by: Kaushal M <kaushal@redhat.com>
Reviewed-on: http://review.gluster.org/5568
Reviewed-by: Amar Tumballi <amarts@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 24 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 49 |
3 files changed, 69 insertions, 6 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 2e6dd6b43aa..57809f400b8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -579,6 +579,17 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) goto out; } + snprintf (buf, sizeof (buf), "%d", volinfo->op_version); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->client_op_version); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION, + buf); + if (ret) + goto out; + out: if (ret) gf_log (THIS->name, GF_LOG_ERROR, "Unable to write volume " @@ -1765,7 +1776,6 @@ glusterd_store_retrieve_volume (char *volname) gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; ret = glusterd_volinfo_new (&volinfo); - if (ret) goto out; @@ -1778,12 +1788,10 @@ glusterd_store_retrieve_volume (char *volname) GLUSTERD_VOLUME_INFO_FILE); ret = gf_store_handle_retrieve (path, &volinfo->shandle); - if (ret) goto out; ret = gf_store_iter_new (volinfo->shandle, &iter); - if (ret) goto out; @@ -1854,6 +1862,12 @@ glusterd_store_retrieve_volume (char *volname) } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_BACKEND, strlen (GLUSTERD_STORE_KEY_VOL_BACKEND))) { volinfo->backend = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_OP_VERSION, + strlen (GLUSTERD_STORE_KEY_VOL_OP_VERSION))) { + volinfo->op_version = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION, + strlen (GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION))) { + volinfo->client_op_version = atoi (value); } else { if (is_key_glusterd_hooks_friendly (key)) { @@ -1932,6 +1946,9 @@ glusterd_store_retrieve_volume (char *volname) volinfo->subvol_count = (volinfo->brick_count / volinfo->dist_leaf_count); + /* Only calculate volume op-versions if they are not found */ + if (!volinfo->op_version && !volinfo->client_op_version) + gd_update_volume_op_versions (volinfo); } if (op_errno != GD_STORE_EOF) @@ -1950,7 +1967,6 @@ glusterd_store_retrieve_volume (char *volname) if (ret) goto out; - gd_update_volume_op_versions (volinfo); list_add_tail (&volinfo->vol_list, &priv->volumes); diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index e4f9e4a0a2f..9882225ab2b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -56,6 +56,8 @@ typedef enum glusterd_store_ver_ac_{ #define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op" #define GLUSTERD_STORE_KEY_USERNAME "username" #define GLUSTERD_STORE_KEY_PASSWORD "password" +#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version" +#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version" #define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname" #define GLUSTERD_STORE_KEY_BRICK_PATH "path" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 0ddf6ca35a6..b0f9a210f1c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1916,6 +1916,17 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, i++; } + /* Add volume op-versions to dict. This prevents volume inconsistencies + * in the cluster + */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.op-version", count); + ret = dict_set_int32 (dict, key, volinfo->op_version); + if (ret) + goto out; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.client-op-version", count); + ret = dict_set_int32 (dict, key, volinfo->client_op_version); out: GF_FREE (volume_id_str); @@ -2506,6 +2517,8 @@ glusterd_import_volinfo (dict_t *vols, int count, int rb_status = 0; char *rebalance_id_str = NULL; char *rb_id_str = NULL; + int op_version = 0; + int client_op_version = 0; GF_ASSERT (vols); GF_ASSERT (volinfo); @@ -2732,6 +2745,40 @@ glusterd_import_volinfo (dict_t *vols, int count, ret = glusterd_import_friend_volume_opts (vols, count, new_volinfo); if (ret) goto out; + + /* Import the volume's op-versions if available else set it to 1. + * Not having op-versions implies this informtation was obtained from a + * op-version 1 friend (gluster-3.3), ergo the cluster is at op-version + * 1 and all volumes are at op-versions 1. + * + * Either both the volume op-versions should be absent or both should be + * present. Only one being present is a failure + */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.op-version", count); + ret = dict_get_int32 (vols, key, &op_version); + if (ret) + ret = 0; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.client-op-version", count); + ret = dict_get_int32 (vols, key, &client_op_version); + if (ret) + ret = 0; + + if (op_version && client_op_version) { + new_volinfo->op_version = op_version; + new_volinfo->client_op_version = client_op_version; + } else if (((op_version == 0) && (client_op_version != 0)) || + ((op_version != 0) && (client_op_version == 0))) { + ret = -1; + gf_log ("glusterd", GF_LOG_ERROR, + "Only one volume op-version found"); + goto out; + } else { + new_volinfo->op_version = 1; + new_volinfo->client_op_version = 1; + } + ret = glusterd_import_bricks (vols, count, new_volinfo); if (ret) goto out; @@ -2899,8 +2946,6 @@ glusterd_import_friend_volume (dict_t *vols, size_t count) (void) glusterd_start_bricks (new_volinfo); } - gd_update_volume_op_versions (new_volinfo); - ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); ret = glusterd_create_volfiles_and_notify_services (new_volinfo); if (ret) |