diff options
| author | Kaushal M <kaushal@redhat.com> | 2013-08-12 10:43:52 +0530 | 
|---|---|---|
| committer | Anand Avati <avati@redhat.com> | 2013-09-13 12:11:54 -0700 | 
| commit | 536eccde0bbda0166ca2a2769069e6b9f7ecbf89 (patch) | |
| tree | 5a89ce2e6619f76c1f196eaa51605e99ca27f7b4 | |
| parent | 67b0e817720eb95aee706a11fdf4633607aacd82 (diff) | |
glusterd: Calculate volume op-versions only on set/reset (tag: v3.4.1qa2)
  Backport of http://review.gluster.org/5568
The volume op-versions are calculated during a volume set/reset, when reading a
volume from disk, and when importing a volume during probe or volume sync. The
calculation of the volume op-version depends on the cluster's op-version, as some
features are enabled automatically depending on the cluster's op-version. We
also don't store the volume op-versions persistently and don't export the
volume op-versions during sync. Due to this, cases can occur which will
lead to inconsistencies in volumes on different peers. One such case is described below.
Consider a cluster made up of 3 peers P1, P2 and P3, operating at op-version N.
The cluster has two volumes V1 and V2, which have volume op-versions N (since
volume op-version cannot be greater than cluster op-version). We have,
 Cluster op-version = N
 V1 op-version = N
 V2 op-version = N
A set operation on V1 causes the cluster's op-version to be bumped up to N+1.
Assume that there exist some features that are automatically enabled on
op-version N+1. The op-version of V2 remains at N as no operation has been
performed on it. So,
 Cluster op-version = N+1
 V1 op-version = N+1
 V2 op-version = N
Now, we probe a new peer P4. On the new peer we will have the following
op-versions,
 Cluster op-version = N+1
 V1 op-version = N+1
 V2 op-version = N+1
This happens because we don't send volume op-versions during the sync after
probe. P4 will freshly calculate the op-version of V2 (assuming features have
been auto enabled due to the cluster op-version being N+1) as N+1.
Another case is when glusterd on a peer restarts. Assume P3 was restarted,
glusterd will recalculate the volume op-versions during the restore state.
Again, op-version of V2 will be calculated as N+1 assuming auto enabled
features. This will lead to inconsistency in the volume representation in
memory and on disk, as glusterd will assume the volume contains auto enabled
features, but the volfiles don't contain them as they were not regenerated.
These kinds of issues can be solved by calculating the volume op-version only
when features are enabled and disabled (i.e. during volume set/reset),
persisting the volume op-versions and exporting/importing them.
BUG: 1005043
Change-Id: Id8bb05ba2a77e510739b3b1833f98b4d6d1fa4d7
Signed-off-by: Kaushal M <kaushal@redhat.com>
Reviewed-on: http://review.gluster.org/5832
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 24 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 2 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 49 | 
3 files changed, 69 insertions, 6 deletions
| diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index fd8222184..ae0c4e83b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -583,6 +583,17 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)                          goto out;          } +        snprintf (buf, sizeof (buf), "%d", volinfo->op_version); +        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf); +        if (ret) +                goto out; + +        snprintf (buf, sizeof (buf), "%d", volinfo->client_op_version); +        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION, +                                   buf); +        if (ret) +                goto out; +  out:          if (ret)                  gf_log (THIS->name, GF_LOG_ERROR, "Unable to write volume " @@ -1736,7 +1747,6 @@ glusterd_store_retrieve_volume (char    *volname)          gf_store_op_errno_t       op_errno              = GD_STORE_SUCCESS;          ret = glusterd_volinfo_new (&volinfo); -          if (ret)                  goto out; @@ -1749,12 +1759,10 @@ glusterd_store_retrieve_volume (char    *volname)                    GLUSTERD_VOLUME_INFO_FILE);          ret = gf_store_handle_retrieve (path, &volinfo->shandle); -          if (ret)                  goto out;          ret = gf_store_iter_new (volinfo->shandle, &iter); -          if (ret)                  goto out; @@ -1825,6 +1833,12 @@ glusterd_store_retrieve_volume (char    *volname)                  } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_BACKEND,                                       strlen (GLUSTERD_STORE_KEY_VOL_BACKEND))) {                          volinfo->backend = atoi (value); +                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_OP_VERSION, +                                strlen (GLUSTERD_STORE_KEY_VOL_OP_VERSION))) { +                        volinfo->op_version = 
atoi (value); +                } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION, +                                strlen (GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION))) { +                        volinfo->client_op_version = atoi (value);                  } else {                          if (is_key_glusterd_hooks_friendly (key)) { @@ -1903,6 +1917,9 @@ glusterd_store_retrieve_volume (char    *volname)                  volinfo->subvol_count = (volinfo->brick_count /                                           volinfo->dist_leaf_count); +                /* Only calculate volume op-versions if they are not found */ +                if (!volinfo->op_version && !volinfo->client_op_version) +                        gd_update_volume_op_versions (volinfo);          }          if (op_errno != GD_STORE_EOF) @@ -1921,7 +1938,6 @@ glusterd_store_retrieve_volume (char    *volname)          if (ret)                  goto out; -        gd_update_volume_op_versions (volinfo);          list_add_tail (&volinfo->vol_list, &priv->volumes); diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 30c6e09f1..4f39bdf21 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -56,6 +56,8 @@ typedef enum glusterd_store_ver_ac_{  #define GLUSTERD_STORE_KEY_DEFRAG_OP      "rebalance_op"  #define GLUSTERD_STORE_KEY_USERNAME       "username"  #define GLUSTERD_STORE_KEY_PASSWORD       "password" +#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version" +#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version"  #define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname"  #define GLUSTERD_STORE_KEY_BRICK_PATH     "path" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 27c9bdca2..ad5765d7b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -2077,6 +2077,17 @@ 
glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo,                  i++;          } +        /* Add volume op-versions to dict. This prevents volume inconsistencies +         * in the cluster +         */ +        memset (key, 0, sizeof (key)); +        snprintf (key, sizeof (key), "volume%d.op-version", count); +        ret = dict_set_int32 (dict, key, volinfo->op_version); +        if (ret) +                goto out; +        memset (key, 0, sizeof (key)); +        snprintf (key, sizeof (key), "volume%d.client-op-version", count); +        ret = dict_set_int32 (dict, key, volinfo->client_op_version);  out:          GF_FREE (volume_id_str); @@ -2666,6 +2677,8 @@ glusterd_import_volinfo (dict_t *vols, int count,          int                rb_status         = 0;          char               *rebalance_id_str = NULL;          char               *rb_id_str        = NULL; +        int                op_version        = 0; +        int                client_op_version = 0;          GF_ASSERT (vols);          GF_ASSERT (volinfo); @@ -2892,6 +2905,40 @@ glusterd_import_volinfo (dict_t *vols, int count,          ret = glusterd_import_friend_volume_opts (vols, count, new_volinfo);          if (ret)                  goto out; + +        /* Import the volume's op-versions if available else set it to 1. +         * Not having op-versions implies this informtation was obtained from a +         * op-version 1 friend (gluster-3.3), ergo the cluster is at op-version +         * 1 and all volumes are at op-versions 1. +         * +         * Either both the volume op-versions should be absent or both should be +         * present. 
Only one being present is a failure +         */ +        memset (key, 0, sizeof (key)); +        snprintf (key, sizeof (key), "volume%d.op-version", count); +        ret = dict_get_int32 (vols, key, &op_version); +        if (ret) +                ret = 0; +        memset (key, 0, sizeof (key)); +        snprintf (key, sizeof (key), "volume%d.client-op-version", count); +        ret = dict_get_int32 (vols, key, &client_op_version); +        if (ret) +                ret = 0; + +        if (op_version && client_op_version) { +                new_volinfo->op_version = op_version; +                new_volinfo->client_op_version = client_op_version; +        } else if (((op_version == 0) && (client_op_version != 0)) || +                   ((op_version != 0) && (client_op_version == 0))) { +                ret = -1; +                gf_log ("glusterd", GF_LOG_ERROR, +                        "Only one volume op-version found"); +                goto out; +        } else { +                new_volinfo->op_version = 1; +                new_volinfo->client_op_version = 1; +        } +          ret = glusterd_import_bricks (vols, count, new_volinfo);          if (ret)                  goto out; @@ -3059,8 +3106,6 @@ glusterd_import_friend_volume (dict_t *vols, size_t count)                  (void) glusterd_start_bricks (new_volinfo);          } -        gd_update_volume_op_versions (new_volinfo); -          ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);          ret = glusterd_create_volfiles_and_notify_services (new_volinfo);          if (ret) | 
