diff options
author | Avra Sengupta <asengupt@redhat.com> | 2014-03-13 01:04:40 +0000 |
---|---|---|
committer | Rajesh Joseph <rjoseph@redhat.com> | 2014-04-02 06:03:25 -0700 |
commit | 0ce369a0aa511e98fd71c0337181a5577b2d8a1f (patch) | |
tree | 842fa4958e10a786572d22b81af2cd0813569da0 /xlators/mgmt/glusterd/src/glusterd-utils.c | |
parent | ee4e8bb5339f5517d3d248f559becfd58013a0fe (diff) |
glusterd/snapshot: Making snap operations crash consistent
In the event of a volume's brick being down, or a node being
down, make snap ops like create, delete, restore, and
status crash consistent.
Mark the snap status as -1 for snap bricks which were not snapshotted
because the volume brick was down, and do not start those
snap bricks until the snapshot is taken.
During delete, bypass the LVM snapshot removal for snap bricks
whose snap status is -1.
During restore, bypass replacing xattrs on the snapshot bricks
whose snap status is -1. Also bump the restored volume's version
so as to handle nodes being down. On handshake of a restored
volume, pass the brick's snap_status as well.
During snapshot status, the details of a non-snapshotted brick
are displayed as "N/A". If a node is down, the entry itself will not
be displayed.
Change-Id: Id042efd7507829995270da0b2b2a6282a08a053d
Signed-off-by: Avra Sengupta <asengupt@redhat.com>
Reviewed-on: http://review.gluster.org/7241
Reviewed-by: Vijaikumar Mallikarjuna <vmallika@redhat.com>
Reviewed-by: Rajesh Joseph <rjoseph@redhat.com>
Tested-by: Rajesh Joseph <rjoseph@redhat.com>
Diffstat (limited to 'xlators/mgmt/glusterd/src/glusterd-utils.c')
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 51 |
1 files changed, 49 insertions, 2 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 8eec06c1a..a969c4c84 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -571,7 +571,7 @@ glusterd_brickinfo_dup (glusterd_brickinfo_t *brickinfo, } } dup_brickinfo->status = brickinfo->status; - + dup_brickinfo->snap_status = brickinfo->snap_status; out: return ret; } @@ -618,7 +618,10 @@ glusterd_snap_volinfo_restore (glusterd_volinfo_t *new_volinfo, goto out; } - if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + /* If the brick is not of this peer, or snapshot is missed * + * for the brick do not replace the xattr for it */ + if ((!uuid_compare (brickinfo->uuid, MY_UUID)) && + (brickinfo->snap_status != -1)) { /* We need to replace the volume id of all the bricks * to the volume id of the origin volume. new_volinfo * has the origin volume's volume id*/ @@ -1566,6 +1569,16 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, priv = this->private; GF_ASSERT (priv); + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "Snapshot is pending on %s:%s. 
" + "Hence not starting the brick", + brickinfo->hostname, + brickinfo->path); + ret = 0; + goto out; + } + ret = _mk_rundir_p (volinfo); if (ret) goto out; @@ -2077,7 +2090,10 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, glusterd_dict_ctx_t ctx = {0}; char *rebalance_id_str = NULL; char *rb_id_str = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); GF_ASSERT (dict); GF_ASSERT (volinfo); @@ -2315,6 +2331,17 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", + count, i); + ret = dict_set_int32 (dict, key, brickinfo->snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snap_status for %s:%s", + brickinfo->hostname, + brickinfo->path); + goto out; + } + i++; } @@ -2833,6 +2860,7 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, { char key[512] = {0,}; int ret = -1; + int32_t snap_status = 0; char *hostname = NULL; char *path = NULL; glusterd_brickinfo_t *new_brickinfo = NULL; @@ -2860,12 +2888,22 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, goto out; } + snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", + vol_count, brick_count); + ret = dict_get_int32 (vols, key, &snap_status); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + ret = glusterd_brickinfo_new (&new_brickinfo); if (ret) goto out; strcpy (new_brickinfo->path, path); strcpy (new_brickinfo->hostname, hostname); + new_brickinfo->snap_status = snap_status; + //peerinfo might not be added yet (void) glusterd_resolve_brick (new_brickinfo); ret = 0; @@ -3076,6 +3114,15 @@ glusterd_import_volinfo (dict_t *vols, int count, new_volinfo->is_snap_volume = is_snap_volume; + snprintf (key, sizeof (key), "volume%d.is_volume_restored", count); + ret = dict_get_uint32 (vols, key, &new_volinfo->is_volume_restored); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get " + 
"is_volume_restored option for %s", + volname); + goto out; + } + snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); ret = dict_get_uint64 (vols, key, &new_volinfo->snap_max_hard_limit); if (ret) { |