diff options
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-mem-types.h | 3 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-mgmt.c | 23 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-snapshot.c | 677 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 288 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 5 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 201 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 17 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 10 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.h | 3 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.c | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 37 |
11 files changed, 1203 insertions, 62 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index 158e8bad4..e6f6a0333 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -68,7 +68,8 @@ typedef enum gf_gld_mem_types_ { gf_gld_mt_mop_commit_req_t = gf_common_mt_end + 52, gf_gld_mt_int = gf_common_mt_end + 53, gf_gld_mt_snap_t = gf_common_mt_end + 54, - gf_gld_mt_end = gf_common_mt_end + 55, + gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55, + gf_gld_mt_end = gf_common_mt_end + 56, } gf_gld_mem_types_t; #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c index c905977be..d52532e54 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mgmt.c +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c @@ -1285,7 +1285,7 @@ out: int glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op, - int32_t op_ret, dict_t *req_dict, + int32_t op_ret, dict_t *dict, dict_t *req_dict, char **op_errstr, int npeers) { int32_t ret = -1; @@ -1300,10 +1300,12 @@ glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op, this = THIS; GF_ASSERT (this); GF_ASSERT (conf); + GF_ASSERT (dict); GF_ASSERT (req_dict); GF_ASSERT (op_errstr); peers = &conf->xaction_peers; + GF_ASSERT (peers); rsp_dict = dict_new (); if (!rsp_dict) { @@ -1312,6 +1314,9 @@ glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op, goto out; } + /* Copy the contents of dict like missed snaps info to req_dict */ + dict_copy (dict, req_dict); + /* Post Validation on local node */ ret = gd_mgmt_v3_post_validate_fn (op, op_ret, req_dict, op_errstr, rsp_dict); @@ -1344,7 +1349,7 @@ glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op, } /* Sending Post Validation req to other nodes in the cluster */ - gd_syncargs_init (&args, NULL); + gd_syncargs_init (&args, req_dict); synctask_barrier_init((&args)); peer_cnt = 0; list_for_each_entry (peerinfo, peers, op_peers_list) { @@ -1613,7 +1618,7 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, /* PRE-COMMIT VALIDATE PHASE */ ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict, - &op_errstr, npeers); + &op_errstr, npeers); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed"); goto out; @@ -1621,7 +1626,7 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, /* COMMIT OP PHASE */ ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict, - &op_errstr, npeers); + &op_errstr, npeers); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed"); goto out; @@ -1632,8 +1637,8 @@ glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, commands other than snapshot. So as of now, I am sending 0 (op_ret as 0). */ - ret = glusterd_mgmt_v3_post_validate (conf, op, 0, req_dict, - &op_errstr, npeers); + ret = glusterd_mgmt_v3_post_validate (conf, op, 0, dict, req_dict, + &op_errstr, npeers); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); goto out; @@ -1795,7 +1800,7 @@ glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, Who will initiate the cleanup? */ ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict, - &op_errstr, npeers); + &op_errstr, npeers); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed"); /* If the main op fails, we should save the error string. @@ -1815,7 +1820,7 @@ unbarrier: if (ret) goto out; ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict, - &op_errstr, npeers); + &op_errstr, npeers); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed"); @@ -1831,7 +1836,7 @@ out: op_ret = -1; /* POST-COMMIT VALIDATE PHASE */ - ret = glusterd_mgmt_v3_post_validate (conf, op, op_ret, req_dict, + ret = glusterd_mgmt_v3_post_validate (conf, op, op_ret, dict, req_dict, &op_errstr, npeers); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index 256c34e9b..5a0041535 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -46,6 +46,11 @@ char snap_mount_folder[PATH_MAX]; +static int32_t +glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, + char *snap_uuid, struct list_head *peers, + int32_t op); + /* This function will restore a snapshot volumes * * @param dict dictionary containing snapshot restore request @@ -54,7 +59,7 @@ char snap_mount_folder[PATH_MAX]; * @return Negative value on Failure and 0 in success */ int -glusterd_snapshot_restore (dict_t *dict, char **op_errstr) +glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { int ret = -1; char *volname = NULL; @@ -63,13 +68,17 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr) glusterd_volinfo_t *snap_volinfo = NULL; glusterd_volinfo_t *volinfo = NULL; glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; this = THIS; GF_ASSERT (this); GF_ASSERT (dict); GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + priv = this->private; + GF_ASSERT (priv); ret = dict_get_str (dict, "snapname", &snapname); if (ret) { @@ -103,7 +112,21 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr) goto out; } - ret = gd_restore_snap_volume (volinfo, snap_volinfo); + if (is_origin_glusterd (dict) == _gf_true) { + /* From origin glusterd check if * + * any peers with snap bricks is down */ + ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, + snap_volinfo->volname, + &priv->peers, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to find missed snap restores"); + goto out; + } + } + + ret = gd_restore_snap_volume (rsp_dict, volinfo, snap_volinfo); if (ret) { /* No need to update op_errstr because it is assumed * that the called function will do that in case of @@ -977,24 +1000,29 @@ out: } int32_t -glusterd_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol) +glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol) { - char *mnt_pt = NULL; - struct mntent *entry = NULL; - int32_t ret = -1; - glusterd_brickinfo_t *brickinfo = NULL; - xlator_t *this = NULL; - FILE *mtab = NULL; + char *mnt_pt = NULL; + struct mntent *entry = NULL; + int32_t brick_count = -1; + int32_t ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + FILE *mtab = NULL; this = THIS; GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); if (!snap_vol) { gf_log (this->name, GF_LOG_ERROR, "snap volinfo is NULL"); goto out; } + brick_count = -1; list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + brick_count++; if (uuid_compare (brickinfo->uuid, MY_UUID)) continue; @@ -1004,6 +1032,23 @@ glusterd_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol) "for brick %s:%s of the snap %s.", brickinfo->hostname, brickinfo->path, snap_vol->snapshot->snapname); + + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_vol->volname, + brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_DELETE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + brickinfo->hostname, + brickinfo->path); + goto out; + } + continue; } @@ -1026,7 +1071,8 @@ glusterd_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol) goto out; } ret = glusterd_do_lvm_snapshot_remove (snap_vol, brickinfo, - mnt_pt, entry->mnt_fsname); + mnt_pt, + entry->mnt_fsname); if (mtab) endmntent (mtab); if (ret) { @@ -1035,6 +1081,7 @@ glusterd_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol) brickinfo->path, entry->mnt_fsname); goto out; } + } ret = 0; @@ -1043,7 +1090,8 @@ out: } int32_t -glusterd_snap_volume_remove (glusterd_volinfo_t *snap_vol, +glusterd_snap_volume_remove (dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, gf_boolean_t remove_lvm, gf_boolean_t force) { @@ -1054,6 +1102,9 @@ glusterd_snap_volume_remove (glusterd_volinfo_t *snap_vol, xlator_t *this = NULL; this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); if (!snap_vol) { gf_log(this->name, GF_LOG_WARNING, "snap_vol in NULL"); @@ -1080,7 +1131,7 @@ glusterd_snap_volume_remove (glusterd_volinfo_t *snap_vol, /* Only remove the backend lvm when required */ if (remove_lvm) { - ret = glusterd_lvm_snapshot_remove (snap_vol); + ret = glusterd_lvm_snapshot_remove (rsp_dict, snap_vol); if (ret) { gf_log(this->name, GF_LOG_WARNING, "Failed to remove " "lvm snapshot volume %s", snap_vol->volname); @@ -1148,7 +1199,8 @@ glusterd_snapobject_delete (glusterd_snap_t *snap) } int32_t -glusterd_snap_remove (glusterd_snap_t *snap, +glusterd_snap_remove (dict_t *rsp_dict, + glusterd_snap_t *snap, gf_boolean_t remove_lvm, gf_boolean_t force) { @@ -1159,6 +1211,9 @@ glusterd_snap_remove (glusterd_snap_t *snap, xlator_t *this = NULL; this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap); if (!snap) { gf_log(this->name, GF_LOG_WARNING, "snap in NULL"); @@ -1167,7 +1222,7 @@ glusterd_snap_remove (glusterd_snap_t *snap, } list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) { - ret = glusterd_snap_volume_remove (snap_vol, + ret = glusterd_snap_volume_remove (rsp_dict, snap_vol, remove_lvm, force); if (ret) { gf_log(this->name, GF_LOG_WARNING, "Failed to remove " @@ -2373,7 +2428,7 @@ out: } glusterd_snap_t* -glusterd_create_snap_object (dict_t *dict) +glusterd_create_snap_object (dict_t *dict, dict_t *rsp_dict) { char *snapname = NULL; uuid_t *snap_id = NULL; @@ -2388,6 +2443,7 @@ glusterd_create_snap_object (dict_t *dict) priv = this->private; GF_ASSERT (dict); + GF_ASSERT (rsp_dict); /* Fetch snapname, description, id and time from dict */ ret = dict_get_str (dict, "snapname", &snapname); @@ -2476,7 +2532,8 @@ glusterd_create_snap_object (dict_t *dict) out: if (ret) { if (snap) - glusterd_snap_remove (snap, _gf_true,_gf_true); + glusterd_snap_remove (rsp_dict, snap, + _gf_true, _gf_true); snap=NULL; } @@ -2746,8 +2803,75 @@ out: return ret; } +/* Added missed_snap_entry to rsp_dict */ +int32_t +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, + glusterd_brickinfo_t *brickinfo, + int32_t brick_number, int32_t op) +{ + char *buf = NULL; + char missed_snap_entry[PATH_MAX] = ""; + char name_buf[PATH_MAX] = ""; + int32_t missed_snap_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_uuid); + GF_ASSERT (brickinfo); + + snprintf (missed_snap_entry, sizeof(missed_snap_entry), + "%s:%s=%d:%s:%d:%d", uuid_utoa(brickinfo->uuid), + snap_uuid, brick_number, brickinfo->path, op, + GD_MISSED_SNAP_PENDING); + + buf = gf_strdup (missed_snap_entry); + if (!buf) { + ret = -1; + goto out; + } + + /* Fetch the missed_snap_count from the dict */ + ret = dict_get_int32 (rsp_dict, "missed_snap_count", + &missed_snap_count); + if (ret) { + /* Initialize the missed_snap_count for the first time */ + missed_snap_count = 0; + } + + /* Setting the missed_snap_entry in the rsp_dict */ + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + missed_snap_count); + ret = dict_set_dynstr (rsp_dict, name_buf, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set missed_snap_entry (%s) " + "in the rsp_dict.", buf); + GF_FREE (buf); + goto out; + } + missed_snap_count++; + + /* Setting the new missed_snap_count in the dict */ + ret = dict_set_int32 (rsp_dict, "missed_snap_count", + missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set missed_snap_count for %s " + "in the rsp_dict.", missed_snap_entry); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + static int32_t -glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol, +glusterd_add_bricks_to_snap_volume (dict_t *dict, dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, glusterd_brickinfo_t *original_brickinfo, glusterd_brickinfo_t *snap_brickinfo, char **snap_brick_dir, int64_t volcount, @@ -2762,6 +2886,7 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol, this = THIS; GF_ASSERT (this); GF_ASSERT (dict); + GF_ASSERT (rsp_dict); GF_ASSERT (snap_vol); GF_ASSERT (original_brickinfo); GF_ASSERT (snap_brickinfo); @@ -2779,6 +2904,25 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol, "snap mount path (%s). Using original brickinfo", key); snap_brickinfo->snap_status = -1; strcpy (snap_brick_path, original_brickinfo->path); + + /* In origiator node add snaps missed + * from different nodes to the dict + */ + if (is_origin_glusterd (dict) == _gf_true) { + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, snap_vol->volname, + original_brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_CREATE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + original_brickinfo->hostname, + original_brickinfo->path); + goto out; + } + } } else { /* Create brick-path in the format /var/run/gluster/snaps/ * * <snap-uuid>/<original-brick#>/snap-brick-dir * @@ -2793,6 +2937,9 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, glusterd_volinfo_t *snap_vol, volcount, brick_count); ret = dict_get_ptr (dict, key, (void **)&snap_device); if (ret) { + /* If the device name is empty, so will be the brick path + * Hence the missed snap has already been added above + */ gf_log (this->name, GF_LOG_ERROR, "Unable to fetch " "snap device (%s). Leaving empty", key); } else @@ -2825,7 +2972,7 @@ out: static int32_t glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol, - glusterd_volinfo_t *snap_vol, + glusterd_volinfo_t *snap_vol, dict_t *rsp_dict, glusterd_brickinfo_t *original_brickinfo, glusterd_brickinfo_t *snap_brickinfo, char *snap_brick_dir, int32_t brick_count) @@ -2838,6 +2985,7 @@ glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol, GF_ASSERT (this); GF_ASSERT (origin_vol); GF_ASSERT (snap_vol); + GF_ASSERT (rsp_dict); GF_ASSERT (original_brickinfo); GF_ASSERT (snap_brickinfo); GF_ASSERT (snap_brick_dir); @@ -2849,6 +2997,23 @@ glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol, original_brickinfo->path, origin_vol->volname, snap_vol->snapshot->snapname); + + /* Adding the not started bricks to the missed snaps list */ + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_vol->volname, + original_brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_CREATE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + original_brickinfo->hostname, + original_brickinfo->path); + goto out; + } + snap_brickinfo->snap_status = -1; ret = 0; goto out; @@ -2889,9 +3054,73 @@ out: return ret; } +/* Look for disconnected peers, for missed snap creates or deletes */ +static int32_t +glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, + char *snap_uuid, struct list_head *peers, + int32_t op) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (peers); + GF_ASSERT (vol); + GF_ASSERT (snap_uuid); + + brick_count = 0; + list_for_each_entry (brickinfo, &vol->bricks, brick_list) { + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + /* If the brick belongs to the same node */ + brick_count++; + continue; + } + + list_for_each_entry (peerinfo, peers, uuid_list) { + if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) { + /* If the brick doesnt belong to this peer */ + continue; + } + + /* Found peer who owns the brick, * + * if peer is not connected or not * + * friend add it to missed snap list */ + if (!(peerinfo->connected) || + (peerinfo->state.state != + GD_FRIEND_STATE_BEFRIENDED)) { + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_uuid, + brickinfo, + brick_count + 1, + op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot " + "info for %s:%s in the " + "rsp_dict", brickinfo->hostname, + brickinfo->path); + goto out; + } + } + } + brick_count++; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + glusterd_volinfo_t * glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, - dict_t *dict, int64_t volcount) + dict_t *dict, dict_t *rsp_dict, int64_t volcount) { char key[PATH_MAX] = ""; char *snap_brick_dir = NULL; @@ -2908,14 +3137,13 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, this = THIS; GF_ASSERT (this); - priv = this->private; GF_ASSERT (priv); GF_ASSERT (origin_vol); GF_ASSERT (dict); + GF_ASSERT (rsp_dict); /* fetch username, password and vol_id from dict*/ - snprintf (key, sizeof(key), "volume%ld_username", volcount); ret = dict_get_str (dict, key, &username); if (ret) { @@ -2974,7 +3202,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, goto out; } - ret = glusterd_add_bricks_to_snap_volume (dict, + ret = glusterd_add_bricks_to_snap_volume (dict, rsp_dict, snap_vol, brickinfo, snap_brickinfo, @@ -2997,9 +3225,8 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, continue; } - ret = glusterd_take_brick_snapshot (origin_vol, - snap_vol, - brickinfo, + ret = glusterd_take_brick_snapshot (origin_vol, snap_vol, + rsp_dict, brickinfo, snap_brickinfo, snap_brick_dir, brick_count); @@ -3090,7 +3317,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, out: if (ret) { if (snap_vol) - glusterd_snap_volume_remove (snap_vol, + glusterd_snap_volume_remove (rsp_dict, snap_vol, _gf_true, _gf_true); snap_vol = NULL; } @@ -3311,7 +3538,6 @@ out : return ret; } - int32_t glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr, dict_t *rsp_dict) @@ -3320,9 +3546,18 @@ glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr, char *snapname = NULL; char *dup_snapname = NULL; glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; xlator_t *this = NULL; this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (op_errstr); + + priv = this->private; + GF_ASSERT (priv); if (!dict || !op_errstr) { gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); @@ -3344,7 +3579,34 @@ glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr, goto out; } - ret = glusterd_snap_remove (snap, _gf_true, _gf_false); + if (is_origin_glusterd (dict) == _gf_true) { + /* TODO : As of now there is only volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = list_entry (snap->volumes.next, + glusterd_volinfo_t, + vol_list); + if (!snap_volinfo) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volinfo"); + ret = -1; + goto out; + } + + /* From origin glusterd check if * + * any peers with snap bricks is down */ + ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, + snap_volinfo->volname, + &priv->peers, + GF_SNAP_OPTION_TYPE_DELETE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to find missed snap deletes"); + goto out; + } + } + + ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_false); if (ret){ gf_log (this->name, GF_LOG_ERROR, "Failed to remove snap %s", snapname); @@ -3409,7 +3671,7 @@ glusterd_do_snap_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict) goto out; } - ret = glusterd_snap_remove (snap, _gf_true, _gf_true); + ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_true); if (ret) { gf_log (this->name, GF_LOG_ERROR, "removing the snap %s failed", name); @@ -3425,6 +3687,42 @@ out: return ret; } +/* In case of a successful, delete or create operation, during post_validate * + * look for missed snap operations and update the missed snap lists */ +int32_t +glusterd_snapshot_update_snaps_post_validate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + int32_t missed_snap_count = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (op_errstr); + + ret = dict_get_int32 (dict, "missed_snap_count", + &missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_store_update_missed_snaps (dict, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to update missed_snaps_list"); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + int32_t glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, dict_t *rsp_dict) @@ -3443,6 +3741,10 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, glusterd_conf_t *priv = NULL; this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); priv = this->private; GF_ASSERT(priv); @@ -3474,7 +3776,7 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, } tmp_name = NULL; - snap = glusterd_create_snap_object (dict); + snap = glusterd_create_snap_object (dict, rsp_dict); if (!snap) { gf_log (this->name, GF_LOG_ERROR, "creating the" "snap object %s failed", snapname); @@ -3502,7 +3804,8 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, /* TODO: Create a stub where the bricks are added parallely by worker threads so that the snap creating happens parallely. */ - snap_vol = glusterd_do_snap_vol (origin_vol, snap, dict, i); + snap_vol = glusterd_do_snap_vol (origin_vol, snap, dict, + rsp_dict, i); if (!snap_vol) { ret = -1; gf_log (this->name, GF_LOG_WARNING, "taking the " @@ -3524,7 +3827,8 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, out: if (ret) { if (snap) - glusterd_snap_remove (snap, _gf_true, _gf_true); + glusterd_snap_remove (rsp_dict, snap, + _gf_true, _gf_true); snap=NULL; } @@ -4552,10 +4856,18 @@ glusterd_snapshot_create_postvalidate (dict_t *dict, int32_t op_ret, "failed"); goto out; } + } else { + ret = glusterd_snapshot_update_snaps_post_validate (dict, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "create snapshot"); + goto out; + } } ret = 0; - out: return ret; } @@ -4612,7 +4924,8 @@ glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict) break; case GF_SNAP_OPTION_TYPE_RESTORE: - ret = glusterd_snapshot_restore (dict, op_errstr); + ret = glusterd_snapshot_restore (dict, op_errstr, + rsp_dict); if (ret) { gf_log (this->name, GF_LOG_WARNING, "Failed to " "restore snapshot"); @@ -4820,6 +5133,18 @@ glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr, } break; + case GF_SNAP_OPTION_TYPE_DELETE: + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_update_snaps_post_validate (dict, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "update missed snaps list"); + goto out; + } + break; + default: gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); goto out; @@ -4998,3 +5323,287 @@ glusterd_handle_snapshot (rpcsvc_request_t *req) { return glusterd_big_locked_handler (req, glusterd_handle_snapshot_fn); } + +static inline void +glusterd_free_snap_op (glusterd_snap_op_t *snap_op) +{ + if (snap_op) { + if (snap_op->brick_path) + GF_FREE (snap_op->brick_path); + + GF_FREE (snap_op); + } +} + +/* Look for duplicates and accordingly update the list */ +int32_t +glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, + glusterd_snap_op_t *missed_snap_op) +{ + int32_t ret = -1; + glusterd_snap_op_t *snap_opinfo = NULL; + gf_boolean_t match = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_snapinfo); + GF_ASSERT(missed_snap_op); + + list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops, + snap_ops_list) { + if ((!strcmp (snap_opinfo->brick_path, + missed_snap_op->brick_path)) && + (snap_opinfo->op == missed_snap_op->op)) { + /* If two entries have conflicting status + * GD_MISSED_SNAP_DONE takes precedence + */ + if ((snap_opinfo->status == GD_MISSED_SNAP_PENDING) && + (missed_snap_op->status == GD_MISSED_SNAP_DONE)) { + snap_opinfo->status = GD_MISSED_SNAP_DONE; + gf_log (this->name, GF_LOG_INFO, + "Updating missed snap status " + "for %s:%d:%s:%d as DONE", + missed_snapinfo->node_snap_info, + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op); + ret = 0; + glusterd_free_snap_op (missed_snap_op); + goto out; + } + match = _gf_true; + break; + } else if ((snap_opinfo->brick_num == + missed_snap_op->brick_num) && + (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) && + (missed_snap_op->op == + GF_SNAP_OPTION_TYPE_DELETE)) { + /* Optimizing create and delete entries for the same + * brick and same node + */ + gf_log (this->name, GF_LOG_INFO, + "Updating missed snap status " + "for %s:%d:%s:%d as DONE", + missed_snapinfo->node_snap_info, + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op); + snap_opinfo->status = GD_MISSED_SNAP_DONE; + ret = 0; + glusterd_free_snap_op (missed_snap_op); + goto out; + } + } + + if (match == _gf_true) { + gf_log (this->name, GF_LOG_INFO, + "Duplicate entry. Not updating"); + glusterd_free_snap_op (missed_snap_op); + } else { + list_add_tail (&missed_snap_op->snap_ops_list, + &missed_snapinfo->snap_ops); + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Add new missed snap entry to the missed_snaps list. */ +int32_t +glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, + char *brick_path, int32_t snap_op, + int32_t snap_status) +{ + int32_t ret = -1; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *missed_snap_op = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t match = _gf_false; + gf_boolean_t free_missed_snap_info = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_info); + GF_ASSERT(brick_path); + + priv = this->private; + GF_ASSERT (priv); + + /* Create the snap_op object consisting of the * + * snap id and the op */ + ret = glusterd_missed_snap_op_new (&missed_snap_op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create new missed snap object."); + ret = -1; + goto out; + } + + missed_snap_op->brick_path = gf_strdup(brick_path); + if (!missed_snap_op->brick_path) { + ret = -1; + goto out; + } + missed_snap_op->brick_num = brick_num; + missed_snap_op->op = snap_op; + missed_snap_op->status = snap_status; + + /* Look for other entries for the same node and same snap */ + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + if (!strcmp (missed_snapinfo->node_snap_info, + missed_info)) { + /* Found missed snapshot info for * + * the same node and same snap */ + match = _gf_true; + break; + } + } + + if (match == _gf_false) { + /* First snap op missed for the brick */ + ret = glusterd_missed_snapinfo_new (&missed_snapinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create missed snapinfo"); + goto out; + } + free_missed_snap_info = _gf_true; + missed_snapinfo->node_snap_info = gf_strdup(missed_info); + if (!missed_snapinfo->node_snap_info) { + ret = -1; + goto out; + } + + list_add_tail (&missed_snap_op->snap_ops_list, + &missed_snapinfo->snap_ops); + list_add_tail (&missed_snapinfo->missed_snaps, + &priv->missed_snaps_list); + + ret = 0; + goto out; + } else { + ret = glusterd_update_missed_snap_entry (missed_snapinfo, + missed_snap_op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to update existing missed snap entry."); + goto out; + } + } + +out: + if (ret) { + glusterd_free_snap_op (missed_snap_op); + + if (missed_snapinfo && + (free_missed_snap_info == _gf_true)) { + if (missed_snapinfo->node_snap_info) + GF_FREE (missed_snapinfo->node_snap_info); + + GF_FREE (missed_snapinfo); + } + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Add missing snap entries to the in-memory conf->missed_snap_list */ +int32_t +glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) +{ + char *buf = NULL; + char *tmp = NULL; + char *save_ptr = NULL; + char *nodeid = NULL; + char *snap_uuid = NULL; + char *brick_path = NULL; + char missed_info[PATH_MAX] = ""; + char name_buf[PATH_MAX] = ""; + int32_t i = -1; + int32_t ret = -1; + int32_t brick_num = -1; + int32_t snap_op = -1; + int32_t snap_status = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + priv = this->private; + GF_ASSERT (priv); + + /* We can update the missed_snaps_list without acquiring * + * any additional locks as big lock will be held. */ + for (i = 0; i < missed_snap_count; i++) { + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + i); + ret = dict_get_str (dict, name_buf, &buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %s", name_buf); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "missed_snap_entry = %s", + buf); + + /* Need to make a duplicate string coz the same dictionary * + * is resent to the non-originator nodes */ + tmp = gf_strdup (buf); + if (!tmp) { + ret = -1; + goto out; + } + + /* Fetch the node-id, snap-id, brick_num, + * brick_path, snap_op and snap status + */ + nodeid = strtok_r (tmp, ":", &save_ptr); + snap_uuid = strtok_r (NULL, "=", &save_ptr); + brick_num = atoi(strtok_r (NULL, ":", &save_ptr)); + brick_path = strtok_r (NULL, ":", &save_ptr); + snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); + snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); + + if (!nodeid || !snap_uuid || !brick_path || + brick_num < 1 || snap_op < 1 || + snap_status < 1) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid missed_snap_entry"); + ret = -1; + goto out; + } + + snprintf (missed_info, sizeof(missed_info), "%s:%s", + nodeid, snap_uuid); + + ret = glusterd_store_missed_snaps_list (missed_info, + brick_num, + brick_path, + snap_op, + snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store missed snaps_list"); + goto out; + } + + GF_FREE (tmp); + tmp = NULL; + } + +out: + if (tmp) + GF_FREE (tmp); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 256519598..2340c4030 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -802,6 +802,21 @@ glusterd_store_node_state_path_set (glusterd_volinfo_t *volinfo, } static void +glusterd_store_missed_snaps_list_path_set (char *missed_snaps_list, + size_t len) +{ + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (missed_snaps_list); + GF_ASSERT (len <= PATH_MAX); + + snprintf (missed_snaps_list, len, "%s/snaps/" + GLUSTERD_MISSED_SNAPS_LIST_FILE, priv->workdir); +} + +static void glusterd_store_snapfpath_set (glusterd_snap_t *snap, char *snap_fpath, size_t len) { @@ -861,6 +876,29 @@ glusterd_store_create_nodestate_sh_on_absence (glusterd_volinfo_t *volinfo) return ret; } +static int32_t +glusterd_store_create_missed_snaps_list_shandle_on_absence () +{ + char missed_snaps_list[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + glusterd_store_missed_snaps_list_path_set (missed_snaps_list, + sizeof(missed_snaps_list)); + + ret = gf_store_handle_create_on_absence + (&priv->missed_snaps_list_shandle, + missed_snaps_list); + return ret; +} + int32_t glusterd_store_create_snap_shandle_on_absence (glusterd_snap_t *snap) { @@ -2748,6 +2786,7 @@ int32_t glusterd_store_retrieve_snap (char *snapname) { int32_t ret = -1; + dict_t *dict = NULL; glusterd_snap_t *snap = NULL; glusterd_conf_t *priv = NULL; xlator_t *this = NULL; @@ -2757,6 +2796,14 @@ glusterd_store_retrieve_snap (char *snapname) GF_ASSERT (priv); GF_ASSERT (snapname); + dict = dict_new(); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create dict"); + ret = -1; + goto out; + } + snap = glusterd_new_snap_object (); if (!snap) { gf_log (this->name, GF_LOG_ERROR, "Failed to create " @@ -2800,7 +2847,7 @@ glusterd_store_retrieve_snap (char *snapname) as a failed snapshot and clean it up. */ if (snap->snap_status != GD_SNAP_STATUS_IN_USE) { - ret = glusterd_snap_remove (snap, _gf_true, _gf_true); + ret = glusterd_snap_remove (dict, snap, _gf_true, _gf_true); if (ret) gf_log (this->name, GF_LOG_WARNING, "failed to remove" " the snapshot %s", snap->snapname); @@ -2814,8 +2861,104 @@ glusterd_store_retrieve_snap (char *snapname) glusterd_compare_snap_time); out: - gf_log ("", GF_LOG_TRACE, "Returning with %d", ret); + if (dict) + dict_unref (dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +/* Read the missed_snap_list and update the in-memory structs */ +int32_t +glusterd_store_retrieve_missed_snaps_list (xlator_t *this) +{ + char buf[PATH_MAX] = ""; + char path[PATH_MAX] = ""; + char *missed_node_info = NULL; + char *brick_path = NULL; + char *value = NULL; + char *save_ptr = NULL; + FILE *fp = NULL; + int32_t brick_num = -1; + int32_t snap_op = -1; + int32_t snap_status = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + gf_store_op_errno_t store_errno = GD_STORE_SUCCESS; + + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + /* Get the path of the missed_snap_list */ + glusterd_store_missed_snaps_list_path_set (path, sizeof(path)); + + fp = fopen (path, "r"); + if (!fp) { + /* If errno is ENOENT then there are no missed snaps yet */ + if (errno != ENOENT) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to open %s. " + "Error: %s", path, strerror(errno)); + } else { + gf_log (this->name, GF_LOG_INFO, + "No missed snaps list."); + ret = 0; + } + goto out; + } + + do { + ret = gf_store_read_and_tokenize (fp, buf, + &missed_node_info, &value, + &store_errno); + if (ret) { + if (store_errno == GD_STORE_EOF) { + gf_log (this->name, + GF_LOG_DEBUG, + "EOF for missed_snap_list"); + ret = 0; + break; + } + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch data from " + "missed_snaps_list. Error: %s", + gf_store_strerror (store_errno)); + goto out; + } + + /* Fetch the brick_num, brick_path, snap_op and snap status */ + brick_num = atoi(strtok_r (value, ":", &save_ptr)); + brick_path = strtok_r (NULL, ":", &save_ptr); + snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); + snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); + if (!missed_node_info || !brick_path || + brick_num < 1 || snap_op < 1 || + snap_status < 1) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid missed_snap_entry"); + ret = -1; + goto out; + } + + ret = glusterd_store_missed_snaps_list (missed_node_info, + brick_num, + brick_path, + snap_op, + snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store missed snaps_list"); + goto out; + } + + } while (store_errno == GD_STORE_SUCCESS); + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); return ret; } @@ -2850,16 +2993,27 @@ glusterd_store_retrieve_snaps (xlator_t *this) glusterd_for_each_entry (entry, dir); while (entry) { - ret = glusterd_store_retrieve_snap (entry->d_name); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to restore " - "snapshot: %s", entry->d_name); - goto out; + if (entry->d_type == DT_DIR) { + ret = glusterd_store_retrieve_snap (entry->d_name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to restore snapshot: %s", + entry->d_name); + goto out; + } } glusterd_for_each_entry (entry, dir); } + /* Retrieve missed_snaps_list */ + ret = glusterd_store_retrieve_missed_snaps_list (this); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to retrieve missed_snaps_list"); + goto out; + } + out: if (dir) closedir (dir); @@ -2868,6 +3022,126 @@ out: return ret; } +/* Writes all the contents of conf->missed_snap_list */ +int32_t +glusterd_store_write_missed_snapinfo (int32_t fd) +{ + char value[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT (priv); + + /* Write the missed_snap_entry */ + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + list_for_each_entry (snap_opinfo, + &missed_snapinfo->snap_ops, + snap_ops_list) { + snprintf (value, sizeof(value), "%d:%s:%d:%d", + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op, snap_opinfo->status); + ret = gf_store_save_value + (fd, + missed_snapinfo->node_snap_info, + value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to write missed snapinfo"); + goto out; + } + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Adds the missed snap entries to the in-memory conf->missed_snap_list * + * and writes them to disk */ +int32_t +glusterd_store_update_missed_snaps (dict_t *dict, int32_t missed_snap_count) +{ + int32_t fd = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + priv = this->private; + GF_ASSERT (priv); + + if (missed_snap_count < 1) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_store_create_missed_snaps_list_shandle_on_absence (); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to obtain " + "missed_snaps_list store handle."); + goto out; + } + + fd = gf_store_mkstemp (priv->missed_snaps_list_shandle); + if (fd <= 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create tmp file"); + ret = -1; + goto out; + } + + ret = glusterd_add_missed_snaps_to_list (dict, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snaps to list"); + goto out; + } + + ret = glusterd_store_write_missed_snapinfo (fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to write missed snaps to disk"); + goto out; + } + + ret = gf_store_rename_tmppath (priv->missed_snaps_list_shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to rename the tmp file"); + goto out; + } +out: + if (ret && (fd > 0)) { + ret = gf_store_unlink_tmppath (priv->missed_snaps_list_shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to unlink the tmp file"); + } + ret = -1; + } + + if (fd > 0) + close (fd); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + int32_t glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo) { diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index e1efafcc4..e46d712f6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -159,4 +159,9 @@ glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo); int32_t glusterd_store_snap (glusterd_snap_t *snap); + +int32_t +glusterd_store_update_missed_snaps (dict_t *dict, + int32_t missed_snap_count); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index a969c4c84..905ac6e0c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -588,9 +588,11 @@ out: * TODO: Duplicate all members of volinfo, e.g. geo-rep sync slaves */ int32_t -glusterd_snap_volinfo_restore (glusterd_volinfo_t *new_volinfo, +glusterd_snap_volinfo_restore (dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, glusterd_volinfo_t *snap_volinfo) { + int32_t brick_count = -1; int32_t ret = -1; xlator_t *this = NULL; glusterd_brickinfo_t *brickinfo = NULL; @@ -598,10 +600,12 @@ glusterd_snap_volinfo_restore (glusterd_volinfo_t *new_volinfo, this = THIS; GF_ASSERT (this); + GF_ASSERT (rsp_dict); GF_VALIDATE_OR_GOTO (this->name, new_volinfo, out); GF_VALIDATE_OR_GOTO (this->name, snap_volinfo, out); + brick_count = 0; list_for_each_entry (brickinfo, &snap_volinfo->bricks, brick_list) { ret = glusterd_brickinfo_new (&new_brickinfo); if (ret) { @@ -641,10 +645,32 @@ glusterd_snap_volinfo_restore (glusterd_volinfo_t *new_volinfo, } } + /* If a snapshot is pending for this brick then + * restore should also be pending + */ + if (brickinfo->snap_status == -1) { + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_volinfo->volname, + brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + brickinfo->hostname, + brickinfo->path); + goto out; + } + } + list_add_tail (&new_brickinfo->brick_list, &new_volinfo->bricks); /* ownership of new_brickinfo is passed to new_volinfo */ new_brickinfo = NULL; + brick_count++; } /* Regenerate all volfiles */ @@ -2342,6 +2368,17 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; } + snprintf (key, sizeof (key), "volume%d.brick%d.device_path", + count, i); + ret = dict_set_str (dict, key, brickinfo->device_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snap_device for %s:%s", + brickinfo->hostname, + brickinfo->path); + goto out; + } + i++; } @@ -2861,6 +2898,7 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, char key[512] = {0,}; int ret = -1; int32_t snap_status = 0; + char *snap_device = NULL; char *hostname = NULL; char *path = NULL; glusterd_brickinfo_t *new_brickinfo = NULL; @@ -2896,12 +2934,21 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, goto out; } + snprintf (key, sizeof (key), "volume%d.brick%d.device_path", + vol_count, brick_count); + ret = dict_get_str (vols, key, &snap_device); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + ret = glusterd_brickinfo_new (&new_brickinfo); if (ret) goto out; strcpy (new_brickinfo->path, path); strcpy (new_brickinfo->hostname, hostname); + strcpy (new_brickinfo->device_path, snap_device); new_brickinfo->snap_status = snap_status; //peerinfo might not be added yet @@ -7848,6 +7895,89 @@ out: return ret; } +/* Aggregate missed_snap_counts from different nodes and save it * + * in the req_dict of the originator node */ +int +glusterd_snap_create_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char *buf = NULL; + char *tmp_str = NULL; + char name_buf[PATH_MAX] = ""; + int32_t i = -1; + int32_t ret = -1; + int32_t src_missed_snap_count = -1; + int32_t dst_missed_snap_count = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!dst || !src) { + gf_log (this->name, GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (src, "missed_snap_count", + &src_missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = dict_get_int32 (dst, "missed_snap_count", + &dst_missed_snap_count); + if (ret) { + /* Initialize dst_missed_count for the first time */ + dst_missed_snap_count = 0; + } + + for (i = 0; i < src_missed_snap_count; i++) { + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + i); + ret = dict_get_str (src, name_buf, &buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %s", name_buf); + goto out; + } + + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + dst_missed_snap_count); + + tmp_str = gf_strdup (buf); + if (!tmp_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dst, name_buf, tmp_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set %s", name_buf); + goto out; + } + + tmp_str = NULL; + dst_missed_snap_count++; + } + + ret = dict_set_int32 (dst, "missed_snap_count", dst_missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set dst_missed_snap_count"); + goto out; + } + +out: + if (ret && tmp_str) + GF_FREE(tmp_str); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + int glusterd_snap_use_rsp_dict (dict_t *dst, dict_t *src) { @@ -7868,6 +7998,14 @@ glusterd_snap_use_rsp_dict (dict_t *dst, dict_t *src) } switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snap_create_use_rsp_dict (dst, src); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict"); + goto out; + } + break; case GF_SNAP_OPTION_TYPE_CONFIG: ret = glusterd_snap_config_use_rsp_dict (dst, src); if (ret) { @@ -8695,3 +8833,64 @@ glusterd_compare_snap_vol_time(struct list_head *list1, struct list_head *list2) return ((int)diff_time); } + +int32_t +glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo) +{ + glusterd_missed_snap_info *new_missed_snapinfo = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (missed_snapinfo); + + new_missed_snapinfo = GF_CALLOC (1, sizeof(*new_missed_snapinfo), + gf_gld_mt_missed_snapinfo_t); + + if (!new_missed_snapinfo) + goto out; + + new_missed_snapinfo->node_snap_info = NULL; + INIT_LIST_HEAD (&new_missed_snapinfo->missed_snaps); + INIT_LIST_HEAD (&new_missed_snapinfo->snap_ops); + + *missed_snapinfo = new_missed_snapinfo; + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op) +{ + glusterd_snap_op_t *new_snap_op = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (snap_op); + + new_snap_op = GF_CALLOC (1, sizeof(*new_snap_op), + gf_gld_mt_missed_snapinfo_t); + + if (!new_snap_op) + goto out; + + new_snap_op->brick_path = NULL; + new_snap_op->brick_num = -1; + new_snap_op->op = -1; + new_snap_op->status = -1; + INIT_LIST_HEAD (&new_snap_op->snap_ops_list); + + *snap_op = new_snap_op; + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index ae2406c37..56bb799bf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -614,8 +614,21 @@ int glusterd_compare_snap_vol_time(struct list_head *, struct list_head *); int32_t -glusterd_snap_volinfo_restore (glusterd_volinfo_t *new_volinfo, +glusterd_snap_volinfo_restore (dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, glusterd_volinfo_t *snap_volinfo); int32_t -glusterd_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol); +glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol); + +int32_t +glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo); + +int32_t +glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op); + +int32_t +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, + glusterd_brickinfo_t *brickinfo, + int32_t brick_number, int32_t op); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 9855dc15f..be5efd60c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -4109,7 +4109,8 @@ gd_is_boolean_option (char *key) * @return 0 on success and negative value on error */ int -gd_restore_snap_volume (glusterd_volinfo_t *orig_vol, +gd_restore_snap_volume (dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, glusterd_volinfo_t *snap_vol) { int ret = -1; @@ -4122,6 +4123,7 @@ gd_restore_snap_volume (glusterd_volinfo_t *orig_vol, this = THIS; GF_ASSERT (this); + GF_ASSERT (rsp_dict); conf = this->private; GF_ASSERT (conf); @@ -4164,7 +4166,7 @@ gd_restore_snap_volume (glusterd_volinfo_t *orig_vol, &new_volinfo->snap_volumes); } /* Copy the snap vol info to the new_volinfo.*/ - ret = glusterd_snap_volinfo_restore (new_volinfo, snap_vol); + ret = glusterd_snap_volinfo_restore (rsp_dict, new_volinfo, snap_vol); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to restore snap"); (void)glusterd_volinfo_delete (new_volinfo); @@ -4174,7 +4176,7 @@ gd_restore_snap_volume (glusterd_volinfo_t *orig_vol, /* If the orig_vol is already restored then we should delete * the backend LVMs */ if (orig_vol->is_volume_restored) { - ret = glusterd_lvm_snapshot_remove (orig_vol); + ret = glusterd_lvm_snapshot_remove (rsp_dict, orig_vol); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to remove " "LVM backend"); @@ -4201,7 +4203,7 @@ gd_restore_snap_volume (glusterd_volinfo_t *orig_vol, /* Now delete the snap entry. As a first step delete the snap * volume information stored in store. */ - ret = glusterd_snap_remove (snap, _gf_false, _gf_true); + ret = glusterd_snap_remove (rsp_dict, snap, _gf_false, _gf_true); if (ret) { gf_log (this->name, GF_LOG_WARNING, "Failed to delete " "snap %s", snap->snapname); diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index f5163b477..fcbaaf93e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -170,6 +170,7 @@ gd_is_xlator_option (char *key); gf_boolean_t gd_is_boolean_option (char *key); -int gd_restore_snap_volume (glusterd_volinfo_t *orig_vol, +int gd_restore_snap_volume (dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, glusterd_volinfo_t *snap_vol); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 45a0b7de5..59288ada0 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1340,6 +1340,7 @@ init (xlator_t *this) INIT_LIST_HEAD (&conf->peers); INIT_LIST_HEAD (&conf->volumes); INIT_LIST_HEAD (&conf->snapshots); + INIT_LIST_HEAD (&conf->missed_snaps_list); pthread_mutex_init (&conf->mutex, NULL); conf->rpc = rpc; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 027732920..3aa395ebc 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -166,6 +166,8 @@ typedef struct { uint64_t snap_max_hard_limit; uint64_t snap_max_soft_limit; char *snap_bricks_directory; + gf_store_handle_t *missed_snaps_list_shandle; + struct list_head missed_snaps_list; } glusterd_conf_t; @@ -373,6 +375,20 @@ struct glusterd_snap_ { gf_store_handle_t *shandle; }; +typedef struct glusterd_snap_op_ { + int32_t brick_num; + char *brick_path; + int32_t op; + int32_t status; + struct list_head snap_ops_list; +} glusterd_snap_op_t; + +typedef struct glusterd_missed_snap_ { + char *node_snap_info; + struct list_head missed_snaps; + struct list_head snap_ops; +} glusterd_missed_snap_info; + typedef enum gd_node_type_ { GD_NODE_NONE, GD_NODE_BRICK, @@ -381,6 +397,12 @@ typedef enum gd_node_type_ { GD_NODE_NFS, } gd_node_type; +typedef enum missed_snap_stat { + GD_MISSED_SNAP_NONE, + GD_MISSED_SNAP_PENDING, + GD_MISSED_SNAP_DONE, +} missed_snap_stat; + typedef struct glusterd_pending_node_ { struct list_head list; void *node; @@ -418,7 +440,7 @@ enum glusterd_vol_comp_status_ { #define GLUSTERD_CKSUM_FILE "cksum" #define GLUSTERD_TRASH "trash" #define GLUSTERD_NODE_STATE_FILE "node_state.info" -#define GLUSTERD_VOL_SNAP_FILE "snap_list.info" +#define GLUSTERD_MISSED_SNAPS_LIST_FILE "missed_snaps_list" #define GLUSTERD_VOL_SNAP_DIR_PREFIX "snaps" #define GLUSTERD_DEFAULT_SNAPS_BRICK_DIR "/gluster/snaps" @@ -914,8 +936,17 @@ glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr, char * glusterd_build_snap_device_path (char *device, char *snapname); int32_t -glusterd_snap_remove (glusterd_snap_t *snap, gf_boolean_t remove_lvm, - gf_boolean_t force); +glusterd_snap_remove (dict_t *rsp_dict, glusterd_snap_t *snap, + gf_boolean_t remove_lvm, gf_boolean_t force); int32_t glusterd_snapshot_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict); + +int32_t +glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count); + +int32_t +glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, + char *brick_path, int32_t snap_op, + int32_t snap_status); + #endif |