diff options
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-snapshot.c | 453 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 15 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 2 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 71 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.h | 3 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 3 | 
6 files changed, 518 insertions, 29 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index 73723422bc7..399401d187a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -410,6 +410,101 @@ out:          return ret;  } +/* This function will take backup of the volume store + * of the to-be restored volume. This will help us to + * revert the operation if it fails. + * + * @param volinfo volinfo of the origin volume + * + * @return 0 on success and -1 on failure + */ +int +glusterd_snapshot_backup_vol (glusterd_volinfo_t *volinfo) +{ +        char             pathname[PATH_MAX]    = {0,}; +        int              ret                   = -1; +        int              op_ret                = 0; +        char             delete_path[PATH_MAX] = {0,}; +        char             trashdir[PATH_MAX]    = {0,}; +        glusterd_conf_t *priv                  = NULL; +        xlator_t        *this                  = NULL; + +        this = THIS; +        GF_ASSERT (this); +        priv = this->private; +        GF_ASSERT (priv); +        GF_ASSERT (volinfo); + +        GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv); + +        snprintf (delete_path, sizeof (delete_path), +                  "%s/"GLUSTERD_TRASH"/vols-%s.deleted", priv->workdir, +                  volinfo->volname); + +        snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH, +                  priv->workdir); + +        /* Create trash folder if it is not there */ +        ret = mkdir (trashdir, 0777); +        if (ret && errno != EEXIST) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to create trash " +                        "directory, reason : %s", strerror (errno)); +                ret = -1; +                goto out; +        } + +        /* Move the origin volume folder to the backup location */ +        ret = rename (pathname, delete_path); +        if (ret) { +                gf_log 
(this->name, GF_LOG_ERROR, "Failed to rename snap " +                        "directory %s to %s", pathname, delete_path); +                goto out; +        } + +        /* Re-create an empty origin volume folder so that restore can +         * happen. */ +        ret = mkdir (pathname, 0777); +        if (ret && errno != EEXIST) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to create origin " +                        "volume directory (%s), reason : %s", +                        pathname, strerror (errno)); +                ret = -1; +                goto out; +        } + +        ret = 0; +out: +        /* Save the actual return value */ +        op_ret = ret; +        if (ret) { +                /* Revert the changes in case of failure */ +                ret = rmdir (pathname); +                if (ret) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "Failed to rmdir: %s,err: %s", +                                pathname, strerror (errno)); +                } + +                ret = rename (delete_path, pathname); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "Failed to rename directory %s to %s", +                                delete_path, pathname); +                } + +                ret = rmdir (trashdir); +                if (ret) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "Failed to rmdir: %s, Reason: %s", +                                trashdir, strerror (errno)); +                } +        } + +        gf_log (this->name, GF_LOG_TRACE, "Returning %d", op_ret); + +        return op_ret; +} +  int32_t  glusterd_copy_geo_rep_files (glusterd_volinfo_t *origin_vol,                               glusterd_volinfo_t *snap_vol, dict_t *rsp_dict) @@ -681,6 +776,15 @@ glusterd_snapshot_restore_prevalidate (dict_t *dict, char **op_errstr,                          ret = 
-1;                          goto out;                  } + +                /* Take backup of the volinfo folder */ +                ret = glusterd_snapshot_backup_vol (volinfo); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, "Failed to backup " +                                "volume backend files for %s volume", +                                volinfo->volname); +                        goto out; +                }          }          ret = 0; @@ -5660,6 +5764,286 @@ out:          return ret;  } +/* This function is called if snapshot restore operation + * is successful. It will cleanup the backup files created + * during the restore operation. + * + * @param rsp_dict Response dictionary + * @param volinfo  volinfo of the volume which is being restored + * @param snap     snap object + * + * @return 0 on success or -1 on failure + */ +int +glusterd_snapshot_restore_cleanup (dict_t *rsp_dict, +                                   glusterd_volinfo_t *volinfo, +                                   glusterd_snap_t *snap) +{ +        int                     ret                     = -1; +        char                    delete_path[PATH_MAX]   = {0,}; +        xlator_t               *this                    = NULL; +        glusterd_conf_t        *priv                    = NULL; + +        this = THIS; +        GF_ASSERT (this); +        priv = this->private; + +        GF_ASSERT (rsp_dict); +        GF_ASSERT (volinfo); +        GF_ASSERT (snap); + +        /* If the volinfo is already restored then we should delete +         * the backend LVMs */ +        if (!uuid_is_null (volinfo->restored_from_snap)) { +                ret = glusterd_lvm_snapshot_remove (rsp_dict, volinfo); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, "Failed to remove " +                                "LVM backend"); +                        goto out; +                } +        } + +        snprintf (delete_path, 
sizeof (delete_path), +                  "%s/"GLUSTERD_TRASH"/vols-%s.deleted", priv->workdir, +                  volinfo->volname); + +        /* Restore is successful therefore delete the original volume's +         * volinfo. +         */ +        ret = glusterd_volinfo_delete (volinfo); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to delete volinfo"); +                goto out; +        } + +        /* Now delete the snap entry. */ +        ret = glusterd_snap_remove (rsp_dict, snap, _gf_false, _gf_true); +        if (ret) { +                gf_log (this->name, GF_LOG_WARNING, "Failed to delete " +                        "snap %s", snap->snapname); +                goto out; +        } + +        /* Delete the backup copy of volume folder */ +        ret = glusterd_recursive_rmdir (delete_path); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to remove " +                        "backup dir (%s)", delete_path); +                goto out; +        } + +        ret = 0; +out: +        return ret; +} + +/* This function is called when the snapshot restore operation failed + * for some reasons. In such case we revert the restore operation. + * + * @param volinfo               volinfo of the origin volume + * @param restore_from_store    Boolean variable which tells whether to + *                              restore the origin from store or not. 
+ * + * @return 0 on success and -1 on failure + */ +int +glusterd_snapshot_revert_partial_restored_vol (glusterd_volinfo_t *volinfo, +                                               gf_boolean_t restore_from_store) +{ +        int                     ret                     = 0; +        char                    pathname [PATH_MAX]     = {0,}; +        char                    trash_path[PATH_MAX]    = {0,}; +        glusterd_volinfo_t     *reverted_vol            = NULL; +        glusterd_conf_t        *priv                    = NULL; +        xlator_t               *this                    = NULL; + +        this = THIS; +        GF_ASSERT (this); +        priv = this->private; +        GF_ASSERT (priv); +        GF_ASSERT (volinfo); + +        GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv); + +        snprintf (trash_path, sizeof (trash_path), +                  "%s/"GLUSTERD_TRASH"/vols-%s.deleted", priv->workdir, +                  volinfo->volname); + +        /* Since snapshot restore failed we cannot rely on the volume +         * data stored under vols folder. Therefore delete the origin +         * volume's backend folder.*/ +        ret = glusterd_recursive_rmdir (pathname); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to remove " +                        "%s directory", pathname); +                goto out; +        } + +        /* Now move the backup copy of the vols to its original +         * location.*/ +        ret = rename (trash_path, pathname); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to rename folder " +                        "from %s to %s", trash_path, pathname); +                goto out; +        } + +        /* Skip the volinfo retrieval from the store if restore_from_store +         * is not true. 
*/ +        if (!restore_from_store) { +                ret = 0; +                goto out; +        } + +        /* Retrieve the volume from the store */ +        reverted_vol = glusterd_store_retrieve_volume (volinfo->volname, NULL); +        if (NULL == reverted_vol) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to load restored " +                        "%s volume", volinfo->volname); +                goto out; +        } + +        /* Since we retrieved the volinfo from store now we don't +         * want the older volinfo. Therefore delete the older volinfo */ +        ret = glusterd_volinfo_delete (volinfo); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to delete volinfo"); +                goto out; +        } + +        ret = 0; +out: +        return ret; +} + +/* This function is called when glusterd is started and we need + * to revert a failed snapshot restore. + * + * @param snap snapshot object of the restored snap + * + * @return 0 on success and -1 on failure + */ +int +glusterd_snapshot_revert_restore_from_snap (glusterd_snap_t *snap) +{ +        int                     ret                     = -1; +        char                    volname [PATH_MAX]      = {0,}; +        glusterd_volinfo_t     *snap_volinfo            = NULL; +        glusterd_volinfo_t     *volinfo                 = NULL; +        xlator_t               *this                    = NULL; + +        this = THIS; + +        GF_ASSERT (this); +        GF_ASSERT (snap); + +        /* TODO : As of now there is only one volume in snapshot. 
+         * Change this when multiple volume snapshot is introduced +         */ +        snap_volinfo = list_entry (snap->volumes.next, glusterd_volinfo_t, +                                   vol_list); + +        strcpy (volname, snap_volinfo->parent_volname); + +        ret = glusterd_volinfo_find (volname, &volinfo); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Could not get volinfo of " +                        "%s", snap_volinfo->parent_volname); +                goto out; +        } + +        ret = glusterd_snapshot_revert_partial_restored_vol (volinfo, _gf_true); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to revert snapshot " +                        "restore operation for %s volume", volname); +                goto out; +        } +out: +        return ret; +} + +/* This function is called from post-validation. Based on the op_ret + * it will take a decision on whether to revert the operation or + * perform cleanup. 
+ * + * @param dict          dictionary object + * @param op_ret        return value of the restore operation + * @param op_errstr     error string + * @param rsp_dict      Response dictionary + * + * @return 0 on success and -1 on failure + */ +int +glusterd_snapshot_restore_postop (dict_t *dict, int32_t op_ret, +                                  char **op_errstr, dict_t *rsp_dict) +{ +        int                     ret             = -1; +        char                   *name            = NULL; +        char                   *volname         = NULL; +        glusterd_snap_t        *snap            = NULL; +        glusterd_volinfo_t     *volinfo         = NULL; +        xlator_t               *this            = NULL; + +        this = THIS; + +        GF_ASSERT (this); +        GF_ASSERT (dict); +        GF_ASSERT (rsp_dict); + +        ret = dict_get_str (dict, "snapname", &name); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "getting the snap " +                        "name failed (volume: %s)", volinfo->volname); +                goto out; +        } + +        snap = glusterd_find_snap_by_name (name); +        if (!snap) { +                gf_log (this->name, GF_LOG_ERROR, "snap %s is not found", name); +                ret = -1; +                goto out; +        } + +        /* TODO: fix this when multiple volume support will come */ +        ret = dict_get_str (dict, "volname1", &volname); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to get volume name"); +                goto out; +        } + +        ret = glusterd_volinfo_find (volname, &volinfo); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Volume (%s) does not exist ", volname); +                goto out; +        } + +        /* On success perform the cleanup operation */ +        if (0 == op_ret) { +                ret = glusterd_snapshot_restore_cleanup 
(rsp_dict, volinfo, +                                                         snap); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, "Failed to perform " +                                "snapshot restore cleanup for %s volume", +                                volname); +                        goto out; +                } +        } else { /* On failure revert snapshot restore */ +                ret = glusterd_snapshot_revert_partial_restored_vol (volinfo, +                                                                     _gf_false); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, "Failed to revert " +                                "restore operation for %s volume", volname); +                        goto out; +                } +        } + +        ret = 0; +out: +        return ret; +} +  int  glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr,                                  dict_t *rsp_dict) @@ -5693,6 +6077,15 @@ glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr,                  }                  break;          case GF_SNAP_OPTION_TYPE_DELETE: +                ret = glusterd_snapshot_update_snaps_post_validate (dict, +                                                                    op_errstr, +                                                                    rsp_dict); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, "Failed to " +                                "update missed snaps list"); +                        goto out; +                } +                break;          case GF_SNAP_OPTION_TYPE_RESTORE:                  ret = glusterd_snapshot_update_snaps_post_validate (dict,                                                                      op_errstr, @@ -5702,6 +6095,14 @@ glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr,             
                     "update missed snaps list");                          goto out;                  } + +                ret = glusterd_snapshot_restore_postop (dict, op_ret, +                                                        op_errstr, rsp_dict); +                if (ret) { +                        gf_log (this->name, GF_LOG_ERROR, "Failed to " +                                "perform snapshot restore post-op"); +                        goto out; +                }                  break;          case GF_SNAP_OPTION_TYPE_ACTIVATE:          case GF_SNAP_OPTION_TYPE_DEACTIVATE: @@ -6274,6 +6675,23 @@ gd_restore_snap_volume (dict_t *rsp_dict,          snap = snap_vol->snapshot;          GF_VALIDATE_OR_GOTO (this->name, snap, out); +        /* Set the status to under restore so that if the +         * the node goes down during restore and comes back +         * the state of the volume can be reverted correctly +         */ +        snap->snap_status = GD_SNAP_STATUS_UNDER_RESTORE; + +        /* We need to save this in disk so that if node goes +         * down the status is in updated state. +         */ +        ret = glusterd_store_snap (snap); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Could not store snap " +                        "object for %s snap of %s volume", snap_vol->volname, +                        snap_vol->parent_volname); +                goto out; +        } +          /* Snap volume must be stoped before performing the           * restore operation.           
*/ @@ -6312,15 +6730,6 @@ gd_restore_snap_volume (dict_t *rsp_dict,          ret = glusterd_snap_volinfo_restore (rsp_dict, new_volinfo, snap_vol);          if (ret) {                  gf_log (this->name, GF_LOG_ERROR, "Failed to restore snap"); -                (void)glusterd_volinfo_delete (new_volinfo); -                goto out; -        } - -        ret = glusterd_lvm_snapshot_remove (rsp_dict, orig_vol); -        if (ret) { -                gf_log (this->name, GF_LOG_ERROR, "Failed to remove " -                        "LVM backend"); -                (void)glusterd_volinfo_delete (new_volinfo);                  goto out;          } @@ -6344,27 +6753,8 @@ gd_restore_snap_volume (dict_t *rsp_dict,           * set the status to the original volume's status. */          glusterd_set_volume_status (new_volinfo, orig_vol->status); -        /* Once the new_volinfo is completely constructed then delete -         * the orinal volinfo -         */ -        ret = glusterd_volinfo_delete (orig_vol); -        if (ret) { -                gf_log (this->name, GF_LOG_ERROR, "Failed to delete volinfo"); -                (void)glusterd_volinfo_delete (new_volinfo); -                goto out; -        } -          list_add_tail (&new_volinfo->vol_list, &conf->volumes); -        /* Now delete the snap entry. As a first step delete the snap -         * volume information stored in store. 
*/ -        ret = glusterd_snap_remove (rsp_dict, snap, _gf_false, _gf_true); -        if (ret) { -                gf_log (this->name, GF_LOG_WARNING, "Failed to delete " -                        "snap %s", snap->snapname); -                goto out; -        } -          ret = glusterd_store_volinfo (new_volinfo,                                        GLUSTERD_VOLINFO_VER_AC_INCREMENT);          if (ret) { @@ -6374,6 +6764,13 @@ gd_restore_snap_volume (dict_t *rsp_dict,          ret = 0;  out: +        if (ret && NULL != new_volinfo) { +                /* In case of any failure we should free new_volinfo. Doing +                 * this will also remove the entry we added in conf->volumes +                 * if it was added there. +                 */ +                (void)glusterd_volinfo_delete (new_volinfo); +        }          return ret;  } diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 3993504e8b0..ab635ff943f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -3871,6 +3871,11 @@ out:   * dies after taking the backend snapshot, but before updating the   * status, then when glusterd comes up, it should treat that snapshot   * as a failed snapshot and clean it up. + * + * Restore operation starts by setting the status to + * GD_SNAP_STATUS_RESTORED. If the server goes down before changing + * the status back, we need to revert the partially restored + * snapshot.   
*/  int32_t  glusterd_snap_cleanup (xlator_t  *this) @@ -3893,7 +3898,15 @@ glusterd_snap_cleanup (xlator_t  *this)          }          list_for_each_entry (snap, &priv->snapshots, snap_list) { -                if (snap->snap_status != GD_SNAP_STATUS_IN_USE) { +                if (snap->snap_status == GD_SNAP_STATUS_RESTORED) { +                        ret = glusterd_snapshot_revert_restore_from_snap (snap); +                        if (ret) { +                                gf_log (this->name, GF_LOG_WARNING, "Failed to " +                                        "revert partially restored snapshot " +                                        "(%s)", snap->snapname); +                                goto out; +                        } +                } else if (snap->snap_status != GD_SNAP_STATUS_IN_USE) {                          ret = glusterd_snap_remove (dict, snap,                                                      _gf_true, _gf_true);                          if (ret) { diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 63d510cbf17..7fc643ebe8d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -170,4 +170,6 @@ glusterd_store_snap (glusterd_snap_t *snap);  int32_t  glusterd_store_update_missed_snaps (); +glusterd_volinfo_t* +glusterd_store_retrieve_volume (char *volname, glusterd_snap_t *snap);  #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 65aa5e1bf7d..b7f81bf83e5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -11986,3 +11986,74 @@ out:          return ret;  } + +/* This is a utility function which will recursively delete + * a folder and its contents. + * + * @param delete_path folder to be deleted. + * + * @return 0 on success and -1 on failure. 
+ */ +int +glusterd_recursive_rmdir (const char *delete_path) +{ +        int             ret             = -1; +        char            path [PATH_MAX] = {0,}; +        struct stat     st              = {0,}; +        DIR            *dir             = NULL; +        struct dirent  *entry           = NULL; +        xlator_t       *this            = NULL; + +        this = THIS; +        GF_ASSERT (this); +        GF_VALIDATE_OR_GOTO (this->name, delete_path, out); + +        dir = opendir (delete_path); +        if (!dir) { +                gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s." +                        " Reason : %s", delete_path, strerror (errno)); +                ret = 0; +                goto out; +        } + +        glusterd_for_each_entry (entry, dir); +        while (entry) { +                snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name); +                ret = stat (path, &st); +                if (ret == -1) { +                        gf_log (this->name, GF_LOG_DEBUG, "Failed to stat " +                                "entry %s : %s", path, strerror (errno)); +                        goto out; +                } + +                if (S_ISDIR (st.st_mode)) +                        ret = glusterd_recursive_rmdir (path); +                else +                        ret = unlink (path); + +                if (ret) { +                        gf_log (this->name, GF_LOG_DEBUG, " Failed to remove " +                                "%s. Reason : %s", path, strerror (errno)); +                } + +                gf_log (this->name, GF_LOG_DEBUG, "%s %s", +                                ret ? "Failed to remove":"Removed", +                                entry->d_name); + +                glusterd_for_each_entry (entry, dir); +        } + +        ret = closedir (dir); +        if (ret) { +                gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. 
" +                        "Reason : %s", delete_path, strerror (errno)); +        } + +        ret = rmdir (delete_path); +        if (ret) { +                gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s", +                        delete_path, strerror (errno)); +        } +out: +        return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 9c0c861830e..e4d41af64c0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -777,4 +777,7 @@ glusterd_restore_geo_rep_files (glusterd_volinfo_t *snap_vol);  int32_t  glusterd_copy_quota_files (glusterd_volinfo_t *src_vol,                             glusterd_volinfo_t *dest_vol); + +int +glusterd_recursive_rmdir (const char *delete_path);  #endif diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 2496a4d1182..b7c0aeafb1e 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -378,6 +378,7 @@ typedef enum gd_snap_status_ {          GD_SNAP_STATUS_INIT,          GD_SNAP_STATUS_IN_USE,          GD_SNAP_STATUS_DECOMMISSION, +        GD_SNAP_STATUS_UNDER_RESTORE,          GD_SNAP_STATUS_RESTORED,  } gd_snap_status_t; @@ -1005,4 +1006,6 @@ glusterd_add_new_entry_to_list (char *missed_info, char *snap_vol_id,                                  int32_t brick_num, char *brick_path,                                  int32_t snap_op, int32_t snap_status); +int +glusterd_snapshot_revert_restore_from_snap (glusterd_snap_t *snap);  #endif  | 
