diff options
| author | Krutika Dhananjay <kdhananj@redhat.com> | 2013-02-19 12:11:57 +0530 | 
|---|---|---|
| committer | Anand Avati <avati@redhat.com> | 2013-03-11 14:07:19 -0700 | 
| commit | e125e2ae61c31da798ea9a7342ea9292f47c1d6b (patch) | |
| tree | 7095ef234d76ce5c7152ca9d847afc3e7a18b610 | |
| parent | bc4350423a33d21464b507b4e229eb5244e0cb6e (diff) | |
glusterd: Mark vol as deleted by renaming voldir before cleaning up the store
PROBLEM:
During 'volume delete', when glusterd fails to erase all information
about a volume from the backend store (for instance, because rmdir()
failed on a non-empty directory), not only does the volume delete fail
on that node, but subsequent attempts to restart glusterd also fail,
because the volume store is left in an inconsistent state.
FIX:
Rename the volume directory to a new location,
<working-dir>/trash/<volume-id>.deleted, and then clean up its
contents. The volume is considered deleted once rename() succeeds,
irrespective of whether the subsequent cleanup succeeds.
Change-Id: Iaf18e1684f0b101808bd5e1cd53a5d55790541a8
BUG: 889630
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/4639
Reviewed-by: Amar Tumballi <amarts@redhat.com>
Reviewed-by: Kaushal M <kaushal@redhat.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
| -rwxr-xr-x | tests/bugs/bug-889630.t | 56 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 111 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 4 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 6 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 1 | 
5 files changed, 134 insertions, 44 deletions
diff --git a/tests/bugs/bug-889630.t b/tests/bugs/bug-889630.t new file mode 100755 index 00000000..b04eb340 --- /dev/null +++ b/tests/bugs/bug-889630.t @@ -0,0 +1,56 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../cluster.rc + +function check_peers { +        $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l +} + +function volume_count { +        local cli=$1; +        if [ $cli -eq '1' ] ; then +                $CLI_1 volume info | grep 'Volume Name' | wc -l; +        else +                $CLI_2 volume info | grep 'Volume Name' | wc -l; +        fi +} + +cleanup; + +TEST launch_cluster 2; +TEST $CLI_1 peer probe $H2; + +EXPECT_WITHIN 20 1 check_peers + +TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 +TEST $CLI_1 volume start $V0 + +b="B1"; + +#Create an extra file in the originator's volume store +touch ${!b}/glusterd/vols/$V0/run/file + +TEST $CLI_1 volume stop $V0 +#Test for self-commit failure +TEST $CLI_1 volume delete $V0 + +#Check whether delete succeeded on both the nodes +EXPECT "0" volume_count '1' +EXPECT "0" volume_count '2' + +#Check whether the volume name can be reused after deletion +TEST $CLI_1 volume create $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1 +TEST $CLI_1 volume start $V0 + +#Create an extra file in the peer's volume store +touch ${!b}/glusterd/vols/$V0/run/file + +TEST $CLI_1 volume stop $V0 +#Test for commit failure on the other node +TEST $CLI_2 volume delete $V0 + +EXPECT "0" volume_count '1'; +EXPECT "0" volume_count '2'; + +cleanup; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 085e3e85..7e26eb4a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -446,12 +446,10 @@ out:  }  int32_t -glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, -                             glusterd_brickinfo_t *brickinfo) +glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo, char *delete_path)  { 
         int32_t                 ret = -1;          glusterd_conf_t         *priv = NULL; -        char                    path[PATH_MAX] = {0,};          char                    brickpath[PATH_MAX] = {0,};          char                    *ptr = NULL;          char                    *tmppath = NULL; @@ -459,15 +457,11 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo,          this = THIS;          GF_ASSERT (this); -        GF_ASSERT (volinfo);          GF_ASSERT (brickinfo);          priv = this->private; -          GF_ASSERT (priv); -        GLUSTERD_GET_BRICK_DIR (path, volinfo, priv); -          tmppath = gf_strdup (brickinfo->path);          ptr = strchr (tmppath, '/'); @@ -477,15 +471,16 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo,                  ptr = strchr (tmppath, '/');          } -        snprintf (brickpath, sizeof (brickpath), "%s/%s:%s", -                  path, brickinfo->hostname, tmppath); +        snprintf (brickpath, sizeof (brickpath), +                  "%s/"GLUSTERD_BRICK_INFO_DIR"/%s:%s", delete_path, +                  brickinfo->hostname, tmppath);          GF_FREE (tmppath);          ret = unlink (brickpath);          if ((ret < 0) && (errno != ENOENT)) { -                gf_log (this->name, GF_LOG_ERROR, "Unlink failed on %s, " +                gf_log (this->name, GF_LOG_DEBUG, "Unlink failed on %s, "                          "reason: %s", brickpath, strerror(errno));                  ret = -1;                  goto out; @@ -503,7 +498,7 @@ out:  }  int32_t -glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) +glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo, char *delete_path)  {          int32_t                 ret = 0;          glusterd_brickinfo_t    *tmp = NULL; @@ -520,7 +515,7 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo)          GF_ASSERT (volinfo);          list_for_each_entry (tmp, &volinfo->bricks, brick_list) { -                ret = glusterd_store_delete_brick 
(volinfo, tmp); +                ret = glusterd_store_delete_brick (tmp, delete_path);                  if (ret)                          goto out;          } @@ -528,7 +523,8 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo)          priv = this->private;          GF_ASSERT (priv); -        GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv); +        snprintf (brickdir, sizeof (brickdir), "%s/%s", delete_path, +                  GLUSTERD_BRICK_INFO_DIR);          dir = opendir (brickdir); @@ -539,7 +535,7 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo)                            brickdir, entry->d_name);                  ret = unlink (path);                  if (ret && errno != ENOENT) { -                        gf_log (this->name, GF_LOG_ERROR, "Unable to unlink %s, " +                        gf_log (this->name, GF_LOG_DEBUG, "Unable to unlink %s, "                                  "reason: %s", path, strerror(errno));                  }                  glusterd_for_each_entry (entry, dir); @@ -1245,14 +1241,17 @@ out:  int32_t  glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)  { -        char    pathname[PATH_MAX] = {0,}; -        int32_t ret = 0; -        glusterd_conf_t *priv = NULL; -        DIR     *dir = NULL; -        struct dirent *entry = NULL; -        char path[PATH_MAX] = {0,}; -        struct stat     st = {0, }; -        xlator_t        *this = NULL; +        char             pathname[PATH_MAX]    = {0,}; +        int32_t          ret                   = 0; +        glusterd_conf_t *priv                  = NULL; +        DIR             *dir                   = NULL; +        struct dirent   *entry                 = NULL; +        char             path[PATH_MAX]        = {0,}; +        char             delete_path[PATH_MAX] = {0,}; +        char             trashdir[PATH_MAX]    = {0,}; +        struct stat      st                    = {0, }; +        xlator_t        *this                  = NULL; +        
gf_boolean_t     rename_fail           = _gf_false;          this = THIS;          GF_ASSERT (this); @@ -1261,29 +1260,53 @@ glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)          priv = this->private;          GF_ASSERT (priv); -        snprintf (pathname, sizeof (pathname), "%s/vols/%s", priv->workdir, -                  volinfo->volname); -        dir = opendir (pathname); +        GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv); + +        snprintf (delete_path, sizeof (delete_path), +                  "%s/"GLUSTERD_TRASH"/%s.deleted", priv->workdir, +                  uuid_utoa (volinfo->volume_id)); + +        snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH, +                  priv->workdir); + +        ret = mkdir (trashdir, 0777); +        if (ret && errno != EEXIST) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to create trash " +                        "directory, reason : %s", strerror (errno)); +                ret = -1; +                goto out; +        } + +        ret = rename (pathname, delete_path); +        if (ret) { +                gf_log (this->name, GF_LOG_ERROR, "Failed to rename volume " +                        "directory for volume %s", volinfo->volname); +                rename_fail = _gf_true; +                goto out; +        } + +        dir = opendir (delete_path);          if (!dir) { -                gf_log (this->name, GF_LOG_ERROR, "Failed to open directory %s." -                        " Reason : %s", pathname, strerror (errno)); +                gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s." 
+                        " Reason : %s", delete_path, strerror (errno)); +                ret = 0;                  goto out;          } -        ret = glusterd_store_remove_bricks (volinfo); +        ret = glusterd_store_remove_bricks (volinfo, delete_path);          if (ret) { -                gf_log (this->name, GF_LOG_ERROR, "Remove bricks failed for %s", +                gf_log (this->name, GF_LOG_DEBUG, "Remove bricks failed for %s",                          volinfo->volname);          }          glusterd_for_each_entry (entry, dir);          while (entry) { -                snprintf (path, PATH_MAX, "%s/%s", pathname, entry->d_name); +                snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name);                  ret = stat (path, &st);                  if (ret == -1) { -                        gf_log (this->name, GF_LOG_ERROR, "Failed to stat " +                        gf_log (this->name, GF_LOG_DEBUG, "Failed to stat "                                  "entry %s : %s", path, strerror (errno));                          goto stat_failed;                  } @@ -1293,11 +1316,12 @@ glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)                  else                          ret = unlink (path); -                if (ret) -                        gf_log (this->name, GF_LOG_ERROR, " Failed to remove " +                if (ret) { +                        gf_log (this->name, GF_LOG_DEBUG, " Failed to remove "                                  "%s. Reason : %s", path, strerror (errno)); +                } -                gf_log (this->name, ret ? GF_LOG_ERROR : GF_LOG_DEBUG, "%s %s", +                gf_log (this->name, GF_LOG_DEBUG, "%s %s",                                  ret ? "Failed to remove":"Removed",                                  entry->d_name);  stat_failed: @@ -1307,24 +1331,29 @@ stat_failed:          ret = closedir (dir);          if (ret) { -                gf_log (this->name, GF_LOG_ERROR, "Failed to close dir %s. 
" -                        "Reason : %s",pathname, strerror (errno)); +                gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. " +                        "Reason : %s",delete_path, strerror (errno));          } -        ret = rmdir (pathname); +        ret = rmdir (delete_path);          if (ret) { -                gf_log (this->name, GF_LOG_ERROR, "Failed to rmdir: %s, err: %s", -                        pathname, strerror (errno)); +                gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s", +                        delete_path, strerror (errno)); +        } +        ret = rmdir (trashdir); +        if (ret) { +                gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s, Reason:" +                        " %s", trashdir, strerror (errno));          } -  out:          if (volinfo->shandle) {                  glusterd_store_handle_destroy (volinfo->shandle);                  volinfo->shandle = NULL;          } -        gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); +        ret = (rename_fail == _gf_true) ? 
-1: 0; +        gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);          return ret;  } diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 68977dd9..762604e2 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -117,8 +117,8 @@ int32_t  glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo);  int32_t -glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, -                             glusterd_brickinfo_t *brickinfo); +glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo, +                             char *delete_path);  int32_t  glusterd_store_handle_destroy (glusterd_store_handle_t *handle); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index d07b8b1a..09e3ff66 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -5495,11 +5495,15 @@ glusterd_delete_brick (glusterd_volinfo_t* volinfo,                         glusterd_brickinfo_t *brickinfo)  {          int             ret = 0; +        char      voldir[PATH_MAX] = {0,}; +        glusterd_conf_t *priv = THIS->private;          GF_ASSERT (volinfo);          GF_ASSERT (brickinfo); +        GLUSTERD_GET_VOLUME_DIR(voldir, volinfo, priv); +          glusterd_delete_volfile (volinfo, brickinfo); -        glusterd_store_delete_brick (volinfo, brickinfo); +        glusterd_store_delete_brick (brickinfo, voldir);          glusterd_brickinfo_delete (brickinfo);          volinfo->brick_count--;          return ret; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index c9e8d42d..34593202 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -338,6 +338,7 @@ enum glusterd_vol_comp_status_ {  #define GLUSTERD_VOLUME_RBSTATE_FILE "rbstate"  #define GLUSTERD_BRICK_INFO_DIR "bricks"  #define 
GLUSTERD_CKSUM_FILE "cksum" +#define GLUSTERD_TRASH "trash"  #define GLUSTERD_NODE_STATE_FILE "node_state.info"  /* definitions related to replace brick */  | 
