diff options
| author | Ashish Pandey <aspandey@redhat.com> | 2016-02-17 15:57:02 +0530 | 
|---|---|---|
| committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2016-03-31 05:45:50 -0700 | 
| commit | 3d34c495d547866a533bc0614b14163381830095 (patch) | |
| tree | 1e14b7944f4d42aa899ede893b86d725f81af489 /xlators/features/locks/src/posix.c | |
| parent | 4f2b417f0a6fea20c8a96b6f66732c709234d637 (diff) | |
cluster/ec: Rebalance hangs during rename
Problem:
While one file is being renamed (ec is holding a
blocking inodelk on the parent directory), a rename
of another file under the same directory may arrive.
EC does not release the lock and goes ahead and
renames the "new" file with the "already held lock".
That causes the rebalance process to be blocked
on a lock which has been acquired by rename.
Solution:
When a rename fop arrives, ec takes a blocking inodelk
on the old and new parents of the file. Before releasing
any lock it holds, ec waits for some "time" to
see if that lock can be reused by the next fop.
If some other request arrives within this "time",
ec releases the lock based on the condition
"lock count > 1".
To get this "lock count" for the rename fop, we have
implemented "pl_rename" in features/locks. Also,
on the ec side, the condition to release the lock is
now based on the type of fop and the old and new parent
directories.
master-
http://review.gluster.org/#/c/13460/
Change-Id: I979dbab1185df962e8f305a6074ae1186ffe7db0
Bug: 1322299
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Reviewed-on: http://review.gluster.org/13849
Smoke: Gluster Build System <jenkins@build.gluster.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/features/locks/src/posix.c')
| -rw-r--r-- | xlators/features/locks/src/posix.c | 154 | 
1 files changed, 109 insertions, 45 deletions
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 76c7c4ee7ce..afe402cd828 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -30,6 +30,7 @@  #include "clear.h"  #include "defaults.h"  #include "syncop.h" +#include "pl-messages.h"  #ifndef LLONG_MAX  #define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */ @@ -51,7 +52,7 @@ static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);                  inode_t *__inode = NULL;                                \                  char    *__name = NULL;                                 \                  dict_t  *__unref = NULL;                                \ -                                                                        \ +                int __i = 0 ;                                           \                  __local = frame->local;                                 \                  if (op_ret >= 0 && pl_needs_xdata_response (frame->local)) {\                          if (xdata)                                      \ @@ -60,12 +61,17 @@ static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);                                  xdata = dict_new();                     \                          if (xdata) {                                    \                                  __unref = xdata;                        \ -                                pl_get_xdata_rsp_args (__local,         \ -                                      #fop, &__parent, &__inode,        \ -                                      &__name);                         \ -                                pl_set_xdata_response (frame->this,     \ -                                    __local, __parent, __inode, __name, \ -                                    xdata);                             \ +                                while (__local->fd || __local->loc[__i].inode) {  \ +                                        pl_get_xdata_rsp_args 
(__local,            \ +                                              #fop, &__parent, &__inode,           \ +                                              &__name, __i);                       \ +                                        pl_set_xdata_response (frame->this,        \ +                                               __local, __parent, __inode, __name, \ +                                               xdata, __i > 0);                    \ +                                        if (__local->fd || __i == 1)               \ +                                                break;                  \ +                                        __i++;                          \ +                                }                                       \                          }                                               \                  }                                                       \                  frame->local = NULL;                                    \ @@ -73,7 +79,8 @@ static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);                  if (__local) {                                          \                          if (__local->inodelk_dom_count_req)             \                                  data_unref (__local->inodelk_dom_count_req);\ -                        loc_wipe (&__local->loc);                       \ +                        loc_wipe (&__local->loc[0]);                    \ +                        loc_wipe (&__local->loc[1]);                    \                          if (__local->fd)                                \                                  fd_unref (__local->fd);                 \                          mem_put (__local);                              \ @@ -82,16 +89,22 @@ static int fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);                          dict_unref (__unref);                           \          } while (0) -#define PL_LOCAL_GET_REQUESTS(frame, this, xdata, __fd, 
__loc)          \ +#define PL_LOCAL_GET_REQUESTS(frame, this, xdata, __fd, __loc, __newloc)\          do {                                                            \                  if (pl_has_xdata_requests (xdata)) {                    \                          frame->local = mem_get0 (this->local_pool);     \                          pl_local_t *__local = frame->local;             \                          if (__local) {                                  \ -                                if (__fd)                               \ +                                if (__fd) {                             \                                          __local->fd = fd_ref (__fd);    \ -                                else                                    \ -                                        loc_copy (&__local->loc, __loc);\ +                                } else {                                \ +                                        if (__loc)                      \ +                                                loc_copy (&__local->loc[0],\ +                                                           __loc);      \ +                                        if (__newloc)                   \ +                                                 loc_copy (&__local->loc[1],\ +                                                            __newloc);  \ +                                }                                       \                                  pl_get_xdata_requests (__local, xdata); \                          }                                               \                  }                                                       \ @@ -172,17 +185,17 @@ pl_needs_xdata_response (pl_local_t *local)  void  pl_get_xdata_rsp_args (pl_local_t *local, char *fop, inode_t **parent, -                       inode_t **inode, char **name) +                       inode_t **inode, char **name, int i)  {          if (strcmp (fop, "lookup") == 0) { -                
*parent = local->loc.parent; -                *inode = local->loc.inode; -                *name = (char *)local->loc.name; +                *parent = local->loc[0].parent; +                *inode = local->loc[0].inode; +                *name = (char *)local->loc[0].name;          } else {                  if (local->fd) {                          *inode = local->fd->inode;                  } else { -                        *inode = local->loc.parent; +                        *inode = local->loc[i].parent;                  }          }  } @@ -228,16 +241,22 @@ out:  void  pl_parent_entrylk_xattr_fill (xlator_t *this, inode_t *parent, -                              char *basename, dict_t *dict) +                              char *basename, dict_t *dict, gf_boolean_t keep_max)  { -        uint32_t         entrylk = 0; -        int             ret     = -1; +        int32_t         entrylk  = 0; +        int32_t         maxcount = -1; +        int             ret      = -1;          if (!parent || !basename || !strlen (basename))                  goto out; +        if (keep_max) { +                ret = dict_get_int32 (dict, GLUSTERFS_PARENT_ENTRYLK, &maxcount); +        }          entrylk = check_entrylk_on_basename (this, parent, basename); +        if (maxcount >= entrylk) +                return;  out: -        ret = dict_set_uint32 (dict, GLUSTERFS_PARENT_ENTRYLK, entrylk); +        ret = dict_set_int32 (dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);          if (ret < 0) {                  gf_log (this->name, GF_LOG_DEBUG,                          " dict_set failed on key %s", GLUSTERFS_PARENT_ENTRYLK); @@ -246,12 +265,19 @@ out:  void  pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode, -                       dict_t *dict) +                       dict_t *dict, gf_boolean_t keep_max)  {          int32_t     count = 0; +        int32_t     maxcount = -1;          int         ret   = -1; +        if (keep_max) { +                ret = dict_get_int32 (dict, 
GLUSTERFS_ENTRYLK_COUNT, &maxcount); +        }          count = get_entrylk_count (this, inode); +        if (maxcount >= count) +                return; +          ret = dict_set_int32 (dict, GLUSTERFS_ENTRYLK_COUNT, count);          if (ret < 0) {                  gf_log (this->name, GF_LOG_DEBUG, @@ -262,13 +288,18 @@ pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode,  void  pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict, -                       char *domname) +                       char *domname, gf_boolean_t keep_max)  {          int32_t     count = 0; +        int32_t     maxcount = -1;          int         ret   = -1; - +        if (keep_max) { +                ret = dict_get_int32 (dict, GLUSTERFS_INODELK_COUNT, &maxcount); +        }          count = get_inodelk_count (this, inode, domname); +        if (maxcount >= count) +                return;          ret = dict_set_int32 (dict, GLUSTERFS_INODELK_COUNT, count);          if (ret < 0) { @@ -281,12 +312,19 @@ pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode, dict_t *dict,  void  pl_posixlk_xattr_fill (xlator_t *this, inode_t *inode, -                       dict_t *dict) +                       dict_t *dict, gf_boolean_t keep_max)  {          int32_t     count = 0; +        int32_t     maxcount = -1;          int         ret   = -1; +        if (keep_max) { +                ret = dict_get_int32 (dict, GLUSTERFS_POSIXLK_COUNT, &maxcount); +        }          count = get_posixlk_count (this, inode); +        if (maxcount >= count) +                return; +          ret = dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, count);          if (ret < 0) {                  gf_log (this->name, GF_LOG_DEBUG, @@ -297,26 +335,26 @@ pl_posixlk_xattr_fill (xlator_t *this, inode_t *inode,  void  pl_set_xdata_response (xlator_t *this, pl_local_t *local, inode_t *parent, -                       inode_t *inode, char *name, dict_t *xdata) +                       inode_t *inode, char *name, 
dict_t *xdata, gf_boolean_t max_lock)  {          if (!xdata || !local)                  return;          if (local->parent_entrylk_req && parent && name && strlen (name)) -                pl_parent_entrylk_xattr_fill (this, parent, name, xdata); +                pl_parent_entrylk_xattr_fill (this, parent, name, xdata, max_lock);          if (local->entrylk_count_req && inode) -                pl_entrylk_xattr_fill (this, inode, xdata); +                pl_entrylk_xattr_fill (this, inode, xdata, max_lock);          if (local->inodelk_dom_count_req && inode)                  pl_inodelk_xattr_fill (this, inode, xdata, -                                    data_to_str (local->inodelk_dom_count_req)); +                                    data_to_str (local->inodelk_dom_count_req), max_lock);          if (local->inodelk_count_req && inode) -                pl_inodelk_xattr_fill (this, inode, xdata, NULL); +                pl_inodelk_xattr_fill (this, inode, xdata, NULL, max_lock);          if (local->posixlk_count_req && inode) -                pl_posixlk_xattr_fill (this, inode, xdata); +                pl_posixlk_xattr_fill (this, inode, xdata, max_lock);  }  static pl_fdctx_t * @@ -379,7 +417,7 @@ pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          local = frame->local;          if (local->op == TRUNCATE) -                loc_wipe (&local->loc); +                loc_wipe (&local->loc[0]);          if (local->xdata)                  dict_unref (local->xdata); @@ -448,7 +486,7 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          }          if (local->op == TRUNCATE) -                inode = local->loc.inode; +                inode = local->loc[0].inode;          else                  inode = local->fd->inode; @@ -473,7 +511,7 @@ truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          case TRUNCATE:                  STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),                          
    FIRST_CHILD (this)->fops->truncate, -                            &local->loc, local->offset, local->xdata); +                            &local->loc[0], local->offset, local->xdata);                  break;          case FTRUNCATE:                  STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this), @@ -488,7 +526,7 @@ unwind:          gf_log (this->name, GF_LOG_ERROR, "truncate failed with ret: %d, "                  "error: %s", op_ret, strerror (op_errno));          if (local->op == TRUNCATE) -                loc_wipe (&local->loc); +                loc_wipe (&local->loc[0]);          if (local->xdata)                  dict_unref (local->xdata);          if (local->fd) @@ -510,7 +548,7 @@ pl_truncate (call_frame_t *frame, xlator_t *this,          local->op         = TRUNCATE;          local->offset     = offset; -        loc_copy (&local->loc, loc); +        loc_copy (&local->loc[0], loc);          if (xdata)                  local->xdata = dict_ref (xdata); @@ -1205,7 +1243,7 @@ int32_t  pl_opendir (call_frame_t *frame, xlator_t *this,              loc_t *loc, fd_t *fd, dict_t *xdata)  { -        PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL); +        PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);          STACK_WIND (frame, pl_opendir_cbk, FIRST_CHILD(this),                      FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);          return 0; @@ -1331,7 +1369,7 @@ pl_create (call_frame_t *frame, xlator_t *this,             loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd,             dict_t *xdata)  { -        PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc); +        PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc, NULL);          STACK_WIND (frame, pl_create_cbk,                      FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,                      loc, flags, mode, umask, fd, xdata); @@ -1352,7 +1390,7 @@ int32_t  pl_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,          
   dict_t *xdata)  { -        PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc); +        PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc, NULL);          STACK_WIND (frame, pl_unlink_cbk, FIRST_CHILD(this),                      FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);          return 0; @@ -1460,7 +1498,7 @@ pl_readv (call_frame_t *frame, xlator_t *this,          priv = this->private;          pl_inode = pl_inode_get (this, fd->inode); -        PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL); +        PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);          if (priv->mandatory && pl_inode->mandatory) {                  region.fl_start   = offset; @@ -1556,7 +1594,7 @@ pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,          priv = this->private;          pl_inode = pl_inode_get (this, fd->inode); -        PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL); +        PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);          if (priv->mandatory && pl_inode->mandatory) {                  region.fl_start   = offset; @@ -2183,7 +2221,7 @@ pl_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  int32_t  pl_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)  { -        PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc); +        PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, loc, NULL);          STACK_WIND (frame, pl_lookup_cbk, FIRST_CHILD(this),                      FIRST_CHILD(this)->fops->lookup, loc, xdata);          return 0; @@ -2200,7 +2238,7 @@ pl_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,  int32_t  pl_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)  { -        PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL); +        PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);          STACK_WIND (frame, pl_fstat_cbk, FIRST_CHILD(this),                      FIRST_CHILD(this)->fops->fstat, fd, xdata);        
  return 0; @@ -2223,7 +2261,7 @@ pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          list_for_each_entry (entry, &entries->list, list) {                  pl_set_xdata_response (this, local, local->fd->inode,                                         entry->inode, entry->d_name, -                                       entry->dict); +                                       entry->dict, 0);          }  unwind: @@ -2237,7 +2275,7 @@ int  pl_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,               off_t offset, dict_t *xdata)  { -        PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL); +        PL_LOCAL_GET_REQUESTS (frame, this, xdata, fd, NULL, NULL);          STACK_WIND (frame, pl_readdirp_cbk,                      FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,                      fd, size, offset, xdata); @@ -2790,6 +2828,31 @@ pl_fentrylk (call_frame_t *frame, xlator_t *this,               const char *volume, fd_t *fd, const char *basename,               entrylk_cmd cmd, entrylk_type type, dict_t *xdata); +int32_t +pl_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +               int32_t op_ret, int32_t op_errno, struct iatt *buf, +               struct iatt *preoldparent, struct iatt *postoldparent, +               struct iatt *prenewparent, struct iatt *postnewparent, +               dict_t *xdata) +{ +        PL_STACK_UNWIND (rename, xdata, frame, op_ret, op_errno, +                         buf, preoldparent, postoldparent, prenewparent, +                         postnewparent, xdata); +        return 0; +} + +int32_t +pl_rename (call_frame_t *frame, xlator_t *this, +           loc_t *oldloc, loc_t *newloc, dict_t *xdata) +{ +        PL_LOCAL_GET_REQUESTS (frame, this, xdata, NULL, oldloc,  newloc); + +        STACK_WIND (frame, pl_rename_cbk, FIRST_CHILD (this), +                    FIRST_CHILD(this)->fops->rename, oldloc, +                    newloc, xdata); +        return 0; +} +  
struct xlator_fops fops = {          .lookup      = pl_lookup,          .create      = pl_create, @@ -2810,6 +2873,7 @@ struct xlator_fops fops = {          .getxattr    = pl_getxattr,          .fgetxattr   = pl_fgetxattr,          .fsetxattr   = pl_fsetxattr, +        .rename      = pl_rename,  };  struct xlator_dumpops dumpops = {  | 
