diff options
| -rw-r--r-- | tests/bugs/shard/bug-1468483.t | 58 | ||||
| -rw-r--r-- | tests/bugs/shard/shard-inode-refcount-test.t | 2 | ||||
| -rw-r--r-- | xlators/features/shard/src/shard-messages.h | 3 | ||||
| -rw-r--r-- | xlators/features/shard/src/shard.c | 533 | ||||
| -rw-r--r-- | xlators/features/shard/src/shard.h | 6 | 
5 files changed, 563 insertions, 39 deletions
diff --git a/tests/bugs/shard/bug-1468483.t b/tests/bugs/shard/bug-1468483.t new file mode 100644 index 00000000000..e462b8d54d5 --- /dev/null +++ b/tests/bugs/shard/bug-1468483.t @@ -0,0 +1,58 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../common-utils.rc + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 $H0:$B0/${V0}0 +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 features.shard on +TEST $CLI volume set $V0 features.shard-block-size 16MB +TEST $CLI volume start $V0 +TEST $CLI volume profile $V0 start + +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 +TEST dd if=/dev/zero conv=fsync of=$M0/foo bs=1M count=100 + +#This should ensure /.shard is created on the bricks. +TEST stat $B0/${V0}0/.shard + +gfid_foo=$(get_gfid_string $M0/foo) + +TEST stat $B0/${V0}0/.shard/$gfid_foo.1 +TEST stat $B0/${V0}0/.shard/$gfid_foo.2 +TEST stat $B0/${V0}0/.shard/$gfid_foo.3 +TEST stat $B0/${V0}0/.shard/$gfid_foo.4 +TEST stat $B0/${V0}0/.shard/$gfid_foo.5 +TEST stat $B0/${V0}0/.shard/$gfid_foo.6 + +# For a file with 7 shards, there should be 7 fsyncs on the brick. Without this +# fix, I was seeing only 1 fsync (on the base shard alone). + +EXPECT "7" echo `$CLI volume profile $V0 info incremental | grep -w FSYNC | awk '{print $8}'` + +useradd -M test_user 2>/dev/null + +TEST touch $M0/bar + +# Change ownership to non-root on bar. +TEST chown test_user:test_user $M0/bar + +TEST $CLI volume profile $V0 stop +TEST $CLI volume profile $V0 start + +# Write 100M of data on bar as non-root. +TEST run_cmd_as_user test_user "dd if=/dev/zero conv=fsync of=$M0/bar bs=1M count=100" + +EXPECT "7" echo `$CLI volume profile $V0 info incremental | grep -w FSYNC | awk '{print $8}'` + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +userdel test_user +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0 + +cleanup diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t index 635809724cf..03e0cc9619b 100644 --- a/tests/bugs/shard/shard-inode-refcount-test.t +++ b/tests/bugs/shard/shard-inode-refcount-test.t @@ -14,7 +14,7 @@ TEST $CLI volume start $V0  TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 -TEST dd if=/dev/zero of=$M0/one-plus-five-shards bs=1M count=23 +TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23  ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0)  TEST rm -f $M0/one-plus-five-shards diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h index 9a232a8e7ff..d7296fc214f 100644 --- a/xlators/features/shard/src/shard-messages.h +++ b/xlators/features/shard/src/shard-messages.h @@ -41,7 +41,8 @@ GLFS_MSGID(SHARD,          SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED,          SHARD_MSG_UPDATE_FILE_SIZE_FAILED,          SHARD_MSG_FOP_NOT_SUPPORTED, -        SHARD_MSG_INVALID_FOP +        SHARD_MSG_INVALID_FOP, +        SHARD_MSG_MEMALLOC_FAILED  );  #endif /* !_SHARD_MESSAGES_H_ */ diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index 48a87b8f72f..6c15bb3167e 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -76,6 +76,7 @@ __shard_inode_ctx_get (inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)                  return ret;          INIT_LIST_HEAD (&ctx_p->ilist); +        INIT_LIST_HEAD (&ctx_p->to_fsync_list);          ret = __inode_ctx_set (inode, this, (uint64_t *)&ctx_p);          if (ret < 0) { @@ -205,6 +206,65 @@ shard_inode_ctx_set_refreshed_flag (inode_t *inode, xlator_t *this)          return ret;  } +int +__shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this, +                                     inode_t *shard_inode) +{ +        int                 ret           = -1; +        shard_inode_ctx_t  *base_ictx     = NULL; +        shard_inode_ctx_t  *shard_ictx    = NULL; + +        ret = __shard_inode_ctx_get (base_inode, this, &base_ictx); +        if (ret) +                return ret; + +        ret = __shard_inode_ctx_get (shard_inode, this, &shard_ictx); +        if (ret) +                return ret; + +        if (shard_ictx->fsync_needed) { +                shard_ictx->fsync_needed++; +                return 1; +        } + +        list_add_tail (&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list); +        shard_ictx->inode = shard_inode; +        shard_ictx->fsync_needed++; +        base_ictx->fsync_count++; +        shard_ictx->base_inode = base_inode; + +        return 0; +} + +int +shard_inode_ctx_add_to_fsync_list (inode_t *base_inode, xlator_t *this, +                                   inode_t *shard_inode) +{ +        int ret = -1; + +        /* This ref acts as a refkeepr on the base inode. We +         * need to keep this inode alive as it holds the head +         * of the to_fsync_list. +         */ +        inode_ref (base_inode); + +        LOCK (&base_inode->lock); +        LOCK (&shard_inode->lock); +        { +                ret = __shard_inode_ctx_add_to_fsync_list (base_inode, this, +                                                           shard_inode); +        } +        UNLOCK (&shard_inode->lock); +        UNLOCK (&base_inode->lock); + +        /* Unref the base inode corresponding to the ref above, if the shard is +         * found to be already part of the fsync list. +         */ +        if (ret != 0) +                inode_unref (base_inode); +        return ret; +} +  gf_boolean_t  __shard_inode_ctx_needs_lookup (inode_t *inode, xlator_t *this)  { @@ -301,6 +361,40 @@ shard_inode_ctx_get_block_size (inode_t *inode, xlator_t *this,  }  int +__shard_inode_ctx_get_fsync_count (inode_t *inode, xlator_t *this, +                                   int *fsync_count) +{ +        int                 ret      = -1; +        uint64_t            ctx_uint = 0; +        shard_inode_ctx_t  *ctx      = NULL; + +        ret = __inode_ctx_get (inode, this, &ctx_uint); +        if (ret < 0) +                return ret; + +        ctx = (shard_inode_ctx_t *) ctx_uint; + +        *fsync_count = ctx->fsync_needed; + +        return 0; +} + +int +shard_inode_ctx_get_fsync_count (inode_t *inode, xlator_t *this, +                                int *fsync_count) +{ +        int ret = -1; + +        LOCK (&inode->lock); +        { +                ret = __shard_inode_ctx_get_fsync_count (inode, this, +                                                         fsync_count); +        } +        UNLOCK (&inode->lock); + +        return ret; +} +int  __shard_inode_ctx_get_all (inode_t *inode, xlator_t *this,                             shard_inode_ctx_t *ctx_out)  { @@ -482,15 +576,19 @@ out:          return ret;  } -void +inode_t *  __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,                                    inode_t *base_inode, int block_num)  { -        char                block_bname[256] = {0,}; -        inode_t            *lru_inode        = NULL; -        shard_priv_t       *priv             = NULL; -        shard_inode_ctx_t  *ctx              = NULL; -        shard_inode_ctx_t  *lru_inode_ctx    = NULL; +        char                block_bname[256]     = {0,}; +        inode_t            *lru_inode            = NULL; +        shard_priv_t       *priv                 = NULL; +        shard_inode_ctx_t  *ctx                  = NULL; +        shard_inode_ctx_t  *lru_inode_ctx        = NULL; +        shard_inode_ctx_t  *lru_base_inode_ctx   = NULL; +        inode_t            *fsync_inode          = NULL; +        inode_t            *lru_base_inode       = NULL; +        gf_boolean_t        do_fsync             = _gf_false;          priv = this->private; @@ -510,6 +608,7 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,                          ctx->block_num = block_num;                          list_add_tail (&ctx->ilist, &priv->ilist_head);                          priv->inode_count++; +                        ctx->base_inode = base_inode;                  } else {                  /*If on the other hand there is no available slot for this inode                   * in the list, delete the lru inode from the head of the list, @@ -519,30 +618,56 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,                                                            shard_inode_ctx_t,                                                            ilist);                          GF_ASSERT (lru_inode_ctx->block_num > 0); +                        lru_base_inode = lru_inode_ctx->base_inode;                          list_del_init (&lru_inode_ctx->ilist);                          lru_inode = inode_find (linked_inode->table,                                                  lru_inode_ctx->stat.ia_gfid); -                        shard_make_block_bname (lru_inode_ctx->block_num, -                                                lru_inode_ctx->base_gfid, -                                                block_bname, -                                                sizeof (block_bname)); -                        inode_unlink (lru_inode, priv->dot_shard_inode, -                                      block_bname); -                        /* The following unref corresponds to the ref held by -                         * inode_find() above. +                        /* If the lru inode was part of the pending-fsync list, +                         * the base inode needs to be unref'd, the lru inode +                         * deleted from fsync list and fsync'd in a new frame, +                         * and then unlinked in memory and forgotten.                           */ -                        inode_unref (lru_inode); +                        LOCK (&lru_base_inode->lock); +                        LOCK (&lru_inode->lock); +                        { +                                if (!list_empty(&lru_inode_ctx->to_fsync_list)) { +                                        list_del_init (&lru_inode_ctx->to_fsync_list); +                                        lru_inode_ctx->fsync_needed = 0; +                                        do_fsync = _gf_true; +                                        __shard_inode_ctx_get (lru_base_inode, this, &lru_base_inode_ctx); +                                        lru_base_inode_ctx->fsync_count--; +                                } +                        } +                        UNLOCK (&lru_inode->lock); +                        UNLOCK (&lru_base_inode->lock); + +                        if (!do_fsync) { +                                shard_make_block_bname (lru_inode_ctx->block_num, +                                                        lru_inode_ctx->base_gfid, +                                                        block_bname, +                                                        sizeof (block_bname));                          /* The following unref corresponds to the ref held at -                         * the time the shard was created or looked up +                         * the time the shard was added to the lru list. +                         */ +                                inode_unref (lru_inode); +                                inode_unlink (lru_inode, priv->dot_shard_inode, +                                              block_bname); +                                inode_forget (lru_inode, 0); +                        } else { +                                fsync_inode = lru_inode; +                                inode_unref (lru_base_inode); +                        } +                        /* The following unref corresponds to the ref +                         * held by inode_find() above.                           */                          inode_unref (lru_inode); -                        inode_forget (lru_inode, 0);                          /* For as long as an inode is in lru list, we try to                           * keep it alive by holding a ref on it.                           */                          inode_ref (linked_inode);                          gf_uuid_copy (ctx->base_gfid, base_inode->gfid);                          ctx->block_num = block_num; +                        ctx->base_inode = base_inode;                          list_add_tail (&ctx->ilist, &priv->ilist_head);                  }          } else { @@ -551,6 +676,7 @@ __shard_update_shards_inode_list (inode_t *linked_inode, xlator_t *this,           */                  list_move_tail (&ctx->ilist, &priv->ilist_head);          } +        return fsync_inode;  }  int @@ -617,6 +743,85 @@ shard_common_inode_write_success_unwind (glusterfs_fop_t fop,  }  int +shard_evicted_inode_fsync_cbk (call_frame_t *frame, void *cookie, +                               xlator_t *this, int32_t op_ret, int32_t op_errno, +                               struct iatt *prebuf, struct iatt *postbuf, +                               dict_t *xdata) +{ +        char                  block_bname[256] = {0,}; +        fd_t                 *anon_fd          = cookie; +        inode_t              *shard_inode      = NULL; +        shard_inode_ctx_t    *ctx              = NULL; +        shard_priv_t         *priv             = NULL; + +        priv = this->private; +        shard_inode = anon_fd->inode; + +        if (op_ret < 0) { +                gf_msg (this->name, GF_LOG_WARNING, op_errno, +                        SHARD_MSG_MEMALLOC_FAILED, "fsync failed on shard"); +                goto out; +        } + +        LOCK (&priv->lock); +        LOCK(&shard_inode->lock); +        { +                __shard_inode_ctx_get (shard_inode, this, &ctx); +                if ((list_empty(&ctx->to_fsync_list)) && +                    (list_empty(&ctx->ilist))) { +                        shard_make_block_bname (ctx->block_num, +                                                shard_inode->gfid, block_bname, +                                                sizeof (block_bname)); +                        inode_unlink (shard_inode, priv->dot_shard_inode, +                                      block_bname); +                        /* The following unref corresponds to the ref held by +                         * inode_link() at the time the shard was created or +                         * looked up +                         */ +                        inode_unref (shard_inode); +                        inode_forget (shard_inode, 0); +                } +        } +        UNLOCK(&shard_inode->lock); +        UNLOCK(&priv->lock); + +out: +        if (anon_fd) +                fd_unref (anon_fd); +        STACK_DESTROY (frame->root); +        return 0; +} + +int +shard_initiate_evicted_inode_fsync (xlator_t *this, inode_t *inode) +{ +        fd_t             *anon_fd     = NULL; +        call_frame_t     *fsync_frame = NULL; + +        fsync_frame = create_frame (this, this->ctx->pool); +        if (!fsync_frame) { +                gf_msg (this->name, GF_LOG_WARNING, ENOMEM, +                        SHARD_MSG_MEMALLOC_FAILED, "Failed to create new frame " +                        "to fsync shard"); +                return -1; +        } + +        anon_fd = fd_anonymous (inode); +        if (!anon_fd) { +                gf_msg (this->name, GF_LOG_WARNING, ENOMEM, +                        SHARD_MSG_MEMALLOC_FAILED, "Failed to create anon fd to" +                        " fsync shard"); +                STACK_DESTROY (fsync_frame->root); +                return -1; +        } + +        STACK_WIND_COOKIE (fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd, +                           FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, +                           anon_fd, 1, NULL); +        return 0; +} + +int  shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,                               shard_post_resolve_fop_handler_t post_res_handler)  { @@ -625,6 +830,7 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,          char                  path[PATH_MAX] = {0,};          inode_t              *inode          = NULL;          inode_t              *res_inode      = NULL; +        inode_t              *fsync_inode    = NULL;          shard_priv_t         *priv           = NULL;          shard_local_t        *local          = NULL; @@ -661,20 +867,22 @@ shard_common_resolve_shards (call_frame_t *frame, xlator_t *this,                           */                          LOCK(&priv->lock);                          { -                                __shard_update_shards_inode_list (inode, this, +                                fsync_inode = __shard_update_shards_inode_list (inode, +                                                                  this,                                                                    res_inode,                                                                  shard_idx_iter);                          }                          UNLOCK(&priv->lock);                          shard_idx_iter++; - +                        if (fsync_inode) +                                shard_initiate_evicted_inode_fsync (this, +                                                                    fsync_inode);                           continue;                  } else {                          local->call_count++;                          shard_idx_iter++;                  }          } -  out:          post_res_handler (frame, this);          return 0; @@ -1657,6 +1865,7 @@ shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode,          char            block_bname[256] = {0,};          inode_t        *linked_inode     = NULL;          xlator_t       *this             = NULL; +        inode_t        *fsync_inode      = NULL;          shard_priv_t   *priv             = NULL;          this = THIS; @@ -1674,10 +1883,14 @@ shard_link_block_inode (shard_local_t *local, int block_num, inode_t *inode,          LOCK(&priv->lock);          { -                __shard_update_shards_inode_list (linked_inode, this, -                                                  local->loc.inode, block_num); +                fsync_inode = __shard_update_shards_inode_list (linked_inode, +                                                                this, +                                                                local->loc.inode, +                                                                block_num);          }          UNLOCK(&priv->lock); +        if (fsync_inode) +                shard_initiate_evicted_inode_fsync (this, fsync_inode);  }  int @@ -2120,6 +2333,7 @@ shard_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,          local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();          if (!local->xattr_req)                  goto err; +        local->resolver_base_inode = loc->inode;          shard_lookup_base_file (frame, this, &local->loc,                                  shard_post_lookup_truncate_handler); @@ -2172,6 +2386,7 @@ shard_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,          local->loc.inode = inode_ref (fd->inode);          gf_uuid_copy (local->loc.gfid, fd->inode->gfid); +        local->resolver_base_inode = fd->inode;          shard_lookup_base_file (frame, this, &local->loc,                                  shard_post_lookup_truncate_handler); @@ -2509,32 +2724,48 @@ shard_unlink_block_inode (shard_local_t *local, int shard_block_num)  {          char                  block_bname[256]  = {0,};          inode_t              *inode             = NULL; +        inode_t              *base_inode        = NULL;          xlator_t             *this              = NULL;          shard_priv_t         *priv              = NULL;          shard_inode_ctx_t    *ctx               = NULL; +        shard_inode_ctx_t    *base_ictx         = NULL; +        gf_boolean_t          unlink_unref_forget = _gf_false;          this = THIS;          priv = this->private;          inode = local->inode_list[shard_block_num - local->first_block]; +        base_inode = local->resolver_base_inode;          shard_make_block_bname (shard_block_num, (local->loc.inode)->gfid,                                  block_bname, sizeof (block_bname));          LOCK(&priv->lock); +        LOCK(&base_inode->lock); +        LOCK(&inode->lock);          { -                shard_inode_ctx_get (inode, this, &ctx); +                __shard_inode_ctx_get (inode, this, &ctx);                  if (!list_empty (&ctx->ilist)) {                          list_del_init (&ctx->ilist);                          priv->inode_count--;                          GF_ASSERT (priv->inode_count >= 0); -                        inode_unlink (inode, priv->dot_shard_inode, block_bname); -                        inode_unref (inode); -                        inode_forget (inode, 0); +                        unlink_unref_forget = _gf_true; +                } +                if (ctx->fsync_needed) { +                        inode_unref (base_inode); +                        list_del_init (&ctx->to_fsync_list); +                        __shard_inode_ctx_get (base_inode, this, &base_ictx); +                        base_ictx->fsync_count--;                  }          } +        UNLOCK(&inode->lock); +        UNLOCK(&base_inode->lock); +        if (unlink_unref_forget) { +                inode_unlink (inode, priv->dot_shard_inode, block_bname); +                inode_unref (inode); +                inode_forget (inode, 0); +        }          UNLOCK(&priv->lock); -  }  int @@ -2752,6 +2983,7 @@ shard_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,          local->xflag = xflag;          local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();          local->block_size = block_size; +        local->resolver_base_inode = loc->inode;          local->fop = GF_FOP_UNLINK;          if (!this->itable)                  this->itable = (local->loc.inode)->table; @@ -2988,6 +3220,7 @@ shard_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,          frame->local = local;          loc_copy (&local->loc, oldloc);          loc_copy (&local->loc2, newloc); +        local->resolver_base_inode = newloc->inode;          local->fop = GF_FOP_RENAME;          local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new();          if (!local->xattr_req) @@ -3754,6 +3987,10 @@ shard_common_inode_write_do_cbk (call_frame_t *frame, void *cookie,                          local->delta_size += (post->ia_size - pre->ia_size);                          shard_inode_ctx_set (local->fd->inode, this, post, 0,                                               SHARD_MASK_TIMES); +                        if (local->fd->inode != anon_fd->inode) +                                shard_inode_ctx_add_to_fsync_list (local->fd->inode, +                                                                   this, +                                                                   anon_fd->inode);                  }          }          UNLOCK (&frame->lock); @@ -4204,18 +4441,198 @@ shard_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)  }  int -shard_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                 int32_t op_ret, int32_t op_errno, struct iatt *prebuf, -                 struct iatt *postbuf, dict_t *xdata) +__shard_get_timestamps_from_inode_ctx (shard_local_t *local, inode_t *inode, +                                       xlator_t *this)  { -        if (op_ret < 0) +        int                   ret      = -1; +        uint64_t              ctx_uint = 0; +        shard_inode_ctx_t    *ctx      = NULL; + +        ret = __inode_ctx_get (inode, this, &ctx_uint); +        if (ret < 0) +                return ret; + +        ctx = (shard_inode_ctx_t *) ctx_uint; + +        local->postbuf.ia_ctime = ctx->stat.ia_ctime; +        local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec; +        local->postbuf.ia_atime = ctx->stat.ia_atime; +        local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec; +        local->postbuf.ia_mtime = ctx->stat.ia_mtime; +        local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec; + +        return 0; +} + +int +shard_get_timestamps_from_inode_ctx (shard_local_t *local, inode_t *inode, +                                     xlator_t *this) +{ +        int ret = 0; + +        LOCK (&inode->lock); +        { +                ret = __shard_get_timestamps_from_inode_ctx (local, inode, +                                                             this); +        } +        UNLOCK (&inode->lock); + +        return ret; +} + +int +shard_fsync_shards_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                       int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +                       struct iatt *postbuf, dict_t *xdata) +{ +        int                    call_count  = 0; +        uint64_t               fsync_count = 0; +        fd_t                  *anon_fd     = cookie; +        shard_local_t         *local       = NULL; +        shard_inode_ctx_t     *ctx         = NULL; +        shard_inode_ctx_t     *base_ictx   = NULL; +        inode_t               *base_inode  = NULL; + +        local = frame->local; +        base_inode = local->fd->inode; + +        if (local->op_ret < 0)                  goto out; -        /* To-Do: Wind fsync on all shards of the file */ -        postbuf->ia_ctime = 0; +        LOCK (&frame->lock); +        { +                if (op_ret < 0) { +                        local->op_ret = op_ret; +                        local->op_errno = op_errno; +                        goto out; +                } +                shard_inode_ctx_set (local->fd->inode, this, postbuf, 0, +                                     SHARD_MASK_TIMES); +        } +        UNLOCK (&frame->lock); +        fd_ctx_get (anon_fd, this, &fsync_count);  out: -        SHARD_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf, -                            xdata); +        if (base_inode != anon_fd->inode) { +                LOCK (&base_inode->lock); +                LOCK (&anon_fd->inode->lock); +                { +                        __shard_inode_ctx_get (anon_fd->inode, this, &ctx); +                        __shard_inode_ctx_get (base_inode, this, &base_ictx); +                        if (op_ret == 0) +                                ctx->fsync_needed -= fsync_count; +                        GF_ASSERT (ctx->fsync_needed >= 0); +                        list_del_init (&ctx->to_fsync_list); +                        if (ctx->fsync_needed != 0) { +                                list_add_tail (&ctx->to_fsync_list, +                                               &base_ictx->to_fsync_list); +                                base_ictx->fsync_count++; +                        } +                } +                UNLOCK (&anon_fd->inode->lock); +                UNLOCK (&base_inode->lock); +        } +        if (anon_fd) +                fd_unref (anon_fd); + +        call_count = shard_call_count_return (frame); +        if (call_count != 0) +                return 0; + +        if (local->op_ret < 0) { +                SHARD_STACK_UNWIND (fsync, frame, local->op_ret, +                                    local->op_errno, NULL, NULL, NULL); +        } else { +                shard_get_timestamps_from_inode_ctx (local, base_inode, this); +                SHARD_STACK_UNWIND (fsync, frame, local->op_ret, +                                    local->op_errno, &local->prebuf, +                                    &local->postbuf, local->xattr_rsp); +        } +        return 0; +} + +int +shard_post_lookup_fsync_handler (call_frame_t *frame, xlator_t *this) +{ +        int                 ret          = 0; +        int                 call_count   = 0; +        int                 fsync_count  = 0; +        fd_t               *anon_fd      = NULL; +        inode_t            *base_inode   = NULL; +        shard_local_t      *local        = NULL; +        shard_inode_ctx_t  *ctx          = NULL; +        shard_inode_ctx_t  *iter         = NULL; +        struct list_head    copy         = {0,}; +        shard_inode_ctx_t  *tmp          = NULL; + +        local = frame->local; +        base_inode = local->fd->inode; +        local->postbuf = local->prebuf; +        INIT_LIST_HEAD (©); + +        if (local->op_ret < 0) { +                SHARD_STACK_UNWIND (fsync, frame, local->op_ret, +                                    local->op_errno, NULL, NULL, NULL); +                return 0; +        } + +        LOCK (&base_inode->lock); +        { +                __shard_inode_ctx_get (base_inode, this, &ctx); +                list_splice_init (&ctx->to_fsync_list, ©); +                call_count = ctx->fsync_count; +                ctx->fsync_count = 0; +        } +        UNLOCK (&base_inode->lock); + +        local->call_count = ++call_count; + +        /* Send fsync() on the base shard first */ +        anon_fd = fd_ref (local->fd); +        STACK_WIND_COOKIE (frame, shard_fsync_shards_cbk, anon_fd, +                           FIRST_CHILD(this), +                           FIRST_CHILD(this)->fops->fsync, anon_fd, +                           local->datasync, local->xattr_req); +        call_count--; +        anon_fd = NULL; + +        list_for_each_entry_safe (iter, tmp, ©, to_fsync_list) { +                fsync_count = 0; +                shard_inode_ctx_get_fsync_count (iter->inode, this, +                                                 &fsync_count); +                GF_ASSERT (fsync_count > 0); +                anon_fd = fd_anonymous (iter->inode); +                if (!anon_fd) { +                        local->op_ret = -1; +                        local->op_errno = ENOMEM; +                        gf_msg (this->name, GF_LOG_WARNING, ENOMEM, +                                SHARD_MSG_MEMALLOC_FAILED, "Failed to create " +                                "anon fd to fsync shard"); +                        shard_fsync_shards_cbk (frame, (void *) (long) anon_fd, +                                                this, -1, ENOMEM, NULL, NULL, +                                                NULL); +                        continue; +                } + +                ret = fd_ctx_set (anon_fd, this, fsync_count); +                if (ret) { +                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                SHARD_MSG_FD_CTX_SET_FAILED, "Failed to set fd " +                                "ctx for shard inode gfid=%s", +                                uuid_utoa (iter->inode->gfid)); +                        local->op_ret = -1; +                        local->op_errno = ENOMEM; +                        shard_fsync_shards_cbk (frame, (void *) (long) anon_fd, +                                                this, -1, ENOMEM, NULL, NULL, +                                                NULL); +                        continue; +                } +                STACK_WIND_COOKIE (frame, shard_fsync_shards_cbk, anon_fd, +                                   FIRST_CHILD(this), +                                   FIRST_CHILD(this)->fops->fsync, anon_fd, +                                   local->datasync, local->xattr_req); +                call_count--; +        } +          return 0;  } @@ -4223,8 +4640,50 @@ int  shard_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,               dict_t *xdata)  { -        STACK_WIND (frame, shard_fsync_cbk, FIRST_CHILD(this), -                    FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); +        int              ret        = 0; +        uint64_t         block_size = 0; +        shard_local_t   *local      = NULL; + +        ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size); +        if (ret) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        SHARD_MSG_INODE_CTX_GET_FAILED, "Failed to get block " +                        "size for %s from its inode ctx", +                        uuid_utoa (fd->inode->gfid)); +                goto err; +        } + +        if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) { +                STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this), +                            FIRST_CHILD(this)->fops->fsync, fd, datasync, +                            xdata); +                return 0; +        } + +        if (!this->itable) +                this->itable = fd->inode->table; + +        local = mem_get0 (this->local_pool); +        if (!local) +                goto err; + +        frame->local = local; + +        local->fd = fd_ref (fd); +        local->fop = GF_FOP_FSYNC; +        local->datasync = datasync; +        local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new (); +        if (!local->xattr_req) +                goto err; + +        local->loc.inode = inode_ref (fd->inode); +        gf_uuid_copy (local->loc.gfid, fd->inode->gfid); + +        shard_lookup_base_file (frame, this, &local->loc, +                                shard_post_lookup_fsync_handler); +        return 0; +err: +        SHARD_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);          return 0;  } diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h index 73195983aa4..75d39a19186 100644 --- a/xlators/features/shard/src/shard.h +++ b/xlators/features/shard/src/shard.h @@ -215,6 +215,7 @@ typedef struct shard_local {          uint32_t gid;          uint64_t block_size;          uint64_t dst_block_size; +        int32_t datasync;          off_t offset;          size_t total_size;          size_t written_size; @@ -270,6 +271,11 @@ typedef struct shard_inode_ctx {          uuid_t base_gfid;          int block_num;          gf_boolean_t refreshed; +        struct list_head to_fsync_list; +        int fsync_needed; +        inode_t *inode; +        int fsync_count; +        inode_t *base_inode;  } shard_inode_ctx_t;  #endif /* __SHARD_H__ */  | 
