diff options
| author | Susant Palai <spalai@redhat.com> | 2019-12-27 12:06:19 +0530 | 
|---|---|---|
| committer | Amar Tumballi <amar@kadalu.io> | 2020-02-09 02:51:31 +0000 | 
| commit | c87817495b3c5c36dcca9d157e9313b7d3195eed (patch) | |
| tree | 45d7937077f34afc4c05f8cf752f0df19bef70ba | |
| parent | 8fad76650bd85463708f59d2518f5b764ae4c702 (diff) | |
dht: Fix stale-layout and create issue
Problem: With lookup-optimize set to on by default, a client with
stale-layout can create a new file on a wrong subvol. This will lead to
possible duplicate files if two different clients attempt to create the
same file with two different layouts.
Solution: Send in-memory layout to be cross checked at posix before
commiting a "create". In case of a mismatch, sync the client layout with
that of the server and attempt the create fop one more time.
test: Manual, testcase(attached)
fixes: bz#1786679
Change-Id: Ife0941f105113f1c572f4363cbcee65e0dd9bd6a
Signed-off-by: Susant Palai <spalai@redhat.com>
| -rwxr-xr-x | tests/bugs/distribute/bug-1786679.t | 69 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 149 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 6 | ||||
| -rw-r--r-- | xlators/protocol/client/src/client-rpc-fops_v2.c | 9 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-entry-ops.c | 29 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-helpers.c | 76 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.h | 4 | 
7 files changed, 322 insertions, 20 deletions
diff --git a/tests/bugs/distribute/bug-1786679.t b/tests/bugs/distribute/bug-1786679.t new file mode 100755 index 00000000000..219ce51c8a9 --- /dev/null +++ b/tests/bugs/distribute/bug-1786679.t @@ -0,0 +1,69 @@ +#!/bin/bash + +SCRIPT_TIMEOUT=250 + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../dht.rc + + +# create 2 subvols +# create a dir +# create a file +# change layout +# remove the file +# execute create from a different mount +# Without the patch, the file will be present on both of the bricks + +cleanup + +function get_layout () { + +layout=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1 | grep dht | gawk -F"=" '{print $2}'` + +echo $layout + +} + +function set_layout() +{ +    setfattr -n  "trusted.glusterfs.dht" -v $1 $2 +} + +TEST glusterd +TEST pidof glusterd + +BRICK1=$B0/${V0}-0 +BRICK2=$B0/${V0}-1 + +TEST $CLI volume create $V0 $H0:$BRICK1 $H0:$BRICK2 +TEST $CLI volume start $V0 + +# Mount FUSE and create symlink +TEST glusterfs -s $H0 --volfile-id $V0 $M0 +TEST mkdir $M0/dir +TEST touch $M0/dir/file +TEST ! stat "$BRICK1/dir/file" +TEST stat "$BRICK2/dir/file" + +layout1="$(get_layout "$BRICK1/dir")" +layout2="$(get_layout "$BRICK2/dir")" + +TEST set_layout $layout1 "$BRICK2/dir" +TEST set_layout $layout2 "$BRICK1/dir" + +TEST rm $M0/dir/file -f +TEST gluster v set $V0 client-log-level DEBUG + +#Without the patch in place, this client will create the file in $BRICK2 +#which will lead to two files being on both the bricks when a new client +#create the file with the same name +TEST touch $M0/dir/file + +TEST glusterfs -s $H0 --volfile-id $V0 $M1 +TEST touch $M1/dir/file + +TEST stat "$BRICK1/dir/file" +TEST ! stat "$BRICK2/dir/file" + +cleanup diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index ce211e6c642..2231af647de 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -8341,6 +8341,11 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,      xlator_t *prev = NULL;      int ret = -1;      dht_local_t *local = NULL; +    gf_boolean_t parent_layout_changed = _gf_false; +    char pgfid[GF_UUID_BUF_SIZE] = {0}; +    xlator_t *subvol = NULL; + +    local = frame->local;      local = frame->local;      if (!local) { @@ -8349,8 +8354,69 @@ dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,          goto out;      } -    if (op_ret == -1) +    if (op_ret == -1) { +        local->op_errno = op_errno; +        parent_layout_changed = (xdata && +                                 dict_get(xdata, GF_PREOP_CHECK_FAILED)) +                                    ? _gf_true +                                    : _gf_false; + +        if (parent_layout_changed) { +            if (local && local->lock[0].layout.parent_layout.locks) { +                /* Returning failure as the layout could not be fixed even under +                 * the lock */ +                goto out; +            } + +            gf_uuid_unparse(local->loc.parent->gfid, pgfid); +            gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED, +                   "create (%s/%s) (path: %s): parent layout " +                   "changed. Attempting a layout refresh and then a " +                   "retry", +                   pgfid, local->loc.name, local->loc.path); + +            /* +              dht_refresh_layout needs directory info in local->loc.Hence, +              storing the parent_loc in local->loc and storing the create +              context in local->loc2. We will restore this information in +              dht_creation_do. +             */ + +            loc_wipe(&local->loc2); + +            ret = loc_copy(&local->loc2, &local->loc); +            if (ret) { +                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY, +                       "loc_copy failed %s", local->loc.path); + +                goto out; +            } + +            loc_wipe(&local->loc); + +            ret = dht_build_parent_loc(this, &local->loc, &local->loc2, +                                       &op_errno); + +            if (ret) { +                gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED, +                       "parent loc build failed"); +                goto out; +            } + +            subvol = dht_subvol_get_hashed(this, &local->loc2); + +            ret = dht_create_lock(frame, subvol); +            if (ret < 0) { +                gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR, +                       "locking parent failed"); +                goto out; +            } + +            return 0; +        } +          goto out; +    }      prev = cookie; @@ -8471,6 +8537,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,          gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,                       subvol->name); +        dht_set_parent_layout_in_dict(loc, this, local); +          STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,                            subvol->fops->create, loc, flags, mode, umask, fd,                            params); @@ -8479,10 +8547,6 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,          avail_subvol = dht_free_disk_available_subvol(this, subvol, local);          if (avail_subvol != subvol) { -            local->params = dict_ref(params); -            local->flags = flags; -            local->mode = mode; -            local->umask = umask;              local->cached_subvol = avail_subvol;              local->hashed_subvol = subvol; @@ -8498,6 +8562,8 @@ dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,          gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,                       subvol->name); +        dht_set_parent_layout_in_dict(loc, this, local); +          STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,                            subvol->fops->create, loc, flags, mode, umask, fd,                            params); @@ -8713,7 +8779,7 @@ err:      return 0;  } -static int32_t +int32_t  dht_create_lock(call_frame_t *frame, xlator_t *subvol)  {      dht_local_t *local = NULL; @@ -8759,6 +8825,60 @@ err:  }  int +dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local) +{ +    dht_conf_t *conf = this->private; +    dht_layout_t *parent_layout = NULL; +    int *parent_disk_layout = NULL; +    xlator_t *hashed_subvol = NULL; +    char pgfid[GF_UUID_BUF_SIZE] = {0}; +    int ret = 0; + +    gf_uuid_unparse(loc->parent->gfid, pgfid); + +    parent_layout = dht_layout_get(this, loc->parent); +    hashed_subvol = dht_subvol_get_hashed(this, loc); + +    ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol, +                                             &parent_disk_layout); +    if (ret == -1) { +        gf_msg(this->name, GF_LOG_WARNING, local->op_errno, +               DHT_MSG_PARENT_LAYOUT_CHANGED, +               "%s (%s/%s) (path: %s): " +               "extracting in-memory layout of parent failed. ", +               gf_fop_list[local->fop], pgfid, loc->name, loc->path); +        goto err; +    } + +    ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY, +                             conf->xattr_name); +    if (ret < 0) { +        gf_msg(this->name, GF_LOG_WARNING, local->op_errno, +               DHT_MSG_PARENT_LAYOUT_CHANGED, +               "%s (%s/%s) (path: %s): " +               "setting %s key in params dictionary failed. ", +               gf_fop_list[local->fop], pgfid, loc->name, loc->path, +               GF_PREOP_PARENT_KEY); +        goto err; +    } + +    ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout, +                       4 * 4); +    if (ret < 0) { +        gf_msg(this->name, GF_LOG_WARNING, local->op_errno, +               DHT_MSG_PARENT_LAYOUT_CHANGED, +               "%s (%s/%s) (path: %s): " +               "setting parent-layout in params dictionary failed. ", +               gf_fop_list[local->fop], pgfid, loc->name, loc->path); +        goto err; +    } + +err: +    dht_layout_unref(this, parent_layout); +    return ret; +} + +int  dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,             mode_t mode, mode_t umask, fd_t *fd, dict_t *params)  { @@ -8784,6 +8904,11 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,          goto err;      } +    local->params = dict_ref(params); +    local->flags = flags; +    local->mode = mode; +    local->umask = umask; +      if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) {          gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,                 "creating %s on %s (got create on %s)", local->loc.path, @@ -8799,10 +8924,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,          if (hashed_subvol && (hashed_subvol != subvol)) {              /* Create the linkto file and then the data file */ -            local->params = dict_ref(params); -            local->flags = flags; -            local->mode = mode; -            local->umask = umask;              local->cached_subvol = subvol;              local->hashed_subvol = hashed_subvol; @@ -8815,6 +8936,9 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,           * file as we expect a lookup everywhere if there are problems           * with the parent layout           */ + +        dht_set_parent_layout_in_dict(loc, this, local); +          STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,                            subvol->fops->create, &local->loc, flags, mode, umask,                            fd, params); @@ -8866,11 +8990,6 @@ dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,                      goto err;                  } -                local->params = dict_ref(params); -                local->flags = flags; -                local->mode = mode; -                local->umask = umask; -                  loc_wipe(&local->loc);                  ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 6b76d9a0c79..d82087c0448 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -1502,4 +1502,10 @@ dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,                         int32_t op_ret, int32_t op_errno, dict_t *dict,                         dict_t *xdata); +int32_t +dht_create_lock(call_frame_t *frame, xlator_t *subvol); + +int +dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local); +  #endif /* _DHT_H */ diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c index 7537f8adbdd..0d80d4e8efb 100644 --- a/xlators/protocol/client/src/client-rpc-fops_v2.c +++ b/xlators/protocol/client/src/client-rpc-fops_v2.c @@ -2076,11 +2076,12 @@ client4_0_create_cbk(struct rpc_req *req, struct iovec *iov, int count,          goto out;      } +    ret = client_post_create_v2(this, &rsp, &stbuf, &preparent, &postparent, +                                local, &xdata); +    if (ret < 0) +        goto out; +      if (-1 != rsp.op_ret) { -        ret = client_post_create_v2(this, &rsp, &stbuf, &preparent, &postparent, -                                    local, &xdata); -        if (ret < 0) -            goto out;          ret = client_add_fd_to_saved_fds(frame->this, fd, &local->loc,                                           local->flags, rsp.fd, 0);          if (ret) { diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c index 1f1e05f1dc9..667fec7d62f 100644 --- a/xlators/storage/posix/src/posix-entry-ops.c +++ b/xlators/storage/posix/src/posix-entry-ops.c @@ -2145,6 +2145,8 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,          0,      }; +    dict_t *xdata_rsp = dict_ref(xdata); +      DECLARE_OLD_FS_ID_VAR;      VALIDATE_OR_GOTO(frame, out); @@ -2194,6 +2196,28 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,          was_present = 0;      } +    if (!was_present) { +        if (posix_is_layout_stale(xdata, par_path, this)) { +            op_ret = -1; +            op_errno = EIO; +            if (!xdata_rsp) { +                xdata_rsp = dict_new(); +                if (!xdata_rsp) { +                    op_errno = ENOMEM; +                    goto out; +                } +            } + +            if (dict_set_int32_sizen(xdata_rsp, GF_PREOP_CHECK_FAILED, 1) == +                -1) { +                gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DICT_SET_FAILED, +                       "setting key %s in dict failed", GF_PREOP_CHECK_FAILED); +            } + +            goto out; +        } +    } +      if (priv->o_direct)          _flags |= O_DIRECT; @@ -2313,7 +2337,10 @@ out:      STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd,                          (loc) ? loc->inode : NULL, &stbuf, &preparent, -                        &postparent, xdata); +                        &postparent, xdata_rsp); + +    if (xdata_rsp) +        dict_unref(xdata_rsp);      return 0;  } diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index cbc271481a6..4919cbdf2e9 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -3586,3 +3586,79 @@ posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req)          }      }  } + +gf_boolean_t +posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this) +{ +    int op_ret = 0; +    ssize_t size = 0; +    char value_buf[4096] = { +        0, +    }; +    gf_boolean_t have_val = _gf_false; +    data_t *arg_data = NULL; +    char *xattr_name = NULL; +    gf_boolean_t is_stale = _gf_false; + +    op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name); +    if (xattr_name == NULL) { +        op_ret = 0; +        goto out; +    } + +    arg_data = dict_get(xdata, xattr_name); +    if (!arg_data) { +        op_ret = 0; +        goto out; +    } + +    size = sys_lgetxattr(par_path, xattr_name, value_buf, +                         sizeof(value_buf) - 1); + +    if (size >= 0) { +        have_val = _gf_true; +    } else { +        if (errno == ERANGE) { +            gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED, +                   "getxattr on key (%s) path (%s) failed due to" +                   " buffer overflow", +                   xattr_name, par_path); +            size = sys_lgetxattr(par_path, xattr_name, NULL, 0); +        } +        if (size < 0) { +            op_ret = -1; +            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, +                   "getxattr on key (%s)  failed, path : %s", xattr_name, +                   par_path); +            goto out; +        } +    } + +    if (!have_val) { +        size = sys_lgetxattr(par_path, xattr_name, value_buf, size); +        if (size < 0) { +            gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, +                   "getxattr on key (%s) failed (%s)", xattr_name, +                   strerror(errno)); +            goto out; +        } +    } + +    if ((arg_data->len != size) || (memcmp(arg_data->data, value_buf, size))) { +        gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED, +               "failing preop as on-disk xattr value differs from argument " +               "value for key %s", +               xattr_name); +        op_ret = -1; +    } + +out: +    dict_del_sizen(xdata, xattr_name); +    dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); + +    if (op_ret == -1) { +        is_stale = _gf_true; +    } + +    return is_stale; +} diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index ce4e1193639..7893a9b3e35 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -658,4 +658,8 @@ posix_spawn_ctx_janitor_thread(xlator_t *this);  void  posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); + +gf_boolean_t +posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this); +  #endif /* _POSIX_H */  | 
