summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src/dht-common.c
diff options
context:
space:
mode:
authorSusant Palai <spalai@redhat.com>2015-06-16 20:35:46 +0530
committerRaghavendra G <rgowdapp@redhat.com>2015-08-26 21:55:56 -0700
commit6aa0e72198e83d6dd849027fd3d0038037810065 (patch)
treef3bda5926513cf53b1741a6c2c4d2843ebb881e6 /xlators/cluster/dht/src/dht-common.c
parent6c4096f5e6a6d0f2b21cb4b2bea098b6f4eebbed (diff)
dht: block/handle create op falling to decommissioned brick
Problem: Post remove-brick start till commit phase, the client layout may not be in sync with disk layout because of lack of lookup. Hence,a create call may fall on the decommissioned brick. Solution: Will acquire a lock on hashed subvol. So that a fix-layout or selfheal can not step on layout while reading the layout. Even if we read a layout before remove-brick fix-layout and the file falls on the decommissioned brick, the file should be migrated to a new brick as per the fix-layout. BUG: 1256283 Change-Id: I3ef1adaf20dfb9524396a3648d1a664464eda8c1 Signed-off-by: Susant Palai <spalai@redhat.com> Reviewed-on: http://review.gluster.org/11260 Tested-by: Gluster Build System <jenkins@build.gluster.com> Tested-by: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Signed-off-by: Susant Palai <spalai@redhat.com> Reviewed-on: http://review.gluster.org/12001
Diffstat (limited to 'xlators/cluster/dht/src/dht-common.c')
-rw-r--r--xlators/cluster/dht/src/dht-common.c455
1 files changed, 408 insertions, 47 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index da36a3b33a5..681b74aa2f6 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -3503,7 +3503,6 @@ err:
return 0;
}
-
static int
dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret, int32_t op_errno,
@@ -5463,9 +5462,6 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int ret = -1;
dht_local_t *local = NULL;
- if (op_ret == -1)
- goto out;
-
local = frame->local;
if (!local) {
op_ret = -1;
@@ -5473,6 +5469,9 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
goto out;
}
+ if (op_ret == -1)
+ goto out;
+
prev = cookie;
if (local->loc.parent) {
@@ -5492,18 +5491,34 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
op_errno = EINVAL;
goto out;
}
+
+ local->op_errno = op_errno;
+
if (local->linked == _gf_true) {
local->stbuf = *stbuf;
dht_linkfile_attr_heal (frame, this);
}
out:
+
DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent,
- postparent, xdata);
+
+ if (local && local->lock.locks) {
+ /* store op_errno for failure case*/
+ local->op_errno = op_errno;
+ local->refresh_layout_unlock (frame, this, op_ret);
+
+ if (op_ret == 0) {
+ DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd,
+ inode, stbuf, preparent, postparent,
+ xdata);
+ }
+ } else {
+ DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode,
+ stbuf, preparent, postparent, xdata);
+ }
return 0;
}
-
int
dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
@@ -5515,8 +5530,10 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
dht_local_t *local = NULL;
xlator_t *cached_subvol = NULL;
- if (op_ret == -1)
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
goto err;
+ }
local = frame->local;
cached_subvol = local->cached_subvol;
@@ -5528,25 +5545,327 @@ dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL, NULL);
+ if (local->lock.locks)
+ local->refresh_layout_unlock (frame, this, -1);
+
+ return 0;
+}
+
+int
+dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *params)
+{
+ dht_local_t *local = NULL;
+ xlator_t *avail_subvol = NULL;
+
+ local = frame->local;
+
+ if (!dht_is_subvol_filled (this, subvol)) {
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s", loc->path,
+ subvol->name);
+
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ loc, flags, mode, umask, fd, params);
+
+ } else {
+ avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
+
+ if (avail_subvol != subvol) {
+ local->params = dict_ref (params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
+
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s (link at %s)", loc->path,
+ avail_subvol->name, subvol->name);
+
+ dht_linkfile_create (frame, dht_create_linkfile_create_cbk,
+ this, avail_subvol, subvol, loc);
+
+ goto out;
+ }
+
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ loc, flags, mode, umask, fd, params);
+ }
+out:
return 0;
}
int
+dht_build_parent_loc (xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t *op_errno)
+{
+ inode_table_t *table = NULL;
+ int ret = -1;
+
+ if (!parent || !child) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ if (child->parent) {
+ parent->inode = inode_ref (child->parent);
+ if (!parent->inode) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_uuid_copy (parent->gfid, child->pargfid);
+
+ ret = 0;
+
+ goto out;
+ } else {
+ if (gf_uuid_is_null (child->pargfid)) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ table = this->itable;
+
+ if (!table) {
+ if (op_errno) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+ }
+
+ parent->inode = inode_find (table, child->pargfid);
+
+ if (!parent->inode) {
+ if (op_errno) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+ }
+
+ gf_uuid_copy (parent->gfid, child->pargfid);
+
+ ret = 0;
+ }
+
+out:
+ return ret;
+}
+
+
+int32_t
+dht_create_do (call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *refreshed = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+
+ local = frame->local;
+
+ this = THIS;
+
+ conf = this->private;
+
+ GF_VALIDATE_OR_GOTO (this->name, conf, err);
+
+ methods = conf->methods;
+
+ GF_VALIDATE_OR_GOTO (this->name, conf->methods, err);
+
+ /* We don't need parent_loc anymore */
+ loc_wipe (&local->loc);
+
+ loc_copy (&local->loc, &local->loc2);
+
+ loc_wipe (&local->loc2);
+
+ refreshed = local->selfheal.refreshed_layout;
+
+ subvol = methods->layout_search (this, refreshed, local->loc.name);
+
+ if (!subvol) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "no subvolume in "
+ "layout for path=%s", local->loc.path);
+ local->op_errno = ENOENT;
+ goto err;
+ }
+
+ dht_create_wind_to_avail_subvol (frame, this, subvol, &local->loc,
+ local->flags, local->mode,
+ local->umask, local->fd, local->params);
+ return 0;
+err:
+ local->refresh_layout_unlock (frame, this, -1);
+
+ return 0;
+}
+
+int32_t
+dht_create_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY (frame);
+ return 0;
+}
+
+int32_t
+dht_create_finish (call_frame_t *frame, xlator_t *this, int op_ret)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count (local->lock.locks, local->lock.lk_count);
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame (frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
+
+ lock_local = dht_local_init (lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
+
+ lock_local->lock.locks = local->lock.locks;
+ lock_local->lock.lk_count = local->lock.lk_count;
+
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+
+ dht_unlock_inodelk (lock_frame, lock_local->lock.locks,
+ lock_local->lock.lk_count,
+ dht_create_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY (lock_frame);
+ }
+
+ if (op_ret == 0)
+ return 0;
+
+ DHT_STACK_UNWIND (create, frame, op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+dht_create_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local) {
+ goto err;
+ }
+
+ if (op_ret < 0) {
+ gf_msg ("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "Create lock failed for file: %s", local->loc2.name);
+
+ local->op_errno = op_errno;
+
+ goto err;
+ }
+
+ local->refresh_layout_unlock = dht_create_finish;
+
+ local->refresh_layout_done = dht_create_do;
+
+ dht_refresh_layout (frame);
+
+ return 0;
+err:
+ dht_create_finish (frame, this, -1);
+ return 0;
+}
+
+int32_t
+dht_create_lock (call_frame_t *frame, xlator_t *subvol)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
+
+ GF_VALIDATE_OR_GOTO ("dht", frame, err);
+ GF_VALIDATE_OR_GOTO (frame->this->name, frame->local, err);
+
+ local = frame->local;
+
+ lk_array = GF_CALLOC (count, sizeof (*lk_array), gf_common_mt_char);
+
+ if (lk_array == NULL)
+ goto err;
+
+ lk_array[0] = dht_lock_new (frame->this, subvol, &local->loc, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN);
+
+ if (lk_array[0] == NULL)
+ goto err;
+
+ local->lock.locks = lk_array;
+ local->lock.lk_count = count;
+
+ ret = dht_blocking_inodelk (frame, lk_array, count,
+ dht_create_lock_cbk);
+
+ if (ret < 0) {
+ local->lock.locks = NULL;
+ local->lock.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free (lk_array, count);
+ GF_FREE (lk_array);
+ }
+
+ return -1;
+}
+
+int
dht_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode,
mode_t umask, fd_t *fd, dict_t *params)
{
- int op_errno = -1;
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
+ conf = this->private;
+
dht_get_du_info (frame, this, loc);
local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
@@ -5569,48 +5888,90 @@ dht_create (call_frame_t *frame, xlator_t *this,
subvol = dht_subvol_get_hashed (this, loc);
if (!subvol) {
- gf_msg_debug (this->name, 0,
- "no subvolume in layout for path=%s",
- loc->path);
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume in layout for path=%s",
+ loc->path);
+
op_errno = ENOENT;
goto err;
}
- if (!dht_is_subvol_filled (this, subvol)) {
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path,
- subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
- goto done;
- }
- /* Choose the minimum filled volume, and create the
- files there */
- avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
- if (avail_subvol != subvol) {
- local->params = dict_ref (params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
- local->cached_subvol = avail_subvol;
- local->hashed_subvol = subvol;
- gf_msg_trace (this->name, 0,
- "creating %s on %s (link at %s)", loc->path,
- avail_subvol->name, subvol->name);
- dht_linkfile_create (frame, dht_create_linkfile_create_cbk,
- this, avail_subvol, subvol, loc);
- goto done;
+ /* Post remove-brick, the client layout may not be in sync with
+ * disk layout because of lack of lookup. Hence,a create call
+ * may fall on the decommissioned brick. Hence, if the
+ * hashed_subvol is part of decommissioned bricks list, do a
+ * lookup on parent dir. If a fix-layout is already done by the
+ * remove-brick process, the parent directory layout will be in
+ * sync with that of the disk. If fix-layout is still ending
+ * on the parent directory, we can let the file get created on
+ * the decommissioned brick which will be eventually migrated to
+ * non-decommissioned brick based on the new layout.
+ */
+
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == subvol) {
+
+ gf_msg_debug (this->name, 0, "hashed subvol:%s is "
+ "part of decommission brick list for "
+ "file: %s", subvol->name, loc->path);
+
+ /* dht_refresh_layout needs directory info in
+ * local->loc. Hence, storing the parent_loc in
+ * local->loc and storing the create context in
+ * local->loc2. We will restore this information
+ * in dht_creation do */
+
+ ret = loc_copy (&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", loc->path);
+
+ goto err;
+ }
+
+ local->params = dict_ref (params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+
+ loc_wipe (&local->loc);
+
+ ret = dht_build_parent_loc (this, &local->loc, loc,
+ &op_errno);
+
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_NO_MEMORY,
+ "parent loc build failed");
+ goto err;
+ }
+
+ ret = dht_create_lock (frame, subvol);
+
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto err;
+ }
+
+ goto done;
+ }
+ }
}
- gf_msg_trace (this->name, 0,
- "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
+
+
+ dht_create_wind_to_avail_subvol (frame, this, subvol, loc, flags, mode,
+ umask, fd, params);
done:
return 0;
err:
+
op_errno = (op_errno == -1) ? errno : op_errno;
DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
NULL, NULL, NULL);