summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht/src/dht-common.c
diff options
context:
space:
mode:
author: Dan Lambright <dlambrig@redhat.com> 2015-11-04 15:33:22 -0500
committer: Dan Lambright <dlambrig@redhat.com> 2015-11-24 04:13:53 -0800
commit: b8c56ebe617327d570c252f8c411c85a84e727e6 (patch)
tree: b8a94d37a3768a83ce3fc589d94efe86aea3676e /xlators/cluster/dht/src/dht-common.c
parent: 974837c3da9deb45cd2dc31afe56e5960268cf01 (diff)
cluster/tier: readdirp to cold tier only
It is possible for a file to be migrated in the middle of a readdir operation. If there are four subvolumes A,B,C,D, and if readdir reads them in order and reaches subvol B, then, if a file is moved from D to A, it will not be included in the readdir output. This phenomenon already existed in DHT migration but is more apparent in tiering. When a file is moved off the hashed subvolume, a T file is created. For tiering, we will make the cold subvolume the hashed subvolume. This will ensure the creation of a T file. Readdir will not skip T files in the tier translator. Making the cold subvolume the hashed subvolume ensures the T files created on promotions or creates will be less likely to fill the volume. Creates still put the data on the hot subvolume. This is a backport of 12530 > Change-Id: Ifde557d3d0e94a4570ca9f115adee3db2ee75407 > BUG: 1281598 > Signed-off-by: Dan Lambright <dlambrig@redhat.com> > Reviewed-on: http://review.gluster.org/12530 > Tested-by: Gluster Build System <jenkins@build.gluster.com> > Tested-by: NetBSD Build System <jenkins@build.gluster.org> > Reviewed-by: N Balachandran <nbalacha@redhat.com> > Reviewed-by: Raghavendra G <rgowdapp@redhat.com> Signed-off-by: Dan Lambright <dlambrig@redhat.com> Signed-off-by: Dan Lambright <dlambrig@redhat.com> Conflicts: xlators/cluster/dht/src/tier.c Change-Id: I5720a4cd04ae5088e5d7d23439b0f90d6bbc6265 BUG: 1283923 Reviewed-on: http://review.gluster.org/12722 Tested-by: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: N Balachandran <nbalacha@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Dan Lambright <dlambrig@redhat.com> Tested-by: Dan Lambright <dlambrig@redhat.com>
Diffstat (limited to 'xlators/cluster/dht/src/dht-common.c')
-rw-r--r--xlators/cluster/dht/src/dht-common.c189
1 files changed, 124 insertions, 65 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 3228f20e53e..e664b576f92 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -187,7 +187,7 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
{
dht_local_t *local = NULL;
dht_layout_t *layout = NULL;
- int ret = -1;
+ int ret = -1;
GF_VALIDATE_OR_GOTO ("dht", frame, out);
GF_VALIDATE_OR_GOTO ("dht", this, out);
@@ -787,13 +787,13 @@ dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if ((op_errno != ENOTCONN)
&& (op_errno != ENOENT)
&& (op_errno != ESTALE)) {
- gf_msg (this->name, GF_LOG_INFO, op_errno,
+ gf_msg (this->name, GF_LOG_INFO, op_errno,
DHT_MSG_REVALIDATE_CBK_INFO,
- "Revalidate: subvolume %s for %s "
+ "Revalidate: subvolume %s for %s "
"(gfid = %s) returned -1",
- prev->this->name, local->loc.path,
+ prev->this->name, local->loc.path,
gfid);
- }
+ }
if (op_errno == ESTALE) {
/* propagate the ESTALE to parent.
* setting local->return_estale would send
@@ -940,7 +940,7 @@ out:
}
}
cont:
- if (local->layout_mismatch) {
+ if (local->layout_mismatch) {
/* Found layout mismatch in the directory, need to
fix this in the inode context */
dht_layout_unref (this, local->layout);
@@ -2336,18 +2336,18 @@ dht_lookup (call_frame_t *frame, xlator_t *this,
/* need it for dir self-heal */
dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
- for (i = 0; i < call_cnt; i++) {
- subvol = layout->list[i].xlator;
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
gf_msg_debug (this->name, 0, "calling "
"revalidate lookup for %s at %s",
loc->path, subvol->name);
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ STACK_WIND (frame, dht_revalidate_cbk,
+ subvol, subvol->fops->lookup,
+ &local->loc, local->xattr_req);
- }
+ }
} else {
do_fresh_lookup:
/* TODO: remove the hard-coding */
@@ -2831,8 +2831,8 @@ dht_vgetxattr_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
gf_msg (this->name, GF_LOG_ERROR, op_errno,
DHT_MSG_GET_XATTR_FAILED,
"getxattr err for dir");
- local->op_ret = -1;
- local->op_errno = op_errno;
+ local->op_ret = -1;
+ local->op_errno = op_errno;
}
goto unlock;
@@ -3011,8 +3011,8 @@ dht_getxattr_unwind (call_frame_t *frame,
int
dht_getxattr_get_real_filename_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- dict_t *xattr, dict_t *xdata)
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
{
int this_call_cnt = 0;
dht_local_t *local = NULL;
@@ -3104,44 +3104,44 @@ unlock:
UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
+ this_call_cnt = dht_frame_return (frame);
+ if (is_last_call (this_call_cnt)) {
DHT_STACK_UNWIND (getxattr, frame, local->op_ret,
local->op_errno, local->xattr,
local->xattr_req);
- }
+ }
- return 0;
+ return 0;
}
int
dht_getxattr_get_real_filename (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key, dict_t *xdata)
+ loc_t *loc, const char *key, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int i = 0;
- dht_layout_t *layout = NULL;
- int cnt = 0;
- xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int cnt = 0;
+ xlator_t *subvol = NULL;
- local = frame->local;
- layout = local->layout;
+ local = frame->local;
+ layout = local->layout;
- cnt = local->call_cnt = layout->cnt;
+ cnt = local->call_cnt = layout->cnt;
- local->op_ret = -1;
- local->op_errno = ENOENT;
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
- for (i = 0; i < cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_getxattr_get_real_filename_cbk,
- subvol, subvol->fops->getxattr,
- loc, key, xdata);
- }
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND (frame, dht_getxattr_get_real_filename_cbk,
+ subvol, subvol->fops->getxattr,
+ loc, key, xdata);
+ }
- return 0;
+ return 0;
}
int
@@ -3210,13 +3210,13 @@ dht_getxattr (call_frame_t *frame, xlator_t *this,
}
}
- if (key &&
- (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY,
- strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)
- && DHT_IS_DIR(layout)) {
- dht_getxattr_get_real_filename (frame, this, loc, key, xdata);
- return 0;
- }
+ if (key &&
+ (strncmp (key, GF_XATTR_GET_REAL_FILENAME_KEY,
+ strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)
+ && DHT_IS_DIR(layout)) {
+ dht_getxattr_get_real_filename (frame, this, loc, key, xdata);
+ return 0;
+ }
if (key && DHT_IS_DIR(layout) &&
(!strcmp (key, GF_REBAL_FIND_LOCAL_SUBVOL))) {
@@ -3394,7 +3394,7 @@ dht_fgetxattr (call_frame_t *frame, xlator_t *this,
}
if ((fd->inode->ia_type == IA_IFDIR)
- && key
+ && key
&& (strncmp (key, GF_XATTR_LOCKINFO_KEY,
strlen (GF_XATTR_LOCKINFO_KEY)) != 0)) {
cnt = local->call_cnt = layout->cnt;
@@ -4563,6 +4563,7 @@ err:
return 0;
}
+
int
dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
@@ -4753,17 +4754,17 @@ done:
goto unwind;
}
- if (conf->readdir_optimize == _gf_true) {
+ if (conf->readdir_optimize == _gf_true) {
if (next_subvol != local->first_up_subvol) {
ret = dict_set_int32 (local->xattr,
GF_READDIR_SKIP_DIRS, 1);
if (ret)
gf_msg (this->name, GF_LOG_ERROR, 0,
DHT_MSG_DICT_SET_FAILED,
- "Failed to set dictionary value"
+ "Failed to set dictionary value"
":key = %s",
GF_READDIR_SKIP_DIRS );
- } else {
+ } else {
dict_del (local->xattr,
GF_READDIR_SKIP_DIRS);
}
@@ -4940,23 +4941,23 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
" : key = %s",
conf->link_xattr_name);
- if (conf->readdir_optimize == _gf_true) {
+ if (conf->readdir_optimize == _gf_true) {
if (xvol != local->first_up_subvol) {
- ret = dict_set_int32 (local->xattr,
- GF_READDIR_SKIP_DIRS, 1);
- if (ret)
- gf_msg (this->name,
+ ret = dict_set_int32 (local->xattr,
+ GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_msg (this->name,
GF_LOG_ERROR, 0,
DHT_MSG_DICT_SET_FAILED,
"Failed to set "
"dictionary value: "
"key = %s",
- GF_READDIR_SKIP_DIRS);
+ GF_READDIR_SKIP_DIRS);
} else {
dict_del (local->xattr,
GF_READDIR_SKIP_DIRS);
}
- }
+ }
}
STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
@@ -6043,6 +6044,58 @@ err:
}
int
+dht_create_tier_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *params)
+{
+ xlator_t *hot_subvol = NULL;
+ xlator_t *cold_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ conf = this->private;
+
+ cold_subvol = subvol;
+ hot_subvol = conf->subvolumes[1];
+ if (conf->subvolumes[0] != cold_subvol) {
+ hot_subvol = conf->subvolumes[0];
+ }
+
+ /* if hot tier full, write to cold */
+ if (dht_is_subvol_filled (this, hot_subvol)) {
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s", loc->path,
+ cold_subvol->name);
+
+ STACK_WIND (frame, dht_create_cbk,
+ cold_subvol, cold_subvol->fops->create,
+ loc, flags, mode, umask, fd, params);
+ } else {
+ local->params = dict_ref (params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+ local->cached_subvol = hot_subvol;
+ local->hashed_subvol = cold_subvol;
+
+ gf_msg_debug (this->name, 0,
+ "creating %s on %s (link at %s)", loc->path,
+ hot_subvol->name, cold_subvol->name);
+
+ dht_linkfile_create (frame, dht_create_linkfile_create_cbk,
+ this, hot_subvol, cold_subvol, loc);
+
+ goto out;
+ }
+out:
+ return 0;
+}
+
+
+int
dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
xlator_t *subvol, loc_t *loc, int32_t flags,
mode_t mode, mode_t umask, fd_t *fd,
@@ -6053,6 +6106,11 @@ dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
local = frame->local;
+ if (strcmp (this->type, "cluster/tier") == 0)
+ return dht_create_tier_wind_to_avail_subvol(frame, this, subvol,
+ loc, flags, mode,
+ umask, fd, params);
+
if (!dht_is_subvol_filled (this, subvol)) {
gf_msg_debug (this->name, 0,
"creating %s on %s", loc->path,
@@ -6525,15 +6583,15 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ret = dht_layout_merge (this, layout, prev->this,
-1, ENOSPC, NULL);
} else {
- if (op_ret == -1 && op_errno == EEXIST) {
- /* Very likely just a race between mkdir and
- self-heal (from lookup of a concurrent mkdir
- attempt).
- Ignore error for now. layout setting will
- anyways fail if this was a different (old)
- pre-existing different directory.
- */
- op_ret = 0;
+ if (op_ret == -1 && op_errno == EEXIST) {
+ /* Very likely just a race between mkdir and
+ self-heal (from lookup of a concurrent mkdir
+ attempt).
+ Ignore error for now. layout setting will
+ anyways fail if this was a different (old)
+ pre-existing different directory.
+ */
+ op_ret = 0;
dir_exists = _gf_true;
}
ret = dht_layout_merge (this, layout, prev->this,
@@ -8041,3 +8099,4 @@ int32_t dht_set_local_rebalance (xlator_t *this, dht_local_t *local,
return 0;
}
+