diff options
-rw-r--r-- | libglusterfs/src/gf-dirent.c | 128 | ||||
-rw-r--r-- | libglusterfs/src/gf-dirent.h | 10 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs.h | 2 | ||||
-rw-r--r-- | libglusterfs/src/graph.c | 125 | ||||
-rw-r--r-- | libglusterfs/src/xlator.h | 7 | ||||
-rw-r--r-- | tests/bugs/distribute/bug-1190734.t | 104 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 17 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 7 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 133 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-layout.c | 1 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 2 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-shared.c | 27 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-dir-read.c | 18 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-helpers.c | 46 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-helpers.h | 2 | ||||
-rw-r--r-- | xlators/protocol/client/src/client-handshake.c | 2 | ||||
-rw-r--r-- | xlators/protocol/client/src/client-helpers.c | 17 | ||||
-rw-r--r-- | xlators/protocol/client/src/client-rpc-fops.c | 2 | ||||
-rw-r--r-- | xlators/protocol/client/src/client.c | 11 | ||||
-rw-r--r-- | xlators/protocol/client/src/client.h | 4 |
20 files changed, 482 insertions, 183 deletions
diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c index f6fd3ab54ee..b5f395afc36 100644 --- a/libglusterfs/src/gf-dirent.c +++ b/libglusterfs/src/gf-dirent.c @@ -21,6 +21,134 @@ #include "compat.h" #include "xlator.h" +#define ONE 1ULL +#define PRESENT_D_OFF_BITS 63 +#define BACKEND_D_OFF_BITS 63 +#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1)) +#define MASK (~0ULL) +#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1))) +#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS)) + +static uint64_t +bits_for (uint64_t num) +{ + uint64_t bits = 0, ctrl = 1; + + while (ctrl < num) { + ctrl *= 2; + bits++; + } + + return bits; +} + +int +gf_deitransform(xlator_t *this, + uint64_t offset) +{ + int cnt = 0; + int max = 0; + int max_bits = 0; + uint64_t off_mask = 0; + uint64_t host_mask = 0; + + max = glusterfs_get_leaf_count(this->graph); + + if (max == 1) { + cnt = 0; + goto out; + } + + if (offset & TOP_BIT) { + /* HUGE d_off */ + max_bits = bits_for (max); + off_mask = (MASK << max_bits); + host_mask = ~(off_mask); + + cnt = offset & host_mask; + } else { + /* small d_off */ + cnt = offset % max; + } +out: + return cnt; +} + +uint64_t +gf_dirent_orig_offset(xlator_t *this, + uint64_t offset) +{ + int max = 0; + int max_bits = 0; + uint64_t off_mask = 0; + uint64_t orig_offset; + + max = glusterfs_get_leaf_count(this->graph); + + if (max == 1) { + orig_offset = offset; + goto out; + } + + if (offset & TOP_BIT) { + /* HUGE d_off */ + max_bits = bits_for (max); + off_mask = (MASK << max_bits); + orig_offset = ((offset & ~TOP_BIT) & off_mask) << SHIFT_BITS; + } else { + /* small d_off */ + orig_offset = offset / max; + } +out: + return orig_offset; +} + +int +gf_itransform (xlator_t *this, uint64_t x, uint64_t *y_p, int client_id) +{ + int max = 0; + uint64_t y = 0; + uint64_t hi_mask = 0; + uint64_t off_mask = 0; + int max_bits = 0; + + if (x == ((uint64_t) -1)) { + y = (uint64_t) -1; + goto out; + } + + if (!x) { + y = 0; + 
goto out; + } + + max = glusterfs_get_leaf_count(this->graph); + + if (max == 1) { + y = x; + goto out; + } + + max_bits = bits_for (max); + + hi_mask = ~(PRESENT_MASK >> (max_bits + 1)); + + if (x & hi_mask) { + /* HUGE d_off */ + off_mask = MASK << max_bits; + y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | client_id; + } else { + /* small d_off */ + y = ((x * max) + client_id); + } + +out: + if (y_p) + *y_p = y; + + return 0; +} + gf_dirent_t * gf_dirent_for_name (const char *name) { diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h index 4c1ff0b1684..07c605f82b0 100644 --- a/libglusterfs/src/gf-dirent.h +++ b/libglusterfs/src/gf-dirent.h @@ -22,6 +22,16 @@ #define gf_dirent_size(name) (sizeof (gf_dirent_t) + strlen (name) + 1) +int +gf_deitransform(xlator_t *this, uint64_t y); + +int +gf_itransform (xlator_t *this, uint64_t x, uint64_t *y_p, int client_id); + +uint64_t +gf_dirent_orig_offset (xlator_t *this, uint64_t offset); + + struct _dir_entry_t { struct _dir_entry_t *next; char *name; diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index a810f3a81f0..791e6dc5fd8 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -452,6 +452,7 @@ struct _glusterfs_graph { int id; /* Used in logging */ int used; /* Should be set when fuse gets first CHILD_UP */ + uint32_t leaf_count; uint32_t volfile_checksum; }; typedef struct _glusterfs_graph glusterfs_graph_t; @@ -617,6 +618,7 @@ int glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx); int glusterfs_graph_destroy_residual (glusterfs_graph_t *graph); int glusterfs_graph_deactivate (glusterfs_graph_t *graph); int glusterfs_graph_destroy (glusterfs_graph_t *graph); +int glusterfs_get_leaf_count (glusterfs_graph_t *graph); int glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx); glusterfs_graph_t *glusterfs_graph_construct (FILE *fp); glusterfs_graph_t *glusterfs_graph_new (); diff --git 
a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c index b427740f10f..709ec3b3ce3 100644 --- a/libglusterfs/src/graph.c +++ b/libglusterfs/src/graph.c @@ -515,15 +515,138 @@ glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx) /* XXX: --xlator-option additions */ gf_add_cmdline_options (graph, &ctx->cmd_args); - return 0; } +static +xlator_t *glusterfs_root(glusterfs_graph_t *graph) +{ + return graph->first; +} + +static +int glusterfs_is_leaf(xlator_t *xl) +{ + int ret = 0; + + if (!xl->children) + ret = 1; + + return ret; +} + +static +uint32_t glusterfs_count_leaves(xlator_t *xl) +{ + int n = 0; + xlator_list_t *list = NULL; + + if (glusterfs_is_leaf(xl)) + n = 1; + else + for (list = xl->children; list; list = list->next) + n += glusterfs_count_leaves(list->xlator); + + return n; +} + +int glusterfs_get_leaf_count(glusterfs_graph_t *graph) +{ + return graph->leaf_count; +} + +static +int _glusterfs_leaf_position(xlator_t *tgt, int *id, xlator_t *xl) +{ + xlator_list_t *list = NULL; + int found = 0; + + if (xl == tgt) + found = 1; + else if (glusterfs_is_leaf(xl)) + *id += 1; + else + for (list = xl->children; !found && list; list = list->next) + found = _glusterfs_leaf_position(tgt, id, list->xlator); + + return found; +} + +int glusterfs_leaf_position(xlator_t *tgt) +{ + xlator_t *root = NULL; + int pos = 0; + + root = glusterfs_root(tgt->graph); + + if (!_glusterfs_leaf_position(tgt, &pos, root)) + pos = -1; + + return pos; +} + +static int +_glusterfs_reachable_leaves(xlator_t *base, xlator_t *xl, dict_t *leaves) +{ + xlator_list_t *list = NULL; + int err = 1; + int pos = 0; + char strpos[6]; + + if (glusterfs_is_leaf(xl)) { + pos = glusterfs_leaf_position(xl); + if (pos < 0) + goto out; + sprintf(strpos, "%d", pos); + + err = dict_set_static_ptr(leaves, strpos, base); + + } else { + for (err = 0, list = xl->children; + !err && list; + list = list->next) + err = _glusterfs_reachable_leaves(base, list->xlator, + leaves); + } + +out: + 
return err; +} + +/* + * This function determines which leaves are children (or grandchildren) + * of the given base. The base may have multiple sub volumes. Each sub + * volume in turn may have sub volumes, and so on until the leaves are reached. + * Each leaf is numbered 0,1,2,... (depth-first, in child order). + * + * The base translator calls this function to see which of *its* subvolumes + * it would forward an FOP to, to *get to* a particular leaf. + * That information is built into the "leaves" dictionary. + * key:destination leaf# -> value:base subvolume xlator. + */ + +int +glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves) +{ + xlator_list_t *list = NULL; + int err = 0; + + for (list = base->children; !err && list; list = list->next) + err = _glusterfs_reachable_leaves(list->xlator, + list->xlator, leaves); + + return err; +} int glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx) { int ret = 0; + xlator_t *root = NULL; + + root = glusterfs_root(graph); + + graph->leaf_count = glusterfs_count_leaves(root); /* XXX: all xlator options validation */ ret = glusterfs_graph_validate_options (graph); diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 733f6cf47ab..5a71ceb3f31 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -978,4 +978,11 @@ glusterfs_volfile_reconfigure (int oldvollen, FILE *newvolfile_fp, int loc_touchup (loc_t *loc, const char *name); + +int +glusterfs_leaf_position(xlator_t *tgt); + +int +glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves); + #endif /* _XLATOR_H */ diff --git a/tests/bugs/distribute/bug-1190734.t b/tests/bugs/distribute/bug-1190734.t new file mode 100644 index 00000000000..c2f2338dc0f --- /dev/null +++ b/tests/bugs/distribute/bug-1190734.t @@ -0,0 +1,104 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. 
$(dirname $0)/../../nfs.rc + +BRICK_COUNT=3 +FILE_COUNT=100 + +function create_files { + rm -rf $2 + mkdir $2 + for i in `seq 1 $1`; do + touch $2/file_$i + done +} + +function check_file_count { + ORIG_FILE_COUNT=`find $2 | tail -n +2 |wc -l` + [ $ORIG_FILE_COUNT -eq $1 ] +} + +function reset { + $CLI volume stop $V0 + umount $1 + $CLI volume delete $V0 +} + +function start_mount_fuse { + $CLI volume start $V0 + [ $? -ne 0 ] && return 1 + + $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + [ $? -ne 0 ] && return 1 + + create_files $FILE_COUNT $M0/$1 + [ $? -ne 0 ] && return 1 + + return 0 +} + +function start_mount_nfs { + $CLI volume start $V0 + [ $? -ne 0 ] && return 1 + + sleep 3 + mount_nfs $H0:/$V0 $N0 + [ $? -ne 0 ] && return 1 + + create_files $FILE_COUNT $N0/$1 + [ $? -ne 0 ] && return 1 + + return 0 +} + +function start_removing_bricks { + check_file_count $FILE_COUNT $1 + [ $? -ne 0 ] && return 1 + $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 start + [ $? -ne 0 ] && return 1 + + return 0 +} + +function finish_removing_bricks { + + $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 commit + [ $? -ne 0 ] && return 1 + + check_file_count $FILE_COUNT $1 + return $? 
+} + +cleanup + +TEST glusterd +TEST pidof glusterd + +# Test 1-2 Create replicated volume + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \ + $H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5 + +# ------- test 1: AFR, fuse + remove bricks + +TEST start_mount_fuse test1 +TEST start_removing_bricks $M0/test1 +EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2 $H0:$B0/${V0}3" +$CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 status > /tmp/out +TEST finish_removing_bricks $M0/test1 +reset $M0 + +# ------- test 2: AFR, nfs + remove bricks + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \ + $H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5 + +TEST start_mount_nfs test2 +TEST start_removing_bricks $N0/test2 +EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2 $H0:$B0/${V0}3" +TEST finish_removing_bricks $N0/test2 +reset $N0 + +cleanup diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index ef816f53532..729ab30e672 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2840,6 +2840,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, } if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) { + hashed_subvol = dht_subvol_get_hashed (this, loc); if (!hashed_subvol) { gf_msg (this->name, GF_LOG_ERROR, 0, @@ -2864,6 +2865,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, op_errno = ENODATA; goto err; } + STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol, hashed_subvol->fops->getxattr, loc, GF_XATTR_PATHINFO_KEY, xdata); @@ -3854,9 +3856,7 @@ list: } } - dht_itransform (this, prev->this, orig_entry->d_off, - &entry->d_off); - + entry->d_off = orig_entry->d_off; entry->d_stat = orig_entry->d_stat; entry->d_ino = orig_entry->d_ino; entry->d_type = orig_entry->d_type; @@ -3988,9 +3988,7 @@ 
dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; } - dht_itransform (this, prev->this, orig_entry->d_off, - &entry->d_off); - + entry->d_off = orig_entry->d_off; entry->d_ino = orig_entry->d_ino; entry->d_type = orig_entry->d_type; entry->d_len = orig_entry->d_len; @@ -4050,7 +4048,6 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, dht_local_t *local = NULL; int op_errno = -1; xlator_t *xvol = NULL; - off_t xoff = 0; int ret = 0; dht_conf_t *conf = NULL; @@ -4072,7 +4069,7 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, local->xattr_req = (dict)? dict_ref (dict) : NULL; local->first_up_subvol = dht_first_up_subvol (this); - dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff); + dht_deitransform (this, yoff, &xvol); /* TODO: do proper readdir */ if (whichop == GF_FOP_READDIRP) { @@ -4111,10 +4108,10 @@ dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, } STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp, - fd, size, xoff, local->xattr); + fd, size, yoff, local->xattr); } else { STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir, - fd, size, xoff, local->xattr); + fd, size, yoff, local->xattr); } return 0; diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 779b470585c..67e693146af 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -332,6 +332,7 @@ struct dht_conf { gf_boolean_t unhashed_sticky_bit; struct timeval last_stat_fetch; gf_lock_t layout_lock; + dict_t *leaf_to_subvol; void *private; /* Can be used by wrapper xlators over dht */ gf_boolean_t use_readdirp; @@ -501,9 +502,7 @@ int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout, int dht_frame_return (call_frame_t *frame); -int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y); -int dht_deitransform (xlator_t *this, uint64_t y, xlator_t 
**subvol, - uint64_t *x); +int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol); void dht_local_wipe (xlator_t *this, dht_local_t *local); dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, @@ -775,6 +774,8 @@ int32_t dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, off_t len, dict_t *xdata); +int +dht_set_subvol_range(xlator_t *this); int32_t dht_init (xlator_t *this); void dht_fini (xlator_t *this); int dht_reconfigure (xlator_t *this, dict_t *options); diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index bf21f39a3a7..f4e5305d791 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -62,20 +62,6 @@ dht_frame_return (call_frame_t *frame) return this_call_cnt; } - -static uint64_t -dht_bits_for (uint64_t num) -{ - uint64_t bits = 0, ctrl = 1; - - while (ctrl < num) { - ctrl *= 2; - bits ++; - } - - return bits; -} - /* * A slightly "updated" version of the algorithm described in the commit log * is used here. @@ -88,66 +74,6 @@ dht_bits_for (uint64_t num) * upwards which is described as 64, are both made "configurable." 
*/ - -#define BACKEND_D_OFF_BITS 63 -#define PRESENT_D_OFF_BITS 63 - -#define ONE 1ULL -#define MASK (~0ULL) -#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS)) -#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS)) - -#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1)) -#define SHIFT_BITS (max (0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1))) - -int -dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p) -{ - dht_conf_t *conf = NULL; - int cnt = 0; - int max = 0; - uint64_t y = 0; - uint64_t hi_mask = 0; - uint64_t off_mask = 0; - int max_bits = 0; - - if (x == ((uint64_t) -1)) { - y = (uint64_t) -1; - goto out; - } - - conf = this->private; - if (!conf) - goto out; - - max = conf->subvolume_cnt; - cnt = dht_subvol_cnt (this, subvol); - - if (max == 1) { - y = x; - goto out; - } - - max_bits = dht_bits_for (max); - - hi_mask = ~(PRESENT_MASK >> (max_bits + 1)); - - if (x & hi_mask) { - /* HUGE d_off */ - off_mask = MASK << max_bits; - y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | cnt; - } else { - /* small d_off */ - y = ((x * max) + cnt); - } - -out: - if (y_p) - *y_p = y; - - return 0; -} - int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc, xlator_t **subvol) @@ -205,55 +131,44 @@ out: return ret; } -int -dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p, - uint64_t *x_p) +static xlator_t * +dht_get_subvol_from_id(xlator_t *this, int client_id) { + xlator_t *xl = NULL; dht_conf_t *conf = NULL; - int cnt = 0; - int max = 0; - uint64_t x = 0; + char sid[6] = { 0 }; + + conf = this->private; + + sprintf(sid, "%d", client_id); + if (dict_get_ptr(conf->leaf_to_subvol, sid, (void **) &xl)) + xl = NULL; + + return xl; +} + +int +dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p) +{ + int client_id = 0; xlator_t *subvol = 0; - int max_bits = 0; - uint64_t off_mask = 0; - uint64_t host_mask = 0; + dht_conf_t *conf = NULL; if (!this->private) return -1; conf = this->private; - max = 
conf->subvolume_cnt; - if (max == 1) { - x = y; - cnt = 0; - goto out; - } + client_id = gf_deitransform(this, y); - if (y & TOP_BIT) { - /* HUGE d_off */ - max_bits = dht_bits_for (max); - off_mask = (MASK << max_bits); - host_mask = ~(off_mask); + subvol = dht_get_subvol_from_id(this, client_id); - x = ((y & ~TOP_BIT) & off_mask) << SHIFT_BITS; - - cnt = y & host_mask; - } else { - /* small d_off */ - cnt = y % max; - x = y / max; - } - -out: - subvol = conf->subvolumes[cnt]; + if (!subvol) + subvol = conf->subvolumes[0]; if (subvol_p) *subvol_p = subvol; - if (x_p) - *x_p = x; - return 0; } @@ -829,6 +744,8 @@ dht_init_subvolumes (xlator_t *this, dht_conf_t *conf) } conf->subvolume_cnt = cnt; + dht_set_subvol_range(this); + cnt = 0; for (subvols = this->children; subvols; subvols = subvols->next) conf->subvolumes[cnt++] = subvols->xlator; diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 757ec731d26..3ea75b34ad0 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -166,7 +166,6 @@ dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name) int i = 0; int ret = 0; - ret = dht_hash_compute (this, layout->type, name, &hash); if (ret != 0) { gf_log (this->name, GF_LOG_WARNING, diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 466042c74a0..3531872dd31 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -1492,12 +1492,14 @@ gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, if (defrag->stats == _gf_true) { gettimeofday (&start, NULL); } + if (defrag->defrag_pattern && (gf_defrag_pattern_match (defrag, entry->d_name, entry->d_stat.ia_size) == _gf_false)) { continue; } + loc_wipe (&entry_loc); ret =dht_build_child_loc (this, &entry_loc, loc, entry->d_name); diff --git a/xlators/cluster/dht/src/dht-shared.c 
b/xlators/cluster/dht/src/dht-shared.c index 22a7260f829..860f3e716f0 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -214,6 +214,8 @@ dht_fini (xlator_t *this) GF_FREE (conf->file_layouts); } + dict_destroy(conf->leaf_to_subvol); + GF_FREE (conf->subvolumes); GF_FREE (conf->subvolume_status); @@ -288,7 +290,6 @@ out: return ret; } - int dht_decommissioned_remove (xlator_t *this, dht_conf_t *conf) { @@ -344,6 +345,27 @@ dht_init_regex (xlator_t *this, dict_t *odict, char *name, } int +dht_set_subvol_range(xlator_t *this) +{ + int ret = -1; + dht_conf_t *conf = NULL; + + conf = this->private; + + if (!conf) + goto out; + + conf->leaf_to_subvol = dict_new(); + if (!conf->leaf_to_subvol) + goto out; + + ret = glusterfs_reachable_leaves(this, conf->leaf_to_subvol); + +out: + return ret; +} + +int dht_reconfigure (xlator_t *this, dict_t *options) { dht_conf_t *conf = NULL; @@ -676,6 +698,9 @@ dht_init (xlator_t *this) this->private = conf; + if (dht_set_subvol_range(this)) + goto err; + return 0; err: diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c index c705b80fe82..17e1a3d124e 100644 --- a/xlators/cluster/ec/src/ec-dir-read.c +++ b/xlators/cluster/ec/src/ec-dir-read.c @@ -304,8 +304,6 @@ void ec_adjust_readdir(ec_t * ec, int32_t idx, gf_dirent_t * entries) list_for_each_entry(entry, &entries->list, list) { - entry->d_off = ec_itransform(ec, idx, entry->d_off); - if (entry->d_stat.ia_type == IA_IFREG) { if ((entry->dict == NULL) || @@ -413,10 +411,20 @@ int32_t ec_manager_readdir(ec_fop_data_t * fop, int32_t state) if (fop->offset != 0) { - int32_t idx; + int32_t idx = -1; + ec_t *ec = fop->xl->private; + + idx = gf_deitransform(fop->xl, fop->offset); + + if ((idx < 0) || (idx >= ec->nodes)) { - fop->offset = ec_deitransform(fop->xl->private, &idx, - fop->offset); + gf_log(fop->xl->name, GF_LOG_ERROR, + "Invalid index %d in readdirp request", idx); + + fop->error = EIO; + + return 
EC_STATE_REPORT; + } fop->mask &= 1ULL << idx; } diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index 783e3d475ce..139957b55c6 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -16,17 +16,6 @@ #include "ec-fops.h" #include "ec-helpers.h" -#define BACKEND_D_OFF_BITS 63 -#define PRESENT_D_OFF_BITS 63 - -#define ONE 1ULL -#define MASK (~0ULL) -#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS)) -#define BACKEND_MASK (MASK >> (64 - BACKEND_D_OFF_BITS)) - -#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1)) -#define SHIFT_BITS (max(0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1))) - #ifndef ffsll #define ffsll(x) __builtin_ffsll(x) #endif @@ -106,41 +95,6 @@ void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...) } } -uint64_t ec_itransform(ec_t * ec, int32_t idx, uint64_t offset) -{ - int32_t bits; - - if (offset == -1ULL) - { - return -1ULL; - } - - bits = ec->bits_for_nodes; - if ((offset & ~(PRESENT_MASK >> (bits + 1))) != 0) - { - return TOP_BIT | ((offset >> SHIFT_BITS) & (MASK << bits)) | idx; - } - - return (offset * ec->nodes) + idx; -} - -uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset) -{ - uint64_t mask = 0; - - if ((offset & TOP_BIT) != 0) - { - mask = MASK << ec->bits_for_nodes; - - *idx = offset & ~mask; - return ((offset & ~TOP_BIT) & mask) << SHIFT_BITS; - } - - *idx = offset % ec->nodes; - - return offset / ec->nodes; -} - int32_t ec_bits_count(uint64_t n) { n -= (n >> 1) & 0x5555555555555555ULL; diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h index 5f5d9382532..11d2707b3c0 100644 --- a/xlators/cluster/ec/src/ec-helpers.h +++ b/xlators/cluster/ec/src/ec-helpers.h @@ -16,8 +16,6 @@ const char * ec_bin(char * str, size_t size, uint64_t value, int32_t digits); const char * ec_fop_name(int32_t id); void ec_trace(const char * event, ec_fop_data_t * fop, const char * fmt, ...); -uint64_t 
ec_itransform(ec_t * ec, int32_t idx, uint64_t offset); -uint64_t ec_deitransform(ec_t * ec, int32_t * idx, uint64_t offset); int32_t ec_bits_count(uint64_t n); int32_t ec_bits_index(uint64_t n); int32_t ec_bits_consume(uint64_t * n); diff --git a/xlators/protocol/client/src/client-handshake.c b/xlators/protocol/client/src/client-handshake.c index 531b38eaf83..90192ad2ac8 100644 --- a/xlators/protocol/client/src/client-handshake.c +++ b/xlators/protocol/client/src/client-handshake.c @@ -1194,6 +1194,8 @@ client_setvolume_cbk (struct rpc_req *req, struct iovec *iov, int count, void *m } */ + conf->client_id = glusterfs_leaf_position(this); + gf_log (this->name, GF_LOG_INFO, "Connected to %s, attached to remote volume '%s'.", conf->rpc->conn.name, diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c index 5d9f00fdc70..be5e7b57739 100644 --- a/xlators/protocol/client/src/client-helpers.c +++ b/xlators/protocol/client/src/client-helpers.c @@ -141,12 +141,16 @@ client_local_wipe (clnt_local_t *local) } int -unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries) +unserialize_rsp_dirent (xlator_t *this, struct gfs3_readdir_rsp *rsp, + gf_dirent_t *entries) { struct gfs3_dirlist *trav = NULL; gf_dirent_t *entry = NULL; int entry_len = 0; int ret = -1; + clnt_conf_t *conf = NULL; + + conf = this->private; trav = rsp->reply; while (trav) { @@ -156,7 +160,8 @@ unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries) goto out; entry->d_ino = trav->d_ino; - entry->d_off = trav->d_off; + gf_itransform (this, trav->d_off, &entry->d_off, + conf->client_id); entry->d_len = trav->d_len; entry->d_type = trav->d_type; @@ -182,12 +187,17 @@ unserialize_rsp_direntp (xlator_t *this, fd_t *fd, inode_table_t *itable = NULL; int entry_len = 0; int ret = -1; + clnt_conf_t *conf = NULL; trav = rsp->reply; if (fd) itable = fd->inode->table; + conf = this->private; + if (!conf) + goto out; + while (trav) 
{ entry_len = gf_dirent_size (trav->name); entry = GF_CALLOC (1, entry_len, gf_common_mt_gf_dirent_t); @@ -195,7 +205,8 @@ unserialize_rsp_direntp (xlator_t *this, fd_t *fd, goto out; entry->d_ino = trav->d_ino; - entry->d_off = trav->d_off; + gf_itransform (this, trav->d_off, &entry->d_off, + conf->client_id); entry->d_len = trav->d_len; entry->d_type = trav->d_type; diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c index 7ca91e9880c..10abe845c0c 100644 --- a/xlators/protocol/client/src/client-rpc-fops.c +++ b/xlators/protocol/client/src/client-rpc-fops.c @@ -2450,7 +2450,7 @@ client3_3_readdir_cbk (struct rpc_req *req, struct iovec *iov, int count, INIT_LIST_HEAD (&entries.list); if (rsp.op_ret > 0) { - unserialize_rsp_dirent (&rsp, &entries); + unserialize_rsp_dirent (this, &rsp, &entries); } GF_PROTOCOL_DICT_UNSERIALIZE (frame->this, xdata, diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index d9e7ccd0c4f..7fca01c5b6a 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -24,6 +24,7 @@ #include "xdr-rpc.h" #include "glusterfs3.h" +#include "gf-dirent.h" extern rpc_clnt_prog_t clnt_handshake_prog; extern rpc_clnt_prog_t clnt_dump_prog; @@ -1913,6 +1914,9 @@ client_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, if (!conf || !conf->fops) goto out; + if (off != 0) + off = gf_dirent_orig_offset(this, off); + args.fd = fd; args.size = size; args.offset = off; @@ -1948,6 +1952,9 @@ client_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, if (!conf || !conf->fops) goto out; + if (off != 0) + off = gf_dirent_orig_offset(this, off); + args.fd = fd; args.size = size; args.offset = off; @@ -2447,7 +2454,7 @@ build_client_config (xlator_t *this, clnt_conf_t *conf) { int ret = -1; - if (!conf) + if (!conf) goto out; GF_OPTION_INIT ("frame-timeout", conf->rpc_conf.rpc_timeout, @@ -2470,6 +2477,8 @@ build_client_config 
(xlator_t *this, clnt_conf_t *conf) GF_OPTION_INIT ("send-gids", conf->send_gids, bool, out); + conf->client_id = glusterfs_leaf_position(this); + ret = client_check_remote_host (this, this->options); if (ret) goto out; diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index 7157e120dda..40200b6afc4 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -85,6 +85,7 @@ typedef struct clnt_conf { rpc_clnt_prog_t *handshake; rpc_clnt_prog_t *dump; + int client_id; uint64_t reopen_fd_count; /* Count of fds reopened after a connection is established */ gf_lock_t rec_lock; @@ -228,7 +229,8 @@ int client_submit_request (xlator_t *this, void *req, struct iovec *rsp_payload, int rsp_count, struct iobref *rsp_iobref, xdrproc_t xdrproc); -int unserialize_rsp_dirent (struct gfs3_readdir_rsp *rsp, gf_dirent_t *entries); +int unserialize_rsp_dirent (xlator_t *this, struct gfs3_readdir_rsp *rsp, + gf_dirent_t *entries); int unserialize_rsp_direntp (xlator_t *this, fd_t *fd, struct gfs3_readdirp_rsp *rsp, gf_dirent_t *entries); |