diff options
-rw-r--r-- | tests/basic/posix/shared-statfs.t | 53 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 17 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-messages.h | 8 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 10 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 1 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 28 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 4 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 36 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 5 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.c | 34 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.h | 3 |
11 files changed, 186 insertions, 13 deletions
diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t new file mode 100644 index 00000000000..8caa9fa2110 --- /dev/null +++ b/tests/basic/posix/shared-statfs.t @@ -0,0 +1,53 @@ +#!/bin/bash +#Test that statfs is not served from posix backend FS. + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; +TEST glusterd + +#Create brick partitions +TEST truncate -s 100M $B0/brick1 +TEST truncate -s 100M $B0/brick2 +LO1=`SETUP_LOOP $B0/brick1` +TEST [ $? -eq 0 ] +TEST MKFS_LOOP $LO1 +LO2=`SETUP_LOOP $B0/brick2` +TEST [ $? -eq 0 ] +TEST MKFS_LOOP $LO2 +TEST mkdir -p $B0/${V0}1 $B0/${V0}2 +TEST MOUNT_LOOP $LO1 $B0/${V0}1 +TEST MOUNT_LOOP $LO2 $B0/${V0}2 + +# Create a subdir in mountpoint and use that for volume. +TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1; +TEST $CLI volume start $V0 +TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 +total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') +# Keeping the size less than 200M mainly because XFS will use +# some storage in brick to keep its own metadata. +TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] + + +TEST force_umount $M0 +TEST $CLI volume stop $V0 +EXPECT 'Stopped' volinfo_field $V0 'Status'; + +# From the same mount point, share another 2 bricks with the volume +TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/3 $H0:$B0/${V0}2/3 + +TEST $CLI volume start $V0 +TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 +total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') +TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] + +TEST force_umount $M0 +TEST $CLI volume stop $V0 +EXPECT 'Stopped' volinfo_field $V0 'Status'; + +TEST $CLI volume delete $V0; + +UMOUNT_LOOP ${B0}/${V0}{1,2} +rm -f ${B0}/brick{1,2} +cleanup; diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 8d4ea13af95..c7b618745b3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -22,6 +22,7 @@ #include "glusterd-server-quorum.h" #include "run.h" #include "glusterd-volgen.h" +#include "syscall.h" #include <sys/signal.h> /* misc */ @@ -1322,6 +1323,7 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, xlator_t *this = NULL; glusterd_conf_t *conf = NULL; gf_boolean_t is_valid_add_brick = _gf_false; + struct statvfs brickstat = {0,}; this = THIS; GF_ASSERT (this); @@ -1396,6 +1398,21 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, if (ret) goto out; + if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)) { + ret = sys_statvfs (brickinfo->path, &brickstat); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + GD_MSG_STATVFS_FAILED, + "Failed to fetch disk utilization " + "from the brick (%s:%s). Please check the health of " + "the brick. Error code was %s", + brickinfo->hostname, brickinfo->path, + strerror (errno)); + + goto out; + } + brickinfo->statfs_fsid = brickstat.f_fsid; + } /* hot tier bricks are added to head of brick list */ if (dict_get (dict, "attach-tier")) { cds_list_add (&brickinfo->brick_list, &volinfo->bricks); diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index 14424d36890..2caa16c2eda 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -4901,6 +4901,14 @@ */ #define GD_MSG_BRICKPROC_NEW_FAILED (GLUSTERD_COMP_BASE + 606) +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_STATVFS_FAILED (GLUSTERD_COMP_BASE + 607) + /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 72b70f916c6..8eb301f040f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -352,6 +352,12 @@ gd_store_brick_snap_details_write (int fd, glusterd_brickinfo_t *brickinfo) snprintf (value, sizeof(value), "%d", brickinfo->snap_status); ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, value); + if (ret) + goto out; + + memset (value, 0, sizeof (value)); + snprintf (value, sizeof (value), "%lu", brickinfo->statfs_fsid); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_FSID, value); out: return ret; @@ -2508,6 +2514,10 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_ID)) { strncpy (brickinfo->brick_id, value, sizeof (brickinfo->brick_id)); + } else if (!strncmp (key, + GLUSTERD_STORE_KEY_BRICK_FSID, + strlen (GLUSTERD_STORE_KEY_BRICK_FSID))) { + gf_string2uint64 (value, &brickinfo->statfs_fsid); } else { gf_msg (this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_KEY, "Unknown key: %s", diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 3e31c965638..b515ca6c554 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -97,6 +97,7 @@ typedef enum glusterd_store_ver_ac_{ #define GLUSTERD_STORE_KEY_BRICK_FSTYPE "fs-type" #define GLUSTERD_STORE_KEY_BRICK_MNTOPTS "mnt-opts" #define GLUSTERD_STORE_KEY_BRICK_ID "brick-id" +#define GLUSTERD_STORE_KEY_BRICK_FSID "brick-fsid" #define GLUSTERD_STORE_KEY_PEER_UUID "uuid" #define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 6ff11a2e050..f1627df688f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -186,6 +186,32 @@ out: return ret; } +/* This is going to be a O(n^2) operation as we have to pick a brick, + make sure it belong to this machine, and compare another brick belonging + to this machine (if exists), is sharing the backend */ +static void +gd_set_shared_brick_count (glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *trav = NULL; + + cds_list_for_each_entry (brickinfo, &volinfo->bricks, + brick_list) { + if (gf_uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + brickinfo->fs_share_count = 0; + cds_list_for_each_entry (trav, &volinfo->bricks, + brick_list) { + if (!gf_uuid_compare (trav->uuid, MY_UUID) && + (trav->statfs_fsid == brickinfo->statfs_fsid)) { + brickinfo->fs_share_count++; + } + } + } + + return; +} + int glusterd_volume_brick_for_each (glusterd_volinfo_t *volinfo, void *data, int (*fn) (glusterd_volinfo_t *, glusterd_brickinfo_t *, @@ -195,6 +221,8 @@ glusterd_volume_brick_for_each (glusterd_volinfo_t *volinfo, void *data, glusterd_volinfo_t *dup_volinfo = NULL; int ret = 0; + gd_set_shared_brick_count (volinfo); + if (volinfo->type != GF_CLUSTER_TYPE_TIER) { ret = _brick_for_each (volinfo, NULL, data, fn); if (ret) diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 0a0668e9ea6..1ada7232f3e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1440,6 +1440,7 @@ static int brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, glusterd_brickinfo_t *brickinfo) { + char tmpstr[10] = {0,}; int ret = -1; gf_boolean_t quota_enabled = _gf_true; gf_boolean_t trash_enabled = _gf_false; @@ -1491,6 +1492,9 @@ brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, if (quota_enabled || pgfid_feat || trash_enabled) xlator_set_option (xl, "update-link-count-parent", "on"); + + snprintf (tmpstr, sizeof (tmpstr), "%d", brickinfo->fs_share_count); + ret = xlator_set_option (xl, "shared-brick-count", tmpstr); out: return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 7254e281497..b95b8a4e863 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -2164,6 +2164,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) char *brick_mount_dir = NULL; char key[PATH_MAX] = ""; char *address_family_str = NULL; + struct statvfs brickstat = {0,}; this = THIS; GF_ASSERT (this); @@ -2405,24 +2406,35 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) sizeof(brickinfo->mount_dir)); } -#ifdef HAVE_BD_XLATOR - if (!gf_uuid_compare (brickinfo->uuid, MY_UUID) - && brickinfo->vg[0]) { - ret = glusterd_is_valid_vg (brickinfo, 0, msg); + if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)) { + ret = sys_statvfs (brickinfo->path, &brickstat); if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - GD_MSG_INVALID_VG, "%s", msg); + gf_log ("brick-op", GF_LOG_ERROR, "Failed to fetch disk" + " utilization from the brick (%s:%s). Please " + "check health of the brick. Error code was %s", + brickinfo->hostname, brickinfo->path, + strerror (errno)); goto out; } + brickinfo->statfs_fsid = brickstat.f_fsid; - /* if anyone of the brick does not have thin - support, disable it for entire volume */ - caps &= brickinfo->caps; - } else { - caps = 0; - } +#ifdef HAVE_BD_XLATOR + if (brickinfo->vg[0]) { + ret = glusterd_is_valid_vg (brickinfo, 0, msg); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + GD_MSG_INVALID_VG, "%s", msg); + goto out; + } + /* if anyone of the brick does not have thin + support, disable it for entire volume */ + caps &= brickinfo->caps; + } else { + caps = 0; + } #endif + } cds_list_add_tail (&brickinfo->brick_list, &volinfo->bricks); brick = strtok_r (NULL, " \n", &saveptr); diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index b2141853db4..3226ec24c0f 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -232,6 +232,11 @@ struct glusterd_brickinfo { */ uint16_t group; uuid_t jbr_uuid; + + /* Below are used for handling the case of multiple bricks sharing + the backend filesystem */ + uint64_t statfs_fsid; + uint32_t fs_share_count; }; typedef struct glusterd_brickinfo glusterd_brickinfo_t; diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index dc8a129cacb..92a2f3772cb 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -3641,6 +3641,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this, int32_t op_errno = 0; struct statvfs buf = {0, }; struct posix_private * priv = NULL; + int shared_by = 1; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -3665,6 +3666,16 @@ posix_statfs (call_frame_t *frame, xlator_t *this, goto out; } + shared_by = priv->shared_brick_count; + if (shared_by > 1) { + buf.f_blocks /= shared_by; + buf.f_bfree /= shared_by; + buf.f_bavail /= shared_by; + buf.f_files /= shared_by; + buf.f_ffree /= shared_by; + buf.f_favail /= shared_by; + } + if (!priv->export_statfs) { buf.f_blocks = 0; buf.f_bfree = 0; @@ -6971,7 +6982,7 @@ int reconfigure (xlator_t *this, dict_t *options) { int ret = -1; -struct posix_private *priv = NULL; + struct posix_private *priv = NULL; int32_t uid = -1; int32_t gid = -1; char *batch_fsync_mode_str = NULL; @@ -7039,6 +7050,9 @@ struct posix_private *priv = NULL; options, uint32, out); posix_spawn_health_check_thread (this); + GF_OPTION_RECONF ("shared-brick-count", priv->shared_brick_count, + options, int32, out); + ret = 0; out: return ret; @@ -7573,6 +7587,17 @@ init (xlator_t *this) } } #endif + _private->shared_brick_count = 1; + ret = dict_get_int32 (this->options, "shared-brick-count", + &_private->shared_brick_count); + if (ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, + "'shared-brick-count' takes only integer " + "values"); + goto out; + } + this->private = (void *)_private; op_ret = posix_handle_init (this); @@ -7863,5 +7888,12 @@ struct volume_options options[] = { "\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n" }, #endif + { .key = {"shared-brick-count"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "1", + .description = "Number of bricks sharing the same backend export." + " Useful for displaying the proper usable size through statvfs() " + "call (df command)", + }, { .key = {NULL} } }; diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index 480566a5340..81158266111 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -176,6 +176,9 @@ struct posix_private { } xattr_user_namespace; #endif + /* Option to handle the cases of multiple bricks exported from + same backend. Very much usable in brick-splitting feature. */ + int32_t shared_brick_count; }; typedef struct { |