diff options
| author | Amar Tumballi <amarts@redhat.com> | 2017-06-23 13:10:56 +0530 | 
|---|---|---|
| committer | Shyamsundar Ranganathan <srangana@redhat.com> | 2017-07-31 15:34:58 +0000 | 
| commit | 61ea2a44b509cebc566fc18b2c356d88a3f1fdc8 (patch) | |
| tree | 21d43ed73f6a5c3057be59306649ac5fe2ffa268 | |
| parent | d446c0defab52977cfc6460c0bde0fde0f61e314 (diff) | |
posix: option to handle the shared bricks for statvfs()
Currently the 'storage/posix' xlator has an option called
`export-statfs-size no`, which exports zero as values for a few
fields in `struct statvfs`. In the case of a backend brick shared
between multiple brick processes, the values of these variables
should be `field_value / number-of-bricks-at-node`. This way,
even the issue of 'min-free-disk' etc. at different layers would
also be handled properly when the statfs() sys call is made.
Fixes #241
> Reviewed-on: https://review.gluster.org/17618
> Reviewed-by: Jeff Darcy <jeff@pl.atyp.us>
> Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
> (cherry picked from commit febf5ed4848ad705a34413353559482417c61467)
Change-Id: I2e320e1fdcc819ab9173277ef3498201432c275f
Signed-off-by: Amar Tumballi <amarts@redhat.com>
Reviewed-on: https://review.gluster.org/17903
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Shyamsundar Ranganathan <srangana@redhat.com>
| -rw-r--r-- | tests/basic/posix/shared-statfs.t | 53 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 17 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-messages.h | 8 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.c | 10 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-store.h | 1 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 28 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 4 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 36 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 5 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 34 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.h | 3 | 
11 files changed, 186 insertions, 13 deletions
diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t new file mode 100644 index 00000000000..8caa9fa2110 --- /dev/null +++ b/tests/basic/posix/shared-statfs.t @@ -0,0 +1,53 @@ +#!/bin/bash +#Test that statfs is not served from posix backend FS. + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; +TEST glusterd + +#Create brick partitions +TEST truncate -s 100M $B0/brick1 +TEST truncate -s 100M $B0/brick2 +LO1=`SETUP_LOOP $B0/brick1` +TEST [ $? -eq 0 ] +TEST MKFS_LOOP $LO1 +LO2=`SETUP_LOOP $B0/brick2` +TEST [ $? -eq 0 ] +TEST MKFS_LOOP $LO2 +TEST mkdir -p $B0/${V0}1 $B0/${V0}2 +TEST MOUNT_LOOP $LO1 $B0/${V0}1 +TEST MOUNT_LOOP $LO2 $B0/${V0}2 + +# Create a subdir in mountpoint and use that for volume. +TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1; +TEST $CLI volume start $V0 +TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 +total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') +# Keeping the size less than 200M mainly because XFS will use +# some storage in brick to keep its own metadata. 
+TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] + + +TEST force_umount $M0 +TEST $CLI volume stop $V0 +EXPECT 'Stopped' volinfo_field $V0 'Status'; + +# From the same mount point, share another 2 bricks with the volume +TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/3 $H0:$B0/${V0}2/3 + +TEST $CLI volume start $V0 +TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 +total_space=$(df -P $M0 | tail -1 | awk '{ print $2}') +TEST [ $total_space -gt 194000 -a $total_space -lt 200000 ] + +TEST force_umount $M0 +TEST $CLI volume stop $V0 +EXPECT 'Stopped' volinfo_field $V0 'Status'; + +TEST $CLI volume delete $V0; + +UMOUNT_LOOP ${B0}/${V0}{1,2} +rm -f ${B0}/brick{1,2} +cleanup; diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 8d4ea13af95..c7b618745b3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -22,6 +22,7 @@  #include "glusterd-server-quorum.h"  #include "run.h"  #include "glusterd-volgen.h" +#include "syscall.h"  #include <sys/signal.h>  /* misc */ @@ -1322,6 +1323,7 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,          xlator_t                     *this           = NULL;          glusterd_conf_t              *conf           = NULL;          gf_boolean_t                  is_valid_add_brick = _gf_false; +        struct statvfs                brickstat = {0,};          this = THIS;          GF_ASSERT (this); @@ -1396,6 +1398,21 @@ glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,                  if (ret)                          goto out; +                if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)) { +                        ret = sys_statvfs (brickinfo->path, &brickstat); +                        if (ret) { +                                gf_msg (this->name, GF_LOG_ERROR, errno, +                                        
GD_MSG_STATVFS_FAILED, +                                        "Failed to fetch disk utilization " +                                        "from the brick (%s:%s). Please check the health of " +                                        "the brick. Error code was %s", +                                        brickinfo->hostname, brickinfo->path, +                                        strerror (errno)); + +                                goto out; +                        } +                        brickinfo->statfs_fsid = brickstat.f_fsid; +                }                  /* hot tier bricks are added to head of brick list */                  if (dict_get (dict, "attach-tier")) {                          cds_list_add (&brickinfo->brick_list, &volinfo->bricks); diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h index 14424d36890..2caa16c2eda 100644 --- a/xlators/mgmt/glusterd/src/glusterd-messages.h +++ b/xlators/mgmt/glusterd/src/glusterd-messages.h @@ -4901,6 +4901,14 @@   */  #define GD_MSG_BRICKPROC_NEW_FAILED                (GLUSTERD_COMP_BASE + 606) +/*! 
+ * @messageid + * @diagnosis + * @recommendedaction + * + */ +#define GD_MSG_STATVFS_FAILED  (GLUSTERD_COMP_BASE + 607) +  /*------------*/  #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 72b70f916c6..8eb301f040f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -352,6 +352,12 @@ gd_store_brick_snap_details_write (int fd, glusterd_brickinfo_t *brickinfo)          snprintf (value, sizeof(value), "%d", brickinfo->snap_status);          ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,                                     value); +        if (ret) +                goto out; + +        memset (value, 0, sizeof (value)); +        snprintf (value, sizeof (value), "%lu", brickinfo->statfs_fsid); +        ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_FSID, value);  out:          return ret; @@ -2508,6 +2514,10 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)                          } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_ID)) {                                  strncpy (brickinfo->brick_id, value,                                           sizeof (brickinfo->brick_id)); +                        } else if (!strncmp (key, +                                             GLUSTERD_STORE_KEY_BRICK_FSID, +                                             strlen (GLUSTERD_STORE_KEY_BRICK_FSID))) { +                                gf_string2uint64 (value, &brickinfo->statfs_fsid);                          } else {                                  gf_msg (this->name, GF_LOG_ERROR, 0,                                          GD_MSG_UNKNOWN_KEY, "Unknown key: %s", diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 3e31c965638..b515ca6c554 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ 
b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -97,6 +97,7 @@ typedef enum glusterd_store_ver_ac_{  #define GLUSTERD_STORE_KEY_BRICK_FSTYPE         "fs-type"  #define GLUSTERD_STORE_KEY_BRICK_MNTOPTS        "mnt-opts"  #define GLUSTERD_STORE_KEY_BRICK_ID             "brick-id" +#define GLUSTERD_STORE_KEY_BRICK_FSID           "brick-fsid"  #define GLUSTERD_STORE_KEY_PEER_UUID            "uuid"  #define GLUSTERD_STORE_KEY_PEER_HOSTNAME        "hostname" diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 6ff11a2e050..f1627df688f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -186,6 +186,32 @@ out:          return ret;  } +/* This is going to be a O(n^2) operation as we have to pick a brick, +   make sure it belong to this machine, and compare another brick belonging +   to this machine (if exists), is sharing the backend */ +static void +gd_set_shared_brick_count (glusterd_volinfo_t *volinfo) +{ +        glusterd_brickinfo_t *brickinfo = NULL; +        glusterd_brickinfo_t *trav = NULL; + +        cds_list_for_each_entry (brickinfo, &volinfo->bricks, +                                 brick_list) { +                if (gf_uuid_compare (brickinfo->uuid, MY_UUID)) +                        continue; +                brickinfo->fs_share_count = 0; +                cds_list_for_each_entry (trav, &volinfo->bricks, +                                         brick_list) { +                        if (!gf_uuid_compare (trav->uuid, MY_UUID) && +                            (trav->statfs_fsid == brickinfo->statfs_fsid)) { +                                brickinfo->fs_share_count++; +                        } +                } +        } + +        return; +} +  int  glusterd_volume_brick_for_each (glusterd_volinfo_t *volinfo, void *data,                 int (*fn) (glusterd_volinfo_t *, glusterd_brickinfo_t *, @@ -195,6 +221,8 @@ glusterd_volume_brick_for_each 
(glusterd_volinfo_t *volinfo, void *data,          glusterd_volinfo_t *dup_volinfo = NULL;          int                ret          = 0; +        gd_set_shared_brick_count (volinfo); +          if (volinfo->type != GF_CLUSTER_TYPE_TIER) {                  ret = _brick_for_each (volinfo, NULL, data, fn);                  if (ret) diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 0a0668e9ea6..1ada7232f3e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1440,6 +1440,7 @@ static int  brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                          dict_t *set_dict, glusterd_brickinfo_t *brickinfo)  { +        char            tmpstr[10] = {0,};          int             ret = -1;          gf_boolean_t    quota_enabled   = _gf_true;          gf_boolean_t    trash_enabled   = _gf_false; @@ -1491,6 +1492,9 @@ brick_graph_add_posix (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,          if (quota_enabled || pgfid_feat || trash_enabled)                  xlator_set_option (xl, "update-link-count-parent",                                     "on"); + +        snprintf (tmpstr, sizeof (tmpstr), "%d", brickinfo->fs_share_count); +        ret = xlator_set_option (xl, "shared-brick-count", tmpstr);  out:          return ret;  } diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 7254e281497..b95b8a4e863 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -2164,6 +2164,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)          char                 *brick_mount_dir = NULL;          char                  key[PATH_MAX]   = "";          char                 *address_family_str = NULL; +        struct statvfs        brickstat  = {0,};          this = THIS;          GF_ASSERT (this); @@ 
-2405,24 +2406,35 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr)                                   sizeof(brickinfo->mount_dir));                  } -#ifdef HAVE_BD_XLATOR -                if (!gf_uuid_compare (brickinfo->uuid, MY_UUID) -                    && brickinfo->vg[0]) { -                        ret = glusterd_is_valid_vg (brickinfo, 0, msg); +                if (!gf_uuid_compare (brickinfo->uuid, MY_UUID)) { +                        ret = sys_statvfs (brickinfo->path, &brickstat);                          if (ret) { -                                gf_msg (this->name, GF_LOG_ERROR, 0, -                                        GD_MSG_INVALID_VG, "%s", msg); +                                gf_log ("brick-op", GF_LOG_ERROR, "Failed to fetch disk" +                                        " utilization from the brick (%s:%s). Please " +                                        "check health of the brick. Error code was %s", +                                        brickinfo->hostname, brickinfo->path, +                                        strerror (errno));                                  goto out;                          } +                        brickinfo->statfs_fsid = brickstat.f_fsid; -                        /* if anyone of the brick does not have thin -                           support, disable it for entire volume */ -                        caps &= brickinfo->caps; -                } else { -                                caps = 0; -                } +#ifdef HAVE_BD_XLATOR +                        if (brickinfo->vg[0]) { +                                ret = glusterd_is_valid_vg (brickinfo, 0, msg); +                                if (ret) { +                                        gf_msg (this->name, GF_LOG_ERROR, 0, +                                                GD_MSG_INVALID_VG, "%s", msg); +                                        goto out; +                                } +                                /* if anyone of 
the brick does not have thin +                                   support, disable it for entire volume */ +                                caps &= brickinfo->caps; +                        } else { +                                caps = 0; +                        }  #endif +                }                  cds_list_add_tail (&brickinfo->brick_list, &volinfo->bricks);                  brick = strtok_r (NULL, " \n", &saveptr); diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index b2141853db4..3226ec24c0f 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -232,6 +232,11 @@ struct glusterd_brickinfo {           */          uint16_t           group;          uuid_t             jbr_uuid; + +        /* Below are used for handling the case of multiple bricks sharing +           the backend filesystem */ +        uint64_t           statfs_fsid; +        uint32_t           fs_share_count;  };  typedef struct glusterd_brickinfo glusterd_brickinfo_t; diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index dc8a129cacb..92a2f3772cb 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -3641,6 +3641,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this,          int32_t                op_errno  = 0;          struct statvfs         buf       = {0, };          struct posix_private * priv      = NULL; +        int                    shared_by = 1;          VALIDATE_OR_GOTO (frame, out);          VALIDATE_OR_GOTO (this, out); @@ -3665,6 +3666,16 @@ posix_statfs (call_frame_t *frame, xlator_t *this,                  goto out;          } +        shared_by = priv->shared_brick_count; +        if (shared_by > 1) { +                buf.f_blocks /= shared_by; +                buf.f_bfree  /= shared_by; +                buf.f_bavail /= shared_by; +                buf.f_files  /= shared_by; +                buf.f_ffree  /= shared_by; 
+                buf.f_favail /= shared_by; +        } +          if (!priv->export_statfs) {                  buf.f_blocks = 0;                  buf.f_bfree  = 0; @@ -6971,7 +6982,7 @@ int  reconfigure (xlator_t *this, dict_t *options)  {  	int                   ret = -1; -struct posix_private *priv = NULL; +        struct posix_private *priv = NULL;          int32_t               uid = -1;          int32_t               gid = -1;  	char                 *batch_fsync_mode_str = NULL; @@ -7039,6 +7050,9 @@ struct posix_private *priv = NULL;                            options, uint32, out);          posix_spawn_health_check_thread (this); +        GF_OPTION_RECONF ("shared-brick-count", priv->shared_brick_count, +                          options, int32, out); +  	ret = 0;  out:  	return ret; @@ -7573,6 +7587,17 @@ init (xlator_t *this)                  }          }  #endif +        _private->shared_brick_count = 1; +        ret = dict_get_int32 (this->options, "shared-brick-count", +                              &_private->shared_brick_count); +        if (ret == -1) { +                gf_msg (this->name, GF_LOG_ERROR, 0, +                        P_MSG_INVALID_OPTION_VAL, +                        "'shared-brick-count' takes only integer " +                        "values"); +                goto out; +        } +          this->private = (void *)_private;          op_ret = posix_handle_init (this); @@ -7863,5 +7888,12 @@ struct volume_options options[] = {  	  "\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n"          },  #endif +        { .key  = {"shared-brick-count"}, +          .type = GF_OPTION_TYPE_INT, +          .default_value = "1", +          .description = "Number of bricks sharing the same backend export." 
+          " Useful for displaying the proper usable size through statvfs() " +          "call (df command)", +        },          { .key  = {NULL} }  }; diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index 480566a5340..81158266111 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -176,6 +176,9 @@ struct posix_private {          } xattr_user_namespace;  #endif +        /* Option to handle the cases of multiple bricks exported from +           same backend. Very much usable in brick-splitting feature. */ +        int32_t shared_brick_count;  };  typedef struct {  | 
