diff options
| author | Richard Wareing <rwareing@fb.com> | 2015-06-23 17:03:11 -0700 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2015-11-01 09:14:34 -0800 | 
| commit | d3e496cbcd35b9d9b840e328ae109c44f59083ce (patch) | |
| tree | 25eedba238f9168cd11015885973d5a960c1596e | |
| parent | 846697d91ed56ca3d76f2a78e87e7675f127f21d (diff) | |
debug/io-stats: Add FOP sampling feature
Summary:
- Using sampling feature you can record details about every Nth FOP.
  The fields in each sample are: FOP type, hostname, uid, gid, FOP priority,
  port and time taken (latency) to fufill the request.
- Implemented using a ring buffer which is not (m/c) allocated in the IO path,
  this should make the sampling process pretty cheap.
- DNS resolution done @ dump time not @ sample time for performance w/
  cache
- Metrics can be used for both diagnostics, traffic/IO profiling as well
  as P95/P99 calculations
- To control this feature there are two new volume options:
  diagnostics.fop-sample-interval - The sampling interval, e.g. 1 means
  sample every FOP, 100 means sample every 100th FOP
  diagnostics.fop-sample-buf-size - The size (in bytes) of the ring
  buffer used to store the samples.  In the even more samples
  are collected in the stats dump interval than can be held in this buffer,
  the oldest samples shall be discarded.  Samples are stored in the log
  directory under /var/log/glusterfs/samples.
- Uses DNS cache written by sshreyas@fb.com (Thank-you!), the DNS cache
  TTL is controlled by the diagnostics.stats-dnscache-ttl-sec option
  and defaults to 24hrs.
Test Plan:
- Valgrind'd to ensure it's leak free
- Run prove test(s)
- Shadow testing on 100+ brick cluster
Change-Id: I9ee14c2fa18486b7efb38e59f70687249d3f96d8
BUG: 1271310
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: http://review.gluster.org/12210
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
| -rw-r--r-- | extras/glusterfs-logrotate | 9 | ||||
| -rw-r--r-- | libglusterfs/src/common-utils.c | 260 | ||||
| -rw-r--r-- | libglusterfs/src/common-utils.h | 21 | ||||
| -rw-r--r-- | libglusterfs/src/glusterfs.h | 6 | ||||
| -rw-r--r-- | libglusterfs/src/mem-types.h | 2 | ||||
| -rw-r--r-- | libglusterfs/src/stack.h | 1 | ||||
| -rw-r--r-- | tests/basic/fop-sampling.t | 61 | ||||
| -rw-r--r-- | xlators/debug/io-stats/src/io-stats-mem-types.h | 2 | ||||
| -rw-r--r-- | xlators/debug/io-stats/src/io-stats.c | 494 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 15 | ||||
| -rw-r--r-- | xlators/nfs/server/src/nfs-fops.c | 1 | ||||
| -rw-r--r-- | xlators/nfs/server/src/nfs.c | 15 | ||||
| -rw-r--r-- | xlators/nfs/server/src/nfs.h | 5 | ||||
| -rw-r--r-- | xlators/protocol/server/src/server-helpers.c | 7 | 
14 files changed, 864 insertions, 35 deletions
diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate index e3319afaa96..575c0eee771 100644 --- a/extras/glusterfs-logrotate +++ b/extras/glusterfs-logrotate @@ -26,3 +26,12 @@    /usr/bin/killall -HUP glusterfsd > /dev/null 2>&1 || true    endscript  } + +/var/log/glusterfs/samples/*.samp { +  daily +  rotate 3 +  sharedscripts +  missingok +  compress +  delaycompress +} diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c index 2dcd54f1829..a89e120c0fb 100644 --- a/libglusterfs/src/common-utils.c +++ b/libglusterfs/src/common-utils.c @@ -34,6 +34,7 @@  #if defined(GF_BSD_HOST_OS) || defined(GF_DARWIN_HOST_OS)  #include <sys/sysctl.h>  #endif +#include <libgen.h>  #include "compat-errno.h"  #include "logging.h" @@ -210,7 +211,7 @@ out:  }  /** - * gf_resolve_parent_path -- Given a path, returns an allocated string + * gf_resolve_path_parent -- Given a path, returns an allocated string   *                           containing the parent's path.   * @path: Path to parse   * @return: The parent path if found, NULL otherwise @@ -359,6 +360,135 @@ err:          return -1;  } +/** + * gf_dnscache_init -- Initializes a dnscache struct and sets the ttl + *                     to the specified value in the parameter. + * + * @ttl: the TTL in seconds + * @return: SUCCESS: Pointer to an allocated dnscache struct + *          FAILURE: NULL + */ +struct dnscache * +gf_dnscache_init (time_t ttl) +{ +        struct dnscache *cache = GF_MALLOC (sizeof (*cache), +                                            gf_common_mt_dnscache); +        cache->cache_dict = NULL; +        cache->ttl = ttl; +        return cache; +} + +/** + * gf_dnscache_entry_init -- Initialize a dnscache entry + * + * @return: SUCCESS: Pointer to an allocated dnscache entry struct + *          FAILURE: NULL + */ +struct dnscache_entry * +gf_dnscache_entry_init () +{ +        struct dnscache_entry *entry = GF_CALLOC (1, sizeof (*entry), +                                                 gf_common_mt_dnscache_entry); +        return entry; +} + +/** + * gf_dnscache_entry_deinit -- Free memory used by a dnscache entry + * + * @entry: Pointer to deallocate + */ +void +gf_dnscache_entry_deinit (struct dnscache_entry *entry) +{ +        GF_FREE (entry->ip); +        GF_FREE (entry->fqdn); +        GF_FREE (entry); +} + +/** + * gf_rev_dns_lookup -- Perform a reverse DNS lookup on the IP address. + * + * @ip: The IP address to perform a reverse lookup on + * + * @return: success: Allocated string containing the hostname + *          failure: NULL + */ +char * +gf_rev_dns_lookup_cached (const char *ip, struct dnscache *dnscache) +{ +        char               *fqdn = NULL; +        int                ret  = 0; +        dict_t             *cache = NULL; +        data_t             *entrydata = NULL; +        struct dnscache_entry *dnsentry = NULL; +        gf_boolean_t        from_cache = _gf_false; + +        if (!dnscache) +                goto out; + +        if (!dnscache->cache_dict) { +                dnscache->cache_dict = dict_new (); +                if (!dnscache->cache_dict) { +                        goto out; +                } +        } +        cache = dnscache->cache_dict; + +        /* Quick cache lookup to see if we already hold it */ +        entrydata = dict_get (cache, (char *)ip); +        if (entrydata) { +                dnsentry = (struct dnscache_entry *)entrydata->data; +                /* First check the TTL & timestamp */ +                if (time (NULL) - dnsentry->timestamp > dnscache->ttl) { +                        gf_dnscache_entry_deinit (dnsentry); +                        entrydata->data = NULL; /* Mark this as 'null' so +                                                 * dict_del () doesn't try free +                                                 * this after we've already +                                                 * freed it. +                                                 */ + +                        dict_del (cache, (char *)ip); /* Remove this entry */ +                } else { +                        /* Cache entry is valid, get the FQDN and return */ +                        fqdn = dnsentry->fqdn; +                        from_cache = _gf_true; /* Mark this as from cache */ +                        goto out; +                } +        } + +        /* Get the FQDN */ +        ret =  gf_get_hostname_from_ip ((char *)ip, &fqdn); +        if (ret != 0) +                goto out; + +        if (!fqdn) { +                gf_log_callingfn ("resolver", GF_LOG_CRITICAL, +                                  "Allocation failed for the host address"); +                goto out; +        } + +        from_cache = _gf_false; +out: +        /* Insert into the cache */ +        if (fqdn && !from_cache) { +                struct dnscache_entry *entry = gf_dnscache_entry_init (); + +                if (!entry) { +                        goto out; +                } +                entry->fqdn = fqdn; +                entry->ip = gf_strdup (ip); +                if (!ip) { +                        gf_dnscache_entry_deinit (entry); +                        goto out; +                } +                entry->timestamp = time (NULL); + +                entrydata = bin_to_data (entry, sizeof (*entry)); +                dict_set (cache, (char *)ip, entrydata); +        } +        return fqdn; +}  struct xldump {  	int lineno; @@ -4012,3 +4142,131 @@ _unmask_cancellation (void)  {          (void) pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);  } + + +const char * +fop_enum_to_pri_string (glusterfs_fop_t fop) +{ +        switch (fop) { +        case GF_FOP_OPEN: +        case GF_FOP_STAT: +        case GF_FOP_FSTAT: +        case GF_FOP_LOOKUP: +        case GF_FOP_ACCESS: +        case GF_FOP_READLINK: +        case GF_FOP_OPENDIR: +        case GF_FOP_STATFS: +        case GF_FOP_READDIR: +        case GF_FOP_READDIRP: +                return "HIGH"; + +        case GF_FOP_CREATE: +        case GF_FOP_FLUSH: +        case GF_FOP_LK: +        case GF_FOP_INODELK: +        case GF_FOP_FINODELK: +        case GF_FOP_ENTRYLK: +        case GF_FOP_FENTRYLK: +        case GF_FOP_UNLINK: +        case GF_FOP_SETATTR: +        case GF_FOP_FSETATTR: +        case GF_FOP_MKNOD: +        case GF_FOP_MKDIR: +        case GF_FOP_RMDIR: +        case GF_FOP_SYMLINK: +        case GF_FOP_RENAME: +        case GF_FOP_LINK: +        case GF_FOP_SETXATTR: +        case GF_FOP_GETXATTR: +        case GF_FOP_FGETXATTR: +        case GF_FOP_FSETXATTR: +        case GF_FOP_REMOVEXATTR: +        case GF_FOP_FREMOVEXATTR: +        case GF_FOP_IPC: +                return "NORMAL"; + +        case GF_FOP_READ: +        case GF_FOP_WRITE: +        case GF_FOP_FSYNC: +        case GF_FOP_TRUNCATE: +        case GF_FOP_FTRUNCATE: +        case GF_FOP_FSYNCDIR: +        case GF_FOP_XATTROP: +        case GF_FOP_FXATTROP: +        case GF_FOP_RCHECKSUM: +        case GF_FOP_ZEROFILL: +        case GF_FOP_FALLOCATE: +                return "LOW"; + +        case GF_FOP_NULL: +        case GF_FOP_FORGET: +        case GF_FOP_RELEASE: +        case GF_FOP_RELEASEDIR: +        case GF_FOP_GETSPEC: +        case GF_FOP_MAXVALUE: +        case GF_FOP_DISCARD: +                return "LEAST"; +        } +        return "UNKNOWN"; +} + +const char * +fop_enum_to_string (glusterfs_fop_t fop) +{ +        static const char *const str_map[] = { +                "NULL", +                "STAT", +                "READLINK", +                "MKNOD", +                "MKDIR", +                "UNLINK", +                "RMDIR", +                "SYMLINK", +                "RENAME", +                "LINK", +                "TRUNCATE", +                "OPEN", +                "READ", +                "WRITE", +                "STATFS", +                "FLUSH", +                "FSYNC", +                "SETXATTR", +                "GETXATTR", +                "REMOVEXATTR", +                "OPENDIR", +                "FSYNCDIR", +                "ACCESS", +                "CREATE", +                "FTRUNCATE", +                "FSTAT", +                "LK", +                "LOOKUP", +                "READDIR", +                "INODELK", +                "FINODELK", +                "ENTRYLK", +                "FENTRYLK", +                "XATTROP", +                "FXATTROP", +                "FGETXATTR", +                "FSETXATTR", +                "RCHECKSUM", +                "SETATTR", +                "FSETATTR", +                "READDIRP", +                "FORGET", +                "RELEASE", +                "RELEASEDIR", +                "GETSPEC", +                "FREMOVEXATTR", +                "FALLOCATE", +                "DISCARD", +                "ZEROFILL", +                "IPC", +                "MAXVALUE"}; +        if (fop <= GF_FOP_MAXVALUE) +                return str_map[fop]; + +        return "UNKNOWNFOP"; +} diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h index 77a8cdd51c7..bf574fdabc7 100644 --- a/libglusterfs/src/common-utils.h +++ b/libglusterfs/src/common-utils.h @@ -159,6 +159,27 @@ typedef struct dht_changelog_rename_info {  typedef int (*gf_cmp) (void *, void *); +struct _dict; + +struct dnscache { +        struct _dict *cache_dict; +        time_t ttl; +}; + +struct dnscache_entry { +        char *ip; +        char *fqdn; +        time_t timestamp; +}; + + +struct dnscache *gf_dnscache_init (time_t ttl); +struct dnscache_entry *gf_dnscache_entry_init (); +void gf_dnscache_entry_deinit (struct dnscache_entry *entry); +char *gf_rev_dns_lookup_cached (const char *ip, struct dnscache *dnscache); + +char *gf_resolve_path_parent (const char *path); +  void gf_global_variable_init(void);  int32_t gf_resolve_ip6 (const char *hostname, uint16_t port, int family, diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 3bc76f6622a..4c7f9f517e3 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -258,6 +258,10 @@  /* NOTE: add members ONLY at the end (just before _MAXVALUE) */ +/* + * OTHER NOTE: fop_enum_to_str and fop_enum_to_pri_str (in common-utils.h) also + * contain lists of fops, so if you update this list UPDATE THOSE TOO. + */  typedef enum {          GF_FOP_NULL = 0,          GF_FOP_STAT, @@ -312,6 +316,8 @@ typedef enum {          GF_FOP_MAXVALUE,  } glusterfs_fop_t; +const char *fop_enum_to_pri_string (glusterfs_fop_t fop); +const char *fop_enum_to_string (glusterfs_fop_t fop);  typedef enum {          GF_MGMT_NULL = 0, diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h index 84949c61487..70c4ea770d5 100644 --- a/libglusterfs/src/mem-types.h +++ b/libglusterfs/src/mem-types.h @@ -128,6 +128,8 @@ enum gf_common_mem_types_ {          gf_common_mt_ereg,          gf_common_mt_wr,          gf_common_mt_rdma_arena_mr, +        gf_common_mt_dnscache             = 115, +        gf_common_mt_dnscache_entry       = 116,          gf_common_mt_parser_t,          gf_common_quota_meta_t,          /*related to gfdb library*/ diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/stack.h index 5c0655f2ead..43d943c62b6 100644 --- a/libglusterfs/src/stack.h +++ b/libglusterfs/src/stack.h @@ -96,6 +96,7 @@ struct _call_stack_t {          uid_t                         uid;          gid_t                         gid;          pid_t                         pid; +        char                          identifier[UNIX_PATH_MAX];          uint16_t                      ngrps;          uint32_t                      groups_small[SMALL_GROUP_COUNT];  	uint32_t                     *groups_large; diff --git a/tests/basic/fop-sampling.t b/tests/basic/fop-sampling.t new file mode 100644 index 00000000000..cea8aa737c0 --- /dev/null +++ b/tests/basic/fop-sampling.t @@ -0,0 +1,61 @@ +#!/bin/bash +# + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +SAMPLE_FILE="$(gluster --print-logdir)/samples/glusterfs_${V0}.samp" + +function print_cnt() { +  local FOP_TYPE=$1 +  local FOP_CNT=$(grep ,${FOP_TYPE} ${SAMPLE_FILE} | wc -l) +  echo $FOP_CNT +} + +# Verify we got non-zero counts for stats/lookup/readdir +check_samples() { +        STAT_CNT=$(print_cnt STAT) +        if [ "$STAT_CNT" -le "0" ]; then +                echo "STAT count is zero" +                return +        fi + +        LOOKUP_CNT=$(print_cnt LOOKUP) +        if [ "$LOOKUP_CNT" -le "0" ]; then +                echo "LOOKUP count is zero" +                return +        fi + +        READDIR_CNT=$(print_cnt READDIR) +        if [ "$READDIR_CNT" -le "0" ]; then +                echo "READDIR count is zero" +                return +        fi + +        echo "OK" +} + +cleanup; +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume set $V0 nfs.disable off +TEST $CLI volume set $V0 diagnostics.latency-measurement on +TEST $CLI volume set $V0 diagnostics.count-fop-hits on +TEST $CLI volume set $V0 diagnostics.stats-dump-interval 2 +TEST $CLI volume set $V0 diagnostics.fop-sample-buf-size 65535 +TEST $CLI volume set $V0 diagnostics.fop-sample-interval 1 +TEST $CLI volume set $V0 diagnostics.stats-dnscache-ttl-sec 3600 + +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 + +for i in {1..5} +do +        dd if=/dev/zero of=${M0}/testfile$i bs=4k count=1 +done + +TEST ls -l $M0 +EXPECT_WITHIN 6 "OK" check_samples + +cleanup diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h index d7a1055a571..9dde9373264 100644 --- a/xlators/debug/io-stats/src/io-stats-mem-types.h +++ b/xlators/debug/io-stats/src/io-stats-mem-types.h @@ -20,6 +20,8 @@ enum gf_io_stats_mem_types_ {          gf_io_stats_mt_ios_fd,          gf_io_stats_mt_ios_stat,          gf_io_stats_mt_ios_stat_list, +        gf_io_stats_mt_ios_sample_buf, +        gf_io_stats_mt_ios_sample,          gf_io_stats_mt_end  };  #endif diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 3157a65dae1..e73cae0a928 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -34,8 +34,12 @@  #include "logging.h"  #include "cli1-xdr.h"  #include "statedump.h" +#include <pwd.h> +#include <grp.h>  #define MAX_LIST_MEMBERS 100 +#define DEFAULT_PWD_BUF_SZ 16384 +#define DEFAULT_GRP_BUF_SZ 16384  typedef enum {          IOS_STATS_TYPE_NONE, @@ -82,6 +86,25 @@ struct ios_stat_head {         struct ios_stat_list    *iosstats;  }; +typedef struct _ios_sample_t { +        uid_t  uid; +        gid_t  gid; +        char   identifier[UNIX_PATH_MAX]; +        glusterfs_fop_t fop_type; +        struct timeval timestamp; +        double elapsed; +} ios_sample_t; + + +typedef struct _ios_sample_buf_t { +        uint64_t        pos;  /* Position in write buffer */ +        uint64_t        size;  /* Size of ring buffer */ +        uint64_t        collected;  /* Number of samples we've collected */ +        uint64_t        observed;  /* Number of FOPs we've observed */ +        ios_sample_t    *ios_samples;  /* Our list of samples */ +} ios_sample_buf_t; + +  struct ios_lat {          double      min;          double      max; @@ -102,7 +125,6 @@ struct ios_global_stats {          struct timeval  max_openfd_time;  }; -  struct ios_conf {          gf_lock_t                 lock;          struct ios_global_stats   cumulative; @@ -116,6 +138,12 @@ struct ios_conf {          int32_t                   ios_dump_interval;          pthread_t                 dump_thread;          gf_boolean_t              dump_thread_should_die; +        gf_lock_t                 ios_sampling_lock; +        int32_t                   ios_sample_interval; +        int32_t                   ios_sample_buf_size; +        ios_sample_buf_t          *ios_sample_buf; +        struct dnscache           *dnscache; +        int32_t                   ios_dnscache_ttl_sec;  }; @@ -133,7 +161,8 @@ typedef enum {          IOS_DUMP_TYPE_FILE      = 1,          IOS_DUMP_TYPE_DICT      = 2,          IOS_DUMP_TYPE_JSON_FILE = 3, -        IOS_DUMP_TYPE_MAX       = 4 +        IOS_DUMP_TYPE_SAMPLES   = 4, +        IOS_DUMP_TYPE_MAX       = 5  } ios_dump_type_t;  struct ios_dump_args { @@ -163,6 +192,7 @@ is_fop_latency_started (call_frame_t *frame)  }  #ifdef GF_LINUX_HOST_OS +const char *_IOS_SAMP_DIR = "/var/log/glusterfs/samples";  #define _IOS_DUMP_DIR DATADIR "/lib/glusterd/stats"  #else  #define _IOS_DUMP_DIR DATADIR "/db/glusterd/stats" @@ -429,6 +459,72 @@ ios_inode_ctx_get (inode_t *inode, xlator_t *this, struct ios_stat **iosstat)  } +/* + * So why goto all this trouble?  Why not just queue up some samples in + * a big list and malloc away?  Well malloc is expensive relative + * to what we are measuring, so cannot have any malloc's (or worse + * callocs) in our measurement code paths.  Instead, we are going to + * pre-allocate a circular buffer and collect a maximum number of samples. + * Prior to dumping them all we'll create a new buffer and swap the + * old buffer with the new, and then proceed to dump the statistics + * in our dump thread. + * + */ +ios_sample_buf_t * +ios_create_sample_buf (size_t buf_size) +{ +        ios_sample_buf_t *ios_sample_buf = NULL; +        ios_sample_t     *ios_samples = NULL; + +        ios_sample_buf = GF_CALLOC (1, +                sizeof (*ios_sample_buf), +                gf_io_stats_mt_ios_sample_buf); +        if (!ios_sample_buf) +                goto err; + +        ios_samples = GF_CALLOC (buf_size, +                sizeof (*ios_samples), +                gf_io_stats_mt_ios_sample); + +        if (!ios_samples) +                goto err; + +        ios_sample_buf->ios_samples = ios_samples; +        ios_sample_buf->size = buf_size; +        ios_sample_buf->pos = 0; +        ios_sample_buf->observed = 0; +        ios_sample_buf->collected = 0; + +        return ios_sample_buf; +err: +        GF_FREE (ios_sample_buf); +        return NULL; +} + +void +ios_destroy_sample_buf (ios_sample_buf_t *ios_sample_buf) +{ +        GF_FREE (ios_sample_buf->ios_samples); +        GF_FREE (ios_sample_buf); +} + +static int +ios_init_sample_buf (struct ios_conf *conf) +{ +        int32_t        ret = -1; + +        GF_ASSERT (conf); +        LOCK (&conf->lock); +        conf->ios_sample_buf = ios_create_sample_buf ( +                conf->ios_sample_buf_size); +        if (!conf->ios_sample_buf) +                goto out; +        ret = 0; +out: +        UNLOCK (&conf->lock); +        return ret; +} +  int  ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,                              struct ios_stat *iosstat) @@ -815,6 +911,230 @@ out:          return ret;  } +char * +_resolve_username (xlator_t *this, uid_t uid) +{ +        struct passwd pwd; +        struct passwd *pwd_result = NULL; +        size_t pwd_buf_len; +        char   *pwd_buf = NULL; +        char   *ret = NULL; + +        /* Prepare our buffer for the uid->username translation */ +#ifdef _SC_GETGR_R_SIZE_MAX +        pwd_buf_len = sysconf (_SC_GETGR_R_SIZE_MAX); +#else +        pwd_buf_len = -1; +#endif +        if (pwd_buf_len == -1) { +                pwd_buf_len = DEFAULT_PWD_BUF_SZ;  /* per the man page */ +        } + +        pwd_buf = alloca (pwd_buf_len); +        if (!pwd_buf) +                goto err; + +        getpwuid_r (uid, &pwd, pwd_buf, pwd_buf_len, +                   &pwd_result); +        if (!pwd_result) +                goto err; + +        ret = gf_strdup (pwd.pw_name); +        if (ret) +                return ret; +        else +                gf_log (this->name, GF_LOG_ERROR, +                        "gf_strdup failed, failing username " +                        "resolution."); +err: +        return ret; +} + +char * +_resolve_group_name (xlator_t *this, gid_t gid) +{ +        struct group grp; +        struct group *grp_result = NULL; +        size_t grp_buf_len; +        char   *grp_buf = NULL; +        char   *ret = NULL; + +        /* Prepare our buffer for the gid->group name translation */ +#ifdef _SC_GETGR_R_SIZE_MAX +        grp_buf_len = sysconf (_SC_GETGR_R_SIZE_MAX); +#else +        grp_buf_len = -1; +#endif +        if (grp_buf_len == -1) { +                grp_buf_len = DEFAULT_GRP_BUF_SZ;  /* per the man page */ +        } + +        grp_buf = alloca (grp_buf_len); +        if (!grp_buf) { +                goto err; +        } + +        getgrgid_r (gid, &grp, grp_buf, grp_buf_len, +                   &grp_result); +        if (!grp_result) +                goto err; + +        ret = gf_strdup (grp.gr_name); +        if (ret) +                return ret; +        else +                gf_log (this->name, GF_LOG_ERROR, +                        "gf_strdup failed, failing username " +                        "resolution."); +err: +        return ret; +} + + +/* + * This function writes out a latency sample to a given file descriptor + * and beautifies the output in the process. + */ +void +_io_stats_write_latency_sample (xlator_t *this, ios_sample_t *sample, +                                FILE *logfp) +{ +        double epoch_time = 0.00; +        char   *xlator_name = NULL; +        char   *instance_name = NULL; +        char   *hostname = NULL; +        char   *identifier = NULL; +        char   *port = NULL; +        char   *port_pos = NULL; +        char   *group_name = NULL; +        char   *username = NULL; +        struct ios_conf *conf = NULL; + +        conf = this->private; + +        epoch_time = (sample->timestamp).tv_sec + +          ((sample->timestamp).tv_usec / 1000000.0); + +        if (!sample->identifier || (strlen (sample->identifier) == 0)) { +                hostname = "Unknown"; +                port = "Unknown"; +        } else { +                identifier = strdupa (sample->identifier); +                port_pos = strrchr (identifier, ':'); +                if (!port_pos || strlen(port_pos) < 2) +                        goto err; +                port = strdupa (port_pos + 1); +                if (!port) +                        goto err; +                *port_pos = '\0'; +                hostname = gf_rev_dns_lookup_cached (identifier, +                                                     conf->dnscache); +                if (!hostname) +                        hostname = "Unknown"; +        } + +        xlator_name = this->name; +        if (!xlator_name || strlen (xlator_name) == 0) +                xlator_name = "Unknown"; + +        instance_name = this->instance_name; +        if (!instance_name || strlen (instance_name) == 0) +                instance_name = "N/A"; + +        /* Resolve the UID to a string username */ +        username = _resolve_username (this, sample->uid); +        if (!username) { +                username = GF_MALLOC (30, gf_common_mt_char); +                sprintf (username, "%d", (int32_t)sample->uid); +        } + +        /* Resolve the GID to a string group name */ +        group_name = _resolve_group_name (this, sample->gid); +        if (!group_name) { +                group_name = GF_MALLOC (30, gf_common_mt_char); +                sprintf (group_name, "%d", (int32_t)sample->gid); +        } + +        ios_log (this, logfp, +                 "%0.6lf,%s,%s,%0.4lf,%s,%s,%s,%s,%s,%s", +                 epoch_time, fop_enum_to_pri_string (sample->fop_type), +                 fop_enum_to_string (sample->fop_type), +                 sample->elapsed, xlator_name, instance_name, username, +                 group_name, hostname, port); +        goto out; +err: +        gf_log (this->name, GF_LOG_ERROR, +                "Error parsing socket identifier"); +out: +        GF_FREE (group_name); +        GF_FREE (username); +} + +/* + * Takes our current sample buffer in conf->io_sample_buf, and saves + * a reference to this, init's a new buffer, and then dumps out the + * contents of the saved reference. + */ +int +io_stats_dump_latency_samples_logfp (xlator_t *this, FILE *logfp) +{ +        uint64_t              i = 0; +        struct ios_conf       *conf = NULL; +        ios_sample_buf_t      *sample_buf = NULL; +        int                   ret = 1;  /* Default to error */ + +        conf = this->private; + +        /* Save pointer to old buffer; the CS equivalent of +         * Indiana Jones: https://www.youtube.com/watch?v=Pr-8AP0To4k, +         * though ours will end better I hope! +         */ +        sample_buf = conf->ios_sample_buf; +        if (!sample_buf) { +                gf_log (this->name, GF_LOG_WARNING, +                        "Sampling buffer is null, bailing!"); +                goto out; +        } + +        /* Empty case, nothing to do, exit. */ +        if (sample_buf->collected == 0) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "No samples, dump not required."); +                ret = 0; +                goto out; +        } + +        /* Init a new buffer, so we are free to work on the one we saved a +         * reference to above. +         */ +        if (ios_init_sample_buf (conf) != 0) { +                gf_log (this->name, GF_LOG_WARNING, +                        "Failed to init new sampling buffer, out of memory?"); +                goto out; +        } + +        /* Wrap-around case, dump from pos to sample_buf->size -1 +         * and then from 0 to sample_buf->pos (covered off by +         * "simple case") +         */ +        if (sample_buf->collected > sample_buf->pos + 1) { +                for (i = sample_buf->pos; i < sample_buf->size; i++) { +                        _io_stats_write_latency_sample (this, +                                &(sample_buf->ios_samples[i]), logfp); +                } +        } + +        /* Simple case: Dump from 0 to sample_buf->pos */ +        for (i = 0; i < sample_buf->pos; i++) { +                _io_stats_write_latency_sample (this, +                        &(sample_buf->ios_samples[i]), logfp); +        } +        ios_destroy_sample_buf (sample_buf); + +out: +        return ret; +} +  int  io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,                                 struct timeval *now, int interval, FILE* logfp) @@ -1284,6 +1604,48 @@ io_stats_dump_fd (xlator_t *this, struct ios_fd *iosfd)          return 0;  } +void collect_ios_latency_sample (struct ios_conf *conf, +                glusterfs_fop_t fop_type, double elapsed, +                call_frame_t *frame) +{ +        ios_sample_buf_t *ios_sample_buf = NULL; +        ios_sample_t     *ios_sample = NULL; +        struct timeval   *timestamp = NULL; +        call_stack_t     *root = NULL; + + +        ios_sample_buf = conf->ios_sample_buf; +        LOCK (&conf->ios_sampling_lock); +        if (conf->ios_sample_interval == 0 || +            ios_sample_buf->observed % conf->ios_sample_interval != 0) +                goto out; + +        timestamp = &frame->begin; +        root = frame->root; + +        ios_sample = &(ios_sample_buf->ios_samples[ios_sample_buf->pos]); +        ios_sample->elapsed = elapsed; +        ios_sample->fop_type = fop_type; +        ios_sample->uid = root->uid; +        ios_sample->gid = root->gid; +        (ios_sample->timestamp).tv_sec = timestamp->tv_sec; +        (ios_sample->timestamp).tv_usec = timestamp->tv_usec; +        memcpy (&ios_sample->identifier, &root->identifier, +                sizeof (root->identifier)); + +        /* We've reached the end of the circular buffer, start from the +         * beginning. */ +        if (ios_sample_buf->pos == (ios_sample_buf->size - 1)) +                ios_sample_buf->pos = 0; +        else +                ios_sample_buf->pos++; +        ios_sample_buf->collected++; +out: +        ios_sample_buf->observed++; +        UNLOCK (&conf->ios_sampling_lock); +        return; +} +  static void  update_ios_latency_stats (struct ios_global_stats   *stats, double elapsed,                            glusterfs_fop_t op) @@ -1321,6 +1683,7 @@ update_ios_latency (struct ios_conf *conf, call_frame_t *frame,          update_ios_latency_stats (&conf->cumulative, elapsed, op);          update_ios_latency_stats (&conf->incremental, elapsed, op); +        collect_ios_latency_sample (conf, op, elapsed, frame);          return 0;  } @@ -1660,7 +2023,7 @@ io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          if (iosstat) {                BUMP_STATS (iosstat, IOS_STATS_TYPE_READDIRP); -              iosstat = NULL; +               iosstat = NULL;          }          STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata); @@ -2512,14 +2875,18 @@ _ios_destroy_dump_thread (struct ios_conf *conf) {  void *  _ios_dump_thread (xlator_t *this) {          struct ios_conf         *conf = NULL; -        FILE                    *logfp = NULL; +        FILE                    *stats_logfp = NULL; +        FILE                    *samples_logfp = NULL;          struct ios_dump_args args = {0};          int                     i; -        int                     bytes_written = 0; -        char                    filename[PATH_MAX]; +        int                     stats_bytes_written = 0; +        int                     samples_bytes_written = 0; +        char                    stats_filename[PATH_MAX]; +        char                    samples_filename[PATH_MAX];          char                    *xlator_name;          char                    *instance_name; -        gf_boolean_t            log_fopen_failure = _gf_true; +        gf_boolean_t            log_stats_fopen_failure = _gf_true; +        gf_boolean_t            log_samples_fopen_failure = _gf_true;          int                     old_cancel_type;          conf = this->private; @@ -2546,17 +2913,34 @@ _ios_dump_thread (xlator_t *this) {                          goto out;                  }          } +        if (mkdir (_IOS_SAMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG) == (-1)) { +                if (errno != EEXIST) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "could not create stats-sample directory %s", +                                _IOS_SAMP_DIR); +                        goto out; +                } +        }          if (instance_name) { -                bytes_written = snprintf (filename, PATH_MAX, +                stats_bytes_written = snprintf (stats_filename, PATH_MAX,                          "%s/%s_%s_%s.dump", _IOS_DUMP_DIR,                          __progname, xlator_name, instance_name); +                samples_bytes_written = snprintf (samples_filename, PATH_MAX, +                        "%s/%s_%s_%s.samp", _IOS_SAMP_DIR, +                        __progname, xlator_name, instance_name);          } else { -                bytes_written = snprintf (filename, PATH_MAX, "%s/%s_%s.dump", -                        _IOS_DUMP_DIR, __progname, xlator_name); -        } -        if (bytes_written >= PATH_MAX) { +                stats_bytes_written = snprintf (stats_filename, PATH_MAX, +                        "%s/%s_%s.dump", _IOS_DUMP_DIR, __progname, +                        xlator_name); +                samples_bytes_written = snprintf (samples_filename, PATH_MAX, +                        "%s/%s_%s.samp", _IOS_SAMP_DIR, __progname, +                        xlator_name); +        } +        if ((stats_bytes_written >= PATH_MAX) || +            (samples_bytes_written >= PATH_MAX)) {                  gf_log (this->name, GF_LOG_ERROR, -                        "Invalid path for IO Stats dump: %s", filename); +                        "Invalid path for stats dump (%s) and/or latency " +                        "samples (%s)", stats_filename, samples_filename);                  goto out;          }          while (1) { @@ -2572,22 +2956,32 @@ _ios_dump_thread (xlator_t *this) {                   * just hold it open and rewind/truncate on each iteration.                   * Leaving it alone for now.                   */ -                logfp = fopen (filename, "w+"); -                if (!logfp) { -                        if (log_fopen_failure) { -                                gf_log (this->name, GF_LOG_ERROR, -                                        "could not open stats-dump file %s", -                                        filename); -                                log_fopen_failure = _gf_false; -                        } -                        continue; +                stats_logfp = fopen (stats_filename, "w+"); +                if (stats_logfp) { +                        (void) ios_dump_args_init (&args, +                                                   IOS_DUMP_TYPE_JSON_FILE, +                                                   stats_logfp); +                        io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false); +                        fclose (stats_logfp); +                        log_stats_fopen_failure = _gf_true; +                } else if (log_stats_fopen_failure) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "could not open stats-dump file %s (%s)", +                                stats_filename, strerror(errno)); +                        log_stats_fopen_failure = _gf_false; +                } +                samples_logfp = fopen (samples_filename, "w+"); +                if (samples_logfp) { +                        io_stats_dump_latency_samples_logfp (this, +                                                             samples_logfp); +                        fclose (samples_logfp); +                        log_samples_fopen_failure = _gf_true; +                } else if (log_samples_fopen_failure) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "could not open samples-dump file %s (%s)", +                                samples_filename, strerror(errno)); +                        log_samples_fopen_failure = _gf_false;                  } -                (void) ios_dump_args_init ( -                                            &args, IOS_DUMP_TYPE_JSON_FILE, -                                            logfp); -                io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false); -                fclose (logfp); -                log_fopen_failure = _gf_true;          }  out:          gf_log (this->name, GF_LOG_INFO, "IO stats dump thread terminated"); @@ -3116,6 +3510,10 @@ reconfigure (xlator_t *this, dict_t *options)                                  (void *) &_ios_dump_thread, this);          } +        GF_OPTION_RECONF ("ios-sample-interval", conf->ios_sample_interval, +                         options, int32, out); +        GF_OPTION_RECONF ("ios-sample-buf-size", conf->ios_sample_buf_size, +                         options, int32, out);          GF_OPTION_RECONF ("sys-log-level", sys_log_str, options, str, out);          if (sys_log_str) {                  sys_log_level = glusterd_check_log_level (sys_log_str); @@ -3229,6 +3627,7 @@ init (xlator_t *this)           * in case of error paths.           */          LOCK_INIT (&conf->lock); +        LOCK_INIT (&conf->ios_sampling_lock);          gettimeofday (&conf->cumulative.started_at, NULL);          gettimeofday (&conf->incremental.started_at, NULL); @@ -3247,6 +3646,22 @@ init (xlator_t *this)          GF_OPTION_INIT ("ios-dump-interval", conf->ios_dump_interval,                          int32, out); +        GF_OPTION_INIT ("ios-sample-interval", conf->ios_sample_interval, +                        int32, out); + +        GF_OPTION_INIT ("ios-sample-buf-size", conf->ios_sample_buf_size, +                        int32, out); + +        if (ios_init_sample_buf (conf) != 0) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Out of memory."); +                return -1; +        } + +        GF_OPTION_INIT ("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec, +                        int32, out); +        conf->dnscache = gf_dnscache_init (conf->ios_dnscache_ttl_sec); +          GF_OPTION_INIT ("sys-log-level", sys_log_str, str, out);          if (sys_log_str) {                  sys_log_level = glusterd_check_log_level (sys_log_str); @@ -3497,6 +3912,29 @@ struct volume_options options[] = {            .description = "Interval (in seconds) at which to auto-dump "                           "statistics. Zero disables automatic dumping."          }, +        { .key  = { "ios-sample-interval" }, +          .type = GF_OPTION_TYPE_INT, +          .min = 0, +          .max = 65535, +          .default_value = "0", +          .description = "Interval in which we want to collect FOP latency " +                         "samples.  2 means collect a sample every 2nd FOP." +        }, +        { .key  = { "ios-sample-buf-size" }, +          .type = GF_OPTION_TYPE_INT, +          .min = 1024, +          .max = 1024*1024, +          .default_value = "65535", +          .description = "The maximum size of our FOP sampling ring buffer." +        }, +        { .key  = { "ios-dnscache-ttl-sec" }, +          .type = GF_OPTION_TYPE_INT, +          .min = 1, +          .max = 3600 * 72, +          .default_value = "86400", +          .description = "The interval after wish a cached DNS entry will be " +                         "re-validated.  Default: 24 hrs" +        },          { .key  = { "latency-measurement" },            .type = GF_OPTION_TYPE_BOOL,            .default_value = "off", diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 4ea7d3baa8a..ccf4b380ffb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -849,6 +849,21 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .option      = "ios-dump-interval",            .op_version  = 1          }, +        { .key         = "diagnostics.fop-sample-interval", +          .voltype     = "debug/io-stats", +          .option      = "ios-sample-interval", +          .op_version  = 1 +        }, +        { .key         = "diagnostics.fop-sample-buf-size", +          .voltype     = "debug/io-stats", +          .option      = "ios-sample-buf-size", +          .op_version  = 1 +        }, +        { .key         = "diagnostics.stats-dnscache-ttl-sec", +          .voltype     = "debug/io-stats", +          .option      = "ios-dnscache-ttl-sec", +          .op_version  = 1 +        },          /* IO-cache xlator options */          { .key         = "performance.cache-max-file-size", diff --git a/xlators/nfs/server/src/nfs-fops.c b/xlators/nfs/server/src/nfs-fops.c index 819ae98b081..52fdd9796c3 100644 --- a/xlators/nfs/server/src/nfs-fops.c +++ b/xlators/nfs/server/src/nfs-fops.c @@ -219,6 +219,7 @@ nfs_create_frame (xlator_t *xl, nfs_user_t *nfu)          frame->root->pid = NFS_PID;          frame->root->uid = nfu->uid;          frame->root->gid = nfu->gids[NFS_PRIMGID_IDX]; +        memcpy (&frame->root->identifier, &nfu->identifier, UNIX_PATH_MAX);          frame->root->lk_owner = nfu->lk_owner;          if (nfu->ngrps != 1) { diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c index 4dda35c49ef..116854b9109 100644 --- a/xlators/nfs/server/src/nfs.c +++ b/xlators/nfs/server/src/nfs.c @@ -639,8 +639,8 @@ nfs_user_root_create (nfs_user_t *newnfu)  int -nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids, -                 int auxcount) +nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, +                 rpc_transport_t *trans, gid_t *auxgids, int auxcount)  {          int     x = 1;          int     y = 0; @@ -655,6 +655,10 @@ nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,          newnfu->uid = uid;          newnfu->gids[0] = gid;          newnfu->ngrps = 1; +        if (trans) { +                memcpy (&newnfu->identifier, trans->peerinfo.identifier, +                       UNIX_PATH_MAX); +        }          gf_msg_trace (GF_NFS, 0, "uid: %d, gid %d, gids: %d", uid, gid,                  auxcount); @@ -683,7 +687,9 @@ nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req)          gidarr = rpcsvc_auth_unix_auxgids (req, &gids);          nfs_user_create (nfu, rpcsvc_request_uid (req), -                         rpcsvc_request_gid (req), gidarr, gids); +                         rpcsvc_request_gid (req), +                         rpcsvc_request_transport (req), +                         gidarr, gids);          return;  } @@ -699,7 +705,8 @@ nfs_request_primary_user_init (nfs_user_t *nfu, rpcsvc_request_t *req,                  return;          gidarr = rpcsvc_auth_unix_auxgids (req, &gids); -        nfs_user_create (nfu, uid, gid, gidarr, gids); +        nfs_user_create (nfu, uid, gid, rpcsvc_request_transport (req), +                         gidarr, gids);          return;  } diff --git a/xlators/nfs/server/src/nfs.h b/xlators/nfs/server/src/nfs.h index 107140bc720..82df163d494 100644 --- a/xlators/nfs/server/src/nfs.h +++ b/xlators/nfs/server/src/nfs.h @@ -125,14 +125,15 @@ typedef struct nfs_user_info {          gid_t   gids[NFS_NGROUPS];          int     ngrps;          gf_lkowner_t lk_owner; +        char    identifier[UNIX_PATH_MAX];  /* ip of user */  } nfs_user_t;  extern int  nfs_user_root_create (nfs_user_t *newnfu);  extern int -nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids, -                 int auxcount); +nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, +                 rpc_transport_t *trans, gid_t *auxgids, int auxcount);  extern void  nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req); diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index bcd1fed8b84..b369e835b24 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -423,6 +423,7 @@ get_frame_from_request (rpcsvc_request_t *req)          server_conf_t *priv = NULL;          clienttable_t *clienttable = NULL;          unsigned int   i           = 0; +        rpc_transport_t *trans = NULL;          GF_VALIDATE_OR_GOTO ("server", req, out); @@ -500,6 +501,12 @@ get_frame_from_request (rpcsvc_request_t *req)              server_resolve_groups (frame, req);          else              server_decode_groups (frame, req); +        trans = req->trans; +        if (trans) { +                memcpy (&frame->root->identifier, trans->peerinfo.identifier, +                        sizeof (trans->peerinfo.identifier)); +        } +          frame->local = req;  out:  | 
