diff options
| -rw-r--r-- | extras/glusterfs-logrotate | 9 | ||||
| -rw-r--r-- | libglusterfs/src/common-utils.c | 260 | ||||
| -rw-r--r-- | libglusterfs/src/common-utils.h | 21 | ||||
| -rw-r--r-- | libglusterfs/src/glusterfs.h | 6 | ||||
| -rw-r--r-- | libglusterfs/src/mem-types.h | 2 | ||||
| -rw-r--r-- | libglusterfs/src/stack.h | 1 | ||||
| -rw-r--r-- | tests/basic/fop-sampling.t | 61 | ||||
| -rw-r--r-- | xlators/debug/io-stats/src/io-stats-mem-types.h | 2 | ||||
| -rw-r--r-- | xlators/debug/io-stats/src/io-stats.c | 494 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 15 | ||||
| -rw-r--r-- | xlators/nfs/server/src/nfs-fops.c | 1 | ||||
| -rw-r--r-- | xlators/nfs/server/src/nfs.c | 15 | ||||
| -rw-r--r-- | xlators/nfs/server/src/nfs.h | 5 | ||||
| -rw-r--r-- | xlators/protocol/server/src/server-helpers.c | 7 | 
14 files changed, 864 insertions, 35 deletions
diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate index e3319afaa96..575c0eee771 100644 --- a/extras/glusterfs-logrotate +++ b/extras/glusterfs-logrotate @@ -26,3 +26,12 @@    /usr/bin/killall -HUP glusterfsd > /dev/null 2>&1 || true    endscript  } + +/var/log/glusterfs/samples/*.samp { +  daily +  rotate 3 +  sharedscripts +  missingok +  compress +  delaycompress +} diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c index 2dcd54f1829..a89e120c0fb 100644 --- a/libglusterfs/src/common-utils.c +++ b/libglusterfs/src/common-utils.c @@ -34,6 +34,7 @@  #if defined(GF_BSD_HOST_OS) || defined(GF_DARWIN_HOST_OS)  #include <sys/sysctl.h>  #endif +#include <libgen.h>  #include "compat-errno.h"  #include "logging.h" @@ -210,7 +211,7 @@ out:  }  /** - * gf_resolve_parent_path -- Given a path, returns an allocated string + * gf_resolve_path_parent -- Given a path, returns an allocated string   *                           containing the parent's path.   * @path: Path to parse   * @return: The parent path if found, NULL otherwise @@ -359,6 +360,135 @@ err:          return -1;  } +/** + * gf_dnscache_init -- Initializes a dnscache struct and sets the ttl + *                     to the specified value in the parameter. + * + * @ttl: the TTL in seconds + * @return: SUCCESS: Pointer to an allocated dnscache struct + *          FAILURE: NULL + */ +struct dnscache * +gf_dnscache_init (time_t ttl) +{ +        struct dnscache *cache = GF_MALLOC (sizeof (*cache), +                                            gf_common_mt_dnscache); +        cache->cache_dict = NULL; +        cache->ttl = ttl; +        return cache; +} + +/** + * gf_dnscache_entry_init -- Initialize a dnscache entry + * + * @return: SUCCESS: Pointer to an allocated dnscache entry struct + *          FAILURE: NULL + */ +struct dnscache_entry * +gf_dnscache_entry_init () +{ +        struct dnscache_entry *entry = GF_CALLOC (1, sizeof (*entry), +                                                 gf_common_mt_dnscache_entry); +        return entry; +} + +/** + * gf_dnscache_entry_deinit -- Free memory used by a dnscache entry + * + * @entry: Pointer to deallocate + */ +void +gf_dnscache_entry_deinit (struct dnscache_entry *entry) +{ +        GF_FREE (entry->ip); +        GF_FREE (entry->fqdn); +        GF_FREE (entry); +} + +/** + * gf_rev_dns_lookup -- Perform a reverse DNS lookup on the IP address. + * + * @ip: The IP address to perform a reverse lookup on + * + * @return: success: Allocated string containing the hostname + *          failure: NULL + */ +char * +gf_rev_dns_lookup_cached (const char *ip, struct dnscache *dnscache) +{ +        char               *fqdn = NULL; +        int                ret  = 0; +        dict_t             *cache = NULL; +        data_t             *entrydata = NULL; +        struct dnscache_entry *dnsentry = NULL; +        gf_boolean_t        from_cache = _gf_false; + +        if (!dnscache) +                goto out; + +        if (!dnscache->cache_dict) { +                dnscache->cache_dict = dict_new (); +                if (!dnscache->cache_dict) { +                        goto out; +                } +        } +        cache = dnscache->cache_dict; + +        /* Quick cache lookup to see if we already hold it */ +        entrydata = dict_get (cache, (char *)ip); +        if (entrydata) { +                dnsentry = (struct dnscache_entry *)entrydata->data; +                /* First check the TTL & timestamp */ +                if (time (NULL) - dnsentry->timestamp > dnscache->ttl) { +                        gf_dnscache_entry_deinit (dnsentry); +                        entrydata->data = NULL; /* Mark this as 'null' so +                                                 * dict_del () doesn't try free +                                                 * this after we've already +                                                 * freed it. +                                                 */ + +                        dict_del (cache, (char *)ip); /* Remove this entry */ +                } else { +                        /* Cache entry is valid, get the FQDN and return */ +                        fqdn = dnsentry->fqdn; +                        from_cache = _gf_true; /* Mark this as from cache */ +                        goto out; +                } +        } + +        /* Get the FQDN */ +        ret =  gf_get_hostname_from_ip ((char *)ip, &fqdn); +        if (ret != 0) +                goto out; + +        if (!fqdn) { +                gf_log_callingfn ("resolver", GF_LOG_CRITICAL, +                                  "Allocation failed for the host address"); +                goto out; +        } + +        from_cache = _gf_false; +out: +        /* Insert into the cache */ +        if (fqdn && !from_cache) { +                struct dnscache_entry *entry = gf_dnscache_entry_init (); + +                if (!entry) { +                        goto out; +                } +                entry->fqdn = fqdn; +                entry->ip = gf_strdup (ip); +                if (!ip) { +                        gf_dnscache_entry_deinit (entry); +                        goto out; +                } +                entry->timestamp = time (NULL); + +                entrydata = bin_to_data (entry, sizeof (*entry)); +                dict_set (cache, (char *)ip, entrydata); +        } +        return fqdn; +}  struct xldump {  	int lineno; @@ -4012,3 +4142,131 @@ _unmask_cancellation (void)  {          (void) pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);  } + + +const char * +fop_enum_to_pri_string (glusterfs_fop_t fop) +{ +        switch (fop) { +        case GF_FOP_OPEN: +        case GF_FOP_STAT: +        case GF_FOP_FSTAT: +        case GF_FOP_LOOKUP: +        case GF_FOP_ACCESS: +        case GF_FOP_READLINK: +        case GF_FOP_OPENDIR: +        case GF_FOP_STATFS: +        case GF_FOP_READDIR: +        case GF_FOP_READDIRP: +                return "HIGH"; + +        case GF_FOP_CREATE: +        case GF_FOP_FLUSH: +        case GF_FOP_LK: +        case GF_FOP_INODELK: +        case GF_FOP_FINODELK: +        case GF_FOP_ENTRYLK: +        case GF_FOP_FENTRYLK: +        case GF_FOP_UNLINK: +        case GF_FOP_SETATTR: +        case GF_FOP_FSETATTR: +        case GF_FOP_MKNOD: +        case GF_FOP_MKDIR: +        case GF_FOP_RMDIR: +        case GF_FOP_SYMLINK: +        case GF_FOP_RENAME: +        case GF_FOP_LINK: +        case GF_FOP_SETXATTR: +        case GF_FOP_GETXATTR: +        case GF_FOP_FGETXATTR: +        case GF_FOP_FSETXATTR: +        case GF_FOP_REMOVEXATTR: +        case GF_FOP_FREMOVEXATTR: +        case GF_FOP_IPC: +                return "NORMAL"; + +        case GF_FOP_READ: +        case GF_FOP_WRITE: +        case GF_FOP_FSYNC: +        case GF_FOP_TRUNCATE: +        case GF_FOP_FTRUNCATE: +        case GF_FOP_FSYNCDIR: +        case GF_FOP_XATTROP: +        case GF_FOP_FXATTROP: +        case GF_FOP_RCHECKSUM: +        case GF_FOP_ZEROFILL: +        case GF_FOP_FALLOCATE: +                return "LOW"; + +        case GF_FOP_NULL: +        case GF_FOP_FORGET: +        case GF_FOP_RELEASE: +        case GF_FOP_RELEASEDIR: +        case GF_FOP_GETSPEC: +        case GF_FOP_MAXVALUE: +        case GF_FOP_DISCARD: +                return "LEAST"; +        } +        return "UNKNOWN"; +} + +const char * +fop_enum_to_string (glusterfs_fop_t fop) +{ +        static const char *const str_map[] = { +                "NULL", +                "STAT", +                "READLINK", +                "MKNOD", +                "MKDIR", +                "UNLINK", +                "RMDIR", +                "SYMLINK", +                "RENAME", +                "LINK", +                "TRUNCATE", +                "OPEN", +                "READ", +                "WRITE", +                "STATFS", +                "FLUSH", +                "FSYNC", +                "SETXATTR", +                "GETXATTR", +                "REMOVEXATTR", +                "OPENDIR", +                "FSYNCDIR", +                "ACCESS", +                "CREATE", +                "FTRUNCATE", +                "FSTAT", +                "LK", +                "LOOKUP", +                "READDIR", +                "INODELK", +                "FINODELK", +                "ENTRYLK", +                "FENTRYLK", +                "XATTROP", +                "FXATTROP", +                "FGETXATTR", +                "FSETXATTR", +                "RCHECKSUM", +                "SETATTR", +                "FSETATTR", +                "READDIRP", +                "FORGET", +                "RELEASE", +                "RELEASEDIR", +                "GETSPEC", +                "FREMOVEXATTR", +                "FALLOCATE", +                "DISCARD", +                "ZEROFILL", +                "IPC", +                "MAXVALUE"}; +        if (fop <= GF_FOP_MAXVALUE) +                return str_map[fop]; + +        return "UNKNOWNFOP"; +} diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h index 77a8cdd51c7..bf574fdabc7 100644 --- a/libglusterfs/src/common-utils.h +++ b/libglusterfs/src/common-utils.h @@ -159,6 +159,27 @@ typedef struct dht_changelog_rename_info {  typedef int (*gf_cmp) (void *, void *); +struct _dict; + +struct dnscache { +        struct _dict *cache_dict; +        time_t ttl; +}; + +struct dnscache_entry { +        char *ip; +        char *fqdn; +        time_t timestamp; +}; + + +struct dnscache *gf_dnscache_init (time_t ttl); +struct dnscache_entry *gf_dnscache_entry_init (); +void gf_dnscache_entry_deinit (struct dnscache_entry *entry); +char *gf_rev_dns_lookup_cached (const char *ip, struct dnscache *dnscache); + +char *gf_resolve_path_parent (const char *path); +  void gf_global_variable_init(void);  int32_t gf_resolve_ip6 (const char *hostname, uint16_t port, int family, diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 3bc76f6622a..4c7f9f517e3 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -258,6 +258,10 @@  /* NOTE: add members ONLY at the end (just before _MAXVALUE) */ +/* + * OTHER NOTE: fop_enum_to_str and fop_enum_to_pri_str (in common-utils.h) also + * contain lists of fops, so if you update this list UPDATE THOSE TOO. + */  typedef enum {          GF_FOP_NULL = 0,          GF_FOP_STAT, @@ -312,6 +316,8 @@ typedef enum {          GF_FOP_MAXVALUE,  } glusterfs_fop_t; +const char *fop_enum_to_pri_string (glusterfs_fop_t fop); +const char *fop_enum_to_string (glusterfs_fop_t fop);  typedef enum {          GF_MGMT_NULL = 0, diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h index 84949c61487..70c4ea770d5 100644 --- a/libglusterfs/src/mem-types.h +++ b/libglusterfs/src/mem-types.h @@ -128,6 +128,8 @@ enum gf_common_mem_types_ {          gf_common_mt_ereg,          gf_common_mt_wr,          gf_common_mt_rdma_arena_mr, +        gf_common_mt_dnscache             = 115, +        gf_common_mt_dnscache_entry       = 116,          gf_common_mt_parser_t,          gf_common_quota_meta_t,          /*related to gfdb library*/ diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/stack.h index 5c0655f2ead..43d943c62b6 100644 --- a/libglusterfs/src/stack.h +++ b/libglusterfs/src/stack.h @@ -96,6 +96,7 @@ struct _call_stack_t {          uid_t                         uid;          gid_t                         gid;          pid_t                         pid; +        char                          identifier[UNIX_PATH_MAX];          uint16_t                      ngrps;          uint32_t                      groups_small[SMALL_GROUP_COUNT];  	uint32_t                     *groups_large; diff --git a/tests/basic/fop-sampling.t b/tests/basic/fop-sampling.t new file mode 100644 index 00000000000..cea8aa737c0 --- /dev/null +++ b/tests/basic/fop-sampling.t @@ -0,0 +1,61 @@ +#!/bin/bash +# + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +SAMPLE_FILE="$(gluster --print-logdir)/samples/glusterfs_${V0}.samp" + +function print_cnt() { +  local FOP_TYPE=$1 +  local FOP_CNT=$(grep ,${FOP_TYPE} ${SAMPLE_FILE} | wc -l) +  echo $FOP_CNT +} + +# Verify we got non-zero counts for stats/lookup/readdir +check_samples() { +        STAT_CNT=$(print_cnt STAT) +        if [ "$STAT_CNT" -le "0" ]; then +                echo "STAT count is zero" +                return +        fi + +        LOOKUP_CNT=$(print_cnt LOOKUP) +        if [ "$LOOKUP_CNT" -le "0" ]; then +                echo "LOOKUP count is zero" +                return +        fi + +        READDIR_CNT=$(print_cnt READDIR) +        if [ "$READDIR_CNT" -le "0" ]; then +                echo "READDIR count is zero" +                return +        fi + +        echo "OK" +} + +cleanup; +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume set $V0 nfs.disable off +TEST $CLI volume set $V0 diagnostics.latency-measurement on +TEST $CLI volume set $V0 diagnostics.count-fop-hits on +TEST $CLI volume set $V0 diagnostics.stats-dump-interval 2 +TEST $CLI volume set $V0 diagnostics.fop-sample-buf-size 65535 +TEST $CLI volume set $V0 diagnostics.fop-sample-interval 1 +TEST $CLI volume set $V0 diagnostics.stats-dnscache-ttl-sec 3600 + +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 + +for i in {1..5} +do +        dd if=/dev/zero of=${M0}/testfile$i bs=4k count=1 +done + +TEST ls -l $M0 +EXPECT_WITHIN 6 "OK" check_samples + +cleanup diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h index d7a1055a571..9dde9373264 100644 --- a/xlators/debug/io-stats/src/io-stats-mem-types.h +++ b/xlators/debug/io-stats/src/io-stats-mem-types.h @@ -20,6 +20,8 @@ enum gf_io_stats_mem_types_ {          gf_io_stats_mt_ios_fd,          gf_io_stats_mt_ios_stat,          gf_io_stats_mt_ios_stat_list, +        gf_io_stats_mt_ios_sample_buf, +        gf_io_stats_mt_ios_sample,          gf_io_stats_mt_end  };  #endif diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 3157a65dae1..e73cae0a928 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -34,8 +34,12 @@  #include "logging.h"  #include "cli1-xdr.h"  #include "statedump.h" +#include <pwd.h> +#include <grp.h>  #define MAX_LIST_MEMBERS 100 +#define DEFAULT_PWD_BUF_SZ 16384 +#define DEFAULT_GRP_BUF_SZ 16384  typedef enum {          IOS_STATS_TYPE_NONE, @@ -82,6 +86,25 @@ struct ios_stat_head {         struct ios_stat_list    *iosstats;  }; +typedef struct _ios_sample_t { +        uid_t  uid; +        gid_t  gid; +        char   identifier[UNIX_PATH_MAX]; +        glusterfs_fop_t fop_type; +        struct timeval timestamp; +        double elapsed; +} ios_sample_t; + + +typedef struct _ios_sample_buf_t { +        uint64_t        pos;  /* Position in write buffer */ +        uint64_t        size;  /* Size of ring buffer */ +        uint64_t        collected;  /* Number of samples we've collected */ +        uint64_t        observed;  /* Number of FOPs we've observed */ +        ios_sample_t    *ios_samples;  /* Our list of samples */ +} ios_sample_buf_t; + +  struct ios_lat {          double      min;          double      max; @@ -102,7 +125,6 @@ struct ios_global_stats {          struct timeval  max_openfd_time;  }; -  struct ios_conf {          gf_lock_t                 lock;          struct ios_global_stats   cumulative; @@ -116,6 +138,12 @@ struct ios_conf {          int32_t                   ios_dump_interval;          pthread_t                 dump_thread;          gf_boolean_t              dump_thread_should_die; +        gf_lock_t                 ios_sampling_lock; +        int32_t                   ios_sample_interval; +        int32_t                   ios_sample_buf_size; +        ios_sample_buf_t          *ios_sample_buf; +        struct dnscache           *dnscache; +        int32_t                   ios_dnscache_ttl_sec;  }; @@ -133,7 +161,8 @@ typedef enum {          IOS_DUMP_TYPE_FILE      = 1,          IOS_DUMP_TYPE_DICT      = 2,          IOS_DUMP_TYPE_JSON_FILE = 3, -        IOS_DUMP_TYPE_MAX       = 4 +        IOS_DUMP_TYPE_SAMPLES   = 4, +        IOS_DUMP_TYPE_MAX       = 5  } ios_dump_type_t;  struct ios_dump_args { @@ -163,6 +192,7 @@ is_fop_latency_started (call_frame_t *frame)  }  #ifdef GF_LINUX_HOST_OS +const char *_IOS_SAMP_DIR = "/var/log/glusterfs/samples";  #define _IOS_DUMP_DIR DATADIR "/lib/glusterd/stats"  #else  #define _IOS_DUMP_DIR DATADIR "/db/glusterd/stats" @@ -429,6 +459,72 @@ ios_inode_ctx_get (inode_t *inode, xlator_t *this, struct ios_stat **iosstat)  } +/* + * So why goto all this trouble?  Why not just queue up some samples in + * a big list and malloc away?  Well malloc is expensive relative + * to what we are measuring, so cannot have any malloc's (or worse + * callocs) in our measurement code paths.  Instead, we are going to + * pre-allocate a circular buffer and collect a maximum number of samples. + * Prior to dumping them all we'll create a new buffer and swap the + * old buffer with the new, and then proceed to dump the statistics + * in our dump thread. + * + */ +ios_sample_buf_t * +ios_create_sample_buf (size_t buf_size) +{ +        ios_sample_buf_t *ios_sample_buf = NULL; +        ios_sample_t     *ios_samples = NULL; + +        ios_sample_buf = GF_CALLOC (1, +                sizeof (*ios_sample_buf), +                gf_io_stats_mt_ios_sample_buf); +        if (!ios_sample_buf) +                goto err; + +        ios_samples = GF_CALLOC (buf_size, +                sizeof (*ios_samples), +                gf_io_stats_mt_ios_sample); + +        if (!ios_samples) +                goto err; + +        ios_sample_buf->ios_samples = ios_samples; +        ios_sample_buf->size = buf_size; +        ios_sample_buf->pos = 0; +        ios_sample_buf->observed = 0; +        ios_sample_buf->collected = 0; + +        return ios_sample_buf; +err: +        GF_FREE (ios_sample_buf); +        return NULL; +} + +void +ios_destroy_sample_buf (ios_sample_buf_t *ios_sample_buf) +{ +        GF_FREE (ios_sample_buf->ios_samples); +        GF_FREE (ios_sample_buf); +} + +static int +ios_init_sample_buf (struct ios_conf *conf) +{ +        int32_t        ret = -1; + +        GF_ASSERT (conf); +        LOCK (&conf->lock); +        conf->ios_sample_buf = ios_create_sample_buf ( +                conf->ios_sample_buf_size); +        if (!conf->ios_sample_buf) +                goto out; +        ret = 0; +out: +        UNLOCK (&conf->lock); +        return ret; +} +  int  ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,                              struct ios_stat *iosstat) @@ -815,6 +911,230 @@ out:          return ret;  } +char * +_resolve_username (xlator_t *this, uid_t uid) +{ +        struct passwd pwd; +        struct passwd *pwd_result = NULL; +        size_t pwd_buf_len; +        char   *pwd_buf = NULL; +        char   *ret = NULL; + +        /* Prepare our buffer for the uid->username translation */ +#ifdef _SC_GETGR_R_SIZE_MAX +        pwd_buf_len = sysconf (_SC_GETGR_R_SIZE_MAX); +#else +        pwd_buf_len = -1; +#endif +        if (pwd_buf_len == -1) { +                pwd_buf_len = DEFAULT_PWD_BUF_SZ;  /* per the man page */ +        } + +        pwd_buf = alloca (pwd_buf_len); +        if (!pwd_buf) +                goto err; + +        getpwuid_r (uid, &pwd, pwd_buf, pwd_buf_len, +                   &pwd_result); +        if (!pwd_result) +                goto err; + +        ret = gf_strdup (pwd.pw_name); +        if (ret) +                return ret; +        else +                gf_log (this->name, GF_LOG_ERROR, +                        "gf_strdup failed, failing username " +                        "resolution."); +err: +        return ret; +} + +char * +_resolve_group_name (xlator_t *this, gid_t gid) +{ +        struct group grp; +        struct group *grp_result = NULL; +        size_t grp_buf_len; +        char   *grp_buf = NULL; +        char   *ret = NULL; + +        /* Prepare our buffer for the gid->group name translation */ +#ifdef _SC_GETGR_R_SIZE_MAX +        grp_buf_len = sysconf (_SC_GETGR_R_SIZE_MAX); +#else +        grp_buf_len = -1; +#endif +        if (grp_buf_len == -1) { +                grp_buf_len = DEFAULT_GRP_BUF_SZ;  /* per the man page */ +        } + +        grp_buf = alloca (grp_buf_len); +        if (!grp_buf) { +                goto err; +        } + +        getgrgid_r (gid, &grp, grp_buf, grp_buf_len, +                   &grp_result); +        if (!grp_result) +                goto err; + +        ret = gf_strdup (grp.gr_name); +        if (ret) +                return ret; +        else +                gf_log (this->name, GF_LOG_ERROR, +                        "gf_strdup failed, failing username " +                        "resolution."); +err: +        return ret; +} + + +/* + * This function writes out a latency sample to a given file descriptor + * and beautifies the output in the process. + */ +void +_io_stats_write_latency_sample (xlator_t *this, ios_sample_t *sample, +                                FILE *logfp) +{ +        double epoch_time = 0.00; +        char   *xlator_name = NULL; +        char   *instance_name = NULL; +        char   *hostname = NULL; +        char   *identifier = NULL; +        char   *port = NULL; +        char   *port_pos = NULL; +        char   *group_name = NULL; +        char   *username = NULL; +        struct ios_conf *conf = NULL; + +        conf = this->private; + +        epoch_time = (sample->timestamp).tv_sec + +          ((sample->timestamp).tv_usec / 1000000.0); + +        if (!sample->identifier || (strlen (sample->identifier) == 0)) { +                hostname = "Unknown"; +                port = "Unknown"; +        } else { +                identifier = strdupa (sample->identifier); +                port_pos = strrchr (identifier, ':'); +                if (!port_pos || strlen(port_pos) < 2) +                        goto err; +                port = strdupa (port_pos + 1); +                if (!port) +                        goto err; +                *port_pos = '\0'; +                hostname = gf_rev_dns_lookup_cached (identifier, +                                                     conf->dnscache); +                if (!hostname) +                        hostname = "Unknown"; +        } + +        xlator_name = this->name; +        if (!xlator_name || strlen (xlator_name) == 0) +                xlator_name = "Unknown"; + +        instance_name = this->instance_name; +        if (!instance_name || strlen (instance_name) == 0) +                instance_name = "N/A"; + +        /* Resolve the UID to a string username */ +        username = _resolve_username (this, sample->uid); +        if (!username) { +                username = GF_MALLOC (30, gf_common_mt_char); +                sprintf (username, "%d", (int32_t)sample->uid); +        } + +        /* Resolve the GID to a string group name */ +        group_name = _resolve_group_name (this, sample->gid); +        if (!group_name) { +                group_name = GF_MALLOC (30, gf_common_mt_char); +                sprintf (group_name, "%d", (int32_t)sample->gid); +        } + +        ios_log (this, logfp, +                 "%0.6lf,%s,%s,%0.4lf,%s,%s,%s,%s,%s,%s", +                 epoch_time, fop_enum_to_pri_string (sample->fop_type), +                 fop_enum_to_string (sample->fop_type), +                 sample->elapsed, xlator_name, instance_name, username, +                 group_name, hostname, port); +        goto out; +err: +        gf_log (this->name, GF_LOG_ERROR, +                "Error parsing socket identifier"); +out: +        GF_FREE (group_name); +        GF_FREE (username); +} + +/* + * Takes our current sample buffer in conf->io_sample_buf, and saves + * a reference to this, init's a new buffer, and then dumps out the + * contents of the saved reference. + */ +int +io_stats_dump_latency_samples_logfp (xlator_t *this, FILE *logfp) +{ +        uint64_t              i = 0; +        struct ios_conf       *conf = NULL; +        ios_sample_buf_t      *sample_buf = NULL; +        int                   ret = 1;  /* Default to error */ + +        conf = this->private; + +        /* Save pointer to old buffer; the CS equivalent of +         * Indiana Jones: https://www.youtube.com/watch?v=Pr-8AP0To4k, +         * though ours will end better I hope! +         */ +        sample_buf = conf->ios_sample_buf; +        if (!sample_buf) { +                gf_log (this->name, GF_LOG_WARNING, +                        "Sampling buffer is null, bailing!"); +                goto out; +        } + +        /* Empty case, nothing to do, exit. */ +        if (sample_buf->collected == 0) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "No samples, dump not required."); +                ret = 0; +                goto out; +        } + +        /* Init a new buffer, so we are free to work on the one we saved a +         * reference to above. +         */ +        if (ios_init_sample_buf (conf) != 0) { +                gf_log (this->name, GF_LOG_WARNING, +                        "Failed to init new sampling buffer, out of memory?"); +                goto out; +        } + +        /* Wrap-around case, dump from pos to sample_buf->size -1 +         * and then from 0 to sample_buf->pos (covered off by +         * "simple case") +         */ +        if (sample_buf->collected > sample_buf->pos + 1) { +                for (i = sample_buf->pos; i < sample_buf->size; i++) { +                        _io_stats_write_latency_sample (this, +                                &(sample_buf->ios_samples[i]), logfp); +                } +        } + +        /* Simple case: Dump from 0 to sample_buf->pos */ +        for (i = 0; i < sample_buf->pos; i++) { +                _io_stats_write_latency_sample (this, +                        &(sample_buf->ios_samples[i]), logfp); +        } +        ios_destroy_sample_buf (sample_buf); + +out: +        return ret; +} +  int  io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,                                 struct timeval *now, int interval, FILE* logfp) @@ -1284,6 +1604,48 @@ io_stats_dump_fd (xlator_t *this, struct ios_fd *iosfd)          return 0;  } +void collect_ios_latency_sample (struct ios_conf *conf, +                glusterfs_fop_t fop_type, double elapsed, +                call_frame_t *frame) +{ +        ios_sample_buf_t *ios_sample_buf = NULL; +        ios_sample_t     *ios_sample = NULL; +        struct timeval   *timestamp = NULL; +        call_stack_t     *root = NULL; + + +        ios_sample_buf = conf->ios_sample_buf; +        LOCK (&conf->ios_sampling_lock); +        if (conf->ios_sample_interval == 0 || +            ios_sample_buf->observed % conf->ios_sample_interval != 0) +                goto out; + +        timestamp = &frame->begin; +        root = frame->root; + +        ios_sample = &(ios_sample_buf->ios_samples[ios_sample_buf->pos]); +        ios_sample->elapsed = elapsed; +        ios_sample->fop_type = fop_type; +        ios_sample->uid = root->uid; +        ios_sample->gid = root->gid; +        (ios_sample->timestamp).tv_sec = timestamp->tv_sec; +        (ios_sample->timestamp).tv_usec = timestamp->tv_usec; +        memcpy (&ios_sample->identifier, &root->identifier, +                sizeof (root->identifier)); + +        /* We've reached the end of the circular buffer, start from the +         * beginning. */ +        if (ios_sample_buf->pos == (ios_sample_buf->size - 1)) +                ios_sample_buf->pos = 0; +        else +                ios_sample_buf->pos++; +        ios_sample_buf->collected++; +out: +        ios_sample_buf->observed++; +        UNLOCK (&conf->ios_sampling_lock); +        return; +} +  static void  update_ios_latency_stats (struct ios_global_stats   *stats, double elapsed,                            glusterfs_fop_t op) @@ -1321,6 +1683,7 @@ update_ios_latency (struct ios_conf *conf, call_frame_t *frame,          update_ios_latency_stats (&conf->cumulative, elapsed, op);          update_ios_latency_stats (&conf->incremental, elapsed, op); +        collect_ios_latency_sample (conf, op, elapsed, frame);          return 0;  } @@ -1660,7 +2023,7 @@ io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          if (iosstat) {                BUMP_STATS (iosstat, IOS_STATS_TYPE_READDIRP); -              iosstat = NULL; +               iosstat = NULL;          }          STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata); @@ -2512,14 +2875,18 @@ _ios_destroy_dump_thread (struct ios_conf *conf) {  void *  _ios_dump_thread (xlator_t *this) {          struct ios_conf         *conf = NULL; -        FILE                    *logfp = NULL; +        FILE                    *stats_logfp = NULL; +        FILE                    *samples_logfp = NULL;          struct ios_dump_args args = {0};          int                     i; -        int                     bytes_written = 0; -        char                    filename[PATH_MAX]; +        int                     stats_bytes_written = 0; +        int                     samples_bytes_written = 0; +        char                    stats_filename[PATH_MAX]; +        char                    samples_filename[PATH_MAX];          char                    *xlator_name;          char                    *instance_name; -        gf_boolean_t            log_fopen_failure = _gf_true; +        gf_boolean_t            log_stats_fopen_failure = _gf_true; +        gf_boolean_t            log_samples_fopen_failure = _gf_true;          int                     old_cancel_type;          conf = this->private; @@ -2546,17 +2913,34 @@ _ios_dump_thread (xlator_t *this) {                          goto out;                  }          } +        if (mkdir (_IOS_SAMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG) == (-1)) { +                if (errno != EEXIST) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "could not create stats-sample directory %s", +                                _IOS_SAMP_DIR); +                        goto out; +                } +        }          if (instance_name) { -                bytes_written = snprintf (filename, PATH_MAX, +                stats_bytes_written = snprintf (stats_filename, PATH_MAX,                          "%s/%s_%s_%s.dump", _IOS_DUMP_DIR,                          __progname, xlator_name, instance_name); +                samples_bytes_written = snprintf (samples_filename, PATH_MAX, +                        "%s/%s_%s_%s.samp", _IOS_SAMP_DIR, +                        __progname, xlator_name, instance_name);          } else { -                bytes_written = snprintf (filename, PATH_MAX, "%s/%s_%s.dump", -                        _IOS_DUMP_DIR, __progname, xlator_name); -        } -        if (bytes_written >= PATH_MAX) { +                stats_bytes_written = snprintf (stats_filename, PATH_MAX, +                        "%s/%s_%s.dump", _IOS_DUMP_DIR, __progname, +                        xlator_name); +                samples_bytes_written = snprintf (samples_filename, PATH_MAX, +                        "%s/%s_%s.samp", _IOS_SAMP_DIR, __progname, +                        xlator_name); +        } +        if ((stats_bytes_written >= PATH_MAX) || +            (samples_bytes_written >= PATH_MAX)) {                  gf_log (this->name, GF_LOG_ERROR, -                        "Invalid path for IO Stats dump: %s", filename); +                        "Invalid path for stats dump (%s) and/or latency " +                        "samples (%s)", stats_filename, samples_filename);                  goto out;          }          while (1) { @@ -2572,22 +2956,32 @@ _ios_dump_thread (xlator_t *this) {                   * just hold it open and rewind/truncate on each iteration.                   * Leaving it alone for now.                   */ -                logfp = fopen (filename, "w+"); -                if (!logfp) { -                        if (log_fopen_failure) { -                                gf_log (this->name, GF_LOG_ERROR, -                                        "could not open stats-dump file %s", -                                        filename); -                                log_fopen_failure = _gf_false; -                        } -                        continue; +                stats_logfp = fopen (stats_filename, "w+"); +                if (stats_logfp) { +                        (void) ios_dump_args_init (&args, +                                                   IOS_DUMP_TYPE_JSON_FILE, +                                                   stats_logfp); +                        io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false); +                        fclose (stats_logfp); +                        log_stats_fopen_failure = _gf_true; +                } else if (log_stats_fopen_failure) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "could not open stats-dump file %s (%s)", +                                stats_filename, strerror(errno)); +                        log_stats_fopen_failure = _gf_false; +                } +                samples_logfp = fopen (samples_filename, "w+"); +                if (samples_logfp) { +                        io_stats_dump_latency_samples_logfp (this, +                                                             samples_logfp); +                        fclose (samples_logfp); +                        log_samples_fopen_failure = _gf_true; +                } else if (log_samples_fopen_failure) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "could not open samples-dump file %s (%s)", +                                samples_filename, strerror(errno)); +                        log_samples_fopen_failure = _gf_false;                  } -                (void) ios_dump_args_init ( -                                            &args, IOS_DUMP_TYPE_JSON_FILE, -                                            logfp); -                io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false); -                fclose (logfp); -                log_fopen_failure = _gf_true;          }  out:          gf_log (this->name, GF_LOG_INFO, "IO stats dump thread terminated"); @@ -3116,6 +3510,10 @@ reconfigure (xlator_t *this, dict_t *options)                                  (void *) &_ios_dump_thread, this);          } +        GF_OPTION_RECONF ("ios-sample-interval", conf->ios_sample_interval, +                         options, int32, out); +        GF_OPTION_RECONF ("ios-sample-buf-size", conf->ios_sample_buf_size, +                         options, int32, out);          GF_OPTION_RECONF ("sys-log-level", sys_log_str, options, str, out);          if (sys_log_str) {                  sys_log_level = glusterd_check_log_level (sys_log_str); @@ -3229,6 +3627,7 @@ init (xlator_t *this)           * in case of error paths.           */          LOCK_INIT (&conf->lock); +        LOCK_INIT (&conf->ios_sampling_lock);          gettimeofday (&conf->cumulative.started_at, NULL);          gettimeofday (&conf->incremental.started_at, NULL); @@ -3247,6 +3646,22 @@ init (xlator_t *this)          GF_OPTION_INIT ("ios-dump-interval", conf->ios_dump_interval,                          int32, out); +        GF_OPTION_INIT ("ios-sample-interval", conf->ios_sample_interval, +                        int32, out); + +        GF_OPTION_INIT ("ios-sample-buf-size", conf->ios_sample_buf_size, +                        int32, out); + +        if (ios_init_sample_buf (conf) != 0) { +                gf_log (this->name, GF_LOG_ERROR, +                        "Out of memory."); +                return -1; +        } + +        GF_OPTION_INIT ("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec, +                        int32, out); +        conf->dnscache = gf_dnscache_init (conf->ios_dnscache_ttl_sec); +          GF_OPTION_INIT ("sys-log-level", sys_log_str, str, out);          if (sys_log_str) {                  sys_log_level = glusterd_check_log_level (sys_log_str); @@ -3497,6 +3912,29 @@ struct volume_options options[] = {            .description = "Interval (in seconds) at which to auto-dump "                           "statistics. Zero disables automatic dumping."          }, +        { .key  = { "ios-sample-interval" }, +          .type = GF_OPTION_TYPE_INT, +          .min = 0, +          .max = 65535, +          .default_value = "0", +          .description = "Interval in which we want to collect FOP latency " +                         "samples.  2 means collect a sample every 2nd FOP." +        }, +        { .key  = { "ios-sample-buf-size" }, +          .type = GF_OPTION_TYPE_INT, +          .min = 1024, +          .max = 1024*1024, +          .default_value = "65535", +          .description = "The maximum size of our FOP sampling ring buffer." +        }, +        { .key  = { "ios-dnscache-ttl-sec" }, +          .type = GF_OPTION_TYPE_INT, +          .min = 1, +          .max = 3600 * 72, +          .default_value = "86400", +          .description = "The interval after wish a cached DNS entry will be " +                         "re-validated.  Default: 24 hrs" +        },          { .key  = { "latency-measurement" },            .type = GF_OPTION_TYPE_BOOL,            .default_value = "off", diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 4ea7d3baa8a..ccf4b380ffb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -849,6 +849,21 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .option      = "ios-dump-interval",            .op_version  = 1          }, +        { .key         = "diagnostics.fop-sample-interval", +          .voltype     = "debug/io-stats", +          .option      = "ios-sample-interval", +          .op_version  = 1 +        }, +        { .key         = "diagnostics.fop-sample-buf-size", +          .voltype     = "debug/io-stats", +          .option      = "ios-sample-buf-size", +          .op_version  = 1 +        }, +        { .key         = "diagnostics.stats-dnscache-ttl-sec", +          .voltype     = "debug/io-stats", +          .option      = "ios-dnscache-ttl-sec", +          .op_version  = 1 +        },          /* IO-cache xlator options */          { .key         = "performance.cache-max-file-size", diff --git a/xlators/nfs/server/src/nfs-fops.c b/xlators/nfs/server/src/nfs-fops.c index 819ae98b081..52fdd9796c3 100644 --- a/xlators/nfs/server/src/nfs-fops.c +++ b/xlators/nfs/server/src/nfs-fops.c @@ -219,6 +219,7 @@ nfs_create_frame (xlator_t *xl, nfs_user_t *nfu)          frame->root->pid = NFS_PID;          frame->root->uid = nfu->uid;          frame->root->gid = nfu->gids[NFS_PRIMGID_IDX]; +        memcpy (&frame->root->identifier, &nfu->identifier, UNIX_PATH_MAX);          frame->root->lk_owner = nfu->lk_owner;          if (nfu->ngrps != 1) { diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c index 4dda35c49ef..116854b9109 100644 --- a/xlators/nfs/server/src/nfs.c +++ b/xlators/nfs/server/src/nfs.c @@ -639,8 +639,8 @@ nfs_user_root_create (nfs_user_t *newnfu)  int -nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids, -                 int auxcount) +nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, +                 rpc_transport_t *trans, gid_t *auxgids, int auxcount)  {          int     x = 1;          int     y = 0; @@ -655,6 +655,10 @@ nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids,          newnfu->uid = uid;          newnfu->gids[0] = gid;          newnfu->ngrps = 1; +        if (trans) { +                memcpy (&newnfu->identifier, trans->peerinfo.identifier, +                       UNIX_PATH_MAX); +        }          gf_msg_trace (GF_NFS, 0, "uid: %d, gid %d, gids: %d", uid, gid,                  auxcount); @@ -683,7 +687,9 @@ nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req)          gidarr = rpcsvc_auth_unix_auxgids (req, &gids);          nfs_user_create (nfu, rpcsvc_request_uid (req), -                         rpcsvc_request_gid (req), gidarr, gids); +                         rpcsvc_request_gid (req), +                         rpcsvc_request_transport (req), +                         gidarr, gids);          return;  } @@ -699,7 +705,8 @@ nfs_request_primary_user_init (nfs_user_t *nfu, rpcsvc_request_t *req,                  return;          gidarr = rpcsvc_auth_unix_auxgids (req, &gids); -        nfs_user_create (nfu, uid, gid, gidarr, gids); +        nfs_user_create (nfu, uid, gid, rpcsvc_request_transport (req), +                         gidarr, gids);          return;  } diff --git a/xlators/nfs/server/src/nfs.h b/xlators/nfs/server/src/nfs.h index 107140bc720..82df163d494 100644 --- a/xlators/nfs/server/src/nfs.h +++ b/xlators/nfs/server/src/nfs.h @@ -125,14 +125,15 @@ typedef struct nfs_user_info {          gid_t   gids[NFS_NGROUPS];          int     ngrps;          gf_lkowner_t lk_owner; +        char    identifier[UNIX_PATH_MAX];  /* ip of user */  } nfs_user_t;  extern int  nfs_user_root_create (nfs_user_t *newnfu);  extern int -nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids, -                 int auxcount); +nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, +                 rpc_transport_t *trans, gid_t *auxgids, int auxcount);  extern void  nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req); diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index bcd1fed8b84..b369e835b24 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -423,6 +423,7 @@ get_frame_from_request (rpcsvc_request_t *req)          server_conf_t *priv = NULL;          clienttable_t *clienttable = NULL;          unsigned int   i           = 0; +        rpc_transport_t *trans = NULL;          GF_VALIDATE_OR_GOTO ("server", req, out); @@ -500,6 +501,12 @@ get_frame_from_request (rpcsvc_request_t *req)              server_resolve_groups (frame, req);          else              server_decode_groups (frame, req); +        trans = req->trans; +        if (trans) { +                memcpy (&frame->root->identifier, trans->peerinfo.identifier, +                        sizeof (trans->peerinfo.identifier)); +        } +          frame->local = req;  out:  | 
