diff options
-rw-r--r-- | extras/glusterfs-logrotate | 9 | ||||
-rw-r--r-- | libglusterfs/src/common-utils.c | 260 | ||||
-rw-r--r-- | libglusterfs/src/common-utils.h | 21 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs.h | 6 | ||||
-rw-r--r-- | libglusterfs/src/mem-types.h | 2 | ||||
-rw-r--r-- | libglusterfs/src/stack.h | 1 | ||||
-rw-r--r-- | tests/basic/fop-sampling.t | 61 | ||||
-rw-r--r-- | xlators/debug/io-stats/src/io-stats-mem-types.h | 2 | ||||
-rw-r--r-- | xlators/debug/io-stats/src/io-stats.c | 494 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 15 | ||||
-rw-r--r-- | xlators/nfs/server/src/nfs-fops.c | 1 | ||||
-rw-r--r-- | xlators/nfs/server/src/nfs.c | 15 | ||||
-rw-r--r-- | xlators/nfs/server/src/nfs.h | 5 | ||||
-rw-r--r-- | xlators/protocol/server/src/server-helpers.c | 7 |
14 files changed, 864 insertions, 35 deletions
diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate index e3319afaa96..575c0eee771 100644 --- a/extras/glusterfs-logrotate +++ b/extras/glusterfs-logrotate @@ -26,3 +26,12 @@ /usr/bin/killall -HUP glusterfsd > /dev/null 2>&1 || true endscript } + +/var/log/glusterfs/samples/*.samp { + daily + rotate 3 + sharedscripts + missingok + compress + delaycompress +} diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c index 2dcd54f1829..a89e120c0fb 100644 --- a/libglusterfs/src/common-utils.c +++ b/libglusterfs/src/common-utils.c @@ -34,6 +34,7 @@ #if defined(GF_BSD_HOST_OS) || defined(GF_DARWIN_HOST_OS) #include <sys/sysctl.h> #endif +#include <libgen.h> #include "compat-errno.h" #include "logging.h" @@ -210,7 +211,7 @@ out: } /** - * gf_resolve_parent_path -- Given a path, returns an allocated string + * gf_resolve_path_parent -- Given a path, returns an allocated string * containing the parent's path. * @path: Path to parse * @return: The parent path if found, NULL otherwise @@ -359,6 +360,135 @@ err: return -1; } +/** + * gf_dnscache_init -- Initializes a dnscache struct and sets the ttl + * to the specified value in the parameter. 
+ * + * @ttl: the TTL in seconds + * @return: SUCCESS: Pointer to an allocated dnscache struct + * FAILURE: NULL + */ +struct dnscache * +gf_dnscache_init (time_t ttl) +{ + struct dnscache *cache = GF_MALLOC (sizeof (*cache), + gf_common_mt_dnscache); + cache->cache_dict = NULL; + cache->ttl = ttl; + return cache; +} + +/** + * gf_dnscache_entry_init -- Initialize a dnscache entry + * + * @return: SUCCESS: Pointer to an allocated dnscache entry struct + * FAILURE: NULL + */ +struct dnscache_entry * +gf_dnscache_entry_init () +{ + struct dnscache_entry *entry = GF_CALLOC (1, sizeof (*entry), + gf_common_mt_dnscache_entry); + return entry; +} + +/** + * gf_dnscache_entry_deinit -- Free memory used by a dnscache entry + * + * @entry: Pointer to deallocate + */ +void +gf_dnscache_entry_deinit (struct dnscache_entry *entry) +{ + GF_FREE (entry->ip); + GF_FREE (entry->fqdn); + GF_FREE (entry); +} + +/** + * gf_rev_dns_lookup_cached -- Perform a reverse DNS lookup on the IP address, consulting the cache first. + * + * @ip: The IP address to perform a reverse lookup on + * + * @return: success: Hostname string, also referenced by the cache entry + * failure: NULL + */ +char * +gf_rev_dns_lookup_cached (const char *ip, struct dnscache *dnscache) +{ + char *fqdn = NULL; + int ret = 0; + dict_t *cache = NULL; + data_t *entrydata = NULL; + struct dnscache_entry *dnsentry = NULL; + gf_boolean_t from_cache = _gf_false; + + if (!dnscache) + goto out; + + if (!dnscache->cache_dict) { + dnscache->cache_dict = dict_new (); + if (!dnscache->cache_dict) { + goto out; + } + } + cache = dnscache->cache_dict; + + /* Quick cache lookup to see if we already hold it */ + entrydata = dict_get (cache, (char *)ip); + if (entrydata) { + dnsentry = (struct dnscache_entry *)entrydata->data; + /* First check the TTL & timestamp */ + if (time (NULL) - dnsentry->timestamp > dnscache->ttl) { + gf_dnscache_entry_deinit (dnsentry); + entrydata->data = NULL; /* Mark this as 'null' so + * dict_del () doesn't try free + * this after we've already + * 
freed it. + */ + + dict_del (cache, (char *)ip); /* Remove this entry */ + } else { + /* Cache entry is valid, get the FQDN and return */ + fqdn = dnsentry->fqdn; + from_cache = _gf_true; /* Mark this as from cache */ + goto out; + } + } + + /* Get the FQDN */ + ret = gf_get_hostname_from_ip ((char *)ip, &fqdn); + if (ret != 0) + goto out; + + if (!fqdn) { + gf_log_callingfn ("resolver", GF_LOG_CRITICAL, + "Allocation failed for the host address"); + goto out; + } + + from_cache = _gf_false; +out: + /* Insert into the cache */ + if (fqdn && !from_cache) { + struct dnscache_entry *entry = gf_dnscache_entry_init (); + + if (!entry) { + goto out; + } + entry->fqdn = fqdn; + entry->ip = gf_strdup (ip); + if (!ip) { + gf_dnscache_entry_deinit (entry); + goto out; + } + entry->timestamp = time (NULL); + + entrydata = bin_to_data (entry, sizeof (*entry)); + dict_set (cache, (char *)ip, entrydata); + } + return fqdn; +} struct xldump { int lineno; @@ -4012,3 +4142,131 @@ _unmask_cancellation (void) { (void) pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL); } + + +const char * +fop_enum_to_pri_string (glusterfs_fop_t fop) +{ + switch (fop) { + case GF_FOP_OPEN: + case GF_FOP_STAT: + case GF_FOP_FSTAT: + case GF_FOP_LOOKUP: + case GF_FOP_ACCESS: + case GF_FOP_READLINK: + case GF_FOP_OPENDIR: + case GF_FOP_STATFS: + case GF_FOP_READDIR: + case GF_FOP_READDIRP: + return "HIGH"; + + case GF_FOP_CREATE: + case GF_FOP_FLUSH: + case GF_FOP_LK: + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + case GF_FOP_UNLINK: + case GF_FOP_SETATTR: + case GF_FOP_FSETATTR: + case GF_FOP_MKNOD: + case GF_FOP_MKDIR: + case GF_FOP_RMDIR: + case GF_FOP_SYMLINK: + case GF_FOP_RENAME: + case GF_FOP_LINK: + case GF_FOP_SETXATTR: + case GF_FOP_GETXATTR: + case GF_FOP_FGETXATTR: + case GF_FOP_FSETXATTR: + case GF_FOP_REMOVEXATTR: + case GF_FOP_FREMOVEXATTR: + case GF_FOP_IPC: + return "NORMAL"; + + case GF_FOP_READ: + case GF_FOP_WRITE: + case 
GF_FOP_FSYNC: + case GF_FOP_TRUNCATE: + case GF_FOP_FTRUNCATE: + case GF_FOP_FSYNCDIR: + case GF_FOP_XATTROP: + case GF_FOP_FXATTROP: + case GF_FOP_RCHECKSUM: + case GF_FOP_ZEROFILL: + case GF_FOP_FALLOCATE: + return "LOW"; + + case GF_FOP_NULL: + case GF_FOP_FORGET: + case GF_FOP_RELEASE: + case GF_FOP_RELEASEDIR: + case GF_FOP_GETSPEC: + case GF_FOP_MAXVALUE: + case GF_FOP_DISCARD: + return "LEAST"; + } + return "UNKNOWN"; +} + +const char * +fop_enum_to_string (glusterfs_fop_t fop) +{ + static const char *const str_map[] = { + "NULL", + "STAT", + "READLINK", + "MKNOD", + "MKDIR", + "UNLINK", + "RMDIR", + "SYMLINK", + "RENAME", + "LINK", + "TRUNCATE", + "OPEN", + "READ", + "WRITE", + "STATFS", + "FLUSH", + "FSYNC", + "SETXATTR", + "GETXATTR", + "REMOVEXATTR", + "OPENDIR", + "FSYNCDIR", + "ACCESS", + "CREATE", + "FTRUNCATE", + "FSTAT", + "LK", + "LOOKUP", + "READDIR", + "INODELK", + "FINODELK", + "ENTRYLK", + "FENTRYLK", + "XATTROP", + "FXATTROP", + "FGETXATTR", + "FSETXATTR", + "RCHECKSUM", + "SETATTR", + "FSETATTR", + "READDIRP", + "FORGET", + "RELEASE", + "RELEASEDIR", + "GETSPEC", + "FREMOVEXATTR", + "FALLOCATE", + "DISCARD", + "ZEROFILL", + "IPC", + "MAXVALUE"}; + if (fop <= GF_FOP_MAXVALUE) + return str_map[fop]; + + return "UNKNOWNFOP"; +} diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h index 77a8cdd51c7..bf574fdabc7 100644 --- a/libglusterfs/src/common-utils.h +++ b/libglusterfs/src/common-utils.h @@ -159,6 +159,27 @@ typedef struct dht_changelog_rename_info { typedef int (*gf_cmp) (void *, void *); +struct _dict; + +struct dnscache { + struct _dict *cache_dict; + time_t ttl; +}; + +struct dnscache_entry { + char *ip; + char *fqdn; + time_t timestamp; +}; + + +struct dnscache *gf_dnscache_init (time_t ttl); +struct dnscache_entry *gf_dnscache_entry_init (); +void gf_dnscache_entry_deinit (struct dnscache_entry *entry); +char *gf_rev_dns_lookup_cached (const char *ip, struct dnscache *dnscache); + +char *gf_resolve_path_parent 
(const char *path); + void gf_global_variable_init(void); int32_t gf_resolve_ip6 (const char *hostname, uint16_t port, int family, diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 3bc76f6622a..4c7f9f517e3 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -258,6 +258,10 @@ /* NOTE: add members ONLY at the end (just before _MAXVALUE) */ +/* + * OTHER NOTE: fop_enum_to_string and fop_enum_to_pri_string (in common-utils.c) also + * contain lists of fops, so if you update this list UPDATE THOSE TOO. + */ typedef enum { GF_FOP_NULL = 0, GF_FOP_STAT, @@ -312,6 +316,8 @@ typedef enum { GF_FOP_MAXVALUE, } glusterfs_fop_t; +const char *fop_enum_to_pri_string (glusterfs_fop_t fop); +const char *fop_enum_to_string (glusterfs_fop_t fop); typedef enum { GF_MGMT_NULL = 0, diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h index 84949c61487..70c4ea770d5 100644 --- a/libglusterfs/src/mem-types.h +++ b/libglusterfs/src/mem-types.h @@ -128,6 +128,8 @@ enum gf_common_mem_types_ { gf_common_mt_ereg, gf_common_mt_wr, gf_common_mt_rdma_arena_mr, + gf_common_mt_dnscache = 115, + gf_common_mt_dnscache_entry = 116, gf_common_mt_parser_t, gf_common_quota_meta_t, /*related to gfdb library*/ diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/stack.h index 5c0655f2ead..43d943c62b6 100644 --- a/libglusterfs/src/stack.h +++ b/libglusterfs/src/stack.h @@ -96,6 +96,7 @@ struct _call_stack_t { uid_t uid; gid_t gid; pid_t pid; + char identifier[UNIX_PATH_MAX]; uint16_t ngrps; uint32_t groups_small[SMALL_GROUP_COUNT]; uint32_t *groups_large; diff --git a/tests/basic/fop-sampling.t b/tests/basic/fop-sampling.t new file mode 100644 index 00000000000..cea8aa737c0 --- /dev/null +++ b/tests/basic/fop-sampling.t @@ -0,0 +1,61 @@ +#!/bin/bash +# + +. $(dirname $0)/../include.rc +. 
$(dirname $0)/../volume.rc + +SAMPLE_FILE="$(gluster --print-logdir)/samples/glusterfs_${V0}.samp" + +function print_cnt() { + local FOP_TYPE=$1 + local FOP_CNT=$(grep ,${FOP_TYPE} ${SAMPLE_FILE} | wc -l) + echo $FOP_CNT +} + +# Verify we got non-zero counts for stats/lookup/readdir +check_samples() { + STAT_CNT=$(print_cnt STAT) + if [ "$STAT_CNT" -le "0" ]; then + echo "STAT count is zero" + return + fi + + LOOKUP_CNT=$(print_cnt LOOKUP) + if [ "$LOOKUP_CNT" -le "0" ]; then + echo "LOOKUP count is zero" + return + fi + + READDIR_CNT=$(print_cnt READDIR) + if [ "$READDIR_CNT" -le "0" ]; then + echo "READDIR count is zero" + return + fi + + echo "OK" +} + +cleanup; +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume set $V0 nfs.disable off +TEST $CLI volume set $V0 diagnostics.latency-measurement on +TEST $CLI volume set $V0 diagnostics.count-fop-hits on +TEST $CLI volume set $V0 diagnostics.stats-dump-interval 2 +TEST $CLI volume set $V0 diagnostics.fop-sample-buf-size 65535 +TEST $CLI volume set $V0 diagnostics.fop-sample-interval 1 +TEST $CLI volume set $V0 diagnostics.stats-dnscache-ttl-sec 3600 + +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 + +for i in {1..5} +do + dd if=/dev/zero of=${M0}/testfile$i bs=4k count=1 +done + +TEST ls -l $M0 +EXPECT_WITHIN 6 "OK" check_samples + +cleanup diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h index d7a1055a571..9dde9373264 100644 --- a/xlators/debug/io-stats/src/io-stats-mem-types.h +++ b/xlators/debug/io-stats/src/io-stats-mem-types.h @@ -20,6 +20,8 @@ enum gf_io_stats_mem_types_ { gf_io_stats_mt_ios_fd, gf_io_stats_mt_ios_stat, gf_io_stats_mt_ios_stat_list, + gf_io_stats_mt_ios_sample_buf, + gf_io_stats_mt_ios_sample, gf_io_stats_mt_end }; #endif diff --git a/xlators/debug/io-stats/src/io-stats.c 
b/xlators/debug/io-stats/src/io-stats.c index 3157a65dae1..e73cae0a928 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -34,8 +34,12 @@ #include "logging.h" #include "cli1-xdr.h" #include "statedump.h" +#include <pwd.h> +#include <grp.h> #define MAX_LIST_MEMBERS 100 +#define DEFAULT_PWD_BUF_SZ 16384 +#define DEFAULT_GRP_BUF_SZ 16384 typedef enum { IOS_STATS_TYPE_NONE, @@ -82,6 +86,25 @@ struct ios_stat_head { struct ios_stat_list *iosstats; }; +typedef struct _ios_sample_t { + uid_t uid; + gid_t gid; + char identifier[UNIX_PATH_MAX]; + glusterfs_fop_t fop_type; + struct timeval timestamp; + double elapsed; +} ios_sample_t; + + +typedef struct _ios_sample_buf_t { + uint64_t pos; /* Position in write buffer */ + uint64_t size; /* Size of ring buffer */ + uint64_t collected; /* Number of samples we've collected */ + uint64_t observed; /* Number of FOPs we've observed */ + ios_sample_t *ios_samples; /* Our list of samples */ +} ios_sample_buf_t; + + struct ios_lat { double min; double max; @@ -102,7 +125,6 @@ struct ios_global_stats { struct timeval max_openfd_time; }; - struct ios_conf { gf_lock_t lock; struct ios_global_stats cumulative; @@ -116,6 +138,12 @@ struct ios_conf { int32_t ios_dump_interval; pthread_t dump_thread; gf_boolean_t dump_thread_should_die; + gf_lock_t ios_sampling_lock; + int32_t ios_sample_interval; + int32_t ios_sample_buf_size; + ios_sample_buf_t *ios_sample_buf; + struct dnscache *dnscache; + int32_t ios_dnscache_ttl_sec; }; @@ -133,7 +161,8 @@ typedef enum { IOS_DUMP_TYPE_FILE = 1, IOS_DUMP_TYPE_DICT = 2, IOS_DUMP_TYPE_JSON_FILE = 3, - IOS_DUMP_TYPE_MAX = 4 + IOS_DUMP_TYPE_SAMPLES = 4, + IOS_DUMP_TYPE_MAX = 5 } ios_dump_type_t; struct ios_dump_args { @@ -163,6 +192,7 @@ is_fop_latency_started (call_frame_t *frame) } #ifdef GF_LINUX_HOST_OS +const char *_IOS_SAMP_DIR = "/var/log/glusterfs/samples"; #define _IOS_DUMP_DIR DATADIR "/lib/glusterd/stats" #else #define _IOS_DUMP_DIR DATADIR 
"/db/glusterd/stats" @@ -429,6 +459,72 @@ ios_inode_ctx_get (inode_t *inode, xlator_t *this, struct ios_stat **iosstat) } +/* + * So why go to all this trouble? Why not just queue up some samples in + * a big list and malloc away? Well malloc is expensive relative + * to what we are measuring, so we cannot have any malloc's (or worse + * callocs) in our measurement code paths. Instead, we are going to + * pre-allocate a circular buffer and collect a maximum number of samples. + * Prior to dumping them all we'll create a new buffer and swap the + * old buffer with the new, and then proceed to dump the statistics + * in our dump thread. + * + */ +ios_sample_buf_t * +ios_create_sample_buf (size_t buf_size) +{ + ios_sample_buf_t *ios_sample_buf = NULL; + ios_sample_t *ios_samples = NULL; + + ios_sample_buf = GF_CALLOC (1, + sizeof (*ios_sample_buf), + gf_io_stats_mt_ios_sample_buf); + if (!ios_sample_buf) + goto err; + + ios_samples = GF_CALLOC (buf_size, + sizeof (*ios_samples), + gf_io_stats_mt_ios_sample); + + if (!ios_samples) + goto err; + + ios_sample_buf->ios_samples = ios_samples; + ios_sample_buf->size = buf_size; + ios_sample_buf->pos = 0; + ios_sample_buf->observed = 0; + ios_sample_buf->collected = 0; + + return ios_sample_buf; +err: + GF_FREE (ios_sample_buf); + return NULL; +} + +void +ios_destroy_sample_buf (ios_sample_buf_t *ios_sample_buf) +{ + GF_FREE (ios_sample_buf->ios_samples); + GF_FREE (ios_sample_buf); +} + +static int +ios_init_sample_buf (struct ios_conf *conf) +{ + int32_t ret = -1; + + GF_ASSERT (conf); + LOCK (&conf->lock); + conf->ios_sample_buf = ios_create_sample_buf ( + conf->ios_sample_buf_size); + if (!conf->ios_sample_buf) + goto out; + ret = 0; +out: + UNLOCK (&conf->lock); + return ret; +} + int ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value, struct ios_stat *iosstat) @@ -815,6 +911,230 @@ out: return ret; } +char * +_resolve_username (xlator_t *this, uid_t uid) +{ + struct passwd pwd; + struct passwd 
*pwd_result = NULL; + size_t pwd_buf_len; + char *pwd_buf = NULL; + char *ret = NULL; + + /* Prepare our buffer for the uid->username translation */ +#ifdef _SC_GETGR_R_SIZE_MAX + pwd_buf_len = sysconf (_SC_GETGR_R_SIZE_MAX); +#else + pwd_buf_len = -1; +#endif + if (pwd_buf_len == -1) { + pwd_buf_len = DEFAULT_PWD_BUF_SZ; /* per the man page */ + } + + pwd_buf = alloca (pwd_buf_len); + if (!pwd_buf) + goto err; + + getpwuid_r (uid, &pwd, pwd_buf, pwd_buf_len, + &pwd_result); + if (!pwd_result) + goto err; + + ret = gf_strdup (pwd.pw_name); + if (ret) + return ret; + else + gf_log (this->name, GF_LOG_ERROR, + "gf_strdup failed, failing username " + "resolution."); +err: + return ret; +} + +char * +_resolve_group_name (xlator_t *this, gid_t gid) +{ + struct group grp; + struct group *grp_result = NULL; + size_t grp_buf_len; + char *grp_buf = NULL; + char *ret = NULL; + + /* Prepare our buffer for the gid->group name translation */ +#ifdef _SC_GETGR_R_SIZE_MAX + grp_buf_len = sysconf (_SC_GETGR_R_SIZE_MAX); +#else + grp_buf_len = -1; +#endif + if (grp_buf_len == -1) { + grp_buf_len = DEFAULT_GRP_BUF_SZ; /* per the man page */ + } + + grp_buf = alloca (grp_buf_len); + if (!grp_buf) { + goto err; + } + + getgrgid_r (gid, &grp, grp_buf, grp_buf_len, + &grp_result); + if (!grp_result) + goto err; + + ret = gf_strdup (grp.gr_name); + if (ret) + return ret; + else + gf_log (this->name, GF_LOG_ERROR, + "gf_strdup failed, failing username " + "resolution."); +err: + return ret; +} + + +/* + * This function writes out a latency sample to a given file descriptor + * and beautifies the output in the process. 
+ */ +void +_io_stats_write_latency_sample (xlator_t *this, ios_sample_t *sample, + FILE *logfp) +{ + double epoch_time = 0.00; + char *xlator_name = NULL; + char *instance_name = NULL; + char *hostname = NULL; + char *identifier = NULL; + char *port = NULL; + char *port_pos = NULL; + char *group_name = NULL; + char *username = NULL; + struct ios_conf *conf = NULL; + + conf = this->private; + + epoch_time = (sample->timestamp).tv_sec + + ((sample->timestamp).tv_usec / 1000000.0); + + if (!sample->identifier || (strlen (sample->identifier) == 0)) { + hostname = "Unknown"; + port = "Unknown"; + } else { + identifier = strdupa (sample->identifier); + port_pos = strrchr (identifier, ':'); + if (!port_pos || strlen(port_pos) < 2) + goto err; + port = strdupa (port_pos + 1); + if (!port) + goto err; + *port_pos = '\0'; + hostname = gf_rev_dns_lookup_cached (identifier, + conf->dnscache); + if (!hostname) + hostname = "Unknown"; + } + + xlator_name = this->name; + if (!xlator_name || strlen (xlator_name) == 0) + xlator_name = "Unknown"; + + instance_name = this->instance_name; + if (!instance_name || strlen (instance_name) == 0) + instance_name = "N/A"; + + /* Resolve the UID to a string username */ + username = _resolve_username (this, sample->uid); + if (!username) { + username = GF_MALLOC (30, gf_common_mt_char); + sprintf (username, "%d", (int32_t)sample->uid); + } + + /* Resolve the GID to a string group name */ + group_name = _resolve_group_name (this, sample->gid); + if (!group_name) { + group_name = GF_MALLOC (30, gf_common_mt_char); + sprintf (group_name, "%d", (int32_t)sample->gid); + } + + ios_log (this, logfp, + "%0.6lf,%s,%s,%0.4lf,%s,%s,%s,%s,%s,%s", + epoch_time, fop_enum_to_pri_string (sample->fop_type), + fop_enum_to_string (sample->fop_type), + sample->elapsed, xlator_name, instance_name, username, + group_name, hostname, port); + goto out; +err: + gf_log (this->name, GF_LOG_ERROR, + "Error parsing socket identifier"); +out: + GF_FREE (group_name); + 
GF_FREE (username); +} + +/* + * Takes our current sample buffer in conf->ios_sample_buf, and saves + * a reference to this, inits a new buffer, and then dumps out the + * contents of the saved reference. + */ +int +io_stats_dump_latency_samples_logfp (xlator_t *this, FILE *logfp) +{ + uint64_t i = 0; + struct ios_conf *conf = NULL; + ios_sample_buf_t *sample_buf = NULL; + int ret = 1; /* Default to error */ + + conf = this->private; + + /* Save pointer to old buffer; the CS equivalent of + * Indiana Jones: https://www.youtube.com/watch?v=Pr-8AP0To4k, + * though ours will end better I hope! + */ + sample_buf = conf->ios_sample_buf; + if (!sample_buf) { + gf_log (this->name, GF_LOG_WARNING, + "Sampling buffer is null, bailing!"); + goto out; + } + + /* Empty case, nothing to do, exit. */ + if (sample_buf->collected == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "No samples, dump not required."); + ret = 0; + goto out; + } + + /* Init a new buffer, so we are free to work on the one we saved a + * reference to above. 
+ */ + if (ios_init_sample_buf (conf) != 0) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to init new sampling buffer, out of memory?"); + goto out; + } + + /* Wrap-around case, dump from pos to sample_buf->size -1 + * and then from 0 to sample_buf->pos (covered off by + * "simple case") + */ + if (sample_buf->collected > sample_buf->pos + 1) { + for (i = sample_buf->pos; i < sample_buf->size; i++) { + _io_stats_write_latency_sample (this, + &(sample_buf->ios_samples[i]), logfp); + } + } + + /* Simple case: Dump from 0 to sample_buf->pos */ + for (i = 0; i < sample_buf->pos; i++) { + _io_stats_write_latency_sample (this, + &(sample_buf->ios_samples[i]), logfp); + } + ios_destroy_sample_buf (sample_buf); + +out: + return ret; +} + int io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats, struct timeval *now, int interval, FILE* logfp) @@ -1284,6 +1604,48 @@ io_stats_dump_fd (xlator_t *this, struct ios_fd *iosfd) return 0; } +void collect_ios_latency_sample (struct ios_conf *conf, + glusterfs_fop_t fop_type, double elapsed, + call_frame_t *frame) +{ + ios_sample_buf_t *ios_sample_buf = NULL; + ios_sample_t *ios_sample = NULL; + struct timeval *timestamp = NULL; + call_stack_t *root = NULL; + + + ios_sample_buf = conf->ios_sample_buf; + LOCK (&conf->ios_sampling_lock); + if (conf->ios_sample_interval == 0 || + ios_sample_buf->observed % conf->ios_sample_interval != 0) + goto out; + + timestamp = &frame->begin; + root = frame->root; + + ios_sample = &(ios_sample_buf->ios_samples[ios_sample_buf->pos]); + ios_sample->elapsed = elapsed; + ios_sample->fop_type = fop_type; + ios_sample->uid = root->uid; + ios_sample->gid = root->gid; + (ios_sample->timestamp).tv_sec = timestamp->tv_sec; + (ios_sample->timestamp).tv_usec = timestamp->tv_usec; + memcpy (&ios_sample->identifier, &root->identifier, + sizeof (root->identifier)); + + /* We've reached the end of the circular buffer, start from the + * beginning. 
*/ + if (ios_sample_buf->pos == (ios_sample_buf->size - 1)) + ios_sample_buf->pos = 0; + else + ios_sample_buf->pos++; + ios_sample_buf->collected++; +out: + ios_sample_buf->observed++; + UNLOCK (&conf->ios_sampling_lock); + return; +} + static void update_ios_latency_stats (struct ios_global_stats *stats, double elapsed, glusterfs_fop_t op) @@ -1321,6 +1683,7 @@ update_ios_latency (struct ios_conf *conf, call_frame_t *frame, update_ios_latency_stats (&conf->cumulative, elapsed, op); update_ios_latency_stats (&conf->incremental, elapsed, op); + collect_ios_latency_sample (conf, op, elapsed, frame); return 0; } @@ -1660,7 +2023,7 @@ io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (iosstat) { BUMP_STATS (iosstat, IOS_STATS_TYPE_READDIRP); - iosstat = NULL; + iosstat = NULL; } STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata); @@ -2512,14 +2875,18 @@ _ios_destroy_dump_thread (struct ios_conf *conf) { void * _ios_dump_thread (xlator_t *this) { struct ios_conf *conf = NULL; - FILE *logfp = NULL; + FILE *stats_logfp = NULL; + FILE *samples_logfp = NULL; struct ios_dump_args args = {0}; int i; - int bytes_written = 0; - char filename[PATH_MAX]; + int stats_bytes_written = 0; + int samples_bytes_written = 0; + char stats_filename[PATH_MAX]; + char samples_filename[PATH_MAX]; char *xlator_name; char *instance_name; - gf_boolean_t log_fopen_failure = _gf_true; + gf_boolean_t log_stats_fopen_failure = _gf_true; + gf_boolean_t log_samples_fopen_failure = _gf_true; int old_cancel_type; conf = this->private; @@ -2546,17 +2913,34 @@ _ios_dump_thread (xlator_t *this) { goto out; } } + if (mkdir (_IOS_SAMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG) == (-1)) { + if (errno != EEXIST) { + gf_log (this->name, GF_LOG_ERROR, + "could not create stats-sample directory %s", + _IOS_SAMP_DIR); + goto out; + } + } if (instance_name) { - bytes_written = snprintf (filename, PATH_MAX, + stats_bytes_written = snprintf (stats_filename, PATH_MAX, 
"%s/%s_%s_%s.dump", _IOS_DUMP_DIR, __progname, xlator_name, instance_name); + samples_bytes_written = snprintf (samples_filename, PATH_MAX, + "%s/%s_%s_%s.samp", _IOS_SAMP_DIR, + __progname, xlator_name, instance_name); } else { - bytes_written = snprintf (filename, PATH_MAX, "%s/%s_%s.dump", - _IOS_DUMP_DIR, __progname, xlator_name); - } - if (bytes_written >= PATH_MAX) { + stats_bytes_written = snprintf (stats_filename, PATH_MAX, + "%s/%s_%s.dump", _IOS_DUMP_DIR, __progname, + xlator_name); + samples_bytes_written = snprintf (samples_filename, PATH_MAX, + "%s/%s_%s.samp", _IOS_SAMP_DIR, __progname, + xlator_name); + } + if ((stats_bytes_written >= PATH_MAX) || + (samples_bytes_written >= PATH_MAX)) { gf_log (this->name, GF_LOG_ERROR, - "Invalid path for IO Stats dump: %s", filename); + "Invalid path for stats dump (%s) and/or latency " + "samples (%s)", stats_filename, samples_filename); goto out; } while (1) { @@ -2572,22 +2956,32 @@ _ios_dump_thread (xlator_t *this) { * just hold it open and rewind/truncate on each iteration. * Leaving it alone for now. 
*/ - logfp = fopen (filename, "w+"); - if (!logfp) { - if (log_fopen_failure) { - gf_log (this->name, GF_LOG_ERROR, - "could not open stats-dump file %s", - filename); - log_fopen_failure = _gf_false; - } - continue; + stats_logfp = fopen (stats_filename, "w+"); + if (stats_logfp) { + (void) ios_dump_args_init (&args, + IOS_DUMP_TYPE_JSON_FILE, + stats_logfp); + io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false); + fclose (stats_logfp); + log_stats_fopen_failure = _gf_true; + } else if (log_stats_fopen_failure) { + gf_log (this->name, GF_LOG_ERROR, + "could not open stats-dump file %s (%s)", + stats_filename, strerror(errno)); + log_stats_fopen_failure = _gf_false; + } + samples_logfp = fopen (samples_filename, "w+"); + if (samples_logfp) { + io_stats_dump_latency_samples_logfp (this, + samples_logfp); + fclose (samples_logfp); + log_samples_fopen_failure = _gf_true; + } else if (log_samples_fopen_failure) { + gf_log (this->name, GF_LOG_ERROR, + "could not open samples-dump file %s (%s)", + samples_filename, strerror(errno)); + log_samples_fopen_failure = _gf_false; } - (void) ios_dump_args_init ( - &args, IOS_DUMP_TYPE_JSON_FILE, - logfp); - io_stats_dump (this, &args, GF_CLI_INFO_ALL, _gf_false); - fclose (logfp); - log_fopen_failure = _gf_true; } out: gf_log (this->name, GF_LOG_INFO, "IO stats dump thread terminated"); @@ -3116,6 +3510,10 @@ reconfigure (xlator_t *this, dict_t *options) (void *) &_ios_dump_thread, this); } + GF_OPTION_RECONF ("ios-sample-interval", conf->ios_sample_interval, + options, int32, out); + GF_OPTION_RECONF ("ios-sample-buf-size", conf->ios_sample_buf_size, + options, int32, out); GF_OPTION_RECONF ("sys-log-level", sys_log_str, options, str, out); if (sys_log_str) { sys_log_level = glusterd_check_log_level (sys_log_str); @@ -3229,6 +3627,7 @@ init (xlator_t *this) * in case of error paths. 
*/ LOCK_INIT (&conf->lock); + LOCK_INIT (&conf->ios_sampling_lock); gettimeofday (&conf->cumulative.started_at, NULL); gettimeofday (&conf->incremental.started_at, NULL); @@ -3247,6 +3646,22 @@ init (xlator_t *this) GF_OPTION_INIT ("ios-dump-interval", conf->ios_dump_interval, int32, out); + GF_OPTION_INIT ("ios-sample-interval", conf->ios_sample_interval, + int32, out); + + GF_OPTION_INIT ("ios-sample-buf-size", conf->ios_sample_buf_size, + int32, out); + + if (ios_init_sample_buf (conf) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory."); + return -1; + } + + GF_OPTION_INIT ("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec, + int32, out); + conf->dnscache = gf_dnscache_init (conf->ios_dnscache_ttl_sec); + GF_OPTION_INIT ("sys-log-level", sys_log_str, str, out); if (sys_log_str) { sys_log_level = glusterd_check_log_level (sys_log_str); @@ -3497,6 +3912,29 @@ struct volume_options options[] = { .description = "Interval (in seconds) at which to auto-dump " "statistics. Zero disables automatic dumping." }, + { .key = { "ios-sample-interval" }, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = 65535, + .default_value = "0", + .description = "Interval in which we want to collect FOP latency " + "samples. 2 means collect a sample every 2nd FOP." + }, + { .key = { "ios-sample-buf-size" }, + .type = GF_OPTION_TYPE_INT, + .min = 1024, + .max = 1024*1024, + .default_value = "65535", + .description = "The maximum size of our FOP sampling ring buffer." + }, + { .key = { "ios-dnscache-ttl-sec" }, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .max = 3600 * 72, + .default_value = "86400", + .description = "The interval after wish a cached DNS entry will be " + "re-validated. 
Default: 24 hrs" + }, { .key = { "latency-measurement" }, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 4ea7d3baa8a..ccf4b380ffb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -849,6 +849,21 @@ struct volopt_map_entry glusterd_volopt_map[] = { .option = "ios-dump-interval", .op_version = 1 }, + { .key = "diagnostics.fop-sample-interval", + .voltype = "debug/io-stats", + .option = "ios-sample-interval", + .op_version = 1 + }, + { .key = "diagnostics.fop-sample-buf-size", + .voltype = "debug/io-stats", + .option = "ios-sample-buf-size", + .op_version = 1 + }, + { .key = "diagnostics.stats-dnscache-ttl-sec", + .voltype = "debug/io-stats", + .option = "ios-dnscache-ttl-sec", + .op_version = 1 + }, /* IO-cache xlator options */ { .key = "performance.cache-max-file-size", diff --git a/xlators/nfs/server/src/nfs-fops.c b/xlators/nfs/server/src/nfs-fops.c index 819ae98b081..52fdd9796c3 100644 --- a/xlators/nfs/server/src/nfs-fops.c +++ b/xlators/nfs/server/src/nfs-fops.c @@ -219,6 +219,7 @@ nfs_create_frame (xlator_t *xl, nfs_user_t *nfu) frame->root->pid = NFS_PID; frame->root->uid = nfu->uid; frame->root->gid = nfu->gids[NFS_PRIMGID_IDX]; + memcpy (&frame->root->identifier, &nfu->identifier, UNIX_PATH_MAX); frame->root->lk_owner = nfu->lk_owner; if (nfu->ngrps != 1) { diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c index 4dda35c49ef..116854b9109 100644 --- a/xlators/nfs/server/src/nfs.c +++ b/xlators/nfs/server/src/nfs.c @@ -639,8 +639,8 @@ nfs_user_root_create (nfs_user_t *newnfu) int -nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids, - int auxcount) +nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, + rpc_transport_t *trans, gid_t *auxgids, int auxcount) { int x = 1; int y = 0; @@ -655,6 +655,10 @@ nfs_user_create 
(nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids, newnfu->uid = uid; newnfu->gids[0] = gid; newnfu->ngrps = 1; + if (trans) { + memcpy (&newnfu->identifier, trans->peerinfo.identifier, + UNIX_PATH_MAX); + } gf_msg_trace (GF_NFS, 0, "uid: %d, gid %d, gids: %d", uid, gid, auxcount); @@ -683,7 +687,9 @@ nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req) gidarr = rpcsvc_auth_unix_auxgids (req, &gids); nfs_user_create (nfu, rpcsvc_request_uid (req), - rpcsvc_request_gid (req), gidarr, gids); + rpcsvc_request_gid (req), + rpcsvc_request_transport (req), + gidarr, gids); return; } @@ -699,7 +705,8 @@ nfs_request_primary_user_init (nfs_user_t *nfu, rpcsvc_request_t *req, return; gidarr = rpcsvc_auth_unix_auxgids (req, &gids); - nfs_user_create (nfu, uid, gid, gidarr, gids); + nfs_user_create (nfu, uid, gid, rpcsvc_request_transport (req), + gidarr, gids); return; } diff --git a/xlators/nfs/server/src/nfs.h b/xlators/nfs/server/src/nfs.h index 107140bc720..82df163d494 100644 --- a/xlators/nfs/server/src/nfs.h +++ b/xlators/nfs/server/src/nfs.h @@ -125,14 +125,15 @@ typedef struct nfs_user_info { gid_t gids[NFS_NGROUPS]; int ngrps; gf_lkowner_t lk_owner; + char identifier[UNIX_PATH_MAX]; /* ip of user */ } nfs_user_t; extern int nfs_user_root_create (nfs_user_t *newnfu); extern int -nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, gid_t *auxgids, - int auxcount); +nfs_user_create (nfs_user_t *newnfu, uid_t uid, gid_t gid, + rpc_transport_t *trans, gid_t *auxgids, int auxcount); extern void nfs_request_user_init (nfs_user_t *nfu, rpcsvc_request_t *req); diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index bcd1fed8b84..b369e835b24 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -423,6 +423,7 @@ get_frame_from_request (rpcsvc_request_t *req) server_conf_t *priv = NULL; clienttable_t *clienttable = NULL; unsigned int i = 0; + 
rpc_transport_t *trans = NULL; GF_VALIDATE_OR_GOTO ("server", req, out); @@ -500,6 +501,12 @@ get_frame_from_request (rpcsvc_request_t *req) server_resolve_groups (frame, req); else server_decode_groups (frame, req); + trans = req->trans; + if (trans) { + memcpy (&frame->root->identifier, trans->peerinfo.identifier, + sizeof (trans->peerinfo.identifier)); + } + frame->local = req; out: |