diff options
Diffstat (limited to 'tests/utils/arequal-checksum.c')
| -rw-r--r-- | tests/utils/arequal-checksum.c | 633 |
1 files changed, 633 insertions, 0 deletions
diff --git a/tests/utils/arequal-checksum.c b/tests/utils/arequal-checksum.c new file mode 100644 index 00000000000..b51a054162b --- /dev/null +++ b/tests/utils/arequal-checksum.c @@ -0,0 +1,633 @@ +/* + Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#define _XOPEN_SOURCE 500 + +#include <ftw.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <dirent.h> +#include <stdlib.h> +#include <libgen.h> +#include <stdint.h> +#include <dirent.h> +#include <argp.h> + +/* + * FTW_ACTIONRETVAL is a GNU libc extension. It is used here to skip + * hierarchies. On other systems we will still walk the tree, ignoring + * entries. + */ +#ifndef FTW_ACTIONRETVAL +#define FTW_ACTIONRETVAL 0 +#endif + +int debug = 0; + +typedef struct { + char test_directory[4096]; + char **ignored_directory; + unsigned int directories_ignored; +} arequal_config_t; + +static arequal_config_t arequal_config; + +static error_t +arequal_parse_opts(int key, char *arg, struct argp_state *_state); + +static struct argp_option arequal_options[] = { + {"ignore", 'i', "IGNORED", 0, "entry in the given path to be ignored"}, + {"path", 'p', "PATH", 0, "path where arequal has to be run"}, + {0, 0, 0, 0, 0}}; + +#define DBG(fmt...) \ + do { \ + if (debug) { \ + fprintf(stderr, "D "); \ + fprintf(stderr, fmt); \ + } \ + } while (0) + +void +add_to_list(char *arg); +void +get_absolute_path(char directory[], char *arg); + +static int +roof(int a, int b) +{ + return ((((a) + (b)-1) / ((b) ? (b) : 1)) * (b)); +} + +void +add_to_list(char *arg) +{ + char *string = NULL; + int index = 0; + + index = arequal_config.directories_ignored - 1; + string = strdup(arg); + + if (!arequal_config.ignored_directory) { + arequal_config.ignored_directory = calloc(1, sizeof(char *)); + } else + arequal_config.ignored_directory = realloc( + arequal_config.ignored_directory, sizeof(char *) * (index + 1)); + + arequal_config.ignored_directory[index] = string; +} + +static error_t +arequal_parse_opts(int key, char *arg, struct argp_state *_state) +{ + switch (key) { + case 'i': { + arequal_config.directories_ignored++; + add_to_list(arg); + } break; + case 'p': { + if (arg[0] == '/') + strcpy(arequal_config.test_directory, arg); + else + get_absolute_path(arequal_config.test_directory, arg); + + if (arequal_config + .test_directory[strlen(arequal_config.test_directory) - + 1] == '/') + arequal_config + .test_directory[strlen(arequal_config.test_directory) - 1] = + '\0'; + } break; + + case ARGP_KEY_NO_ARGS: + break; + case ARGP_KEY_ARG: + break; + case ARGP_KEY_END: + if (_state->argc == 1) { + argp_usage(_state); + } + } + + return 0; +} + +void +get_absolute_path(char directory[], char *arg) +{ + char cwd[4096] = { + 0, + }; + + if (getcwd(cwd, sizeof(cwd)) == NULL) + printf("some error in getting cwd\n"); + + if (strcmp(arg, ".") != 0) { + if (cwd[strlen(cwd)] != '/') + cwd[strlen(cwd)] = '/'; + strcat(cwd, arg); + } + strcpy(directory, cwd); +} + +static struct argp argp = { + arequal_options, arequal_parse_opts, "", + "arequal - Tool which calculates the checksum of all the entries" + "present in a given directory"}; + +/* All this runs in single thread, hence using 'global' variables */ + +unsigned long long avg_uid_file = 0; +unsigned long long avg_uid_dir = 0; +unsigned long long avg_uid_symlink = 0; +unsigned long long avg_uid_other = 0; + +unsigned long long avg_gid_file = 0; +unsigned long long avg_gid_dir = 0; +unsigned long long avg_gid_symlink = 0; +unsigned long long avg_gid_other = 0; + +unsigned long long avg_mode_file = 0; +unsigned long long avg_mode_dir = 0; +unsigned long long avg_mode_symlink = 0; +unsigned long long avg_mode_other = 0; + +unsigned long long global_ctime_checksum = 0; + +unsigned long long count_dir = 0; +unsigned long long count_file = 0; +unsigned long long count_symlink = 0; +unsigned long long count_other = 0; + +unsigned long long checksum_file1 = 0; +unsigned long long checksum_file2 = 0; +unsigned long long checksum_dir = 0; +unsigned long long checksum_symlink = 0; +unsigned long long checksum_other = 0; + +unsigned long long +checksum_path(const char *path) +{ + unsigned long long csum = 0; + unsigned long long *nums = 0; + int len = 0; + int cnt = 0; + + len = roof(strlen(path), sizeof(csum)); + cnt = len / sizeof(csum); + + nums = __builtin_alloca(len); + memset(nums, 0, len); + strcpy((char *)nums, path); + + while (cnt) { + csum ^= *nums; + nums++; + cnt--; + } + + return csum; +} + +int +checksum_md5(const char *path, const struct stat *sb) +{ + uint64_t this_data_checksum = 0; + FILE *filep = NULL; + char *cmd = NULL; + char strvalue[17] = { + 0, + }; + int ret = -1; + int len = 0; + const char *pos = NULL; + char *cpos = NULL; + + /* Have to escape single-quotes in filename. + * First, calculate the size of the buffer I'll need. + */ + for (pos = path; *pos; pos++) { + if (*pos == '\'') + len += 4; + else + len += 1; + } + + cmd = malloc(sizeof(char) * (len + 20)); + cmd[0] = '\0'; + + /* Now, build the command with single quotes escaped. */ + + cpos = cmd; +#if defined(linux) + strcpy(cpos, "md5sum '"); + cpos += 8; +#elif defined(__NetBSD__) + strcpy(cpos, "md5 -n '"); + cpos += 8; +#elif defined(__FreeBSD__) || defined(__APPLE__) + strcpy(cpos, "md5 -q '"); + cpos += 8; +#else +#error "Please add system-specific md5 command" +#endif + + /* Add the file path, with every single quotes replaced with this sequence: + * '\'' + */ + + for (pos = path; *pos; pos++) { + if (*pos == '\'') { + strcpy(cpos, "'\\''"); + cpos += 4; + } else { + *cpos = *pos; + cpos++; + } + } + + /* Add on the trailing single-quote and null-terminate. */ + strcpy(cpos, "'"); + + filep = popen(cmd, "r"); + if (!filep) { + perror(path); + goto out; + } + + if (fread(strvalue, sizeof(char), 16, filep) != 16) { + fprintf(stderr, "%s: short read\n", path); + goto out; + } + + this_data_checksum = strtoull(strvalue, NULL, 16); + if (-1 == this_data_checksum) { + fprintf(stderr, "%s: %s\n", strvalue, strerror(errno)); + goto out; + } + checksum_file1 ^= this_data_checksum; + + if (fread(strvalue, sizeof(char), 16, filep) != 16) { + fprintf(stderr, "%s: short read\n", path); + goto out; + } + + this_data_checksum = strtoull(strvalue, NULL, 16); + if (-1 == this_data_checksum) { + fprintf(stderr, "%s: %s\n", strvalue, strerror(errno)); + goto out; + } + checksum_file2 ^= this_data_checksum; + + ret = 0; +out: + if (filep) + pclose(filep); + + if (cmd) + free(cmd); + + return ret; +} + +int +checksum_filenames(const char *path, const struct stat *sb) +{ + DIR *dirp = NULL; + struct dirent *entry = NULL; + unsigned long long csum = 0; + int i = 0; + int found = 0; + + dirp = opendir(path); + if (!dirp) { + perror(path); + goto out; + } + + errno = 0; + while ((entry = readdir(dirp))) { + /* do not calculate the checksum of the entries which user has + told to ignore and proceed to other siblings.*/ + if (arequal_config.ignored_directory) { + for (i = 0; i < arequal_config.directories_ignored; i++) { + if ((strcmp(entry->d_name, + arequal_config.ignored_directory[i]) == 0)) { + found = 1; + DBG("ignoring the entry %s\n", entry->d_name); + break; + } + } + if (found == 1) { + found = 0; + continue; + } + } + csum = checksum_path(entry->d_name); + checksum_dir ^= csum; + } + + if (errno) { + perror(path); + goto out; + } + +out: + if (dirp) + closedir(dirp); + + return 0; +} + +int +process_file(const char *path, const struct stat *sb) +{ + int ret = 0; + + count_file++; + + avg_uid_file ^= sb->st_uid; + avg_gid_file ^= sb->st_gid; + avg_mode_file ^= sb->st_mode; + + ret = checksum_md5(path, sb); + + return ret; +} + +int +process_dir(const char *path, const struct stat *sb) +{ + unsigned long long csum = 0; + + count_dir++; + + avg_uid_dir ^= sb->st_uid; + avg_gid_dir ^= sb->st_gid; + avg_mode_dir ^= sb->st_mode; + + csum = checksum_filenames(path, sb); + + checksum_dir ^= csum; + + return 0; +} + +int +process_symlink(const char *path, const struct stat *sb) +{ + int ret = 0; + char buf[4096] = { + 0, + }; + unsigned long long csum = 0; + + count_symlink++; + + avg_uid_symlink ^= sb->st_uid; + avg_gid_symlink ^= sb->st_gid; + avg_mode_symlink ^= sb->st_mode; + + ret = readlink(path, buf, 4096); + if (ret < 0) { + perror(path); + goto out; + } + + DBG("readlink (%s) => %s\n", path, buf); + + csum = checksum_path(buf); + + DBG("checksum_path (%s) => %llx\n", buf, csum); + + checksum_symlink ^= csum; + + ret = 0; +out: + return ret; +} + +int +process_other(const char *path, const struct stat *sb) +{ + count_other++; + + avg_uid_other ^= sb->st_uid; + avg_gid_other ^= sb->st_gid; + avg_mode_other ^= sb->st_mode; + + checksum_other ^= sb->st_rdev; + + return 0; +} + +static int +ignore_entry(const char *bname, const char *dname) +{ + int i; + + for (i = 0; i < arequal_config.directories_ignored; i++) { + if (strcmp(bname, arequal_config.ignored_directory[i]) == 0 && + strncmp(arequal_config.test_directory, dname, + strlen(arequal_config.test_directory)) == 0) + return 1; + } + + return 0; +} + +int +process_entry(const char *path, const struct stat *sb, int typeflag, + struct FTW *ftwbuf) +{ + int ret = 0; + char *name = NULL; + char *bname = NULL; + char *dname = NULL; + int i = 0; + + /* The if condition below helps in ignoring some directories in + the given path. If the name of the entry is one of the directory + names that the user told to ignore, then that directory will not + be processed and will return FTW_SKIP_SUBTREE to nftw which will + not crawl this directory and move on to other siblings. + Note that for nftw to recognize FTW_SKIP_TREE, FTW_ACTIONRETVAL + should be passed as an argument to nftw. + + This mainly helps in calculating the checksum of network filesystems + (client-server), where the server might have some hidden directories + for managing the filesystem. So to calculate the sanity of filesystem + one has to get the checksum of the client and then the export directory + of server by telling arequal to ignore some of the directories which + are not part of the namespace. + */ + + if (arequal_config.ignored_directory) { +#ifndef FTW_SKIP_SUBTREE + char *cp; + + name = strdup(path); + dname = dirname(name); + + for (cp = strtok(name, "/"); cp; cp = strtok(NULL, "/")) { + if (ignore_entry(cp, dname)) { + DBG("ignoring %s\n", path); + if (name) + free(name); + return 0; + } + } +#else /* FTW_SKIP_SUBTREE */ + name = strdup(path); + + name[strlen(name)] = '\0'; + + bname = strrchr(name, '/'); + if (bname) + bname++; + + dname = dirname(name); + if (ignore_entry(bname, dname)) { + DBG("ignoring %s\n", bname); + ret = FTW_SKIP_SUBTREE; + if (name) + free(name); + return ret; + } +#endif /* FTW_SKIP_SUBTREE */ + } + + DBG("processing entry %s\n", path); + + switch ((S_IFMT & sb->st_mode)) { + case S_IFDIR: + ret = process_dir(path, sb); + break; + case S_IFREG: + ret = process_file(path, sb); + break; + case S_IFLNK: + ret = process_symlink(path, sb); + break; + default: + ret = process_other(path, sb); + break; + } + + if (name) + free(name); + return ret; +} + +int +display_counts(FILE *fp) +{ + fprintf(fp, "\n"); + fprintf(fp, "Entry counts\n"); + fprintf(fp, "Regular files : %lld\n", count_file); + fprintf(fp, "Directories : %lld\n", count_dir); + fprintf(fp, "Symbolic links : %lld\n", count_symlink); + fprintf(fp, "Other : %lld\n", count_other); + fprintf(fp, "Total : %lld\n", + (count_file + count_dir + count_symlink + count_other)); + + return 0; +} + +int +display_checksums(FILE *fp) +{ + fprintf(fp, "\n"); + fprintf(fp, "Checksums\n"); + fprintf(fp, "Regular files : %llx%llx\n", checksum_file1, checksum_file2); + fprintf(fp, "Directories : %llx\n", checksum_dir); + fprintf(fp, "Symbolic links : %llx\n", checksum_symlink); + fprintf(fp, "Other : %llx\n", checksum_other); + fprintf(fp, "Total : %llx\n", + (checksum_file1 ^ checksum_file2 ^ checksum_dir ^ checksum_symlink ^ + checksum_other)); + + return 0; +} + +int +display_metadata(FILE *fp) +{ + fprintf(fp, "\n"); + fprintf(fp, "Metadata checksums\n"); + fprintf(fp, "Regular files : %llx\n", + (avg_uid_file + 13) * (avg_gid_file + 11) * (avg_mode_file + 7)); + fprintf(fp, "Directories : %llx\n", + (avg_uid_dir + 13) * (avg_gid_dir + 11) * (avg_mode_dir + 7)); + fprintf(fp, "Symbolic links : %llx\n", + (avg_uid_symlink + 13) * (avg_gid_symlink + 11) * + (avg_mode_symlink + 7)); + fprintf(fp, "Other : %llx\n", + (avg_uid_other + 13) * (avg_gid_other + 11) * (avg_mode_other + 7)); + + return 0; +} + +int +display_stats(FILE *fp) +{ + display_counts(fp); + + display_metadata(fp); + + display_checksums(fp); + + return 0; +} + +int +main(int argc, char *argv[]) +{ + int ret = 0; + int i = 0; + + ret = argp_parse(&argp, argc, argv, 0, 0, NULL); + if (ret != 0) { + fprintf(stderr, "parsing arguments failed\n"); + return -2; + } + + /* Use FTW_ACTIONRETVAL to take decision on what to do depending upon */ + /* the return value of the callback function */ + /* (process_entry in this case) */ + ret = nftw(arequal_config.test_directory, process_entry, 30, + FTW_ACTIONRETVAL | FTW_PHYS | FTW_MOUNT); + if (ret != 0) { + fprintf(stderr, "ftw (%s) returned %d (%s), terminating\n", argv[1], + ret, strerror(errno)); + return 1; + } + + display_stats(stdout); + + if (arequal_config.ignored_directory) { + for (i = 0; i < arequal_config.directories_ignored; i++) { + if (arequal_config.ignored_directory[i]) + free(arequal_config.ignored_directory[i]); + } + free(arequal_config.ignored_directory); + } + + return 0; +} |
