diff options
Diffstat (limited to 'tests/utils/arequal-checksum.c')
-rw-r--r-- | tests/utils/arequal-checksum.c | 611 |
1 files changed, 611 insertions, 0 deletions
diff --git a/tests/utils/arequal-checksum.c b/tests/utils/arequal-checksum.c new file mode 100644 index 00000000000..bdc6af48464 --- /dev/null +++ b/tests/utils/arequal-checksum.c @@ -0,0 +1,611 @@ +/* + Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#define _XOPEN_SOURCE 600 + +#include <ftw.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <dirent.h> +#include <stdlib.h> +#include <libgen.h> +#include <stdint.h> +#include <alloca.h> +#include <dirent.h> +#include <argp.h> + + +int debug = 0; + +typedef struct { + char test_directory[4096]; + char **ignored_directory; + unsigned int directories_ignored; +} arequal_config_t; + +static arequal_config_t arequal_config; + +static error_t +arequal_parse_opts (int key, char *arg, struct argp_state *_state); + +static struct argp_option arequal_options[] = { + { "ignore", 'i', "IGNORED", 0, + "entry in the given path to be ignored"}, + { "path", 'p', "PATH", 0, "path where arequal has to be run"}, + {0, 0, 0, 0, 0} +}; + +#define DBG(fmt ...) do { \ + if (debug) { \ + fprintf (stderr, "D "); \ + fprintf (stderr, fmt); \ + } \ + } while (0) + +void +add_to_list (char *arg); +void +get_absolute_path (char directory[], char *arg); + +static inline int roof(int a, int b) +{ + return ((((a)+(b)-1)/((b)?(b):1))*(b)); +} + +void +add_to_list (char *arg) +{ + char *string = NULL; + int index = 0; + + index = arequal_config.directories_ignored - 1; + string = strdup (arg); + + if (!arequal_config.ignored_directory) { + arequal_config.ignored_directory = calloc (1, sizeof (char *)); + } else + arequal_config.ignored_directory = + realloc (arequal_config.ignored_directory, + sizeof (char *) * (index+1)); + + arequal_config.ignored_directory[index] = string; +} + +static error_t +arequal_parse_opts (int key, char *arg, struct argp_state *_state) +{ + switch (key) { + case 'i': + { + arequal_config.directories_ignored++; + add_to_list (arg); + } + break; + case 'p': + { + if (arg[0] == '/') + strcpy (arequal_config.test_directory, arg); + else + get_absolute_path (arequal_config.test_directory, arg); + + if (arequal_config.test_directory + [strlen(arequal_config.test_directory) - 1] == '/') + arequal_config.test_directory + [strlen(arequal_config.test_directory) - 1] = '\0'; + } + break; + + case ARGP_KEY_NO_ARGS: + break; + case ARGP_KEY_ARG: + break; + case ARGP_KEY_END: + if (_state->argc == 1) { + argp_usage (_state); + } + + } + + return 0; +} + +void +get_absolute_path (char directory[], char *arg) +{ + char cwd[4096] = {0,}; + + if (getcwd (cwd, sizeof (cwd)) == NULL) + printf ("some error in getting cwd\n"); + + if (strcmp (arg, ".") != 0) { + if (cwd[strlen(cwd)] != '/') + cwd[strlen (cwd)] = '/'; + strcat (cwd, arg); + } + strcpy (directory, cwd); +} + +static struct argp argp = { + arequal_options, + arequal_parse_opts, + "", + "arequal - Tool which calculates the checksum of all the entries" + "present in a given directory" +}; + +/* All this runs in single thread, hence using 'global' variables */ + +unsigned long long avg_uid_file = 0; +unsigned long long avg_uid_dir = 0; +unsigned long long avg_uid_symlink = 0; +unsigned long long avg_uid_other = 0; + +unsigned long long avg_gid_file = 0; +unsigned long long avg_gid_dir = 0; +unsigned long long avg_gid_symlink = 0; +unsigned long long avg_gid_other = 0; + +unsigned long long avg_mode_file = 0; +unsigned long long avg_mode_dir = 0; +unsigned long long avg_mode_symlink = 0; +unsigned long long avg_mode_other = 0; + +unsigned long long global_ctime_checksum = 0; + + +unsigned long long count_dir = 0; +unsigned long long count_file = 0; +unsigned long long count_symlink = 0; +unsigned long long count_other = 0; + + +unsigned long long checksum_file1 = 0; +unsigned long long checksum_file2 = 0; +unsigned long long checksum_dir = 0; +unsigned long long checksum_symlink = 0; +unsigned long long checksum_other = 0; + + +unsigned long long +checksum_path (const char *path) +{ + unsigned long long csum = 0; + unsigned long long *nums = 0; + int len = 0; + int cnt = 0; + + len = roof (strlen (path), sizeof (csum)); + cnt = len / sizeof (csum); + + nums = alloca (len); + memset (nums, 0, len); + strcpy ((char *)nums, path); + + while (cnt) { + csum ^= *nums; + nums++; + cnt--; + } + + return csum; +} + +int +checksum_md5 (const char *path, const struct stat *sb) +{ + uint64_t this_data_checksum = 0; + FILE *filep = NULL; + char *cmd = NULL; + char strvalue[17] = {0,}; + int ret = -1; + int len = 0; + const char *pos = NULL; + char *cpos = NULL; + + /* Have to escape single-quotes in filename. + * First, calculate the size of the buffer I'll need. + */ + for (pos = path; *pos; pos++) { + if ( *pos == '\'' ) + len += 4; + else + len += 1; + } + + cmd = malloc(sizeof(char) * (len + 20)); + cmd[0] = '\0'; + + /* Now, build the command with single quotes escaped. */ + + cpos = cmd; + strcpy(cpos, "md5sum '"); + cpos += 8; + + /* Add the file path, with every single quotes replaced with this sequence: + * '\'' + */ + + for (pos = path; *pos; pos++) { + if ( *pos == '\'' ) { + strcpy(cpos, "'\\''"); + cpos += 4; + } else { + *cpos = *pos; + cpos++; + } + } + + /* Add on the trailing single-quote and null-terminate. */ + strcpy(cpos, "'"); + + filep = popen (cmd, "r"); + if (!filep) { + perror (path); + goto out; + } + + if (fread (strvalue, sizeof (char), 16, filep) != 16) { + fprintf (stderr, "%s: short read\n", path); + goto out; + } + + this_data_checksum = strtoull (strvalue, NULL, 16); + if (-1 == this_data_checksum) { + fprintf (stderr, "%s: %s\n", strvalue, strerror (errno)); + goto out; + } + checksum_file1 ^= this_data_checksum; + + if (fread (strvalue, sizeof (char), 16, filep) != 16) { + fprintf (stderr, "%s: short read\n", path); + goto out; + } + + this_data_checksum = strtoull (strvalue, NULL, 16); + if (-1 == this_data_checksum) { + fprintf (stderr, "%s: %s\n", strvalue, strerror (errno)); + goto out; + } + checksum_file2 ^= this_data_checksum; + + ret = 0; +out: + if (filep) + pclose (filep); + + if (cmd) + free(cmd); + + return ret; +} + +int +checksum_filenames (const char *path, const struct stat *sb) +{ + DIR *dirp = NULL; + struct dirent *entry = NULL; + unsigned long long csum = 0; + int i = 0; + int found = 0; + + dirp = opendir (path); + if (!dirp) { + perror (path); + goto out; + } + + errno = 0; + while ((entry = readdir (dirp))) { + /* do not calculate the checksum of the entries which user has + told to ignore and proceed to other siblings.*/ + if (arequal_config.ignored_directory) { + for (i = 0;i < arequal_config.directories_ignored;i++) { + if ((strcmp (entry->d_name, + arequal_config.ignored_directory[i]) + == 0)) { + found = 1; + DBG ("ignoring the entry %s\n", + entry->d_name); + break; + } + } + if (found == 1) { + found = 0; + continue; + } + } + csum = checksum_path (entry->d_name); + checksum_dir ^= csum; + } + + if (errno) { + perror (path); + goto out; + } + +out: + if (dirp) + closedir (dirp); + + return 0; +} + + +int +process_file (const char *path, const struct stat *sb) +{ + int ret = 0; + + count_file++; + + avg_uid_file ^= sb->st_uid; + avg_gid_file ^= sb->st_gid; + avg_mode_file ^= sb->st_mode; + + ret = checksum_md5 (path, sb); + + return ret; +} + + +int +process_dir (const char *path, const struct stat *sb) +{ + unsigned long long csum = 0; + + count_dir++; + + avg_uid_dir ^= sb->st_uid; + avg_gid_dir ^= sb->st_gid; + avg_mode_dir ^= sb->st_mode; + + csum = checksum_filenames (path, sb); + + checksum_dir ^= csum; + + return 0; +} + + +int +process_symlink (const char *path, const struct stat *sb) +{ + int ret = 0; + char buf[4096] = {0, }; + unsigned long long csum = 0; + + count_symlink++; + + avg_uid_symlink ^= sb->st_uid; + avg_gid_symlink ^= sb->st_gid; + avg_mode_symlink ^= sb->st_mode; + + ret = readlink (path, buf, 4096); + if (ret < 0) { + perror (path); + goto out; + } + + DBG ("readlink (%s) => %s\n", path, buf); + + csum = checksum_path (buf); + + DBG ("checksum_path (%s) => %llx\n", buf, csum); + + checksum_symlink ^= csum; + + ret = 0; +out: + return ret; +} + + +int +process_other (const char *path, const struct stat *sb) +{ + count_other++; + + avg_uid_other ^= sb->st_uid; + avg_gid_other ^= sb->st_gid; + avg_mode_other ^= sb->st_mode; + + checksum_other ^= sb->st_rdev; + + return 0; +} + + +int +process_entry (const char *path, const struct stat *sb, + int typeflag, struct FTW *ftwbuf) +{ + int ret = 0; + char *name = NULL; + char *bname = NULL; + char *dname = NULL; + int i = 0; + + /* The if condition below helps in ignoring some directories in + the given path. If the name of the entry is one of the directory + names that the user told to ignore, then that directory will not + be processed and will return FTW_SKIP_SUBTREE to nftw which will + not crawl this directory and move on to other siblings. + Note that for nftw to recognize FTW_SKIP_TREE, FTW_ACTIONRETVAL + should be passed as an argument to nftw. + + This mainly helps in calculating the checksum of network filesystems + (client-server), where the server might have some hidden directories + for managing the filesystem. So to calculate the sanity of filesytem + one has to get the checksum of the client and then the export directory + of server by telling arequal to ignore some of the directories which + are not part of the namespace. + */ + + if (arequal_config.ignored_directory) { + name = strdup (path); + + name[strlen(name)] == '\0'; + + bname = strrchr (name, '/'); + if (bname) + bname++; + + dname = dirname (name); + for ( i = 0; i < arequal_config.directories_ignored; i++) { + if ((strcmp (bname, arequal_config.ignored_directory[i]) + == 0) && (strcmp (arequal_config.test_directory, + dname) == 0)) { + DBG ("ignoring %s\n", bname); + ret = FTW_SKIP_SUBTREE; + if (name) + free (name); + return ret; + } + } + } + + DBG ("processing entry %s\n", path); + + switch ((S_IFMT & sb->st_mode)) { + case S_IFDIR: + ret = process_dir (path, sb); + break; + case S_IFREG: + ret = process_file (path, sb); + break; + case S_IFLNK: + ret = process_symlink (path, sb); + break; + default: + ret = process_other (path, sb); + break; + } + + if (name) + free (name); + return ret; +} + + +int +display_counts (FILE *fp) +{ + fprintf (fp, "\n"); + fprintf (fp, "Entry counts\n"); + fprintf (fp, "Regular files : %lld\n", count_file); + fprintf (fp, "Directories : %lld\n", count_dir); + fprintf (fp, "Symbolic links : %lld\n", count_symlink); + fprintf (fp, "Other : %lld\n", count_other); + fprintf (fp, "Total : %lld\n", + (count_file + count_dir + count_symlink + count_other)); + + return 0; +} + + +int +display_checksums (FILE *fp) +{ + fprintf (fp, "\n"); + fprintf (fp, "Checksums\n"); + fprintf (fp, "Regular files : %llx%llx\n", checksum_file1, checksum_file2); + fprintf (fp, "Directories : %llx\n", checksum_dir); + fprintf (fp, "Symbolic links : %llx\n", checksum_symlink); + fprintf (fp, "Other : %llx\n", checksum_other); + fprintf (fp, "Total : %llx\n", + (checksum_file1 ^ checksum_file2 ^ checksum_dir ^ checksum_symlink ^ checksum_other)); + + return 0; +} + + +int +display_metadata (FILE *fp) +{ + fprintf (fp, "\n"); + fprintf (fp, "Metadata checksums\n"); + fprintf (fp, "Regular files : %llx\n", + (avg_uid_file + 13) * (avg_gid_file + 11) * (avg_mode_file + 7)); + fprintf (fp, "Directories : %llx\n", + (avg_uid_dir + 13) * (avg_gid_dir + 11) * (avg_mode_dir + 7)); + fprintf (fp, "Symbolic links : %llx\n", + (avg_uid_symlink + 13) * (avg_gid_symlink + 11) * (avg_mode_symlink + 7)); + fprintf (fp, "Other : %llx\n", + (avg_uid_other + 13) * (avg_gid_other + 11) * (avg_mode_other + 7)); + + return 0; +} + +int +display_stats (FILE *fp) +{ + display_counts (fp); + + display_metadata (fp); + + display_checksums (fp); + + return 0; +} + + +int +main(int argc, char *argv[]) +{ + int ret = 0; + int i = 0; + + ret = argp_parse (&argp, argc, argv, 0, 0, NULL); + if (ret != 0) { + fprintf (stderr, "parsing arguments failed\n"); + return -2; + } + + /* Use FTW_ACTIONRETVAL to take decision on what to do depending upon */ + /* the return value of the callback function */ + /* (process_entry in this case) */ + ret = nftw (arequal_config.test_directory, process_entry, 30, + FTW_ACTIONRETVAL|FTW_PHYS|FTW_MOUNT); + if (ret != 0) { + fprintf (stderr, "ftw (%s) returned %d (%s), terminating\n", + argv[1], ret, strerror (errno)); + return 1; + } + + display_stats (stdout); + + if (arequal_config.ignored_directory) { + for (i = 0; i < arequal_config.directories_ignored; i++) { + if (arequal_config.ignored_directory[i]) + free (arequal_config.ignored_directory[i]); + } + free (arequal_config.ignored_directory); + } + + return 0; +} |