diff options
Diffstat (limited to 'arequal/arequal-checksum.c')
-rw-r--r-- | arequal/arequal-checksum.c | 322 |
1 files changed, 252 insertions, 70 deletions
diff --git a/arequal/arequal-checksum.c b/arequal/arequal-checksum.c index 8df7b02..107c0df 100644 --- a/arequal/arequal-checksum.c +++ b/arequal/arequal-checksum.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2006-2011 Gluster, Inc. <http://www.zresearch.com/> + Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com/> This file is part of GlusterFS. GlusterFS is free software; you can redistribute it and/or modify @@ -17,14 +17,17 @@ <http://www.gnu.org/licenses/>. */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #define _XOPEN_SOURCE 600 +#include <ftw.h> #include <stdio.h> #include <stdlib.h> #include <sys/types.h> #include <sys/stat.h> -#include <ftw.h> #include <unistd.h> #include <errno.h> #include <string.h> @@ -34,10 +37,29 @@ #include <stdint.h> #include <alloca.h> #include <dirent.h> +#include <argp.h> int debug = 0; +typedef struct { + char test_directory[4096]; + char **ignored_directory; + unsigned int directories_ignored; +} arequal_config_t; + +static arequal_config_t arequal_config; + +static error_t +arequal_parse_opts (int key, char *arg, struct argp_state *_state); + +static struct argp_option arequal_options[] = { + { "ignore", 'i', "IGNORED", 0, + "entry in the given path to be ignored"}, + { "path", 'p', "PATH", 0, "path where arequal has to be run"}, + {0, 0, 0, 0, 0} +}; + #define DBG(fmt ...) do { \ if (debug) { \ fprintf (stderr, "D "); \ @@ -45,12 +67,96 @@ int debug = 0; } \ } while (0) +void +add_to_list (char *arg); +void +get_absolute_path (char directory[], char *arg); static inline int roof(int a, int b) { return ((((a)+(b)-1)/((b)?(b):1))*(b)); } +void +add_to_list (char *arg) +{ + char *string = NULL; + int index = 0; + + index = arequal_config.directories_ignored - 1; + string = strdup (arg); + + if (!arequal_config.ignored_directory) { + arequal_config.ignored_directory = calloc (1, sizeof (char *)); + } else + arequal_config.ignored_directory = + realloc (arequal_config.ignored_directory, + sizeof (char *) * (index+1)); + + arequal_config.ignored_directory[index] = string; +} + +static error_t +arequal_parse_opts (int key, char *arg, struct argp_state *_state) +{ + switch (key) { + case 'i': + { + arequal_config.directories_ignored++; + add_to_list (arg); + } + break; + case 'p': + { + if (arg[0] == '/') + strcpy (arequal_config.test_directory, arg); + else + get_absolute_path (arequal_config.test_directory, arg); + + if (arequal_config.test_directory + [strlen(arequal_config.test_directory) - 1] == '/') + arequal_config.test_directory + [strlen(arequal_config.test_directory) - 1] = '\0'; + } + break; + + case ARGP_KEY_NO_ARGS: + break; + case ARGP_KEY_ARG: + break; + case ARGP_KEY_END: + if (_state->argc == 1) { + argp_usage (_state); + } + + } + + return 0; +} + +void +get_absolute_path (char directory[], char *arg) +{ + char cwd[4096] = {0,}; + + if (getcwd (cwd, sizeof (cwd)) == NULL) + printf ("some error in getting cwd\n"); + + if (strcmp (arg, ".") != 0) { + if (cwd[strlen(cwd)] != '/') + cwd[strlen (cwd)] = '/'; + strcat (cwd, arg); + } + strcpy (directory, cwd); +} + +static struct argp argp = { + arequal_options, + arequal_parse_opts, + "", + "arequal - Tool which calculates the checksum of all the entries" + "present in a given directory" +}; /* All this runs in single thread, hence using 'global' variables */ @@ -109,103 +215,103 @@ checksum_path (const char *path) return csum; } - int checksum_md5 (const char *path, const struct stat *sb) { uint64_t this_data_checksum = 0; - FILE *filep = NULL; - char *cmd = NULL; - char strvalue[17] = {0,}; - int ret = -1; - int len = 0; - const char *pos = NULL; - char *cpos = NULL; - - /* Have to escape single-quotes in filename. - * First, calculate the size of the buffer I'll need. - */ - for (pos = path; *pos; pos++) { - if ( *pos == '\'' ) - len += 4; - else - len += 1; - } - - cmd = malloc(sizeof(char) * (len + 20)); - cmd[0] = '\0'; - - /* Now, build the command with single quotes escaped. */ - - cpos = cmd; - strcpy(cpos, "md5sum '"); - cpos += 8; - - /* Add the file path, with every single quotes replaced with this sequence: - * '\'' - */ - - for (pos = path; *pos; pos++) { - if ( *pos == '\'' ) { - strcpy(cpos, "'\\''"); - cpos += 4; - } else { - *cpos = *pos; - cpos++; - } - } - - /* Add on the trailing single-quote and null-terminate. */ - strcpy(cpos, "'"); - + FILE *filep = NULL; + char *cmd = NULL; + char strvalue[17] = {0,}; + int ret = -1; + int len = 0; + const char *pos = NULL; + char *cpos = NULL; + + /* Have to escape single-quotes in filename. + * First, calculate the size of the buffer I'll need. + */ + for (pos = path; *pos; pos++) { + if ( *pos == '\'' ) + len += 4; + else + len += 1; + } + + cmd = malloc(sizeof(char) * (len + 20)); + cmd[0] = '\0'; + + /* Now, build the command with single quotes escaped. */ + + cpos = cmd; + strcpy(cpos, "md5sum '"); + cpos += 8; + + /* Add the file path, with every single quotes replaced with this sequence: + * '\'' + */ + + for (pos = path; *pos; pos++) { + if ( *pos == '\'' ) { + strcpy(cpos, "'\\''"); + cpos += 4; + } else { + *cpos = *pos; + cpos++; + } + } + + /* Add on the trailing single-quote and null-terminate. */ + strcpy(cpos, "'"); + filep = popen (cmd, "r"); if (!filep) { - perror (path); - goto out; + perror (path); + goto out; } if (fread (strvalue, sizeof (char), 16, filep) != 16) { - fprintf (stderr, "%s: short read\n", path); - goto out; - } + fprintf (stderr, "%s: short read\n", path); + goto out; + } this_data_checksum = strtoull (strvalue, NULL, 16); if (-1 == this_data_checksum) { fprintf (stderr, "%s: %s\n", strvalue, strerror (errno)); - goto out; + goto out; } checksum_file1 ^= this_data_checksum; if (fread (strvalue, sizeof (char), 16, filep) != 16) { - fprintf (stderr, "%s: short read\n", path); - goto out; - } + fprintf (stderr, "%s: short read\n", path); + goto out; + } this_data_checksum = strtoull (strvalue, NULL, 16); if (-1 == this_data_checksum) { fprintf (stderr, "%s: %s\n", strvalue, strerror (errno)); - goto out; + goto out; } checksum_file2 ^= this_data_checksum; - ret = 0; + ret = 0; out: - if (filep) - pclose (filep); - - if (cmd) - free(cmd); + if (filep) + pclose (filep); + + if (cmd) + free(cmd); return ret; } - int checksum_filenames (const char *path, const struct stat *sb) { DIR *dirp = NULL; struct dirent *entry = NULL; unsigned long long csum = 0; + int i = 0; + int found = 0; dirp = opendir (path); if (!dirp) { @@ -215,6 +321,24 @@ checksum_filenames (const char *path, const struct stat *sb) errno = 0; while ((entry = readdir (dirp))) { + /* do not calculate the checksum of the entries which user has + told to ignore and proceed to other siblings.*/ + if (arequal_config.ignored_directory) { + for (i = 0;i < arequal_config.directories_ignored;i++) { + if ((strcmp (entry->d_name, + arequal_config.ignored_directory[i]) + == 0)) { + found = 1; + DBG ("ignoring the entry %s\n", + entry->d_name); + break; + } + } + if (found == 1) { + found = 0; + continue; + } + } csum = checksum_path (entry->d_name); checksum_dir ^= csum; } @@ -321,6 +445,49 @@ process_entry (const char *path, const struct stat *sb, int typeflag, struct FTW *ftwbuf) { int ret = 0; + char *name = NULL; + char *bname = NULL; + char *dname = NULL; + int i = 0; + + /* The if condition below helps in ignoring some directories in + the given path. If the name of the entry is one of the directory + names that the user told to ignore, then that directory will not + be processed and will return FTW_SKIP_SUBTREE to nftw which will + not crawl this directory and move on to other siblings. + Note that for nftw to recognize FTW_SKIP_TREE, FTW_ACTIONRETVAL + should be passed as an argument to nftw. + + This mainly helps in calculating the checksum of network filesystems + (client-server), where the server might have some hidden directories + for managing the filesystem. So to calculate the sanity of filesytem + one has to get the checksum of the client and then the export directory + of server by telling arequal to ignore some of the directories which + are not part of the namespace. + */ + + if (arequal_config.ignored_directory) { + name = strdup (path); + + name[strlen(name)] == '\0'; + + bname = strrchr (name, '/'); + if (bname) + bname++; + + dname = dirname (name); + for ( i = 0; i < arequal_config.directories_ignored; i++) { + if ((strcmp (bname, arequal_config.ignored_directory[i]) + == 0) && (strcmp (arequal_config.test_directory, + dname) == 0)) { + DBG ("ignoring %s\n", bname); + ret = FTW_SKIP_SUBTREE; + if (name) + free (name); + return ret; + } + } + } DBG ("processing entry %s\n", path); @@ -339,6 +506,8 @@ process_entry (const char *path, const struct stat *sb, break; } + if (name) + free (name); return ret; } @@ -409,14 +578,19 @@ int main(int argc, char *argv[]) { int ret = 0; + int i = 0; - if (argc != 2) { - fprintf (stderr, "Usage: %s <directory>\n", - argv[0]); - return -1; - } + ret = argp_parse (&argp, argc, argv, 0, 0, NULL); + if (ret != 0) { + fprintf (stderr, "parsing arguments failed\n"); + return -2; + } - ret = nftw (argv[1], process_entry, 30, FTW_PHYS|FTW_MOUNT); + /* Use FTW_ACTIONRETVAL to take decision on what to do depending upon */ + /* the return value of the callback function */ + /* (process_entry in this case) */ + ret = nftw (arequal_config.test_directory, process_entry, 30, + FTW_ACTIONRETVAL|FTW_PHYS|FTW_MOUNT); if (ret != 0) { fprintf (stderr, "ftw (%s) returned %d (%s), terminating\n", argv[1], ret, strerror (errno)); @@ -425,5 +599,13 @@ main(int argc, char *argv[]) display_stats (stdout); + if (arequal_config.ignored_directory) { + for (i = 0; i < arequal_config.directories_ignored; i++) { + if (arequal_config.ignored_directory[i]) + free (arequal_config.ignored_directory[i]); + } + free (arequal_config.ignored_directory); + } + return 0; } |