From f90a39b875921fcd18ee80857ec2eb29daea9c76 Mon Sep 17 00:00:00 2001 From: Raghavendra Bhat Date: Thu, 9 Feb 2012 11:12:09 +0530 Subject: c_pgms/arequal: Add arequal tool to the git Adding arequal to the git, which can be used to calculate the checksum of path given to it as an argument Change-Id: Iefd283511e6ff2f3bc1359e718d1922506d7a24a Signed-off-by: Raghavendra Bhat --- arequal/README | 77 ++++++++ arequal/arequal-checksum.c | 429 +++++++++++++++++++++++++++++++++++++++++++++ arequal/arequal-run.sh | 45 +++++ 3 files changed, 551 insertions(+) create mode 100644 arequal/README create mode 100644 arequal/arequal-checksum.c create mode 100755 arequal/arequal-run.sh diff --git a/arequal/README b/arequal/README new file mode 100644 index 0000000..8c9d952 --- /dev/null +++ b/arequal/README @@ -0,0 +1,77 @@ +arequal - Tool to test data security of GlusterFS +======= + +0. Install arequal +------------------ + sh# ./autogen.sh + sh# ./configure + sh# make + sh# make install + + Perform the above installation steps on both clients and servers. + + +1. Generate data set +-------------------- + Create a data set to be used for the test. This data set should +have file sizes and file count similar to the data set to be used +in production. You could also use existing data (like /usr) as +your dataset as it will not be modified. This document will use +/usr as the example source directory. + + +2. Mount GlusterFS +------------------ + Install, configure and start glusterfs servers and client. If +the Replicate module is loaded, this tool can be used to perform +a data consistency check among the replicas. This document will +use /mnt/glusterfs as the example mount point. + + +3. Start the test +----------------- + sh# arequal-run.sh /usr/ /mnt/glusterfs/usr + + +4. Verify the output +-------------------- + The tool outputs two sets of checksums one after another on +the standard output. Verify that all the values match against +each other. 
This ensures that the data has been copied over +properly into the GlusterFS mountpoint. + + +5. Extensive Replicate testing +------------------------------ + The rest of the document is for testing the high availability +and healing features of Replicate. + + +6. High availability testing +---------------------------- + Restart step 3. While the script is in progress, kill one of the +servers. Let the script continue to completion. The script should +not fail because one of the servers was killed. The checksums +should still match. + + +7. Consistency testing +---------------------- + After step 3, run the following command on both the servers + + sh# arequal-checksum /export/directory + + The output values should match. + + +8. Recovery testing +------------------- + If step 7 is performed after step 6, the output values will not +match since changes performed when one of the servers was down have +not propagated to the backend. + + Bring the server back up again. On the same mountpoint, run an +ls -lR to force an access to all the files involved. + + Now calculate the checksums on both the backends as described +in step 7. The output values should match. diff --git a/arequal/arequal-checksum.c b/arequal/arequal-checksum.c new file mode 100644 index 0000000..8df7b02 --- /dev/null +++ b/arequal/arequal-checksum.c @@ -0,0 +1,429 @@ +/* + Copyright (c) 2006-2011 Gluster, Inc. + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
/* (license notice, continued)
  You should have received a copy of the GNU General Public License
  along with this program.  If not, see
  <http://www.gnu.org/licenses/>.
*/

/*
 * arequal-checksum: walk a directory tree with nftw() and print
 * XOR-folded summaries of what was found: entry counts, uid/gid/mode
 * folds per entry type, and content checksums (md5 of regular files,
 * entry names of directories, targets of symlinks, st_rdev of anything
 * else).  Because every value is folded with XOR, the result does not
 * depend on the order in which entries are visited.
 */

#define _XOPEN_SOURCE 600

#include <ftw.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
#include <string.h>
#include <dirent.h>
#include <stdint.h>


int debug = 0;

/* Print a debug line on stderr when 'debug' is non-zero. */
#define DBG(...) do {                                   \
                if (debug) {                            \
                        fprintf (stderr, "D ");         \
                        fprintf (stderr, __VA_ARGS__);  \
                }                                       \
        } while (0)


/* All this runs in single thread, hence using 'global' variables */

/* XOR folds of st_uid / st_gid / st_mode, one per entry type. */
unsigned long long avg_uid_file    = 0;
unsigned long long avg_uid_dir     = 0;
unsigned long long avg_uid_symlink = 0;
unsigned long long avg_uid_other   = 0;

unsigned long long avg_gid_file    = 0;
unsigned long long avg_gid_dir     = 0;
unsigned long long avg_gid_symlink = 0;
unsigned long long avg_gid_other   = 0;

unsigned long long avg_mode_file    = 0;
unsigned long long avg_mode_dir     = 0;
unsigned long long avg_mode_symlink = 0;
unsigned long long avg_mode_other   = 0;

unsigned long long global_ctime_checksum = 0;


/* Number of entries seen, one counter per entry type. */
unsigned long long count_dir     = 0;
unsigned long long count_file    = 0;
unsigned long long count_symlink = 0;
unsigned long long count_other   = 0;


/* Content checksums; checksum_file1/2 hold the two halves of the md5. */
unsigned long long checksum_file1   = 0;
unsigned long long checksum_file2   = 0;
unsigned long long checksum_dir     = 0;
unsigned long long checksum_symlink = 0;
unsigned long long checksum_other   = 0;


/*
 * Fold a NUL-terminated string into one word by XOR-ing it in
 * sizeof (unsigned long long)-byte chunks; the last chunk is
 * zero-padded.  An empty string yields 0.
 *
 * Each chunk is copied into a local word with memcpy().  The earlier
 * version strcpy()'d the string into a rounded-up alloca() buffer,
 * which wrote the terminating NUL one byte past the buffer whenever
 * the length was 0 or an exact multiple of the word size.
 */
unsigned long long
checksum_path (const char *path)
{
        unsigned long long  csum      = 0;
        size_t              remaining = strlen (path);

        while (remaining > 0) {
                unsigned long long  word  = 0;
                size_t              chunk = sizeof (word);

                if (remaining < chunk)
                        chunk = remaining;

                memcpy (&word, path, chunk);
                csum ^= word;

                path      += chunk;
                remaining -= chunk;
        }

        return csum;
}


/*
 * Build the shell command "md5sum < '<path>'" in a malloc()ed buffer.
 * Every single quote in path is replaced by the sequence '\'' so the
 * path stays one shell word.  The file is fed on stdin rather than
 * named as an argument: md5sum then prints the digest first on the
 * line regardless of the file name (when given a name containing a
 * backslash or newline it prefixes the line with '\'), and a path
 * starting with '-' cannot be taken for an option.
 *
 * Returns the command (caller frees) or NULL if allocation fails.
 */
static char *
build_md5_command (const char *path)
{
        static const char  prefix[]  = "md5sum < '";
        static const char  escaped[] = "'\\''";
        const char        *pos  = NULL;
        char              *cmd  = NULL;
        char              *cpos = NULL;
        /* prefix (with its NUL) plus the closing quote */
        size_t             len  = sizeof (prefix) + 1;

        for (pos = path; *pos; pos++)
                len += (*pos == '\'') ? (sizeof (escaped) - 1) : 1;

        cmd = malloc (len);
        if (!cmd)
                return NULL;

        cpos = cmd;
        memcpy (cpos, prefix, sizeof (prefix) - 1);
        cpos += sizeof (prefix) - 1;

        for (pos = path; *pos; pos++) {
                if (*pos == '\'') {
                        memcpy (cpos, escaped, sizeof (escaped) - 1);
                        cpos += sizeof (escaped) - 1;
                } else {
                        *cpos++ = *pos;
                }
        }

        *cpos++ = '\'';
        *cpos   = '\0';

        return cmd;
}


/*
 * Read exactly 16 hexadecimal digits from fp and store their value in
 * *value.  'path' is used only for error messages.
 *
 * Returns 0 on success, -1 on a short read or non-hex input.
 */
static int
read_hex64 (FILE *fp, const char *path, uint64_t *value)
{
        char  digits[17] = {0, };

        if (fread (digits, sizeof (char), 16, fp) != 16) {
                fprintf (stderr, "%s: short read\n", path);
                return -1;
        }

        /* Validate the digits instead of comparing the parsed value
         * with -1: sixteen 'f' digits are a legitimate value. */
        if (strspn (digits, "0123456789abcdefABCDEF") != 16) {
                fprintf (stderr, "%s: unexpected md5sum output '%s'\n",
                         path, digits);
                return -1;
        }

        *value = strtoull (digits, NULL, 16);

        return 0;
}


/*
 * Run md5sum on 'path' and XOR the first and second 64 bits of the
 * digest into checksum_file1 and checksum_file2.  'sb' is unused.
 *
 * Returns 0 on success, -1 on any failure (nothing is folded then).
 */
int
checksum_md5 (const char *path, const struct stat *sb)
{
        uint64_t  high  = 0;
        uint64_t  low   = 0;
        FILE     *filep = NULL;
        char     *cmd   = NULL;
        int       ret   = -1;

        (void) sb;

        cmd = build_md5_command (path);
        if (!cmd) {
                perror (path);
                goto out;
        }

        filep = popen (cmd, "r");
        if (!filep) {
                perror (path);
                goto out;
        }

        if (read_hex64 (filep, path, &high) != 0)
                goto out;

        if (read_hex64 (filep, path, &low) != 0)
                goto out;

        checksum_file1 ^= high;
        checksum_file2 ^= low;

        ret = 0;
out:
        if (filep)
                pclose (filep);

        free (cmd);

        return ret;
}


/*
 * XOR the checksum_path() of every name returned by readdir() on
 * 'path' (this includes "." and "..") into checksum_dir.  'sb' is
 * unused.
 *
 * Failures of opendir()/readdir() are reported with perror() but do
 * not stop the walk: the function always returns 0.
 */
int
checksum_filenames (const char *path, const struct stat *sb)
{
        DIR           *dirp  = NULL;
        struct dirent *entry = NULL;

        (void) sb;

        dirp = opendir (path);
        if (!dirp) {
                perror (path);
                return 0;
        }

        /* readdir() returns NULL both at end-of-directory and on error;
         * errno tells the two apart. */
        errno = 0;
        while ((entry = readdir (dirp)))
                checksum_dir ^= checksum_path (entry->d_name);

        if (errno)
                perror (path);

        closedir (dirp);

        return 0;
}


/*
 * Account for one regular file: bump the counter, fold its
 * uid/gid/mode, and fold the md5 of its contents.
 *
 * Returns the result of checksum_md5().
 */
int
process_file (const char *path, const struct stat *sb)
{
        count_file++;

        avg_uid_file  ^= sb->st_uid;
        avg_gid_file  ^= sb->st_gid;
        avg_mode_file ^= sb->st_mode;

        return checksum_md5 (path, sb);
}


/*
 * Account for one directory: bump the counter, fold its uid/gid/mode,
 * and fold the names of its entries.  Always returns 0.
 */
int
process_dir (const char *path, const struct stat *sb)
{
        count_dir++;

        avg_uid_dir  ^= sb->st_uid;
        avg_gid_dir  ^= sb->st_gid;
        avg_mode_dir ^= sb->st_mode;

        /* checksum_filenames() folds into checksum_dir itself and
         * always returns 0, so its return value carries nothing. */
        checksum_filenames (path, sb);

        return 0;
}


/*
 * Account for one symbolic link: bump the counter, fold its
 * uid/gid/mode, and fold the checksum of the link target.
 *
 * Returns 0 on success, -1 if readlink() fails.
 */
int
process_symlink (const char *path, const struct stat *sb)
{
        char                buf[4096] = {0, };
        unsigned long long  csum      = 0;
        ssize_t             len       = 0;

        count_symlink++;

        avg_uid_symlink  ^= sb->st_uid;
        avg_gid_symlink  ^= sb->st_gid;
        avg_mode_symlink ^= sb->st_mode;

        /* readlink() does not NUL-terminate; leave room for the
         * terminator so checksum_path() cannot run off the buffer. */
        len = readlink (path, buf, sizeof (buf) - 1);
        if (len < 0) {
                perror (path);
                return -1;
        }
        buf[len] = '\0';

        DBG ("readlink (%s) => %s\n", path, buf);

        csum = checksum_path (buf);

        DBG ("checksum_path (%s) => %llx\n", buf, csum);

        checksum_symlink ^= csum;

        return 0;
}


/*
 * Account for any other entry type: bump the counter, fold its
 * uid/gid/mode, and fold st_rdev.  Always returns 0.
 */
int
process_other (const char *path, const struct stat *sb)
{
        (void) path;

        count_other++;

        avg_uid_other  ^= sb->st_uid;
        avg_gid_other  ^= sb->st_gid;
        avg_mode_other ^= sb->st_mode;

        checksum_other ^= sb->st_rdev;

        return 0;
}


/*
 * nftw() callback: dispatch on the file type bits of sb->st_mode.
 * A non-zero return value makes nftw() stop the walk.
 */
int
process_entry (const char *path, const struct stat *sb,
               int typeflag, struct FTW *ftwbuf)
{
        int ret = 0;

        (void) ftwbuf;

        DBG ("processing entry %s\n", path);

        /* For FTW_NS the stat buffer contents are undefined, so there
         * is nothing trustworthy to fold; fail loudly instead. */
        if (typeflag == FTW_NS) {
                fprintf (stderr, "%s: stat failed\n", path);
                return -1;
        }

        switch ((S_IFMT & sb->st_mode)) {
        case S_IFDIR:
                ret = process_dir (path, sb);
                break;
        case S_IFREG:
                ret = process_file (path, sb);
                break;
        case S_IFLNK:
                ret = process_symlink (path, sb);
                break;
        default:
                ret = process_other (path, sb);
                break;
        }

        return ret;
}


/* Print the per-type entry counts and their total.  Always returns 0. */
int
display_counts (FILE *fp)
{
        fprintf (fp, "\n");
        fprintf (fp, "Entry counts\n");
        fprintf (fp, "Regular files : %llu\n", count_file);
        fprintf (fp, "Directories : %llu\n", count_dir);
        fprintf (fp, "Symbolic links : %llu\n", count_symlink);
        fprintf (fp, "Other : %llu\n", count_other);
        fprintf (fp, "Total : %llu\n",
                 (count_file + count_dir + count_symlink + count_other));

        return 0;
}


/*
 * Print the content checksums and their XOR.  Always returns 0.
 *
 * NOTE(review): the two md5 halves are printed back to back without
 * zero padding, so the boundary between them is ambiguous.  The format
 * is kept as is so that output stays comparable with existing runs.
 */
int
display_checksums (FILE *fp)
{
        fprintf (fp, "\n");
        fprintf (fp, "Checksums\n");
        fprintf (fp, "Regular files : %llx%llx\n", checksum_file1, checksum_file2);
        fprintf (fp, "Directories : %llx\n", checksum_dir);
        fprintf (fp, "Symbolic links : %llx\n", checksum_symlink);
        fprintf (fp, "Other : %llx\n", checksum_other);
        fprintf (fp, "Total : %llx\n",
                 (checksum_file1 ^ checksum_file2 ^ checksum_dir ^ checksum_symlink ^ checksum_other));

        return 0;
}


/*
 * Print one metadata checksum per entry type, combining the uid, gid
 * and mode folds as (uid + 13) * (gid + 11) * (mode + 7).
 * Always returns 0.
 */
int
display_metadata (FILE *fp)
{
        fprintf (fp, "\n");
        fprintf (fp, "Metadata checksums\n");
        fprintf (fp, "Regular files : %llx\n",
                 (avg_uid_file + 13) * (avg_gid_file + 11) * (avg_mode_file + 7));
        fprintf (fp, "Directories : %llx\n",
                 (avg_uid_dir + 13) * (avg_gid_dir + 11) * (avg_mode_dir + 7));
        fprintf (fp, "Symbolic links : %llx\n",
                 (avg_uid_symlink + 13) * (avg_gid_symlink + 11) * (avg_mode_symlink + 7));
        fprintf (fp, "Other : %llx\n",
                 (avg_uid_other + 13) * (avg_gid_other + 11) * (avg_mode_other + 7));

        return 0;
}


/* Print counts, metadata checksums and content checksums, in that order. */
int
display_stats (FILE *fp)
{
        display_counts (fp);

        display_metadata (fp);

        display_checksums (fp);

        return 0;
}


/*
 * Usage: arequal-checksum <directory>
 *
 * Walks the tree rooted at argv[1] without following symlinks
 * (FTW_PHYS) and without crossing mount points (FTW_MOUNT), then
 * prints the statistics on stdout.  Returns 0 on success, 1 if the
 * walk stopped early, -1 on a usage error.
 */
int
main(int argc, char *argv[])
{
        int ret = 0;

        if (argc != 2) {
                fprintf (stderr, "Usage: %s <directory>\n",
                         argv[0]);
                return -1;
        }

        ret = nftw (argv[1], process_entry, 30, FTW_PHYS|FTW_MOUNT);
        if (ret != 0) {
                fprintf (stderr, "ftw (%s) returned %d (%s), terminating\n",
                         argv[1], ret, strerror (errno));
                return 1;
        }

        display_stats (stdout);

        return 0;
}

/*
 * The patch continues with the next file:
 * diff --git a/arequal/arequal-run.sh b/arequal/arequal-run.sh
 * new file mode 100755
 * index 0000000..597cf26
 * --- /dev/null
 * +++ b/arequal/arequal-run.sh
 */
#!/bin/bash
#
# arequal-run.sh - copy a source tree to a destination with rsync, then
# print the arequal-checksum output for both so the values can be compared.
#
# Usage: arequal-run.sh <src> <dst>


# Copy $1 to $2 with rsync (archive mode, verbose, compressed).
function do_copy()
{
        local src="$1";
        local dst="$2";

        # Quoted so that a path containing whitespace or glob characters
        # reaches rsync as a single argument.
        rsync -avz "$src" "$dst";
}


# Print the arequal-checksum report for directory $1, followed by a
# separator line.
function calc_checksum()
{
        local dir="$1";

        echo "Calculating checksum on directory $dir ..."
        arequal-checksum "$dir";
        echo "-------------------------------------"
        echo
}


# Entry point: validate the arguments, copy, then checksum both trees.
# Returns 1 (without copying anything) unless exactly two arguments
# are given.
function main()
{
        if [ $# -ne 2 ]; then
                echo "Usage: $0 <src> <dst>" >&2;
                echo " e.g: $0 /usr /mnt/glusterfs/usr" >&2;
                # Stop here: continuing would run rsync and the
                # checksums with empty paths.
                return 1;
        fi

        local src="$1";
        local dst="$2";

        do_copy "$src" "$dst";

        echo "Calculating checksums on source and destination";
        echo "===============================================";

        calc_checksum "$src";

        calc_checksum "$dst";
}

main "$@"