summaryrefslogtreecommitdiffstats
path: root/arequal
diff options
context:
space:
mode:
authorRaghavendra Bhat <raghavendrabhat@gluster.com>2012-02-09 11:12:09 +0530
committerRaghavendra Bhat <raghavendrabhat@gluster.com>2012-02-20 22:00:12 +0530
commitf90a39b875921fcd18ee80857ec2eb29daea9c76 (patch)
tree7d72471960af6d2d56d915deb6f19946dc13a8bf /arequal
parent160b798aad17c48e2bf93811dd06de168244a4e1 (diff)
c_pgms/arequal: Add arequal tool to the git
Adding arequal to the git, which can be used to calculate the checksum of path given to it as an argument Change-Id: Iefd283511e6ff2f3bc1359e718d1922506d7a24a Signed-off-by: Raghavendra Bhat <raghavendrabhat@gluster.com>
Diffstat (limited to 'arequal')
-rw-r--r--arequal/README77
-rw-r--r--arequal/arequal-checksum.c429
-rwxr-xr-xarequal/arequal-run.sh45
3 files changed, 551 insertions, 0 deletions
diff --git a/arequal/README b/arequal/README
new file mode 100644
index 0000000..8c9d952
--- /dev/null
+++ b/arequal/README
@@ -0,0 +1,77 @@
+arequal - Tool to test data security of GlusterFS
+=======
+
+0. Install arequal
+------------------
+ sh# ./autogen.sh
+ sh# ./configure
+ sh# make
+ sh# make install
+
+ Perform the above installation steps on both clients and servers
+
+
+1. Generate data set
+--------------------
+ Create a data set to be used for the test. This data set should
+have file sizes and file count similar to the data set to be used
+in production. You could also use existing data (like /usr) as
+your dataset as it will not be modified. This document will use
+/usr as the example source directory.
+
+
+2. Mount GlusterFS
+------------------
+ Install, configure and start glusterfs servers and client. If
+the Replicate module is loaded, this tool can be used to perform
+data consistency check among the replicas. This document will
+use /mnt/glusterfs as the example mount point.
+
+
+3. Start the test
+-----------------
+ sh# arequal-run.sh /usr/ /mnt/glusterfs/usr
+
+
+4. Verify the output
+--------------------
+ The tool outputs two sets of checksums one after another on
+the standard output. Verify that all the values match against
+each other. This ensures that the data has been copied over
+properly into the GlusterFS mountpoint.
+
+
+5. Extensive Replicate testing
+------------------------------
+ The rest of the document is for testing the high availability
+and healing features of Replicate.
+
+
+6. High availability testing
+----------------------------
+ Restart step 3. While the script is in progress, kill one of the
+servers. Let the script continue to completion. The script should
+not fail because of one of the servers getting killed. The checksums
+should still match.
+
+
+7. Consistency testing
+----------------------
+ After step 3, run the following command on both the servers
+
+ sh# arequal-checksum /export/directory
+
+ The output values should match
+
+
+8. Recovery testing
+-------------------
+ If step 7 is performed after step 6, the output values will not
+match, since changes performed while one of the servers was down have
+not been propagated to the backend.
+
+ Bring the server back up again. On the same mountpoint, run an
+ls -lR to force an access to all the files involved.
+
+ Now calculate the checksums on both the backends as described
+in step 7. The output values should match.
diff --git a/arequal/arequal-checksum.c b/arequal/arequal-checksum.c
new file mode 100644
index 0000000..8df7b02
--- /dev/null
+++ b/arequal/arequal-checksum.c
@@ -0,0 +1,429 @@
+/*
+ Copyright (c) 2006-2011 Gluster, Inc. <http://www.zresearch.com/>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#define _XOPEN_SOURCE 600
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ftw.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <dirent.h>
+#include <stdlib.h>
+#include <libgen.h>
+#include <stdint.h>
+#include <alloca.h>
+#include <dirent.h>
+
+
+int debug = 0; /* non-zero enables DBG() tracing; never assigned in the code shown here */
+/*
+ * DBG: printf-style trace helper. When 'debug' is non-zero it writes "D "
+ * followed by the formatted message to stderr; otherwise it does nothing.
+ * "fmt ..." is the GNU named-variadic macro syntax, and the do/while (0)
+ * wrapper makes the expansion usable as a single statement.
+ */
+#define DBG(fmt ...) do { \
+ if (debug) { \
+ fprintf (stderr, "D "); \
+ fprintf (stderr, fmt); \
+ } \
+ } while (0)
+
+
+/*
+ * Round 'a' up to the next multiple of 'b' (for non-negative 'a' and
+ * positive 'b').  A zero 'b' is only guarded against in the division,
+ * so the result is 0 in that case.
+ */
+static inline int roof(int a, int b)
+{
+        int divisor = b ? b : 1;
+        int steps = (a + b - 1) / divisor;
+
+        return steps * b;
+}
+
+
+/*
+ * All this runs in single thread, hence using 'global' variables.
+ *
+ * avg_uid_*, avg_gid_*, avg_mode_*:
+ *     despite the "avg_" prefix these are XOR accumulators: the process_*
+ *     functions XOR st_uid / st_gid / st_mode of every visited entry into
+ *     the variable matching the entry's type.
+ * global_ctime_checksum:
+ *     not referenced anywhere in the code shown here.
+ * count_*:
+ *     number of entries visited, per entry type.
+ * checksum_file1, checksum_file2:
+ *     XOR of the first / second 16 hex digits of each regular file's
+ *     md5sum output.
+ * checksum_dir:
+ *     XOR of checksum_path() over the entry names of every directory.
+ * checksum_symlink:
+ *     XOR of checksum_path() over every symlink target.
+ * checksum_other:
+ *     XOR of st_rdev of every entry that is not a file, directory or
+ *     symlink.
+ */
+
+unsigned long long avg_uid_file = 0;
+unsigned long long avg_uid_dir = 0;
+unsigned long long avg_uid_symlink = 0;
+unsigned long long avg_uid_other = 0;
+
+unsigned long long avg_gid_file = 0;
+unsigned long long avg_gid_dir = 0;
+unsigned long long avg_gid_symlink = 0;
+unsigned long long avg_gid_other = 0;
+
+unsigned long long avg_mode_file = 0;
+unsigned long long avg_mode_dir = 0;
+unsigned long long avg_mode_symlink = 0;
+unsigned long long avg_mode_other = 0;
+
+unsigned long long global_ctime_checksum = 0;
+
+
+unsigned long long count_dir = 0;
+unsigned long long count_file = 0;
+unsigned long long count_symlink = 0;
+unsigned long long count_other = 0;
+
+
+unsigned long long checksum_file1 = 0;
+unsigned long long checksum_file2 = 0;
+unsigned long long checksum_dir = 0;
+unsigned long long checksum_symlink = 0;
+unsigned long long checksum_other = 0;
+
+
+/*
+ * Fold a NUL-terminated string into one 'unsigned long long' by XORing
+ * it together in word-sized pieces; the last piece is zero-padded.
+ * The terminating NUL is not part of the checksum, and an empty string
+ * yields 0.
+ *
+ * Each piece is copied into a local word with a bounded memcpy().  The
+ * previous implementation strcpy()'d the string into an alloca() buffer
+ * sized without the terminator, which wrote one byte past the buffer
+ * whenever the length was a multiple of the word size (including the
+ * empty string), and read the char data through an incompatible pointer
+ * type.  The computed values are unchanged.
+ */
+unsigned long long
+checksum_path (const char *path)
+{
+        unsigned long long csum = 0;
+        unsigned long long word = 0;
+        size_t len = strlen (path);
+        size_t off = 0;
+        size_t chunk = 0;
+
+        for (off = 0; off < len; off += sizeof (word)) {
+                chunk = len - off;
+                if (chunk > sizeof (word))
+                        chunk = sizeof (word);
+
+                /* zero first so a short final piece is zero-padded */
+                word = 0;
+                memcpy (&word, path + off, chunk);
+                csum ^= word;
+        }
+
+        return csum;
+}
+
+
+/*
+ * Convert 'hex', which must consist of exactly 16 hexadecimal digits,
+ * into '*value'.
+ * Returns 0 on success and -1 if 'hex' is anything else.
+ */
+static int
+parse_md5_half (const char *hex, uint64_t *value)
+{
+        if (strlen (hex) != 16 ||
+            strspn (hex, "0123456789abcdefABCDEF") != 16)
+                return -1;
+
+        /* 16 hex digits always fit in 64 bits, so this cannot overflow */
+        *value = strtoull (hex, NULL, 16);
+
+        return 0;
+}
+
+
+/*
+ * Compute the MD5 digest of the regular file 'path' by running the
+ * external md5sum command, and XOR the first 16 hex digits of the digest
+ * into checksum_file1 and the last 16 into checksum_file2.
+ *
+ * 'sb' is unused.
+ *
+ * Returns 0 on success.  On any failure (out of memory, popen failure,
+ * truncated or malformed md5sum output) a message is written to stderr,
+ * -1 is returned and neither global checksum is modified.
+ */
+int
+checksum_md5 (const char *path, const struct stat *sb)
+{
+        /*
+         * The file is fed to md5sum on stdin rather than named as an
+         * argument.  When given a file name containing a backslash or a
+         * newline, GNU md5sum starts its output line with '\', which
+         * would shift the digest; reading from stdin avoids that and also
+         * keeps paths starting with '-' from being taken as options.
+         */
+        static const char prefix[] = "md5sum < '";
+        /* a single quote inside a single-quoted shell word: '\'' */
+        static const char escaped_quote[] = "'\\''";
+        uint64_t halves[2] = {0, 0};
+        FILE *filep = NULL;
+        char *cmd = NULL;
+        char *cpos = NULL;
+        const char *pos = NULL;
+        char strvalue[17] = {0,};
+        size_t len = 0;
+        int ret = -1;
+        int i = 0;
+
+        (void) sb;
+
+        /* prefix (sizeof counts its NUL) + escaped path + closing quote */
+        len = sizeof (prefix) + 1;
+        for (pos = path; *pos; pos++)
+                len += (*pos == '\'') ? sizeof (escaped_quote) - 1 : 1;
+
+        cmd = malloc (len);
+        if (!cmd) {
+                perror (path);
+                goto out;
+        }
+
+        memcpy (cmd, prefix, sizeof (prefix) - 1);
+        cpos = cmd + sizeof (prefix) - 1;
+
+        for (pos = path; *pos; pos++) {
+                if (*pos == '\'') {
+                        memcpy (cpos, escaped_quote,
+                                sizeof (escaped_quote) - 1);
+                        cpos += sizeof (escaped_quote) - 1;
+                } else {
+                        *cpos++ = *pos;
+                }
+        }
+
+        /* Add on the trailing single-quote and null-terminate. */
+        *cpos++ = '\'';
+        *cpos = '\0';
+
+        filep = popen (cmd, "r");
+        if (!filep) {
+                perror (path);
+                goto out;
+        }
+
+        /* the digest is 32 hex digits: read it as two 64-bit halves */
+        for (i = 0; i < 2; i++) {
+                if (fread (strvalue, sizeof (char), 16, filep) != 16) {
+                        fprintf (stderr, "%s: short read\n", path);
+                        goto out;
+                }
+
+                if (parse_md5_half (strvalue, &halves[i]) != 0) {
+                        fprintf (stderr, "%s: invalid md5sum output '%s'\n",
+                                 path, strvalue);
+                        goto out;
+                }
+        }
+
+        /* only touch the globals once both halves are known to be good */
+        checksum_file1 ^= halves[0];
+        checksum_file2 ^= halves[1];
+
+        ret = 0;
+out:
+        if (filep)
+                pclose (filep);
+
+        free (cmd);
+
+        return ret;
+}
+
+
+/*
+ * XOR the checksum_path() of every entry name in directory 'path'
+ * (as returned by readdir(), so "." and ".." are included) into
+ * checksum_dir.
+ *
+ * 'sb' is unused.  Failures of opendir()/readdir() are reported with
+ * perror() but are not signalled to the caller: the function always
+ * returns 0.
+ */
+int
+checksum_filenames (const char *path, const struct stat *sb)
+{
+        struct dirent *dent = NULL;
+        DIR *dir = opendir (path);
+
+        if (dir == NULL) {
+                perror (path);
+                return 0;
+        }
+
+        /* readdir() returns NULL both at end-of-directory and on error;
+         * a non-zero errno afterwards tells the two apart */
+        errno = 0;
+        for (dent = readdir (dir); dent != NULL; dent = readdir (dir))
+                checksum_dir ^= checksum_path (dent->d_name);
+
+        if (errno != 0)
+                perror (path);
+
+        closedir (dir);
+
+        return 0;
+}
+
+
+/*
+ * Account for one regular file: bump the file counter, XOR its owner,
+ * group and mode into the per-type metadata accumulators, then fold its
+ * MD5 digest into the file checksums.
+ *
+ * Returns whatever checksum_md5() returns.
+ */
+int
+process_file (const char *path, const struct stat *sb)
+{
+        count_file += 1;
+
+        avg_mode_file ^= sb->st_mode;
+        avg_gid_file ^= sb->st_gid;
+        avg_uid_file ^= sb->st_uid;
+
+        return checksum_md5 (path, sb);
+}
+
+
+/*
+ * Account for one directory: bump the directory counter, XOR its owner,
+ * group and mode into the per-type metadata accumulators, and fold the
+ * names of its entries into checksum_dir.
+ *
+ * Always returns 0, so the tree walk continues even if the directory
+ * could not be listed.
+ */
+int
+process_dir (const char *path, const struct stat *sb)
+{
+        count_dir++;
+
+        avg_uid_dir ^= sb->st_uid;
+        avg_gid_dir ^= sb->st_gid;
+        avg_mode_dir ^= sb->st_mode;
+
+        /*
+         * checksum_filenames() XORs the entry names into checksum_dir
+         * itself.  Its return value is a status code, not checksum data,
+         * so it must not be XORed into checksum_dir as well; it reports
+         * its own errors, and the walk is deliberately best-effort.
+         */
+        (void) checksum_filenames (path, sb);
+
+        return 0;
+}
+
+
+/*
+ * Account for one symbolic link: bump the symlink counter, XOR its
+ * owner, group and mode into the per-type metadata accumulators, and XOR
+ * the checksum_path() of the link target into checksum_symlink.
+ *
+ * Returns 0 on success, or -1 (after perror()) if the link cannot be
+ * read.
+ */
+int
+process_symlink (const char *path, const struct stat *sb)
+{
+        char buf[4096] = {0, };
+        ssize_t nread = 0;
+        unsigned long long csum = 0;
+
+        count_symlink++;
+
+        avg_uid_symlink ^= sb->st_uid;
+        avg_gid_symlink ^= sb->st_gid;
+        avg_mode_symlink ^= sb->st_mode;
+
+        /*
+         * readlink() does not NUL-terminate its result, so keep one byte
+         * in reserve and terminate explicitly; otherwise a target that
+         * fills the whole buffer would be read past its end below.
+         */
+        nread = readlink (path, buf, sizeof (buf) - 1);
+        if (nread < 0) {
+                perror (path);
+                return -1;
+        }
+        buf[nread] = '\0';
+
+        DBG ("readlink (%s) => %s\n", path, buf);
+
+        csum = checksum_path (buf);
+
+        DBG ("checksum_path (%s) => %llx\n", buf, csum);
+
+        checksum_symlink ^= csum;
+
+        return 0;
+}
+
+
+/*
+ * Account for an entry that is neither a regular file, a directory nor
+ * a symlink: bump the "other" counter, XOR its owner, group and mode into
+ * the per-type metadata accumulators, and XOR its st_rdev into
+ * checksum_other.
+ *
+ * 'path' is unused.  Always returns 0.
+ */
+int
+process_other (const char *path, const struct stat *sb)
+{
+        count_other += 1;
+
+        avg_mode_other ^= sb->st_mode;
+        avg_gid_other ^= sb->st_gid;
+        avg_uid_other ^= sb->st_uid;
+
+        checksum_other ^= sb->st_rdev;
+
+        return 0;
+}
+
+
+/*
+ * nftw() callback: hand the entry to the process_* function matching the
+ * file type encoded in sb->st_mode.
+ *
+ * 'typeflag' and 'ftwbuf' are unused.  Returns the handler's result; the
+ * only caller shown here, main(), treats a non-zero result from the walk
+ * as fatal.
+ */
+int
+process_entry (const char *path, const struct stat *sb,
+               int typeflag, struct FTW *ftwbuf)
+{
+        mode_t kind = (S_IFMT & sb->st_mode);
+
+        DBG ("processing entry %s\n", path);
+
+        if (kind == S_IFDIR)
+                return process_dir (path, sb);
+
+        if (kind == S_IFREG)
+                return process_file (path, sb);
+
+        if (kind == S_IFLNK)
+                return process_symlink (path, sb);
+
+        return process_other (path, sb);
+}
+
+
+/*
+ * Print the number of entries visited, per entry type and in total,
+ * to 'fp'.  Always returns 0.
+ *
+ * The counters are 'unsigned long long', so they are printed with %llu
+ * (the previous %lld did not match the argument type).
+ */
+int
+display_counts (FILE *fp)
+{
+        fprintf (fp, "\n");
+        fprintf (fp, "Entry counts\n");
+        fprintf (fp, "Regular files : %llu\n", count_file);
+        fprintf (fp, "Directories : %llu\n", count_dir);
+        fprintf (fp, "Symbolic links : %llu\n", count_symlink);
+        fprintf (fp, "Other : %llu\n", count_other);
+        fprintf (fp, "Total : %llu\n",
+                 (count_file + count_dir + count_symlink + count_other));
+
+        return 0;
+}
+
+
+/*
+ * Print the content checksums to 'fp' in hexadecimal: the two halves of
+ * the regular-file checksum back to back, the directory, symlink and
+ * "other" checksums, and the XOR of all five values as the total.
+ * Always returns 0.
+ */
+int
+display_checksums (FILE *fp)
+{
+        unsigned long long total = 0;
+
+        total = checksum_file1 ^ checksum_file2 ^ checksum_dir
+                ^ checksum_symlink ^ checksum_other;
+
+        fprintf (fp, "\n");
+        fprintf (fp, "Checksums\n");
+        fprintf (fp, "Regular files : %llx%llx\n",
+                 checksum_file1, checksum_file2);
+        fprintf (fp, "Directories : %llx\n", checksum_dir);
+        fprintf (fp, "Symbolic links : %llx\n", checksum_symlink);
+        fprintf (fp, "Other : %llx\n", checksum_other);
+        fprintf (fp, "Total : %llx\n", total);
+
+        return 0;
+}
+
+
+/*
+ * Print one metadata checksum per entry type to 'fp' in hexadecimal.
+ * Each value combines the type's uid, gid and mode accumulators as
+ * (uid + 13) * (gid + 11) * (mode + 7), computed in unsigned long long
+ * arithmetic.  Always returns 0.
+ */
+int
+display_metadata (FILE *fp)
+{
+        unsigned long long meta_file = 0;
+        unsigned long long meta_dir = 0;
+        unsigned long long meta_symlink = 0;
+        unsigned long long meta_other = 0;
+
+        meta_file = (avg_uid_file + 13) * (avg_gid_file + 11)
+                    * (avg_mode_file + 7);
+        meta_dir = (avg_uid_dir + 13) * (avg_gid_dir + 11)
+                   * (avg_mode_dir + 7);
+        meta_symlink = (avg_uid_symlink + 13) * (avg_gid_symlink + 11)
+                       * (avg_mode_symlink + 7);
+        meta_other = (avg_uid_other + 13) * (avg_gid_other + 11)
+                     * (avg_mode_other + 7);
+
+        fprintf (fp, "\n");
+        fprintf (fp, "Metadata checksums\n");
+        fprintf (fp, "Regular files : %llx\n", meta_file);
+        fprintf (fp, "Directories : %llx\n", meta_dir);
+        fprintf (fp, "Symbolic links : %llx\n", meta_symlink);
+        fprintf (fp, "Other : %llx\n", meta_other);
+
+        return 0;
+}
+
+/*
+ * Print the full report to 'fp': entry counts first, then the metadata
+ * checksums, then the content checksums.  Always returns 0.
+ */
+int
+display_stats (FILE *fp)
+{
+        (void) display_counts (fp);
+        (void) display_metadata (fp);
+        (void) display_checksums (fp);
+
+        return 0;
+}
+
+
+/*
+ * Usage: arequal-checksum <directory>
+ *
+ * Walks the tree rooted at the given path with nftw(), using at most 30
+ * file descriptors, without following symlinks (FTW_PHYS) and without
+ * crossing mount points (FTW_MOUNT), then prints the collected
+ * statistics on stdout.
+ *
+ * Returns 0 on success, -1 on a usage error and 1 if the walk returned
+ * a non-zero value.
+ */
+int
+main(int argc, char *argv[])
+{
+        const char *root = NULL;
+        int status = 0;
+
+        if (argc != 2) {
+                fprintf (stderr, "Usage: %s <directory>\n",
+                         argv[0]);
+                return -1;
+        }
+
+        root = argv[1];
+
+        status = nftw (root, process_entry, 30, FTW_PHYS|FTW_MOUNT);
+        if (status == 0) {
+                display_stats (stdout);
+                return 0;
+        }
+
+        fprintf (stderr, "ftw (%s) returned %d (%s), terminating\n",
+                 root, status, strerror (errno));
+
+        return 1;
+}
diff --git a/arequal/arequal-run.sh b/arequal/arequal-run.sh
new file mode 100755
index 0000000..597cf26
--- /dev/null
+++ b/arequal/arequal-run.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+
+
+# Copy the tree at $1 to $2 with rsync (-a archive, -v verbose,
+# -z compress).  Both paths are quoted so that names containing
+# whitespace or glob characters are passed to rsync unchanged.
+function do_copy()
+{
+    local src="$1";
+    local dst="$2";
+
+    rsync -avz "$src" "$dst";
+}
+
+
+# Run arequal-checksum on the directory given as $1, framed by a heading
+# line before and a separator line plus an empty line after its output.
+function calc_checksum()
+{
+    local target="$1"
+
+    echo "Calculating checksum on directory $target ..."
+    arequal-checksum "$target"
+    echo "-------------------------------------"
+    echo
+}
+
+
+# Entry point: copy <src> to <dst>, then print the arequal checksums of
+# both trees one after the other so that they can be compared.
+# Returns 1 without doing anything if not exactly two arguments are given.
+function main()
+{
+    if [ $# -ne 2 ]; then
+        echo "Usage: $0 <src> <dst>";
+        echo " e.g: $0 /usr /mnt/glusterfs/usr";
+        # Stop here: previously the script went on to run rsync and the
+        # checksums with missing arguments.
+        return 1;
+    fi
+
+    local src="$1";
+    local dst="$2";
+
+    do_copy "$src" "$dst";
+
+    echo "Calculating checksums on source and destination";
+    echo "===============================================";
+
+    calc_checksum "$src";
+
+    calc_checksum "$dst";
+}
+
+main "$@"