diff options
Diffstat (limited to 'tools/gfind_missing_files/gcrawler.c')
-rw-r--r-- | tools/gfind_missing_files/gcrawler.c | 572 |
1 files changed, 572 insertions, 0 deletions
diff --git a/tools/gfind_missing_files/gcrawler.c b/tools/gfind_missing_files/gcrawler.c new file mode 100644 index 00000000000..517e773cb7c --- /dev/null +++ b/tools/gfind_missing_files/gcrawler.c @@ -0,0 +1,572 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <stdio.h> +#include <errno.h> +#include <sys/stat.h> +#include <unistd.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <dirent.h> +#include <assert.h> + +#ifndef __FreeBSD__ +#ifdef __NetBSD__ +#include <sys/xattr.h> +#else +#include <attr/xattr.h> +#endif /* __NetBSD__ */ +#endif /* __FreeBSD__ */ + +#include "list.h" + +#define THREAD_MAX 32 +#define BUMP(name) INC(name, 1) +#define DEFAULT_WORKERS 4 + +#define NEW(x) { \ + x = calloc (1, sizeof (typeof (*x))); \ + } + +#define err(x ...) fprintf(stderr, x) +#define out(x ...) fprintf(stdout, x) +#define dbg(x ...) do { if (debug) fprintf(stdout, x); } while (0) +#define tout(x ...) do { out("[%ld] ", pthread_self()); out(x); } while (0) +#define terr(x ...) do { err("[%ld] ", pthread_self()); err(x); } while (0) +#define tdbg(x ...) do { dbg("[%ld] ", pthread_self()); dbg(x); } while (0) + +int debug = 0; +const char *slavemnt = NULL; +int workers = 0; + +struct stats { + unsigned long long int cnt_skipped_gfids; +}; + +pthread_spinlock_t stats_lock; + +struct stats stats_total; +int stats = 0; + +#define INC(name, val) do { \ + if (!stats) \ + break; \ + pthread_spin_lock(&stats_lock); \ + { \ + stats_total.cnt_##name += val; \ + } \ + pthread_spin_unlock(&stats_lock); \ + } while (0) + +void +stats_dump() +{ + if (!stats) + return; + + out("-------------------------------------------\n"); + out("Skipped_Files : %10lld\n", stats_total.cnt_skipped_gfids); + out("-------------------------------------------\n"); +} + +struct dirjob { + struct list_head list; + + char *dirname; + + struct dirjob *parent; + int ret; /* final status of this subtree */ + int refcnt; /* how many dirjobs have this as parent */ + + pthread_spinlock_t lock; +}; + + +struct xwork { + pthread_t cthreads[THREAD_MAX]; /* crawler threads */ + int count; + int idle; + int stop; + + struct dirjob crawl; + + struct dirjob *rootjob; /* to verify completion in xwork_fini() */ + + pthread_mutex_t mutex; + pthread_cond_t cond; +}; + + +struct dirjob * +dirjob_ref (struct dirjob *job) +{ + pthread_spin_lock (&job->lock); + { + job->refcnt++; + } + pthread_spin_unlock (&job->lock); + + return job; +} + + +void +dirjob_free (struct dirjob *job) +{ + assert (list_empty (&job->list)); + + pthread_spin_destroy (&job->lock); + free (job->dirname); + free (job); +} + +void +dirjob_ret (struct dirjob *job, int err) +{ + int ret = 0; + int refcnt = 0; + struct dirjob *parent = NULL; + + pthread_spin_lock (&job->lock); + { + refcnt = --job->refcnt; + job->ret = (job->ret || err); + } + pthread_spin_unlock (&job->lock); + + if (refcnt == 0) { + ret = job->ret; + + if (ret) + terr ("Failed: %s (%d)\n", job->dirname, ret); + else + tdbg ("Finished: %s\n", job->dirname); + + parent = job->parent; + if (parent) + dirjob_ret (parent, ret); + + dirjob_free (job); + job = NULL; + } +} + + +struct dirjob * +dirjob_new (const char *dir, struct dirjob *parent) +{ + struct dirjob *job = NULL; + + NEW(job); + if (!job) + return NULL; + + job->dirname = strdup (dir); + if (!job->dirname) { + free (job); + return NULL; + } + + INIT_LIST_HEAD(&job->list); + pthread_spin_init (&job->lock, PTHREAD_PROCESS_PRIVATE); + job->ret = 0; + + if (parent) + job->parent = dirjob_ref (parent); + + job->refcnt = 1; + + return job; +} + +void +xwork_addcrawl (struct xwork *xwork, struct dirjob *job) +{ + pthread_mutex_lock (&xwork->mutex); + { + list_add_tail (&job->list, &xwork->crawl.list); + pthread_cond_broadcast (&xwork->cond); + } + pthread_mutex_unlock (&xwork->mutex); +} + +int +xwork_add (struct xwork *xwork, const char *dir, struct dirjob *parent) +{ + struct dirjob *job = NULL; + + job = dirjob_new (dir, parent); + if (!job) + return -1; + + xwork_addcrawl (xwork, job); + + return 0; +} + + +struct dirjob * +xwork_pick (struct xwork *xwork, int block) +{ + struct dirjob *job = NULL; + struct list_head *head = NULL; + + head = &xwork->crawl.list; + + pthread_mutex_lock (&xwork->mutex); + { + for (;;) { + if (xwork->stop) + break; + + if (!list_empty (head)) { + job = list_entry (head->next, typeof(*job), + list); + list_del_init (&job->list); + break; + } + + if (((xwork->count * 2) == xwork->idle) && + list_empty (&xwork->crawl.list)) { + /* no outstanding jobs, and no + active workers + */ + tdbg ("Jobless. Terminating\n"); + xwork->stop = 1; + pthread_cond_broadcast (&xwork->cond); + break; + } + + if (!block) + break; + + xwork->idle++; + pthread_cond_wait (&xwork->cond, &xwork->mutex); + xwork->idle--; + } + } + pthread_mutex_unlock (&xwork->mutex); + + return job; +} + +int +skip_name (const char *dirname, const char *name) +{ + if (strcmp (name, ".") == 0) + return 1; + + if (strcmp (name, "..") == 0) + return 1; + + if (strcmp (name, "changelogs") == 0) + return 1; + + if (strcmp (name, "health_check") == 0) + return 1; + + if (strcmp (name, "indices") == 0) + return 1; + + if (strcmp (name, "landfill") == 0) + return 1; + + return 0; +} + +int +skip_stat (struct dirjob *job, const char *name) +{ + if (job == NULL) + return 0; + + if (strcmp (job->dirname, ".glusterfs") == 0) { + tdbg ("Directly adding directories under .glusterfs " + "to global list: %s\n", name); + return 1; + } + + if (job->parent != NULL) { + if (strcmp (job->parent->dirname, ".glusterfs") == 0) { + tdbg ("Directly adding directories under .glusterfs/XX " + "to global list: %s\n", name); + return 1; + } + } + + return 0; +} + +int +xworker_do_crawl (struct xwork *xwork, struct dirjob *job) +{ + DIR *dirp = NULL; + int ret = -1; + int boff; + int plen; + struct dirent *result; + char dbuf[512]; + char *path = NULL; + struct dirjob *cjob = NULL; + struct stat statbuf = {0,}; + char gfid_path[4096] = {0,}; + + + plen = strlen (job->dirname) + 256 + 2; + path = alloca (plen); + + tdbg ("Entering: %s\n", job->dirname); + + dirp = opendir (job->dirname); + if (!dirp) { + terr ("opendir failed on %s (%s)\n", job->dirname, + strerror (errno)); + goto out; + } + + boff = sprintf (path, "%s/", job->dirname); + + for (;;) { + ret = readdir_r (dirp, (struct dirent *)dbuf, &result); + if (ret) { + err ("readdir_r(%s): %s\n", job->dirname, + strerror (errno)); + goto out; + } + + if (!result) /* EOF */ + break; + + if (result->d_ino == 0) + continue; + + if (skip_name (job->dirname, result->d_name)) + continue; + + /* It is sure that, children and grandchildren of .glusterfs + * are directories, just add them to global queue. + */ + if (skip_stat (job, result->d_name)) { + strncpy (path + boff, result->d_name, (plen-boff)); + cjob = dirjob_new (path, job); + if (!cjob) { + err ("dirjob_new(%s): %s\n", + path, strerror (errno)); + ret = -1; + goto out; + } + xwork_addcrawl (xwork, cjob); + continue; + } + + strcpy (gfid_path, slavemnt); + strcat (gfid_path, "/.gfid/"); + strcat (gfid_path, result->d_name); + ret = lstat (gfid_path, &statbuf); + + if (ret && errno == ENOENT) { + out ("%s\n", result->d_name); + BUMP (skipped_gfids); + } + + if (ret && errno != ENOENT) { + err ("stat on slave failed(%s): %s\n", + gfid_path, strerror (errno)); + goto out; + } + } + + ret = 0; +out: + if (dirp) + closedir (dirp); + + return ret; +} + + +void * +xworker_crawl (void *data) +{ + struct xwork *xwork = data; + struct dirjob *job = NULL; + int ret = -1; + + while ((job = xwork_pick (xwork, 0))) { + ret = xworker_do_crawl (xwork, job); + dirjob_ret (job, ret); + } + + return NULL; +} + +int +xwork_fini (struct xwork *xwork, int stop) +{ + int i = 0; + int ret = 0; + void *tret = 0; + + pthread_mutex_lock (&xwork->mutex); + { + xwork->stop = (xwork->stop || stop); + pthread_cond_broadcast (&xwork->cond); + } + pthread_mutex_unlock (&xwork->mutex); + + for (i = 0; i < xwork->count; i++) { + pthread_join (xwork->cthreads[i], &tret); + tdbg ("CThread id %ld returned %p\n", + xwork->cthreads[i], tret); + } + + if (debug) { + assert (xwork->rootjob->refcnt == 1); + dirjob_ret (xwork->rootjob, 0); + } + + if (stats) + pthread_spin_destroy(&stats_lock); + + return ret; +} + + +int +xwork_init (struct xwork *xwork, int count) +{ + int i = 0; + int ret = 0; + struct dirjob *rootjob = NULL; + + if (stats) + pthread_spin_init (&stats_lock, PTHREAD_PROCESS_PRIVATE); + + pthread_mutex_init (&xwork->mutex, NULL); + pthread_cond_init (&xwork->cond, NULL); + + INIT_LIST_HEAD (&xwork->crawl.list); + + rootjob = dirjob_new (".glusterfs", NULL); + if (debug) + xwork->rootjob = dirjob_ref (rootjob); + + xwork_addcrawl (xwork, rootjob); + + xwork->count = count; + for (i = 0; i < count; i++) { + ret = pthread_create (&xwork->cthreads[i], NULL, + xworker_crawl, xwork); + if (ret) + break; + tdbg ("Spawned crawler %d thread %ld\n", i, + xwork->cthreads[i]); + } + + return ret; +} + + +int +xfind (const char *basedir) +{ + struct xwork xwork; + int ret = 0; + char *cwd = NULL; + + ret = chdir (basedir); + if (ret) { + err ("%s: %s\n", basedir, strerror (errno)); + return ret; + } + + cwd = getcwd (0, 0); + if (!cwd) { + err ("getcwd(): %s\n", strerror (errno)); + return -1; + } + + tdbg ("Working directory: %s\n", cwd); + free (cwd); + + memset (&xwork, 0, sizeof (xwork)); + + ret = xwork_init (&xwork, workers); + if (ret == 0) + xworker_crawl (&xwork); + + ret = xwork_fini (&xwork, ret); + stats_dump (); + + return ret; +} + +static char * +parse_and_validate_args (int argc, char *argv[]) +{ + char *basedir = NULL; + struct stat d = {0, }; + int ret = -1; +#ifndef __FreeBSD__ + unsigned char volume_id[16]; +#endif /* __FreeBSD__ */ + char *slv_mnt = NULL; + + if (argc != 4) { + err ("Usage: %s <DIR> <SLAVE-VOL-MOUNT> <CRAWL-THREAD-COUNT>\n", + argv[0]); + return NULL; + } + + basedir = argv[1]; + ret = lstat (basedir, &d); + if (ret) { + err ("%s: %s\n", basedir, strerror (errno)); + return NULL; + } + +#ifndef __FreeBSD__ + ret = lgetxattr (basedir, "trusted.glusterfs.volume-id", + volume_id, 16); + if (ret != 16) { + err ("%s:Not a valid brick path.\n", basedir); + return NULL; + } +#endif /* __FreeBSD__ */ + + slv_mnt = argv[2]; + ret = lstat (slv_mnt, &d); + if (ret) { + err ("%s: %s\n", slv_mnt, strerror (errno)); + return NULL; + } + slavemnt = argv[2]; + + workers = atoi(argv[3]); + if (workers <= 0) + workers = DEFAULT_WORKERS; + + return basedir; +} + +int +main (int argc, char *argv[]) +{ + char *basedir = NULL; + + basedir = parse_and_validate_args (argc, argv); + if (!basedir) + return 1; + + xfind (basedir); + + return 0; +} |