diff options
| author | Kotresh HR <khiremat@redhat.com> | 2015-01-29 15:53:19 +0530 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2015-03-15 21:20:03 -0700 | 
| commit | 7a9a66cc5fb7f06118fab1fc2ae1c43cfbb1178f (patch) | |
| tree | 11a1b53b1410c7bd9b9cf2424b2e75118bd86d18 /tools | |
| parent | 38e342ca4a2167720bea82d3cee7fca08baba666 (diff) | |
tools: Finds missing files in gluster volume given backend brickpath
The tool finds the missing files in a geo-replication slave volume.
The tool crawls the backend .glusterfs directory of the brickpath, which is
passed as a parameter, and stats each entry on the slave volume mount to check
for the presence of the file. The mount used is an aux-gfid-mount, hence no
path conversion is required and the check is fast. The tool needs to be run on
every node in the cluster for each brickpath of the geo-rep master volume to
find missing files on the slave volume. The tool is generic enough and can be
used in a non geo-replication context as well.
Most of the crawler code is leveraged from Avati's xfind and is modified
to crawl only .glusterfs (https://github.com/avati/xsync)
Thanks to Aravinda for the scripts to convert gfid to path.
Change-Id: I84deaaaf638f7c571ff1319b67a3440fe27da810
BUG: 1187140
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: http://review.gluster.org/9503
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/Makefile.am | 3 | ||||
| -rw-r--r-- | tools/gfind_missing_files/Makefile.am | 24 | ||||
| -rw-r--r-- | tools/gfind_missing_files/gcrawler.c | 572 | ||||
| -rw-r--r-- | tools/gfind_missing_files/gfid_to_path.py | 162 | ||||
| -rw-r--r-- | tools/gfind_missing_files/gfid_to_path.sh | 42 | ||||
| -rw-r--r-- | tools/gfind_missing_files/gfind_missing_files.sh | 119 | 
6 files changed, 922 insertions, 0 deletions
diff --git a/tools/Makefile.am b/tools/Makefile.am new file mode 100644 index 00000000000..74229ab41e7 --- /dev/null +++ b/tools/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = gfind_missing_files + +CLEANFILES = diff --git a/tools/gfind_missing_files/Makefile.am b/tools/gfind_missing_files/Makefile.am new file mode 100644 index 00000000000..456aad836b6 --- /dev/null +++ b/tools/gfind_missing_files/Makefile.am @@ -0,0 +1,24 @@ +gfindmissingfilesdir = $(libexecdir)/glusterfs/gfind_missing_files + +gfindmissingfiles_SCRIPTS = gfind_missing_files.sh gfid_to_path.sh \ +	gfid_to_path.py + +EXTRA_DIST = gfind_missing_files.sh gfid_to_path.sh \ +	gfid_to_path.py + +gfindmissingfiles_PROGRAMS = gcrawler + +gcrawler_SOURCES = gcrawler.c + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +uninstall-local: +	rm -f $(DESTDIR)$(sbindir)/gfind_missing_files + +install-data-local: +	rm -f $(DESTDIR)$(sbindir)/gfind_missing_files +	ln -s $(libexecdir)/glusterfs/gfind_missing_files/gfind_missing_files.sh $(DESTDIR)$(sbindir)/gfind_missing_files + +CLEANFILES = diff --git a/tools/gfind_missing_files/gcrawler.c b/tools/gfind_missing_files/gcrawler.c new file mode 100644 index 00000000000..517e773cb7c --- /dev/null +++ b/tools/gfind_missing_files/gcrawler.c @@ -0,0 +1,572 @@ +/* +  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> +  This file is part of GlusterFS. + +  This file is licensed to you under your choice of the GNU Lesser +  General Public License, version 3 or any later version (LGPLv3 or +  later), or the GNU General Public License, version 2 (GPLv2), in all +  cases as published by the Free Software Foundation. 
+*/ + +#include <stdio.h> +#include <errno.h> +#include <sys/stat.h> +#include <unistd.h> +#include <pthread.h> +#include <stdlib.h> +#include <string.h> +#include <dirent.h> +#include <assert.h> + +#ifndef __FreeBSD__ +#ifdef __NetBSD__ +#include <sys/xattr.h> +#else +#include <attr/xattr.h> +#endif /* __NetBSD__ */ +#endif /* __FreeBSD__ */ + +#include "list.h" + +#define THREAD_MAX 32 +#define BUMP(name) INC(name, 1) +#define DEFAULT_WORKERS 4 + +#define NEW(x) {                              \ +        x = calloc (1, sizeof (typeof (*x))); \ +        } + +#define err(x ...) fprintf(stderr, x) +#define out(x ...) fprintf(stdout, x) +#define dbg(x ...) do { if (debug) fprintf(stdout, x); } while (0) +#define tout(x ...) do { out("[%ld] ", pthread_self()); out(x); } while (0) +#define terr(x ...) do { err("[%ld] ", pthread_self()); err(x); } while (0) +#define tdbg(x ...) do { dbg("[%ld] ", pthread_self()); dbg(x); } while (0) + +int debug = 0; +const char *slavemnt = NULL; +int workers = 0; + +struct stats { +        unsigned long long int cnt_skipped_gfids; +}; + +pthread_spinlock_t stats_lock; + +struct stats stats_total; +int stats = 0; + +#define INC(name, val) do {                             \ +        if (!stats)                                     \ +                break;                                  \ +        pthread_spin_lock(&stats_lock);                 \ +        {                                               \ +                stats_total.cnt_##name += val;          \ +        }                                               \ +        pthread_spin_unlock(&stats_lock);               \ +        } while (0) + +void +stats_dump() +{ +        if (!stats) +                return; + +        out("-------------------------------------------\n"); +        out("Skipped_Files : %10lld\n", stats_total.cnt_skipped_gfids); +        out("-------------------------------------------\n"); +} + +struct dirjob { +        struct list_head    list; + +        char  
             *dirname; + +        struct dirjob      *parent; +        int                 ret;    /* final status of this subtree */ +        int                 refcnt; /* how many dirjobs have this as parent */ + +        pthread_spinlock_t  lock; +}; + + +struct xwork { +        pthread_t        cthreads[THREAD_MAX]; /* crawler threads */ +        int              count; +        int              idle; +        int              stop; + +        struct dirjob    crawl; + +        struct dirjob   *rootjob; /* to verify completion in xwork_fini() */ + +        pthread_mutex_t  mutex; +        pthread_cond_t   cond; +}; + + +struct dirjob * +dirjob_ref (struct dirjob *job) +{ +        pthread_spin_lock (&job->lock); +        { +                job->refcnt++; +        } +        pthread_spin_unlock (&job->lock); + +        return job; +} + + +void +dirjob_free (struct dirjob *job) +{ +        assert (list_empty (&job->list)); + +        pthread_spin_destroy (&job->lock); +        free (job->dirname); +        free (job); +} + +void +dirjob_ret (struct dirjob *job, int err) +{ +        int            ret = 0; +        int            refcnt = 0; +        struct dirjob *parent = NULL; + +        pthread_spin_lock (&job->lock); +        { +                refcnt = --job->refcnt; +                job->ret = (job->ret || err); +        } +        pthread_spin_unlock (&job->lock); + +        if (refcnt == 0) { +                ret = job->ret; + +                if (ret) +                        terr ("Failed: %s (%d)\n", job->dirname, ret); +                else +                        tdbg ("Finished: %s\n", job->dirname); + +                parent = job->parent; +                if (parent) +                        dirjob_ret (parent, ret); + +                dirjob_free (job); +                job = NULL; +        } +} + + +struct dirjob * +dirjob_new (const char *dir, struct dirjob *parent) +{ +        struct dirjob *job = NULL; + +        NEW(job); +        if (!job) 
+                return NULL; + +        job->dirname = strdup (dir); +        if (!job->dirname) { +                free (job); +                return NULL; +        } + +        INIT_LIST_HEAD(&job->list); +        pthread_spin_init (&job->lock, PTHREAD_PROCESS_PRIVATE); +        job->ret = 0; + +        if (parent) +                job->parent = dirjob_ref (parent); + +        job->refcnt = 1; + +        return job; +} + +void +xwork_addcrawl (struct xwork *xwork, struct dirjob *job) +{ +        pthread_mutex_lock (&xwork->mutex); +        { +                list_add_tail (&job->list, &xwork->crawl.list); +                pthread_cond_broadcast (&xwork->cond); +        } +        pthread_mutex_unlock (&xwork->mutex); +} + +int +xwork_add (struct xwork *xwork, const char *dir, struct dirjob *parent) +{ +        struct dirjob *job = NULL; + +        job = dirjob_new (dir, parent); +        if (!job) +                return -1; + +        xwork_addcrawl (xwork, job); + +        return 0; +} + + +struct dirjob * +xwork_pick (struct xwork *xwork, int block) +{ +        struct dirjob *job = NULL; +        struct list_head *head = NULL; + +        head = &xwork->crawl.list; + +        pthread_mutex_lock (&xwork->mutex); +        { +                for (;;) { +                        if (xwork->stop) +                                break; + +                        if (!list_empty (head)) { +                                job = list_entry (head->next, typeof(*job), +                                                  list); +                                list_del_init (&job->list); +                                break; +                        } + +                        if (((xwork->count * 2) == xwork->idle) && +                            list_empty (&xwork->crawl.list)) { +                                /* no outstanding jobs, and no +                                   active workers +                                */ +                                tdbg 
("Jobless. Terminating\n"); +                                xwork->stop = 1; +                                pthread_cond_broadcast (&xwork->cond); +                                break; +                        } + +                        if (!block) +                                break; + +                        xwork->idle++; +                        pthread_cond_wait (&xwork->cond, &xwork->mutex); +                        xwork->idle--; +                } +        } +        pthread_mutex_unlock (&xwork->mutex); + +        return job; +} + +int +skip_name (const char *dirname, const char *name) +{ +        if (strcmp (name, ".") == 0) +                return 1; + +        if (strcmp (name, "..") == 0) +                return 1; + +        if (strcmp (name, "changelogs") == 0) +                return 1; + +        if (strcmp (name, "health_check") == 0) +                return 1; + +        if (strcmp (name, "indices") == 0) +                return 1; + +        if (strcmp (name, "landfill") == 0) +                return 1; + +        return 0; +} + +int +skip_stat (struct dirjob *job, const char *name) +{ +        if (job == NULL) +                return 0; + +        if (strcmp (job->dirname, ".glusterfs") == 0) { +                tdbg ("Directly adding directories under .glusterfs " +                      "to global list: %s\n", name); +                return 1; +        } + +        if (job->parent != NULL) { +                if (strcmp (job->parent->dirname, ".glusterfs") == 0) { +                        tdbg ("Directly adding directories under .glusterfs/XX " +                              "to global list: %s\n", name); +                        return 1; +                } +        } + +        return 0; +} + +int +xworker_do_crawl (struct xwork *xwork, struct dirjob *job) +{ +        DIR            *dirp = NULL; +        int             ret = -1; +        int             boff; +        int             plen; +        struct dirent  *result; +        
char            dbuf[512]; +        char           *path = NULL; +        struct dirjob  *cjob = NULL; +        struct stat     statbuf = {0,}; +        char            gfid_path[4096] = {0,}; + + +        plen = strlen (job->dirname) + 256 + 2; +        path = alloca (plen); + +        tdbg ("Entering: %s\n", job->dirname); + +        dirp = opendir (job->dirname); +        if (!dirp) { +                terr ("opendir failed on %s (%s)\n", job->dirname, +                     strerror (errno)); +                goto out; +        } + +        boff = sprintf (path, "%s/", job->dirname); + +        for (;;) { +                ret = readdir_r (dirp, (struct dirent *)dbuf, &result); +                if (ret) { +                        err ("readdir_r(%s): %s\n", job->dirname, +                             strerror (errno)); +                        goto out; +                } + +                if (!result) /* EOF */ +                        break; + +                if (result->d_ino == 0) +                        continue; + +                if (skip_name (job->dirname, result->d_name)) +                        continue; + +                /* It is sure that, children and grandchildren of .glusterfs +                 * are directories, just add them to global queue. 
+                 */ +                if (skip_stat (job, result->d_name)) { +                        strncpy (path + boff, result->d_name, (plen-boff)); +                        cjob = dirjob_new (path, job); +                        if (!cjob) { +                                err ("dirjob_new(%s): %s\n", +                                     path, strerror (errno)); +                                ret = -1; +                                goto out; +                        } +                        xwork_addcrawl (xwork, cjob); +                        continue; +                } + +                strcpy (gfid_path, slavemnt); +                strcat (gfid_path, "/.gfid/"); +                strcat (gfid_path, result->d_name); +                ret = lstat (gfid_path, &statbuf); + +                if (ret && errno == ENOENT) { +                        out ("%s\n", result->d_name); +                        BUMP (skipped_gfids); +                } + +                if (ret && errno != ENOENT) { +                        err ("stat on slave failed(%s): %s\n", +                             gfid_path, strerror (errno)); +                        goto out; +                } +        } + +        ret = 0; +out: +        if (dirp) +                closedir (dirp); + +        return ret; +} + + +void * +xworker_crawl (void *data) +{ +        struct xwork *xwork = data; +        struct dirjob *job = NULL; +        int            ret = -1; + +        while ((job = xwork_pick (xwork, 0))) { +                ret = xworker_do_crawl (xwork, job); +                dirjob_ret (job, ret); +        } + +        return NULL; +} + +int +xwork_fini (struct xwork *xwork, int stop) +{ +        int i = 0; +        int ret = 0; +        void *tret = 0; + +        pthread_mutex_lock (&xwork->mutex); +        { +                xwork->stop = (xwork->stop || stop); +                pthread_cond_broadcast (&xwork->cond); +        } +        pthread_mutex_unlock (&xwork->mutex); + +       
 for (i = 0; i < xwork->count; i++) { +                pthread_join (xwork->cthreads[i], &tret); +                tdbg ("CThread id %ld returned %p\n", +                      xwork->cthreads[i], tret); +        } + +        if (debug) { +                assert (xwork->rootjob->refcnt == 1); +                dirjob_ret (xwork->rootjob, 0); +        } + +        if (stats) +                pthread_spin_destroy(&stats_lock); + +        return ret; +} + + +int +xwork_init (struct xwork *xwork, int count) +{ +        int  i = 0; +        int  ret = 0; +        struct dirjob *rootjob = NULL; + +        if (stats) +                pthread_spin_init (&stats_lock, PTHREAD_PROCESS_PRIVATE); + +        pthread_mutex_init (&xwork->mutex, NULL); +        pthread_cond_init (&xwork->cond, NULL); + +        INIT_LIST_HEAD (&xwork->crawl.list); + +        rootjob = dirjob_new (".glusterfs", NULL); +        if (debug) +                xwork->rootjob = dirjob_ref (rootjob); + +        xwork_addcrawl (xwork, rootjob); + +        xwork->count = count; +        for (i = 0; i < count; i++) { +                ret = pthread_create (&xwork->cthreads[i], NULL, +                                      xworker_crawl, xwork); +                if (ret) +                        break; +                tdbg ("Spawned crawler %d thread %ld\n", i, +                      xwork->cthreads[i]); +        } + +        return ret; +} + + +int +xfind (const char *basedir) +{ +        struct xwork xwork; +        int          ret = 0; +        char         *cwd = NULL; + +        ret = chdir (basedir); +        if (ret) { +                err ("%s: %s\n", basedir, strerror (errno)); +                return ret; +        } + +        cwd = getcwd (0, 0); +        if (!cwd) { +                err ("getcwd(): %s\n", strerror (errno)); +                return -1; +        } + +        tdbg ("Working directory: %s\n", cwd); +        free (cwd); + +        memset (&xwork, 0, sizeof (xwork)); + +        ret = 
xwork_init (&xwork, workers); +        if (ret == 0) +                xworker_crawl (&xwork); + +        ret = xwork_fini (&xwork, ret); +        stats_dump (); + +        return ret; +} + +static char * +parse_and_validate_args (int argc, char *argv[]) +{ +        char        *basedir = NULL; +        struct stat  d = {0, }; +        int          ret = -1; +#ifndef __FreeBSD__ +        unsigned char volume_id[16]; +#endif /* __FreeBSD__ */ +        char        *slv_mnt = NULL; + +        if (argc != 4) { +                err ("Usage: %s <DIR> <SLAVE-VOL-MOUNT> <CRAWL-THREAD-COUNT>\n", +                      argv[0]); +                return NULL; +        } + +        basedir = argv[1]; +        ret = lstat (basedir, &d); +        if (ret) { +                err ("%s: %s\n", basedir, strerror (errno)); +                return NULL; +        } + +#ifndef __FreeBSD__ +        ret = lgetxattr (basedir, "trusted.glusterfs.volume-id", +                         volume_id, 16); +        if (ret != 16) { +                err ("%s:Not a valid brick path.\n", basedir); +                return NULL; +        } +#endif /* __FreeBSD__ */ + +        slv_mnt = argv[2]; +        ret = lstat (slv_mnt, &d); +        if (ret) { +                err ("%s: %s\n", slv_mnt, strerror (errno)); +                return NULL; +        } +        slavemnt = argv[2]; + +        workers = atoi(argv[3]); +        if (workers <= 0) +                workers = DEFAULT_WORKERS; + +        return basedir; +} + +int +main (int argc, char *argv[]) +{ +        char *basedir = NULL; + +        basedir = parse_and_validate_args (argc, argv); +        if (!basedir) +                return 1; + +        xfind (basedir); + +        return 0; +} diff --git a/tools/gfind_missing_files/gfid_to_path.py b/tools/gfind_missing_files/gfid_to_path.py new file mode 100644 index 00000000000..8362f68b955 --- /dev/null +++ b/tools/gfind_missing_files/gfid_to_path.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python + +# 
Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. + +import sys +import os +import xattr +import uuid +import re +import errno + +CHANGELOG_SEARCH_MAX_TRY = 31 +DEC_CTIME_START = 5 +ROOT_GFID = "00000000-0000-0000-0000-000000000001" +MAX_NUM_CHANGELOGS_TRY = 2 + + +def output_not_found(gfid): +    # Write GFID to stderr +    sys.stderr.write("%s\n" % gfid) + + +def output_success(path): +    # Write converted Path to Stdout +    sys.stdout.write("%s\n" % path) + + +def full_dir_path(gfid): +    out_path = "" +    while True: +        path = os.path.join(".glusterfs", gfid[0:2], gfid[2:4], gfid) +        path_readlink = os.readlink(path) +        pgfid = os.path.dirname(path_readlink) +        out_path = os.path.join(os.path.basename(path_readlink), out_path) +        if pgfid == "../../00/00/%s" % ROOT_GFID: +            out_path = os.path.join("./", out_path) +            break +        gfid = os.path.basename(pgfid) +    return out_path + + +def find_path_from_changelog(fd, gfid): +    """ +    In given Changelog File, finds using following pattern +    <T><GFID>\x00<TYPE>\x00<MODE>\x00<UID>\x00<GID>\x00<PARGFID>/<BASENAME> +    Pattern search finds PARGFID and BASENAME, Convert PARGFID to Path +    Using readlink and add basename to form Full path. 
+    """ +    content = fd.read() + +    pattern = "E%s" % gfid +    pattern += "\x00(3|23)\x00\d+\x00\d+\x00\d+\x00([^\x00]+)/([^\x00]+)" +    pat = re.compile(pattern) +    match = pat.search(content) + +    if match: +        pgfid = match.group(2) +        basename = match.group(3) +        if pgfid == ROOT_GFID: +            return os.path.join("./", basename) +        else: +            full_path_parent = full_dir_path(pgfid) +            if full_path_parent: +                return os.path.join(full_path_parent, basename) + +    return None + + +def gfid_to_path(gfid): +    """ +    Try readlink, if it is directory it succeeds. +    Get ctime of the GFID file, Decrement by 5 sec +    Search for Changelog filename, Since Changelog file generated +    every 15 sec, Search and get immediate next Changelog after the file +    Creation. Get the Path by searching in Changelog file. +    Get the resultant file's GFID and Compare with the input, If these +    GFIDs are different then Some thing is changed(May be Rename) +    """ +    gfid = gfid.strip() +    gpath = os.path.join(".glusterfs", gfid[0:2], gfid[2:4], gfid) +    try: +        output_success(full_dir_path(gfid)) +        return +    except OSError: +        # Not an SymLink +        pass + +    try: +        ctime = int(os.stat(gpath).st_ctime) +        ctime -= DEC_CTIME_START +    except (OSError, IOError): +        output_not_found(gfid) +        return + +    path = None +    found_changelog = False +    changelog_parse_try = 0 +    for i in range(CHANGELOG_SEARCH_MAX_TRY): +        cl = os.path.join(".glusterfs/changelogs", "CHANGELOG.%s" % ctime) + +        try: +            with open(cl, "rb") as f: +                changelog_parse_try += 1 +                found_changelog = True +                path = find_path_from_changelog(f, gfid) +                if not path and changelog_parse_try < MAX_NUM_CHANGELOGS_TRY: +                    ctime += 1 +                    continue +            break +   
     except (IOError, OSError) as e: +            if e.errno == errno.ENOENT: +                ctime += 1 +            else: +                break + +    if not found_changelog: +        output_not_found(gfid) +        return + +    if not path: +        output_not_found(gfid) +        return +    gfid1 = str(uuid.UUID(bytes=xattr.get(path, "trusted.gfid"))) +    if gfid != gfid1: +        output_not_found(gfid) +        return + +    output_success(path) + + +def main(): +    num_arguments = 3 +    if not sys.stdin.isatty(): +        num_arguments = 2 + +    if len(sys.argv) != num_arguments: +        sys.stderr.write("Invalid arguments\nUsage: " +                         "%s <BRICK_PATH> <GFID_FILE>\n" % sys.argv[0]) +        sys.exit(1) + +    path = sys.argv[1] + +    if sys.stdin.isatty(): +        gfid_list = os.path.abspath(sys.argv[2]) +        os.chdir(path) +        with open(gfid_list) as f: +            for gfid in f: +                gfid_to_path(gfid) +    else: +        os.chdir(path) +        for gfid in sys.stdin: +            gfid_to_path(gfid) + + +if __name__ == "__main__": +    main() diff --git a/tools/gfind_missing_files/gfid_to_path.sh b/tools/gfind_missing_files/gfid_to_path.sh new file mode 100644 index 00000000000..20ac6a94fd2 --- /dev/null +++ b/tools/gfind_missing_files/gfid_to_path.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +## Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> +## This file is part of GlusterFS. +## +## This file is licensed to you under your choice of the GNU Lesser +## General Public License, version 3 or any later version (LGPLv3 or +## later), or the GNU General Public License, version 2 (GPLv2), in all +## cases as published by the Free Software Foundation. 
+ +E_BADARGS=65 + + +function gfid_to_path() +{ +    brick_dir=$1; +    gfid_file=$(readlink -e $2); + +    current_dir=$(pwd); +    cd $brick_dir; + +    while read gfid +    do +        to_search=`echo .glusterfs/${gfid:0:2}"/"${gfid:2:2}"/"$gfid`; +        find . -samefile $to_search | grep -v $to_search; +    done < $gfid_file; + +    cd $current_dir; +} + + +function main(){ +    if [ $# -ne 2 ] +    then +        echo "Usage: `basename $0` BRICK_DIR GFID_FILE"; +        exit $E_BADARGS; +    fi + +    gfid_to_path $1 $2; +} + +main "$@"; diff --git a/tools/gfind_missing_files/gfind_missing_files.sh b/tools/gfind_missing_files/gfind_missing_files.sh new file mode 100644 index 00000000000..07d6befc958 --- /dev/null +++ b/tools/gfind_missing_files/gfind_missing_files.sh @@ -0,0 +1,119 @@ +#!/bin/sh + +##  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> +##  This file is part of GlusterFS. +## +##  This file is licensed to you under your choice of the GNU Lesser +##  General Public License, version 3 or any later version (LGPLv3 or +##  later), or the GNU General Public License, version 2 (GPLv2), in all +##  cases as published by the Free Software Foundation. + +BRICKPATH=    #Brick path of gluster volume +SLAVEHOST=    #Slave hostname +SLAVEVOL=     #Slave volume +SLAVEMNT=     #Slave gluster volume mount point +WORKERS=4     #Default number of worker threads + +function out() +{ +    echo "$@"; +} + +function fatal() +{ +    out FATAL "$@"; +    exit 1 +} + +function ping_host () +{ +    ### Use bash internal socket support +    { +        exec 400<>/dev/tcp/$1/$2 +        if [ $? 
-ne '0' ]; then +            return 1; +        else +            exec 400>&- +            return 0; +        fi +    } 1>&2 2>/dev/null +} + +function mount_slave() +{ +    local i; # inode number +    SSH_PORT=22 + +    SLAVEMNT=`mktemp -d` +    [ "x$SLAVEMNT" = "x" ] && fatal "Could not mktemp directory"; +    [ -d "$SLAVEMNT" ] || fatal "$SLAVEMNT not a directory"; + +    ping_host ${SLAVEHOST} $SSH_PORT +    if [ $? -ne 0 ]; then +        echo "$SLAVEHOST not reachable."; +        exit 1; +    fi; + +    glusterfs --volfile-id=$SLAVEVOL --aux-gfid-mount --volfile-server=$SLAVEHOST $SLAVEMNT; +    i=$(stat -c '%i' $SLAVEMNT); +    [ "x$i" = "x1" ] || fatal "Could not mount volume $2 on $SLAVEMNT Please check host and volume exists"; +} + +function parse_cli() +{ +    if [[ $# -ne 4 ]]; then +        echo "Usage: gfind_missing_files <brick-path> <slave-host> <slave-vol> <OUTFILE>" +        exit 1 +    else +        BRICKPATH=$1; +        SLAVEHOST=$2; +        SLAVEVOL=$3; +        OUTFILE=$4; + +        mount_slave; +        echo "Slave volume is mounted at ${SLAVEMNT}" +        echo +    fi +} + +function main() +{ +    parse_cli "$@"; + +    echo "Calling crawler..."; +    path=$(readlink -e $0) +    $(dirname $path)/gcrawler ${BRICKPATH} ${SLAVEMNT} ${WORKERS} > ${OUTFILE} + +    #Clean up the mount +    umount $SLAVEMNT; +    rmdir $SLAVEMNT; + +    echo "Crawl Complete." 
+    num_files_missing=$(wc -l ${OUTFILE} | awk '{print $1}') +    if [ $num_files_missing -eq 0 ] +    then +        echo "Total Missing File Count : 0" +        exit 0; +    fi + +    echo "gfids of skipped files are available in the file ${OUTFILE}" +    echo +    echo "Starting gfid to path conversion" + +    #Call python script to convert gfids to full pathname +    INFILE=$(readlink -e ${OUTFILE}) +    python $(dirname $path)/gfid_to_path.py ${BRICKPATH} ${INFILE} 1> ${OUTFILE}_pathnames 2> ${OUTFILE}_gfids +    echo "Path names of skipped files are available in the file ${OUTFILE}_pathnames" + +    gfid_to_path_failures=$(wc -l ${OUTFILE}_gfids | awk '{print $1}') +    if [ $gfid_to_path_failures -gt 0 ] +    then +       echo "WARNING: Unable to convert some GFIDs to Paths, GFIDs logged to ${OUTFILE}_gfids" +       echo "Use $(dirname $path)/gfid_to_path.sh <brick-path> ${OUTFILE}_gfids to convert those GFIDs to Path" +    fi + +    #Output +    echo "Total Missing File Count : $(wc -l ${OUTFILE} | awk '{print $1}')" +} + +main "$@";  | 
