Diffstat (limited to 'tools')
23 files changed, 1078 insertions, 565 deletions
diff --git a/tools/Makefile.am b/tools/Makefile.am index d689f60fa52..5808a3728cd 100644 --- a/tools/Makefile.am +++ b/tools/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = gfind_missing_files glusterfind +SUBDIRS = gfind_missing_files glusterfind setgfid2path CLEANFILES = diff --git a/tools/gfind_missing_files/Makefile.am b/tools/gfind_missing_files/Makefile.am index 043c34c4182..181fe7091f3 100644 --- a/tools/gfind_missing_files/Makefile.am +++ b/tools/gfind_missing_files/Makefile.am @@ -1,12 +1,16 @@ -gfindmissingfilesdir = $(libexecdir)/glusterfs/gfind_missing_files +gfindmissingfilesdir = $(GLUSTERFS_LIBEXECDIR)/gfind_missing_files +if WITH_SERVER gfindmissingfiles_SCRIPTS = gfind_missing_files.sh gfid_to_path.sh \ gfid_to_path.py +endif EXTRA_DIST = gfind_missing_files.sh gfid_to_path.sh \ gfid_to_path.py +if WITH_SERVER gfindmissingfiles_PROGRAMS = gcrawler +endif gcrawler_SOURCES = gcrawler.c gcrawler_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la @@ -16,11 +20,13 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src AM_CFLAGS = -Wall $(GF_CFLAGS) +if WITH_SERVER uninstall-local: rm -f $(DESTDIR)$(sbindir)/gfind_missing_files install-data-local: rm -f $(DESTDIR)$(sbindir)/gfind_missing_files - ln -s $(libexecdir)/glusterfs/gfind_missing_files/gfind_missing_files.sh $(DESTDIR)$(sbindir)/gfind_missing_files + ln -s $(GLUSTERFS_LIBEXECDIR)/gfind_missing_files/gfind_missing_files.sh $(DESTDIR)$(sbindir)/gfind_missing_files +endif CLEANFILES = diff --git a/tools/gfind_missing_files/gcrawler.c b/tools/gfind_missing_files/gcrawler.c index 02b644a1a77..4acbe92bc8f 100644 --- a/tools/gfind_missing_files/gcrawler.c +++ b/tools/gfind_missing_files/gcrawler.c @@ -16,33 +16,50 @@ #include <string.h> #include <dirent.h> #include <assert.h> -#include "locking.h" +#include <glusterfs/locking.h> -#include "compat.h" -#include "list.h" -#include "syscall.h" +#include <glusterfs/compat.h> +#include <glusterfs/list.h> +#include <glusterfs/syscall.h> #define THREAD_MAX 32 #define BUMP(name) INC(name, 1) #define DEFAULT_WORKERS 4 -#define NEW(x) { \ - x = calloc (1, sizeof (typeof (*x))); \ - } - -#define err(x ...) fprintf(stderr, x) -#define out(x ...) fprintf(stdout, x) -#define dbg(x ...) do { if (debug) fprintf(stdout, x); } while (0) -#define tout(x ...) do { out("[%ld] ", pthread_self()); out(x); } while (0) -#define terr(x ...) do { err("[%ld] ", pthread_self()); err(x); } while (0) -#define tdbg(x ...) do { dbg("[%ld] ", pthread_self()); dbg(x); } while (0) +#define NEW(x) \ + { \ + x = calloc(1, sizeof(typeof(*x))); \ + } + +#define err(x...) fprintf(stderr, x) +#define out(x...) fprintf(stdout, x) +#define dbg(x...) \ + do { \ + if (debug) \ + fprintf(stdout, x); \ + } while (0) +#define tout(x...) \ + do { \ + out("[%ld] ", pthread_self()); \ + out(x); \ + } while (0) +#define terr(x...) \ + do { \ + err("[%ld] ", pthread_self()); \ + err(x); \ + } while (0) +#define tdbg(x...) 
\ + do { \ + dbg("[%ld] ", pthread_self()); \ + dbg(x); \ + } while (0) int debug = 0; const char *slavemnt = NULL; int workers = 0; struct stats { - unsigned long long int cnt_skipped_gfids; + unsigned long long int cnt_skipped_gfids; }; pthread_spinlock_t stats_lock; @@ -50,518 +67,515 @@ pthread_spinlock_t stats_lock; struct stats stats_total; int stats = 0; -#define INC(name, val) do { \ - if (!stats) \ - break; \ - pthread_spin_lock(&stats_lock); \ - { \ - stats_total.cnt_##name += val; \ - } \ - pthread_spin_unlock(&stats_lock); \ - } while (0) +#define INC(name, val) \ + do { \ + if (!stats) \ + break; \ + pthread_spin_lock(&stats_lock); \ + { \ + stats_total.cnt_##name += val; \ + } \ + pthread_spin_unlock(&stats_lock); \ + } while (0) void stats_dump() { - if (!stats) - return; + if (!stats) + return; - out("-------------------------------------------\n"); - out("Skipped_Files : %10lld\n", stats_total.cnt_skipped_gfids); - out("-------------------------------------------\n"); + out("-------------------------------------------\n"); + out("Skipped_Files : %10lld\n", stats_total.cnt_skipped_gfids); + out("-------------------------------------------\n"); } struct dirjob { - struct list_head list; + struct list_head list; - char *dirname; + char *dirname; - struct dirjob *parent; - int ret; /* final status of this subtree */ - int refcnt; /* how many dirjobs have this as parent */ + struct dirjob *parent; + int ret; /* final status of this subtree */ + int refcnt; /* how many dirjobs have this as parent */ - pthread_spinlock_t lock; + pthread_spinlock_t lock; }; - struct xwork { - pthread_t cthreads[THREAD_MAX]; /* crawler threads */ - int count; - int idle; - int stop; + pthread_t cthreads[THREAD_MAX]; /* crawler threads */ + int count; + int idle; + int stop; - struct dirjob crawl; + struct dirjob crawl; - struct dirjob *rootjob; /* to verify completion in xwork_fini() */ + struct dirjob *rootjob; /* to verify completion in xwork_fini() */ - pthread_mutex_t mutex; - pthread_cond_t cond; + pthread_mutex_t mutex; + pthread_cond_t cond; }; - struct dirjob * -dirjob_ref (struct dirjob *job) +dirjob_ref(struct dirjob *job) { - pthread_spin_lock (&job->lock); - { - job->refcnt++; - } - pthread_spin_unlock (&job->lock); + pthread_spin_lock(&job->lock); + { + job->refcnt++; + } + pthread_spin_unlock(&job->lock); - return job; + return job; } - void -dirjob_free (struct dirjob *job) +dirjob_free(struct dirjob *job) { - assert (list_empty (&job->list)); + assert(list_empty(&job->list)); - pthread_spin_destroy (&job->lock); - free (job->dirname); - free (job); + pthread_spin_destroy(&job->lock); + free(job->dirname); + free(job); } void -dirjob_ret (struct dirjob *job, int err) +dirjob_ret(struct dirjob *job, int err) { - int ret = 0; - int refcnt = 0; - struct dirjob *parent = NULL; - - pthread_spin_lock (&job->lock); - { - refcnt = --job->refcnt; - job->ret = (job->ret || err); - } - pthread_spin_unlock (&job->lock); - - if (refcnt == 0) { - ret = job->ret; - - if (ret) - terr ("Failed: %s (%d)\n", job->dirname, ret); - else - tdbg ("Finished: %s\n", job->dirname); - - parent = job->parent; - if (parent) - dirjob_ret (parent, ret); + int ret = 0; + int refcnt = 0; + struct dirjob *parent = NULL; + + pthread_spin_lock(&job->lock); + { + refcnt = --job->refcnt; + job->ret = (job->ret || err); + } + pthread_spin_unlock(&job->lock); + + if (refcnt == 0) { + ret = job->ret; + + if (ret) + terr("Failed: %s (%d)\n", job->dirname, ret); + else + tdbg("Finished: %s\n", job->dirname); + + parent = 
job->parent; + if (parent) + dirjob_ret(parent, ret); - dirjob_free (job); - job = NULL; - } + dirjob_free(job); + job = NULL; + } } - struct dirjob * -dirjob_new (const char *dir, struct dirjob *parent) +dirjob_new(const char *dir, struct dirjob *parent) { - struct dirjob *job = NULL; + struct dirjob *job = NULL; - NEW(job); - if (!job) - return NULL; + NEW(job); + if (!job) + return NULL; - job->dirname = strdup (dir); - if (!job->dirname) { - free (job); - return NULL; - } + job->dirname = strdup(dir); + if (!job->dirname) { + free(job); + return NULL; + } - INIT_LIST_HEAD(&job->list); - pthread_spin_init (&job->lock, PTHREAD_PROCESS_PRIVATE); - job->ret = 0; + INIT_LIST_HEAD(&job->list); + pthread_spin_init(&job->lock, PTHREAD_PROCESS_PRIVATE); + job->ret = 0; - if (parent) - job->parent = dirjob_ref (parent); + if (parent) + job->parent = dirjob_ref(parent); - job->refcnt = 1; + job->refcnt = 1; - return job; + return job; } void -xwork_addcrawl (struct xwork *xwork, struct dirjob *job) +xwork_addcrawl(struct xwork *xwork, struct dirjob *job) { - pthread_mutex_lock (&xwork->mutex); - { - list_add_tail (&job->list, &xwork->crawl.list); - pthread_cond_broadcast (&xwork->cond); - } - pthread_mutex_unlock (&xwork->mutex); + pthread_mutex_lock(&xwork->mutex); + { + list_add_tail(&job->list, &xwork->crawl.list); + pthread_cond_broadcast(&xwork->cond); + } + pthread_mutex_unlock(&xwork->mutex); } int -xwork_add (struct xwork *xwork, const char *dir, struct dirjob *parent) +xwork_add(struct xwork *xwork, const char *dir, struct dirjob *parent) { - struct dirjob *job = NULL; + struct dirjob *job = NULL; - job = dirjob_new (dir, parent); - if (!job) - return -1; + job = dirjob_new(dir, parent); + if (!job) + return -1; - xwork_addcrawl (xwork, job); + xwork_addcrawl(xwork, job); - return 0; + return 0; } - struct dirjob * -xwork_pick (struct xwork *xwork, int block) +xwork_pick(struct xwork *xwork, int block) { - struct dirjob *job = NULL; - struct list_head *head = NULL; + struct dirjob *job = NULL; + struct list_head *head = NULL; - head = &xwork->crawl.list; + head = &xwork->crawl.list; - pthread_mutex_lock (&xwork->mutex); - { - for (;;) { - if (xwork->stop) - break; - - if (!list_empty (head)) { - job = list_entry (head->next, typeof(*job), - list); - list_del_init (&job->list); - break; - } - - if (((xwork->count * 2) == xwork->idle) && - list_empty (&xwork->crawl.list)) { - /* no outstanding jobs, and no - active workers - */ - tdbg ("Jobless. Terminating\n"); - xwork->stop = 1; - pthread_cond_broadcast (&xwork->cond); - break; - } - - if (!block) - break; - - xwork->idle++; - pthread_cond_wait (&xwork->cond, &xwork->mutex); - xwork->idle--; - } + pthread_mutex_lock(&xwork->mutex); + { + for (;;) { + if (xwork->stop) + break; + + if (!list_empty(head)) { + job = list_entry(head->next, typeof(*job), list); + list_del_init(&job->list); + break; + } + + if (((xwork->count * 2) == xwork->idle) && + list_empty(&xwork->crawl.list)) { + /* no outstanding jobs, and no + active workers + */ + tdbg("Jobless. 
Terminating\n"); + xwork->stop = 1; + pthread_cond_broadcast(&xwork->cond); + break; + } + + if (!block) + break; + + xwork->idle++; + pthread_cond_wait(&xwork->cond, &xwork->mutex); + xwork->idle--; } - pthread_mutex_unlock (&xwork->mutex); + } + pthread_mutex_unlock(&xwork->mutex); - return job; + return job; } int -skip_name (const char *dirname, const char *name) +skip_name(const char *dirname, const char *name) { - if (strcmp (name, ".") == 0) - return 1; + if (strcmp(name, ".") == 0) + return 1; - if (strcmp (name, "..") == 0) - return 1; + if (strcmp(name, "..") == 0) + return 1; - if (strcmp (name, "changelogs") == 0) - return 1; + if (strcmp(name, "changelogs") == 0) + return 1; - if (strcmp (name, "health_check") == 0) - return 1; + if (strcmp(name, "health_check") == 0) + return 1; - if (strcmp (name, "indices") == 0) - return 1; + if (strcmp(name, "indices") == 0) + return 1; - if (strcmp (name, "landfill") == 0) - return 1; + if (strcmp(name, "landfill") == 0) + return 1; - return 0; + return 0; } int -skip_stat (struct dirjob *job, const char *name) +skip_stat(struct dirjob *job, const char *name) { - if (job == NULL) - return 0; - - if (strcmp (job->dirname, ".glusterfs") == 0) { - tdbg ("Directly adding directories under .glusterfs " - "to global list: %s\n", name); - return 1; - } + if (job == NULL) + return 0; - if (job->parent != NULL) { - if (strcmp (job->parent->dirname, ".glusterfs") == 0) { - tdbg ("Directly adding directories under .glusterfs/XX " - "to global list: %s\n", name); - return 1; - } + if (strcmp(job->dirname, ".glusterfs") == 0) { + tdbg( + "Directly adding directories under .glusterfs " + "to global list: %s\n", + name); + return 1; + } + + if (job->parent != NULL) { + if (strcmp(job->parent->dirname, ".glusterfs") == 0) { + tdbg( + "Directly adding directories under .glusterfs/XX " + "to global list: %s\n", + name); + return 1; } + } - return 0; + return 0; } int -xworker_do_crawl (struct xwork *xwork, struct dirjob *job) +xworker_do_crawl(struct xwork *xwork, struct dirjob *job) { - DIR *dirp = NULL; - int ret = -1; - int boff; - int plen; - char *path = NULL; - struct dirjob *cjob = NULL; - struct stat statbuf = {0,}; - struct dirent *entry; - struct dirent scratch[2] = {{0,},}; - char gfid_path[PATH_MAX] = {0,}; - - - plen = strlen (job->dirname) + 256 + 2; - path = alloca (plen); - - tdbg ("Entering: %s\n", job->dirname); - - dirp = sys_opendir (job->dirname); - if (!dirp) { - terr ("opendir failed on %s (%s)\n", job->dirname, - strerror (errno)); + DIR *dirp = NULL; + int ret = -1; + int boff; + int plen; + char *path = NULL; + struct dirjob *cjob = NULL; + struct stat statbuf = { + 0, + }; + struct dirent *entry; + struct dirent scratch[2] = { + { + 0, + }, + }; + char gfid_path[PATH_MAX] = { + 0, + }; + + plen = strlen(job->dirname) + 256 + 2; + path = alloca(plen); + + tdbg("Entering: %s\n", job->dirname); + + dirp = sys_opendir(job->dirname); + if (!dirp) { + terr("opendir failed on %s (%s)\n", job->dirname, strerror(errno)); + goto out; + } + + boff = sprintf(path, "%s/", job->dirname); + + for (;;) { + errno = 0; + entry = sys_readdir(dirp, scratch); + if (!entry || errno != 0) { + if (errno != 0) { + err("readdir(%s): %s\n", job->dirname, strerror(errno)); + ret = errno; goto out; + } + break; } - boff = sprintf (path, "%s/", job->dirname); + if (entry->d_ino == 0) + continue; + + if (skip_name(job->dirname, entry->d_name)) + continue; + + /* It is sure that, children and grandchildren of .glusterfs + * are directories, just add them to 
global queue. + */ + if (skip_stat(job, entry->d_name)) { + strncpy(path + boff, entry->d_name, (plen - boff)); + cjob = dirjob_new(path, job); + if (!cjob) { + err("dirjob_new(%s): %s\n", path, strerror(errno)); + ret = -1; + goto out; + } + xwork_addcrawl(xwork, cjob); + continue; + } - for (;;) { - errno = 0; - entry = sys_readdir (dirp, scratch); - if (!entry || errno != 0) { - if (errno != 0) { - err ("readdir(%s): %s\n", job->dirname, - strerror (errno)); - ret = errno; - goto out; - } - break; - } - - if (entry->d_ino == 0) - continue; - - if (skip_name (job->dirname, entry->d_name)) - continue; - - /* It is sure that, children and grandchildren of .glusterfs - * are directories, just add them to global queue. - */ - if (skip_stat (job, entry->d_name)) { - strncpy (path + boff, entry->d_name, (plen-boff)); - cjob = dirjob_new (path, job); - if (!cjob) { - err ("dirjob_new(%s): %s\n", - path, strerror (errno)); - ret = -1; - goto out; - } - xwork_addcrawl (xwork, cjob); - continue; - } - - (void) snprintf (gfid_path, sizeof(gfid_path), "%s/.gfid/%s", - slavemnt, entry->d_name); - ret = sys_lstat (gfid_path, &statbuf); - - if (ret && errno == ENOENT) { - out ("%s\n", entry->d_name); - BUMP (skipped_gfids); - } - - if (ret && errno != ENOENT) { - err ("stat on slave failed(%s): %s\n", - gfid_path, strerror (errno)); - goto out; - } + (void)snprintf(gfid_path, sizeof(gfid_path), "%s/.gfid/%s", slavemnt, + entry->d_name); + ret = sys_lstat(gfid_path, &statbuf); + + if (ret && errno == ENOENT) { + out("%s\n", entry->d_name); + BUMP(skipped_gfids); + } + + if (ret && errno != ENOENT) { + err("stat on slave failed(%s): %s\n", gfid_path, strerror(errno)); + goto out; } + } - ret = 0; + ret = 0; out: - if (dirp) - (void) sys_closedir (dirp); + if (dirp) + (void)sys_closedir(dirp); - return ret; + return ret; } - void * -xworker_crawl (void *data) +xworker_crawl(void *data) { - struct xwork *xwork = data; - struct dirjob *job = NULL; - int ret = -1; + struct xwork *xwork = data; + struct dirjob *job = NULL; + int ret = -1; - while ((job = xwork_pick (xwork, 0))) { - ret = xworker_do_crawl (xwork, job); - dirjob_ret (job, ret); - } + while ((job = xwork_pick(xwork, 0))) { + ret = xworker_do_crawl(xwork, job); + dirjob_ret(job, ret); + } - return NULL; + return NULL; } int -xwork_fini (struct xwork *xwork, int stop) +xwork_fini(struct xwork *xwork, int stop) { - int i = 0; - int ret = 0; - void *tret = 0; - - pthread_mutex_lock (&xwork->mutex); - { - xwork->stop = (xwork->stop || stop); - pthread_cond_broadcast (&xwork->cond); - } - pthread_mutex_unlock (&xwork->mutex); - - for (i = 0; i < xwork->count; i++) { - pthread_join (xwork->cthreads[i], &tret); - tdbg ("CThread id %ld returned %p\n", - xwork->cthreads[i], tret); - } - - if (debug) { - assert (xwork->rootjob->refcnt == 1); - dirjob_ret (xwork->rootjob, 0); - } - - if (stats) - pthread_spin_destroy(&stats_lock); - - return ret; + int i = 0; + int ret = 0; + void *tret = 0; + + pthread_mutex_lock(&xwork->mutex); + { + xwork->stop = (xwork->stop || stop); + pthread_cond_broadcast(&xwork->cond); + } + pthread_mutex_unlock(&xwork->mutex); + + for (i = 0; i < xwork->count; i++) { + pthread_join(xwork->cthreads[i], &tret); + tdbg("CThread id %ld returned %p\n", xwork->cthreads[i], tret); + } + + if (debug) { + assert(xwork->rootjob->refcnt == 1); + dirjob_ret(xwork->rootjob, 0); + } + + if (stats) + pthread_spin_destroy(&stats_lock); + + return ret; } - int -xwork_init (struct xwork *xwork, int count) +xwork_init(struct xwork *xwork, int count) 
{ - int i = 0; - int ret = 0; - struct dirjob *rootjob = NULL; + int i = 0; + int ret = 0; + struct dirjob *rootjob = NULL; - if (stats) - pthread_spin_init (&stats_lock, PTHREAD_PROCESS_PRIVATE); + if (stats) + pthread_spin_init(&stats_lock, PTHREAD_PROCESS_PRIVATE); - pthread_mutex_init (&xwork->mutex, NULL); - pthread_cond_init (&xwork->cond, NULL); + pthread_mutex_init(&xwork->mutex, NULL); + pthread_cond_init(&xwork->cond, NULL); - INIT_LIST_HEAD (&xwork->crawl.list); + INIT_LIST_HEAD(&xwork->crawl.list); - rootjob = dirjob_new (".glusterfs", NULL); - if (debug) - xwork->rootjob = dirjob_ref (rootjob); + rootjob = dirjob_new(".glusterfs", NULL); + if (debug) + xwork->rootjob = dirjob_ref(rootjob); - xwork_addcrawl (xwork, rootjob); + xwork_addcrawl(xwork, rootjob); - xwork->count = count; - for (i = 0; i < count; i++) { - ret = pthread_create (&xwork->cthreads[i], NULL, - xworker_crawl, xwork); - if (ret) - break; - tdbg ("Spawned crawler %d thread %ld\n", i, - xwork->cthreads[i]); - } + xwork->count = count; + for (i = 0; i < count; i++) { + ret = pthread_create(&xwork->cthreads[i], NULL, xworker_crawl, xwork); + if (ret) + break; + tdbg("Spawned crawler %d thread %ld\n", i, xwork->cthreads[i]); + } - return ret; + return ret; } - int -xfind (const char *basedir) +xfind(const char *basedir) { - struct xwork xwork; - int ret = 0; - char *cwd = NULL; - - ret = chdir (basedir); - if (ret) { - err ("%s: %s\n", basedir, strerror (errno)); - return ret; - } + struct xwork xwork; + int ret = 0; + char *cwd = NULL; - cwd = getcwd (0, 0); - if (!cwd) { - err ("getcwd(): %s\n", strerror (errno)); - return -1; - } + ret = chdir(basedir); + if (ret) { + err("%s: %s\n", basedir, strerror(errno)); + return ret; + } - tdbg ("Working directory: %s\n", cwd); - free (cwd); + cwd = getcwd(0, 0); + if (!cwd) { + err("getcwd(): %s\n", strerror(errno)); + return -1; + } - memset (&xwork, 0, sizeof (xwork)); + tdbg("Working directory: %s\n", cwd); + free(cwd); - ret = xwork_init (&xwork, workers); - if (ret == 0) - xworker_crawl (&xwork); + memset(&xwork, 0, sizeof(xwork)); - ret = xwork_fini (&xwork, ret); - stats_dump (); + ret = xwork_init(&xwork, workers); + if (ret == 0) + xworker_crawl(&xwork); - return ret; + ret = xwork_fini(&xwork, ret); + stats_dump(); + + return ret; } static char * -parse_and_validate_args (int argc, char *argv[]) +parse_and_validate_args(int argc, char *argv[]) { - char *basedir = NULL; - struct stat d = {0, }; - int ret = -1; + char *basedir = NULL; + struct stat d = { + 0, + }; + int ret = -1; #ifndef __FreeBSD__ - unsigned char volume_id[16]; + unsigned char volume_id[16]; #endif /* __FreeBSD__ */ - char *slv_mnt = NULL; + char *slv_mnt = NULL; - if (argc != 4) { - err ("Usage: %s <DIR> <SLAVE-VOL-MOUNT> <CRAWL-THREAD-COUNT>\n", - argv[0]); - return NULL; - } + if (argc != 4) { + err("Usage: %s <DIR> <SLAVE-VOL-MOUNT> <CRAWL-THREAD-COUNT>\n", + argv[0]); + return NULL; + } - basedir = argv[1]; - ret = sys_lstat (basedir, &d); - if (ret) { - err ("%s: %s\n", basedir, strerror (errno)); - return NULL; - } + basedir = argv[1]; + ret = sys_lstat(basedir, &d); + if (ret) { + err("%s: %s\n", basedir, strerror(errno)); + return NULL; + } #ifndef __FreeBSD__ - ret = sys_lgetxattr (basedir, "trusted.glusterfs.volume-id", - volume_id, 16); - if (ret != 16) { - err ("%s:Not a valid brick path.\n", basedir); - return NULL; - } + ret = sys_lgetxattr(basedir, "trusted.glusterfs.volume-id", volume_id, 16); + if (ret != 16) { + err("%s:Not a valid brick path.\n", basedir); + return NULL; + 
} #endif /* __FreeBSD__ */ - slv_mnt = argv[2]; - ret = sys_lstat (slv_mnt, &d); - if (ret) { - err ("%s: %s\n", slv_mnt, strerror (errno)); - return NULL; - } - slavemnt = argv[2]; + slv_mnt = argv[2]; + ret = sys_lstat(slv_mnt, &d); + if (ret) { + err("%s: %s\n", slv_mnt, strerror(errno)); + return NULL; + } + slavemnt = argv[2]; - workers = atoi(argv[3]); - if (workers <= 0) - workers = DEFAULT_WORKERS; + workers = atoi(argv[3]); + if (workers <= 0) + workers = DEFAULT_WORKERS; - return basedir; + return basedir; } int -main (int argc, char *argv[]) +main(int argc, char *argv[]) { - char *basedir = NULL; + char *basedir = NULL; - basedir = parse_and_validate_args (argc, argv); - if (!basedir) - return 1; + basedir = parse_and_validate_args(argc, argv); + if (!basedir) + return 1; - xfind (basedir); + xfind(basedir); - return 0; + return 0; } diff --git a/tools/gfind_missing_files/gfid_to_path.py b/tools/gfind_missing_files/gfid_to_path.py index 8362f68b955..01e08a9494a 100644 --- a/tools/gfind_missing_files/gfid_to_path.py +++ b/tools/gfind_missing_files/gfid_to_path.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python3 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> # This file is part of GlusterFS. diff --git a/tools/gfind_missing_files/gfind_missing_files.sh b/tools/gfind_missing_files/gfind_missing_files.sh index f42fe7b05af..e7aaa0b5dd4 100644 --- a/tools/gfind_missing_files/gfind_missing_files.sh +++ b/tools/gfind_missing_files/gfind_missing_files.sh @@ -61,7 +61,7 @@ mount_slave() parse_cli() { - if [[ $# -ne 4 ]]; then + if [ "$#" -ne 4 ]; then echo "Usage: gfind_missing_files <brick-path> <slave-host> <slave-vol> <OUTFILE>" exit 1 else diff --git a/tools/glusterfind/Makefile.am b/tools/glusterfind/Makefile.am index 37f23bed1bb..f17dbdb228e 100644 --- a/tools/glusterfind/Makefile.am +++ b/tools/glusterfind/Makefile.am @@ -1,12 +1,15 @@ SUBDIRS = src -EXTRA_DIST = S57glusterfind-delete-post.py +EXTRA_DIST = S57glusterfind-delete-post.py glusterfind +if WITH_SERVER bin_SCRIPTS = glusterfind +endif CLEANFILES = $(bin_SCRIPTS) -deletehookscriptsdir = $(libexecdir)/glusterfs/glusterfind/ +if WITH_SERVER +deletehookscriptsdir = $(GLUSTERFS_LIBEXECDIR)/glusterfind/ deletehookscripts_SCRIPTS = S57glusterfind-delete-post.py uninstall-local: @@ -16,5 +19,6 @@ install-data-local: $(mkdir_p) $(DESTDIR)$(GLUSTERD_WORKDIR)/glusterfind/.keys $(mkdir_p) $(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/ rm -f $(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/S57glusterfind-delete-post - ln -s $(libexecdir)/glusterfs/glusterfind/S57glusterfind-delete-post.py \ + ln -s $(GLUSTERFS_LIBEXECDIR)/glusterfind/S57glusterfind-delete-post.py \ $(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/S57glusterfind-delete-post +endif diff --git a/tools/glusterfind/S57glusterfind-delete-post.py b/tools/glusterfind/S57glusterfind-delete-post.py index fb6c222df03..5beece220f0 100755 --- a/tools/glusterfind/S57glusterfind-delete-post.py +++ b/tools/glusterfind/S57glusterfind-delete-post.py @@ -1,4 +1,4 @@ -#!/usr/bin/python +#!/usr/bin/python3 import os import shutil from errno import ENOENT @@ -18,7 +18,7 @@ def handle_rm_error(func, path, exc_info): def get_glusterd_workdir(): p = Popen(["gluster", "system::", "getwd"], - stdout=PIPE, stderr=PIPE) + stdout=PIPE, stderr=PIPE, universal_newlines=True) out, _ = p.communicate() diff --git a/tools/glusterfind/glusterfind.in b/tools/glusterfind/glusterfind.in index cff8973980a..ca154b625dd 100644 --- a/tools/glusterfind/glusterfind.in +++ 
b/tools/glusterfind/glusterfind.in @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python3 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> # This file is part of GlusterFS. @@ -10,6 +10,7 @@ import sys sys.path.insert(1, '@GLUSTERFS_LIBEXECDIR@/') +sys.path.insert(1, '@GLUSTERFS_LIBEXECDIR@/glusterfind') from glusterfind.main import main diff --git a/tools/glusterfind/src/Makefile.am b/tools/glusterfind/src/Makefile.am index 541ff946c04..43b6141b01c 100644 --- a/tools/glusterfind/src/Makefile.am +++ b/tools/glusterfind/src/Makefile.am @@ -1,12 +1,14 @@ -glusterfinddir = $(libexecdir)/glusterfs/glusterfind +glusterfinddir = $(GLUSTERFS_LIBEXECDIR)/glusterfind +if WITH_SERVER glusterfind_PYTHON = conf.py utils.py __init__.py \ - main.py libgfchangelog.py changelogdata.py + main.py libgfchangelog.py changelogdata.py gfind_py2py3.py glusterfind_SCRIPTS = changelog.py nodeagent.py \ brickfind.py glusterfind_DATA = tool.conf +endif EXTRA_DIST = changelog.py nodeagent.py brickfind.py \ tool.conf changelogdata.py diff --git a/tools/glusterfind/src/__init__.py b/tools/glusterfind/src/__init__.py index 0ffb3f7432d..1753698b5fa 100644 --- a/tools/glusterfind/src/__init__.py +++ b/tools/glusterfind/src/__init__.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> diff --git a/tools/glusterfind/src/brickfind.py b/tools/glusterfind/src/brickfind.py index efc840bca70..73b6350188d 100644 --- a/tools/glusterfind/src/brickfind.py +++ b/tools/glusterfind/src/brickfind.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python3 # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> @@ -13,7 +13,10 @@ import os import sys import logging from argparse import ArgumentParser, RawDescriptionHelpFormatter -import urllib +try: + import urllib.parse as urllib +except ImportError: + import urllib import time from utils import mkdirp, setup_logger, create_file, output_write, find @@ -38,11 +41,20 @@ def brickfind_crawl(brick, args): with open(args.outfile, "a+") as fout: brick_path_len = len(brick) - def output_callback(path, filter_result): + def output_callback(path, filter_result, is_dir): path = path.strip() path = path[brick_path_len+1:] - output_write(fout, path, args.output_prefix, - encode=(not args.no_encode), tag=args.tag) + + if args.type == "both": + output_write(fout, path, args.output_prefix, + encode=(not args.no_encode), tag=args.tag, + field_separator=args.field_separator) + else: + if (is_dir and args.type == "d") or ( + (not is_dir) and args.type == "f"): + output_write(fout, path, args.output_prefix, + encode=(not args.no_encode), tag=args.tag, + field_separator=args.field_separator) ignore_dirs = [os.path.join(brick, dirname) for dirname in @@ -61,9 +73,9 @@ def _get_args(): parser.add_argument("session", help="Session Name") parser.add_argument("volume", help="Volume Name") + parser.add_argument("node", help="Node Name") parser.add_argument("brick", help="Brick Name") parser.add_argument("outfile", help="Output File") - parser.add_argument("start", help="Start Time", type=float) parser.add_argument("tag", help="Tag to prefix file name with") parser.add_argument("--only-query", help="Only query, No session update", action="store_true") @@ -73,6 +85,11 @@ def _get_args(): action="store_true") parser.add_argument("--output-prefix", help="File prefix in output", default=".") + parser.add_argument('--type', help="type: f, f-files only" + " d, d-directories only, by default = both", + 
default='both') + parser.add_argument("--field-separator", help="Field separator", + default=" ") return parser.parse_args() @@ -81,7 +98,7 @@ if __name__ == "__main__": args = _get_args() session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, - "%s.status" % urllib.quote_plus(args.brick)) + "%s.status" % urllib.quote_plus(args.brick)) status_file_pre = status_file + ".pre" mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) @@ -96,6 +113,6 @@ if __name__ == "__main__": time_to_update = int(time.time()) brickfind_crawl(args.brick, args) if not args.only_query: - with open(status_file_pre, "w", buffering=0) as f: + with open(status_file_pre, "w") as f: f.write(str(time_to_update)) sys.exit(0) diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py index 721b8d0ca3a..a5e9ea4288f 100644 --- a/tools/glusterfind/src/changelog.py +++ b/tools/glusterfind/src/changelog.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python3 # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> @@ -14,9 +14,13 @@ import sys import time import xattr import logging +from gfind_py2py3 import bytearray_to_str from argparse import ArgumentParser, RawDescriptionHelpFormatter import hashlib -import urllib +try: + import urllib.parse as urllib +except ImportError: + import urllib import codecs import libgfchangelog @@ -40,8 +44,6 @@ history_turn_time = 0 logger = logging.getLogger() - - def pgfid_to_path(brick, changelog_data): """ For all the pgfids in table, converts into path using recursive @@ -49,7 +51,7 @@ def pgfid_to_path(brick, changelog_data): """ # pgfid1 to path1 in case of CREATE/MKNOD/MKDIR/LINK/SYMLINK for row in changelog_data.gfidpath_get_distinct("pgfid1", {"path1": ""}): - # In case of Data/Metadata only, pgfid1 will not be their + # In case of Data/Metadata only, pgfid1 will not be there if row[0] == "": continue @@ -94,7 +96,7 @@ def populate_pgfid_and_inodegfid(brick, changelog_data): path = symlink_gfid_to_path(brick, gfid) path = output_path_prepare(path, args) changelog_data.gfidpath_update({"path1": path}, - {"gfid": gfid}) + {"gfid": gfid}) except (IOError, OSError) as e: logger.warn("Error converting to path: %s" % e) continue @@ -104,15 +106,55 @@ def populate_pgfid_and_inodegfid(brick, changelog_data): changelog_data.inodegfid_add(os.stat(p).st_ino, gfid) file_xattrs = xattr.list(p) for x in file_xattrs: - if x.startswith("trusted.pgfid."): + x_str = bytearray_to_str(x) + if x_str.startswith("trusted.pgfid."): # PGFID in pgfid table - changelog_data.pgfid_add(x.split(".")[-1]) + changelog_data.pgfid_add(x_str.split(".")[-1]) except (IOError, OSError): # All OS Errors ignored, since failures will be logged # in End. All GFIDs present in gfidpath table continue +def enum_hard_links_using_gfid2path(brick, gfid, args): + hardlinks = [] + p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid) + if not os.path.isdir(p): + # we have a symlink or a normal file + try: + file_xattrs = xattr.list(p) + for x in file_xattrs: + x_str = bytearray_to_str(x) + if x_str.startswith("trusted.gfid2path."): + # get the value for the xattr i.e. 
<PGFID>/<BN> + v = xattr.getxattr(p, x_str) + v_str = bytearray_to_str(v) + pgfid, bn = v_str.split(os.sep) + try: + path = symlink_gfid_to_path(brick, pgfid) + fullpath = os.path.join(path, bn) + fullpath = output_path_prepare(fullpath, args) + hardlinks.append(fullpath) + except (IOError, OSError) as e: + logger.warn("Error converting to path: %s" % e) + continue + except (IOError, OSError): + pass + return hardlinks + + +def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args): + path = "" + for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}): + gfid = row[3].strip() + logger.debug("Processing gfid %s" % gfid) + hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args) + + path = ",".join(hardlinks) + + changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid}) + + def gfid_to_path_using_pgfid(brick, changelog_data, args): """ For all the pgfids collected, Converts to Path and @@ -160,10 +202,10 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args): try: path = symlink_gfid_to_path(brick, row[0]) find(os.path.join(brick, path), - callback_func=output_callback, - filter_func=inode_filter, - ignore_dirs=ignore_dirs, - subdirs_crawl=False) + callback_func=output_callback, + filter_func=inode_filter, + ignore_dirs=ignore_dirs, + subdirs_crawl=False) except (IOError, OSError) as e: logger.warn("Error converting to path: %s" % e) continue @@ -245,7 +287,7 @@ def get_changes(brick, hash_dir, log_file, start, end, args): session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, - "%s.status" % urllib.quote_plus(args.brick)) + "%s.status" % urllib.quote_plus(args.brick)) # Get previous session try: @@ -272,9 +314,10 @@ def get_changes(brick, hash_dir, log_file, start, end, args): actual_end = libgfchangelog.cl_history_changelog( cl_path, start, end, CHANGELOGAPI_NUM_WORKERS) except libgfchangelog.ChangelogException as e: - fail("%s Historical Changelogs not available: %s" % (brick, e), - logger=logger) + fail("%s: %s Historical Changelogs not available: %s" % + (args.node, brick, e), logger=logger) + logger.info("[1/4] Starting changelog parsing ...") try: # scan followed by getchanges till scan returns zero. 
# history_scan() is blocking call, till it gets the number @@ -296,25 +339,34 @@ def get_changes(brick, hash_dir, log_file, start, end, args): libgfchangelog.cl_history_done(change) except IOError as e: logger.warn("Error parsing changelog file %s: %s" % - (change, e)) + (change, e)) changelog_data.commit() except libgfchangelog.ChangelogException as e: fail("%s Error during Changelog Crawl: %s" % (brick, e), logger=logger) + logger.info("[1/4] Finished changelog parsing.") + # Convert all pgfid available from Changelogs + logger.info("[2/4] Starting 'pgfid to path' conversions ...") pgfid_to_path(brick, changelog_data) changelog_data.commit() + logger.info("[2/4] Finished 'pgfid to path' conversions.") - # Convert all GFIDs for which no other additional details available - gfid_to_path_using_pgfid(brick, changelog_data, args) + # Convert all gfids recorded for data and metadata to all hardlink paths + logger.info("[3/4] Starting 'gfid2path' conversions ...") + gfid_to_all_paths_using_gfid2path(brick, changelog_data, args) changelog_data.commit() + logger.info("[3/4] Finished 'gfid2path' conversions.") # If some GFIDs fail to get converted from previous step, # convert using find + logger.info("[4/4] Starting 'gfid to path using batchfind' " + "conversions ...") gfid_to_path_using_batchfind(brick, changelog_data) changelog_data.commit() + logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.") return actual_end @@ -328,7 +380,7 @@ def changelog_crawl(brick, start, end, args): # WORKING_DIR/BRICKHASH/OUTFILE working_dir = os.path.dirname(args.outfile) - brickhash = hashlib.sha1(brick) + brickhash = hashlib.sha1(brick.encode()) brickhash = str(brickhash.hexdigest()) working_dir = os.path.join(working_dir, brickhash) @@ -350,9 +402,11 @@ def _get_args(): parser.add_argument("session", help="Session Name") parser.add_argument("volume", help="Volume Name") + parser.add_argument("node", help="Node Name") parser.add_argument("brick", help="Brick Name") parser.add_argument("outfile", help="Output File") parser.add_argument("start", help="Start Time", type=int) + parser.add_argument("end", help="End Time", type=int) parser.add_argument("--only-query", help="Query mode only (no session)", action="store_true") parser.add_argument("--debug", help="Debug", action="store_true") @@ -361,6 +415,7 @@ def _get_args(): action="store_true") parser.add_argument("--output-prefix", help="File prefix in output", default=".") + parser.add_argument("--type",default="both") parser.add_argument("-N", "--only-namespace-changes", help="List only namespace changes", action="store_true") @@ -380,13 +435,15 @@ if __name__ == "__main__": session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, - "%s.status" % urllib.quote_plus(args.brick)) + "%s.status" % urllib.quote_plus(args.brick)) status_file_pre = status_file + ".pre" mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) + end = -1 if args.only_query: start = args.start + end = args.end else: try: with open(status_file) as f: @@ -394,13 +451,17 @@ if __name__ == "__main__": except (ValueError, OSError, IOError): start = args.start - end = int(time.time()) - get_changelog_rollover_time(args.volume) + # end time is optional; so a -1 may be sent to use the default method of + # identifying the end time + if end == -1: + end = int(time.time()) - get_changelog_rollover_time(args.volume) + logger.info("%s Started Changelog Crawl - Start: %s End: %s" % (args.brick, 
start, end)) actual_end = changelog_crawl(args.brick, start, end, args) if not args.only_query: - with open(status_file_pre, "w", buffering=0) as f: + with open(status_file_pre, "w") as f: f.write(str(actual_end)) logger.info("%s Finished Changelog Crawl - End: %s" % (args.brick, diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py index 0e32d7b7f91..641593cf4b1 100644 --- a/tools/glusterfind/src/changelogdata.py +++ b/tools/glusterfind/src/changelogdata.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> @@ -10,12 +9,12 @@ # cases as published by the Free Software Foundation. import sqlite3 -import urllib import os -from utils import RecordType +from utils import RecordType, unquote_plus_space_newline from utils import output_path_prepare + class OutputMerger(object): """ Class to merge the output files collected from @@ -91,7 +90,7 @@ class ChangelogData(object): self._create_table_pgfid() self._create_table_inodegfid() self.args = args - self.path_sep = "/" if args.no_encode else "%2F" + self.path_sep = "/" def _create_table_gfidpath(self): drop_table = "DROP TABLE IF EXISTS gfidpath" @@ -113,6 +112,11 @@ class ChangelogData(object): """ self.cursor.execute(create_table) + create_index = """ + CREATE INDEX gfid_index ON gfidpath(gfid); + """ + self.cursor.execute(create_index) + def _create_table_inodegfid(self): drop_table = "DROP TABLE IF EXISTS inodegfid" self.cursor.execute(drop_table) @@ -322,36 +326,21 @@ class ChangelogData(object): def when_create_mknod_mkdir(self, changelogfile, data): # E <GFID> <MKNOD|CREATE|MKDIR> <MODE> <USER> <GRP> <PGFID>/<BNAME> # Add the Entry to DB - # urllib.unquote_plus will not handle unicode so, encode Unicode to - # represent in 8 bit format and then unquote - pgfid1, bn1 = urllib.unquote_plus( - data[6].encode("utf-8")).split("/", 1) + pgfid1, bn1 = data[6].split("/", 1) if self.args.no_encode: - # No urlencode since no_encode is set, so convert again to Unicode - # format from previously encoded. 
- bn1 = bn1.decode("utf-8").strip() - else: - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) + bn1 = unquote_plus_space_newline(bn1).strip() self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1) def when_rename(self, changelogfile, data): # E <GFID> RENAME <OLD_PGFID>/<BNAME> <PGFID>/<BNAME> - pgfid1, bn1 = urllib.unquote_plus( - data[3].encode("utf-8")).split("/", 1) - pgfid2, bn2 = urllib.unquote_plus( - data[4].encode("utf-8")).split("/", 1) + pgfid1, bn1 = data[3].split("/", 1) + pgfid2, bn2 = data[4].split("/", 1) if self.args.no_encode: - # Quote again the basename - bn1 = bn1.decode("utf-8").strip() - bn2 = bn2.decode("utf-8").strip() - else: - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) - bn2 = urllib.quote_plus(bn2.strip()) + bn1 = unquote_plus_space_newline(bn1).strip() + bn2 = unquote_plus_space_newline(bn2).strip() if self.gfidpath_exists({"gfid": data[1], "type": "NEW", "pgfid1": pgfid1, "bn1": bn1}): @@ -391,14 +380,9 @@ class ChangelogData(object): def when_link_symlink(self, changelogfile, data): # E <GFID> <LINK|SYMLINK> <PGFID>/<BASENAME> # Add as New record in Db as Type NEW - pgfid1, bn1 = urllib.unquote_plus( - data[3].encode("utf-8")).split("/", 1) + pgfid1, bn1 = data[3].split("/", 1) if self.args.no_encode: - # Quote again the basename - bn1 = bn1.decode("utf-8").strip() - else: - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) + bn1 = unquote_plus_space_newline(bn1).strip() self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1) @@ -410,19 +394,15 @@ class ChangelogData(object): def when_unlink_rmdir(self, changelogfile, data): # E <GFID> <UNLINK|RMDIR> <PGFID>/<BASENAME> - pgfid1, bn1 = urllib.unquote_plus( - data[3].encode("utf-8")).split("/", 1) + pgfid1, bn1 = data[3].split("/", 1) if self.args.no_encode: - bn1 = bn1.decode("utf-8").strip() - else: - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) + bn1 = unquote_plus_space_newline(bn1).strip() deleted_path = data[4] if len(data) == 5 else "" if deleted_path != "": - deleted_path = output_path_prepare(deleted_path, - self.args) + deleted_path = unquote_plus_space_newline(deleted_path) + deleted_path = output_path_prepare(deleted_path, self.args) if self.gfidpath_exists({"gfid": data[1], "type": "NEW", "pgfid1": pgfid1, "bn1": bn1}): diff --git a/tools/glusterfind/src/conf.py b/tools/glusterfind/src/conf.py index d73fee42aad..3849ba5dd1f 100644 --- a/tools/glusterfind/src/conf.py +++ b/tools/glusterfind/src/conf.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> @@ -10,9 +9,12 @@ # cases as published by the Free Software Foundation. import os -import ConfigParser +try: + from ConfigParser import ConfigParser +except ImportError: + from configparser import ConfigParser -config = ConfigParser.ConfigParser() +config = ConfigParser() config.read(os.path.join(os.path.dirname(os.path.abspath(__file__)), "tool.conf")) diff --git a/tools/glusterfind/src/gfind_py2py3.py b/tools/glusterfind/src/gfind_py2py3.py new file mode 100644 index 00000000000..87324fbf350 --- /dev/null +++ b/tools/glusterfind/src/gfind_py2py3.py @@ -0,0 +1,88 @@ +# +# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> +# This file is part of GlusterFS. 
+ +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. +# + +# All python2/python3 compatibility routines + +import os +import sys +from ctypes import create_string_buffer + +if sys.version_info >= (3,): + + # Raw conversion of bytearray to string. Used in the cases where + # buffer is created by create_string_buffer which is a 8-bit char + # array and passed to syscalls to fetch results. Using encode/decode + # doesn't work as it converts to string altering the size. + # def bytearray_to_str(byte_arr): + def bytearray_to_str(byte_arr): + return ''.join([chr(b) for b in byte_arr]) + + def gf_create_string_buffer(size): + return create_string_buffer(b'\0', size) + + def gfind_history_changelog(libgfc, changelog_path, start, end, num_parallel, + actual_end): + return libgfc.gf_history_changelog(changelog_path.encode(), start, end, num_parallel, + actual_end) + + def gfind_changelog_register(libgfc, brick, path, log_file, log_level, + retries): + return libgfc.gf_changelog_register(brick.encode(), path.encode(), log_file.encode(), + log_level, retries) + + def gfind_history_changelog_done(libgfc, clfile): + return libgfc.gf_history_changelog_done(clfile.encode()) + + def gfind_write_row(f, row, field_separator, p_rep, row_2_rep): + f.write(u"{0}{1}{2}{3}{4}\n".format(row, + field_separator, + p_rep, + field_separator, + row_2_rep)) + + def gfind_write(f, row, field_separator, p_rep): + f.write(u"{0}{1}{2}\n".format(row, + field_separator, + p_rep)) + + +else: + + # Raw conversion of bytearray to string + def bytearray_to_str(byte_arr): + return byte_arr + + def gf_create_string_buffer(size): + return create_string_buffer('\0', size) + + def gfind_history_changelog(libgfc, changelog_path, start, end, num_parallel, + actual_end): + return libgfc.gf_history_changelog(changelog_path, start, end, + num_parallel, actual_end) + + def gfind_changelog_register(libgfc, brick, path, log_file, log_level, + retries): + return libgfc.gf_changelog_register(brick, path, log_file, + log_level, retries) + + def gfind_history_changelog_done(libgfc, clfile): + return libgfc.gf_history_changelog_done(clfile) + + def gfind_write_row(f, row, field_separator, p_rep, row_2_rep): + f.write(u"{0}{1}{2}{3}{4}\n".format(row, + field_separator, + p_rep, + field_separator, + row_2_rep).encode()) + + def gfind_write(f, row, field_separator, p_rep): + f.write(u"{0}{1}{2}\n".format(row, + field_separator, + p_rep).encode()) diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py index dd8153e4e61..513bb101e93 100644 --- a/tools/glusterfind/src/libgfchangelog.py +++ b/tools/glusterfind/src/libgfchangelog.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> @@ -10,51 +9,52 @@ # cases as published by the Free Software Foundation. 
import os -from ctypes import CDLL, get_errno, create_string_buffer, c_ulong, byref -from ctypes import RTLD_GLOBAL +from ctypes import CDLL, RTLD_GLOBAL, get_errno, create_string_buffer, c_ulong, byref from ctypes.util import find_library +from gfind_py2py3 import bytearray_to_str, gf_create_string_buffer +from gfind_py2py3 import gfind_history_changelog, gfind_changelog_register +from gfind_py2py3 import gfind_history_changelog_done class ChangelogException(OSError): pass +libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, use_errno=True) -libgfc = CDLL(find_library("gfchangelog"), use_errno=True, mode=RTLD_GLOBAL) - -def raise_oserr(): +def raise_oserr(prefix=None): errn = get_errno() - raise ChangelogException(errn, os.strerror(errn)) + prefix_or_empty = prefix + ": " if prefix else "" + raise ChangelogException(errn, prefix_or_empty + os.strerror(errn)) def cl_init(): ret = libgfc.gf_changelog_init(None) if ret == -1: - raise_oserr() + raise_oserr(prefix="gf_changelog_init") def cl_register(brick, path, log_file, log_level, retries=0): - ret = libgfc.gf_changelog_register(brick, path, log_file, - log_level, retries) + ret = gfind_changelog_register(libgfc, brick, path, log_file,log_level, retries) if ret == -1: - raise_oserr() + raise_oserr(prefix="gf_changelog_register") def cl_history_scan(): ret = libgfc.gf_history_changelog_scan() if ret == -1: - raise_oserr() + raise_oserr(prefix="gf_history_changelog_scan") return ret def cl_history_changelog(changelog_path, start, end, num_parallel): actual_end = c_ulong() - ret = libgfc.gf_history_changelog(changelog_path, start, end, + ret = gfind_history_changelog(libgfc,changelog_path, start, end, num_parallel, byref(actual_end)) if ret == -1: - raise_oserr() + raise_oserr(prefix="gf_history_changelog") return actual_end.value @@ -62,7 +62,7 @@ def cl_history_changelog(changelog_path, start, end, num_parallel): def cl_history_startfresh(): ret = libgfc.gf_history_changelog_start_fresh() if ret == -1: - raise_oserr() + raise_oserr(prefix="gf_history_changelog_start_fresh") def cl_history_getchanges(): @@ -71,20 +71,22 @@ def cl_history_getchanges(): return f.split('.')[-1] changes = [] - buf = create_string_buffer('\0', 4096) + buf = gf_create_string_buffer(4096) while True: ret = libgfc.gf_history_changelog_next_change(buf, 4096) if ret in (0, -1): break - changes.append(buf.raw[:ret - 1]) + # py2 and py3 compatibility + result = bytearray_to_str(buf.raw[:ret - 1]) + changes.append(result) if ret == -1: - raise_oserr() + raise_oserr(prefix="gf_history_changelog_next_change") return sorted(changes, key=clsort) def cl_history_done(clfile): - ret = libgfc.gf_history_changelog_done(clfile) + ret = gfind_history_changelog_done(libgfc, clfile) if ret == -1: - raise_oserr() + raise_oserr(prefix="gf_history_changelog_done") diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py index c125f970a83..4b5466d0114 100644 --- a/tools/glusterfind/src/main.py +++ b/tools/glusterfind/src/main.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python3 # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. 
<http://www.redhat.com/> @@ -16,18 +16,20 @@ from multiprocessing import Process import os import xml.etree.cElementTree as etree from argparse import ArgumentParser, RawDescriptionHelpFormatter, Action +from gfind_py2py3 import gfind_write_row, gfind_write import logging import shutil import tempfile import signal from datetime import datetime +import codecs +import re from utils import execute, is_host_local, mkdirp, fail from utils import setup_logger, human_time, handle_rm_error from utils import get_changelog_rollover_time, cache_output, create_file import conf from changelogdata import OutputMerger -import codecs PROG_DESCRIPTION = """ GlusterFS Incremental API @@ -35,9 +37,9 @@ GlusterFS Incremental API ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError logger = logging.getLogger() -node_outfiles = [] vol_statusStr = "" gtmpfilename = None +g_pid_nodefile_map = {} class StoreAbsPath(Action): @@ -75,12 +77,27 @@ def node_cmd(host, host_uuid, task, cmd, args, opts): cmd = ["ssh", "-oNumberOfPasswordPrompts=0", "-oStrictHostKeyChecking=no", + # We force TTY allocation (-t -t) so that Ctrl+C is handed + # through; see: + # https://bugzilla.redhat.com/show_bug.cgi?id=1382236 + # Note that this turns stderr of the remote `cmd` + # into stdout locally. "-t", "-t", "-i", pem_key_path, "root@%s" % host] + cmd - execute(cmd, exit_msg="%s - %s failed" % (host, task), logger=logger) + (returncode, err, out) = execute(cmd, logger=logger) + if returncode != 0: + # Because the `-t -t` above turns the remote stderr into + # local stdout, we need to log both stderr and stdout + # here to print all error messages. + fail("%s - %s failed; stdout (including remote stderr):\n" + "%s\n" + "stderr:\n" + "%s" % (host, task, out, err), + returncode, + logger=logger) if opts.get("copy_outfile", False) and not localdir: cmd_copy = ["scp", @@ -96,7 +113,7 @@ def node_cmd(host, host_uuid, task, cmd, args, opts): def run_cmd_nodes(task, args, **kwargs): - global node_outfiles + global g_pid_nodefile_map nodes = get_nodes(args.volume) pool = [] for num, node in enumerate(nodes): @@ -127,23 +144,30 @@ def run_cmd_nodes(task, args, **kwargs): if tag == "": tag = '""' if not is_host_local(host_uuid) else "" - node_outfiles.append(node_outfile) # remote file will be copied into this directory mkdirp(os.path.dirname(node_outfile), exit_on_err=True, logger=logger) + FS = args.field_separator + if not is_host_local(host_uuid): + FS = "'" + FS + "'" + cmd = [change_detector, args.session, args.volume, + host, brick, - node_outfile, - str(kwargs.get("start"))] + \ + node_outfile] + \ + ([str(kwargs.get("start")), str(kwargs.get("end"))] + if not args.full else []) + \ ([tag] if tag is not None else []) + \ ["--output-prefix", args.output_prefix] + \ (["--debug"] if args.debug else []) + \ (["--no-encode"] if args.no_encode else []) + \ (["--only-namespace-changes"] if args.only_namespace_changes - else []) + else []) + \ + (["--type", args.type]) + \ + (["--field-separator", FS] if args.full else []) opts["node_outfile"] = node_outfile opts["copy_outfile"] = True @@ -157,24 +181,31 @@ def run_cmd_nodes(task, args, **kwargs): if tag == "": tag = '""' if not is_host_local(host_uuid) else "" - node_outfiles.append(node_outfile) # remote file will be copied into this directory mkdirp(os.path.dirname(node_outfile), exit_on_err=True, logger=logger) + FS = args.field_separator + if not is_host_local(host_uuid): + FS = "'" + FS + "'" + cmd = [change_detector, args.session, args.volume, + host, brick, - 
node_outfile, - str(kwargs.get("start"))] + \ + node_outfile] + \ + ([str(kwargs.get("start")), str(kwargs.get("end"))] + if not args.full else []) + \ ([tag] if tag is not None else []) + \ ["--only-query"] + \ ["--output-prefix", args.output_prefix] + \ (["--debug"] if args.debug else []) + \ (["--no-encode"] if args.no_encode else []) + \ (["--only-namespace-changes"] - if args.only_namespace_changes else []) + if args.only_namespace_changes else []) + \ + (["--type", args.type]) + \ + (["--field-separator", FS] if args.full else []) opts["node_outfile"] = node_outfile opts["copy_outfile"] = True @@ -233,6 +264,7 @@ def run_cmd_nodes(task, args, **kwargs): args=(host, host_uuid, task, cmd, args, opts)) p.start() pool.append(p) + g_pid_nodefile_map[p.pid] = node_outfile for num, p in enumerate(pool): p.join() @@ -240,8 +272,11 @@ def run_cmd_nodes(task, args, **kwargs): logger.warn("Command %s failed in %s" % (task, nodes[num][1])) if task in ["create", "delete"]: fail("Command %s failed in %s" % (task, nodes[num][1])) - elif task == "pre" and args.disable_partial: - sys.exit(1) + elif task == "pre" or task == "query": + if args.disable_partial: + sys.exit(1) + else: + del g_pid_nodefile_map[p.pid] @cache_output @@ -291,6 +326,7 @@ def _get_args(): parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter, description=PROG_DESCRIPTION) subparsers = parser.add_subparsers(dest="mode") + subparsers.required = True # create <SESSION> <VOLUME> [--debug] [--force] parser_create = subparsers.add_parser('create') @@ -341,6 +377,11 @@ def _get_args(): help="Tag prefix for file names emitted during" " a full find operation; default: \"NEW\"", default="NEW") + parser_pre.add_argument('--type', help="type: f, f-files only" + " d, d-directories only, by default = both", + default='both', choices=["f", "d", "both"]) + parser_pre.add_argument("--field-separator", help="Field separator string", + default=" ") # query <VOLUME> <OUTFILE> --since-time <SINCE_TIME> # [--output-prefix <OUTPUT_PREFIX>] [--full] @@ -350,6 +391,8 @@ def _get_args(): action=StoreAbsPath) parser_query.add_argument("--since-time", help="UNIX epoch time since " "which listing is required", type=int) + parser_query.add_argument("--end-time", help="UNIX epoch time up to " + "which listing is required", type=int) parser_query.add_argument("--no-encode", help="Do not encode path in output file", action="store_true") @@ -366,6 +409,12 @@ def _get_args(): help="Tag prefix for file names emitted during" " a full find operation; default: \"NEW\"", default="NEW") + parser_query.add_argument('--type', help="type: f, f-files only" + " d, d-directories only, by default = both", + default='both', choices=["f", "d", "both"]) + parser_query.add_argument("--field-separator", + help="Field separator string", + default=" ") # post <SESSION> <VOLUME> parser_post = subparsers.add_parser('post') @@ -451,7 +500,7 @@ def enable_volume_options(args): % args.volume) -def write_output(outfile, outfilemerger): +def write_output(outfile, outfilemerger, field_separator): with codecs.open(outfile, "a", encoding="utf-8") as f: for row in outfilemerger.get(): # Multiple paths in case of Hardlinks @@ -460,24 +509,20 @@ def write_output(outfile, outfilemerger): for p in paths: if p == "": continue - p_rep = p.replace("%2F%2F", "%2F").replace("//", "/") + p_rep = p.replace("//", "/") if not row_2_rep: - row_2_rep = row[2].replace("%2F%2F", "%2F").replace("//", - "/") + row_2_rep = row[2].replace("//", "/") if p_rep == row_2_rep: continue if row_2_rep and 
row_2_rep != "": - f.write(u"{0} {1} {2}\n".format(row[0], p_rep, row_2_rep)) - else: - f.write(u"{0} {1}\n".format(row[0], p_rep)) - + gfind_write_row(f, row[0], field_separator, p_rep, row_2_rep) -def mode_create(session_dir, args): - logger.debug("Init is called - Session: %s, Volume: %s" - % (args.session, args.volume)) + else: + gfind_write(f, row[0], field_separator, p_rep) - cmd = ["gluster", 'volume', 'info', args.volume, "--xml"] +def validate_volume(volume): + cmd = ["gluster", 'volume', 'info', volume, "--xml"] _, data, _ = execute(cmd, exit_msg="Failed to Run Gluster Volume Info", logger=logger) @@ -485,11 +530,42 @@ def mode_create(session_dir, args): tree = etree.fromstring(data) statusStr = tree.find('volInfo/volumes/volume/statusStr').text except (ParseError, AttributeError) as e: - fail("Invalid Volume: %s" % e, logger=logger) - + fail("Invalid Volume: Check the Volume name! %s" % e) if statusStr != "Started": - fail("Volume %s is not online" % args.volume, logger=logger) + fail("Volume %s is not online" % volume) + +# The rules for a valid session name. +SESSION_NAME_RULES = { + 'min_length': 2, + 'max_length': 256, # same as maximum volume length + # Specifies all alphanumeric characters, underscore, hyphen. + 'valid_chars': r'0-9a-zA-Z_-', +} + + +# checks valid session name, fail otherwise +def validate_session_name(session): + # Check for minimum length + if len(session) < SESSION_NAME_RULES['min_length']: + fail('session_name must be at least ' + + str(SESSION_NAME_RULES['min_length']) + ' characters long.') + # Check for maximum length + if len(session) > SESSION_NAME_RULES['max_length']: + fail('session_name must not exceed ' + + str(SESSION_NAME_RULES['max_length']) + ' characters length.') + + # Matches strings composed entirely of characters specified within + if not re.match(r'^[' + SESSION_NAME_RULES['valid_chars'] + + ']+$', session): + fail('Session name can only contain these characters: ' + + SESSION_NAME_RULES['valid_chars']) + + +def mode_create(session_dir, args): + validate_session_name(args.session) + logger.debug("Init is called - Session: %s, Volume: %s" + % (args.session, args.volume)) mkdirp(session_dir, exit_on_err=True, logger=logger) mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) @@ -510,7 +586,7 @@ def mode_create(session_dir, args): run_cmd_nodes("create", args, time_to_update=str(time_to_update)) if not os.path.exists(status_file) or args.reset_session_time: - with open(status_file, "w", buffering=0) as f: + with open(status_file, "w") as f: f.write(str(time_to_update)) sys.stdout.write("Session %s created with volume %s\n" % @@ -521,6 +597,7 @@ def mode_create(session_dir, args): def mode_query(session_dir, args): global gtmpfilename + global g_pid_nodefile_map # Verify volume status cmd = ["gluster", 'volume', 'info', args.volume, "--xml"] @@ -548,39 +625,57 @@ def mode_query(session_dir, args): enable_volume_options(args) # Test options - if not args.since_time and not args.full: - fail("Please specify either --since-time or --full", logger=logger) - - if args.since_time and args.full: - fail("Please specify either --since-time or --full, but not both", + if not args.full and args.type in ["f", "d"]: + fail("--type can only be used with --full") + if not args.since_time and not args.end_time and not args.full: + fail("Please specify either {--since-time and optionally --end-time} " + "or --full", logger=logger) + + if args.since_time and args.end_time and args.full: + fail("Please specify either 
{--since-time and optionally --end-time} " + "or --full, but not both", logger=logger) + if args.end_time and not args.since_time: + fail("Please specify --since-time as well", logger=logger) + # Start query command processing + start = -1 + end = -1 if args.since_time: start = args.since_time + if args.end_time: + end = args.end_time else: start = 0 # --full option is handled separately logger.debug("Query is called - Session: %s, Volume: %s, " - "Start time: %s" - % ("default", args.volume, start)) + "Start time: %s, End time: %s" + % ("default", args.volume, start, end)) prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-") gtmpfilename = prefix + next(tempfile._get_candidate_names()) - run_cmd_nodes("query", args, start=start, tmpfilename=gtmpfilename) + run_cmd_nodes("query", args, start=start, end=end, + tmpfilename=gtmpfilename) # Merger if args.full: - cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile] - execute(cmd, - exit_msg="Failed to merge output files " - "collected from nodes", logger=logger) + if len(g_pid_nodefile_map) > 0: + cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \ + ["-o", args.outfile] + execute(cmd, + exit_msg="Failed to merge output files " + "collected from nodes", logger=logger) + else: + fail("Failed to collect any output files from peers. " + "Looks like all bricks are offline.", logger=logger) else: # Read each Changelogs db and generate finaldb create_file(args.outfile, exit_on_err=True, logger=logger) - outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles) - write_output(args.outfile, outfilemerger) + outfilemerger = OutputMerger(args.outfile + ".db", + list(g_pid_nodefile_map.values())) + write_output(args.outfile, outfilemerger, args.field_separator) try: os.remove(args.outfile + ".db") @@ -594,6 +689,7 @@ def mode_query(session_dir, args): def mode_pre(session_dir, args): global gtmpfilename + global g_pid_nodefile_map """ Read from Session file and write to session.pre file @@ -605,6 +701,9 @@ def mode_pre(session_dir, args): mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger) + if not args.full and args.type in ["f", "d"]: + fail("--type can only be used with --full") + # If Pre status file exists and running pre command again if os.path.exists(status_file_pre) and not args.regenerate_outfile: fail("Post command is not run after last pre, " @@ -627,19 +726,25 @@ def mode_pre(session_dir, args): prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-") gtmpfilename = prefix + next(tempfile._get_candidate_names()) - run_cmd_nodes("pre", args, start=start, tmpfilename=gtmpfilename) + run_cmd_nodes("pre", args, start=start, end=-1, tmpfilename=gtmpfilename) # Merger if args.full: - cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile] - execute(cmd, - exit_msg="Failed to merge output files " - "collected from nodes", logger=logger) + if len(g_pid_nodefile_map) > 0: + cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \ + ["-o", args.outfile] + execute(cmd, + exit_msg="Failed to merge output files " + "collected from nodes", logger=logger) + else: + fail("Failed to collect any output files from peers. 
" + "Looks like all bricks are offline.", logger=logger) else: # Read each Changelogs db and generate finaldb create_file(args.outfile, exit_on_err=True, logger=logger) - outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles) - write_output(args.outfile, outfilemerger) + outfilemerger = OutputMerger(args.outfile + ".db", + list(g_pid_nodefile_map.values())) + write_output(args.outfile, outfilemerger, args.field_separator) try: os.remove(args.outfile + ".db") @@ -648,7 +753,7 @@ def mode_pre(session_dir, args): run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename) - with open(status_file_pre, "w", buffering=0) as f: + with open(status_file_pre, "w") as f: f.write(str(endtime_to_update)) sys.stdout.write("Generated output file %s\n" % args.outfile) @@ -774,6 +879,11 @@ def main(): args.mode not in ["create", "list", "query"]: fail("Invalid session %s" % args.session) + # volume involved, validate the volume first + if args.mode not in ["list"]: + validate_volume(args.volume) + + # "default" is a system defined session name if args.mode in ["create", "post", "pre", "delete"] and \ args.session == "default": diff --git a/tools/glusterfind/src/nodeagent.py b/tools/glusterfind/src/nodeagent.py index 07d82826e0d..679daa6fa76 100644 --- a/tools/glusterfind/src/nodeagent.py +++ b/tools/glusterfind/src/nodeagent.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python3 # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> @@ -14,7 +14,10 @@ import sys import os import logging from argparse import ArgumentParser, RawDescriptionHelpFormatter -import urllib +try: + import urllib.parse as urllib +except ImportError: + import urllib from errno import ENOTEMPTY from utils import setup_logger, mkdirp, handle_rm_error @@ -49,13 +52,13 @@ def mode_create(args): session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, - "%s.status" % urllib.quote_plus(args.brick)) + "%s.status" % urllib.quote_plus(args.brick)) mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) if not os.path.exists(status_file) or args.reset_session_time: - with open(status_file, "w", buffering=0) as f: + with open(status_file, "w") as f: f.write(args.time_to_update) sys.exit(0) @@ -64,7 +67,7 @@ def mode_create(args): def mode_post(args): session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, - "%s.status" % urllib.quote_plus(args.brick)) + "%s.status" % urllib.quote_plus(args.brick)) mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py index 70737be760a..906ebd8f252 100644 --- a/tools/glusterfind/src/utils.py +++ b/tools/glusterfind/src/utils.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. 
<http://www.redhat.com/> @@ -16,10 +15,12 @@ import xml.etree.cElementTree as etree import logging import os from datetime import datetime -import urllib ROOT_GFID = "00000000-0000-0000-0000-000000000001" DEFAULT_CHANGELOG_INTERVAL = 15 +SPACE_ESCAPE_CHAR = "%20" +NEWLINE_ESCAPE_CHAR = "%0A" +PERCENTAGE_ESCAPE_CHAR = "%25" ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError cache_data = {} @@ -35,10 +36,10 @@ class RecordType(object): def cache_output(func): def wrapper(*args, **kwargs): global cache_data - if cache_data.get(func.func_name, None) is None: - cache_data[func.func_name] = func(*args, **kwargs) + if cache_data.get(func.__name__, None) is None: + cache_data[func.__name__] = func(*args, **kwargs) - return cache_data[func.func_name] + return cache_data[func.__name__] return wrapper @@ -57,12 +58,13 @@ def find(path, callback_func=lambda x: True, filter_func=lambda x: True, # Capture filter_func output and pass it to callback function filter_result = filter_func(path) if filter_result is not None: - callback_func(path, filter_result) + callback_func(path, filter_result, os.path.isdir(path)) for p in os.listdir(path): full_path = os.path.join(path, p) - if os.path.isdir(full_path): + is_dir = os.path.isdir(full_path) + if is_dir: if subdirs_crawl: find(full_path, callback_func, filter_func, ignore_dirs) else: @@ -72,10 +74,11 @@ def find(path, callback_func=lambda x: True, filter_func=lambda x: True, else: filter_result = filter_func(full_path) if filter_result is not None: - callback_func(full_path, filter_result) + callback_func(full_path, filter_result, is_dir) -def output_write(f, path, prefix=".", encode=False, tag=""): +def output_write(f, path, prefix=".", encode=False, tag="", + field_separator=" "): if path == "": return @@ -83,10 +86,10 @@ def output_write(f, path, prefix=".", encode=False, tag=""): path = os.path.join(prefix, path) if encode: - path = urllib.quote_plus(path) + path = quote_plus_space_newline(path) # set the field separator - FS = "" if tag == "" else " " + FS = "" if tag == "" else field_separator f.write("%s%s%s\n" % (tag.strip(), FS, path)) @@ -227,7 +230,11 @@ def get_changelog_rollover_time(volumename): try: tree = etree.fromstring(out) - return int(tree.find('volGetopts/Opt/Value').text) + val = tree.find('volGetopts/Opt/Value').text + if val is not None: + # Filter the value by split, as it may be 'X (DEFAULT)' + # and we only need 'X' + return int(val.split(' ', 1)[0]) except ParseError: return DEFAULT_CHANGELOG_INTERVAL @@ -245,4 +252,16 @@ def output_path_prepare(path, args): if args.no_encode: return path else: - return urllib.quote_plus(path.encode("utf-8")) + return quote_plus_space_newline(path) + + +def unquote_plus_space_newline(s): + return s.replace(SPACE_ESCAPE_CHAR, " ")\ + .replace(NEWLINE_ESCAPE_CHAR, "\n")\ + .replace(PERCENTAGE_ESCAPE_CHAR, "%") + + +def quote_plus_space_newline(s): + return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\ + .replace(" ", SPACE_ESCAPE_CHAR)\ + .replace("\n", NEWLINE_ESCAPE_CHAR) diff --git a/tools/setgfid2path/Makefile.am b/tools/setgfid2path/Makefile.am new file mode 100644 index 00000000000..c14787a80ce --- /dev/null +++ b/tools/setgfid2path/Makefile.am @@ -0,0 +1,5 @@ +SUBDIRS = src + +EXTRA_DIST = gluster-setgfid2path.8 + +man8_MANS = gluster-setgfid2path.8 diff --git a/tools/setgfid2path/gluster-setgfid2path.8 b/tools/setgfid2path/gluster-setgfid2path.8 new file mode 100644 index 00000000000..2e228ca8514 --- /dev/null +++ b/tools/setgfid2path/gluster-setgfid2path.8 @@ -0,0 
+1,54 @@
+
+.\" Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+.\" This file is part of GlusterFS.
+.\"
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
+.\"
+.\"
+.TH gluster-setgfid2path 8 "Command line utility to set GFID to Path Xattrs"
+.SH NAME
+gluster-setgfid2path - Gluster tool to set GFID to Path xattrs
+.SH SYNOPSIS
+.B gluster-setgfid2path
+.IR file
+.SH DESCRIPTION
+The gfid2path feature, introduced in Gluster release 3.12, makes it possible
+to find the full path of a file from its GFID. It is enabled with the volume
+set command \fBgluster volume set <VOLUME> storage.gfid2path enable\fR.
+.PP
+Once the \fBgfid2path\fR feature is enabled, the xattrs it needs are recorded
+for newly created files, but not for files that already exist. This tool sets
+the gfid2path xattrs for a given file path, so that existing files can be
+brought up to date.
+
+.SH EXAMPLES
+To add the xattrs for a single file:
+.PP
+.nf
+.RS
+gluster-setgfid2path /bricks/b1/hello.txt
+.RE
+.fi
+.PP
+To set the xattrs for all existing files, run the script below on each brick:
+.PP
+.nf
+.RS
+BRICK=/bricks/b1
+find $BRICK -type d \\( -path "${BRICK}/.trashcan" -o -path \\
+    "${BRICK}/.glusterfs" \\) -prune -o -type f \\
+    -exec gluster-setgfid2path {} \\;
+.RE
+.fi
+.PP
+.SH SEE ALSO
+.nf
+\fBgluster\fR(8)
+\fR
+.fi
+.SH COPYRIGHT
+.nf
+Copyright(c) 2017 Red Hat, Inc. <http://www.redhat.com>
diff --git a/tools/setgfid2path/src/Makefile.am b/tools/setgfid2path/src/Makefile.am
new file mode 100644
index 00000000000..7316d117070
--- /dev/null
+++ b/tools/setgfid2path/src/Makefile.am
@@ -0,0 +1,16 @@
+gluster_setgfid2pathdir = $(sbindir)
+
+if WITH_SERVER
+gluster_setgfid2path_PROGRAMS = gluster-setgfid2path
+endif
+
+gluster_setgfid2path_SOURCES = main.c
+
+gluster_setgfid2path_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+gluster_setgfid2path_LDFLAGS = $(GF_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+	-I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/tools/setgfid2path/src/main.c b/tools/setgfid2path/src/main.c
new file mode 100644
index 00000000000..4320a7b2481
--- /dev/null
+++ b/tools/setgfid2path/src/main.c
@@ -0,0 +1,130 @@
+/*
+  Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <libgen.h>
+
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syscall.h>
+
+#define MAX_GFID2PATH_LINK_SUP 500
+#define GFID_SIZE 16
+#define GFID_XATTR_KEY "trusted.gfid"
+
+int
+main(int argc, char **argv)
+{
+    int ret = 0;
+    struct stat st;
+    char *dname = NULL;
+    char *bname = NULL;
+    ssize_t ret_size = 0;
+    uuid_t pgfid_raw = {
+        0,
+    };
+    char pgfid[36 + 1] = "";
+    char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {
+        0,
+    };
+    char pgfid_bname[1024] = {
+        0,
+    };
+    char *key = NULL;
+    char *val = NULL;
+    size_t key_size = 0;
+    size_t val_size = 0;
+    const char *file_path = NULL;
+    char *file_path1 = NULL;
+    char *file_path2 = NULL;
+
+    if (argc != 2) {
+        fprintf(stderr, "Usage: setgfid2path <file-path>\n");
+        return -1;
+    }
+
+    ret = sys_lstat(argv[1], &st);
+    if (ret != 0) {
+        fprintf(stderr, "Invalid File Path\n");
+        return -1;
+    }
+
+    if (st.st_nlink >= MAX_GFID2PATH_LINK_SUP) {
+        fprintf(stderr,
+                "Maximum number of supported hardlinks exceeded. "
+                "max=%d\n",
+                MAX_GFID2PATH_LINK_SUP);
+        return -1;
+    }
+
+    file_path = argv[1];
+    file_path1 = strdup(file_path);
+    file_path2 = strdup(file_path);
+
+    dname = dirname(file_path1);
+    bname = basename(file_path2);
+
+    /* Get GFID of Parent directory */
+    ret_size = sys_lgetxattr(dname, GFID_XATTR_KEY, pgfid_raw, GFID_SIZE);
+    if (ret_size != GFID_SIZE) {
+        fprintf(stderr, "Failed to get GFID of parent directory. dir=%s\n",
+                dname);
+        ret = -1;
+        goto out;
+    }
+
+    /* Convert to UUID format */
+    if (uuid_utoa_r(pgfid_raw, pgfid) == NULL) {
+        fprintf(stderr,
+                "Failed to format GFID of parent directory. "
+                "dir=%s\n",
+                dname);
+        ret = -1;
+        goto out;
+    }
+
+    /* Find xxhash for PGFID/BaseName */
+    snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", pgfid, bname);
+    gf_xxh64_wrapper((unsigned char *)pgfid_bname, strlen(pgfid_bname),
+                     GF_XXHSUM64_DEFAULT_SEED, xxh64);
+
+    key_size = SLEN(GFID2PATH_XATTR_KEY_PREFIX) + GF_XXH64_DIGEST_LENGTH * 2 +
+               1;
+    key = alloca(key_size);
+    snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64);
+
+    val_size = UUID_CANONICAL_FORM_LEN + NAME_MAX + 2;
+    val = alloca(val_size);
+    snprintf(val, val_size, "%s/%s", pgfid, bname);
+
+    /* Set the Xattr, ignore if same key xattr already exists */
+    ret = sys_lsetxattr(file_path, key, val, strlen(val), XATTR_CREATE);
+    if (ret == -1) {
+        if (errno == EEXIST) {
+            printf("Xattr already exists, ignoring..\n");
+            ret = 0;
+            goto out;
+        }
+
+        fprintf(stderr, "Failed to set gfid2path xattr. errno=%d error=%s\n",
+                errno, strerror(errno));
+        ret = -1;
+        goto out;
+    }
+
+    printf("Success. file=%s key=%s value=%s\n", file_path, key, val);
+
+out:
+    if (file_path1 != NULL)
+        free(file_path1);
+
+    if (file_path2 != NULL)
+        free(file_path2);
+
+    return ret;
+}
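
The validate_session_name() check added to main.py above is easy to exercise on its own. The sketch below restates the same rules with a hypothetical is_valid_session_name() helper that returns a boolean instead of calling fail(); it is an adaptation for illustration, not the code the patch installs:

import re

# Same rules as the SESSION_NAME_RULES dict introduced in main.py:
# 2-256 characters, alphanumerics, underscore and hyphen only.
SESSION_NAME_RULES = {
    'min_length': 2,
    'max_length': 256,
    'valid_chars': r'0-9a-zA-Z_-',
}

def is_valid_session_name(session):
    # hypothetical helper; the patch's validate_session_name() fails hard
    if not (SESSION_NAME_RULES['min_length'] <= len(session)
            <= SESSION_NAME_RULES['max_length']):
        return False
    return re.match(r'^[' + SESSION_NAME_RULES['valid_chars'] + ']+$',
                    session) is not None

assert is_valid_session_name("nightly-backup_01")
assert not is_valid_session_name("x")            # shorter than 2 characters
assert not is_valid_session_name("bad name!")    # space and "!" rejected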
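
The get_changelog_rollover_time() change in utils.py guards against option values such as "15 (DEFAULT)" appearing in the volGetopts XML output; only the leading number is kept. A minimal illustration (the value string here is made up):

val = "15 (DEFAULT)"          # hypothetical CLI output; may also be just "15"
rollover = int(val.split(' ', 1)[0])
print(rollover)               # 15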
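
The cache_output decorator in utils.py now uses func.__name__, which exists on both Python 2 and Python 3, instead of the Python-2-only func.func_name. Shown standalone with a made-up expensive function, the first call computes the result and later calls reuse the cached value:

cache_data = {}

def cache_output(func):
    def wrapper(*args, **kwargs):
        # cache keyed by function name, as in utils.py
        if cache_data.get(func.__name__, None) is None:
            cache_data[func.__name__] = func(*args, **kwargs)
        return cache_data[func.__name__]
    return wrapper

@cache_output
def get_nodes():
    print("computed once")    # placeholder for the real, expensive lookup
    return ["node1", "node2"]

get_nodes()
get_nodes()                   # second call is served from cache_data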
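
quote_plus_space_newline() and unquote_plus_space_newline(), added to utils.py above, replace urllib.quote_plus with a much narrower escaping scheme: only "%", space and newline are encoded, so output paths stay readable while still fitting one record per line. A standalone round-trip check (the sample path is invented):

SPACE_ESCAPE_CHAR = "%20"
NEWLINE_ESCAPE_CHAR = "%0A"
PERCENTAGE_ESCAPE_CHAR = "%25"

def quote_plus_space_newline(s):
    # "%" must be escaped first, or the "%" produced by the space/newline
    # replacements would itself be escaped again
    return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\
            .replace(" ", SPACE_ESCAPE_CHAR)\
            .replace("\n", NEWLINE_ESCAPE_CHAR)

def unquote_plus_space_newline(s):
    return s.replace(SPACE_ESCAPE_CHAR, " ")\
            .replace(NEWLINE_ESCAPE_CHAR, "\n")\
            .replace(PERCENTAGE_ESCAPE_CHAR, "%")

path = "dir with space/100%done\nnote.txt"
encoded = quote_plus_space_newline(path)
print(encoded)   # dir%20with%20space/100%25done%0Anote.txt
assert unquote_plus_space_newline(encoded) == path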
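
For readers who find it easier to follow in Python, here is an illustrative sketch of what gluster-setgfid2path does to one file. Two details are assumptions that are not visible in this diff: that GFID2PATH_XATTR_KEY_PREFIX expands to "trusted.gfid2path." and that GF_XXHSUM64_DEFAULT_SEED is 0. The sketch also requires the third-party xxhash package and root access to the brick, and it omits the hardlink-count guard:

import os
import uuid
import xxhash   # third-party package providing xxh64

def set_gfid2path_xattr(path, key_prefix="trusted.gfid2path.", seed=0):
    # key_prefix and seed are assumed values, see the note above
    parent = os.path.dirname(path)
    bname = os.path.basename(path)
    # the parent GFID is stored as 16 raw bytes in the trusted.gfid xattr
    pgfid = str(uuid.UUID(bytes=os.getxattr(parent, "trusted.gfid")))
    val = "%s/%s" % (pgfid, bname)
    key = key_prefix + xxhash.xxh64(val.encode(), seed=seed).hexdigest()
    try:
        os.setxattr(path, key, val.encode(), os.XATTR_CREATE)
    except FileExistsError:
        pass   # same key already present, nothing to do

# example: set_gfid2path_xattr("/bricks/b1/hello.txt")  (path from the man page)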
