summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile.am2
-rw-r--r--tools/gfind_missing_files/Makefile.am6
-rw-r--r--tools/gfind_missing_files/gcrawler.c788
-rw-r--r--tools/gfind_missing_files/gfid_to_path.py2
-rw-r--r--tools/gfind_missing_files/gfind_missing_files.sh2
-rw-r--r--tools/glusterfind/Makefile.am6
-rwxr-xr-xtools/glusterfind/S57glusterfind-delete-post.py4
-rw-r--r--tools/glusterfind/glusterfind.in3
-rw-r--r--tools/glusterfind/src/Makefile.am4
-rw-r--r--tools/glusterfind/src/__init__.py1
-rw-r--r--tools/glusterfind/src/brickfind.py33
-rw-r--r--tools/glusterfind/src/changelog.py105
-rw-r--r--tools/glusterfind/src/changelogdata.py60
-rw-r--r--tools/glusterfind/src/conf.py8
-rw-r--r--tools/glusterfind/src/gfind_py2py3.py88
-rw-r--r--tools/glusterfind/src/libgfchangelog.py42
-rw-r--r--tools/glusterfind/src/main.py216
-rw-r--r--tools/glusterfind/src/nodeagent.py13
-rw-r--r--tools/glusterfind/src/utils.py45
-rw-r--r--tools/setgfid2path/Makefile.am5
-rw-r--r--tools/setgfid2path/gluster-setgfid2path.854
-rw-r--r--tools/setgfid2path/src/Makefile.am16
-rw-r--r--tools/setgfid2path/src/main.c130
23 files changed, 1073 insertions, 560 deletions
diff --git a/tools/Makefile.am b/tools/Makefile.am
index d689f60fa52..5808a3728cd 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = gfind_missing_files glusterfind
+SUBDIRS = gfind_missing_files glusterfind setgfid2path
CLEANFILES =
diff --git a/tools/gfind_missing_files/Makefile.am b/tools/gfind_missing_files/Makefile.am
index f77f7899efb..181fe7091f3 100644
--- a/tools/gfind_missing_files/Makefile.am
+++ b/tools/gfind_missing_files/Makefile.am
@@ -1,12 +1,16 @@
gfindmissingfilesdir = $(GLUSTERFS_LIBEXECDIR)/gfind_missing_files
+if WITH_SERVER
gfindmissingfiles_SCRIPTS = gfind_missing_files.sh gfid_to_path.sh \
gfid_to_path.py
+endif
EXTRA_DIST = gfind_missing_files.sh gfid_to_path.sh \
gfid_to_path.py
+if WITH_SERVER
gfindmissingfiles_PROGRAMS = gcrawler
+endif
gcrawler_SOURCES = gcrawler.c
gcrawler_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
@@ -16,11 +20,13 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
AM_CFLAGS = -Wall $(GF_CFLAGS)
+if WITH_SERVER
uninstall-local:
rm -f $(DESTDIR)$(sbindir)/gfind_missing_files
install-data-local:
rm -f $(DESTDIR)$(sbindir)/gfind_missing_files
ln -s $(GLUSTERFS_LIBEXECDIR)/gfind_missing_files/gfind_missing_files.sh $(DESTDIR)$(sbindir)/gfind_missing_files
+endif
CLEANFILES =
diff --git a/tools/gfind_missing_files/gcrawler.c b/tools/gfind_missing_files/gcrawler.c
index 02b644a1a77..4acbe92bc8f 100644
--- a/tools/gfind_missing_files/gcrawler.c
+++ b/tools/gfind_missing_files/gcrawler.c
@@ -16,33 +16,50 @@
#include <string.h>
#include <dirent.h>
#include <assert.h>
-#include "locking.h"
+#include <glusterfs/locking.h>
-#include "compat.h"
-#include "list.h"
-#include "syscall.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/list.h>
+#include <glusterfs/syscall.h>
#define THREAD_MAX 32
#define BUMP(name) INC(name, 1)
#define DEFAULT_WORKERS 4
-#define NEW(x) { \
- x = calloc (1, sizeof (typeof (*x))); \
- }
-
-#define err(x ...) fprintf(stderr, x)
-#define out(x ...) fprintf(stdout, x)
-#define dbg(x ...) do { if (debug) fprintf(stdout, x); } while (0)
-#define tout(x ...) do { out("[%ld] ", pthread_self()); out(x); } while (0)
-#define terr(x ...) do { err("[%ld] ", pthread_self()); err(x); } while (0)
-#define tdbg(x ...) do { dbg("[%ld] ", pthread_self()); dbg(x); } while (0)
+#define NEW(x) \
+ { \
+ x = calloc(1, sizeof(typeof(*x))); \
+ }
+
+#define err(x...) fprintf(stderr, x)
+#define out(x...) fprintf(stdout, x)
+#define dbg(x...) \
+ do { \
+ if (debug) \
+ fprintf(stdout, x); \
+ } while (0)
+#define tout(x...) \
+ do { \
+ out("[%ld] ", pthread_self()); \
+ out(x); \
+ } while (0)
+#define terr(x...) \
+ do { \
+ err("[%ld] ", pthread_self()); \
+ err(x); \
+ } while (0)
+#define tdbg(x...) \
+ do { \
+ dbg("[%ld] ", pthread_self()); \
+ dbg(x); \
+ } while (0)
int debug = 0;
const char *slavemnt = NULL;
int workers = 0;
struct stats {
- unsigned long long int cnt_skipped_gfids;
+ unsigned long long int cnt_skipped_gfids;
};
pthread_spinlock_t stats_lock;
@@ -50,518 +67,515 @@ pthread_spinlock_t stats_lock;
struct stats stats_total;
int stats = 0;
-#define INC(name, val) do { \
- if (!stats) \
- break; \
- pthread_spin_lock(&stats_lock); \
- { \
- stats_total.cnt_##name += val; \
- } \
- pthread_spin_unlock(&stats_lock); \
- } while (0)
+#define INC(name, val) \
+ do { \
+ if (!stats) \
+ break; \
+ pthread_spin_lock(&stats_lock); \
+ { \
+ stats_total.cnt_##name += val; \
+ } \
+ pthread_spin_unlock(&stats_lock); \
+ } while (0)
void
stats_dump()
{
- if (!stats)
- return;
+ if (!stats)
+ return;
- out("-------------------------------------------\n");
- out("Skipped_Files : %10lld\n", stats_total.cnt_skipped_gfids);
- out("-------------------------------------------\n");
+ out("-------------------------------------------\n");
+ out("Skipped_Files : %10lld\n", stats_total.cnt_skipped_gfids);
+ out("-------------------------------------------\n");
}
struct dirjob {
- struct list_head list;
+ struct list_head list;
- char *dirname;
+ char *dirname;
- struct dirjob *parent;
- int ret; /* final status of this subtree */
- int refcnt; /* how many dirjobs have this as parent */
+ struct dirjob *parent;
+ int ret; /* final status of this subtree */
+ int refcnt; /* how many dirjobs have this as parent */
- pthread_spinlock_t lock;
+ pthread_spinlock_t lock;
};
-
struct xwork {
- pthread_t cthreads[THREAD_MAX]; /* crawler threads */
- int count;
- int idle;
- int stop;
+ pthread_t cthreads[THREAD_MAX]; /* crawler threads */
+ int count;
+ int idle;
+ int stop;
- struct dirjob crawl;
+ struct dirjob crawl;
- struct dirjob *rootjob; /* to verify completion in xwork_fini() */
+ struct dirjob *rootjob; /* to verify completion in xwork_fini() */
- pthread_mutex_t mutex;
- pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
};
-
struct dirjob *
-dirjob_ref (struct dirjob *job)
+dirjob_ref(struct dirjob *job)
{
- pthread_spin_lock (&job->lock);
- {
- job->refcnt++;
- }
- pthread_spin_unlock (&job->lock);
+ pthread_spin_lock(&job->lock);
+ {
+ job->refcnt++;
+ }
+ pthread_spin_unlock(&job->lock);
- return job;
+ return job;
}
-
void
-dirjob_free (struct dirjob *job)
+dirjob_free(struct dirjob *job)
{
- assert (list_empty (&job->list));
+ assert(list_empty(&job->list));
- pthread_spin_destroy (&job->lock);
- free (job->dirname);
- free (job);
+ pthread_spin_destroy(&job->lock);
+ free(job->dirname);
+ free(job);
}
void
-dirjob_ret (struct dirjob *job, int err)
+dirjob_ret(struct dirjob *job, int err)
{
- int ret = 0;
- int refcnt = 0;
- struct dirjob *parent = NULL;
-
- pthread_spin_lock (&job->lock);
- {
- refcnt = --job->refcnt;
- job->ret = (job->ret || err);
- }
- pthread_spin_unlock (&job->lock);
-
- if (refcnt == 0) {
- ret = job->ret;
-
- if (ret)
- terr ("Failed: %s (%d)\n", job->dirname, ret);
- else
- tdbg ("Finished: %s\n", job->dirname);
-
- parent = job->parent;
- if (parent)
- dirjob_ret (parent, ret);
+ int ret = 0;
+ int refcnt = 0;
+ struct dirjob *parent = NULL;
+
+ pthread_spin_lock(&job->lock);
+ {
+ refcnt = --job->refcnt;
+ job->ret = (job->ret || err);
+ }
+ pthread_spin_unlock(&job->lock);
+
+ if (refcnt == 0) {
+ ret = job->ret;
+
+ if (ret)
+ terr("Failed: %s (%d)\n", job->dirname, ret);
+ else
+ tdbg("Finished: %s\n", job->dirname);
+
+ parent = job->parent;
+ if (parent)
+ dirjob_ret(parent, ret);
- dirjob_free (job);
- job = NULL;
- }
+ dirjob_free(job);
+ job = NULL;
+ }
}
-
struct dirjob *
-dirjob_new (const char *dir, struct dirjob *parent)
+dirjob_new(const char *dir, struct dirjob *parent)
{
- struct dirjob *job = NULL;
+ struct dirjob *job = NULL;
- NEW(job);
- if (!job)
- return NULL;
+ NEW(job);
+ if (!job)
+ return NULL;
- job->dirname = strdup (dir);
- if (!job->dirname) {
- free (job);
- return NULL;
- }
+ job->dirname = strdup(dir);
+ if (!job->dirname) {
+ free(job);
+ return NULL;
+ }
- INIT_LIST_HEAD(&job->list);
- pthread_spin_init (&job->lock, PTHREAD_PROCESS_PRIVATE);
- job->ret = 0;
+ INIT_LIST_HEAD(&job->list);
+ pthread_spin_init(&job->lock, PTHREAD_PROCESS_PRIVATE);
+ job->ret = 0;
- if (parent)
- job->parent = dirjob_ref (parent);
+ if (parent)
+ job->parent = dirjob_ref(parent);
- job->refcnt = 1;
+ job->refcnt = 1;
- return job;
+ return job;
}
void
-xwork_addcrawl (struct xwork *xwork, struct dirjob *job)
+xwork_addcrawl(struct xwork *xwork, struct dirjob *job)
{
- pthread_mutex_lock (&xwork->mutex);
- {
- list_add_tail (&job->list, &xwork->crawl.list);
- pthread_cond_broadcast (&xwork->cond);
- }
- pthread_mutex_unlock (&xwork->mutex);
+ pthread_mutex_lock(&xwork->mutex);
+ {
+ list_add_tail(&job->list, &xwork->crawl.list);
+ pthread_cond_broadcast(&xwork->cond);
+ }
+ pthread_mutex_unlock(&xwork->mutex);
}
int
-xwork_add (struct xwork *xwork, const char *dir, struct dirjob *parent)
+xwork_add(struct xwork *xwork, const char *dir, struct dirjob *parent)
{
- struct dirjob *job = NULL;
+ struct dirjob *job = NULL;
- job = dirjob_new (dir, parent);
- if (!job)
- return -1;
+ job = dirjob_new(dir, parent);
+ if (!job)
+ return -1;
- xwork_addcrawl (xwork, job);
+ xwork_addcrawl(xwork, job);
- return 0;
+ return 0;
}
-
struct dirjob *
-xwork_pick (struct xwork *xwork, int block)
+xwork_pick(struct xwork *xwork, int block)
{
- struct dirjob *job = NULL;
- struct list_head *head = NULL;
+ struct dirjob *job = NULL;
+ struct list_head *head = NULL;
- head = &xwork->crawl.list;
+ head = &xwork->crawl.list;
- pthread_mutex_lock (&xwork->mutex);
- {
- for (;;) {
- if (xwork->stop)
- break;
-
- if (!list_empty (head)) {
- job = list_entry (head->next, typeof(*job),
- list);
- list_del_init (&job->list);
- break;
- }
-
- if (((xwork->count * 2) == xwork->idle) &&
- list_empty (&xwork->crawl.list)) {
- /* no outstanding jobs, and no
- active workers
- */
- tdbg ("Jobless. Terminating\n");
- xwork->stop = 1;
- pthread_cond_broadcast (&xwork->cond);
- break;
- }
-
- if (!block)
- break;
-
- xwork->idle++;
- pthread_cond_wait (&xwork->cond, &xwork->mutex);
- xwork->idle--;
- }
+ pthread_mutex_lock(&xwork->mutex);
+ {
+ for (;;) {
+ if (xwork->stop)
+ break;
+
+ if (!list_empty(head)) {
+ job = list_entry(head->next, typeof(*job), list);
+ list_del_init(&job->list);
+ break;
+ }
+
+ if (((xwork->count * 2) == xwork->idle) &&
+ list_empty(&xwork->crawl.list)) {
+ /* no outstanding jobs, and no
+ active workers
+ */
+ tdbg("Jobless. Terminating\n");
+ xwork->stop = 1;
+ pthread_cond_broadcast(&xwork->cond);
+ break;
+ }
+
+ if (!block)
+ break;
+
+ xwork->idle++;
+ pthread_cond_wait(&xwork->cond, &xwork->mutex);
+ xwork->idle--;
}
- pthread_mutex_unlock (&xwork->mutex);
+ }
+ pthread_mutex_unlock(&xwork->mutex);
- return job;
+ return job;
}
int
-skip_name (const char *dirname, const char *name)
+skip_name(const char *dirname, const char *name)
{
- if (strcmp (name, ".") == 0)
- return 1;
+ if (strcmp(name, ".") == 0)
+ return 1;
- if (strcmp (name, "..") == 0)
- return 1;
+ if (strcmp(name, "..") == 0)
+ return 1;
- if (strcmp (name, "changelogs") == 0)
- return 1;
+ if (strcmp(name, "changelogs") == 0)
+ return 1;
- if (strcmp (name, "health_check") == 0)
- return 1;
+ if (strcmp(name, "health_check") == 0)
+ return 1;
- if (strcmp (name, "indices") == 0)
- return 1;
+ if (strcmp(name, "indices") == 0)
+ return 1;
- if (strcmp (name, "landfill") == 0)
- return 1;
+ if (strcmp(name, "landfill") == 0)
+ return 1;
- return 0;
+ return 0;
}
int
-skip_stat (struct dirjob *job, const char *name)
+skip_stat(struct dirjob *job, const char *name)
{
- if (job == NULL)
- return 0;
-
- if (strcmp (job->dirname, ".glusterfs") == 0) {
- tdbg ("Directly adding directories under .glusterfs "
- "to global list: %s\n", name);
- return 1;
- }
+ if (job == NULL)
+ return 0;
- if (job->parent != NULL) {
- if (strcmp (job->parent->dirname, ".glusterfs") == 0) {
- tdbg ("Directly adding directories under .glusterfs/XX "
- "to global list: %s\n", name);
- return 1;
- }
+ if (strcmp(job->dirname, ".glusterfs") == 0) {
+ tdbg(
+ "Directly adding directories under .glusterfs "
+ "to global list: %s\n",
+ name);
+ return 1;
+ }
+
+ if (job->parent != NULL) {
+ if (strcmp(job->parent->dirname, ".glusterfs") == 0) {
+ tdbg(
+ "Directly adding directories under .glusterfs/XX "
+ "to global list: %s\n",
+ name);
+ return 1;
}
+ }
- return 0;
+ return 0;
}
int
-xworker_do_crawl (struct xwork *xwork, struct dirjob *job)
+xworker_do_crawl(struct xwork *xwork, struct dirjob *job)
{
- DIR *dirp = NULL;
- int ret = -1;
- int boff;
- int plen;
- char *path = NULL;
- struct dirjob *cjob = NULL;
- struct stat statbuf = {0,};
- struct dirent *entry;
- struct dirent scratch[2] = {{0,},};
- char gfid_path[PATH_MAX] = {0,};
-
-
- plen = strlen (job->dirname) + 256 + 2;
- path = alloca (plen);
-
- tdbg ("Entering: %s\n", job->dirname);
-
- dirp = sys_opendir (job->dirname);
- if (!dirp) {
- terr ("opendir failed on %s (%s)\n", job->dirname,
- strerror (errno));
+ DIR *dirp = NULL;
+ int ret = -1;
+ int boff;
+ int plen;
+ char *path = NULL;
+ struct dirjob *cjob = NULL;
+ struct stat statbuf = {
+ 0,
+ };
+ struct dirent *entry;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char gfid_path[PATH_MAX] = {
+ 0,
+ };
+
+ plen = strlen(job->dirname) + 256 + 2;
+ path = alloca(plen);
+
+ tdbg("Entering: %s\n", job->dirname);
+
+ dirp = sys_opendir(job->dirname);
+ if (!dirp) {
+ terr("opendir failed on %s (%s)\n", job->dirname, strerror(errno));
+ goto out;
+ }
+
+ boff = sprintf(path, "%s/", job->dirname);
+
+ for (;;) {
+ errno = 0;
+ entry = sys_readdir(dirp, scratch);
+ if (!entry || errno != 0) {
+ if (errno != 0) {
+ err("readdir(%s): %s\n", job->dirname, strerror(errno));
+ ret = errno;
goto out;
+ }
+ break;
}
- boff = sprintf (path, "%s/", job->dirname);
+ if (entry->d_ino == 0)
+ continue;
+
+ if (skip_name(job->dirname, entry->d_name))
+ continue;
+
+ /* It is sure that, children and grandchildren of .glusterfs
+ * are directories, just add them to global queue.
+ */
+ if (skip_stat(job, entry->d_name)) {
+ strncpy(path + boff, entry->d_name, (plen - boff));
+ cjob = dirjob_new(path, job);
+ if (!cjob) {
+ err("dirjob_new(%s): %s\n", path, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+ xwork_addcrawl(xwork, cjob);
+ continue;
+ }
- for (;;) {
- errno = 0;
- entry = sys_readdir (dirp, scratch);
- if (!entry || errno != 0) {
- if (errno != 0) {
- err ("readdir(%s): %s\n", job->dirname,
- strerror (errno));
- ret = errno;
- goto out;
- }
- break;
- }
-
- if (entry->d_ino == 0)
- continue;
-
- if (skip_name (job->dirname, entry->d_name))
- continue;
-
- /* It is sure that, children and grandchildren of .glusterfs
- * are directories, just add them to global queue.
- */
- if (skip_stat (job, entry->d_name)) {
- strncpy (path + boff, entry->d_name, (plen-boff));
- cjob = dirjob_new (path, job);
- if (!cjob) {
- err ("dirjob_new(%s): %s\n",
- path, strerror (errno));
- ret = -1;
- goto out;
- }
- xwork_addcrawl (xwork, cjob);
- continue;
- }
-
- (void) snprintf (gfid_path, sizeof(gfid_path), "%s/.gfid/%s",
- slavemnt, entry->d_name);
- ret = sys_lstat (gfid_path, &statbuf);
-
- if (ret && errno == ENOENT) {
- out ("%s\n", entry->d_name);
- BUMP (skipped_gfids);
- }
-
- if (ret && errno != ENOENT) {
- err ("stat on slave failed(%s): %s\n",
- gfid_path, strerror (errno));
- goto out;
- }
+ (void)snprintf(gfid_path, sizeof(gfid_path), "%s/.gfid/%s", slavemnt,
+ entry->d_name);
+ ret = sys_lstat(gfid_path, &statbuf);
+
+ if (ret && errno == ENOENT) {
+ out("%s\n", entry->d_name);
+ BUMP(skipped_gfids);
+ }
+
+ if (ret && errno != ENOENT) {
+ err("stat on slave failed(%s): %s\n", gfid_path, strerror(errno));
+ goto out;
}
+ }
- ret = 0;
+ ret = 0;
out:
- if (dirp)
- (void) sys_closedir (dirp);
+ if (dirp)
+ (void)sys_closedir(dirp);
- return ret;
+ return ret;
}
-
void *
-xworker_crawl (void *data)
+xworker_crawl(void *data)
{
- struct xwork *xwork = data;
- struct dirjob *job = NULL;
- int ret = -1;
+ struct xwork *xwork = data;
+ struct dirjob *job = NULL;
+ int ret = -1;
- while ((job = xwork_pick (xwork, 0))) {
- ret = xworker_do_crawl (xwork, job);
- dirjob_ret (job, ret);
- }
+ while ((job = xwork_pick(xwork, 0))) {
+ ret = xworker_do_crawl(xwork, job);
+ dirjob_ret(job, ret);
+ }
- return NULL;
+ return NULL;
}
int
-xwork_fini (struct xwork *xwork, int stop)
+xwork_fini(struct xwork *xwork, int stop)
{
- int i = 0;
- int ret = 0;
- void *tret = 0;
-
- pthread_mutex_lock (&xwork->mutex);
- {
- xwork->stop = (xwork->stop || stop);
- pthread_cond_broadcast (&xwork->cond);
- }
- pthread_mutex_unlock (&xwork->mutex);
-
- for (i = 0; i < xwork->count; i++) {
- pthread_join (xwork->cthreads[i], &tret);
- tdbg ("CThread id %ld returned %p\n",
- xwork->cthreads[i], tret);
- }
-
- if (debug) {
- assert (xwork->rootjob->refcnt == 1);
- dirjob_ret (xwork->rootjob, 0);
- }
-
- if (stats)
- pthread_spin_destroy(&stats_lock);
-
- return ret;
+ int i = 0;
+ int ret = 0;
+ void *tret = 0;
+
+ pthread_mutex_lock(&xwork->mutex);
+ {
+ xwork->stop = (xwork->stop || stop);
+ pthread_cond_broadcast(&xwork->cond);
+ }
+ pthread_mutex_unlock(&xwork->mutex);
+
+ for (i = 0; i < xwork->count; i++) {
+ pthread_join(xwork->cthreads[i], &tret);
+ tdbg("CThread id %ld returned %p\n", xwork->cthreads[i], tret);
+ }
+
+ if (debug) {
+ assert(xwork->rootjob->refcnt == 1);
+ dirjob_ret(xwork->rootjob, 0);
+ }
+
+ if (stats)
+ pthread_spin_destroy(&stats_lock);
+
+ return ret;
}
-
int
-xwork_init (struct xwork *xwork, int count)
+xwork_init(struct xwork *xwork, int count)
{
- int i = 0;
- int ret = 0;
- struct dirjob *rootjob = NULL;
+ int i = 0;
+ int ret = 0;
+ struct dirjob *rootjob = NULL;
- if (stats)
- pthread_spin_init (&stats_lock, PTHREAD_PROCESS_PRIVATE);
+ if (stats)
+ pthread_spin_init(&stats_lock, PTHREAD_PROCESS_PRIVATE);
- pthread_mutex_init (&xwork->mutex, NULL);
- pthread_cond_init (&xwork->cond, NULL);
+ pthread_mutex_init(&xwork->mutex, NULL);
+ pthread_cond_init(&xwork->cond, NULL);
- INIT_LIST_HEAD (&xwork->crawl.list);
+ INIT_LIST_HEAD(&xwork->crawl.list);
- rootjob = dirjob_new (".glusterfs", NULL);
- if (debug)
- xwork->rootjob = dirjob_ref (rootjob);
+ rootjob = dirjob_new(".glusterfs", NULL);
+ if (debug)
+ xwork->rootjob = dirjob_ref(rootjob);
- xwork_addcrawl (xwork, rootjob);
+ xwork_addcrawl(xwork, rootjob);
- xwork->count = count;
- for (i = 0; i < count; i++) {
- ret = pthread_create (&xwork->cthreads[i], NULL,
- xworker_crawl, xwork);
- if (ret)
- break;
- tdbg ("Spawned crawler %d thread %ld\n", i,
- xwork->cthreads[i]);
- }
+ xwork->count = count;
+ for (i = 0; i < count; i++) {
+ ret = pthread_create(&xwork->cthreads[i], NULL, xworker_crawl, xwork);
+ if (ret)
+ break;
+ tdbg("Spawned crawler %d thread %ld\n", i, xwork->cthreads[i]);
+ }
- return ret;
+ return ret;
}
-
int
-xfind (const char *basedir)
+xfind(const char *basedir)
{
- struct xwork xwork;
- int ret = 0;
- char *cwd = NULL;
-
- ret = chdir (basedir);
- if (ret) {
- err ("%s: %s\n", basedir, strerror (errno));
- return ret;
- }
+ struct xwork xwork;
+ int ret = 0;
+ char *cwd = NULL;
- cwd = getcwd (0, 0);
- if (!cwd) {
- err ("getcwd(): %s\n", strerror (errno));
- return -1;
- }
+ ret = chdir(basedir);
+ if (ret) {
+ err("%s: %s\n", basedir, strerror(errno));
+ return ret;
+ }
- tdbg ("Working directory: %s\n", cwd);
- free (cwd);
+ cwd = getcwd(0, 0);
+ if (!cwd) {
+ err("getcwd(): %s\n", strerror(errno));
+ return -1;
+ }
- memset (&xwork, 0, sizeof (xwork));
+ tdbg("Working directory: %s\n", cwd);
+ free(cwd);
- ret = xwork_init (&xwork, workers);
- if (ret == 0)
- xworker_crawl (&xwork);
+ memset(&xwork, 0, sizeof(xwork));
- ret = xwork_fini (&xwork, ret);
- stats_dump ();
+ ret = xwork_init(&xwork, workers);
+ if (ret == 0)
+ xworker_crawl(&xwork);
- return ret;
+ ret = xwork_fini(&xwork, ret);
+ stats_dump();
+
+ return ret;
}
static char *
-parse_and_validate_args (int argc, char *argv[])
+parse_and_validate_args(int argc, char *argv[])
{
- char *basedir = NULL;
- struct stat d = {0, };
- int ret = -1;
+ char *basedir = NULL;
+ struct stat d = {
+ 0,
+ };
+ int ret = -1;
#ifndef __FreeBSD__
- unsigned char volume_id[16];
+ unsigned char volume_id[16];
#endif /* __FreeBSD__ */
- char *slv_mnt = NULL;
+ char *slv_mnt = NULL;
- if (argc != 4) {
- err ("Usage: %s <DIR> <SLAVE-VOL-MOUNT> <CRAWL-THREAD-COUNT>\n",
- argv[0]);
- return NULL;
- }
+ if (argc != 4) {
+ err("Usage: %s <DIR> <SLAVE-VOL-MOUNT> <CRAWL-THREAD-COUNT>\n",
+ argv[0]);
+ return NULL;
+ }
- basedir = argv[1];
- ret = sys_lstat (basedir, &d);
- if (ret) {
- err ("%s: %s\n", basedir, strerror (errno));
- return NULL;
- }
+ basedir = argv[1];
+ ret = sys_lstat(basedir, &d);
+ if (ret) {
+ err("%s: %s\n", basedir, strerror(errno));
+ return NULL;
+ }
#ifndef __FreeBSD__
- ret = sys_lgetxattr (basedir, "trusted.glusterfs.volume-id",
- volume_id, 16);
- if (ret != 16) {
- err ("%s:Not a valid brick path.\n", basedir);
- return NULL;
- }
+ ret = sys_lgetxattr(basedir, "trusted.glusterfs.volume-id", volume_id, 16);
+ if (ret != 16) {
+ err("%s:Not a valid brick path.\n", basedir);
+ return NULL;
+ }
#endif /* __FreeBSD__ */
- slv_mnt = argv[2];
- ret = sys_lstat (slv_mnt, &d);
- if (ret) {
- err ("%s: %s\n", slv_mnt, strerror (errno));
- return NULL;
- }
- slavemnt = argv[2];
+ slv_mnt = argv[2];
+ ret = sys_lstat(slv_mnt, &d);
+ if (ret) {
+ err("%s: %s\n", slv_mnt, strerror(errno));
+ return NULL;
+ }
+ slavemnt = argv[2];
- workers = atoi(argv[3]);
- if (workers <= 0)
- workers = DEFAULT_WORKERS;
+ workers = atoi(argv[3]);
+ if (workers <= 0)
+ workers = DEFAULT_WORKERS;
- return basedir;
+ return basedir;
}
int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
{
- char *basedir = NULL;
+ char *basedir = NULL;
- basedir = parse_and_validate_args (argc, argv);
- if (!basedir)
- return 1;
+ basedir = parse_and_validate_args(argc, argv);
+ if (!basedir)
+ return 1;
- xfind (basedir);
+ xfind(basedir);
- return 0;
+ return 0;
}
diff --git a/tools/gfind_missing_files/gfid_to_path.py b/tools/gfind_missing_files/gfid_to_path.py
index 8362f68b955..01e08a9494a 100644
--- a/tools/gfind_missing_files/gfid_to_path.py
+++ b/tools/gfind_missing_files/gfid_to_path.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
# This file is part of GlusterFS.
diff --git a/tools/gfind_missing_files/gfind_missing_files.sh b/tools/gfind_missing_files/gfind_missing_files.sh
index f42fe7b05af..e7aaa0b5dd4 100644
--- a/tools/gfind_missing_files/gfind_missing_files.sh
+++ b/tools/gfind_missing_files/gfind_missing_files.sh
@@ -61,7 +61,7 @@ mount_slave()
parse_cli()
{
- if [[ $# -ne 4 ]]; then
+ if [ "$#" -ne 4 ]; then
echo "Usage: gfind_missing_files <brick-path> <slave-host> <slave-vol> <OUTFILE>"
exit 1
else
diff --git a/tools/glusterfind/Makefile.am b/tools/glusterfind/Makefile.am
index 92fa61461ad..f17dbdb228e 100644
--- a/tools/glusterfind/Makefile.am
+++ b/tools/glusterfind/Makefile.am
@@ -1,11 +1,14 @@
SUBDIRS = src
-EXTRA_DIST = S57glusterfind-delete-post.py
+EXTRA_DIST = S57glusterfind-delete-post.py glusterfind
+if WITH_SERVER
bin_SCRIPTS = glusterfind
+endif
CLEANFILES = $(bin_SCRIPTS)
+if WITH_SERVER
deletehookscriptsdir = $(GLUSTERFS_LIBEXECDIR)/glusterfind/
deletehookscripts_SCRIPTS = S57glusterfind-delete-post.py
@@ -18,3 +21,4 @@ install-data-local:
rm -f $(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/S57glusterfind-delete-post
ln -s $(GLUSTERFS_LIBEXECDIR)/glusterfind/S57glusterfind-delete-post.py \
$(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/S57glusterfind-delete-post
+endif
diff --git a/tools/glusterfind/S57glusterfind-delete-post.py b/tools/glusterfind/S57glusterfind-delete-post.py
index fb6c222df03..5beece220f0 100755
--- a/tools/glusterfind/S57glusterfind-delete-post.py
+++ b/tools/glusterfind/S57glusterfind-delete-post.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/python3
import os
import shutil
from errno import ENOENT
@@ -18,7 +18,7 @@ def handle_rm_error(func, path, exc_info):
def get_glusterd_workdir():
p = Popen(["gluster", "system::", "getwd"],
- stdout=PIPE, stderr=PIPE)
+ stdout=PIPE, stderr=PIPE, universal_newlines=True)
out, _ = p.communicate()
diff --git a/tools/glusterfind/glusterfind.in b/tools/glusterfind/glusterfind.in
index cff8973980a..ca154b625dd 100644
--- a/tools/glusterfind/glusterfind.in
+++ b/tools/glusterfind/glusterfind.in
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
# This file is part of GlusterFS.
@@ -10,6 +10,7 @@
import sys
sys.path.insert(1, '@GLUSTERFS_LIBEXECDIR@/')
+sys.path.insert(1, '@GLUSTERFS_LIBEXECDIR@/glusterfind')
from glusterfind.main import main
diff --git a/tools/glusterfind/src/Makefile.am b/tools/glusterfind/src/Makefile.am
index e4469c1c0cf..43b6141b01c 100644
--- a/tools/glusterfind/src/Makefile.am
+++ b/tools/glusterfind/src/Makefile.am
@@ -1,12 +1,14 @@
glusterfinddir = $(GLUSTERFS_LIBEXECDIR)/glusterfind
+if WITH_SERVER
glusterfind_PYTHON = conf.py utils.py __init__.py \
- main.py libgfchangelog.py changelogdata.py
+ main.py libgfchangelog.py changelogdata.py gfind_py2py3.py
glusterfind_SCRIPTS = changelog.py nodeagent.py \
brickfind.py
glusterfind_DATA = tool.conf
+endif
EXTRA_DIST = changelog.py nodeagent.py brickfind.py \
tool.conf changelogdata.py
diff --git a/tools/glusterfind/src/__init__.py b/tools/glusterfind/src/__init__.py
index 0ffb3f7432d..1753698b5fa 100644
--- a/tools/glusterfind/src/__init__.py
+++ b/tools/glusterfind/src/__init__.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
diff --git a/tools/glusterfind/src/brickfind.py b/tools/glusterfind/src/brickfind.py
index efc840bca70..73b6350188d 100644
--- a/tools/glusterfind/src/brickfind.py
+++ b/tools/glusterfind/src/brickfind.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -13,7 +13,10 @@ import os
import sys
import logging
from argparse import ArgumentParser, RawDescriptionHelpFormatter
-import urllib
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
import time
from utils import mkdirp, setup_logger, create_file, output_write, find
@@ -38,11 +41,20 @@ def brickfind_crawl(brick, args):
with open(args.outfile, "a+") as fout:
brick_path_len = len(brick)
- def output_callback(path, filter_result):
+ def output_callback(path, filter_result, is_dir):
path = path.strip()
path = path[brick_path_len+1:]
- output_write(fout, path, args.output_prefix,
- encode=(not args.no_encode), tag=args.tag)
+
+ if args.type == "both":
+ output_write(fout, path, args.output_prefix,
+ encode=(not args.no_encode), tag=args.tag,
+ field_separator=args.field_separator)
+ else:
+ if (is_dir and args.type == "d") or (
+ (not is_dir) and args.type == "f"):
+ output_write(fout, path, args.output_prefix,
+ encode=(not args.no_encode), tag=args.tag,
+ field_separator=args.field_separator)
ignore_dirs = [os.path.join(brick, dirname)
for dirname in
@@ -61,9 +73,9 @@ def _get_args():
parser.add_argument("session", help="Session Name")
parser.add_argument("volume", help="Volume Name")
+ parser.add_argument("node", help="Node Name")
parser.add_argument("brick", help="Brick Name")
parser.add_argument("outfile", help="Output File")
- parser.add_argument("start", help="Start Time", type=float)
parser.add_argument("tag", help="Tag to prefix file name with")
parser.add_argument("--only-query", help="Only query, No session update",
action="store_true")
@@ -73,6 +85,11 @@ def _get_args():
action="store_true")
parser.add_argument("--output-prefix", help="File prefix in output",
default=".")
+ parser.add_argument('--type', help="type: f, f-files only"
+ " d, d-directories only, by default = both",
+ default='both')
+ parser.add_argument("--field-separator", help="Field separator",
+ default=" ")
return parser.parse_args()
@@ -81,7 +98,7 @@ if __name__ == "__main__":
args = _get_args()
session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
status_file = os.path.join(session_dir, args.volume,
- "%s.status" % urllib.quote_plus(args.brick))
+ "%s.status" % urllib.quote_plus(args.brick))
status_file_pre = status_file + ".pre"
mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
logger=logger)
@@ -96,6 +113,6 @@ if __name__ == "__main__":
time_to_update = int(time.time())
brickfind_crawl(args.brick, args)
if not args.only_query:
- with open(status_file_pre, "w", buffering=0) as f:
+ with open(status_file_pre, "w") as f:
f.write(str(time_to_update))
sys.exit(0)
diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
index 721b8d0ca3a..a5e9ea4288f 100644
--- a/tools/glusterfind/src/changelog.py
+++ b/tools/glusterfind/src/changelog.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -14,9 +14,13 @@ import sys
import time
import xattr
import logging
+from gfind_py2py3 import bytearray_to_str
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import hashlib
-import urllib
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
import codecs
import libgfchangelog
@@ -40,8 +44,6 @@ history_turn_time = 0
logger = logging.getLogger()
-
-
def pgfid_to_path(brick, changelog_data):
"""
For all the pgfids in table, converts into path using recursive
@@ -49,7 +51,7 @@ def pgfid_to_path(brick, changelog_data):
"""
# pgfid1 to path1 in case of CREATE/MKNOD/MKDIR/LINK/SYMLINK
for row in changelog_data.gfidpath_get_distinct("pgfid1", {"path1": ""}):
- # In case of Data/Metadata only, pgfid1 will not be their
+ # In case of Data/Metadata only, pgfid1 will not be there
if row[0] == "":
continue
@@ -94,7 +96,7 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
path = symlink_gfid_to_path(brick, gfid)
path = output_path_prepare(path, args)
changelog_data.gfidpath_update({"path1": path},
- {"gfid": gfid})
+ {"gfid": gfid})
except (IOError, OSError) as e:
logger.warn("Error converting to path: %s" % e)
continue
@@ -104,15 +106,55 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
changelog_data.inodegfid_add(os.stat(p).st_ino, gfid)
file_xattrs = xattr.list(p)
for x in file_xattrs:
- if x.startswith("trusted.pgfid."):
+ x_str = bytearray_to_str(x)
+ if x_str.startswith("trusted.pgfid."):
# PGFID in pgfid table
- changelog_data.pgfid_add(x.split(".")[-1])
+ changelog_data.pgfid_add(x_str.split(".")[-1])
except (IOError, OSError):
# All OS Errors ignored, since failures will be logged
# in End. All GFIDs present in gfidpath table
continue
+def enum_hard_links_using_gfid2path(brick, gfid, args):
+ hardlinks = []
+ p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ if not os.path.isdir(p):
+ # we have a symlink or a normal file
+ try:
+ file_xattrs = xattr.list(p)
+ for x in file_xattrs:
+ x_str = bytearray_to_str(x)
+ if x_str.startswith("trusted.gfid2path."):
+ # get the value for the xattr i.e. <PGFID>/<BN>
+ v = xattr.getxattr(p, x_str)
+ v_str = bytearray_to_str(v)
+ pgfid, bn = v_str.split(os.sep)
+ try:
+ path = symlink_gfid_to_path(brick, pgfid)
+ fullpath = os.path.join(path, bn)
+ fullpath = output_path_prepare(fullpath, args)
+ hardlinks.append(fullpath)
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+ except (IOError, OSError):
+ pass
+ return hardlinks
+
+
+def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args):
+ path = ""
+ for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}):
+ gfid = row[3].strip()
+ logger.debug("Processing gfid %s" % gfid)
+ hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args)
+
+ path = ",".join(hardlinks)
+
+ changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid})
+
+
def gfid_to_path_using_pgfid(brick, changelog_data, args):
"""
For all the pgfids collected, Converts to Path and
@@ -160,10 +202,10 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args):
try:
path = symlink_gfid_to_path(brick, row[0])
find(os.path.join(brick, path),
- callback_func=output_callback,
- filter_func=inode_filter,
- ignore_dirs=ignore_dirs,
- subdirs_crawl=False)
+ callback_func=output_callback,
+ filter_func=inode_filter,
+ ignore_dirs=ignore_dirs,
+ subdirs_crawl=False)
except (IOError, OSError) as e:
logger.warn("Error converting to path: %s" % e)
continue
@@ -245,7 +287,7 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
session_dir = os.path.join(conf.get_opt("session_dir"),
args.session)
status_file = os.path.join(session_dir, args.volume,
- "%s.status" % urllib.quote_plus(args.brick))
+ "%s.status" % urllib.quote_plus(args.brick))
# Get previous session
try:
@@ -272,9 +314,10 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
actual_end = libgfchangelog.cl_history_changelog(
cl_path, start, end, CHANGELOGAPI_NUM_WORKERS)
except libgfchangelog.ChangelogException as e:
- fail("%s Historical Changelogs not available: %s" % (brick, e),
- logger=logger)
+ fail("%s: %s Historical Changelogs not available: %s" %
+ (args.node, brick, e), logger=logger)
+ logger.info("[1/4] Starting changelog parsing ...")
try:
# scan followed by getchanges till scan returns zero.
# history_scan() is blocking call, till it gets the number
@@ -296,25 +339,34 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
libgfchangelog.cl_history_done(change)
except IOError as e:
logger.warn("Error parsing changelog file %s: %s" %
- (change, e))
+ (change, e))
changelog_data.commit()
except libgfchangelog.ChangelogException as e:
fail("%s Error during Changelog Crawl: %s" % (brick, e),
logger=logger)
+ logger.info("[1/4] Finished changelog parsing.")
+
# Convert all pgfid available from Changelogs
+ logger.info("[2/4] Starting 'pgfid to path' conversions ...")
pgfid_to_path(brick, changelog_data)
changelog_data.commit()
+ logger.info("[2/4] Finished 'pgfid to path' conversions.")
- # Convert all GFIDs for which no other additional details available
- gfid_to_path_using_pgfid(brick, changelog_data, args)
+ # Convert all gfids recorded for data and metadata to all hardlink paths
+ logger.info("[3/4] Starting 'gfid2path' conversions ...")
+ gfid_to_all_paths_using_gfid2path(brick, changelog_data, args)
changelog_data.commit()
+ logger.info("[3/4] Finished 'gfid2path' conversions.")
# If some GFIDs fail to get converted from previous step,
# convert using find
+ logger.info("[4/4] Starting 'gfid to path using batchfind' "
+ "conversions ...")
gfid_to_path_using_batchfind(brick, changelog_data)
changelog_data.commit()
+ logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.")
return actual_end
@@ -328,7 +380,7 @@ def changelog_crawl(brick, start, end, args):
# WORKING_DIR/BRICKHASH/OUTFILE
working_dir = os.path.dirname(args.outfile)
- brickhash = hashlib.sha1(brick)
+ brickhash = hashlib.sha1(brick.encode())
brickhash = str(brickhash.hexdigest())
working_dir = os.path.join(working_dir, brickhash)
@@ -350,9 +402,11 @@ def _get_args():
parser.add_argument("session", help="Session Name")
parser.add_argument("volume", help="Volume Name")
+ parser.add_argument("node", help="Node Name")
parser.add_argument("brick", help="Brick Name")
parser.add_argument("outfile", help="Output File")
parser.add_argument("start", help="Start Time", type=int)
+ parser.add_argument("end", help="End Time", type=int)
parser.add_argument("--only-query", help="Query mode only (no session)",
action="store_true")
parser.add_argument("--debug", help="Debug", action="store_true")
@@ -361,6 +415,7 @@ def _get_args():
action="store_true")
parser.add_argument("--output-prefix", help="File prefix in output",
default=".")
+    parser.add_argument("--type", default="both")
parser.add_argument("-N", "--only-namespace-changes",
help="List only namespace changes",
action="store_true")
@@ -380,13 +435,15 @@ if __name__ == "__main__":
session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
status_file = os.path.join(session_dir, args.volume,
- "%s.status" % urllib.quote_plus(args.brick))
+ "%s.status" % urllib.quote_plus(args.brick))
status_file_pre = status_file + ".pre"
mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
logger=logger)
+ end = -1
if args.only_query:
start = args.start
+ end = args.end
else:
try:
with open(status_file) as f:
@@ -394,13 +451,17 @@ if __name__ == "__main__":
except (ValueError, OSError, IOError):
start = args.start
- end = int(time.time()) - get_changelog_rollover_time(args.volume)
+ # end time is optional; so a -1 may be sent to use the default method of
+ # identifying the end time
+ if end == -1:
+ end = int(time.time()) - get_changelog_rollover_time(args.volume)
+
logger.info("%s Started Changelog Crawl - Start: %s End: %s" % (args.brick,
start,
end))
actual_end = changelog_crawl(args.brick, start, end, args)
if not args.only_query:
- with open(status_file_pre, "w", buffering=0) as f:
+ with open(status_file_pre, "w") as f:
f.write(str(actual_end))
logger.info("%s Finished Changelog Crawl - End: %s" % (args.brick,
diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py
index 0e32d7b7f91..641593cf4b1 100644
--- a/tools/glusterfind/src/changelogdata.py
+++ b/tools/glusterfind/src/changelogdata.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -10,12 +9,12 @@
# cases as published by the Free Software Foundation.
import sqlite3
-import urllib
import os
-from utils import RecordType
+from utils import RecordType, unquote_plus_space_newline
from utils import output_path_prepare
+
class OutputMerger(object):
"""
Class to merge the output files collected from
@@ -91,7 +90,7 @@ class ChangelogData(object):
self._create_table_pgfid()
self._create_table_inodegfid()
self.args = args
- self.path_sep = "/" if args.no_encode else "%2F"
+ self.path_sep = "/"
def _create_table_gfidpath(self):
drop_table = "DROP TABLE IF EXISTS gfidpath"
@@ -113,6 +112,11 @@ class ChangelogData(object):
"""
self.cursor.execute(create_table)
+ create_index = """
+ CREATE INDEX gfid_index ON gfidpath(gfid);
+ """
+ self.cursor.execute(create_index)
+
def _create_table_inodegfid(self):
drop_table = "DROP TABLE IF EXISTS inodegfid"
self.cursor.execute(drop_table)
@@ -322,36 +326,21 @@ class ChangelogData(object):
def when_create_mknod_mkdir(self, changelogfile, data):
# E <GFID> <MKNOD|CREATE|MKDIR> <MODE> <USER> <GRP> <PGFID>/<BNAME>
# Add the Entry to DB
- # urllib.unquote_plus will not handle unicode so, encode Unicode to
- # represent in 8 bit format and then unquote
- pgfid1, bn1 = urllib.unquote_plus(
- data[6].encode("utf-8")).split("/", 1)
+ pgfid1, bn1 = data[6].split("/", 1)
if self.args.no_encode:
- # No urlencode since no_encode is set, so convert again to Unicode
- # format from previously encoded.
- bn1 = bn1.decode("utf-8").strip()
- else:
- # Quote again the basename
- bn1 = urllib.quote_plus(bn1.strip())
+ bn1 = unquote_plus_space_newline(bn1).strip()
self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
def when_rename(self, changelogfile, data):
# E <GFID> RENAME <OLD_PGFID>/<BNAME> <PGFID>/<BNAME>
- pgfid1, bn1 = urllib.unquote_plus(
- data[3].encode("utf-8")).split("/", 1)
- pgfid2, bn2 = urllib.unquote_plus(
- data[4].encode("utf-8")).split("/", 1)
+ pgfid1, bn1 = data[3].split("/", 1)
+ pgfid2, bn2 = data[4].split("/", 1)
if self.args.no_encode:
- # Quote again the basename
- bn1 = bn1.decode("utf-8").strip()
- bn2 = bn2.decode("utf-8").strip()
- else:
- # Quote again the basename
- bn1 = urllib.quote_plus(bn1.strip())
- bn2 = urllib.quote_plus(bn2.strip())
+ bn1 = unquote_plus_space_newline(bn1).strip()
+ bn2 = unquote_plus_space_newline(bn2).strip()
if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
"pgfid1": pgfid1, "bn1": bn1}):
@@ -391,14 +380,9 @@ class ChangelogData(object):
def when_link_symlink(self, changelogfile, data):
# E <GFID> <LINK|SYMLINK> <PGFID>/<BASENAME>
# Add as New record in Db as Type NEW
- pgfid1, bn1 = urllib.unquote_plus(
- data[3].encode("utf-8")).split("/", 1)
+ pgfid1, bn1 = data[3].split("/", 1)
if self.args.no_encode:
- # Quote again the basename
- bn1 = bn1.decode("utf-8").strip()
- else:
- # Quote again the basename
- bn1 = urllib.quote_plus(bn1.strip())
+ bn1 = unquote_plus_space_newline(bn1).strip()
self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
@@ -410,19 +394,15 @@ class ChangelogData(object):
def when_unlink_rmdir(self, changelogfile, data):
# E <GFID> <UNLINK|RMDIR> <PGFID>/<BASENAME>
- pgfid1, bn1 = urllib.unquote_plus(
- data[3].encode("utf-8")).split("/", 1)
+ pgfid1, bn1 = data[3].split("/", 1)
if self.args.no_encode:
- bn1 = bn1.decode("utf-8").strip()
- else:
- # Quote again the basename
- bn1 = urllib.quote_plus(bn1.strip())
+ bn1 = unquote_plus_space_newline(bn1).strip()
deleted_path = data[4] if len(data) == 5 else ""
if deleted_path != "":
- deleted_path = output_path_prepare(deleted_path,
- self.args)
+ deleted_path = unquote_plus_space_newline(deleted_path)
+ deleted_path = output_path_prepare(deleted_path, self.args)
if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
"pgfid1": pgfid1, "bn1": bn1}):
diff --git a/tools/glusterfind/src/conf.py b/tools/glusterfind/src/conf.py
index d73fee42aad..3849ba5dd1f 100644
--- a/tools/glusterfind/src/conf.py
+++ b/tools/glusterfind/src/conf.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -10,9 +9,12 @@
# cases as published by the Free Software Foundation.
import os
-import ConfigParser
+try:
+ from ConfigParser import ConfigParser
+except ImportError:
+ from configparser import ConfigParser
-config = ConfigParser.ConfigParser()
+config = ConfigParser()
config.read(os.path.join(os.path.dirname(os.path.abspath(__file__)),
"tool.conf"))
diff --git a/tools/glusterfind/src/gfind_py2py3.py b/tools/glusterfind/src/gfind_py2py3.py
new file mode 100644
index 00000000000..87324fbf350
--- /dev/null
+++ b/tools/glusterfind/src/gfind_py2py3.py
@@ -0,0 +1,88 @@
+#
+# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+# All python2/python3 compatibility routines
+
+import os
+import sys
+from ctypes import create_string_buffer
+
+if sys.version_info >= (3,):
+
+ # Raw conversion of bytearray to string. Used in the cases where
+ # buffer is created by create_string_buffer which is a 8-bit char
+ # array and passed to syscalls to fetch results. Using encode/decode
+ # doesn't work as it converts to string altering the size.
+
+ def bytearray_to_str(byte_arr):
+ return ''.join([chr(b) for b in byte_arr])
+
+ def gf_create_string_buffer(size):
+ return create_string_buffer(b'\0', size)
+
+ def gfind_history_changelog(libgfc, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfc.gf_history_changelog(changelog_path.encode(), start, end, num_parallel,
+ actual_end)
+
+ def gfind_changelog_register(libgfc, brick, path, log_file, log_level,
+ retries):
+ return libgfc.gf_changelog_register(brick.encode(), path.encode(), log_file.encode(),
+ log_level, retries)
+
+ def gfind_history_changelog_done(libgfc, clfile):
+ return libgfc.gf_history_changelog_done(clfile.encode())
+
+ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
+ f.write(u"{0}{1}{2}{3}{4}\n".format(row,
+ field_separator,
+ p_rep,
+ field_separator,
+ row_2_rep))
+
+ def gfind_write(f, row, field_separator, p_rep):
+ f.write(u"{0}{1}{2}\n".format(row,
+ field_separator,
+ p_rep))
+
+
+else:
+
+ # Raw conversion of bytearray to string
+ def bytearray_to_str(byte_arr):
+ return byte_arr
+
+ def gf_create_string_buffer(size):
+ return create_string_buffer('\0', size)
+
+ def gfind_history_changelog(libgfc, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfc.gf_history_changelog(changelog_path, start, end,
+ num_parallel, actual_end)
+
+ def gfind_changelog_register(libgfc, brick, path, log_file, log_level,
+ retries):
+ return libgfc.gf_changelog_register(brick, path, log_file,
+ log_level, retries)
+
+ def gfind_history_changelog_done(libgfc, clfile):
+ return libgfc.gf_history_changelog_done(clfile)
+
+ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
+ f.write(u"{0}{1}{2}{3}{4}\n".format(row,
+ field_separator,
+ p_rep,
+ field_separator,
+ row_2_rep).encode())
+
+ def gfind_write(f, row, field_separator, p_rep):
+ f.write(u"{0}{1}{2}\n".format(row,
+ field_separator,
+ p_rep).encode())
diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py
index dd8153e4e61..513bb101e93 100644
--- a/tools/glusterfind/src/libgfchangelog.py
+++ b/tools/glusterfind/src/libgfchangelog.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -10,51 +9,52 @@
# cases as published by the Free Software Foundation.
import os
-from ctypes import CDLL, get_errno, create_string_buffer, c_ulong, byref
-from ctypes import RTLD_GLOBAL
+from ctypes import CDLL, RTLD_GLOBAL, get_errno, create_string_buffer, c_ulong, byref
from ctypes.util import find_library
+from gfind_py2py3 import bytearray_to_str, gf_create_string_buffer
+from gfind_py2py3 import gfind_history_changelog, gfind_changelog_register
+from gfind_py2py3 import gfind_history_changelog_done
class ChangelogException(OSError):
pass
+libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, use_errno=True)
-libgfc = CDLL(find_library("gfchangelog"), use_errno=True, mode=RTLD_GLOBAL)
-
-def raise_oserr():
+def raise_oserr(prefix=None):
errn = get_errno()
- raise ChangelogException(errn, os.strerror(errn))
+ prefix_or_empty = prefix + ": " if prefix else ""
+ raise ChangelogException(errn, prefix_or_empty + os.strerror(errn))
def cl_init():
ret = libgfc.gf_changelog_init(None)
if ret == -1:
- raise_oserr()
+ raise_oserr(prefix="gf_changelog_init")
def cl_register(brick, path, log_file, log_level, retries=0):
- ret = libgfc.gf_changelog_register(brick, path, log_file,
- log_level, retries)
+    ret = gfind_changelog_register(libgfc, brick, path, log_file, log_level, retries)
if ret == -1:
- raise_oserr()
+ raise_oserr(prefix="gf_changelog_register")
def cl_history_scan():
ret = libgfc.gf_history_changelog_scan()
if ret == -1:
- raise_oserr()
+ raise_oserr(prefix="gf_history_changelog_scan")
return ret
def cl_history_changelog(changelog_path, start, end, num_parallel):
actual_end = c_ulong()
- ret = libgfc.gf_history_changelog(changelog_path, start, end,
+    ret = gfind_history_changelog(libgfc, changelog_path, start, end,
num_parallel,
byref(actual_end))
if ret == -1:
- raise_oserr()
+ raise_oserr(prefix="gf_history_changelog")
return actual_end.value
@@ -62,7 +62,7 @@ def cl_history_changelog(changelog_path, start, end, num_parallel):
def cl_history_startfresh():
ret = libgfc.gf_history_changelog_start_fresh()
if ret == -1:
- raise_oserr()
+ raise_oserr(prefix="gf_history_changelog_start_fresh")
def cl_history_getchanges():
@@ -71,20 +71,22 @@ def cl_history_getchanges():
return f.split('.')[-1]
changes = []
- buf = create_string_buffer('\0', 4096)
+ buf = gf_create_string_buffer(4096)
while True:
ret = libgfc.gf_history_changelog_next_change(buf, 4096)
if ret in (0, -1):
break
- changes.append(buf.raw[:ret - 1])
+ # py2 and py3 compatibility
+ result = bytearray_to_str(buf.raw[:ret - 1])
+ changes.append(result)
if ret == -1:
- raise_oserr()
+ raise_oserr(prefix="gf_history_changelog_next_change")
return sorted(changes, key=clsort)
def cl_history_done(clfile):
- ret = libgfc.gf_history_changelog_done(clfile)
+ ret = gfind_history_changelog_done(libgfc, clfile)
if ret == -1:
- raise_oserr()
+ raise_oserr(prefix="gf_history_changelog_done")
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
index c125f970a83..4b5466d0114 100644
--- a/tools/glusterfind/src/main.py
+++ b/tools/glusterfind/src/main.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -16,18 +16,20 @@ from multiprocessing import Process
import os
import xml.etree.cElementTree as etree
from argparse import ArgumentParser, RawDescriptionHelpFormatter, Action
+from gfind_py2py3 import gfind_write_row, gfind_write
import logging
import shutil
import tempfile
import signal
from datetime import datetime
+import codecs
+import re
from utils import execute, is_host_local, mkdirp, fail
from utils import setup_logger, human_time, handle_rm_error
from utils import get_changelog_rollover_time, cache_output, create_file
import conf
from changelogdata import OutputMerger
-import codecs
PROG_DESCRIPTION = """
GlusterFS Incremental API
@@ -35,9 +37,9 @@ GlusterFS Incremental API
ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
logger = logging.getLogger()
-node_outfiles = []
vol_statusStr = ""
gtmpfilename = None
+g_pid_nodefile_map = {}
class StoreAbsPath(Action):
@@ -75,12 +77,27 @@ def node_cmd(host, host_uuid, task, cmd, args, opts):
cmd = ["ssh",
"-oNumberOfPasswordPrompts=0",
"-oStrictHostKeyChecking=no",
+ # We force TTY allocation (-t -t) so that Ctrl+C is handed
+ # through; see:
+ # https://bugzilla.redhat.com/show_bug.cgi?id=1382236
+ # Note that this turns stderr of the remote `cmd`
+ # into stdout locally.
"-t",
"-t",
"-i", pem_key_path,
"root@%s" % host] + cmd
- execute(cmd, exit_msg="%s - %s failed" % (host, task), logger=logger)
+ (returncode, err, out) = execute(cmd, logger=logger)
+ if returncode != 0:
+ # Because the `-t -t` above turns the remote stderr into
+ # local stdout, we need to log both stderr and stdout
+ # here to print all error messages.
+ fail("%s - %s failed; stdout (including remote stderr):\n"
+ "%s\n"
+ "stderr:\n"
+ "%s" % (host, task, out, err),
+ returncode,
+ logger=logger)
if opts.get("copy_outfile", False) and not localdir:
cmd_copy = ["scp",
@@ -96,7 +113,7 @@ def node_cmd(host, host_uuid, task, cmd, args, opts):
def run_cmd_nodes(task, args, **kwargs):
- global node_outfiles
+ global g_pid_nodefile_map
nodes = get_nodes(args.volume)
pool = []
for num, node in enumerate(nodes):
@@ -127,23 +144,30 @@ def run_cmd_nodes(task, args, **kwargs):
if tag == "":
tag = '""' if not is_host_local(host_uuid) else ""
- node_outfiles.append(node_outfile)
# remote file will be copied into this directory
mkdirp(os.path.dirname(node_outfile),
exit_on_err=True, logger=logger)
+ FS = args.field_separator
+ if not is_host_local(host_uuid):
+ FS = "'" + FS + "'"
+
cmd = [change_detector,
args.session,
args.volume,
+ host,
brick,
- node_outfile,
- str(kwargs.get("start"))] + \
+ node_outfile] + \
+ ([str(kwargs.get("start")), str(kwargs.get("end"))]
+ if not args.full else []) + \
([tag] if tag is not None else []) + \
["--output-prefix", args.output_prefix] + \
(["--debug"] if args.debug else []) + \
(["--no-encode"] if args.no_encode else []) + \
(["--only-namespace-changes"] if args.only_namespace_changes
- else [])
+ else []) + \
+ (["--type", args.type]) + \
+ (["--field-separator", FS] if args.full else [])
opts["node_outfile"] = node_outfile
opts["copy_outfile"] = True
@@ -157,24 +181,31 @@ def run_cmd_nodes(task, args, **kwargs):
if tag == "":
tag = '""' if not is_host_local(host_uuid) else ""
- node_outfiles.append(node_outfile)
# remote file will be copied into this directory
mkdirp(os.path.dirname(node_outfile),
exit_on_err=True, logger=logger)
+ FS = args.field_separator
+ if not is_host_local(host_uuid):
+ FS = "'" + FS + "'"
+
cmd = [change_detector,
args.session,
args.volume,
+ host,
brick,
- node_outfile,
- str(kwargs.get("start"))] + \
+ node_outfile] + \
+ ([str(kwargs.get("start")), str(kwargs.get("end"))]
+ if not args.full else []) + \
([tag] if tag is not None else []) + \
["--only-query"] + \
["--output-prefix", args.output_prefix] + \
(["--debug"] if args.debug else []) + \
(["--no-encode"] if args.no_encode else []) + \
(["--only-namespace-changes"]
- if args.only_namespace_changes else [])
+ if args.only_namespace_changes else []) + \
+ (["--type", args.type]) + \
+ (["--field-separator", FS] if args.full else [])
opts["node_outfile"] = node_outfile
opts["copy_outfile"] = True
@@ -233,6 +264,7 @@ def run_cmd_nodes(task, args, **kwargs):
args=(host, host_uuid, task, cmd, args, opts))
p.start()
pool.append(p)
+ g_pid_nodefile_map[p.pid] = node_outfile
for num, p in enumerate(pool):
p.join()
@@ -240,8 +272,11 @@ def run_cmd_nodes(task, args, **kwargs):
logger.warn("Command %s failed in %s" % (task, nodes[num][1]))
if task in ["create", "delete"]:
fail("Command %s failed in %s" % (task, nodes[num][1]))
- elif task == "pre" and args.disable_partial:
- sys.exit(1)
+ elif task == "pre" or task == "query":
+ if args.disable_partial:
+ sys.exit(1)
+ else:
+ del g_pid_nodefile_map[p.pid]
@cache_output
@@ -291,6 +326,7 @@ def _get_args():
parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
description=PROG_DESCRIPTION)
subparsers = parser.add_subparsers(dest="mode")
+ subparsers.required = True
# create <SESSION> <VOLUME> [--debug] [--force]
parser_create = subparsers.add_parser('create')
@@ -341,6 +377,11 @@ def _get_args():
help="Tag prefix for file names emitted during"
" a full find operation; default: \"NEW\"",
default="NEW")
+ parser_pre.add_argument('--type', help="type: f, f-files only"
+ " d, d-directories only, by default = both",
+ default='both', choices=["f", "d", "both"])
+ parser_pre.add_argument("--field-separator", help="Field separator string",
+ default=" ")
# query <VOLUME> <OUTFILE> --since-time <SINCE_TIME>
# [--output-prefix <OUTPUT_PREFIX>] [--full]
@@ -350,6 +391,8 @@ def _get_args():
action=StoreAbsPath)
parser_query.add_argument("--since-time", help="UNIX epoch time since "
"which listing is required", type=int)
+ parser_query.add_argument("--end-time", help="UNIX epoch time up to "
+ "which listing is required", type=int)
parser_query.add_argument("--no-encode",
help="Do not encode path in output file",
action="store_true")
@@ -366,6 +409,12 @@ def _get_args():
help="Tag prefix for file names emitted during"
" a full find operation; default: \"NEW\"",
default="NEW")
+ parser_query.add_argument('--type', help="type: f, f-files only"
+ " d, d-directories only, by default = both",
+ default='both', choices=["f", "d", "both"])
+ parser_query.add_argument("--field-separator",
+ help="Field separator string",
+ default=" ")
# post <SESSION> <VOLUME>
parser_post = subparsers.add_parser('post')
@@ -451,7 +500,7 @@ def enable_volume_options(args):
% args.volume)
-def write_output(outfile, outfilemerger):
+def write_output(outfile, outfilemerger, field_separator):
with codecs.open(outfile, "a", encoding="utf-8") as f:
for row in outfilemerger.get():
# Multiple paths in case of Hardlinks
@@ -460,24 +509,20 @@ def write_output(outfile, outfilemerger):
for p in paths:
if p == "":
continue
- p_rep = p.replace("%2F%2F", "%2F").replace("//", "/")
+ p_rep = p.replace("//", "/")
if not row_2_rep:
- row_2_rep = row[2].replace("%2F%2F", "%2F").replace("//",
- "/")
+ row_2_rep = row[2].replace("//", "/")
if p_rep == row_2_rep:
continue
if row_2_rep and row_2_rep != "":
- f.write(u"{0} {1} {2}\n".format(row[0], p_rep, row_2_rep))
- else:
- f.write(u"{0} {1}\n".format(row[0], p_rep))
-
+ gfind_write_row(f, row[0], field_separator, p_rep, row_2_rep)
-def mode_create(session_dir, args):
- logger.debug("Init is called - Session: %s, Volume: %s"
- % (args.session, args.volume))
+ else:
+ gfind_write(f, row[0], field_separator, p_rep)
- cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
+def validate_volume(volume):
+ cmd = ["gluster", 'volume', 'info', volume, "--xml"]
_, data, _ = execute(cmd,
exit_msg="Failed to Run Gluster Volume Info",
logger=logger)
@@ -485,11 +530,42 @@ def mode_create(session_dir, args):
tree = etree.fromstring(data)
statusStr = tree.find('volInfo/volumes/volume/statusStr').text
except (ParseError, AttributeError) as e:
- fail("Invalid Volume: %s" % e, logger=logger)
-
+ fail("Invalid Volume: Check the Volume name! %s" % e)
if statusStr != "Started":
- fail("Volume %s is not online" % args.volume, logger=logger)
+ fail("Volume %s is not online" % volume)
+
+# The rules for a valid session name.
+SESSION_NAME_RULES = {
+ 'min_length': 2,
+ 'max_length': 256, # same as maximum volume length
+ # Specifies all alphanumeric characters, underscore, hyphen.
+ 'valid_chars': r'0-9a-zA-Z_-',
+}
+
+
+# checks valid session name, fail otherwise
+def validate_session_name(session):
+ # Check for minimum length
+ if len(session) < SESSION_NAME_RULES['min_length']:
+ fail('session_name must be at least ' +
+ str(SESSION_NAME_RULES['min_length']) + ' characters long.')
+ # Check for maximum length
+ if len(session) > SESSION_NAME_RULES['max_length']:
+ fail('session_name must not exceed ' +
+ str(SESSION_NAME_RULES['max_length']) + ' characters length.')
+
+ # Matches strings composed entirely of characters specified within
+ if not re.match(r'^[' + SESSION_NAME_RULES['valid_chars'] +
+ ']+$', session):
+ fail('Session name can only contain these characters: ' +
+ SESSION_NAME_RULES['valid_chars'])
+
+
+def mode_create(session_dir, args):
+ validate_session_name(args.session)
+ logger.debug("Init is called - Session: %s, Volume: %s"
+ % (args.session, args.volume))
mkdirp(session_dir, exit_on_err=True, logger=logger)
mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
logger=logger)
@@ -510,7 +586,7 @@ def mode_create(session_dir, args):
run_cmd_nodes("create", args, time_to_update=str(time_to_update))
if not os.path.exists(status_file) or args.reset_session_time:
- with open(status_file, "w", buffering=0) as f:
+ with open(status_file, "w") as f:
f.write(str(time_to_update))
sys.stdout.write("Session %s created with volume %s\n" %
@@ -521,6 +597,7 @@ def mode_create(session_dir, args):
def mode_query(session_dir, args):
global gtmpfilename
+ global g_pid_nodefile_map
# Verify volume status
cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
@@ -548,39 +625,57 @@ def mode_query(session_dir, args):
enable_volume_options(args)
# Test options
- if not args.since_time and not args.full:
- fail("Please specify either --since-time or --full", logger=logger)
-
- if args.since_time and args.full:
- fail("Please specify either --since-time or --full, but not both",
+ if not args.full and args.type in ["f", "d"]:
+ fail("--type can only be used with --full")
+ if not args.since_time and not args.end_time and not args.full:
+ fail("Please specify either {--since-time and optionally --end-time} "
+ "or --full", logger=logger)
+
+ if args.since_time and args.end_time and args.full:
+ fail("Please specify either {--since-time and optionally --end-time} "
+ "or --full, but not both",
logger=logger)
+ if args.end_time and not args.since_time:
+ fail("Please specify --since-time as well", logger=logger)
+
# Start query command processing
+ start = -1
+ end = -1
if args.since_time:
start = args.since_time
+ if args.end_time:
+ end = args.end_time
else:
start = 0 # --full option is handled separately
logger.debug("Query is called - Session: %s, Volume: %s, "
- "Start time: %s"
- % ("default", args.volume, start))
+ "Start time: %s, End time: %s"
+ % ("default", args.volume, start, end))
prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-")
gtmpfilename = prefix + next(tempfile._get_candidate_names())
- run_cmd_nodes("query", args, start=start, tmpfilename=gtmpfilename)
+ run_cmd_nodes("query", args, start=start, end=end,
+ tmpfilename=gtmpfilename)
# Merger
if args.full:
- cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
- execute(cmd,
- exit_msg="Failed to merge output files "
- "collected from nodes", logger=logger)
+ if len(g_pid_nodefile_map) > 0:
+ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \
+ ["-o", args.outfile]
+ execute(cmd,
+ exit_msg="Failed to merge output files "
+ "collected from nodes", logger=logger)
+ else:
+ fail("Failed to collect any output files from peers. "
+ "Looks like all bricks are offline.", logger=logger)
else:
# Read each Changelogs db and generate finaldb
create_file(args.outfile, exit_on_err=True, logger=logger)
- outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
- write_output(args.outfile, outfilemerger)
+ outfilemerger = OutputMerger(args.outfile + ".db",
+ list(g_pid_nodefile_map.values()))
+ write_output(args.outfile, outfilemerger, args.field_separator)
try:
os.remove(args.outfile + ".db")
@@ -594,6 +689,7 @@ def mode_query(session_dir, args):
def mode_pre(session_dir, args):
global gtmpfilename
+ global g_pid_nodefile_map
"""
Read from Session file and write to session.pre file
@@ -605,6 +701,9 @@ def mode_pre(session_dir, args):
mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)
+ if not args.full and args.type in ["f", "d"]:
+ fail("--type can only be used with --full")
+
# If Pre status file exists and running pre command again
if os.path.exists(status_file_pre) and not args.regenerate_outfile:
fail("Post command is not run after last pre, "
@@ -627,19 +726,25 @@ def mode_pre(session_dir, args):
prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-")
gtmpfilename = prefix + next(tempfile._get_candidate_names())
- run_cmd_nodes("pre", args, start=start, tmpfilename=gtmpfilename)
+ run_cmd_nodes("pre", args, start=start, end=-1, tmpfilename=gtmpfilename)
# Merger
if args.full:
- cmd = ["sort", "-u"] + node_outfiles + ["-o", args.outfile]
- execute(cmd,
- exit_msg="Failed to merge output files "
- "collected from nodes", logger=logger)
+ if len(g_pid_nodefile_map) > 0:
+ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \
+ ["-o", args.outfile]
+ execute(cmd,
+ exit_msg="Failed to merge output files "
+ "collected from nodes", logger=logger)
+ else:
+ fail("Failed to collect any output files from peers. "
+ "Looks like all bricks are offline.", logger=logger)
else:
# Read each Changelogs db and generate finaldb
create_file(args.outfile, exit_on_err=True, logger=logger)
- outfilemerger = OutputMerger(args.outfile + ".db", node_outfiles)
- write_output(args.outfile, outfilemerger)
+ outfilemerger = OutputMerger(args.outfile + ".db",
+ list(g_pid_nodefile_map.values()))
+ write_output(args.outfile, outfilemerger, args.field_separator)
try:
os.remove(args.outfile + ".db")
@@ -648,7 +753,7 @@ def mode_pre(session_dir, args):
run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)
- with open(status_file_pre, "w", buffering=0) as f:
+ with open(status_file_pre, "w") as f:
f.write(str(endtime_to_update))
sys.stdout.write("Generated output file %s\n" % args.outfile)
@@ -774,6 +879,11 @@ def main():
args.mode not in ["create", "list", "query"]:
fail("Invalid session %s" % args.session)
+ # volume involved, validate the volume first
+ if args.mode not in ["list"]:
+ validate_volume(args.volume)
+
+
# "default" is a system defined session name
if args.mode in ["create", "post", "pre", "delete"] and \
args.session == "default":
diff --git a/tools/glusterfind/src/nodeagent.py b/tools/glusterfind/src/nodeagent.py
index 07d82826e0d..679daa6fa76 100644
--- a/tools/glusterfind/src/nodeagent.py
+++ b/tools/glusterfind/src/nodeagent.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -14,7 +14,10 @@ import sys
import os
import logging
from argparse import ArgumentParser, RawDescriptionHelpFormatter
-import urllib
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
from errno import ENOTEMPTY
from utils import setup_logger, mkdirp, handle_rm_error
@@ -49,13 +52,13 @@ def mode_create(args):
session_dir = os.path.join(conf.get_opt("session_dir"),
args.session)
status_file = os.path.join(session_dir, args.volume,
- "%s.status" % urllib.quote_plus(args.brick))
+ "%s.status" % urllib.quote_plus(args.brick))
mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
logger=logger)
if not os.path.exists(status_file) or args.reset_session_time:
- with open(status_file, "w", buffering=0) as f:
+ with open(status_file, "w") as f:
f.write(args.time_to_update)
sys.exit(0)
@@ -64,7 +67,7 @@ def mode_create(args):
def mode_post(args):
session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
status_file = os.path.join(session_dir, args.volume,
- "%s.status" % urllib.quote_plus(args.brick))
+ "%s.status" % urllib.quote_plus(args.brick))
mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
logger=logger)
diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py
index 70737be760a..906ebd8f252 100644
--- a/tools/glusterfind/src/utils.py
+++ b/tools/glusterfind/src/utils.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -16,10 +15,12 @@ import xml.etree.cElementTree as etree
import logging
import os
from datetime import datetime
-import urllib
ROOT_GFID = "00000000-0000-0000-0000-000000000001"
DEFAULT_CHANGELOG_INTERVAL = 15
+SPACE_ESCAPE_CHAR = "%20"
+NEWLINE_ESCAPE_CHAR = "%0A"
+PERCENTAGE_ESCAPE_CHAR = "%25"
ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
cache_data = {}
@@ -35,10 +36,10 @@ class RecordType(object):
def cache_output(func):
def wrapper(*args, **kwargs):
global cache_data
- if cache_data.get(func.func_name, None) is None:
- cache_data[func.func_name] = func(*args, **kwargs)
+ if cache_data.get(func.__name__, None) is None:
+ cache_data[func.__name__] = func(*args, **kwargs)
- return cache_data[func.func_name]
+ return cache_data[func.__name__]
return wrapper
@@ -57,12 +58,13 @@ def find(path, callback_func=lambda x: True, filter_func=lambda x: True,
# Capture filter_func output and pass it to callback function
filter_result = filter_func(path)
if filter_result is not None:
- callback_func(path, filter_result)
+ callback_func(path, filter_result, os.path.isdir(path))
for p in os.listdir(path):
full_path = os.path.join(path, p)
- if os.path.isdir(full_path):
+ is_dir = os.path.isdir(full_path)
+ if is_dir:
if subdirs_crawl:
find(full_path, callback_func, filter_func, ignore_dirs)
else:
@@ -72,10 +74,11 @@ def find(path, callback_func=lambda x: True, filter_func=lambda x: True,
else:
filter_result = filter_func(full_path)
if filter_result is not None:
- callback_func(full_path, filter_result)
+ callback_func(full_path, filter_result, is_dir)
-def output_write(f, path, prefix=".", encode=False, tag=""):
+def output_write(f, path, prefix=".", encode=False, tag="",
+ field_separator=" "):
if path == "":
return
@@ -83,10 +86,10 @@ def output_write(f, path, prefix=".", encode=False, tag=""):
path = os.path.join(prefix, path)
if encode:
- path = urllib.quote_plus(path)
+ path = quote_plus_space_newline(path)
# set the field separator
- FS = "" if tag == "" else " "
+ FS = "" if tag == "" else field_separator
f.write("%s%s%s\n" % (tag.strip(), FS, path))
@@ -227,7 +230,11 @@ def get_changelog_rollover_time(volumename):
try:
tree = etree.fromstring(out)
- return int(tree.find('volGetopts/Opt/Value').text)
+ val = tree.find('volGetopts/Opt/Value').text
+ if val is not None:
+ # Filter the value by split, as it may be 'X (DEFAULT)'
+ # and we only need 'X'
+ return int(val.split(' ', 1)[0])
except ParseError:
return DEFAULT_CHANGELOG_INTERVAL
@@ -245,4 +252,16 @@ def output_path_prepare(path, args):
if args.no_encode:
return path
else:
- return urllib.quote_plus(path.encode("utf-8"))
+ return quote_plus_space_newline(path)
+
+
+def unquote_plus_space_newline(s):
+ return s.replace(SPACE_ESCAPE_CHAR, " ")\
+ .replace(NEWLINE_ESCAPE_CHAR, "\n")\
+ .replace(PERCENTAGE_ESCAPE_CHAR, "%")
+
+
+def quote_plus_space_newline(s):
+ return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\
+ .replace(" ", SPACE_ESCAPE_CHAR)\
+ .replace("\n", NEWLINE_ESCAPE_CHAR)
diff --git a/tools/setgfid2path/Makefile.am b/tools/setgfid2path/Makefile.am
new file mode 100644
index 00000000000..c14787a80ce
--- /dev/null
+++ b/tools/setgfid2path/Makefile.am
@@ -0,0 +1,5 @@
+SUBDIRS = src
+
+EXTRA_DIST = gluster-setgfid2path.8
+
+man8_MANS = gluster-setgfid2path.8
diff --git a/tools/setgfid2path/gluster-setgfid2path.8 b/tools/setgfid2path/gluster-setgfid2path.8
new file mode 100644
index 00000000000..2e228ca8514
--- /dev/null
+++ b/tools/setgfid2path/gluster-setgfid2path.8
@@ -0,0 +1,54 @@
+
+.\" Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+.\" This file is part of GlusterFS.
+.\"
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
+.\"
+.\"
+.TH gluster-setgfid2path 8 "Command line utility to set GFID to Path Xattrs"
+.SH NAME
+gluster-setgfid2path - Gluster tool to set GFID to Path xattrs
+.SH SYNOPSIS
+.B gluster-setgfid2path
+.IR file
+.SH DESCRIPTION
+A new feature, introduced in Gluster release 3.12, to find the full path from a GFID.
+This feature can be enabled using Volume set command \fBgluster volume set
+<VOLUME> storage.gfid2path enable\fR
+.PP
+Once \fBgfid2path\fR feature is enabled, it starts recording the necessary
+xattrs required for the feature. But it will not add xattrs for the already
+existing files. This tool provides a facility to update the gfid2path xattrs
+for a given file path.
+
+.SH EXAMPLES
+To add xattrs of a single file,
+.PP
+.nf
+.RS
+gluster-setgfid2path /bricks/b1/hello.txt
+.RE
+.fi
+.PP
+To set the xattrs for all existing files, run the script below on each brick.
+.PP
+.nf
+.RS
+BRICK=/bricks/b1
+find $BRICK -type d \\( -path "${BRICK}/.trashcan" -o -path \\
+ "${BRICK}/.glusterfs" \\) -prune -o -type f \\
+ -exec gluster-setgfid2path {} \\;
+.RE
+.fi
+.PP
+.SH SEE ALSO
+.nf
+\fBgluster\fR(8)
+\fR
+.fi
+.SH COPYRIGHT
+.nf
+Copyright(c) 2017 Red Hat, Inc. <http://www.redhat.com>
diff --git a/tools/setgfid2path/src/Makefile.am b/tools/setgfid2path/src/Makefile.am
new file mode 100644
index 00000000000..7316d117070
--- /dev/null
+++ b/tools/setgfid2path/src/Makefile.am
@@ -0,0 +1,16 @@
+gluster_setgfid2pathdir = $(sbindir)
+
+if WITH_SERVER
+gluster_setgfid2path_PROGRAMS = gluster-setgfid2path
+endif
+
+gluster_setgfid2path_SOURCES = main.c
+
+gluster_setgfid2path_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+gluster_setgfid2path_LDFLAGS = $(GF_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/tools/setgfid2path/src/main.c b/tools/setgfid2path/src/main.c
new file mode 100644
index 00000000000..4320a7b2481
--- /dev/null
+++ b/tools/setgfid2path/src/main.c
@@ -0,0 +1,130 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <libgen.h>
+
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syscall.h>
+
+#define MAX_GFID2PATH_LINK_SUP 500
+#define GFID_SIZE 16
+#define GFID_XATTR_KEY "trusted.gfid"
+
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ struct stat st;
+ char *dname = NULL;
+ char *bname = NULL;
+ ssize_t ret_size = 0;
+ uuid_t pgfid_raw = {
+ 0,
+ };
+ char pgfid[36 + 1] = "";
+ char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {
+ 0,
+ };
+ char pgfid_bname[1024] = {
+ 0,
+ };
+ char *key = NULL;
+ char *val = NULL;
+ size_t key_size = 0;
+ size_t val_size = 0;
+ const char *file_path = NULL;
+ char *file_path1 = NULL;
+ char *file_path2 = NULL;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: setgfid2path <file-path>\n");
+ return -1;
+ }
+
+ ret = sys_lstat(argv[1], &st);
+ if (ret != 0) {
+ fprintf(stderr, "Invalid File Path\n");
+ return -1;
+ }
+
+ if (st.st_nlink >= MAX_GFID2PATH_LINK_SUP) {
+ fprintf(stderr,
+ "Number of Hardlink support exceeded. "
+ "max=%d\n",
+ MAX_GFID2PATH_LINK_SUP);
+ return -1;
+ }
+
+ file_path = argv[1];
+ file_path1 = strdup(file_path);
+ file_path2 = strdup(file_path);
+
+ dname = dirname(file_path1);
+ bname = basename(file_path2);
+
+ /* Get GFID of Parent directory */
+ ret_size = sys_lgetxattr(dname, GFID_XATTR_KEY, pgfid_raw, GFID_SIZE);
+ if (ret_size != GFID_SIZE) {
+ fprintf(stderr, "Failed to get GFID of parent directory. dir=%s\n",
+ dname);
+ ret = -1;
+ goto out;
+ }
+
+ /* Convert to UUID format */
+ if (uuid_utoa_r(pgfid_raw, pgfid) == NULL) {
+ fprintf(stderr,
+ "Failed to format GFID of parent directory. "
+ "dir=%s GFID=%s\n",
+ dname, pgfid_raw);
+ ret = -1;
+ goto out;
+ }
+
+ /* Find xxhash for PGFID/BaseName */
+ snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", pgfid, bname);
+ gf_xxh64_wrapper((unsigned char *)pgfid_bname, strlen(pgfid_bname),
+ GF_XXHSUM64_DEFAULT_SEED, xxh64);
+
+ key_size = SLEN(GFID2PATH_XATTR_KEY_PREFIX) + GF_XXH64_DIGEST_LENGTH * 2 +
+ 1;
+ key = alloca(key_size);
+ snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64);
+
+ val_size = UUID_CANONICAL_FORM_LEN + NAME_MAX + 2;
+ val = alloca(val_size);
+ snprintf(val, val_size, "%s/%s", pgfid, bname);
+
+ /* Set the Xattr, ignore if same key xattr already exists */
+ ret = sys_lsetxattr(file_path, key, val, strlen(val), XATTR_CREATE);
+ if (ret == -1) {
+ if (errno == EEXIST) {
+ printf("Xattr already exists, ignoring..\n");
+ ret = 0;
+ goto out;
+ }
+
+ fprintf(stderr, "Failed to set gfid2path xattr. errno=%d\n error=%s",
+ errno, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ printf("Success. file=%s key=%s value=%s\n", file_path, key, val);
+
+out:
+ if (file_path1 != NULL)
+ free(file_path1);
+
+ if (file_path2 != NULL)
+ free(file_path2);
+
+ return ret;
+}