summaryrefslogtreecommitdiffstats
path: root/libglusterfs/src
diff options
context:
space:
mode:
authorVenky Shankar <vshankar@redhat.com>2015-02-15 15:05:19 +0530
committerVijay Bellur <vbellur@redhat.com>2015-03-24 10:55:32 -0700
commit7927e8747c731dbb105e93ae66c336338f48f0e6 (patch)
treec7d4ce47ee90ef2483e1baf81327c3d2f2a545ea /libglusterfs/src
parent31f841d6b35c242942b6bdcbfdc83cf548d5235a (diff)
features/bit-rot: Implementation of bit-rot xlator
This is the "Signer" -- responsible for signing files with their checksums upon last file descriptor close (last release()). The event notification facility provided by the changelog xlator is made use of. Moreover, checksums are, as of now, the SHA256 hash of the object data, which is the only available hash at this point in time. Therefore, there is no special "what hash to use" type check, although it does not take much to add various hashing algorithms to sign objects with. Signatures are stored in extended attributes of the objects along with the type of hashing used to calculate the signature. This makes things future-proof when other hash types are added. The signature infrastructure is provided by bitrot stub: a little piece of code that sits over the POSIX xlator providing interfaces to "get or set" an object's signature and its staleness. Since objects are signed upon receiving release() notification, pre-existing data which are "never" modified would never be signed. To counter this, an initial crawler thread is spawned. The crawler scans the entire brick for objects that are unsigned or "missed" signing due to the server going offline (node reboots, crashes, etc.) and triggers an explicit sign. This would also sign objects when bit-rot is enabled for a volume and/or after upgrade. Change-Id: I1d9a98bee6cad1c39c35c53c8fb0fc4bad2bf67b BUG: 1170075 Original-Author: Raghavendra Bhat <raghavendra@redhat.com> Signed-off-by: Venky Shankar <vshankar@redhat.com> Reviewed-on: http://review.gluster.org/9711 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'libglusterfs/src')
-rw-r--r--libglusterfs/src/Makefile.am3
-rw-r--r--libglusterfs/src/changelog.h116
-rw-r--r--libglusterfs/src/common-utils.c23
-rw-r--r--libglusterfs/src/common-utils.h4
-rw-r--r--libglusterfs/src/dict.c19
-rw-r--r--libglusterfs/src/dict.h4
-rw-r--r--libglusterfs/src/mem-types.h2
-rw-r--r--libglusterfs/src/syncop-utils.c86
-rw-r--r--libglusterfs/src/syncop-utils.h6
-rw-r--r--libglusterfs/src/xlator.c10
-rw-r--r--libglusterfs/src/xlator.h3
11 files changed, 276 insertions, 0 deletions
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index 818de91cf36..33de0a287c7 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -11,6 +11,7 @@ libglusterfs_la_LIBADD = @LEXLIB@ $(ZLIB_LIBS) $(MATH_LIB)
libglusterfs_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION)
lib_LTLIBRARIES = libglusterfs.la
+libgfchangelogdir = $(includedir)/glusterfs/gfchangelog
CONTRIB_BUILDDIR = $(top_builddir)/contrib
@@ -53,6 +54,8 @@ noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h timespec.
unittest/unittest.h quota-common-utils.h rot-buffs.h \
$(CONTRIBDIR)/timer-wheel/timer-wheel.h
+libgfchangelog_HEADERS = changelog.h
+
EXTRA_DIST = graph.l graph.y
graph.lex.c: graph.l y.tab.h
diff --git a/libglusterfs/src/changelog.h b/libglusterfs/src/changelog.h
new file mode 100644
index 00000000000..08307810704
--- /dev/null
+++ b/libglusterfs/src/changelog.h
@@ -0,0 +1,116 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_H
+#define _GF_CHANGELOG_H
+
+struct gf_brick_spec;
+
+/**
+ * Max bit shifter for event selection: event types occupy bit
+ * positions 0 .. (CHANGELOG_EV_SELECTION_RANGE - 1).
+ */
+#define CHANGELOG_EV_SELECTION_RANGE 5
+
+/* Event type bits; each event type has its own bit. */
+#define CHANGELOG_OP_TYPE_JOURNAL (1<<0)
+#define CHANGELOG_OP_TYPE_OPEN (1<<1)
+#define CHANGELOG_OP_TYPE_CREATE (1<<2)
+#define CHANGELOG_OP_TYPE_RELEASE (1<<3)
+#define CHANGELOG_OP_TYPE_BR_RELEASE (1<<4) /* logical release (last close()),
+ sent by bitrot stub */
+/* First bit past the event types defined above (not an event itself). */
+#define CHANGELOG_OP_TYPE_MAX (1<<CHANGELOG_EV_SELECTION_RANGE)
+
+
+/* Payload carried in the `open` member of changelog_event_t. */
+struct ev_open {
+ unsigned char gfid[16]; /* 16-byte GFID */
+ int32_t flags; /* NOTE(review): presumably the open flags -- confirm */
+};
+
+/* Payload carried in the `create` member of changelog_event_t. */
+struct ev_creat {
+ unsigned char gfid[16]; /* 16-byte GFID */
+ int32_t flags; /* NOTE(review): presumably the create flags -- confirm */
+};
+
+/* Payload carried in the `release` member of changelog_event_t. */
+struct ev_release {
+ unsigned char gfid[16]; /* 16-byte GFID */
+};
+
+/*
+ * Payload carried in the `releasebr` member of changelog_event_t.
+ * NOTE(review): presumably paired with CHANGELOG_OP_TYPE_BR_RELEASE
+ * (the logical release sent by bitrot stub) -- confirm.
+ */
+struct ev_release_br {
+ int32_t flags;
+ unsigned long version; /* NOTE(review): presumably the object version
+ tracked by bitrot stub -- confirm */
+ unsigned char gfid[16]; /* 16-byte GFID */
+};
+
+/*
+ * Payload carried in the `journal` member of changelog_event_t.
+ * NOTE(review): PATH_MAX (and int32_t in the sibling structs) are used
+ * although this header includes nothing itself; includers must pull in
+ * <limits.h> and <stdint.h> first.
+ */
+struct ev_changelog {
+ char path[PATH_MAX]; /* NOTE(review): presumably the changelog file
+ path -- confirm */
+};
+
+/*
+ * A single event: a type tag plus a union holding one payload per
+ * event kind.
+ */
+typedef struct changelog_event {
+ unsigned int ev_type; /* NOTE(review): presumably one of the
+ CHANGELOG_OP_TYPE_* bits, selecting the
+ valid member of `u` -- confirm */
+
+ union {
+ struct ev_open open;
+ struct ev_creat create;
+ struct ev_release release;
+ struct ev_changelog journal;
+ struct ev_release_br releasebr;
+ } u;
+} changelog_event_t;
+
+/* Size in bytes of one complete event structure. */
+#define CHANGELOG_EV_SIZE (sizeof (changelog_event_t))
+
+/**
+ * event callback, connected & disconnection defs
+ *
+ * These are function types, not pointer types: struct gf_brick_spec
+ * stores them as `INIT *`, `FINI *`, `CALLBACK *`, and so on.
+ * The meaning of the individual arguments is not spelled out in this
+ * header. NOTE(review): document them once confirmed against the
+ * implementation.
+ */
+typedef void (CALLBACK) (void *, char *,
+ void *, changelog_event_t *);
+typedef void *(INIT) (void *, struct gf_brick_spec *);
+typedef void (FINI) (void *, char *, void *);
+typedef void (CONNECT) (void *, char *, void *);
+typedef void (DISCONNECT) (void *, char *, void *);
+
+/*
+ * Per-brick registration record passed (as an array) to
+ * gf_changelog_register_generic().
+ */
+struct gf_brick_spec {
+ char *brick_path;
+ unsigned int filter; /* NOTE(review): presumably a mask of
+ CHANGELOG_OP_TYPE_* bits -- confirm */
+
+ /* hooks supplied by the consumer; see the typedefs above */
+ INIT *init;
+ FINI *fini;
+ CALLBACK *callback;
+ CONNECT *connected;
+ DISCONNECT *disconnected;
+
+ void *ptr; /* opaque pointer; NOTE(review): ownership and use are
+ not visible in this header -- confirm */
+};
+
+/*
+ * API set
+ *
+ * Only the declarations live here; the semantics of each call are defined
+ * by the implementation. Functions taking no arguments are declared with
+ * an explicit (void) so the compiler rejects calls that pass arguments
+ * (an empty list "()" leaves the parameters unspecified in C).
+ */
+
+int
+gf_changelog_register (char *brick_path, char *scratch_dir,
+                       char *log_file, int log_levl, int max_reconnects);
+ssize_t
+gf_changelog_scan (void);
+
+int
+gf_changelog_start_fresh (void);
+
+ssize_t
+gf_changelog_next_change (char *bufptr, size_t maxlen);
+
+int
+gf_changelog_done (char *file);
+
+/* newer flexible API */
+int
+gf_changelog_init (void *xl);
+
+/* @bricks points to @count gf_brick_spec records. */
+int
+gf_changelog_register_generic (struct gf_brick_spec *bricks, int count,
+                               int ordered, char *logfile, int lvl, void *xl);
+
+#endif
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index 751dc8a2e50..1adfdaa1673 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -3245,6 +3245,29 @@ gf_set_log_ident (cmd_args_t *cmd_args)
}
+/**
+ * Cancel @thread and wait for it to terminate.
+ *
+ * Returns 0 only when the thread was cancelled and joined. If
+ * pthread_cancel() or pthread_join() fails, their (positive) error
+ * number is returned. If the join succeeds but the thread's exit value
+ * is not PTHREAD_CANCELED (the thread finished by itself before the
+ * cancellation was acted upon), -1 is returned.
+ */
int
+gf_thread_cleanup_xint (pthread_t thread)
+{
+        int   ret = 0;
+        void *res = NULL;
+
+        ret = pthread_cancel (thread);
+        if (ret != 0)
+                goto error_return;
+
+        ret = pthread_join (thread, &res);
+        if (ret != 0)
+                goto error_return;
+
+        /* ret is 0 here; without an explicit error code this case
+         * would be reported as success. */
+        if (res != PTHREAD_CANCELED)
+                ret = -1;
+
+ error_return:
+        return ret;
+}
+
+int
gf_thread_create (pthread_t *thread, const pthread_attr_t *attr,
void *(*start_routine)(void *), void *arg)
{
diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h
index c1deeef3c9d..6ac1442b0bf 100644
--- a/libglusterfs/src/common-utils.h
+++ b/libglusterfs/src/common-utils.h
@@ -707,4 +707,8 @@ gf_get_index_by_elem (char **array, char *elem);
int
glusterfs_is_local_pathinfo (char *pathinfo, gf_boolean_t *local);
+
+/* Cancel @thread and join it; a non-zero return means pthread_cancel()
+ * or pthread_join() failed. */
+int
+gf_thread_cleanup_xint (pthread_t thread);
+
#endif /* _COMMON_UTILS_H */
diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
index 81db64dfd40..b8b6aeab248 100644
--- a/libglusterfs/src/dict.c
+++ b/libglusterfs/src/dict.c
@@ -2926,3 +2926,22 @@ dict_dump_to_statedump (dict_t *dict, char *dict_name, char *domain)
return;
}
+
+/**
+ * Create a fresh dict holding the single pair @name -> @value, where
+ * @value is @size bytes long.
+ *
+ * Returns the new dict, or NULL if the dict could not be created or the
+ * pair could not be set (the dict is destroyed in that case).
+ *
+ * NOTE(review): dict_set_static_bin presumably stores @value without
+ * copying it, so @value would have to outlive the dict -- confirm.
+ */
+dict_t *
+dict_for_key_value (const char *name, const char *value, size_t size)
+{
+        dict_t *dict = dict_new ();
+
+        if (dict == NULL)
+                return NULL;
+
+        if (dict_set_static_bin (dict, (char *)name, (void *)value, size) == 0)
+                return dict;
+
+        /* could not store the pair: do not hand back an empty dict */
+        dict_destroy (dict);
+        return NULL;
+}
diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h
index a1a4c85f711..3708eede06d 100644
--- a/libglusterfs/src/dict.h
+++ b/libglusterfs/src/dict.h
@@ -260,4 +260,8 @@ int
dict_dump_to_str (dict_t *dict, char *dump, int dumpsize, char *format);
gf_boolean_t
dict_match_everything (dict_t *d, char *k, data_t *v, void *data);
+
+/* Build a new dict containing only @name -> @value (@size bytes);
+ * returns NULL if the dict cannot be created or the pair cannot be set. */
+dict_t *
+dict_for_key_value (const char *name, const char *value, size_t size);
+
#endif
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h
index a24e5731114..fc06d52239b 100644
--- a/libglusterfs/src/mem-types.h
+++ b/libglusterfs/src/mem-types.h
@@ -148,6 +148,8 @@ enum gf_common_mem_types_ {
/* glusterd can load the nfs-xlator dynamically and needs these two */
gf_common_mt_nfs_netgroups = 130,
gf_common_mt_nfs_exports = 131,
+ gf_common_mt_gf_brick_spec_t = 132,
+ gf_common_mt_gf_timer_entry_t = 133,
gf_common_mt_end
};
#endif
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
index 53768acd0ac..2fc95fa3e70 100644
--- a/libglusterfs/src/syncop-utils.c
+++ b/libglusterfs/src/syncop-utils.c
@@ -133,6 +133,92 @@ out:
return ret;
}
+/**
+ * syncop_ftw_throttle can be used in a configurable way to control
+ * the speed at which crawling is done. It takes 2 more arguments
+ * compared to syncop_ftw.
+ * After every @count entries handled out of one readdirp batch of a
+ * directory ("." and ".." are not counted) sleep for @sleep_time
+ * seconds; the counter restarts after each sleep and with each batch.
+ * If either @count or @sleep_time is <=0, then it behaves like
+ * syncop_ftw.
+ *
+ * @fn is called for every entry; directories for which @fn returned 0
+ * are descended into recursively with the same throttling parameters.
+ * A failure of @fn or of a recursive walk does not stop the processing
+ * of the current batch; the walk of this directory stops after a batch
+ * only if the last entry handled in it left a non-zero result.
+ *
+ * Returns 0, or the non-zero result of syncop_dirfd, syncop_readdirp,
+ * @fn or a recursive walk as described above.
+ */
+int
+syncop_ftw_throttle (xlator_t *subvol, loc_t *loc, int pid, void *data,
+                     int (*fn) (xlator_t *subvol, gf_dirent_t *entry,
+                                loc_t *parent, void *data),
+                     int count, int sleep_time)
+{
+        loc_t        child_loc = {0, };
+        fd_t        *fd        = NULL;
+        uint64_t     offset    = 0;
+        gf_dirent_t *entry     = NULL;
+        int          ret       = 0;
+        gf_dirent_t  entries;
+        int          tmp       = 0;
+
+        /* nothing to throttle with: plain walk, as documented */
+        if (count <= 0 || sleep_time <= 0) {
+                ret = syncop_ftw (subvol, loc, pid, data, fn);
+                goto out;
+        }
+
+        ret = syncop_dirfd (subvol, loc, &fd, pid);
+        if (ret)
+                goto out;
+
+        INIT_LIST_HEAD (&entries.list);
+
+        while ((ret = syncop_readdirp (subvol, fd, 131072, offset, 0,
+                                       &entries))) {
+                if (ret < 0)
+                        break;
+
+                if (ret > 0) {
+                        /* If the entries are only '.', and '..' then ret
+                         * value will be non-zero. so set it to zero here. */
+                        ret = 0;
+                }
+
+                tmp = 0;
+
+                list_for_each_entry (entry, &entries.list, list) {
+                        offset = entry->d_off;
+
+                        if (!strcmp (entry->d_name, ".") ||
+                            !strcmp (entry->d_name, ".."))
+                                continue;
+
+                        if (++tmp >= count) {
+                                /* start counting afresh, otherwise every
+                                 * remaining entry of the batch would
+                                 * sleep as well */
+                                tmp = 0;
+                                sleep (sleep_time);
+                        }
+
+                        gf_link_inode_from_dirent (NULL, fd->inode, entry);
+
+                        ret = fn (subvol, entry, loc, data);
+                        if (ret)
+                                continue;
+
+                        if (entry->d_stat.ia_type == IA_IFDIR) {
+                                child_loc.inode = inode_ref (entry->inode);
+                                uuid_copy (child_loc.gfid, entry->inode->gfid);
+                                ret = syncop_ftw_throttle (subvol, &child_loc,
+                                                           pid, data, fn, count,
+                                                           sleep_time);
+                                loc_wipe (&child_loc);
+                                if (ret)
+                                        continue;
+                        }
+                }
+
+                gf_dirent_free (&entries);
+                if (ret)
+                        break;
+        }
+
+out:
+        if (fd)
+                fd_unref (fd);
+        return ret;
+}
+
int
syncop_dir_scan (xlator_t *subvol, loc_t *loc, int pid, void *data,
int (*fn) (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
diff --git a/libglusterfs/src/syncop-utils.h b/libglusterfs/src/syncop-utils.h
index 918b3b7c666..7a9ccacb285 100644
--- a/libglusterfs/src/syncop-utils.h
+++ b/libglusterfs/src/syncop-utils.h
@@ -30,4 +30,10 @@ syncop_is_subvol_local (xlator_t *this, loc_t *loc, gf_boolean_t *is_local);
int
syncop_gfid_to_path (inode_table_t *itable, xlator_t *subvol, uuid_t gfid,
char **path_p);
+
+/* Throttled variant of syncop_ftw: walks the tree under @loc calling @fn
+ * for each entry, sleeping @sleep_time seconds as governed by @count.
+ * Falls back to syncop_ftw when @sleep_time <= 0. */
+int
+syncop_ftw_throttle (xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn) (xlator_t *subvol, gf_dirent_t *entry,
+ loc_t *parent, void *data),
+ int count, int sleep_time);
#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index cc4726e0ea5..00f411e275b 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -1024,3 +1024,13 @@ glusterd_check_log_level (const char *value)
return log_level;
}
+/**
+ * Count the entries of @this->children by walking the list until its
+ * `next` pointer is NULL. Returns 0 when there are no children.
+ */
+int
+xlator_subvolume_count (xlator_t *this)
+{
+        xlator_list_t *child       = this->children;
+        int            nr_children = 0;
+
+        while (child != NULL) {
+                nr_children++;
+                child = child->next;
+        }
+
+        return nr_children;
+}
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index 5a0b114d6a8..9bea950d720 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -989,4 +989,7 @@ glusterfs_leaf_position(xlator_t *tgt);
int
glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves);
+/* Number of entries in @this->children. */
+int
+xlator_subvolume_count (xlator_t *this);
+
#endif /* _XLATOR_H */