diff options
author | Venky Shankar <vshankar@redhat.com> | 2015-02-15 15:05:19 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2015-03-24 10:55:32 -0700 |
commit | 7927e8747c731dbb105e93ae66c336338f48f0e6 (patch) | |
tree | c7d4ce47ee90ef2483e1baf81327c3d2f2a545ea /libglusterfs/src | |
parent | 31f841d6b35c242942b6bdcbfdc83cf548d5235a (diff) |
features/bit-rot: Implementation of bit-rot xlator
This is the "Signer" -- responsible for signing files with their
checksums upon last file descriptor close (last release()).
The event notification facility provided by the changelog xlator
is made use of.
Moreover, checksums are, as of now, the SHA256 hash of the object data,
which is the only hash available at this point in time. Therefore,
there is no special "what hash to use" type check, although it
does not take much to add various hashing algorithms to sign
objects with. Signatures are stored in extended attributes of the
objects along with the type of hashing used to calculate the
signature. This makes things future proof when other hash types
are added. The signature infrastructure is provided by bitrot
stub: a little piece of code that sits over the POSIX xlator
providing interfaces to "get or set" an object's signature and its
staleness.
Since objects are signed upon receiving release() notification,
pre-existing data which are "never" modified would never be
signed. To counter this, an initial crawler thread is spawned.
The crawler scans the entire brick for objects that are unsigned
or "missed" signing due to the server going offline (node reboots,
crashes, etc.) and triggers an explicit sign. This would also
sign objects when bit-rot is enabled for a volume and/or after
upgrade.
Change-Id: I1d9a98bee6cad1c39c35c53c8fb0fc4bad2bf67b
BUG: 1170075
Original-Author: Raghavendra Bhat <raghavendra@redhat.com>
Signed-off-by: Venky Shankar <vshankar@redhat.com>
Reviewed-on: http://review.gluster.org/9711
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'libglusterfs/src')
-rw-r--r-- | libglusterfs/src/Makefile.am | 3 | ||||
-rw-r--r-- | libglusterfs/src/changelog.h | 116 | ||||
-rw-r--r-- | libglusterfs/src/common-utils.c | 23 | ||||
-rw-r--r-- | libglusterfs/src/common-utils.h | 4 | ||||
-rw-r--r-- | libglusterfs/src/dict.c | 19 | ||||
-rw-r--r-- | libglusterfs/src/dict.h | 4 | ||||
-rw-r--r-- | libglusterfs/src/mem-types.h | 2 | ||||
-rw-r--r-- | libglusterfs/src/syncop-utils.c | 86 | ||||
-rw-r--r-- | libglusterfs/src/syncop-utils.h | 6 | ||||
-rw-r--r-- | libglusterfs/src/xlator.c | 10 | ||||
-rw-r--r-- | libglusterfs/src/xlator.h | 3 |
11 files changed, 276 insertions, 0 deletions
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index 818de91cf36..33de0a287c7 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -11,6 +11,7 @@ libglusterfs_la_LIBADD = @LEXLIB@ $(ZLIB_LIBS) $(MATH_LIB) libglusterfs_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION) lib_LTLIBRARIES = libglusterfs.la +libgfchangelogdir = $(includedir)/glusterfs/gfchangelog CONTRIB_BUILDDIR = $(top_builddir)/contrib @@ -53,6 +54,8 @@ noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h timespec. unittest/unittest.h quota-common-utils.h rot-buffs.h \ $(CONTRIBDIR)/timer-wheel/timer-wheel.h +libgfchangelog_HEADERS = changelog.h + EXTRA_DIST = graph.l graph.y graph.lex.c: graph.l y.tab.h diff --git a/libglusterfs/src/changelog.h b/libglusterfs/src/changelog.h new file mode 100644 index 00000000000..08307810704 --- /dev/null +++ b/libglusterfs/src/changelog.h @@ -0,0 +1,116 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _GF_CHANGELOG_H +#define _GF_CHANGELOG_H + +struct gf_brick_spec; + +/** + * Max bit shiter for event selection + */ +#define CHANGELOG_EV_SELECTION_RANGE 5 + +#define CHANGELOG_OP_TYPE_JOURNAL (1<<0) +#define CHANGELOG_OP_TYPE_OPEN (1<<1) +#define CHANGELOG_OP_TYPE_CREATE (1<<2) +#define CHANGELOG_OP_TYPE_RELEASE (1<<3) +#define CHANGELOG_OP_TYPE_BR_RELEASE (1<<4) /* logical release (last close()), + sent by bitrot stub */ +#define CHANGELOG_OP_TYPE_MAX (1<<CHANGELOG_EV_SELECTION_RANGE) + + +struct ev_open { + unsigned char gfid[16]; + int32_t flags; +}; + +struct ev_creat { + unsigned char gfid[16]; + int32_t flags; +}; + +struct ev_release { + unsigned char gfid[16]; +}; + +struct ev_release_br { + int32_t flags; + unsigned long version; + unsigned char gfid[16]; +}; + +struct ev_changelog { + char path[PATH_MAX]; +}; + +typedef struct changelog_event { + unsigned int ev_type; + + union { + struct ev_open open; + struct ev_creat create; + struct ev_release release; + struct ev_changelog journal; + struct ev_release_br releasebr; + } u; +} changelog_event_t; + +#define CHANGELOG_EV_SIZE (sizeof (changelog_event_t)) + +/** + * event callback, connected & disconnection defs + */ +typedef void (CALLBACK) (void *, char *, + void *, changelog_event_t *); +typedef void *(INIT) (void *, struct gf_brick_spec *); +typedef void (FINI) (void *, char *, void *); +typedef void (CONNECT) (void *, char *, void *); +typedef void (DISCONNECT) (void *, char *, void *); + +struct gf_brick_spec { + char *brick_path; + unsigned int filter; + + INIT *init; + FINI *fini; + CALLBACK *callback; + CONNECT *connected; + DISCONNECT *disconnected; + + void *ptr; +}; + +/* API set */ + +int +gf_changelog_register (char *brick_path, char *scratch_dir, + char *log_file, int log_levl, int max_reconnects); +ssize_t +gf_changelog_scan (); + +int +gf_changelog_start_fresh (); + +ssize_t +gf_changelog_next_change (char *bufptr, size_t maxlen); + +int +gf_changelog_done (char *file); + 
+/* newer flexible API */ +int +gf_changelog_init (void *xl); + +int +gf_changelog_register_generic (struct gf_brick_spec *bricks, int count, + int ordered, char *logfile, int lvl, void *xl); + +#endif diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c index 751dc8a2e50..1adfdaa1673 100644 --- a/libglusterfs/src/common-utils.c +++ b/libglusterfs/src/common-utils.c @@ -3245,6 +3245,29 @@ gf_set_log_ident (cmd_args_t *cmd_args) } int +gf_thread_cleanup_xint (pthread_t thread) +{ + int ret = 0; + void *res = NULL; + + ret = pthread_cancel (thread); + if (ret != 0) + goto error_return; + + ret = pthread_join (thread, &res); + if (ret != 0) + goto error_return; + + if (res != PTHREAD_CANCELED) + goto error_return; + + ret = 0; + + error_return: + return ret; +} + +int gf_thread_create (pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) { diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h index c1deeef3c9d..6ac1442b0bf 100644 --- a/libglusterfs/src/common-utils.h +++ b/libglusterfs/src/common-utils.h @@ -707,4 +707,8 @@ gf_get_index_by_elem (char **array, char *elem); int glusterfs_is_local_pathinfo (char *pathinfo, gf_boolean_t *local); + +int +gf_thread_cleanup_xint (pthread_t thread); + #endif /* _COMMON_UTILS_H */ diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c index 81db64dfd40..b8b6aeab248 100644 --- a/libglusterfs/src/dict.c +++ b/libglusterfs/src/dict.c @@ -2926,3 +2926,22 @@ dict_dump_to_statedump (dict_t *dict, char *dict_name, char *domain) return; } + +dict_t * +dict_for_key_value (const char *name, const char *value, size_t size) +{ + dict_t *xattr = NULL; + int ret = 0; + + xattr = dict_new (); + if (!xattr) + return NULL; + + ret = dict_set_static_bin (xattr, (char *)name, (void *)value, size); + if (ret) { + dict_destroy (xattr); + xattr = NULL; + } + + return xattr; +} diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h index 
a1a4c85f711..3708eede06d 100644 --- a/libglusterfs/src/dict.h +++ b/libglusterfs/src/dict.h @@ -260,4 +260,8 @@ int dict_dump_to_str (dict_t *dict, char *dump, int dumpsize, char *format); gf_boolean_t dict_match_everything (dict_t *d, char *k, data_t *v, void *data); + +dict_t * +dict_for_key_value (const char *name, const char *value, size_t size); + #endif diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h index a24e5731114..fc06d52239b 100644 --- a/libglusterfs/src/mem-types.h +++ b/libglusterfs/src/mem-types.h @@ -148,6 +148,8 @@ enum gf_common_mem_types_ { /* glusterd can load the nfs-xlator dynamically and needs these two */ gf_common_mt_nfs_netgroups = 130, gf_common_mt_nfs_exports = 131, + gf_common_mt_gf_brick_spec_t = 132, + gf_common_mt_gf_timer_entry_t = 133, gf_common_mt_end }; #endif diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c index 53768acd0ac..2fc95fa3e70 100644 --- a/libglusterfs/src/syncop-utils.c +++ b/libglusterfs/src/syncop-utils.c @@ -133,6 +133,92 @@ out: return ret; } +/** + * Syncop_ftw_throttle can be used in a configurable way to control + * the speed at which crawling is done. It takes 2 more arguments + * compared to syncop_ftw. + * After @count entries are finished in a directory (to be + * precise, @count files) sleep for @sleep_time seconds. + * If either @count or @sleep_time is <=0, then it behaves similar to + * syncop_ftw. 
+ */ +int +syncop_ftw_throttle (xlator_t *subvol, loc_t *loc, int pid, void *data, + int (*fn) (xlator_t *subvol, gf_dirent_t *entry, + loc_t *parent, void *data), + int count, int sleep_time) +{ + loc_t child_loc = {0, }; + fd_t *fd = NULL; + uint64_t offset = 0; + gf_dirent_t *entry = NULL; + int ret = 0; + gf_dirent_t entries; + int tmp = 0; + + if (sleep_time <= 0) { + ret = syncop_ftw (subvol, loc, pid, data, fn); + goto out; + } + + ret = syncop_dirfd (subvol, loc, &fd, pid); + if (ret) + goto out; + + INIT_LIST_HEAD (&entries.list); + + while ((ret = syncop_readdirp (subvol, fd, 131072, offset, 0, + &entries))) { + if (ret < 0) + break; + + if (ret > 0) { + /* If the entries are only '.', and '..' then ret + * value will be non-zero. so set it to zero here. */ + ret = 0; + } + + tmp = 0; + + list_for_each_entry (entry, &entries.list, list) { + offset = entry->d_off; + + if (!strcmp (entry->d_name, ".") || + !strcmp (entry->d_name, "..")) + continue; + + if (++tmp >= count) + sleep (sleep_time); + + gf_link_inode_from_dirent (NULL, fd->inode, entry); + + ret = fn (subvol, entry, loc, data); + if (ret) + continue; + + if (entry->d_stat.ia_type == IA_IFDIR) { + child_loc.inode = inode_ref (entry->inode); + uuid_copy (child_loc.gfid, entry->inode->gfid); + ret = syncop_ftw_throttle (subvol, &child_loc, + pid, data, fn, count, + sleep_time); + loc_wipe (&child_loc); + if (ret) + continue; + } + } + + gf_dirent_free (&entries); + if (ret) + break; + } + +out: + if (fd) + fd_unref (fd); + return ret; +} + int syncop_dir_scan (xlator_t *subvol, loc_t *loc, int pid, void *data, int (*fn) (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, diff --git a/libglusterfs/src/syncop-utils.h b/libglusterfs/src/syncop-utils.h index 918b3b7c666..7a9ccacb285 100644 --- a/libglusterfs/src/syncop-utils.h +++ b/libglusterfs/src/syncop-utils.h @@ -30,4 +30,10 @@ syncop_is_subvol_local (xlator_t *this, loc_t *loc, gf_boolean_t *is_local); int syncop_gfid_to_path (inode_table_t 
*itable, xlator_t *subvol, uuid_t gfid, char **path_p); + +int +syncop_ftw_throttle (xlator_t *subvol, loc_t *loc, int pid, void *data, + int (*fn) (xlator_t *subvol, gf_dirent_t *entry, + loc_t *parent, void *data), + int count, int sleep_time); #endif /* _SYNCOP_H */ diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index cc4726e0ea5..00f411e275b 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -1024,3 +1024,13 @@ glusterd_check_log_level (const char *value) return log_level; } +int +xlator_subvolume_count (xlator_t *this) +{ + int i = 0; + xlator_list_t *list = NULL; + + for (list = this->children; list; list = list->next) + i++; + return i; +} diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 5a0b114d6a8..9bea950d720 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -989,4 +989,7 @@ glusterfs_leaf_position(xlator_t *tgt); int glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves); +int +xlator_subvolume_count (xlator_t *this); + #endif /* _XLATOR_H */ |