diff options
Diffstat (limited to 'xlators')
85 files changed, 4312 insertions, 1234 deletions
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 3013ae730..00e0d2676 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -37,7 +37,6 @@ #include "afr.h" #include "afr-transaction.h" -//#include "afr-self-heal-common.h" static void @@ -1750,5 +1749,3 @@ out: } /* }}} */ - - diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 9605d69f4..9e714b026 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -585,12 +585,10 @@ afr_selfheal_data_opendir (xlator_t *this, inode_t *inode) } loc_wipe (&loc); - - return fd; + return fd; } - int afr_selfheal_entry (call_frame_t *frame, xlator_t *this, inode_t *inode) { diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index b31a33237..83628297f 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -18,7 +18,6 @@ #include "afr-self-heal.h" #include "byte-order.h" - #define AFR_HEAL_ATTR (GF_SET_ATTR_UID|GF_SET_ATTR_GID|GF_SET_ATTR_MODE) int diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 5e12910b7..ead08425f 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -185,7 +185,7 @@ reconfigure (xlator_t *this, dict_t *options) uint32, out); GF_OPTION_RECONF (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, - options, size, out); + options, size_uint64, out); /* Reset this so we re-discover in case the topology changed. 
*/ GF_OPTION_RECONF ("ensure-durability", priv->ensure_durability, options, bool, out); @@ -331,7 +331,7 @@ init (xlator_t *this) GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out); GF_OPTION_INIT ("quorum-type", qtype, str, out); GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out); - GF_OPTION_INIT (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size, + GF_OPTION_INIT (AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size_uint64, out); fix_quorum_options(this,priv,qtype); @@ -436,7 +436,7 @@ fini (xlator_t *this) priv = this->private; this->private = NULL; afr_priv_destroy (priv); - if (this->itable);//I dont see any destroy func + //if (this->itable);//I dont see any destroy func return 0; } diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am index 174bea841..3fc29bf81 100644 --- a/xlators/cluster/dht/src/Makefile.am +++ b/xlators/cluster/dht/src/Makefile.am @@ -1,4 +1,3 @@ - xlator_LTLIBRARIES = dht.la nufa.la switch.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index d74d0dfd4..2ece28a61 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -47,7 +47,7 @@ struct dht_layout { int gen; int type; int ref; /* use with dht_conf_t->layout_lock */ - int search_unhashed; + gf_boolean_t search_unhashed; struct { int err; /* 0 = normal -1 = dir exists and no xattr @@ -263,7 +263,7 @@ struct dht_conf { int gen; dht_du_t *du_stats; double min_free_disk; - double min_free_inodes; + double min_free_inodes; char disk_unit; int32_t refresh_interval; gf_boolean_t unhashed_sticky_bit; diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 36c073973..f2e7467ab 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -366,7 +366,6 @@ dht_reconfigure (xlator_t *this, dict_t *options) " 
lookup-unhashed should be boolean," " not (%s), defaulting to (%d)", temp_str, conf->search_unhashed); - //return -1; ret = -1; goto out; } @@ -449,11 +448,11 @@ gf_defrag_pattern_list_fill (xlator_t *this, gf_defrag_info_t *defrag, char *dat if (!pattern) goto out; if (!num) { - if (gf_string2bytesize(pattern, &pattern_list->size) + if (gf_string2bytesize_uint64(pattern, &pattern_list->size) == 0) { pattern = "*"; } - } else if (gf_string2bytesize (num, &pattern_list->size) != 0) { + } else if (gf_string2bytesize_uint64 (num, &pattern_list->size) != 0) { gf_log (this->name, GF_LOG_ERROR, "invalid number format \"%s\"", num); goto out; diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am index 2d151422a..4268d6f03 100644 --- a/xlators/cluster/stripe/src/Makefile.am +++ b/xlators/cluster/stripe/src/Makefile.am @@ -1,4 +1,3 @@ - xlator_LTLIBRARIES = stripe.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c index a047d4a2e..3c12809d6 100644 --- a/xlators/cluster/stripe/src/stripe-helpers.c +++ b/xlators/cluster/stripe/src/stripe-helpers.c @@ -13,6 +13,7 @@ #include "stripe.h" #include "byte-order.h" #include "mem-types.h" +#include "logging.h" void stripe_local_wipe (stripe_local_t *local) @@ -260,8 +261,8 @@ stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local, goto out; } - (void) snprintf (stripe_size_str, 20, "%ld", - (local->fctx) ? local->fctx->stripe_size : 0); + (void) snprintf (stripe_size_str, 20, "%"PRId64, + (long long) (local->fctx) ? 
local->fctx->stripe_size : 0); /* extra bytes for decorations (brackets and <>'s) */ padding = strlen (this->name) + strlen (STRIPE_PATHINFO_HEADER) @@ -504,7 +505,7 @@ set_default_block_size (stripe_private_t *priv, char *num) GF_VALIDATE_OR_GOTO (THIS->name, num, out); - if (gf_string2bytesize (num, &priv->block_size) != 0) { + if (gf_string2bytesize_uint64 (num, &priv->block_size) != 0) { gf_log (THIS->name, GF_LOG_ERROR, "invalid number format \"%s\"", num); goto out; @@ -554,7 +555,7 @@ set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data) if (ret) goto out; } - if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) { + if (gf_string2bytesize_uint64 (num, &stripe_opt->block_size) != 0) { gf_log (this->name, GF_LOG_ERROR, "invalid number format \"%s\"", num); goto out; @@ -674,4 +675,3 @@ uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count, return size; } - diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c index 79e80b513..0ebea8168 100644 --- a/xlators/cluster/stripe/src/stripe.c +++ b/xlators/cluster/stripe/src/stripe.c @@ -4459,7 +4459,7 @@ stripe_is_bd (dict_t *this, char *key, data_t *value, void *data) return 0; } -inline gf_boolean_t +static inline gf_boolean_t stripe_setxattr_is_bd (dict_t *dict) { gf_boolean_t is_bd = _gf_false; @@ -4628,7 +4628,7 @@ out: return ret; } -inline gf_boolean_t +static inline gf_boolean_t stripe_fsetxattr_is_special (dict_t *dict) { gf_boolean_t is_spl = _gf_false; @@ -5080,7 +5080,7 @@ reconfigure (xlator_t *this, dict_t *options) goto unlock; } - if (gf_string2bytesize (opt->default_value, &priv->block_size)){ + if (gf_string2bytesize_uint64 (opt->default_value, &priv->block_size)){ gf_log (this->name, GF_LOG_ERROR, "Unable to set default block-size "); ret = -1; @@ -5187,7 +5187,7 @@ init (xlator_t *this) ret = -1; goto unlock; } - if (gf_string2bytesize (opt->default_value, &priv->block_size)){ + if (gf_string2bytesize_uint64 
(opt->default_value, &priv->block_size)){ gf_log (this->name, GF_LOG_ERROR, "Unable to set default block-size "); ret = -1; @@ -5601,7 +5601,7 @@ err: return 0; } -inline gf_boolean_t +static inline gf_boolean_t stripe_is_special_xattr (const char *name) { gf_boolean_t is_spl = _gf_false; diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index d63fbb26c..5b4c833fb 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -509,7 +509,7 @@ out: return 0; } -inline int +static inline int ios_stats_cleanup (xlator_t *this, inode_t *inode) { diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c index c9d839356..1efd50e65 100644 --- a/xlators/debug/trace/src/trace.c +++ b/xlators/debug/trace/src/trace.c @@ -18,6 +18,44 @@ * Very helpful translator for debugging. */ +static inline void +trace_stat_to_str(struct iatt *buf, char *str) +{ + char atime_buf[256] = {0,}; + char mtime_buf[256] = {0,}; + char ctime_buf[256] = {0,}; + uint64_t ia_time = 0; + + if (!buf) + return; + + ia_time = buf->ia_atime; + strftime (atime_buf, 256, "[%b %d %H:%M:%S]", + localtime ((time_t *)&ia_time)); + ia_time = buf->ia_mtime; + strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", + localtime ((time_t *)&ia_time)); + + ia_time = buf->ia_ctime; + strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", + localtime ((time_t *)&ia_time)); + + snprintf (str, sizeof (str), + "gfid=%s ino=%"PRIu64", mode=%o, " + "nlink=%"GF_PRI_NLINK", uid=%u, " + "gid=%u, size=%"PRIu64", " + "blocks=%"PRIu64", atime=%s, " + "mtime=%s, ctime=%s", + uuid_utoa (buf->ia_gfid), + buf->ia_ino, + st_mode_from_ia (buf->ia_prot, buf->ia_type), + buf->ia_nlink, buf->ia_uid, + buf->ia_gid, buf->ia_size, + buf->ia_blocks, atime_buf, + mtime_buf, ctime_buf); +} + + int dump_history_trace (circular_buffer_t *cb, void *data) { diff --git a/xlators/debug/trace/src/trace.h b/xlators/debug/trace/src/trace.h index 045eefb36..62d1bc9c9 100644 --- 
a/xlators/debug/trace/src/trace.h +++ b/xlators/debug/trace/src/trace.h @@ -59,40 +59,3 @@ typedef struct { "%s", _string); \ } \ } while (0); - -#define trace_stat_to_str(buf, statstr) \ - do { \ - char atime_buf[256] = {0,}; \ - char mtime_buf[256] = {0,}; \ - char ctime_buf[256] = {0,}; \ - uint64_t ia_time = 0; \ - \ - if (!buf) \ - break; \ - \ - ia_time = buf->ia_atime; \ - strftime (atime_buf, 256, "[%b %d %H:%M:%S]", \ - localtime ((time_t *)&ia_time)); \ - \ - ia_time = buf->ia_mtime; \ - strftime (mtime_buf, 256, "[%b %d %H:%M:%S]", \ - localtime ((time_t *)&ia_time)); \ - \ - ia_time = buf->ia_ctime; \ - strftime (ctime_buf, 256, "[%b %d %H:%M:%S]", \ - localtime ((time_t *)&ia_time)); \ - \ - snprintf (statstr, sizeof (statstr), \ - "gfid=%s ino=%"PRIu64", mode=%o, " \ - "nlink=%"GF_PRI_NLINK", uid=%u, " \ - "gid=%u, size=%"PRIu64", " \ - "blocks=%"PRIu64", atime=%s, " \ - "mtime=%s, ctime=%s", \ - uuid_utoa (buf->ia_gfid), buf->ia_ino, \ - st_mode_from_ia (buf->ia_prot, \ - buf->ia_type), \ - buf->ia_nlink, buf->ia_uid, \ - buf->ia_gid, buf->ia_size, \ - buf->ia_blocks, atime_buf, \ - mtime_buf, ctime_buf); \ - } while (0); diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c index 1abdad31d..13b1bd962 100644 --- a/xlators/encryption/crypt/src/crypt.c +++ b/xlators/encryption/crypt/src/crypt.c @@ -4169,9 +4169,9 @@ int32_t master_set_block_size (xlator_t *this, crypt_private_t *priv, if (options != NULL) GF_OPTION_RECONF("block-size", block_size, options, - size, error); + size_uint64, error); else - GF_OPTION_INIT("block-size", block_size, size, error); + GF_OPTION_INIT("block-size", block_size, size_uint64, error); switch (block_size) { case 512: @@ -4224,9 +4224,9 @@ static int master_set_data_key_size (xlator_t *this, crypt_private_t *priv, if (options != NULL) GF_OPTION_RECONF("data-key-size", key_size, options, - size, error); + uint64, error); else - GF_OPTION_INIT("data-key-size", key_size, size, error); + 
GF_OPTION_INIT("data-key-size", key_size, uint64, error); ret = data_cipher_algs[master->m_alg][master->m_mode].check_key(key_size); if (ret) { diff --git a/xlators/encryption/crypt/src/crypt.h b/xlators/encryption/crypt/src/crypt.h index ff8eb571b..eb7291f13 100644 --- a/xlators/encryption/crypt/src/crypt.h +++ b/xlators/encryption/crypt/src/crypt.h @@ -22,6 +22,7 @@ #include <openssl/cmac.h> #include <openssl/modes.h> #include "crypt-mem-types.h" +#include "compat.h" #define CRYPT_XLATOR_ID (0) @@ -38,13 +39,17 @@ #define MASTER_VOL_KEY_SIZE (32) #define NMTD_VOL_KEY_SIZE (16) -#ifdef __NetBSD__ +#if defined(__NetBSD__) typedef off_t loff_t; #endif +#if defined(GF_DARWIN_HOST_OS) +typedef uint64_t loff_t; +#endif + struct crypt_key { uint32_t len; - const char *label; + const char *label; }; /* @@ -124,7 +129,7 @@ struct master_cipher_info { * master key */ unsigned char m_key[MASTER_VOL_KEY_SIZE]; - /* + /* * volume key for oid authentication */ unsigned char m_nmtd_key[NMTD_VOL_KEY_SIZE]; @@ -870,8 +875,8 @@ static inline linkop_unwind_handler_t linkop_unwind_dispatch(glusterfs_fop_t fop return rename_unwind; default: gf_log("crypt", GF_LOG_ERROR, "Bad link operation %d", fop); - return NULL; - } + return NULL; + } } static inline mtd_op_t linkop_mtdop_dispatch(glusterfs_fop_t fop) diff --git a/xlators/features/barrier/src/barrier.c b/xlators/features/barrier/src/barrier.c index e5465d1b4..5edb9cdd3 100644 --- a/xlators/features/barrier/src/barrier.c +++ b/xlators/features/barrier/src/barrier.c @@ -334,6 +334,74 @@ out: } int +notify (xlator_t *this, int event, void *data, ...) 
+{ + barrier_priv_t *priv = NULL; + dict_t *dict = NULL; + gf_boolean_t past = _gf_false; + int ret = -1; + gf_boolean_t barrier_enabled = _gf_false; + struct list_head queue = {0,}; + + priv = this->private; + GF_ASSERT (priv); + INIT_LIST_HEAD (&queue); + + switch (event) { + case GF_EVENT_TRANSLATOR_OP: + { + dict = data; + GF_OPTION_RECONF ("barrier", barrier_enabled, dict, + bool, out); + + LOCK (&priv->lock); + { + past = priv->barrier_enabled; + + switch (past) { + case _gf_false: + if (barrier_enabled) { + ret = __barrier_enable (this,priv); + if (ret) + goto unlock; + } else { + gf_log (this->name, GF_LOG_ERROR, + "Already disabled."); + goto unlock; + } + break; + + case _gf_true: + if (!barrier_enabled) { + __barrier_disable(this, &queue); + } else { + gf_log (this->name, GF_LOG_ERROR, + "Already enabled"); + goto unlock; + } + break; + } + ret = 0; + } +unlock: + UNLOCK (&priv->lock); + + if (!list_empty (&queue)) + barrier_dequeue_all (this, &queue); + // missing break is intentional + } + default: + { + default_notify (this, event, data); + ret = 0; + goto out; + } + } +out: + return ret; +} + +int reconfigure (xlator_t *this, dict_t *options) { barrier_priv_t *priv = NULL; @@ -347,7 +415,7 @@ reconfigure (xlator_t *this, dict_t *options) GF_ASSERT (priv); GF_OPTION_RECONF ("barrier", barrier_enabled, options, bool, out); - GF_OPTION_RECONF ("timeout", timeout, options, time, out); + GF_OPTION_RECONF ("barrier-timeout", timeout, options, time, out); INIT_LIST_HEAD (&queue); @@ -359,13 +427,9 @@ reconfigure (xlator_t *this, dict_t *options) case _gf_false: if (barrier_enabled) { ret = __barrier_enable (this, priv); - if (ret) + if (ret) { goto unlock; - - } else { - gf_log (this->name, GF_LOG_ERROR, - "Already disabled"); - goto unlock; + } } break; @@ -373,16 +437,10 @@ reconfigure (xlator_t *this, dict_t *options) if (!barrier_enabled) { __barrier_disable (this, &queue); - } else { - gf_log (this->name, GF_LOG_ERROR, - "Already enabled"); - goto 
unlock; } break; } - priv->timeout.tv_sec = timeout; - ret = 0; } unlock: @@ -432,7 +490,7 @@ init (xlator_t *this) LOCK_INIT (&priv->lock); GF_OPTION_INIT ("barrier", priv->barrier_enabled, bool, out); - GF_OPTION_INIT ("timeout", timeout, time, out); + GF_OPTION_INIT ("barrier-timeout", timeout, time, out); priv->timeout.tv_sec = timeout; INIT_LIST_HEAD (&priv->queue); @@ -588,7 +646,7 @@ struct volume_options options[] = { "write (with O_SYNC), fsync. It is turned \"off\" by " "default." }, - { .key = {"timeout"}, + { .key = {"barrier-timeout"}, .type = GF_OPTION_TYPE_TIME, .default_value = "120", .description = "After 'timeout' seconds since the time 'barrier' " diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am index 775f026cf..28d5a70aa 100644 --- a/xlators/features/changelog/lib/src/Makefile.am +++ b/xlators/features/changelog/lib/src/Makefile.am @@ -17,7 +17,8 @@ lib_LTLIBRARIES = libgfchangelog.la CONTRIB_BUILDDIR = $(top_builddir)/contrib libgfchangelog_la_SOURCES = gf-changelog.c gf-changelog-process.c \ - gf-changelog-helpers.c $(CONTRIBDIR)/uuid/clear.c \ + gf-changelog-helpers.c gf-history-changelog.c \ + $(CONTRIBDIR)/uuid/clear.c \ $(CONTRIBDIR)/uuid/copy.c $(CONTRIBDIR)/uuid/gen_uuid.c \ $(CONTRIBDIR)/uuid/pack.c $(CONTRIBDIR)/uuid/parse.c \ $(CONTRIBDIR)/uuid/unparse.c $(CONTRIBDIR)/uuid/uuid_time.c \ diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h index e4cf506c4..fa0edabf0 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-helpers.h +++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h @@ -23,6 +23,7 @@ #define GF_CHANGELOG_CURRENT_DIR ".current" #define GF_CHANGELOG_PROCESSED_DIR ".processed" #define GF_CHANGELOG_PROCESSING_DIR ".processing" +#define GF_CHANGELOG_HISTORY_DIR ".history" #ifndef MAXLINE #define MAXLINE 4096 @@ -68,6 +69,9 @@ typedef struct gf_changelog { char 
gfc_processing_dir[PATH_MAX]; pthread_t gfc_changelog_processor; + + /* Holds gfc for History API */ + struct gf_changelog *hist_gfc; } gf_changelog_t; int diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c index 4b2b25ad5..0827f2cac 100644 --- a/xlators/features/changelog/lib/src/gf-changelog.c +++ b/xlators/features/changelog/lib/src/gf-changelog.c @@ -82,6 +82,10 @@ __attribute__ ((destructor)) gf_changelog_dtor (void) gfc = this->private; if (gfc) { + if (gfc->hist_gfc) { + gf_changelog_cleanup(gfc->hist_gfc); + GF_FREE (gfc->hist_gfc); + } gf_changelog_cleanup (gfc); GF_FREE (gfc); } @@ -437,6 +441,7 @@ gf_changelog_register (char *brick_path, char *scratch_dir, int errn = 0; xlator_t *this = NULL; gf_changelog_t *gfc = NULL; + char hist_scratch_dir[PATH_MAX] = {0,}; this = THIS; if (!this->ctx) @@ -460,6 +465,52 @@ gf_changelog_register (char *brick_path, char *scratch_dir, goto cleanup; } + /* Begin: Changes for History API */ + gfc->hist_gfc = NULL; + + gfc->hist_gfc = GF_CALLOC (1, sizeof (*gfc), + gf_changelog_mt_libgfchangelog_t); + if (!gfc->hist_gfc) + goto cleanup; + + gfc->hist_gfc->gfc_dir = NULL; + gfc->hist_gfc->gfc_fd = gfc->hist_gfc->gfc_sockfd = -1; + gfc->hist_gfc->this = NULL; + + (void) strncpy (hist_scratch_dir, scratch_dir, PATH_MAX); + (void) snprintf (hist_scratch_dir, PATH_MAX, + "%s/"GF_CHANGELOG_HISTORY_DIR"/", + gfc->gfc_working_dir); + + ret = mkdir_p (hist_scratch_dir, 0600, _gf_false); + if (ret) { + errn = errno; + goto cleanup; + } + + gfc->hist_gfc->gfc_working_dir = realpath (hist_scratch_dir, NULL); + if (!gfc->hist_gfc->gfc_working_dir) { + errn = errno; + goto cleanup; + } + + ret = gf_changelog_open_dirs (gfc->hist_gfc); + if (ret) { + errn = errno; + gf_log (this->name, GF_LOG_ERROR, + "could not create entries in history scratch dir"); + goto cleanup; + } + + (void) strncpy (gfc->hist_gfc->gfc_brickpath, brick_path, PATH_MAX); + + for (i=0; i < 256; i++) { + 
gfc->hist_gfc->rfc3986[i] = + (isalnum(i) || i == '~' || + i == '-' || i == '.' || i == '_') ? i : 0; + } + /* End: Changes for History API*/ + ret = gf_changelog_open_dirs (gfc); if (ret) { errn = errno; @@ -494,7 +545,7 @@ gf_changelog_register (char *brick_path, char *scratch_dir, goto cleanup; } - for (; i < 256; i++) { + for (i=0; i < 256; i++) { gfc->rfc3986[i] = (isalnum(i) || i == '~' || i == '-' || i == '.' || i == '_') ? i : 0; @@ -506,6 +557,10 @@ gf_changelog_register (char *brick_path, char *scratch_dir, goto out; cleanup: + if (gfc->hist_gfc) { + gf_changelog_cleanup (gfc->hist_gfc); + GF_FREE (gfc->hist_gfc); + } gf_changelog_cleanup (gfc); GF_FREE (gfc); this->private = NULL; diff --git a/xlators/features/changelog/lib/src/gf-history-changelog.c b/xlators/features/changelog/lib/src/gf-history-changelog.c new file mode 100644 index 000000000..bfc4cd37d --- /dev/null +++ b/xlators/features/changelog/lib/src/gf-history-changelog.c @@ -0,0 +1,274 @@ +#include <errno.h> +#include <dirent.h> +#include <stddef.h> +#include <sys/types.h> + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <string.h> + +#include "globals.h" +#include "glusterfs.h" +#include "logging.h" + +#include "gf-changelog-helpers.h" + +/* from the changelog translator */ +#include "changelog-misc.h" +#include "changelog-mem-types.h" + +/*@API + * gf_history_changelog_done: + * Move processed history changelog file from .processing + * to .processed + * + * ARGUMENTS: + * file(IN): path to processed history changelog file in + * .processing directory. + * + * RETURN VALUE: + * 0: On success. + * -1: On error. 
+ */ +int +gf_history_changelog_done (char *file) +{ + int ret = -1; + char *buffer = NULL; + xlator_t *this = NULL; + gf_changelog_t *gfc = NULL; + gf_changelog_t *hist_gfc = NULL; + char to_path[PATH_MAX] = {0,}; + + errno = EINVAL; + + this = THIS; + if (!this) + goto out; + + gfc = (gf_changelog_t *) this->private; + if (!gfc) + goto out; + + hist_gfc = gfc->hist_gfc; + if (!hist_gfc) + goto out; + + if (!file || !strlen (file)) + goto out; + + /* make sure 'file' is inside ->gfc_working_dir */ + buffer = realpath (file, NULL); + if (!buffer) + goto out; + + if (strncmp (hist_gfc->gfc_working_dir, + buffer, strlen (hist_gfc->gfc_working_dir))) + goto out; + + (void) snprintf (to_path, PATH_MAX, "%s%s", + hist_gfc->gfc_processed_dir, basename (buffer)); + gf_log (this->name, GF_LOG_DEBUG, + "moving %s to processed directory", file); + ret = rename (buffer, to_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "cannot move %s to %s (reason: %s)", + file, to_path, strerror (errno)); + goto out; + } + + ret = 0; + + out: + if (buffer) + free (buffer); /* allocated by realpath() */ + return ret; +} +/** + * @API + * gf_history_changelog_start_fresh: + * For a set of changelogs, start from the begining. + * It will truncates the history tracker fd. + * + * RETURN VALUES: + * 0: On success. + * -1: On error. + */ +int +gf_history_changelog_start_fresh () +{ + xlator_t *this = NULL; + gf_changelog_t *gfc = NULL; + gf_changelog_t *hist_gfc = NULL; + + this = THIS; + if (!this) + goto out; + + errno = EINVAL; + + gfc = (gf_changelog_t *) this->private; + if (!gfc) + goto out; + + hist_gfc = gfc->hist_gfc; + if (!hist_gfc) + goto out; + + if (gf_ftruncate (hist_gfc->gfc_fd, 0)) + goto out; + + return 0; + + out: + return -1; +} + +/* + * @API + * gf_history_changelog_next_change: + * Return the next history changelog file entry. Zero means all + * history chanelogs are consumed. 
+ * + * ARGUMENTS: + * bufptr(OUT): Path to unprocessed history changelog file + * from tracker file. + * maxlen(IN): Usually PATH_MAX. + * + * RETURN VALUES: + * size: On success. + * -1 : On error. + */ +ssize_t +gf_history_changelog_next_change (char *bufptr, size_t maxlen) +{ + ssize_t size = 0; + int tracker_fd = 0; + xlator_t *this = NULL; + gf_changelog_t *gfc = NULL; + gf_changelog_t *hist_gfc = NULL; + char buffer[PATH_MAX] = {0,}; + + errno = EINVAL; + + this = THIS; + if (!this) + goto out; + + gfc = (gf_changelog_t *) this->private; + if (!gfc) + goto out; + + hist_gfc = gfc->hist_gfc; + if (!hist_gfc) + goto out; + + tracker_fd = hist_gfc->gfc_fd; + + size = gf_readline (tracker_fd, buffer, maxlen); + if (size < 0) + goto out; + if (size == 0) + return 0; + + memcpy (bufptr, buffer, size - 1); + *(buffer + size) = '\0'; + + return size; + + out: + return -1; +} + +/* + * @API + * gf_history_changelog_scan: + * Scan and generate a list of change entries. + * Calling this api multiple times (without calling gf_changlog_done()) + * would result new changelogs(s) being refreshed in the tracker file. + * This call also acts as a cancellation point for the consumer. + * + * RETURN VALUES: + * nr_entries: On success. + * -1 : On error. 
+ */ +ssize_t +gf_history_changelog_scan () +{ + int ret = 0; + int tracker_fd = 0; + size_t len = 0; + size_t off = 0; + xlator_t *this = NULL; + size_t nr_entries = 0; + gf_changelog_t *gfc = NULL; + gf_changelog_t *hist_gfc = NULL; + struct dirent *entryp = NULL; + struct dirent *result = NULL; + char buffer[PATH_MAX] = {0,}; + + this = THIS; + if (!this) + goto out; + + gfc = (gf_changelog_t *) this->private; + if (!gfc) + goto out; + + hist_gfc = gfc->hist_gfc; + if (!hist_gfc) + goto out; + + errno = EINVAL; + + tracker_fd = hist_gfc->gfc_fd; + + if (gf_ftruncate (tracker_fd, 0)) + goto out; + + len = offsetof(struct dirent, d_name) + + pathconf(hist_gfc->gfc_processing_dir, _PC_NAME_MAX) + 1; + entryp = GF_CALLOC (1, len, + gf_changelog_mt_libgfchangelog_dirent_t); + if (!entryp) + goto out; + + rewinddir (hist_gfc->gfc_dir); + while (1) { + ret = readdir_r (hist_gfc->gfc_dir, entryp, &result); + if (ret || !result) + break; + + if ( !strcmp (basename (entryp->d_name), ".") + || !strcmp (basename (entryp->d_name), "..") ) + continue; + + nr_entries++; + + GF_CHANGELOG_FILL_BUFFER (hist_gfc->gfc_processing_dir, + buffer, off, + strlen (hist_gfc->gfc_processing_dir)); + GF_CHANGELOG_FILL_BUFFER (entryp->d_name, buffer, + off, strlen (entryp->d_name)); + GF_CHANGELOG_FILL_BUFFER ("\n", buffer, off, 1); + + if (gf_changelog_write (tracker_fd, buffer, off) != off) { + gf_log (this->name, GF_LOG_ERROR, + "error writing changelog filename" + " to tracker file"); + break; + } + off = 0; + } + + GF_FREE (entryp); + + if (!result) { + if (gf_lseek (tracker_fd, 0, SEEK_SET) != -1) + return nr_entries; + } + out: + return -1; +} diff --git a/xlators/features/changelog/src/Makefile.am b/xlators/features/changelog/src/Makefile.am index d802584df..525ce97dc 100644 --- a/xlators/features/changelog/src/Makefile.am +++ b/xlators/features/changelog/src/Makefile.am @@ -14,7 +14,7 @@ changelog_la_SOURCES = changelog.c changelog-rt.c changelog-helpers.c \ changelog_la_LIBADD = 
$(top_builddir)/libglusterfs/src/libglusterfs.la AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -Ipolicy/ -fPIC -D_FILE_OFFSET_BITS=64 \ - -D_GNU_SOURCE -D$(GF_HOST_OS) -shared -nostartfiles -DDATADIR=\"$(localstatedir)\" + -D_GNU_SOURCE -D$(GF_HOST_OS) -shared -DDATADIR=\"$(localstatedir)\" AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h index f8f254cf6..e4e2dfc96 100644 --- a/xlators/features/changelog/src/changelog-helpers.h +++ b/xlators/features/changelog/src/changelog-helpers.h @@ -422,9 +422,11 @@ typedef struct { void changelog_thread_cleanup (xlator_t *this, pthread_t thr_id); -inline void * + +void * changelog_get_usable_buffer (changelog_local_t *local); -inline void + +void changelog_set_usable_record_and_length (changelog_local_t *local, size_t len, int xr); void @@ -437,9 +439,9 @@ changelog_inject_single_event (xlator_t *this, changelog_priv_t *priv, changelog_local_t *local, changelog_log_data_t *cld); -inline size_t +size_t changelog_entry_length (); -inline int +int changelog_write (int fd, char *buffer, size_t len); int changelog_write_change (xlator_t *this, changelog_priv_t *priv, diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c index 66fdd61be..6d4b502de 100644 --- a/xlators/features/changelog/src/changelog.c +++ b/xlators/features/changelog/src/changelog.c @@ -1370,12 +1370,11 @@ struct volume_options options[] = { .description = "encoding type for changelogs" }, {.key = {"rollover-time"}, - .type = GF_OPTION_TYPE_INT, .description = "time to switch to a new changelog file (in seconds)" }, {.key = {"fsync-interval"}, .type = GF_OPTION_TYPE_TIME, - .default_value = "0", + .default_value = "5", .description = "do not open CHANGELOG file with O_SYNC mode." 
" instead perform fsync() at specified intervals" }, diff --git a/xlators/features/compress/src/Makefile.am b/xlators/features/compress/src/Makefile.am index 0bf757c06..263b21b78 100644 --- a/xlators/features/compress/src/Makefile.am +++ b/xlators/features/compress/src/Makefile.am @@ -10,7 +10,7 @@ cdc_la_SOURCES = cdc.c cdc-helper.c cdc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \ --shared -nostartfiles $(LIBZ_CFLAGS) +-shared $(LIBZ_CFLAGS) AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/xlators/features/glupy/src/setup.py.in b/xlators/features/glupy/src/setup.py.in index 1aea9875f..b9ee02c2b 100644 --- a/xlators/features/glupy/src/setup.py.in +++ b/xlators/features/glupy/src/setup.py.in @@ -16,7 +16,7 @@ setup( description='Glupy is the Python translator interface for GlusterFS', long_description=DESC, author='Gluster Community', - author_email='gluster-devel@nongnu.org', + author_email='gluster-devel@gluster.org', license='LGPLv3', url='http://gluster.org/', package_dir={'gluster':''}, diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index 5c1c65fbd..4b2eb2e29 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -497,13 +497,13 @@ fop_fxattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr) _xattrop_index_action (this, inode, xattr); } -inline gf_boolean_t +static inline gf_boolean_t index_xattrop_track (loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict) { return (flags == GF_XATTROP_ADD_ARRAY); } -inline gf_boolean_t +static inline gf_boolean_t index_fxattrop_track (fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict) { return (flags == GF_XATTROP_ADD_ARRAY); diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index dc86512be..8496d9d8d 100644 --- a/xlators/features/locks/src/entrylk.c +++ 
b/xlators/features/locks/src/entrylk.c @@ -715,15 +715,16 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) pl_inode_t *pinode = NULL; struct list_head released; + struct list_head unwind; INIT_LIST_HEAD (&released); + INIT_LIST_HEAD (&unwind); pthread_mutex_lock (&ctx->lock); { list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers, client_list) { list_del_init (&l->client_list); - list_add_tail (&l->client_list, &released); pl_entrylk_log_cleanup (l); @@ -731,25 +732,68 @@ pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) pthread_mutex_lock (&pinode->mutex); { - list_del_init (&l->domain_list); + /* If the entrylk object is part of granted list but not + * blocked list, then perform the following actions: + * i. delete the object from granted list; + * ii. grant other locks (from other clients) that may + * have been blocked on this entrylk; and + * iii. unref the object. + * + * If the entrylk object (L1) is part of both granted + * and blocked lists, then this means that a parallel + * unlock on another entrylk (L2 say) may have 'granted' + * L1 and added it to 'granted' list in + * __grant_blocked_entry_locks() (although using the + * 'blocked_locks' member). In that case, the cleanup + * codepath must try and grant other overlapping + * blocked entrylks from other clients, now that L1 is + * out of their way and then unref L1 in the end, and + * leave it to the other thread (the one executing + * unlock codepath) to unwind L1's frame, delete it from + * blocked_locks list, and perform the last unref on L1. + * + * If the entrylk object (L1) is part of blocked list + * only, the cleanup code path must: + * i. delete it from the blocked_locks list inside + * this critical section, + * ii. unwind its frame with EAGAIN, + * iii. try and grant blocked entry locks from other + * clients that were otherwise grantable, but were + * blocked to avoid leaving L1 to starve forever. + * iv. unref the object. 
+ */ + if (!list_empty (&l->domain_list)) { + list_del_init (&l->domain_list); + list_add_tail (&l->client_list, + &released); + } else { + list_del_init (&l->blocked_locks); + list_add_tail (&l->client_list, + &unwind); + } } pthread_mutex_unlock (&pinode->mutex); } } pthread_mutex_unlock (&ctx->lock); - list_for_each_entry_safe (l, tmp, &released, client_list) { + list_for_each_entry_safe (l, tmp, &unwind, client_list) { list_del_init (&l->client_list); if (l->frame) STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN, NULL); + list_add_tail (&l->client_list, &released); + } + + list_for_each_entry_safe (l, tmp, &released, client_list) { + list_del_init (&l->client_list); pinode = l->pinode; dom = get_domain (pinode, l->volume); - grant_blocked_inode_locks (this, pinode, dom); + grant_blocked_entry_locks (this, pinode, dom); pthread_mutex_lock (&pinode->mutex); { diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index e7093e60e..c76cb7f91 100644 --- a/xlators/features/locks/src/inodelk.c +++ b/xlators/features/locks/src/inodelk.c @@ -26,7 +26,7 @@ inline void __delete_inode_lock (pl_inode_lock_t *lock) { - list_del (&lock->list); + list_del_init (&lock->list); } static inline void @@ -35,7 +35,7 @@ __pl_inodelk_ref (pl_inode_lock_t *lock) lock->ref++; } -void +inline void __pl_inodelk_unref (pl_inode_lock_t *lock) { lock->ref--; @@ -404,7 +404,7 @@ pl_inodelk_log_cleanup (pl_inode_lock_t *lock) } -/* Release all entrylks from this client */ +/* Release all inodelks from this client */ int pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) { @@ -414,15 +414,16 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) pl_inode_t *pl_inode = NULL; struct list_head released; + struct list_head unwind; INIT_LIST_HEAD (&released); + INIT_LIST_HEAD (&unwind); pthread_mutex_lock (&ctx->lock); { list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers, client_list) { list_del_init (&l->client_list); - list_add_tail 
(&l->client_list, &released); pl_inodelk_log_cleanup (l); @@ -430,19 +431,64 @@ pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) pthread_mutex_lock (&pl_inode->mutex); { - __delete_inode_lock (l); + /* If the inodelk object is part of granted list but not + * blocked list, then perform the following actions: + * i. delete the object from granted list; + * ii. grant other locks (from other clients) that may + * have been blocked on this inodelk; and + * iii. unref the object. + * + * If the inodelk object (L1) is part of both granted + * and blocked lists, then this means that a parallel + * unlock on another inodelk (L2 say) may have 'granted' + * L1 and added it to 'granted' list in + * __grant_blocked_node_locks() (although using the + * 'blocked_locks' member). In that case, the cleanup + * codepath must try and grant other overlapping + * blocked inodelks from other clients, now that L1 is + * out of their way and then unref L1 in the end, and + * leave it to the other thread (the one executing + * unlock codepath) to unwind L1's frame, delete it from + * blocked_locks list, and perform the last unref on L1. + * + * If the inodelk object (L1) is part of blocked list + * only, the cleanup code path must: + * i. delete it from the blocked_locks list inside + * this critical section, + * ii. unwind its frame with EAGAIN, + * iii. try and grant blocked inode locks from other + * clients that were otherwise grantable, but just + * got blocked to avoid leaving L1 to starve + * forever. + * iv. unref the object. 
+ */ + if (!list_empty (&l->list)) { + __delete_inode_lock (l); + list_add_tail (&l->client_list, + &released); + } else { + list_del_init(&l->blocked_locks); + list_add_tail (&l->client_list, + &unwind); + } } pthread_mutex_unlock (&pl_inode->mutex); } } pthread_mutex_unlock (&ctx->lock); - list_for_each_entry_safe (l, tmp, &released, client_list) { + list_for_each_entry_safe (l, tmp, &unwind, client_list) { list_del_init (&l->client_list); - if (l->frame) + if (l->frame) STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN, NULL); + list_add_tail (&l->client_list, &released); + + } + + list_for_each_entry_safe (l, tmp, &released, client_list) { + list_del_init (&l->client_list); pl_inode = l->pl_inode; diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 2db327687..337623d65 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -294,7 +294,7 @@ pl_locks_by_fd (pl_inode_t *pl_inode, fd_t *fd) { list_for_each_entry (l, &pl_inode->ext_list, list) { - if ((l->fd_num == fd_to_fdnum(fd))) { + if (l->fd_num == fd_to_fdnum(fd)) { found = 1; break; } @@ -319,7 +319,7 @@ delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd) { list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) { - if ((l->fd_num == fd_to_fdnum(fd))) { + if (l->fd_num == fd_to_fdnum(fd)) { if (l->blocked) { list_move_tail (&l->list, &blocked_list); continue; @@ -644,7 +644,8 @@ pl_fgetxattr_handle_lockinfo (xlator_t *this, fd_t *fd, pl_inode_t *pl_inode = NULL; char *key = NULL, *buf = NULL; int32_t op_ret = 0; - unsigned long fdnum = 0, len = 0; + unsigned long fdnum = 0; + int32_t len = 0; dict_t *tmp = NULL; pl_inode = pl_inode_get (this, fd->inode); @@ -1340,7 +1341,7 @@ __fd_has_locks (pl_inode_t *pl_inode, fd_t *fd) posix_lock_t *l = NULL; list_for_each_entry (l, &pl_inode->ext_list, list) { - if ((l->fd_num == fd_to_fdnum(fd))) { + if (l->fd_num == fd_to_fdnum(fd)) { found = 1; break; } @@ -1369,7 +1370,7 
@@ __dup_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd, int ret = 0; list_for_each_entry (l, &pl_inode->ext_list, list) { - if ((l->fd_num == fd_to_fdnum(fd))) { + if (l->fd_num == fd_to_fdnum(fd)) { duplock = lock_dup (l); if (!duplock) { ret = -1; diff --git a/xlators/features/mac-compat/src/Makefile.am b/xlators/features/mac-compat/src/Makefile.am index f8567edce..42ed350e9 100644 --- a/xlators/features/mac-compat/src/Makefile.am +++ b/xlators/features/mac-compat/src/Makefile.am @@ -6,9 +6,10 @@ mac_compat_la_LDFLAGS = -module -avoid-version mac_compat_la_SOURCES = mac-compat.c mac_compat_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la +noinst_HEADERS = mac-compat.h + AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src AM_CFLAGS = -Wall $(GF_CFLAGS) -CLEANFILES = - +CLEANFILES = diff --git a/xlators/features/mac-compat/src/mac-compat.c b/xlators/features/mac-compat/src/mac-compat.c index 7cb550ad5..0eaf563e8 100644 --- a/xlators/features/mac-compat/src/mac-compat.c +++ b/xlators/features/mac-compat/src/mac-compat.c @@ -15,35 +15,28 @@ #include "xlator.h" #include "defaults.h" #include "compat-errno.h" +#include "syscall.h" +#include "mem-pool.h" +#include "mac-compat.h" - -enum apple_xattr { - GF_FINDER_INFO_XATTR, - GF_RESOURCE_FORK_XATTR, - GF_XATTR_ALL, - GF_XATTR_NONE -}; - -static char *apple_xattr_name[] = { - [GF_FINDER_INFO_XATTR] = "com.apple.FinderInfo", - [GF_RESOURCE_FORK_XATTR] = "com.apple.ResourceFork" -}; - -static const char *apple_xattr_value[] = { - [GF_FINDER_INFO_XATTR] = - /* 1 2 3 4 5 6 7 8 */ - "\0\0\0\0\0\0\0\0" - "\0\0\0\0\0\0\0\0" - "\0\0\0\0\0\0\0\0" - "\0\0\0\0\0\0\0\0", - [GF_RESOURCE_FORK_XATTR] = "" -}; - -static int32_t apple_xattr_len[] = { - [GF_FINDER_INFO_XATTR] = 32, - [GF_RESOURCE_FORK_XATTR] = 1 -}; - +static int +dict_key_remove_namespace(dict_t *dict, char *key, data_t *value, void *data) +{ + /* + char buffer[3*value->len+1]; + int index = 0; + for (index = 0; index < value->len; index++) + 
sprintf(buffer+3*index, " %02x", value->data[index]); + */ + xlator_t *this = (xlator_t *) data; + if (strncmp(key, "user.", 5) == 0) { + dict_set (dict, key + 5, value); + gf_log (this->name, GF_LOG_DEBUG, + "remove_namespace_dict: %s -> %s ", key, key + 5); + dict_del (dict, key); + } + return 0; +} int32_t maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -53,54 +46,91 @@ maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, intptr_t ax = (intptr_t)this->private; int i = 0; + gf_log (this->name, GF_LOG_DEBUG, + "getxattr_cbk: dict %p private: %p xdata %p ", dict, + this->private, xdata); + + if (dict) { + dict_foreach(dict, dict_key_remove_namespace, this); + } + else { + // TODO: we expect dict to exist here, don't know why this + // this is needed + dict = dict_new(); + } + gf_log (this->name, GF_LOG_DEBUG, + "getxattr_cbk: dict %p ax: %ld op_ret %d op_err %d ", dict, ax, + op_ret, op_errno); if ((ax == GF_XATTR_ALL && op_ret >= 0) || ax != GF_XATTR_NONE) { op_ret = op_errno = 0; - for (i = 0; i < GF_XATTR_ALL; i++) { if (dict_get (dict, apple_xattr_name[i])) continue; - + /* set dummy data */ + gf_log (this->name, GF_LOG_DEBUG, + "getxattr_cbk: setting dummy data %p, %s", dict, + apple_xattr_name[i]); if (dict_set (dict, apple_xattr_name[i], bin_to_data ((void *)apple_xattr_value[i], apple_xattr_len[i])) == -1) { op_ret = -1; - op_errno = ENOMEM; + op_errno = ENOATTR; break; } } } - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); - return 0; } -int32_t -maccomp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +static +int prepend_xattr_user_namespace(dict_t *dict, char *key, data_t *value, void *obj) { - intptr_t ax = GF_XATTR_NONE; - int i = 0; + xlator_t *this = (xlator_t *) obj; + dict_t *newdict = (dict_t *) this->private; + char *newkey = NULL; + gf_add_prefix(XATTR_USER_PREFIX, key, &newkey); + key = newkey; + dict_set(newdict, (char *)key, 
value); + if (newkey) + GF_FREE(newkey); + return 0; +} +intptr_t +check_name(const char *name, char **newkey) +{ + intptr_t ax = GF_XATTR_NONE; if (name) { + int i = 0; for (i = 0; i < GF_XATTR_ALL; i++) { if (strcmp (apple_xattr_name[i], name) == 0) { ax = i; - break; } } + gf_add_prefix("user.", name, newkey); } else ax = GF_XATTR_ALL; + return ax; +} - this->private = (void *)ax; +int32_t +maccomp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + char *newkey = NULL; + this->private = (void *) check_name(name, &newkey); + gf_log (this->name, GF_LOG_DEBUG, + "getxattr: name %s private: %p xdata %p ", name, + this->private, xdata); STACK_WIND (frame, maccomp_getxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr, - loc, name, xdata); + loc, newkey, xdata); return 0; } @@ -109,30 +139,17 @@ int32_t maccomp_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - intptr_t ax = GF_XATTR_NONE; - int i = 0; - - if (name) { - for (i = 0; i < GF_XATTR_ALL; i++) { - if (strcmp (apple_xattr_name[i], name) == 0) { - ax = i; - - break; - } - } - } else - ax = GF_XATTR_ALL; - - this->private = (void *)ax; + char *newkey = NULL; + this->private = (void *) check_name(name, &newkey); STACK_WIND (frame, maccomp_getxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fgetxattr, - fd, name, xdata); + fd, newkey, xdata); + GF_FREE(newkey); return 0; } - int32_t maccomp_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) @@ -141,12 +158,56 @@ maccomp_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret == -1 && ax != GF_XATTR_NONE) op_ret = op_errno = 0; - + gf_log (this->name, GF_LOG_DEBUG, + "setxattr_cbk op_ret %d op_errno %d private: %p xdata %p ", + op_ret, op_errno, this->private, xdata); STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); + return 0; +} +int32_t 
+maccomp_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *iatt1, + struct iatt *iattr2, dict_t *xdata) +{ + gf_log (this->name, GF_LOG_DEBUG, + "setattr_cbk op_ret %d op_errno %d private: %p xdata %p ", + op_ret, op_errno, this->private, xdata); + STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, + iatt1, iattr2, xdata); return 0; } +int map_flags(int flags) +{ + /* DARWIN has different defines on XATTR_ flags. + There do not seem to be a POSIX standard + Parse any other flags over. + NOFOLLOW is always true on Linux and Darwin + */ + int linux_flags = flags & ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE); + if (XATTR_CREATE & flags) + linux_flags |= GF_XATTR_CREATE; + if (XATTR_REPLACE & flags) + linux_flags |= GF_XATTR_REPLACE; + return linux_flags; +} + +int32_t +maccomp_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + char *newkey = NULL; + + this->private = (void *) check_name(name, &newkey); + + STACK_WIND (frame, default_fremovexattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, + fd, newkey, xdata); + GF_FREE(newkey); + return 0; +} int32_t maccomp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, @@ -162,16 +223,56 @@ maccomp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, break; } } + dict_t *newdict = dict_new(); + this->private = (void *) newdict; + dict_foreach(dict, prepend_xattr_user_namespace, this); this->private = (void *)ax; - + int linux_flags = map_flags(flags); + gf_log (this->name, GF_LOG_DEBUG, + "setxattr flags: %d -> %d dict %p private: %p xdata %p ", + flags, linux_flags, dict, this->private, xdata); STACK_WIND (frame, maccomp_setxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, - loc, dict, flags, xdata); + loc, newdict, linux_flags, xdata); + dict_unref(newdict); return 0; } +int32_t +maccomp_setattr (call_frame_t *frame, 
xlator_t *this, loc_t *loc, struct iatt *iattr, + int32_t flags, dict_t *xdata) +{ + gf_log (this->name, GF_LOG_DEBUG, + "setattr iattr %p private: %p xdata %p ", + iattr, this->private, xdata); + STACK_WIND (frame, maccomp_setattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, + loc, iattr, flags, xdata); + return 0; +} + +int32_t +maccomp_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + char *newkey = NULL; + this->private = (void *) check_name(name, &newkey); + + STACK_WIND (frame, default_removexattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + loc, newkey, xdata); + + gf_log (this->name, GF_LOG_TRACE, + "removeattr name %p private: %p xdata %p ", + name, this->private, xdata); + GF_FREE(newkey); + return 0; + +} int32_t maccomp_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, @@ -188,12 +289,20 @@ maccomp_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, } } - this->private = (void *)ax; + dict_t *newdict = dict_new(); + this->private = (void *) newdict; + dict_foreach(dict, prepend_xattr_user_namespace, this); + this->private = (void *)ax; + int linux_flags = map_flags(flags); + gf_log (this->name, GF_LOG_DEBUG, + "fsetxattr flags: %d -> %d dict %p private: %p xdata %p ", + flags, linux_flags, dict, this->private, xdata); STACK_WIND (frame, maccomp_setxattr_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, - fd, dict, flags, xdata); + fd, newdict, linux_flags, xdata); + dict_unref(newdict); return 0; } @@ -224,10 +333,13 @@ fini (xlator_t *this) struct xlator_fops fops = { - .getxattr = maccomp_getxattr, - .fgetxattr = maccomp_fgetxattr, - .setxattr = maccomp_setxattr, - .fsetxattr = maccomp_fsetxattr, + .getxattr = maccomp_getxattr, + .fgetxattr = maccomp_fgetxattr, + .setxattr = maccomp_setxattr, + .setattr = maccomp_setattr, + .fsetxattr = maccomp_fsetxattr, + .removexattr = maccomp_removexattr, + .fremovexattr = 
maccomp_fremovexattr, }; struct xlator_cbks cbks; diff --git a/xlators/features/mac-compat/src/mac-compat.h b/xlators/features/mac-compat/src/mac-compat.h new file mode 100644 index 000000000..b033ca0e4 --- /dev/null +++ b/xlators/features/mac-compat/src/mac-compat.h @@ -0,0 +1,41 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __MAC_COMPAT_H__ +#define __MAC_COMPAT_H__ + +enum apple_xattr { + GF_FINDER_INFO_XATTR, + GF_RESOURCE_FORK_XATTR, + GF_XATTR_ALL, + GF_XATTR_NONE +}; + +static char *apple_xattr_name[] = { + [GF_FINDER_INFO_XATTR] = "com.apple.FinderInfo", + [GF_RESOURCE_FORK_XATTR] = "com.apple.ResourceFork" +}; + +static const char *apple_xattr_value[] = { + [GF_FINDER_INFO_XATTR] = + /* 1 2 3 4 5 6 7 8 */ + "\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0" + "\0\0\0\0\0\0\0\0", + [GF_RESOURCE_FORK_XATTR] = "" +}; + +static int32_t apple_xattr_len[] = { + [GF_FINDER_INFO_XATTR] = 32, + [GF_RESOURCE_FORK_XATTR] = 1 +}; + +#endif /* __MAC_COMPAT_H__ */ diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h index 6cdd14881..b200413b0 100644 --- a/xlators/features/marker/src/marker-quota-helper.h +++ b/xlators/features/marker/src/marker-quota-helper.h @@ -9,7 +9,7 @@ */ #ifndef _MARKER_QUOTA_HELPER_H -#define _MARKER_QUOTA_HELPER +#define _MARKER_QUOTA_HELPER_H #ifndef _CONFIG_H #define _CONFIG_H diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c index a758e938f..1903fdc40 100644 --- a/xlators/features/marker/src/marker-quota.c +++ b/xlators/features/marker/src/marker-quota.c @@ -365,7 +365,10 @@ mq_update_size_xattr 
(call_frame_t *frame, void *cookie, xlator_t *this, local->loc.path, ntoh64 (*delta)); new_dict = dict_new (); - if (!new_dict); + if (!new_dict) { + errno = ENOMEM; + goto err; + } ret = dict_set_bin (new_dict, QUOTA_SIZE_KEY, delta, 8); if (ret) @@ -385,7 +388,6 @@ mq_update_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this, err: if (op_ret == -1 || ret == -1) { local->err = -1; - mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL); } diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c index a27a266f0..adcf3d8e7 100644 --- a/xlators/features/marker/src/marker.c +++ b/xlators/features/marker/src/marker.c @@ -255,18 +255,18 @@ out: return 0; } -int32_t +gf_boolean_t call_from_special_client (call_frame_t *frame, xlator_t *this, const char *name) { struct volume_mark *vol_mark = NULL; marker_conf_t *priv = NULL; - gf_boolean_t ret = _gf_true; + gf_boolean_t is_true = _gf_true; priv = (marker_conf_t *)this->private; if (frame->root->pid != GF_CLIENT_PID_GSYNCD || name == NULL || strcmp (name, MARKER_XATTR_PREFIX "." 
VOLUME_MARK) != 0) { - ret = _gf_false; + is_true = _gf_false; goto out; } @@ -274,7 +274,7 @@ call_from_special_client (call_frame_t *frame, xlator_t *this, const char *name) marker_getxattr_stampfile_cbk (frame, this, name, vol_mark, NULL); out: - return ret; + return is_true; } int32_t @@ -348,10 +348,10 @@ int32_t marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { - gf_boolean_t ret = _gf_false; - marker_conf_t *priv = NULL; - unsigned long cookie = 0; - marker_local_t *local = NULL; + gf_boolean_t is_true = _gf_false; + marker_conf_t *priv = NULL; + unsigned long cookie = 0; + marker_local_t *local = NULL; priv = this->private; @@ -362,16 +362,15 @@ marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, MARKER_INIT_LOCAL (frame, local); - ret = loc_copy (&local->loc, loc); - if (ret < 0) - goto out; + if ((loc_copy (&local->loc, loc)) < 0) + goto out; gf_log (this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid); if (priv && priv->feature_enabled & GF_XTIME) - ret = call_from_special_client (frame, this, name); + is_true = call_from_special_client (frame, this, name); - if (ret == _gf_false) { + if (is_true == _gf_false) { if (name == NULL) { /* Signifies that marker translator * has to filter the quota's xattr's, @@ -380,10 +379,11 @@ marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, */ cookie = 1; } - STACK_WIND_COOKIE (frame, marker_getxattr_cbk, (void *)cookie, + STACK_WIND_COOKIE (frame, marker_getxattr_cbk, + (void *)cookie, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, - name, xdata); + FIRST_CHILD(this)->fops->getxattr, + loc, name, xdata); } return 0; diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 4beaae341..2ca4da0c1 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -244,7 +244,7 @@ out: return; } -inline void +static inline void quota_link_count_decrement 
(quota_local_t *local) { call_stub_t *stub = NULL; @@ -270,7 +270,7 @@ out: return; } -inline void +static inline void quota_handle_validate_error (quota_local_t *local, int32_t op_ret, int32_t op_errno) { @@ -377,7 +377,7 @@ quota_timeout (struct timeval *tv, int32_t timeout) return timed_out; } -inline void +static inline void quota_add_parent (quota_dentry_t *dentry, struct list_head *list) { quota_dentry_t *entry = NULL; @@ -928,7 +928,7 @@ err: return 0; } -inline int +static inline int quota_get_limits (xlator_t *this, dict_t *dict, int64_t *hard_lim, int64_t *soft_lim) { diff --git a/xlators/features/quota/src/quotad-aggregator.c b/xlators/features/quota/src/quotad-aggregator.c index f3f65ca2a..5f13fd251 100644 --- a/xlators/features/quota/src/quotad-aggregator.c +++ b/xlators/features/quota/src/quotad-aggregator.c @@ -227,7 +227,7 @@ quotad_aggregator_getlimit (rpcsvc_request_t *req) if (ret) goto err; - ret = dict_set_int32 (state->xdata, GET_ANCESTRY_PATH_KEY,42); + ret = dict_set_int32 (state->xdata, GET_ANCESTRY_PATH_KEY, 42); if (ret) goto err; diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index ac53e67ee..9b33edf4d 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -29,7 +29,7 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src \ - -I$(CONTRIBDIR)/uuid \ + -I$(CONTRIBDIR)/uuid -I$(CONTRIBDIR)/mount \ -DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \ -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) $(XML_CPPFLAGS) diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index b111d2e54..e0373c774 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ 
b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -3415,6 +3415,18 @@ set_probe_error_str (int op_ret, int op_errno, char *op_errstr, char *errstr, "in this state"); break; + case GF_PROBE_MISSED_SNAP_CONFLICT: + snprintf (errstr, len, "Failed to update " + "list of missed snapshots from " + "peer %s", hostname); + break; + + case GF_PROBE_SNAP_CONFLICT: + snprintf (errstr, len, "Conflict in comparing " + "list of snapshots from " + "peer %s", hostname); + break; + default: snprintf (errstr, len, "Probe returned with " "unknown errno %d", op_errno); @@ -3902,6 +3914,52 @@ glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req) } static int +get_volinfo_from_brickid (char *brickid, glusterd_volinfo_t **volinfo) +{ + int ret = -1; + char *volid_str = NULL; + char *brick = NULL; + char *brickid_dup = NULL; + uuid_t volid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (brickid); + + brickid_dup = gf_strdup (brickid); + if (!brickid_dup) + goto out; + + volid_str = brickid_dup; + brick = strchr (brickid_dup, ':'); + if (!brick) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid brickid"); + goto out; + } + + *brick = '\0'; + brick++; + uuid_parse (volid_str, volid); + ret = glusterd_volinfo_find_by_volume_id (volid, volinfo); + if (ret) { + /* Check if it is a snapshot volume */ + ret = glusterd_snap_volinfo_find_by_volume_id (volid, volinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to find volinfo"); + goto out; + } + } + + ret = 0; +out: + GF_FREE (brickid_dup); + return ret; +} + +static int get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo) { glusterd_volinfo_t *volinfo = NULL; @@ -3944,13 +4002,14 @@ out: int __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, void *data) + rpc_clnt_event_t event, void *data) { - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; - int ret = 0; - char *brickid = NULL; - glusterd_brickinfo_t *brickinfo = 
NULL; + char *brickid = NULL; + int ret = 0; + glusterd_conf_t *conf = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; brickid = mydata; if (!brickid) @@ -3967,6 +4026,37 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, switch (event) { case RPC_CLNT_CONNECT: + /* If a node on coming back up, already starts a brick + * before the handshake, and the notification comes after + * the handshake is done, then we need to check if this + * is a restored brick with a snapshot pending. If so, we + * need to stop the brick + */ + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "Snapshot is pending on %s:%s. " + "Hence not starting the brick", + brickinfo->hostname, + brickinfo->path); + ret = get_volinfo_from_brickid (brickid, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volinfo from " + "brickid(%s)", brickid); + goto out; + } + + ret = glusterd_brick_stop (volinfo, brickinfo, + _gf_false); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to stop %s:%s", + brickinfo->hostname, brickinfo->path); + goto out; + } + + break; + } gf_log (this->name, GF_LOG_DEBUG, "Connected to %s:%s", brickinfo->hostname, brickinfo->path); glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED); @@ -3992,6 +4082,7 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, break; } +out: return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c index 352b6ba11..78730a564 100644 --- a/xlators/mgmt/glusterd/src/glusterd-hooks.c +++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c @@ -181,7 +181,7 @@ glusterd_hooks_set_volume_args (dict_t *dict, runner_t *runner) goto out; runner_add_arg (runner, "-o"); - for (i = 1; (ret == 0); i++) { + for (i = 1; ret == 0; i++) { snprintf (query, sizeof (query), "key%d", i); ret = dict_get_str (dict, query, &key); if (ret) diff --git 
a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index fb5e097d9..9b130b4c6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -1020,7 +1020,7 @@ glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr) { int ret = 0; char *volname = NULL; - gf_boolean_t exists = _gf_false; + int exists = 0; char msg[2048] = {0}; char *key = NULL; char *key_fixed = NULL; @@ -1068,6 +1068,7 @@ glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr) ret = -1; goto out; } + if (!exists) { ret = snprintf (msg, sizeof (msg), "Option %s does not exist", key); @@ -1819,7 +1820,7 @@ glusterd_op_set_volume (dict_t *dict) if (dict_count == 0) { ret = glusterd_volset_help (NULL, &op_errstr); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "%s", + gf_log (this->name, GF_LOG_ERROR, "%s", (op_errstr)? op_errstr: "Volume set help internal error"); } @@ -2081,12 +2082,12 @@ glusterd_op_sync_volume (dict_t *dict, char **op_errstr, if (volname) { ret = glusterd_add_volume_to_dict (volinfo, rsp_dict, - 1); + 1, "volume"); vol_count = 1; } else { list_for_each_entry (volinfo, &priv->volumes, vol_list) { - ret = glusterd_add_volume_to_dict (volinfo, - rsp_dict, count); + ret = glusterd_add_volume_to_dict (volinfo, rsp_dict, + count, "volume"); if (ret) goto out; @@ -3262,7 +3263,7 @@ glusterd_is_get_op (xlator_t *this, glusterd_op_t op, dict_t *dict) if (op == GD_OP_STATUS_VOLUME) return _gf_true; - if ((op == GD_OP_SET_VOLUME)) { + if (op == GD_OP_SET_VOLUME) { //check for set volume help ret = dict_get_str (dict, "volname", &volname); if (volname && diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c index cf23b6404..7f798ad26 100644 --- a/xlators/mgmt/glusterd/src/glusterd-quota.c +++ b/xlators/mgmt/glusterd/src/glusterd-quota.c @@ -474,7 +474,7 @@ glusterd_set_quota_limit (char *volname, char *path, char *hard_limit, new_limit.sl = hton64 
(new_limit.sl); - ret = gf_string2bytesize (hard_limit, (uint64_t*)&new_limit.hl); + ret = gf_string2bytesize_uint64 (hard_limit, (uint64_t*)&new_limit.hl); if (ret) goto out; @@ -1400,13 +1400,13 @@ glusterd_op_stage_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict) "Faild to get hard-limit from dict"); goto out; } - ret = gf_string2bytesize (hard_limit_str, &hard_limit); + ret = gf_string2bytesize_uint64 (hard_limit_str, &hard_limit); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to convert hard-limit string to value"); goto out; } - if (hard_limit > INT64_MAX) { + if (hard_limit > UINT64_MAX) { ret = -1; ret = gf_asprintf (op_errstr, "Hard-limit %s is greater" " than %"PRId64"bytes. Please set a " diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index 27910d132..babd5a3be 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -1242,12 +1242,12 @@ int32_t glusterd_rpc_friend_add (call_frame_t *frame, xlator_t *this, void *data) { - gd1_mgmt_friend_req req = {{0},}; - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - glusterd_friend_sm_event_t *event = NULL; - dict_t *vols = NULL; + gd1_mgmt_friend_req req = {{0},}; + int ret = 0; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_friend_sm_event_t *event = NULL; + dict_t *peer_data = NULL; if (!frame || !this || !data) { @@ -1262,15 +1262,37 @@ glusterd_rpc_friend_add (call_frame_t *frame, xlator_t *this, peerinfo = event->peerinfo; - ret = glusterd_build_volume_dict (&vols); - if (ret) + ret = glusterd_add_volumes_to_export_dict (&peer_data); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to add list of volumes " + "in the peer_data dict for handshake"); goto out; + } + + if (priv->op_version >= GD_OP_VERSION_4) { + ret = glusterd_add_missed_snaps_to_export_dict (peer_data); + if (ret) { + gf_log 
(this->name, GF_LOG_ERROR, + "Unable to add list of missed snapshots " + "in the peer_data dict for handshake"); + goto out; + } + + ret = glusterd_add_snapshots_to_export_dict (peer_data); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to add list of snapshots " + "in the peer_data dict for handshake"); + goto out; + } + } uuid_copy (req.uuid, MY_UUID); req.hostname = peerinfo->hostname; req.port = peerinfo->port; - ret = dict_allocate_and_serialize (vols, &req.vols.vols_val, + ret = dict_allocate_and_serialize (peer_data, &req.vols.vols_val, &req.vols.vols_len); if (ret) goto out; @@ -1284,8 +1306,8 @@ glusterd_rpc_friend_add (call_frame_t *frame, xlator_t *this, out: GF_FREE (req.vols.vols_val); - if (vols) - dict_unref (vols); + if (peer_data) + dict_unref (peer_data); gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); return ret; diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index 3aaf359ac..7a8b2c94f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -652,15 +652,14 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) glusterd_friend_update_ctx_t *new_ev_ctx = NULL; glusterd_friend_sm_event_t *new_event = NULL; glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; + glusterd_conf_t *conf = NULL; int status = 0; int32_t op_ret = -1; int32_t op_errno = 0; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; this = THIS; - priv = this->private; - + GF_ASSERT (this); GF_ASSERT (ctx); ev_ctx = ctx; uuid_copy (uuid, ev_ctx->uuid); @@ -668,6 +667,9 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) GF_ASSERT (peerinfo); uuid_copy (peerinfo->uuid, ev_ctx->uuid); + conf = this->private; + GF_ASSERT (conf); + //Build comparison logic here. 
ret = glusterd_compare_friend_data (ev_ctx->vols, &status, peerinfo->hostname); @@ -683,6 +685,31 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) op_ret = -1; } + /* Compare missed_snapshot list with the peer * + * if volume comparison is successful */ + if ((op_ret == 0) && + (conf->op_version >= GD_OP_VERSION_4)) { + ret = glusterd_import_friend_missed_snap_list (ev_ctx->vols); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to import peer's " + "missed_snaps_list."); + event_type = GD_FRIEND_EVENT_LOCAL_RJT; + op_errno = GF_PROBE_MISSED_SNAP_CONFLICT; + op_ret = -1; + } + + ret = glusterd_compare_friend_snapshots (ev_ctx->vols, + peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Conflict in comparing peer's snapshots"); + event_type = GD_FRIEND_EVENT_LOCAL_RJT; + op_errno = GF_PROBE_SNAP_CONFLICT; + op_ret = -1; + } + } + ret = glusterd_friend_sm_new_event (event_type, &new_event); if (ret) { @@ -712,9 +739,9 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) // apply a deterministic function to decide via whom we should join the cluster if (strcmp(peerinfo->hostname, ev_ctx->hostname) > 0) { - stop_etcd(priv->etcd_pid); + stop_etcd(conf->etcd_pid); nuke_etcd_dir(); - priv->etcd_pid = start_etcd (uuid_utoa(MY_UUID), peerinfo->hostname); + conf->etcd_pid = start_etcd (uuid_utoa(MY_UUID), peerinfo->hostname); } out: diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c index d11abee70..0e824a022 100644 --- a/xlators/mgmt/glusterd/src/glusterd-snapshot.c +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -18,6 +18,14 @@ #include <sys/resource.h> #include <sys/statvfs.h> #include <sys/mount.h> +#include <signal.h> + + +#if !defined(__NetBSD__) && !defined(GF_DARWIN_HOST_OS) +#include <mntent.h> +#else +#include "mntent_compat.h" +#endif #include "globals.h" #include "compat.h" @@ -40,16 +48,70 @@ #include "cli1-xdr.h" 
#include "xdr-generic.h" -#ifdef GF_LINUX_HOST_OS -#include <mntent.h> -#endif +#include "lvm-defaults.h" char snap_mount_folder[PATH_MAX]; +/* Look for disconnected peers, for missed snap creates or deletes */ static int32_t glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, - char *snap_uuid, struct list_head *peers, - int32_t op); + struct list_head *peers, int32_t op) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (peers); + GF_ASSERT (vol); + + brick_count = 0; + list_for_each_entry (brickinfo, &vol->bricks, brick_list) { + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + /* If the brick belongs to the same node */ + brick_count++; + continue; + } + + list_for_each_entry (peerinfo, peers, uuid_list) { + if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) { + /* If the brick doesnt belong to this peer */ + continue; + } + + /* Found peer who owns the brick, * + * if peer is not connected or not * + * friend add it to missed snap list */ + if (!(peerinfo->connected) || + (peerinfo->state.state != + GD_FRIEND_STATE_BEFRIENDED)) { + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + vol, brickinfo, + brick_count + 1, + op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot " + "info for %s:%s in the " + "rsp_dict", brickinfo->hostname, + brickinfo->path); + goto out; + } + } + } + brick_count++; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} /* This function will restore a snapshot volumes * @@ -62,7 +124,6 @@ int glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { int ret = -1; - char *volname = NULL; char *snapname = NULL; xlator_t *this = NULL; glusterd_volinfo_t *snap_volinfo = NULL; @@ -90,7 +151,7 @@ glusterd_snapshot_restore (dict_t *dict, char 
**op_errstr, dict_t *rsp_dict) snap = glusterd_find_snap_by_name (snapname); if (NULL == snap) { ret = gf_asprintf (op_errstr, "Snap (%s) not found", - snapname); + snapname); if (ret < 0) { goto out; } @@ -99,6 +160,7 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict) goto out; } + /* TODO : As of now there is only volume in snapshot. * Change this when multiple volume snapshot is introduced */ @@ -116,7 +178,6 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict) /* From origin glusterd check if * * any peers with snap bricks is down */ ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, - snap_volinfo->volname, &priv->peers, GF_SNAP_OPTION_TYPE_RESTORE); if (ret) { @@ -133,7 +194,7 @@ glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict) * failure. */ gf_log (this->name, GF_LOG_ERROR, "Failed to restore " - "snap for %s volume", volname); + "snap for %s", snapname); goto out; } @@ -156,7 +217,7 @@ out: * @param rsp_dict response dictionary * @return Negative value on Failure and 0 in success */ -int +int32_t glusterd_snapshot_restore_prevalidate (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { @@ -267,7 +328,7 @@ out: int snap_max_hard_limits_validate (dict_t *dict, char *volname, - uint64_t value, char **op_errstr) + uint64_t value, char **op_errstr) { char err_str[PATH_MAX] = ""; glusterd_conf_t *conf = NULL; @@ -449,7 +510,7 @@ glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src) for (i = 0; i < volume_count; i++) { memset (snapbrckcnt, '\0', sizeof(snapbrckcnt)); ret = snprintf (snapbrckcnt, sizeof(snapbrckcnt) - 1, - "vol%ld_brickcount", i+1); + "vol%"PRId64"_brickcount", i+1); ret = dict_get_int64 (src, snapbrckcnt, &brick_count); if (ret) { gf_log (this->name, GF_LOG_TRACE, @@ -460,7 +521,7 @@ glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src) for (j = 0; j < brick_count; j++) { /* Fetching data from source dict */ snprintf (key, 
sizeof(key) - 1, - "vol%ld.brickdir%ld", i+1, j); + "vol%"PRId64".brickdir%"PRId64, i+1, j); ret = dict_get_ptr (src, key, (void **)&snap_brick_dir); @@ -471,7 +532,7 @@ glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src) } snprintf (key, sizeof(key) - 1, - "vol%ld.brick_snapdevice%ld", i+1, j); + "vol%"PRId64".brick_snapdevice%"PRId64, i+1, j); ret = dict_get_ptr (src, key, (void **)&snap_device); @@ -482,7 +543,7 @@ glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src) } snprintf (snapbrckord, sizeof(snapbrckord) - 1, - "vol%ld.brick%ld.order", i+1, j); + "vol%"PRId64".brick%"PRId64".order", i+1, j); ret = dict_get_int64 (src, snapbrckord, &brick_order); if (ret) { @@ -493,7 +554,7 @@ glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src) /* Adding the data in the dst dict */ snprintf (key, sizeof(key) - 1, - "vol%ld.brickdir%ld", i+1, brick_order); + "vol%"PRId64".brickdir%"PRId64, i+1, brick_order); tmpstr = gf_strdup (snap_brick_dir); if (!tmpstr) { @@ -511,7 +572,7 @@ glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src) } snprintf (key, sizeof(key) - 1, - "vol%ld.brick_snapdevice%ld", + "vol%"PRId64".brick_snapdevice%"PRId64, i+1, brick_order); tmpstr = gf_strdup (snap_device); @@ -618,8 +679,8 @@ glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr, goto out; } if (volcount <= 0) { - snprintf (err_str, sizeof (err_str), "Invalid volume count %ld " - "supplied", volcount); + snprintf (err_str, sizeof (err_str), "Invalid volume count %"PRId64 + " supplied", volcount); ret = -1; goto out; } @@ -638,7 +699,7 @@ glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr, } for (i = 1; i <= volcount; i++) { - snprintf (key, sizeof (key), "volname%ld", i); + snprintf (key, sizeof (key), "volname%"PRId64, i); ret = dict_get_str (dict, key, &volname); if (ret) { snprintf (err_str, sizeof (err_str), @@ -690,7 +751,7 @@ glusterd_snapshot_create_prevalidate (dict_t *dict, char 
**op_errstr, goto out; } - snprintf (key, sizeof(key) - 1, "vol%ld_volid", i); + snprintf (key, sizeof(key) - 1, "vol%"PRId64"_volid", i); ret = dict_get_bin (dict, key, (void **)&snap_volid); if (ret) { gf_log (this->name, GF_LOG_ERROR, @@ -745,7 +806,7 @@ glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr, } snprintf (key, sizeof(key), - "vol%ld.brick_snapdevice%ld", i, + "vol%"PRId64".brick_snapdevice%"PRId64, i, brick_count); ret = dict_set_dynstr (rsp_dict, key, device); if (ret) { @@ -783,7 +844,7 @@ glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr, ret = -1; goto out; } - snprintf (key, sizeof(key), "vol%ld.brickdir%ld", i, + snprintf (key, sizeof(key), "vol%"PRId64".brickdir%"PRId64, i, brick_count); ret = dict_set_dynstr (rsp_dict, key, tmpstr); if (ret) { @@ -793,7 +854,7 @@ glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr, } tmpstr = NULL; - snprintf (key, sizeof(key) - 1, "vol%ld.brick%ld.order", + snprintf (key, sizeof(key) - 1, "vol%"PRId64".brick%"PRId64".order", i, brick_count); ret = dict_set_int64 (rsp_dict, key, brick_order); if (ret) { @@ -805,7 +866,7 @@ glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr, brick_count++; brick_order++; } - snprintf (key, sizeof(key) - 1, "vol%ld_brickcount", i); + snprintf (key, sizeof(key) - 1, "vol%"PRId64"_brickcount", i); ret = dict_set_int64 (rsp_dict, key, brick_count); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to set %s", @@ -1002,7 +1063,7 @@ glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol, snprintf (msg, sizeof(msg), "remove snapshot of the brick %s:%s, " "device: %s", brickinfo->hostname, brickinfo->path, snap_device); - runner_add_args (&runner, "/sbin/lvremove", "-f", snap_device, NULL); + runner_add_args (&runner, LVM_REMOVE, "-f", snap_device, NULL); runner_log (&runner, "", GF_LOG_DEBUG, msg); ret = runner_run (&runner); @@ -1030,19 +1091,25 @@ glusterd_lvm_snapshot_remove (dict_t *rsp_dict, 
glusterd_volinfo_t *snap_vol) this = THIS; GF_ASSERT (this); - GF_ASSERT (rsp_dict); GF_ASSERT (snap_vol); - if (!snap_vol) { - gf_log (this->name, GF_LOG_ERROR, "snap volinfo is NULL"); + if ((snap_vol->is_snap_volume == _gf_false) && + (uuid_is_null (snap_vol->restored_from_snap))) { + gf_log (this->name, GF_LOG_DEBUG, + "Not a snap volume, or a restored snap volume."); + ret = 0; goto out; } brick_count = -1; list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { brick_count++; - if (uuid_compare (brickinfo->uuid, MY_UUID)) + if (uuid_compare (brickinfo->uuid, MY_UUID)) { + gf_log (this->name, GF_LOG_DEBUG, + "%s:%s belongs to a different node", + brickinfo->hostname, brickinfo->path); continue; + } if (brickinfo->snap_status == -1) { gf_log (this->name, GF_LOG_INFO, @@ -1051,20 +1118,23 @@ glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol) brickinfo->hostname, brickinfo->path, snap_vol->snapshot->snapname); - /* Adding missed delete to the dict */ - ret = glusterd_add_missed_snaps_to_dict + if (rsp_dict && + (snap_vol->is_snap_volume == _gf_true)) { + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict (rsp_dict, - snap_vol->volname, + snap_vol, brickinfo, brick_count + 1, GF_SNAP_OPTION_TYPE_DELETE); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to add missed snapshot info " - "for %s:%s in the rsp_dict", - brickinfo->hostname, - brickinfo->path); - goto out; + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot " + "info for %s:%s in the " + "rsp_dict", brickinfo->hostname, + brickinfo->path); + goto out; + } } continue; @@ -1104,6 +1174,7 @@ glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol) ret = 0; out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } @@ -2047,8 +2118,8 @@ glusterd_handle_snapshot_create (rpcsvc_request_t *req, glusterd_op_t op, goto out; } if (volcount <= 0) { - gf_log (this->name, 
GF_LOG_ERROR, "Invalid volume count %ld " - "supplied", volcount); + gf_log (this->name, GF_LOG_ERROR, "Invalid volume count %"PRId64 + " supplied", volcount); ret = -1; goto out; } @@ -2476,7 +2547,7 @@ glusterd_create_snap_object (dict_t *dict, dict_t *rsp_dict) } if (time_stamp <= 0) { ret = -1; - gf_log (this->name, GF_LOG_ERROR, "Invalid time-stamp: %ld", + gf_log (this->name, GF_LOG_ERROR, "Invalid time-stamp: %"PRId64, time_stamp); goto out; } @@ -2550,11 +2621,12 @@ out: /* Added missed_snap_entry to rsp_dict */ int32_t -glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, glusterd_brickinfo_t *brickinfo, int32_t brick_number, int32_t op) { - char *buf = NULL; + char *snap_uuid = NULL; char missed_snap_entry[PATH_MAX] = ""; char name_buf[PATH_MAX] = ""; int32_t missed_snap_count = -1; @@ -2564,20 +2636,20 @@ glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, this = THIS; GF_ASSERT (this); GF_ASSERT (rsp_dict); - GF_ASSERT (snap_uuid); + GF_ASSERT (snap_vol); GF_ASSERT (brickinfo); - snprintf (missed_snap_entry, sizeof(missed_snap_entry), - "%s:%s=%d:%s:%d:%d", uuid_utoa(brickinfo->uuid), - snap_uuid, brick_number, brickinfo->path, op, - GD_MISSED_SNAP_PENDING); - - buf = gf_strdup (missed_snap_entry); - if (!buf) { + snap_uuid = gf_strdup (uuid_utoa (snap_vol->snapshot->snap_id)); + if (!snap_uuid) { ret = -1; goto out; } + snprintf (missed_snap_entry, sizeof(missed_snap_entry), + "%s:%s=%s:%d:%s:%d:%d", uuid_utoa(brickinfo->uuid), + snap_uuid, snap_vol->volname, brick_number, brickinfo->path, + op, GD_MISSED_SNAP_PENDING); + /* Fetch the missed_snap_count from the dict */ ret = dict_get_int32 (rsp_dict, "missed_snap_count", &missed_snap_count); @@ -2589,12 +2661,12 @@ glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, /* Setting the missed_snap_entry in the rsp_dict */ snprintf (name_buf, sizeof(name_buf), 
"missed_snaps_%d", missed_snap_count); - ret = dict_set_dynstr (rsp_dict, name_buf, buf); + ret = dict_set_dynstr_with_alloc (rsp_dict, name_buf, + missed_snap_entry); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to set missed_snap_entry (%s) " - "in the rsp_dict.", buf); - GF_FREE (buf); + "in the rsp_dict.", missed_snap_entry); goto out; } missed_snap_count++; @@ -2610,6 +2682,9 @@ glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, } out: + if (snap_uuid) + GF_FREE (snap_uuid); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } @@ -2715,7 +2790,7 @@ glusterd_take_lvm_snapshot (glusterd_volinfo_t *snap_vol, /* Figuring out if setactivationskip flag is supported or not */ runinit (&runner); snprintf (msg, sizeof (msg), "running lvcreate help"); - runner_add_args (&runner, "/sbin/lvcreate", "--help", NULL); + runner_add_args (&runner, LVM_CREATE, "--help", NULL); runner_log (&runner, "", GF_LOG_DEBUG, msg); runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); ret = runner_start (&runner); @@ -2744,11 +2819,11 @@ glusterd_take_lvm_snapshot (glusterd_volinfo_t *snap_vol, snprintf (msg, sizeof (msg), "taking snapshot of the brick %s:%s", brickinfo->hostname, brickinfo->path); if (match == _gf_true) - runner_add_args (&runner, "/sbin/lvcreate", "-s", device, + runner_add_args (&runner, LVM_CREATE, "-s", device, "--setactivationskip", "n", "--name", snap_vol->volname, NULL); else - runner_add_args (&runner, "/sbin/lvcreate", "-s", device, + runner_add_args (&runner, LVM_CREATE, "-s", device, "--name", snap_vol->volname, NULL); runner_log (&runner, "", GF_LOG_DEBUG, msg); ret = runner_start (&runner); @@ -2785,9 +2860,7 @@ glusterd_snap_brick_create (char *device, glusterd_volinfo_t *snap_volinfo, glusterd_conf_t *priv = NULL; char snap_brick_mount_path[PATH_MAX] = ""; char snap_brick_path[PATH_MAX] = ""; - char msg[1024] = ""; struct stat statbuf = {0, }; - runner_t runner = {0, }; this = THIS; priv = this->private; @@ -2818,25 
+2891,12 @@ glusterd_snap_brick_create (char *device, glusterd_volinfo_t *snap_volinfo, MS_MGC_VAL, "nouuid"); But for now, mounting using runner apis. */ - runinit (&runner); - snprintf (msg, sizeof (msg), "mounting snapshot of the brick %s:%s", - original_brickinfo->hostname, original_brickinfo->path); - runner_add_args (&runner, "mount", "-o", "nouuid", device, - snap_brick_mount_path, NULL); - runner_log (&runner, "", GF_LOG_DEBUG, msg); - - /* let glusterd get blocked till snapshot is over */ - synclock_unlock (&priv->big_lock); - ret = runner_run (&runner); - synclock_lock (&priv->big_lock); + ret = glusterd_mount_lvm_snapshot (device, snap_brick_mount_path); if (ret) { - gf_log (this->name, GF_LOG_ERROR, "mounting the snapshot " - "logical device %s failed (error: %s)", device, - strerror (errno)); + gf_log (this->name, GF_LOG_ERROR, + "Failed to mount lvm snapshot."); goto out; - } else - gf_log (this->name, GF_LOG_DEBUG, "mounting the snapshot " - "logical device %s successful", device); + } ret = stat (snap_brick_path, &statbuf); if (ret) { @@ -2862,7 +2922,11 @@ out: if (ret) { gf_log (this->name, GF_LOG_WARNING, "unmounting the snap brick" " mount %s", snap_brick_mount_path); +#if !defined(GF_DARWIN_HOST_OS) umount (snap_brick_mount_path); +#else + unmount (snap_brick_mount_path, 0); +#endif } gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); @@ -2893,7 +2957,7 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, dict_t *rsp_dict, GF_ASSERT (snap_brickinfo); GF_ASSERT (snap_brick_dir); - snprintf (key, sizeof(key) - 1, "vol%ld.brickdir%d", volcount, + snprintf (key, sizeof(key) - 1, "vol%"PRId64".brickdir%d", volcount, brick_count); ret = dict_get_ptr (dict, key, (void **)snap_brick_dir); if (ret) { @@ -2938,7 +3002,7 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, dict_t *rsp_dict, if (add_missed_snap) { ret = glusterd_add_missed_snaps_to_dict (rsp_dict, - snap_vol->volname, + snap_vol, original_brickinfo, brick_count + 1, 
GF_SNAP_OPTION_TYPE_CREATE); @@ -2951,7 +3015,7 @@ glusterd_add_bricks_to_snap_volume (dict_t *dict, dict_t *rsp_dict, } } - snprintf (key, sizeof(key), "vol%ld.brick_snapdevice%d", + snprintf (key, sizeof(key), "vol%"PRId64".brick_snapdevice%d", volcount, brick_count); ret = dict_get_ptr (dict, key, (void **)&snap_device); if (ret) { @@ -3038,70 +3102,6 @@ out: return ret; } -/* Look for disconnected peers, for missed snap creates or deletes */ -static int32_t -glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, - char *snap_uuid, struct list_head *peers, - int32_t op) -{ - int32_t brick_count = -1; - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (rsp_dict); - GF_ASSERT (peers); - GF_ASSERT (vol); - GF_ASSERT (snap_uuid); - - brick_count = 0; - list_for_each_entry (brickinfo, &vol->bricks, brick_list) { - if (!uuid_compare (brickinfo->uuid, MY_UUID)) { - /* If the brick belongs to the same node */ - brick_count++; - continue; - } - - list_for_each_entry (peerinfo, peers, uuid_list) { - if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) { - /* If the brick doesnt belong to this peer */ - continue; - } - - /* Found peer who owns the brick, * - * if peer is not connected or not * - * friend add it to missed snap list */ - if (!(peerinfo->connected) || - (peerinfo->state.state != - GD_FRIEND_STATE_BEFRIENDED)) { - ret = glusterd_add_missed_snaps_to_dict - (rsp_dict, - snap_uuid, - brickinfo, - brick_count + 1, - op); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to add missed snapshot " - "info for %s:%s in the " - "rsp_dict", brickinfo->hostname, - brickinfo->path); - goto out; - } - } - } - brick_count++; - } - - ret = 0; -out: - gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); - return ret; -} - glusterd_volinfo_t * glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, dict_t 
*dict, dict_t *rsp_dict, int64_t volcount) @@ -3129,7 +3129,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, GF_ASSERT (rsp_dict); /* fetch username, password and vol_id from dict*/ - snprintf (key, sizeof(key), "volume%ld_username", volcount); + snprintf (key, sizeof(key), "volume%"PRId64"_username", volcount); ret = dict_get_str (dict, key, &username); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to get %s for " @@ -3137,7 +3137,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, goto out; } - snprintf (key, sizeof(key), "volume%ld_password", volcount); + snprintf (key, sizeof(key), "volume%"PRId64"_password", volcount); ret = dict_get_str (dict, key, &password); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to get %s for " @@ -3145,7 +3145,7 @@ glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, goto out; } - snprintf (key, sizeof(key) - 1, "vol%ld_volid", volcount); + snprintf (key, sizeof(key) - 1, "vol%"PRId64"_volid", volcount); ret = dict_get_bin (dict, key, (void **)&snap_volid); if (ret) { gf_log (this->name, GF_LOG_ERROR, @@ -3367,7 +3367,7 @@ glusterd_handle_snapshot_remove (rpcsvc_request_t *req, glusterd_op_t op, goto out; } - snprintf (key, sizeof (key), "volname%ld", volcount); + snprintf (key, sizeof (key), "volname%"PRId64, volcount); ret = dict_set_dynstr (dict, key, volname); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to set " @@ -3582,7 +3582,6 @@ glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr, /* From origin glusterd check if * * any peers with snap bricks is down */ ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, - snap_volinfo->volname, &priv->peers, GF_SNAP_OPTION_TYPE_DELETE); if (ret) { @@ -3697,7 +3696,14 @@ glusterd_snapshot_update_snaps_post_validate (dict_t *dict, char **op_errstr, goto out; } - ret = glusterd_store_update_missed_snaps (dict, missed_snap_count); + ret = 
glusterd_add_missed_snaps_to_list (dict, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snaps to list"); + goto out; + } + + ret = glusterd_store_update_missed_snaps (); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to update missed_snaps_list"); @@ -3771,7 +3777,7 @@ glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, } for (i = 1; i <= volcount; i++) { - snprintf (key, sizeof (key), "volname%ld", i); + snprintf (key, sizeof (key), "volname%"PRId64, i); ret = dict_get_str (dict, key, &volname); if (ret) { gf_log (this->name, GF_LOG_ERROR, @@ -3922,7 +3928,7 @@ snap_max_limits_display_commit (dict_t *rsp_dict, char *volname, soft_limit_value = (active_hard_limit * conf->snap_max_soft_limit) / 100; - snprintf (buf, sizeof(buf), "volume%ld-volname", count); + snprintf (buf, sizeof(buf), "volume%"PRIu64"-volname", count); ret = dict_set_str (rsp_dict, buf, volinfo->volname); if (ret) { snprintf (err_str, PATH_MAX, @@ -3931,7 +3937,7 @@ snap_max_limits_display_commit (dict_t *rsp_dict, char *volname, } snprintf (buf, sizeof(buf), - "volume%ld-snap-max-hard-limit", count); + "volume%"PRIu64"-snap-max-hard-limit", count); ret = dict_set_uint64 (rsp_dict, buf, snap_max_limit); if (ret) { snprintf (err_str, PATH_MAX, @@ -3940,7 +3946,7 @@ snap_max_limits_display_commit (dict_t *rsp_dict, char *volname, } snprintf (buf, sizeof(buf), - "volume%ld-active-hard-limit", count); + "volume%"PRIu64"-active-hard-limit", count); ret = dict_set_uint64 (rsp_dict, buf, active_hard_limit); if (ret) { @@ -3950,7 +3956,7 @@ snap_max_limits_display_commit (dict_t *rsp_dict, char *volname, } snprintf (buf, sizeof(buf), - "volume%ld-snap-max-soft-limit", count); + "volume%"PRIu64"-snap-max-soft-limit", count); ret = dict_set_uint64 (rsp_dict, buf, soft_limit_value); if (ret) { snprintf (err_str, PATH_MAX, @@ -3984,7 +3990,7 @@ snap_max_limits_display_commit (dict_t *rsp_dict, char *volname, soft_limit_value = (active_hard_limit 
* conf->snap_max_soft_limit) / 100; - snprintf (buf, sizeof(buf), "volume%ld-volname", count); + snprintf (buf, sizeof(buf), "volume%"PRIu64"-volname", count); ret = dict_set_str (rsp_dict, buf, volinfo->volname); if (ret) { snprintf (err_str, PATH_MAX, @@ -3993,7 +3999,7 @@ snap_max_limits_display_commit (dict_t *rsp_dict, char *volname, } snprintf (buf, sizeof(buf), - "volume%ld-snap-max-hard-limit", count); + "volume%"PRIu64"-snap-max-hard-limit", count); ret = dict_set_uint64 (rsp_dict, buf, snap_max_limit); if (ret) { snprintf (err_str, PATH_MAX, @@ -4002,7 +4008,7 @@ snap_max_limits_display_commit (dict_t *rsp_dict, char *volname, } snprintf (buf, sizeof(buf), - "volume%ld-active-hard-limit", count); + "volume%"PRIu64"-active-hard-limit", count); ret = dict_set_uint64 (rsp_dict, buf, active_hard_limit); if (ret) { snprintf (err_str, PATH_MAX, @@ -4011,7 +4017,7 @@ snap_max_limits_display_commit (dict_t *rsp_dict, char *volname, } snprintf (buf, sizeof(buf), - "volume%ld-snap-max-soft-limit", count); + "volume%"PRIu64"-snap-max-soft-limit", count); ret = dict_set_uint64 (rsp_dict, buf, soft_limit_value); if (ret) { snprintf (err_str, PATH_MAX, @@ -4196,7 +4202,7 @@ glusterd_get_brick_lvm_details (dict_t *rsp_dict, * for the above given command with separator ":", * The output will be "vgname:lvsize" */ - runner_add_args (&runner, "lvs", device, "--noheading", "-o", + runner_add_args (&runner, LVS, device, "--noheading", "-o", "vg_name,data_percent,lv_size", "--separator", ":", NULL); runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); @@ -5320,6 +5326,30 @@ glusterd_free_snap_op (glusterd_snap_op_t *snap_op) } } +static inline void +glusterd_free_missed_snapinfo (glusterd_missed_snap_info *missed_snapinfo) +{ + glusterd_snap_op_t *snap_opinfo = NULL; + glusterd_snap_op_t *tmp = NULL; + + if (missed_snapinfo) { + list_for_each_entry_safe (snap_opinfo, tmp, + &missed_snapinfo->snap_ops, + snap_ops_list) { + glusterd_free_snap_op (snap_opinfo); + snap_opinfo = NULL; 
+ } + + if (missed_snapinfo->node_uuid) + GF_FREE (missed_snapinfo->node_uuid); + + if (missed_snapinfo->snap_uuid) + GF_FREE (missed_snapinfo->snap_uuid); + + GF_FREE (missed_snapinfo); + } +} + /* Look for duplicates and accordingly update the list */ int32_t glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, @@ -5337,6 +5367,13 @@ glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops, snap_ops_list) { + /* If the entry is not for the same snap_vol_id + * then continue + */ + if (strcmp (snap_opinfo->snap_vol_id, + missed_snap_op->snap_vol_id)) + continue; + if ((!strcmp (snap_opinfo->brick_path, missed_snap_op->brick_path)) && (snap_opinfo->op == missed_snap_op->op)) { @@ -5348,8 +5385,10 @@ glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, snap_opinfo->status = GD_MISSED_SNAP_DONE; gf_log (this->name, GF_LOG_INFO, "Updating missed snap status " - "for %s:%d:%s:%d as DONE", - missed_snapinfo->node_snap_info, + "for %s:%s=%s:%d:%s:%d as DONE", + missed_snapinfo->node_uuid, + missed_snapinfo->snap_uuid, + snap_opinfo->snap_vol_id, snap_opinfo->brick_num, snap_opinfo->brick_path, snap_opinfo->op); @@ -5362,15 +5401,19 @@ glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, } else if ((snap_opinfo->brick_num == missed_snap_op->brick_num) && (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) && - (missed_snap_op->op == - GF_SNAP_OPTION_TYPE_DELETE)) { + ((missed_snap_op->op == + GF_SNAP_OPTION_TYPE_DELETE) || + (missed_snap_op->op == + GF_SNAP_OPTION_TYPE_RESTORE))) { /* Optimizing create and delete entries for the same * brick and same node */ gf_log (this->name, GF_LOG_INFO, "Updating missed snap status " - "for %s:%d:%s:%d as DONE", - missed_snapinfo->node_snap_info, + "for %s:%s=%s:%d:%s:%d as DONE", + missed_snapinfo->node_uuid, + missed_snapinfo->snap_uuid, + snap_opinfo->snap_vol_id, 
snap_opinfo->brick_num, snap_opinfo->brick_path, snap_opinfo->op); @@ -5398,10 +5441,13 @@ out: /* Add new missed snap entry to the missed_snaps list. */ int32_t -glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, - char *brick_path, int32_t snap_op, - int32_t snap_status) +glusterd_add_new_entry_to_list (char *missed_info, char *snap_vol_id, + int32_t brick_num, char *brick_path, + int32_t snap_op, int32_t snap_status) { + char *buf = NULL; + char *save_ptr = NULL; + char node_snap_info[PATH_MAX] = ""; int32_t ret = -1; glusterd_missed_snap_info *missed_snapinfo = NULL; glusterd_snap_op_t *missed_snap_op = NULL; @@ -5413,6 +5459,7 @@ glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, this = THIS; GF_ASSERT(this); GF_ASSERT(missed_info); + GF_ASSERT(snap_vol_id); GF_ASSERT(brick_path); priv = this->private; @@ -5428,6 +5475,11 @@ glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, goto out; } + missed_snap_op->snap_vol_id = gf_strdup(snap_vol_id); + if (!missed_snap_op->snap_vol_id) { + ret = -1; + goto out; + } missed_snap_op->brick_path = gf_strdup(brick_path); if (!missed_snap_op->brick_path) { ret = -1; @@ -5440,8 +5492,10 @@ glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, /* Look for other entries for the same node and same snap */ list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, missed_snaps) { - if (!strcmp (missed_snapinfo->node_snap_info, - missed_info)) { + snprintf (node_snap_info, sizeof(node_snap_info), + "%s:%s", missed_snapinfo->node_uuid, + missed_snapinfo->snap_uuid); + if (!strcmp (node_snap_info, missed_info)) { /* Found missed snapshot info for * * the same node and same snap */ match = _gf_true; @@ -5458,8 +5512,24 @@ glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, goto out; } free_missed_snap_info = _gf_true; - missed_snapinfo->node_snap_info = gf_strdup(missed_info); - if (!missed_snapinfo->node_snap_info) { + buf = 
strtok_r (missed_info, ":", &save_ptr); + if (!buf) { + ret = -1; + goto out; + } + missed_snapinfo->node_uuid = gf_strdup(buf); + if (!missed_snapinfo->node_uuid) { + ret = -1; + goto out; + } + + buf = strtok_r (NULL, ":", &save_ptr); + if (!buf) { + ret = -1; + goto out; + } + missed_snapinfo->snap_uuid = gf_strdup(buf); + if (!missed_snapinfo->snap_uuid) { ret = -1; goto out; } @@ -5486,12 +5556,8 @@ out: glusterd_free_snap_op (missed_snap_op); if (missed_snapinfo && - (free_missed_snap_info == _gf_true)) { - if (missed_snapinfo->node_snap_info) - GF_FREE (missed_snapinfo->node_snap_info); - - GF_FREE (missed_snapinfo); - } + (free_missed_snap_info == _gf_true)) + glusterd_free_missed_snapinfo (missed_snapinfo); } gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); @@ -5507,6 +5573,7 @@ glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) char *save_ptr = NULL; char *nodeid = NULL; char *snap_uuid = NULL; + char *snap_vol_id = NULL; char *brick_path = NULL; char missed_info[PATH_MAX] = ""; char name_buf[PATH_MAX] = ""; @@ -5553,13 +5620,14 @@ glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) */ nodeid = strtok_r (tmp, ":", &save_ptr); snap_uuid = strtok_r (NULL, "=", &save_ptr); + snap_vol_id = strtok_r (NULL, ":", &save_ptr); brick_num = atoi(strtok_r (NULL, ":", &save_ptr)); brick_path = strtok_r (NULL, ":", &save_ptr); snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); if (!nodeid || !snap_uuid || !brick_path || - brick_num < 1 || snap_op < 1 || + !snap_vol_id || brick_num < 1 || snap_op < 1 || snap_status < 1) { gf_log (this->name, GF_LOG_ERROR, "Invalid missed_snap_entry"); @@ -5570,11 +5638,12 @@ glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) snprintf (missed_info, sizeof(missed_info), "%s:%s", nodeid, snap_uuid); - ret = glusterd_store_missed_snaps_list (missed_info, - brick_num, - brick_path, - snap_op, - snap_status); + 
ret = glusterd_add_new_entry_to_list (missed_info, + snap_vol_id, + brick_num, + brick_path, + snap_op, + snap_status); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to store missed snaps_list"); @@ -5585,6 +5654,7 @@ glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) tmp = NULL; } + ret = 0; out: if (tmp) GF_FREE (tmp); @@ -5592,3 +5662,126 @@ out: gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } + +/* This function will restore origin volume to its snap. + * The restore operation will simply replace the Gluster origin + * volume with the snap volume. + * TODO: Multi-volume delete to be done. + * Cleanup in case of restore failure is pending. + * + * @param orig_vol volinfo of origin volume + * @param snap_vol volinfo of snapshot volume + * + * @return 0 on success and negative value on error + */ +int +gd_restore_snap_volume (dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, + glusterd_volinfo_t *snap_vol) +{ + int ret = -1; + glusterd_volinfo_t *new_volinfo = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *voliter = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + conf = this->private; + GF_ASSERT (conf); + + GF_VALIDATE_OR_GOTO (this->name, orig_vol, out); + GF_VALIDATE_OR_GOTO (this->name, snap_vol, out); + snap = snap_vol->snapshot; + GF_VALIDATE_OR_GOTO (this->name, snap, out); + + /* Snap volume must be stopped before performing the + * restore operation. + */ + ret = glusterd_stop_volume (snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop " + "snap volume"); + goto out; + } + + /* Create a new volinfo for the restored volume */ + ret = glusterd_volinfo_dup (snap_vol, &new_volinfo, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create volinfo"); + goto out; + } + + /* Following entries need to be derived from origin volume. 
*/ + strcpy (new_volinfo->volname, orig_vol->volname); + uuid_copy (new_volinfo->volume_id, orig_vol->volume_id); + new_volinfo->snap_count = orig_vol->snap_count; + new_volinfo->snap_max_hard_limit = orig_vol->snap_max_hard_limit; + uuid_copy (new_volinfo->restored_from_snap, + snap_vol->snapshot->snap_id); + + /* Bump the version of the restored volume, so that nodes * + * which are done can sync during handshake */ + new_volinfo->version = orig_vol->version; + + list_for_each_entry_safe (voliter, temp_volinfo, + &orig_vol->snap_volumes, snapvol_list) { + list_add_tail (&voliter->snapvol_list, + &new_volinfo->snap_volumes); + } + /* Copy the snap vol info to the new_volinfo.*/ + ret = glusterd_snap_volinfo_restore (rsp_dict, new_volinfo, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to restore snap"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + + ret = glusterd_lvm_snapshot_remove (rsp_dict, orig_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to remove " + "LVM backend"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + + /* New volinfo always shows the status as created. Therefore + * set the status to the original volume's status. */ + glusterd_set_volume_status (new_volinfo, orig_vol->status); + + /* Once the new_volinfo is completely constructed then delete + * the original volinfo + */ + ret = glusterd_volinfo_delete (orig_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to delete volinfo"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + + list_add_tail (&new_volinfo->vol_list, &conf->volumes); + + /* Now delete the snap entry. As a first step delete the snap + * volume information stored in store. 
*/ + ret = glusterd_snap_remove (rsp_dict, snap, _gf_false, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to delete " + "snap %s", snap->snapname); + goto out; + } + + ret = glusterd_store_volinfo (new_volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store volinfo"); + goto out; + } + + ret = 0; +out: + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index e28a30c5a..afbc8ff35 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -44,6 +44,7 @@ #include <sys/resource.h> #include <inttypes.h> #include <dirent.h> +#include <mntent.h> void glusterd_replace_slash_with_hyphen (char *str) @@ -525,10 +526,13 @@ int _storeopts (dict_t *this, char *key, data_t *value, void *data) int32_t glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) { - char *str = NULL; - char buf[PATH_MAX] = {0,}; - int32_t ret = -1; + char *str = NULL; + char buf[PATH_MAX] = ""; + int32_t ret = -1; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); GF_ASSERT (fd > 0); GF_ASSERT (volinfo); @@ -576,7 +580,7 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) snprintf (buf, sizeof (buf), "%s", volinfo->parent_volname); ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PARENT_VOLNAME, buf); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Failed to store " + gf_log (this->name, GF_LOG_ERROR, "Failed to store " GLUSTERD_STORE_KEY_PARENT_VOLNAME); goto out; } @@ -620,11 +624,11 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) goto out; } - snprintf (buf, sizeof (buf), "%d", volinfo->is_volume_restored); - ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_IS_RESTORED, buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP, + uuid_utoa (volinfo->restored_from_snap)); if (ret) { - gf_log 
(THIS->name, GF_LOG_ERROR, - "Unable to write is_volume_restored"); + gf_log (this->name, GF_LOG_ERROR, + "Unable to write restored_from_snap"); goto out; } @@ -632,14 +636,14 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, buf); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Unable to write snap-max-hard-limit"); goto out; } out: if (ret) - gf_log (THIS->name, GF_LOG_ERROR, "Unable to write volume " + gf_log (this->name, GF_LOG_ERROR, "Unable to write volume " "values for %s", volinfo->volname); return ret; } @@ -673,7 +677,7 @@ glusterd_store_create_volume_dir (glusterd_volinfo_t *volinfo) return ret; } -static int32_t +int32_t glusterd_store_create_snap_dir (glusterd_snap_t *snap) { int32_t ret = -1; @@ -2417,9 +2421,12 @@ glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo) } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, strlen (GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT))) { volinfo->snap_max_hard_limit = (uint64_t) atoll (value); - } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_IS_RESTORED, - strlen (GLUSTERD_STORE_KEY_VOL_IS_RESTORED))) { - volinfo->is_volume_restored = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP, + strlen (GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP))) { + ret = uuid_parse (value, volinfo->restored_from_snap); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "failed to parse restored snap's uuid"); } else if (!strncmp (key, GLUSTERD_STORE_KEY_PARENT_VOLNAME, strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) { strncpy (volinfo->parent_volname, value, sizeof(volinfo->parent_volname) - 1); @@ -2615,7 +2622,7 @@ out: return volinfo; } -inline void +static inline void glusterd_store_set_options_path (glusterd_conf_t *conf, char *path, size_t len) { snprintf (path, len, "%s/options", conf->workdir); @@ -2786,6 +2793,209 @@ out: return ret; } +/* Figure out the 
brick mount path, from the brick path */ +int32_t +glusterd_find_brick_mount_path (char *brick_path, int32_t brick_count, + char **brick_mount_path) +{ + char brick_num[PATH_MAX] = ""; + char *ptr = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (brick_path); + GF_ASSERT (brick_mount_path); + + *brick_mount_path = gf_strdup (brick_path); + if (!*brick_mount_path) { + ret = -1; + goto out; + } + + snprintf (brick_num, sizeof(brick_num), "brick%d", brick_count); + + /* Finding the pointer to the end of + * /var/run/gluster/snaps/<snap-uuid> + */ + ptr = strstr (*brick_mount_path, brick_num); + if (!ptr) { + /* Snapshot bricks must have brick num as part + * of the brickpath + */ + gf_log (this->name, GF_LOG_ERROR, + "Invalid brick path(%s)", brick_path); + ret = -1; + goto out; + } + + /* Moving the pointer to the end of + * /var/run/gluster/snaps/<snap-uuid>/<brick_num> + * and assigning '\0' to it. + */ + ptr += strlen(brick_num); + *ptr = '\0'; + + ret = 0; +out: + if (ret && *brick_mount_path) { + GF_FREE (*brick_mount_path); + *brick_mount_path = NULL; + } + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +/* Check if brick_mount_path is already mounted. 
If not, mount the device_path + * at the brick_mount_path + */ +int32_t +glusterd_mount_brick_paths (char *brick_mount_path, char *device_path) +{ + FILE *mtab = NULL; + int32_t ret = -1; + runner_t runner = {0, }; + struct mntent *entry = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (brick_mount_path); + GF_ASSERT (device_path); + + priv = this->private; + GF_ASSERT (priv); + + /* Check if the brick_mount_path is already mounted */ + entry = glusterd_get_mnt_entry_info (brick_mount_path, mtab); + if (entry) { + gf_log (this->name, GF_LOG_INFO, + "brick_mount_path (%s) already mounted.", + brick_mount_path); + ret = 0; + goto out; + } + + /* TODO RHEL 6.5 has the logical volumes inactive by default + * on reboot. Hence activating the logical vol. Check behaviour + * on other systems + */ + /* Activate the snapshot */ + runinit (&runner); + runner_add_args (&runner, "lvchange", "-ay", device_path, + NULL); + ret = runner_run (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to activate %s. 
Error: %s", + device_path, strerror(errno)); + goto out; + } else + gf_log (this->name, GF_LOG_DEBUG, + "Activating %s successful", device_path); + + /* Mount the snapshot */ + ret = glusterd_mount_lvm_snapshot (device_path, brick_mount_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to mount lvm snapshot."); + goto out; + } + +out: + if (mtab) + endmntent (mtab); + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +static int32_t +glusterd_store_recreate_brick_mounts (glusterd_volinfo_t *volinfo) +{ + char *brick_mount_path = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int32_t ret = -1; + int32_t brick_count = -1; + struct stat st_buf = {0, }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (volinfo); + + brick_count = 0; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + brick_count++; + /* If the brick is not of this node, or its + * snapshot is pending, or the brick is not + * a snapshotted brick, we continue + */ + if ((uuid_compare (brickinfo->uuid, MY_UUID)) || + (brickinfo->snap_status == -1) || + (strlen(brickinfo->device_path) == 0)) + continue; + + /* Fetch the brick mount path from the brickinfo->path */ + ret = glusterd_find_brick_mount_path (brickinfo->path, + brick_count, + &brick_mount_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to find brick_mount_path for %s", + brickinfo->path); + goto out; + } + + /* Check if the brickinfo path is present. + * If not create the brick_mount_path */ + ret = lstat (brickinfo->path, &st_buf); + if (ret) { + if (errno == ENOENT) { + ret = mkdir_p (brick_mount_path, 0777, + _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create %s. " + "Error: %s", brick_mount_path, + strerror (errno)); + goto out; + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "Brick Path(%s) not valid. 
" + "Error: %s", brickinfo->path, + strerror(errno)); + goto out; + } + } + + /* Check if brick_mount_path is already mounted. + * If not, mount the device_path at the brick_mount_path */ + ret = glusterd_mount_brick_paths (brick_mount_path, + brickinfo->device_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to mount brick_mount_path"); + goto out; + } + + if (brick_mount_path) { + GF_FREE (brick_mount_path); + brick_mount_path = NULL; + } + } + + ret = 0; +out: + if (ret && brick_mount_path) + GF_FREE (brick_mount_path); + + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + int32_t glusterd_resolve_snap_bricks (xlator_t *this, glusterd_snap_t *snap) { @@ -2905,25 +3115,16 @@ out: int32_t glusterd_store_retrieve_snap (char *snapname) { - int32_t ret = -1; - dict_t *dict = NULL; - glusterd_snap_t *snap = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; + int32_t ret = -1; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; this = THIS; priv = this->private; GF_ASSERT (priv); GF_ASSERT (snapname); - dict = dict_new(); - if (!dict) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to create dict"); - ret = -1; - goto out; - } - snap = glusterd_new_snap_object (); if (!snap) { gf_log (this->name, GF_LOG_ERROR, "Failed to create " @@ -2946,34 +3147,6 @@ glusterd_store_retrieve_snap (char *snapname) goto out; } - /* Unlike bricks of normal volumes which are resolved at the end of - the glusterd restore, the bricks belonging to the snap volumes of - each snap should be resolved as part of snapshot restore itself. - Because if the snapshot has to be removed, then resolving bricks - helps glusterd in understanding what all bricks have its own uuid - and killing those bricks. 
- */ - ret = glusterd_resolve_snap_bricks (this, snap); - if (ret) - gf_log (this->name, GF_LOG_WARNING, "resolving the snap bricks" - " failed (snap: %s)", snap?snap->snapname:""); - - /* When the snapshot command from cli is received, the on disk and - in memory structures for the snapshot are created (with the status) - being marked as GD_SNAP_STATUS_INIT. Once the backend snapshot is - taken, the status is changed to GD_SNAP_STATUS_IN_USE. If glusterd - dies after taking the backend snapshot, but before updating the - status, then when glusterd comes up, it should treat that snapshot - as a failed snapshot and clean it up. - */ - if (snap->snap_status != GD_SNAP_STATUS_IN_USE) { - ret = glusterd_snap_remove (dict, snap, _gf_true, _gf_true); - if (ret) - gf_log (this->name, GF_LOG_WARNING, "failed to remove" - " the snapshot %s", snap->snapname); - goto out; - } - /* TODO: list_add_order can do 'N-square' comparisions and is not efficient. Find a better solution to store the snap in order */ @@ -2981,9 +3154,6 @@ glusterd_store_retrieve_snap (char *snapname) glusterd_compare_snap_time); out: - if (dict) - dict_unref (dict); - gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); return ret; } @@ -2994,6 +3164,7 @@ glusterd_store_retrieve_missed_snaps_list (xlator_t *this) { char buf[PATH_MAX] = ""; char path[PATH_MAX] = ""; + char *snap_vol_id = NULL; char *missed_node_info = NULL; char *brick_path = NULL; char *value = NULL; @@ -3048,12 +3219,13 @@ glusterd_store_retrieve_missed_snaps_list (xlator_t *this) } /* Fetch the brick_num, brick_path, snap_op and snap status */ - brick_num = atoi(strtok_r (value, ":", &save_ptr)); + snap_vol_id = strtok_r (value, ":", &save_ptr); + brick_num = atoi(strtok_r (NULL, ":", &save_ptr)); brick_path = strtok_r (NULL, ":", &save_ptr); snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); - if (!missed_node_info || !brick_path || + if (!missed_node_info || !brick_path || 
!snap_vol_id || brick_num < 1 || snap_op < 1 || snap_status < 1) { gf_log (this->name, GF_LOG_ERROR, @@ -3062,11 +3234,12 @@ glusterd_store_retrieve_missed_snaps_list (xlator_t *this) goto out; } - ret = glusterd_store_missed_snaps_list (missed_node_info, - brick_num, - brick_path, - snap_op, - snap_status); + ret = glusterd_add_new_entry_to_list (missed_node_info, + snap_vol_id, + brick_num, + brick_path, + snap_op, + snap_status); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to store missed snaps_list"); @@ -3145,6 +3318,7 @@ out: int32_t glusterd_store_write_missed_snapinfo (int32_t fd) { + char key[PATH_MAX] = ""; char value[PATH_MAX] = ""; int32_t ret = -1; glusterd_conf_t *priv = NULL; @@ -3164,14 +3338,15 @@ glusterd_store_write_missed_snapinfo (int32_t fd) list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops, snap_ops_list) { - snprintf (value, sizeof(value), "%d:%s:%d:%d", + snprintf (key, sizeof(key), "%s:%s", + missed_snapinfo->node_uuid, + missed_snapinfo->snap_uuid); + snprintf (value, sizeof(value), "%s:%d:%s:%d:%d", + snap_opinfo->snap_vol_id, snap_opinfo->brick_num, snap_opinfo->brick_path, snap_opinfo->op, snap_opinfo->status); - ret = gf_store_save_value - (fd, - missed_snapinfo->node_snap_info, - value); + ret = gf_store_save_value (fd, key, value); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to write missed snapinfo"); @@ -3189,7 +3364,7 @@ out: /* Adds the missed snap entries to the in-memory conf->missed_snap_list * * and writes them to disk */ int32_t -glusterd_store_update_missed_snaps (dict_t *dict, int32_t missed_snap_count) +glusterd_store_update_missed_snaps () { int32_t fd = -1; int32_t ret = -1; @@ -3198,17 +3373,10 @@ glusterd_store_update_missed_snaps (dict_t *dict, int32_t missed_snap_count) this = THIS; GF_ASSERT(this); - GF_ASSERT(dict); priv = this->private; GF_ASSERT (priv); - if (missed_snap_count < 1) { - gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); - ret = 0; - goto out; - } - ret = 
glusterd_store_create_missed_snaps_list_shandle_on_absence (); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to obtain " @@ -3224,13 +3392,6 @@ glusterd_store_update_missed_snaps (dict_t *dict, int32_t missed_snap_count) goto out; } - ret = glusterd_add_missed_snaps_to_list (dict, missed_snap_count); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to add missed snaps to list"); - goto out; - } - ret = glusterd_store_write_missed_snapinfo (fd); if (ret) { gf_log (this->name, GF_LOG_ERROR, @@ -3619,19 +3780,147 @@ out: return ret; } +static int32_t +glusterd_recreate_vol_brick_mounts (xlator_t *this, + glusterd_volinfo_t *volinfo) +{ + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = glusterd_store_recreate_brick_mounts (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to recreate brick mounts " + "for %s", volinfo->volname); + goto out; + } + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +/* Bricks for snap volumes are hosted at /var/run/gluster/snaps + * When a volume is restored, it points to the bricks of the snap + * volume it was restored from. 
Hence on a node restart these + * paths need to be recreated and re-mounted + */ +int32_t +glusterd_recreate_all_snap_brick_mounts (xlator_t *this) +{ + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + /* Recreate bricks of volumes restored from snaps */ + list_for_each_entry (volinfo, &priv->volumes, vol_list) { + /* If the volume is not a restored volume then continue */ + if (uuid_is_null (volinfo->restored_from_snap)) + continue; + + ret = glusterd_recreate_vol_brick_mounts (this, volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to recreate brick mounts " + "for %s", volinfo->volname); + goto out; + } + } + + /* Recreate bricks of snapshot volumes */ + list_for_each_entry (snap, &priv->snapshots, snap_list) { + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + ret = glusterd_recreate_vol_brick_mounts (this, + volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to recreate brick mounts " + "for %s", snap->snapname); + goto out; + } + } + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +/* When the snapshot command from cli is received, the on disk and + * in memory structures for the snapshot are created (with the status) + * being marked as GD_SNAP_STATUS_INIT. Once the backend snapshot is + * taken, the status is changed to GD_SNAP_STATUS_IN_USE. If glusterd + * dies after taking the backend snapshot, but before updating the + * status, then when glusterd comes up, it should treat that snapshot + * as a failed snapshot and clean it up. 
+ */ +int32_t +glusterd_snap_cleanup (xlator_t *this) +{ + dict_t *dict = NULL; + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + glusterd_snap_t *snap = NULL; + + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + dict = dict_new(); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create dict"); + ret = -1; + goto out; + } + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (snap->snap_status != GD_SNAP_STATUS_IN_USE) { + ret = glusterd_snap_remove (dict, snap, + _gf_true, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to remove the snapshot %s", + snap->snapname); + goto out; + } + } + } +out: + if (dict) + dict_unref (dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + int32_t glusterd_resolve_all_bricks (xlator_t *this) { - int32_t ret = 0; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; glusterd_brickinfo_t *brickinfo = NULL; + glusterd_snap_t *snap = NULL; GF_ASSERT (this); priv = this->private; GF_ASSERT (priv); + /* Resolve bricks of volumes */ list_for_each_entry (volinfo, &priv->volumes, vol_list) { list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { ret = glusterd_resolve_brick (brickinfo); @@ -3643,9 +3932,20 @@ glusterd_resolve_all_bricks (xlator_t *this) } } -out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + /* Resolve bricks of snapshot volumes */ + list_for_each_entry (snap, &priv->snapshots, snap_list) { + ret = glusterd_resolve_snap_bricks (this, snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "resolving the snap bricks" + " failed for snap: %s", + snap->snapname); + goto out; + } + } +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); return ret; } @@ -3680,6 +3980,20 @@ glusterd_restore () if (ret) goto out; + ret = glusterd_snap_cleanup (this); + if (ret) { + gf_log 
(this->name, GF_LOG_ERROR, "Failed to perform " + "a cleanup of the snapshots"); + goto out; + } + + ret = glusterd_recreate_all_snap_brick_mounts (this); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to recreate " + "all snap brick mounts"); + goto out; + } + out: gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; @@ -3722,7 +4036,7 @@ glusterd_store_retrieve_quota_version (glusterd_volinfo_t *volinfo) } version = strtoul (version_str, &tmp, 10); - if (version < 0) { + if ((errno == ERANGE) || (errno == EINVAL)) { gf_log (this->name, GF_LOG_DEBUG, "Invalid version number"); goto out; } diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 64c073a8a..63d510cbf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -48,7 +48,7 @@ typedef enum glusterd_store_ver_ac_{ #define GLUSTERD_STORE_KEY_VOL_VERSION "version" #define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type" #define GLUSTERD_STORE_KEY_VOL_ID "volume-id" -#define GLUSTERD_STORE_KEY_VOL_IS_RESTORED "is-volume-restored" +#define GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP "restored_from_snap" #define GLUSTERD_STORE_KEY_RB_STATUS "rb_status" #define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src" #define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst" @@ -168,7 +168,6 @@ int32_t glusterd_store_snap (glusterd_snap_t *snap); int32_t -glusterd_store_update_missed_snaps (dict_t *dict, - int32_t missed_snap_count); +glusterd_store_update_missed_snaps (); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 8e96be91b..7883a98bf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -13,6 +13,12 @@ #endif #include <inttypes.h> +#if !defined(__NetBSD__) && !defined(GF_DARWIN_HOST_OS) +#include <mntent.h> +#else +#include "mntent_compat.h" +#endif + #include "globals.h" #include "glusterfs.h" #include 
"compat.h" @@ -44,7 +50,6 @@ #include <inttypes.h> #include <signal.h> #include <sys/types.h> -#include <net/if.h> #include <sys/ioctl.h> #include <sys/socket.h> #include <rpc/pmap_clnt.h> @@ -56,11 +61,6 @@ #include <lvm2app.h> #endif - -#ifdef GF_LINUX_HOST_OS -#include <mntent.h> -#endif - #ifdef GF_SOLARIS_HOST_OS #include <sys/sockio.h> #endif @@ -709,7 +709,7 @@ glusterd_snap_volinfo_restore (dict_t *rsp_dict, /* Adding missed delete to the dict */ ret = glusterd_add_missed_snaps_to_dict (rsp_dict, - snap_volinfo->volname, + snap_volinfo, brickinfo, brick_count + 1, GF_SNAP_OPTION_TYPE_RESTORE); @@ -2241,12 +2241,17 @@ out: return ret; } +/* The prefix represents the type of volume to be added. + * It will be "volume" for normal volumes, and snap# like + * snap1, snap2, for snapshot volumes + */ int32_t glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, - dict_t *dict, int32_t count) + dict_t *dict, int32_t count, + char *prefix) { int32_t ret = -1; - char prefix[512] = {0,}; + char pfx[512] = {0,}; char key[512] = {0,}; glusterd_brickinfo_t *brickinfo = NULL; int32_t i = 1; @@ -2263,89 +2268,101 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, GF_ASSERT (this); GF_ASSERT (dict); GF_ASSERT (volinfo); + GF_ASSERT (prefix); - snprintf (key, sizeof (key), "volume%d.name", count); + snprintf (key, sizeof (key), "%s%d.name", prefix, count); ret = dict_set_str (dict, key, volinfo->volname); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.type", count); + snprintf (key, sizeof (key), "%s%d.type", prefix, count); ret = dict_set_int32 (dict, key, volinfo->type); if (ret) goto out; - snprintf (key, sizeof (key), "volume%d.is_volume_restored", count); - ret = dict_set_int32 (dict, key, volinfo->is_volume_restored); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Failed to set " - "is_volume_restored option for %s volume", - volinfo->volname); + snprintf (key, sizeof (key), "volume%d.restored_from_snap", 
count); + ret = dict_set_dynstr_with_alloc + (dict, key, + uuid_utoa (volinfo->restored_from_snap)); + if (ret) goto out; + + if (strlen (volinfo->parent_volname) > 0) { + snprintf (key, sizeof (key), "%s%d.parent_volname", + prefix, count); + ret = dict_set_dynstr_with_alloc (dict, key, + volinfo->parent_volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set parent_volname for %s", + volinfo->volname); + goto out; + } } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick_count", count); + snprintf (key, sizeof (key), "%s%d.brick_count", prefix, count); ret = dict_set_int32 (dict, key, volinfo->brick_count); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.version", count); + snprintf (key, sizeof (key), "%s%d.version", prefix, count); ret = dict_set_int32 (dict, key, volinfo->version); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.status", count); + snprintf (key, sizeof (key), "%s%d.status", prefix, count); ret = dict_set_int32 (dict, key, volinfo->status); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.sub_count", count); + snprintf (key, sizeof (key), "%s%d.sub_count", prefix, count); ret = dict_set_int32 (dict, key, volinfo->sub_count); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.stripe_count", count); + snprintf (key, sizeof (key), "%s%d.stripe_count", prefix, count); ret = dict_set_int32 (dict, key, volinfo->stripe_count); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.replica_count", count); + snprintf (key, sizeof (key), "%s%d.replica_count", prefix, count); ret = dict_set_int32 (dict, key, volinfo->replica_count); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.dist_count", count); + snprintf (key, sizeof (key), "%s%d.dist_count", prefix, count); 
ret = dict_set_int32 (dict, key, volinfo->dist_leaf_count); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.ckusm", count); + snprintf (key, sizeof (key), "%s%d.ckusm", prefix, count); ret = dict_set_int64 (dict, key, volinfo->cksum); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.transport_type", count); + snprintf (key, sizeof (key), "%s%d.transport_type", prefix, count); ret = dict_set_uint32 (dict, key, volinfo->transport_type); if (ret) goto out; - snprintf (key, sizeof (key), "volume%d.is_snap_volume", count); + snprintf (key, sizeof (key), "%s%d.is_snap_volume", prefix, count); ret = dict_set_uint32 (dict, key, volinfo->is_snap_volume); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key); goto out; } - snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); + snprintf (key, sizeof (key), "%s%d.snap-max-hard-limit", prefix, count); ret = dict_set_uint64 (dict, key, volinfo->snap_max_hard_limit); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key); @@ -2358,14 +2375,14 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.volume_id", count); + snprintf (key, sizeof (key), "%s%d.volume_id", prefix, count); ret = dict_set_dynstr (dict, key, volume_id_str); if (ret) goto out; volume_id_str = NULL; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.username", count); + snprintf (key, sizeof (key), "%s%d.username", prefix, count); str = glusterd_auth_get_username (volinfo); if (str) { ret = dict_set_dynstr (dict, key, gf_strdup (str)); @@ -2374,7 +2391,7 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.password", count); + snprintf (key, sizeof (key), "%s%d.password", prefix, count); str = glusterd_auth_get_password (volinfo); if (str) { 
ret = dict_set_dynstr (dict, key, gf_strdup (str)); @@ -2383,7 +2400,7 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, } memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d.rebalance", count); + snprintf (key, 256, "%s%d.rebalance", prefix, count); ret = dict_set_int32 (dict, key, volinfo->rebal.defrag_cmd); if (ret) goto out; @@ -2395,22 +2412,22 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; } memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d.rebalance-id", count); + snprintf (key, 256, "%s%d.rebalance-id", prefix, count); ret = dict_set_dynstr (dict, key, rebalance_id_str); if (ret) goto out; rebalance_id_str = NULL; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.rebalance-op", count); + snprintf (key, sizeof (key), "%s%d.rebalance-op", prefix, count); ret = dict_set_uint32 (dict, key, volinfo->rebal.op); if (ret) goto out; if (volinfo->rebal.dict) { - snprintf (prefix, sizeof (prefix), "volume%d", count); + snprintf (pfx, sizeof (pfx), "%s%d", prefix, count); ctx.dict = dict; - ctx.prefix = prefix; + ctx.prefix = pfx; ctx.opt_count = 1; ctx.key_name = "rebal-dict-key"; ctx.val_name = "rebal-dict-value"; @@ -2425,7 +2442,7 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, } memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); + snprintf (key, 256, "%s%d."GLUSTERD_STORE_KEY_RB_STATUS, prefix, count); ret = dict_set_int32 (dict, key, volinfo->rep_brick.rb_status); if (ret) goto out; @@ -2433,8 +2450,8 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, - count); + snprintf (key, 256, "%s%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, + prefix, count); gf_asprintf (&src_brick, "%s:%s", volinfo->rep_brick.src_brick->hostname, volinfo->rep_brick.src_brick->path); @@ -2443,8 
+2460,8 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_DST_BRICK, - count); + snprintf (key, 256, "%s%d."GLUSTERD_STORE_KEY_RB_DST_BRICK, + prefix, count); gf_asprintf (&dst_brick, "%s:%s", volinfo->rep_brick.dst_brick->hostname, volinfo->rep_brick.dst_brick->path); @@ -2459,16 +2476,16 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.rb_id", count); + snprintf (key, sizeof (key), "%s%d.rb_id", prefix, count); ret = dict_set_dynstr (dict, key, rb_id_str); if (ret) goto out; rb_id_str = NULL; } - snprintf (prefix, sizeof (prefix), "volume%d", count); + snprintf (pfx, sizeof (pfx), "%s%d", prefix, count); ctx.dict = dict; - ctx.prefix = prefix; + ctx.prefix = pfx; ctx.opt_count = 1; ctx.key_name = "key"; ctx.val_name = "value"; @@ -2477,13 +2494,13 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, dict_foreach (volinfo->dict, _add_dict_to_prdict, &ctx); ctx.opt_count--; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.opt-count", count); + snprintf (key, sizeof (key), "%s%d.opt-count", prefix, count); ret = dict_set_int32 (dict, key, ctx.opt_count); if (ret) goto out; ctx.dict = dict; - ctx.prefix = prefix; + ctx.prefix = pfx; ctx.opt_count = 1; ctx.key_name = "slave-num"; ctx.val_name = "slave-val"; @@ -2493,42 +2510,42 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, ctx.opt_count--; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.gsync-count", count); + snprintf (key, sizeof (key), "%s%d.gsync-count", prefix, count); ret = dict_set_int32 (dict, key, ctx.opt_count); if (ret) goto out; list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.hostname", - count, i); + snprintf (key, sizeof (key), "%s%d.brick%d.hostname", 
+ prefix, count, i); ret = dict_set_str (dict, key, brickinfo->hostname); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.path", - count, i); + snprintf (key, sizeof (key), "%s%d.brick%d.path", + prefix, count, i); ret = dict_set_str (dict, key, brickinfo->path); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.decommissioned", - count, i); + snprintf (key, sizeof (key), "%s%d.brick%d.decommissioned", + prefix, count, i); ret = dict_set_int32 (dict, key, brickinfo->decommissioned); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.brick_id", - count, i); + snprintf (key, sizeof (key), "%s%d.brick%d.brick_id", + prefix, count, i); ret = dict_set_str (dict, key, brickinfo->brick_id); if (ret) goto out; - snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", - count, i); + snprintf (key, sizeof (key), "%s%d.brick%d.snap_status", + prefix, count, i); ret = dict_set_int32 (dict, key, brickinfo->snap_status); if (ret) { gf_log (this->name, GF_LOG_ERROR, @@ -2538,8 +2555,8 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; } - snprintf (key, sizeof (key), "volume%d.brick%d.device_path", - count, i); + snprintf (key, sizeof (key), "%s%d.brick%d.device_path", + prefix, count, i); ret = dict_set_str (dict, key, brickinfo->device_path); if (ret) { gf_log (this->name, GF_LOG_ERROR, @@ -2556,19 +2573,19 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, * in the cluster */ memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.op-version", count); + snprintf (key, sizeof (key), "%s%d.op-version", prefix, count); ret = dict_set_int32 (dict, key, volinfo->op_version); if (ret) goto out; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.client-op-version", count); + snprintf (key, sizeof (key), "%s%d.client-op-version", prefix, count); ret = dict_set_int32 
(dict, key, volinfo->client_op_version); if (ret) goto out; /*Add volume Capability (BD Xlator) to dict*/ memset (key, 0 ,sizeof (key)); - snprintf (key, sizeof (key), "volume%d.caps", count); + snprintf (key, sizeof (key), "%s%d.caps", prefix, count); ret = dict_set_int32 (dict, key, volinfo->caps); out: @@ -2576,14 +2593,17 @@ out: GF_FREE (rebalance_id_str); GF_FREE (rb_id_str); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } +/* The prefix represents the type of volume to be added. + * It will be "volume" for normal volumes, and snap# like + * snap1, snap2, for snapshot volumes + */ int glusterd_vol_add_quota_conf_to_dict (glusterd_volinfo_t *volinfo, dict_t* load, - int vol_idx) + int vol_idx, char *prefix) { int fd = -1; char *gfid_str = NULL; @@ -2595,6 +2615,7 @@ glusterd_vol_add_quota_conf_to_dict (glusterd_volinfo_t *volinfo, dict_t* load, this = THIS; GF_ASSERT (this); + GF_ASSERT (prefix); ret = glusterd_store_create_quota_conf_sh_on_absence (volinfo); if (ret) @@ -2631,8 +2652,8 @@ glusterd_vol_add_quota_conf_to_dict (glusterd_volinfo_t *volinfo, dict_t* load, goto out; } - snprintf (key, sizeof(key)-1, "volume%d.gfid%d", vol_idx, - gfid_idx); + snprintf (key, sizeof(key)-1, "%s%d.gfid%d", prefix, + vol_idx, gfid_idx); key[sizeof(key)-1] = '\0'; ret = dict_set_dynstr (load, key, gfid_str); if (ret) { @@ -2642,19 +2663,19 @@ glusterd_vol_add_quota_conf_to_dict (glusterd_volinfo_t *volinfo, dict_t* load, gfid_str = NULL; } - snprintf (key, sizeof(key)-1, "volume%d.gfid-count", vol_idx); + snprintf (key, sizeof(key)-1, "%s%d.gfid-count", prefix, vol_idx); key[sizeof(key)-1] = '\0'; ret = dict_set_int32 (load, key, gfid_idx); if (ret) goto out; - snprintf (key, sizeof(key)-1, "volume%d.quota-cksum", vol_idx); + snprintf (key, sizeof(key)-1, "%s%d.quota-cksum", prefix, vol_idx); key[sizeof(key)-1] = '\0'; ret = dict_set_uint32 (load, key, volinfo->quota_conf_cksum); if 
(ret) goto out; - snprintf (key, sizeof(key)-1, "volume%d.quota-version", vol_idx); + snprintf (key, sizeof(key)-1, "%s%d.quota-version", prefix, vol_idx); key[sizeof(key)-1] = '\0'; ret = dict_set_uint32 (load, key, volinfo->quota_conf_version); if (ret) @@ -2669,7 +2690,237 @@ out: } int32_t -glusterd_build_volume_dict (dict_t **vols) +glusterd_add_missed_snaps_to_export_dict (dict_t *peer_data) +{ + char name_buf[PATH_MAX] = ""; + char value[PATH_MAX] = ""; + int32_t missed_snap_count = 0; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (peer_data); + + priv = this->private; + GF_ASSERT (priv); + + /* Add the missed_entries in the dict */ + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + list_for_each_entry (snap_opinfo, + &missed_snapinfo->snap_ops, + snap_ops_list) { + snprintf (name_buf, sizeof(name_buf), + "missed_snaps_%d", missed_snap_count); + snprintf (value, sizeof(value), "%s:%s=%s:%d:%s:%d:%d", + missed_snapinfo->node_uuid, + missed_snapinfo->snap_uuid, + snap_opinfo->snap_vol_id, + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op, + snap_opinfo->status); + + ret = dict_set_dynstr_with_alloc (peer_data, name_buf, + value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set %s", + name_buf); + goto out; + } + missed_snap_count++; + } + } + + ret = dict_set_int32 (peer_data, "missed_snap_count", + missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set missed_snap_count"); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_add_snap_to_dict (glusterd_snap_t *snap, dict_t *peer_data, + int32_t snap_count) +{ + char buf[NAME_MAX] = ""; + char prefix[NAME_MAX] = ""; + int32_t ret = -1; + int32_t volcount = 0; + 
glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + gf_boolean_t host_bricks = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (snap); + GF_ASSERT (peer_data); + + snprintf (prefix, sizeof(prefix), "snap%d", snap_count); + + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + volcount++; + ret = glusterd_add_volume_to_dict (volinfo, peer_data, + volcount, prefix); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add snap:%s volume:%s " + "to peer_data dict for handshake", + snap->snapname, volinfo->volname); + goto out; + } + + ret = glusterd_vol_add_quota_conf_to_dict (volinfo, peer_data, + volcount, prefix); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add quota conf for " + "snap:%s volume:%s to peer_data " + "dict for handshake", snap->snapname, + volinfo->volname); + goto out; + } + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + host_bricks = _gf_true; + break; + } + } + } + + snprintf (buf, sizeof(buf), "%s.host_bricks", prefix); + ret = dict_set_int8 (peer_data, buf, (int8_t) host_bricks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set host_bricks for snap %s", + snap->snapname); + goto out; + } + + snprintf (buf, sizeof(buf), "%s.volcount", prefix); + ret = dict_set_int32 (peer_data, buf, volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set volcount for snap %s", + snap->snapname); + goto out; + } + + snprintf (buf, sizeof(buf), "%s.snapname", prefix); + ret = dict_set_dynstr_with_alloc (peer_data, buf, snap->snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set snapname for snap %s", + snap->snapname); + goto out; + } + + snprintf (buf, sizeof(buf), "%s.snap_id", prefix); + ret = dict_set_dynstr_with_alloc (peer_data, buf, + uuid_utoa (snap->snap_id)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set 
snap_id for snap %s", + snap->snapname); + goto out; + } + + if (snap->description) { + snprintf (buf, sizeof(buf), "%s.snapid", prefix); + ret = dict_set_dynstr_with_alloc (peer_data, buf, + snap->description); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set description for snap %s", + snap->snapname); + goto out; + } + } + + snprintf (buf, sizeof(buf), "%s.time_stamp", prefix); + ret = dict_set_int64 (peer_data, buf, (int64_t)snap->time_stamp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set time_stamp for snap %s", + snap->snapname); + goto out; + } + + snprintf (buf, sizeof(buf), "%s.snap_restored", prefix); + ret = dict_set_int8 (peer_data, buf, snap->snap_restored); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set snap_restored for snap %s", + snap->snapname); + goto out; + } + + snprintf (buf, sizeof(buf), "%s.snap_status", prefix); + ret = dict_set_int32 (peer_data, buf, snap->snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set snap_status for snap %s", + snap->snapname); + goto out; + } +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_add_snapshots_to_export_dict (dict_t *peer_data) +{ + int32_t snap_count = 0; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (peer_data); + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + snap_count++; + ret = glusterd_add_snap_to_dict (snap, peer_data, snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add snap(%s) to the " + " peer_data dict for handshake", + snap->snapname); + goto out; + } + } + + ret = dict_set_int32 (peer_data, "snap_count", snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap_count"); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, 
"Returning %d", ret); + return ret; +} + +int32_t +glusterd_add_volumes_to_export_dict (dict_t **peer_data) { int32_t ret = -1; dict_t *dict = NULL; @@ -2677,27 +2928,31 @@ glusterd_build_volume_dict (dict_t **vols) glusterd_volinfo_t *volinfo = NULL; int32_t count = 0; glusterd_dict_ctx_t ctx = {0}; + xlator_t *this = NULL; - priv = THIS->private; + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); dict = dict_new (); - if (!dict) goto out; list_for_each_entry (volinfo, &priv->volumes, vol_list) { count++; - ret = glusterd_add_volume_to_dict (volinfo, dict, count); + ret = glusterd_add_volume_to_dict (volinfo, dict, count, + "volume"); if (ret) goto out; if (!glusterd_is_volume_quota_enabled (volinfo)) continue; - ret = glusterd_vol_add_quota_conf_to_dict (volinfo, dict, count); + ret = glusterd_vol_add_quota_conf_to_dict (volinfo, dict, + count, "volume"); if (ret) goto out; } - ret = dict_set_int32 (dict, "count", count); if (ret) goto out; @@ -2713,18 +2968,18 @@ glusterd_build_volume_dict (dict_t **vols) if (ret) goto out; - *vols = dict; + *peer_data = dict; out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); if (ret) dict_unref (dict); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } int32_t -glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status, - char *hostname) +glusterd_compare_friend_volume (dict_t *peer_data, int32_t count, + int32_t *status, char *hostname) { int32_t ret = -1; @@ -2737,14 +2992,14 @@ glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status, int32_t version = 0; xlator_t *this = NULL; - GF_ASSERT (vols); + GF_ASSERT (peer_data); GF_ASSERT (status); this = THIS; GF_ASSERT (this); snprintf (key, sizeof (key), "volume%d.name", count); - ret = dict_get_str (vols, key, &volname); + ret = dict_get_str (peer_data, key, &volname); if (ret) goto out; @@ -2758,7 +3013,7 @@ glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t 
*status, memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.version", count); - ret = dict_get_int32 (vols, key, &version); + ret = dict_get_int32 (peer_data, key, &version); if (ret) goto out; @@ -2779,7 +3034,7 @@ glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status, // memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.ckusm", count); - ret = dict_get_uint32 (vols, key, &cksum); + ret = dict_get_uint32 (peer_data, key, &cksum); if (ret) goto out; @@ -2794,7 +3049,7 @@ glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status, memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.quota-version", count); - ret = dict_get_uint32 (vols, key, &quota_version); + ret = dict_get_uint32 (peer_data, key, &quota_version); if (ret) { gf_log (this->name, GF_LOG_DEBUG, "quota-version key absent for" " volume %s in peer %s's response", volinfo->volname, @@ -2822,7 +3077,7 @@ glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status, // memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.quota-cksum", count); - ret = dict_get_uint32 (vols, key, &quota_cksum); + ret = dict_get_uint32 (peer_data, key, &quota_cksum); if (ret) { gf_log (this->name, GF_LOG_DEBUG, "quota checksum absent for " "volume %s in peer %s's response", volinfo->volname, @@ -2850,7 +3105,7 @@ out: } static int32_t -import_prdict_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, +import_prdict_dict (dict_t *peer_data, dict_t *dst_dict, char *key_prefix, char *value_prefix, int opt_count, char *prefix) { char key[512] = {0,}; @@ -2865,7 +3120,7 @@ import_prdict_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "%s.%s%d", prefix, key_prefix, i); - ret = dict_get_str (vols, key, &opt_key); + ret = dict_get_str (peer_data, key, &opt_key); if (ret) { snprintf (msg, sizeof (msg), "Volume dict key not " "specified"); @@ -2875,7
+3130,7 @@ import_prdict_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "%s.%s%d", prefix, value_prefix, i); - ret = dict_get_str (vols, key, &opt_val); + ret = dict_get_str (peer_data, key, &opt_val); if (ret) { snprintf (msg, sizeof (msg), "Volume dict value not " "specified"); @@ -3163,7 +3418,7 @@ out: } int32_t -glusterd_import_friend_volume_opts (dict_t *vols, int count, +glusterd_import_friend_volume_opts (dict_t *peer_data, int count, glusterd_volinfo_t *volinfo) { char key[512] = {0,}; @@ -3172,9 +3427,12 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count, char msg[2048] = {0}; char volume_prefix[1024] = {0}; + GF_ASSERT (peer_data); + GF_ASSERT (volinfo); + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.opt-count", count); - ret = dict_get_int32 (vols, key, &opt_count); + ret = dict_get_int32 (peer_data, key, &opt_count); if (ret) { snprintf (msg, sizeof (msg), "Volume option count not " "specified for %s", volinfo->volname); @@ -3182,7 +3440,7 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count, } snprintf (volume_prefix, sizeof (volume_prefix), "volume%d", count); - ret = import_prdict_dict (vols, volinfo->dict, "key", "value", + ret = import_prdict_dict (peer_data, volinfo->dict, "key", "value", opt_count, volume_prefix); if (ret) { snprintf (msg, sizeof (msg), "Unable to import options dict " @@ -3192,14 +3450,14 @@ glusterd_import_friend_volume_opts (dict_t *vols, int count, memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.gsync-count", count); - ret = dict_get_int32 (vols, key, &opt_count); + ret = dict_get_int32 (peer_data, key, &opt_count); if (ret) { snprintf (msg, sizeof (msg), "Gsync count not " "specified for %s", volinfo->volname); goto out; } - ret = import_prdict_dict (vols, volinfo->gsync_slaves, "slave-num", + ret = import_prdict_dict (peer_data, volinfo->gsync_slaves, "slave-num", "slave-val", opt_count, 
volume_prefix); if (ret) { snprintf (msg, sizeof (msg), "Unable to import gsync sessions " @@ -3214,10 +3472,15 @@ out: return ret; } +/* The prefix represents the type of volume to be added. + * It will be "volume" for normal volumes, and snap# like + * snap1, snap2, for snapshot volumes + */ int32_t -glusterd_import_new_brick (dict_t *vols, int32_t vol_count, +glusterd_import_new_brick (dict_t *peer_data, int32_t vol_count, int32_t brick_count, - glusterd_brickinfo_t **brickinfo) + glusterd_brickinfo_t **brickinfo, + char *prefix) { char key[512] = {0,}; int ret = -1; @@ -3230,53 +3493,54 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, glusterd_brickinfo_t *new_brickinfo = NULL; char msg[2048] = {0}; - GF_ASSERT (vols); + GF_ASSERT (peer_data); GF_ASSERT (vol_count >= 0); GF_ASSERT (brickinfo); + GF_ASSERT (prefix); memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.hostname", - vol_count, brick_count); - ret = dict_get_str (vols, key, &hostname); + snprintf (key, sizeof (key), "%s%d.brick%d.hostname", + prefix, vol_count, brick_count); + ret = dict_get_str (peer_data, key, &hostname); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload", key); goto out; } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.path", - vol_count, brick_count); - ret = dict_get_str (vols, key, &path); + snprintf (key, sizeof (key), "%s%d.brick%d.path", + prefix, vol_count, brick_count); + ret = dict_get_str (peer_data, key, &path); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload", key); goto out; } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.brick_id", - vol_count, brick_count); - ret = dict_get_str (vols, key, &brick_id); + snprintf (key, sizeof (key), "%s%d.brick%d.brick_id", + prefix, vol_count, brick_count); + ret = dict_get_str (peer_data, key, &brick_id); memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), 
"volume%d.brick%d.decommissioned", - vol_count, brick_count); - ret = dict_get_int32 (vols, key, &decommissioned); + snprintf (key, sizeof (key), "%s%d.brick%d.decommissioned", + prefix, vol_count, brick_count); + ret = dict_get_int32 (peer_data, key, &decommissioned); if (ret) { /* For backward compatibility */ ret = 0; } - snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", - vol_count, brick_count); - ret = dict_get_int32 (vols, key, &snap_status); + snprintf (key, sizeof (key), "%s%d.brick%d.snap_status", + prefix, vol_count, brick_count); + ret = dict_get_int32 (peer_data, key, &snap_status); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload", key); goto out; } - snprintf (key, sizeof (key), "volume%d.brick%d.device_path", - vol_count, brick_count); - ret = dict_get_str (vols, key, &snap_device); + snprintf (key, sizeof (key), "%s%d.brick%d.device_path", + prefix, vol_count, brick_count); + ret = dict_get_str (peer_data, key, &snap_device); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload", key); goto out; @@ -3304,22 +3568,28 @@ out: return ret; } +/* The prefix represents the type of volume to be added. 
+ * It will be "volume" for normal volumes, and snap# like + * snap1, snap2, for snapshot volumes + */ int32_t -glusterd_import_bricks (dict_t *vols, int32_t vol_count, - glusterd_volinfo_t *new_volinfo) +glusterd_import_bricks (dict_t *peer_data, int32_t vol_count, + glusterd_volinfo_t *new_volinfo, char *prefix) { int ret = -1; int brick_count = 1; int brickid = 0; glusterd_brickinfo_t *new_brickinfo = NULL; - GF_ASSERT (vols); + GF_ASSERT (peer_data); GF_ASSERT (vol_count >= 0); GF_ASSERT (new_volinfo); + GF_ASSERT (prefix); while (brick_count <= new_volinfo->brick_count) { - ret = glusterd_import_new_brick (vols, vol_count, brick_count, - &new_brickinfo); + ret = glusterd_import_new_brick (peer_data, vol_count, + brick_count, + &new_brickinfo, prefix); if (ret) goto out; if (new_brickinfo->brick_id[0] == '\0') @@ -3337,9 +3607,14 @@ out: return ret; } +/* The prefix represents the type of volume to be added. + * It will be "volume" for normal volumes, and snap# like + * snap1, snap2, for snapshot volumes + */ static int -glusterd_import_quota_conf (dict_t *vols, int vol_idx, - glusterd_volinfo_t *new_volinfo) +glusterd_import_quota_conf (dict_t *peer_data, int vol_idx, + glusterd_volinfo_t *new_volinfo, + char *prefix) { int gfid_idx = 0; int gfid_count = 0; @@ -3352,6 +3627,8 @@ glusterd_import_quota_conf (dict_t *vols, int vol_idx, this = THIS; GF_ASSERT (this); + GF_ASSERT (peer_data); + GF_ASSERT (prefix); if (!glusterd_is_volume_quota_enabled (new_volinfo)) { (void) glusterd_clean_up_quota_store (new_volinfo); @@ -3368,22 +3645,23 @@ glusterd_import_quota_conf (dict_t *vols, int vol_idx, goto out; } - snprintf (key, sizeof (key)-1, "volume%d.quota-cksum", vol_idx); + snprintf (key, sizeof (key)-1, "%s%d.quota-cksum", prefix, vol_idx); key[sizeof(key)-1] = '\0'; - ret = dict_get_uint32 (vols, key, &new_volinfo->quota_conf_cksum); + ret = dict_get_uint32 (peer_data, key, &new_volinfo->quota_conf_cksum); if (ret) gf_log (this->name, GF_LOG_DEBUG, "Failed to 
get quota cksum"); - snprintf (key, sizeof (key)-1, "volume%d.quota-version", vol_idx); + snprintf (key, sizeof (key)-1, "%s%d.quota-version", prefix, vol_idx); key[sizeof(key)-1] = '\0'; - ret = dict_get_uint32 (vols, key, &new_volinfo->quota_conf_version); + ret = dict_get_uint32 (peer_data, key, + &new_volinfo->quota_conf_version); if (ret) gf_log (this->name, GF_LOG_DEBUG, "Failed to get quota " "version"); - snprintf (key, sizeof (key)-1, "volume%d.gfid-count", vol_idx); + snprintf (key, sizeof (key)-1, "%s%d.gfid-count", prefix, vol_idx); key[sizeof(key)-1] = '\0'; - ret = dict_get_int32 (vols, key, &gfid_count); + ret = dict_get_int32 (peer_data, key, &gfid_count); if (ret) goto out; @@ -3397,10 +3675,10 @@ glusterd_import_quota_conf (dict_t *vols, int vol_idx, gfid_idx = 0; for (gfid_idx = 0; gfid_idx < gfid_count; gfid_idx++) { - snprintf (key, sizeof (key)-1, "volume%d.gfid%d", - vol_idx, gfid_idx); + snprintf (key, sizeof (key)-1, "%s%d.gfid%d", + prefix, vol_idx, gfid_idx); key[sizeof(key)-1] = '\0'; - ret = dict_get_str (vols, key, &gfid_str); + ret = dict_get_str (peer_data, key, &gfid_str); if (ret) goto out; @@ -3481,15 +3759,22 @@ out: return ret; } +/* The prefix represents the type of volume to be added. 
+ * It will be "volume" for normal volumes, and snap# like + * snap1, snap2, for snapshot volumes + */ int32_t -glusterd_import_volinfo (dict_t *vols, int count, - glusterd_volinfo_t **volinfo) +glusterd_import_volinfo (dict_t *peer_data, int count, + glusterd_volinfo_t **volinfo, + char *prefix) { int ret = -1; char key[256] = {0}; + char *parent_volname = NULL; char *volname = NULL; glusterd_volinfo_t *new_volinfo = NULL; char *volume_id_str = NULL; + char *restored_snap = NULL; char msg[2048] = {0}; char *src_brick = NULL; char *dst_brick = NULL; @@ -3501,50 +3786,49 @@ glusterd_import_volinfo (dict_t *vols, int count, int client_op_version = 0; uint32_t is_snap_volume = 0; - GF_ASSERT (vols); + GF_ASSERT (peer_data); GF_ASSERT (volinfo); + GF_ASSERT (prefix); - snprintf (key, sizeof (key), "volume%d.name", count); - ret = dict_get_str (vols, key, &volname); + snprintf (key, sizeof (key), "%s%d.name", prefix, count); + ret = dict_get_str (peer_data, key, &volname); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload", key); goto out; } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.is_snap_volume", count); - ret = dict_get_uint32 (vols, key, &is_snap_volume); + snprintf (key, sizeof (key), "%s%d.is_snap_volume", prefix, count); + ret = dict_get_uint32 (peer_data, key, &is_snap_volume); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); goto out; } - if (is_snap_volume == _gf_true) { - gf_log (THIS->name, GF_LOG_DEBUG, - "Not syncing snap volume %s", volname); - ret = 0; - goto out; - } - ret = glusterd_volinfo_new (&new_volinfo); if (ret) goto out; strncpy (new_volinfo->volname, volname, sizeof (new_volinfo->volname)); - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.type", count); - ret = dict_get_int32 (vols, key, &new_volinfo->type); + snprintf (key, sizeof (key), "%s%d.type", prefix, count); + ret = dict_get_int32 (peer_data, key, &new_volinfo->type); if (ret) { 
snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); goto out; } + snprintf (key, sizeof (key), "%s%d.parent_volname", prefix, count); + ret = dict_get_str (peer_data, key, &parent_volname); + if (!ret) + strncpy (new_volinfo->parent_volname, parent_volname, + sizeof(new_volinfo->parent_volname)); + memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick_count", count); - ret = dict_get_int32 (vols, key, &new_volinfo->brick_count); + snprintf (key, sizeof (key), "%s%d.brick_count", prefix, count); + ret = dict_get_int32 (peer_data, key, &new_volinfo->brick_count); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); @@ -3552,8 +3836,8 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.version", count); - ret = dict_get_int32 (vols, key, &new_volinfo->version); + snprintf (key, sizeof (key), "%s%d.version", prefix, count); + ret = dict_get_int32 (peer_data, key, &new_volinfo->version); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); @@ -3561,8 +3845,8 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.status", count); - ret = dict_get_int32 (vols, key, (int32_t *)&new_volinfo->status); + snprintf (key, sizeof (key), "%s%d.status", prefix, count); + ret = dict_get_int32 (peer_data, key, (int32_t *)&new_volinfo->status); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); @@ -3570,8 +3854,8 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.sub_count", count); - ret = dict_get_int32 (vols, key, &new_volinfo->sub_count); + snprintf (key, sizeof (key), "%s%d.sub_count", prefix, count); + ret = dict_get_int32 (peer_data, key, &new_volinfo->sub_count); if (ret) { snprintf (msg, sizeof (msg), "%s 
missing in payload for %s", key, volname); @@ -3581,8 +3865,8 @@ glusterd_import_volinfo (dict_t *vols, int count, /* not having a 'stripe_count' key is not a error (as peer may be of old version) */ memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.stripe_count", count); - ret = dict_get_int32 (vols, key, &new_volinfo->stripe_count); + snprintf (key, sizeof (key), "%s%d.stripe_count", prefix, count); + ret = dict_get_int32 (peer_data, key, &new_volinfo->stripe_count); if (ret) gf_log (THIS->name, GF_LOG_INFO, "peer is possibly old version"); @@ -3590,8 +3874,8 @@ glusterd_import_volinfo (dict_t *vols, int count, /* not having a 'replica_count' key is not a error (as peer may be of old version) */ memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.replica_count", count); - ret = dict_get_int32 (vols, key, &new_volinfo->replica_count); + snprintf (key, sizeof (key), "%s%d.replica_count", prefix, count); + ret = dict_get_int32 (peer_data, key, &new_volinfo->replica_count); if (ret) gf_log (THIS->name, GF_LOG_INFO, "peer is possibly old version"); @@ -3599,16 +3883,16 @@ glusterd_import_volinfo (dict_t *vols, int count, /* not having a 'dist_count' key is not a error (as peer may be of old version) */ memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.dist_count", count); - ret = dict_get_int32 (vols, key, &new_volinfo->dist_leaf_count); + snprintf (key, sizeof (key), "%s%d.dist_count", prefix, count); + ret = dict_get_int32 (peer_data, key, &new_volinfo->dist_leaf_count); if (ret) gf_log (THIS->name, GF_LOG_INFO, "peer is possibly old version"); new_volinfo->subvol_count = new_volinfo->brick_count/ glusterd_get_dist_leaf_count (new_volinfo); memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.ckusm", count); - ret = dict_get_uint32 (vols, key, &new_volinfo->cksum); + snprintf (key, sizeof (key), "%s%d.ckusm", prefix, count); + ret = dict_get_uint32 (peer_data, key, 
&new_volinfo->cksum); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); @@ -3616,8 +3900,8 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.volume_id", count); - ret = dict_get_str (vols, key, &volume_id_str); + snprintf (key, sizeof (key), "%s%d.volume_id", prefix, count); + ret = dict_get_str (peer_data, key, &volume_id_str); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); @@ -3627,8 +3911,8 @@ glusterd_import_volinfo (dict_t *vols, int count, uuid_parse (volume_id_str, new_volinfo->volume_id); memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.username", count); - ret = dict_get_str (vols, key, &str); + snprintf (key, sizeof (key), "%s%d.username", prefix, count); + ret = dict_get_str (peer_data, key, &str); if (!ret) { ret = glusterd_auth_set_username (new_volinfo, str); if (ret) @@ -3636,8 +3920,8 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.password", count); - ret = dict_get_str (vols, key, &str); + snprintf (key, sizeof (key), "%s%d.password", prefix, count); + ret = dict_get_str (peer_data, key, &str); if (!ret) { ret = glusterd_auth_set_password (new_volinfo, str); if (ret) @@ -3645,8 +3929,8 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.transport_type", count); - ret = dict_get_uint32 (vols, key, &new_volinfo->transport_type); + snprintf (key, sizeof (key), "%s%d.transport_type", prefix, count); + ret = dict_get_uint32 (peer_data, key, &new_volinfo->transport_type); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); @@ -3655,17 +3939,19 @@ glusterd_import_volinfo (dict_t *vols, int count, new_volinfo->is_snap_volume = is_snap_volume; - snprintf (key, sizeof (key), 
"volume%d.is_volume_restored", count); - ret = dict_get_uint32 (vols, key, &new_volinfo->is_volume_restored); + snprintf (key, sizeof (key), "%s%d.restored_from_snap", prefix, count); + ret = dict_get_str (peer_data, key, &restored_snap); if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Failed to get " - "is_volume_restored option for %s", - volname); + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); goto out; } - snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); - ret = dict_get_uint64 (vols, key, &new_volinfo->snap_max_hard_limit); + uuid_parse (restored_snap, new_volinfo->restored_from_snap); + + snprintf (key, sizeof (key), "%s%d.snap-max-hard-limit", prefix, count); + ret = dict_get_uint64 (peer_data, key, + &new_volinfo->snap_max_hard_limit); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); @@ -3673,8 +3959,8 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.rebalance", count); - ret = dict_get_uint32 (vols, key, &new_volinfo->rebal.defrag_cmd); + snprintf (key, sizeof (key), "%s%d.rebalance", prefix, count); + ret = dict_get_uint32 (peer_data, key, &new_volinfo->rebal.defrag_cmd); if (ret) { snprintf (msg, sizeof (msg), "%s missing in payload for %s", key, volname); @@ -3682,8 +3968,8 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.rebalance-id", count); - ret = dict_get_str (vols, key, &rebalance_id_str); + snprintf (key, sizeof (key), "%s%d.rebalance-id", prefix, count); + ret = dict_get_str (peer_data, key, &rebalance_id_str); if (ret) { /* This is not present in older glusterfs versions, * so don't error out @@ -3694,15 +3980,17 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.rebalance-op", count); - ret = dict_get_uint32 (vols, 
key,(uint32_t *) &new_volinfo->rebal.op); + snprintf (key, sizeof (key), "%s%d.rebalance-op", prefix, count); + ret = dict_get_uint32 (peer_data, key, + (uint32_t *) &new_volinfo->rebal.op); if (ret) { /* This is not present in older glusterfs versions, * so don't error out */ ret = 0; } - ret = gd_import_friend_volume_rebal_dict (vols, count, new_volinfo); + ret = gd_import_friend_volume_rebal_dict (peer_data, count, + new_volinfo); if (ret) { snprintf (msg, sizeof (msg), "Failed to import rebalance dict " "for volume."); @@ -3710,8 +3998,8 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); - ret = dict_get_int32 (vols, key, &rb_status); + snprintf (key, 256, "%s%d."GLUSTERD_STORE_KEY_RB_STATUS, prefix, count); + ret = dict_get_int32 (peer_data, key, &rb_status); if (ret) goto out; new_volinfo->rep_brick.rb_status = rb_status; @@ -3719,9 +4007,9 @@ glusterd_import_volinfo (dict_t *vols, int count, if (new_volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, - count); - ret = dict_get_str (vols, key, &src_brick); + snprintf (key, 256, "%s%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, + prefix, count); + ret = dict_get_str (peer_data, key, &src_brick); if (ret) goto out; @@ -3734,9 +4022,9 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_DST_BRICK, - count); - ret = dict_get_str (vols, key, &dst_brick); + snprintf (key, 256, "%s%d."GLUSTERD_STORE_KEY_RB_DST_BRICK, + prefix, count); + ret = dict_get_str (peer_data, key, &dst_brick); if (ret) goto out; @@ -3749,8 +4037,8 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.rb_id", count); - ret = dict_get_str (vols, key, &rb_id_str); + snprintf (key, sizeof (key), 
"%s%d.rb_id", prefix, count); + ret = dict_get_str (peer_data, key, &rb_id_str); if (ret) { /* This is not present in older glusterfs versions, * so don't error out @@ -3762,7 +4050,8 @@ glusterd_import_volinfo (dict_t *vols, int count, } - ret = glusterd_import_friend_volume_opts (vols, count, new_volinfo); + ret = glusterd_import_friend_volume_opts (peer_data, count, + new_volinfo); if (ret) goto out; @@ -3775,13 +4064,13 @@ glusterd_import_volinfo (dict_t *vols, int count, * present. Only one being present is a failure */ memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.op-version", count); - ret = dict_get_int32 (vols, key, &op_version); + snprintf (key, sizeof (key), "%s%d.op-version", prefix, count); + ret = dict_get_int32 (peer_data, key, &op_version); if (ret) ret = 0; memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.client-op-version", count); - ret = dict_get_int32 (vols, key, &client_op_version); + snprintf (key, sizeof (key), "%s%d.client-op-version", prefix, count); + ret = dict_get_int32 (peer_data, key, &client_op_version); if (ret) ret = 0; @@ -3800,11 +4089,11 @@ glusterd_import_volinfo (dict_t *vols, int count, } memset (key, 0 ,sizeof (key)); - snprintf (key, sizeof (key), "volume%d.caps", count); + snprintf (key, sizeof (key), "%s%d.caps", prefix, count); /*This is not present in older glusterfs versions, so ignore ret value*/ - ret = dict_get_int32 (vols, key, &new_volinfo->caps); + ret = dict_get_int32 (peer_data, key, &new_volinfo->caps); - ret = glusterd_import_bricks (vols, count, new_volinfo); + ret = glusterd_import_bricks (peer_data, count, new_volinfo, prefix); if (ret) goto out; @@ -3881,7 +4170,11 @@ glusterd_volinfo_stop_stale_bricks (glusterd_volinfo_t *new_volinfo, old_brickinfo->hostname, old_brickinfo->path, new_volinfo, &new_brickinfo); - if (ret) { + /* If the brick is stale, i.e it's not a part of the new volume + * or if it's part of the new volume and is pending a snap, + * then 
stop the brick process + */ + if (ret || (new_brickinfo->snap_status == -1)) { /*TODO: may need to switch to 'atomic' flavour of * brick_stop, once we make peer rpc program also * synctask enabled*/ @@ -3903,9 +4196,34 @@ int32_t glusterd_delete_stale_volume (glusterd_volinfo_t *stale_volinfo, glusterd_volinfo_t *valid_volinfo) { + int32_t ret = -1; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *voliter = NULL; + xlator_t *this = NULL; + GF_ASSERT (stale_volinfo); GF_ASSERT (valid_volinfo); + /* Copy snap_volumes list from stale_volinfo to valid_volinfo */ + valid_volinfo->snap_count = 0; + list_for_each_entry_safe (voliter, temp_volinfo, + &stale_volinfo->snap_volumes, snapvol_list) { + list_add_tail (&voliter->snapvol_list, + &valid_volinfo->snap_volumes); + valid_volinfo->snap_count++; + } + + if ((!uuid_is_null (stale_volinfo->restored_from_snap)) && + (uuid_compare (stale_volinfo->restored_from_snap, + valid_volinfo->restored_from_snap))) { + ret = glusterd_lvm_snapshot_remove (NULL, stale_volinfo); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to remove lvm snapshot for " + "restored volume %s", stale_volinfo->volname); + } + } + /* If stale volume is in started state, copy the port numbers of the * local bricks if they exist in the valid volume information. * stop stale bricks. Stale volume information is going to be deleted. 
@@ -3992,7 +4310,7 @@ out: } int32_t -glusterd_import_friend_volume (dict_t *vols, size_t count) +glusterd_import_friend_volume (dict_t *peer_data, size_t count) { int32_t ret = -1; @@ -4001,13 +4319,14 @@ glusterd_import_friend_volume (dict_t *vols, size_t count) glusterd_volinfo_t *old_volinfo = NULL; glusterd_volinfo_t *new_volinfo = NULL; - GF_ASSERT (vols); + GF_ASSERT (peer_data); this = THIS; GF_ASSERT (this); priv = this->private; GF_ASSERT (priv); - ret = glusterd_import_volinfo (vols, count, &new_volinfo); + ret = glusterd_import_volinfo (peer_data, count, + &new_volinfo, "volume"); if (ret) goto out; @@ -4033,7 +4352,8 @@ glusterd_import_friend_volume (dict_t *vols, size_t count) if (ret) goto out; - ret = glusterd_import_quota_conf (vols, count, new_volinfo); + ret = glusterd_import_quota_conf (peer_data, count, + new_volinfo, "volume"); if (ret) goto out; @@ -4045,20 +4365,20 @@ out: } int32_t -glusterd_import_friend_volumes (dict_t *vols) +glusterd_import_friend_volumes (dict_t *peer_data) { int32_t ret = -1; int32_t count = 0; int i = 1; - GF_ASSERT (vols); + GF_ASSERT (peer_data); - ret = dict_get_int32 (vols, "count", &count); + ret = dict_get_int32 (peer_data, "count", &count); if (ret) goto out; while (i <= count) { - ret = glusterd_import_friend_volume (vols, i); + ret = glusterd_import_friend_volume (peer_data, i); if (ret) goto out; i++; @@ -4159,25 +4479,840 @@ out: } int32_t -glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname) +glusterd_perform_missed_op (glusterd_snap_t *snap, int32_t op) { - int32_t ret = -1; - int32_t count = 0; - int i = 1; - gf_boolean_t update = _gf_false; - gf_boolean_t stale_nfs = _gf_false; - gf_boolean_t stale_shd = _gf_false; - gf_boolean_t stale_qd = _gf_false; + dict_t *dict = NULL; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + uuid_t null_uuid = {0}; + + this = THIS; + 
GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + + dict = dict_new(); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create dict"); + ret = -1; + goto out; + } + + switch (op) { + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snap_remove (dict, snap, _gf_true, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to remove snap"); + goto out; + } + + break; + case GF_SNAP_OPTION_TYPE_RESTORE: + /* TODO : As of now there is only volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = list_entry (snap->volumes.next, + glusterd_volinfo_t, vol_list); + + /* Find the parent volinfo */ + ret = glusterd_volinfo_find (snap_volinfo->parent_volname, + &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not get volinfo of %s", + snap_volinfo->parent_volname); + goto out; + } + + /* Bump down the original volinfo's version, coz it would have + * incremented already due to volume handshake + */ + volinfo->version--; + uuid_copy (volinfo->restored_from_snap, null_uuid); + + /* Perform the restore */ + ret = gd_restore_snap_volume (dict, volinfo, snap_volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to restore " + "snap for %s", snap->snapname); + volinfo->version++; + goto out; + } + + break; + default: + /* The entry must be a create, delete, or + * restore entry + */ + gf_log (this->name, GF_LOG_ERROR, "Invalid missed snap entry"); + ret = -1; + goto out; + } + +out: + dict_unref (dict); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Perform missed deletes and restores on this node */ +int32_t +glusterd_perform_missed_snap_ops () +{ + int32_t ret = -1; + int32_t op_status = -1; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + glusterd_snap_t *snap = NULL; + uuid_t snap_uuid = {0,}; + xlator_t *this = NULL; + + 
this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + /* If the pending snap_op is not for this node then continue */ + if (strcmp (missed_snapinfo->node_uuid, uuid_utoa (MY_UUID))) + continue; + + /* Find the snap id */ + uuid_parse (missed_snapinfo->snap_uuid, snap_uuid); + snap = NULL; + snap = glusterd_find_snap_by_id (snap_uuid); + if (!snap) { + /* If the snap is not found, then a delete or a + * restore can't be pending on that snap_uuid. + */ + gf_log (this->name, GF_LOG_DEBUG, + "Not a pending delete or restore op"); + continue; + } - GF_ASSERT (vols); + op_status = GD_MISSED_SNAP_PENDING; + list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops, + snap_ops_list) { + /* If the snap_op is create or its status is + * GD_MISSED_SNAP_DONE then continue + */ + if ((snap_opinfo->status == GD_MISSED_SNAP_DONE) || + (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE)) + continue; + + /* Perform the actual op for the first time for + * this snap, and mark the snap_status as + * GD_MISSED_SNAP_DONE. For other entries for the same + * snap, just mark the entry as done. 
+ */ + if (op_status == GD_MISSED_SNAP_PENDING) { + ret = glusterd_perform_missed_op + (snap, + snap_opinfo->op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to perform missed snap op"); + goto out; + } + op_status = GD_MISSED_SNAP_DONE; + } + + snap_opinfo->status = GD_MISSED_SNAP_DONE; + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Import friend volumes missed_snap_list and update * + * missed_snap_list if need be */ +int32_t +glusterd_import_friend_missed_snap_list (dict_t *peer_data) +{ + int32_t missed_snap_count = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (peer_data); + + priv = this->private; + GF_ASSERT (priv); + + /* Add the friends missed_snaps entries to the in-memory list */ + ret = dict_get_int32 (peer_data, "missed_snap_count", + &missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_INFO, + "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_add_missed_snaps_to_list (peer_data, + missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snaps to list"); + goto out; + } + + ret = glusterd_perform_missed_snap_ops (); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to perform snap operations"); + /* Not going to out at this point coz some * + * missed ops might have been performed. We * + * need to persist the current list * + */ + } + + ret = glusterd_store_update_missed_snaps (); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to update missed_snaps_list"); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Check for the peer_snap_name in the list of existing snapshots. + * If a snap exists with the same name and a different snap_id, then + * there is a conflict. Set conflict as _gf_true, and snap to the + * conflicting snap object. 
If a snap exists with the same name, and the + * same snap_id, then there is no conflict. Set conflict as _gf_false + * and snap to the existing snap object. If no snap exists with the + * peer_snap_name, then there is no conflict. Set conflict as _gf_false + * and snap to NULL. + */ +void +glusterd_is_peer_snap_conflicting (char *peer_snap_name, char *peer_snap_id, + gf_boolean_t *conflict, + glusterd_snap_t **snap, char *hostname) +{ + uuid_t peer_snap_uuid = {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (peer_snap_name); + GF_ASSERT (peer_snap_id); + GF_ASSERT (conflict); + GF_ASSERT (snap); + GF_ASSERT (hostname); + + *snap = glusterd_find_snap_by_name (peer_snap_name); + if (*snap) { + uuid_parse (peer_snap_id, peer_snap_uuid); + if (!uuid_compare (peer_snap_uuid, (*snap)->snap_id)) { + /* Current node contains the same snap having + * the same snapname and snap_id + */ + gf_log (this->name, GF_LOG_DEBUG, + "Snapshot %s from peer %s present in " + "localhost", peer_snap_name, hostname); + *conflict = _gf_false; + } else { + /* Current node contains the same snap having + * the same snapname but different snap_id + */ + gf_log (this->name, GF_LOG_DEBUG, + "Snapshot %s from peer %s conflicts with " + "snapshot in localhost", peer_snap_name, + hostname); + *conflict = _gf_true; + } + } else { + /* Peer contains snapshots missing on the current node */ + gf_log (this->name, GF_LOG_INFO, + "Snapshot %s from peer %s missing on localhost", + peer_snap_name, hostname); + *conflict = _gf_false; + } +} + +/* Check if the local node is hosting any bricks for the given snapshot */ +gf_boolean_t +glusterd_are_snap_bricks_local (glusterd_snap_t *snap) +{ + gf_boolean_t is_local = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (snap); + + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + list_for_each_entry 
(brickinfo, &volinfo->bricks, brick_list) { + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + is_local = _gf_true; + goto out; + } + } + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", is_local); + return is_local; +} + +/* Check if the peer has missed any snap delete for the given snap_id */ +gf_boolean_t +glusterd_peer_has_missed_snap_delete (glusterd_peerinfo_t *peerinfo, + char *peer_snap_id) +{ + char *peer_uuid = NULL; + gf_boolean_t missed_delete = _gf_false; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (peerinfo); + GF_ASSERT (peer_snap_id); + + peer_uuid = uuid_utoa (peerinfo->uuid); + + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + /* Look for missed snap for the same peer, and + * the same snap_id + */ + if ((!strcmp (peer_uuid, missed_snapinfo->node_uuid)) && + (!strcmp (peer_snap_id, missed_snapinfo->snap_uuid))) { + /* Check if the missed snap's op is delete and the + * status is pending + */ + list_for_each_entry (snap_opinfo, + &missed_snapinfo->snap_ops, + snap_ops_list) { + if ((snap_opinfo->op == + GF_SNAP_OPTION_TYPE_DELETE) && + (snap_opinfo->status == + GD_MISSED_SNAP_PENDING)) { + missed_delete = _gf_true; + goto out; + } + } + } + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", missed_delete); + return missed_delete; +} + +/* Genrate and store snap volfiles for imported snap object */ +int32_t +glusterd_gen_snap_volfiles (glusterd_volinfo_t *snap_vol, char *peer_snap_name) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *parent_volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (snap_vol); + GF_ASSERT (peer_snap_name); + + ret = glusterd_store_volinfo (snap_vol, GLUSTERD_VOLINFO_VER_AC_NONE); + if (ret) 
{ + gf_log (this->name, GF_LOG_ERROR, "Failed to store snapshot " + "volinfo (%s) for snap %s", snap_vol->volname, + peer_snap_name); + goto out; + } + + ret = generate_brick_volfiles (snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "generating the brick volfiles for the " + "snap %s failed", peer_snap_name); + goto out; + } + + ret = generate_client_volfiles (snap_vol, GF_CLIENT_TRUSTED); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "generating the trusted client volfiles for " + "the snap %s failed", peer_snap_name); + goto out; + } + + ret = generate_client_volfiles (snap_vol, GF_CLIENT_OTHER); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "generating the client volfiles for the " + "snap %s failed", peer_snap_name); + goto out; + } + + ret = glusterd_volinfo_find (snap_vol->parent_volname, + &parent_volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Parent volinfo " + "not found for %s volume of snap %s", + snap_vol->volname, peer_snap_name); + goto out; + } + + glusterd_list_add_snapvol (parent_volinfo, snap_vol); + + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "not starting snap brick %s:%s for " + "for the snap %s (volume: %s)", + brickinfo->hostname, brickinfo->path, + peer_snap_name, parent_volinfo->volname); + continue; + } + + ret = glusterd_brick_start (snap_vol, brickinfo, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "starting the " + "brick %s:%s for the snap %s (volume: %s) " + "failed", brickinfo->hostname, brickinfo->path, + peer_snap_name, parent_volinfo->volname); + goto out; + } + } + + snap_vol->status = GLUSTERD_STATUS_STARTED; + + ret = glusterd_store_volinfo (snap_vol, GLUSTERD_VOLINFO_VER_AC_NONE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store snap volinfo"); + goto out; + } +out: + gf_log (this->name, 
GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Import snapshot info from peer_data and add it to priv */ +int32_t +glusterd_import_friend_snap (dict_t *peer_data, int32_t snap_count, + char *peer_snap_name, char *peer_snap_id) +{ + char buf[NAME_MAX] = ""; + char prefix[NAME_MAX] = ""; + dict_t *dict = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + int32_t ret = -1; + int32_t volcount = -1; + int32_t i = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (peer_data); + GF_ASSERT (peer_snap_name); + GF_ASSERT (peer_snap_id); + + snprintf (prefix, sizeof(prefix), "snap%d", snap_count); + + snap = glusterd_new_snap_object (); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Could not create " + "the snap object for snap %s", peer_snap_name); + goto out; + } + + strcpy (snap->snapname, peer_snap_name); + uuid_parse (peer_snap_id, snap->snap_id); + + snprintf (buf, sizeof(buf), "%s.snapid", prefix); + ret = dict_get_str (peer_data, buf, &snap->description); + + snprintf (buf, sizeof(buf), "%s.time_stamp", prefix); + ret = dict_get_int64 (peer_data, buf, &snap->time_stamp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get time_stamp for snap %s", + peer_snap_name); + goto out; + } + + snprintf (buf, sizeof(buf), "%s.snap_restored", prefix); + ret = dict_get_int8 (peer_data, buf, (int8_t *) &snap->snap_restored); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get snap_restored for snap %s", + peer_snap_name); + goto out; + } + + snprintf (buf, sizeof(buf), "%s.snap_status", prefix); + ret = dict_get_int32 (peer_data, buf, (int32_t *) &snap->snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get snap_status for snap %s", + peer_snap_name); + goto out; + } + + snprintf (buf, sizeof(buf), "%s.volcount", prefix); + ret = dict_get_int32 (peer_data, buf, &volcount); + if 
(ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get volcount for snap %s", + peer_snap_name); + goto out; + } + + ret = glusterd_store_create_snap_dir (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snap dir"); + goto out; + } + + list_add_order (&snap->snap_list, &priv->snapshots, + glusterd_compare_snap_time); + + for (i = 1; i <= volcount; i++) { + ret = glusterd_import_volinfo (peer_data, i, + &snap_vol, prefix); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to import snap volinfo for " + "snap %s", peer_snap_name); + goto out; + } + + snap_vol->snapshot = snap; + + ret = glusterd_gen_snap_volfiles (snap_vol, peer_snap_name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate snap vol files " + "for snap %s", peer_snap_name); + goto out; + } + + ret = glusterd_import_quota_conf (peer_data, i, + snap_vol, prefix); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to import quota conf " + "for snap %s", peer_snap_name); + goto out; + } + + snap_vol = NULL; + } + + ret = glusterd_store_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Could not store snap" + "object %s", peer_snap_name); + goto out; + } + +out: + if (ret) + glusterd_snap_remove (dict, snap, + _gf_true, _gf_true); + + if (dict) + dict_unref (dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* During a peer-handshake, after the volumes have synced, and the list of + * missed snapshots have synced, the node will perform the pending deletes + * and restores on this list. 
At this point, the current snapshot list in + * the node will be updated, and hence in case of conflicts arising during + * snapshot handshake, the peer hosting the bricks will be given precedence + * Likewise, if there will be a conflict, and both peers will be in the same + * state, i.e either both would be hosting bricks or both would not be hosting + * bricks, then a decision can't be taken and a peer-reject will happen. + * + * glusterd_compare_and_update_snap() implements the following algorithm to + * perform the above task: + * Step 1: Start. + * Step 2: Check if the peer is missing a delete on the said snap. + * If yes, goto step 6. + * Step 3: Check if there is a conflict between the peer's data and the + * local snap. If no, goto step 5. + * Step 4: As there is a conflict, check if both the peer and the local nodes + * are hosting bricks. Based on the results perform the following: + * Peer Hosts Bricks Local Node Hosts Bricks Action + * Yes Yes Goto Step 7 + * No No Goto Step 7 + * Yes No Goto Step 8 + * No Yes Goto Step 6 + * Step 5: Check if the local node is missing the peer's data. + * If yes, goto step 9. + * Step 6: It's a no-op. Goto step 10 + * Step 7: Peer Reject. Goto step 10 + * Step 8: Delete local node's data. + * Step 9: Accept Peer Data. 
+ * Step 10: Stop + * + */ +int32_t +glusterd_compare_and_update_snap (dict_t *peer_data, int32_t snap_count, + glusterd_peerinfo_t *peerinfo) +{ + char buf[NAME_MAX] = ""; + char prefix[NAME_MAX] = ""; + char *peer_snap_name = NULL; + char *peer_snap_id = NULL; + dict_t *dict = NULL; + glusterd_snap_t *snap = NULL; + gf_boolean_t conflict = _gf_false; + gf_boolean_t is_local = _gf_false; + gf_boolean_t is_hosted = _gf_false; + gf_boolean_t missed_delete = _gf_false; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (peer_data); + GF_ASSERT (peerinfo); + + snprintf (prefix, sizeof(prefix), "snap%d", snap_count); + + /* Fetch the peer's snapname */ + snprintf (buf, sizeof(buf), "%s.snapname", prefix); + ret = dict_get_str (peer_data, buf, &peer_snap_name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snapname from peer: %s", + peerinfo->hostname); + goto out; + } + + /* Fetch the peer's snap_id */ + snprintf (buf, sizeof(buf), "%s.snap_id", prefix); + ret = dict_get_str (peer_data, buf, &peer_snap_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_id from peer: %s", + peerinfo->hostname); + goto out; + } + + /* Check if the peer has missed a snap delete for the + * snap in question + */ + missed_delete = glusterd_peer_has_missed_snap_delete (peerinfo, + peer_snap_id); + if (missed_delete == _gf_true) { + /* Peer has missed delete on the missing/conflicting snap_id */ + gf_log (this->name, GF_LOG_INFO, "Peer %s has missed a delete " + "on snap %s", peerinfo->hostname, peer_snap_name); + ret = 0; + goto out; + } + + /* Check if there is a conflict, and if the + * peer data is already present + */ + glusterd_is_peer_snap_conflicting (peer_snap_name, peer_snap_id, + &conflict, &snap, + peerinfo->hostname); + if (conflict == _gf_false) { + if (snap) { + /* Peer has snap with the same snapname + * and snap_id. 
No need to accept peer data + */ + ret = 0; + goto out; + } else { + /* Peer has snap with the same snapname + * and snap_id, which local node doesn't have. + */ + goto accept_peer_data; + } + } + + /* There is a conflict. Check if the current node is + * hosting bricks for the conflicted snap. + */ + is_local = glusterd_are_snap_bricks_local (snap); + + /* Check if the peer is hosting any bricks for the + * conflicting snap + */ + snprintf (buf, sizeof(buf), "%s.host_bricks", prefix); + ret = dict_get_int8 (peer_data, buf, (int8_t *) &is_hosted); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch host_bricks from peer: %s " + "for %s", peerinfo->hostname, peer_snap_name); + goto out; + } + + /* As there is a conflict at this point of time, the data of the + * node that hosts a brick takes precedence. If both the local + * node and the peer are in the same state, i.e if both of them + * are either hosting or not hosting the bricks, for the snap, + * then it's a peer reject + */ + if (is_hosted == is_local) { + gf_log (this->name, GF_LOG_ERROR, + "Conflict in snapshot %s with peer %s", + peer_snap_name, peerinfo->hostname); + ret = -1; + goto out; + } + + if (is_hosted == _gf_false) { + /* If there was a conflict, and the peer is not hosting + * any brick, then don't accept peer data + */ + gf_log (this->name, GF_LOG_DEBUG, + "Peer doesn't hosts bricks for conflicting " + "snap(%s). Not accepting peer data.", + peer_snap_name); + ret = 0; + goto out; + } + + /* The peer is hosting a brick in case of conflict + * And local node isn't. Hence remove local node's + * data and accept peer data + */ + + gf_log (this->name, GF_LOG_DEBUG, "Peer hosts bricks for conflicting " + "snap(%s). Removing local data. 
Accepting peer data.", + peer_snap_name); + + dict = dict_new(); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to create dict"); + ret = -1; + goto out; + } + + ret = glusterd_snap_remove (dict, snap, _gf_true, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to remove snap %s", snap->snapname); + goto out; + } + +accept_peer_data: + + /* Accept Peer Data */ + ret = glusterd_import_friend_snap (peer_data, snap_count, + peer_snap_name, peer_snap_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to import snap %s from peer %s", + peer_snap_name, peerinfo->hostname); + goto out; + } + +out: + if (dict) + dict_unref (dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Compare snapshots present in peer_data, with the snapshots in + * the current node + */ +int32_t +glusterd_compare_friend_snapshots (dict_t *peer_data, + glusterd_peerinfo_t *peerinfo) +{ + int32_t ret = -1; + int32_t snap_count = 0; + int i = 1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (peer_data); + GF_ASSERT (peerinfo); + + ret = dict_get_int32 (peer_data, "snap_count", &snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to fetch snap_count"); + goto out; + } + + for (i = 1; i <= snap_count; i++) { + /* Compare one snapshot from peer_data at a time */ + ret = glusterd_compare_and_update_snap (peer_data, i, peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to compare snapshots with peer %s", + peerinfo->hostname); + goto out; + } + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_compare_friend_data (dict_t *peer_data, int32_t *status, + char *hostname) +{ + int32_t ret = -1; + int32_t count = 0; + int i = 1; + gf_boolean_t update = _gf_false; + gf_boolean_t stale_nfs = _gf_false; + gf_boolean_t stale_shd = _gf_false; + gf_boolean_t stale_qd = _gf_false; + xlator_t *this = NULL; + + this = 
THIS; + GF_ASSERT (this); + GF_ASSERT (peer_data); GF_ASSERT (status); - ret = dict_get_int32 (vols, "count", &count); + ret = dict_get_int32 (peer_data, "count", &count); if (ret) goto out; while (i <= count) { - ret = glusterd_compare_friend_volume (vols, i, status, + ret = glusterd_compare_friend_volume (peer_data, i, status, hostname); if (ret) goto out; @@ -4199,10 +5334,10 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname) stale_shd = _gf_true; if (glusterd_is_nodesvc_running ("quotad")) stale_qd = _gf_true; - ret = glusterd_import_global_opts (vols); + ret = glusterd_import_global_opts (peer_data); if (ret) goto out; - ret = glusterd_import_friend_volumes (vols); + ret = glusterd_import_friend_volumes (peer_data); if (ret) goto out; if (_gf_false == glusterd_are_all_volumes_stopped ()) { @@ -4218,9 +5353,8 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname) } out: - gf_log ("", GF_LOG_DEBUG, "Returning with ret: %d, status: %d", - ret, *status); - + gf_log (this->name, GF_LOG_DEBUG, + "Returning with ret: %d, status: %d", ret, *status); return ret; } @@ -4522,14 +5656,16 @@ glusterd_nodesvc_start (char *server, gf_boolean_t wait) "--trace-children=yes", "--track-origins=yes", NULL); runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); - } + } runner_add_args (&runner, SBIN_DIR"/glusterfs", "-s", "localhost", "--volfile-id", volfileid, "-p", pidfile, "-l", logfile, - "-S", sockfpath, NULL); + "-S", sockfpath, + "-L", "DEBUG", + NULL); if (!strcmp (server, "glustershd")) { snprintf (glusterd_uuid_option, sizeof (glusterd_uuid_option), @@ -5456,7 +6592,6 @@ out: return -1; } -#ifdef GF_LINUX_HOST_OS int glusterd_get_brick_root (char *path, char **mount_point) { @@ -5752,7 +6887,6 @@ out: return device; } -#endif int glusterd_add_brick_detail_to_dict (glusterd_volinfo_t *volinfo, @@ -5826,13 +6960,12 @@ glusterd_add_brick_detail_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; } -#ifdef 
GF_LINUX_HOST_OS + ret = glusterd_add_brick_mount_details (brickinfo, dict, count); if (ret) goto out; ret = glusterd_add_inode_size_to_dict (dict, count); -#endif out: if (ret) gf_log (this->name, GF_LOG_DEBUG, "Error adding brick" @@ -8830,7 +9963,7 @@ glusterd_snap_config_use_rsp_dict (dict_t *dst, dict_t *src) } for (i = 0; i < voldisplaycount; i++) { - snprintf (buf, sizeof(buf), "volume%ld-volname", i); + snprintf (buf, sizeof(buf), "volume%"PRIu64"-volname", i); ret = dict_get_str (src, buf, &volname); if (ret) { gf_log ("", GF_LOG_ERROR, @@ -8845,7 +9978,7 @@ glusterd_snap_config_use_rsp_dict (dict_t *dst, dict_t *src) } snprintf (buf, sizeof(buf), - "volume%ld-snap-max-hard-limit", i); + "volume%"PRIu64"-snap-max-hard-limit", i); ret = dict_get_uint64 (src, buf, &value); if (ret) { gf_log ("", GF_LOG_ERROR, @@ -8860,7 +9993,7 @@ glusterd_snap_config_use_rsp_dict (dict_t *dst, dict_t *src) } snprintf (buf, sizeof(buf), - "volume%ld-active-hard-limit", i); + "volume%"PRIu64"-active-hard-limit", i); ret = dict_get_uint64 (src, buf, &value); if (ret) { gf_log ("", GF_LOG_ERROR, @@ -8875,7 +10008,7 @@ glusterd_snap_config_use_rsp_dict (dict_t *dst, dict_t *src) } snprintf (buf, sizeof(buf), - "volume%ld-snap-max-soft-limit", i); + "volume%"PRIu64"-snap-max-soft-limit", i); ret = dict_get_uint64 (src, buf, &value); if (ret) { gf_log ("", GF_LOG_ERROR, @@ -9993,7 +11126,6 @@ glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo) if (!new_missed_snapinfo) goto out; - new_missed_snapinfo->node_snap_info = NULL; INIT_LIST_HEAD (&new_missed_snapinfo->missed_snaps); INIT_LIST_HEAD (&new_missed_snapinfo->snap_ops); @@ -10023,7 +11155,6 @@ glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op) if (!new_snap_op) goto out; - new_snap_op->brick_path = NULL; new_snap_op->brick_num = -1; new_snap_op->op = -1; new_snap_op->status = -1; @@ -10376,3 +11507,38 @@ glusterd_compare_volume_name(struct list_head *list1, struct list_head *list2) volinfo2 = 
list_entry(list2, glusterd_volinfo_t, vol_list); return strcmp(volinfo1->volname, volinfo2->volname); } + +int32_t +glusterd_mount_lvm_snapshot (char *device_path, char *brick_mount_path) +{ + char msg[NAME_MAX] = ""; + int32_t ret = -1; + runner_t runner = {0, }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (brick_mount_path); + GF_ASSERT (device_path); + + + runinit (&runner); + snprintf (msg, sizeof (msg), "mount -o nouuid %s %s", + device_path, brick_mount_path); + runner_add_args (&runner, "mount", "-o", "nouuid", device_path, + brick_mount_path, NULL); + runner_log (&runner, this->name, GF_LOG_DEBUG, msg); + ret = runner_run (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mounting the snapshot " + "logical device %s failed (error: %s)", device_path, + strerror (errno)); + goto out; + } else + gf_log (this->name, GF_LOG_DEBUG, "mounting the snapshot " + "logical device %s successful", device_path); + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 23f8ad7f6..84fa89b0e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -8,7 +8,7 @@ cases as published by the Free Software Foundation. 
*/ #ifndef _GLUSTERD_UTILS_H -#define _GLUSTERD_UTILS_H_ +#define _GLUSTERD_UTILS_H #ifndef _CONFIG_H #define _CONFIG_H @@ -182,10 +182,11 @@ glusterd_volume_brickinfo_get_by_brick (char *brick, glusterd_brickinfo_t **brickinfo); int32_t -glusterd_build_volume_dict (dict_t **vols); +glusterd_add_volumes_to_export_dict (dict_t **peer_data); int32_t -glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname); +glusterd_compare_friend_data (dict_t *peer_data, int32_t *status, + char *hostname); int glusterd_compute_cksum (glusterd_volinfo_t *volinfo, @@ -251,7 +252,7 @@ int glusterd_remote_hostname_get (rpcsvc_request_t *req, char *remote_host, int len); int32_t -glusterd_import_friend_volumes (dict_t *vols); +glusterd_import_friend_volumes (dict_t *peer_data); void glusterd_set_volume_status (glusterd_volinfo_t *volinfo, glusterd_volume_status status); @@ -280,7 +281,8 @@ int32_t glusterd_volume_count_get (void); int32_t glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, - dict_t *dict, int32_t count); + dict_t *dict, int32_t count, + char *prefix); int glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port, gf_boolean_t localhost, @@ -684,14 +686,16 @@ glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc); int32_t glusterd_compare_volume_name(struct list_head *, struct list_head *); -#ifdef GF_LINUX_HOST_OS + char* glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo); + struct mntent * glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab); + int glusterd_get_brick_root (char *path, char **mount_point); -#endif //LINUX_HOST + int glusterd_compare_snap_time(struct list_head *, struct list_head *); @@ -713,8 +717,42 @@ int32_t glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op); int32_t -glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, glusterd_brickinfo_t *brickinfo, int32_t brick_number, 
int32_t op); +int32_t +glusterd_add_missed_snaps_to_export_dict (dict_t *peer_data); + +int32_t +glusterd_import_friend_missed_snap_list (dict_t *peer_data); + +int32_t +gd_restore_snap_volume (dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, + glusterd_volinfo_t *snap_vol); + +int32_t +glusterd_mount_lvm_snapshot (char *device_path, char *brick_mount_path); + +int32_t +glusterd_add_snapshots_to_export_dict (dict_t *peer_data); + +int32_t +glusterd_compare_friend_snapshots (dict_t *peer_data, + glusterd_peerinfo_t *peerinfo); + +int32_t +glusterd_snapobject_delete (glusterd_snap_t *snap); + +int32_t +glusterd_snap_volume_remove (dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, + gf_boolean_t remove_lvm, + gf_boolean_t force); + +int32_t +glusterd_store_create_snap_dir (glusterd_snap_t *snap); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 332c3d359..f42d596ba 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -739,7 +739,7 @@ int glusterd_volinfo_get_boolean (glusterd_volinfo_t *volinfo, char *key) { char *val = NULL; - gf_boolean_t boo = _gf_false; + gf_boolean_t enabled = _gf_false; int ret = 0; ret = glusterd_volinfo_get (volinfo, key, &val); @@ -747,14 +747,14 @@ glusterd_volinfo_get_boolean (glusterd_volinfo_t *volinfo, char *key) return -1; if (val) - ret = gf_string2boolean (val, &boo); + ret = gf_string2boolean (val, &enabled); if (ret) { gf_log ("", GF_LOG_ERROR, "value for %s option is not valid", key); return -1; } - return boo; + return enabled; } gf_boolean_t @@ -1256,8 +1256,8 @@ static int server_check_marker_off (volgen_graph_t *graph, struct volopt_map_entry *vme, glusterd_volinfo_t *volinfo) { - gf_boolean_t bool = _gf_false; - int ret = 0; + gf_boolean_t enabled = _gf_false; + int ret = 0; GF_ASSERT (volinfo); GF_ASSERT (vme); @@ -1265,8 +1265,8 @@ server_check_marker_off (volgen_graph_t *graph, struct 
volopt_map_entry *vme, if (strcmp (vme->option, "!xtime") != 0) return 0; - ret = gf_string2boolean (vme->value, &bool); - if (ret || bool) + ret = gf_string2boolean (vme->value, &enabled); + if (ret || enabled) goto out; ret = glusterd_volinfo_get_boolean (volinfo, VKEY_MARKER_XTIME); @@ -1277,10 +1277,10 @@ server_check_marker_off (volgen_graph_t *graph, struct volopt_map_entry *vme, } if (ret) { - bool = _gf_false; - ret = glusterd_check_gsync_running (volinfo, &bool); + enabled = _gf_false; + ret = glusterd_check_gsync_running (volinfo, &enabled); - if (bool) { + if (enabled) { gf_log ("", GF_LOG_WARNING, GEOREP" sessions active" "for the volume %s, cannot disable marker " ,volinfo->volname); @@ -1900,6 +1900,10 @@ server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, if (ret) return -1; + xl = volgen_graph_add (graph, "features/barrier", volname); + if (!xl) + return -1; + ret = dict_get_int32 (volinfo->dict, "enable-pump", &pump); if (ret == -ENOENT) ret = pump = 0; @@ -4591,128 +4595,3 @@ gd_is_boolean_option (char *key) return _gf_false; } - -/* This function will restore origin volume to it's snap. - * The restore operation will simply replace the Gluster origin - * volume with the snap volume. - * TODO: Multi-volume delete to be done. - * Cleanup in case of restore failure is pending. 
- * - * @param orig_vol volinfo of origin volume - * @param snap_vol volinfo of snapshot volume - * - * @return 0 on success and negative value on error - */ -int -gd_restore_snap_volume (dict_t *rsp_dict, - glusterd_volinfo_t *orig_vol, - glusterd_volinfo_t *snap_vol) -{ - int ret = -1; - glusterd_volinfo_t *new_volinfo = NULL; - glusterd_snap_t *snap = NULL; - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; - glusterd_volinfo_t *temp_volinfo = NULL; - glusterd_volinfo_t *voliter = NULL; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (rsp_dict); - conf = this->private; - GF_ASSERT (conf); - - GF_VALIDATE_OR_GOTO (this->name, orig_vol, out); - GF_VALIDATE_OR_GOTO (this->name, snap_vol, out); - snap = snap_vol->snapshot; - GF_VALIDATE_OR_GOTO (this->name, snap, out); - - /* Snap volume must be stoped before performing the - * restore operation. - */ - ret = glusterd_stop_volume (snap_vol); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to stop " - "snap volume(%s)", snap_vol->volname); - goto out; - } - - /* Create a new volinfo for the restored volume */ - ret = glusterd_volinfo_dup (snap_vol, &new_volinfo, _gf_true); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to create volinfo"); - goto out; - } - - /* Following entries need to be derived from origin volume. 
*/ - strcpy (new_volinfo->volname, orig_vol->volname); - uuid_copy (new_volinfo->volume_id, orig_vol->volume_id); - new_volinfo->snap_count = orig_vol->snap_count; - new_volinfo->snap_max_hard_limit = orig_vol->snap_max_hard_limit; - new_volinfo->is_volume_restored = _gf_true; - - /* Bump the version of the restored volume, so that nodes * - * which are done can sync during handshake */ - new_volinfo->version = orig_vol->version; - - list_for_each_entry_safe (voliter, temp_volinfo, - &orig_vol->snap_volumes, snapvol_list) { - list_add_tail (&voliter->snapvol_list, - &new_volinfo->snap_volumes); - } - /* Copy the snap vol info to the new_volinfo.*/ - ret = glusterd_snap_volinfo_restore (rsp_dict, new_volinfo, snap_vol); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to restore snap"); - (void)glusterd_volinfo_delete (new_volinfo); - goto out; - } - - /* If the orig_vol is already restored then we should delete - * the backend LVMs */ - if (orig_vol->is_volume_restored) { - ret = glusterd_lvm_snapshot_remove (rsp_dict, orig_vol); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to remove " - "LVM backend"); - (void)glusterd_volinfo_delete (new_volinfo); - goto out; - } - } - - /* Once the new_volinfo is completely constructed then delete - * the orinal volinfo - */ - ret = glusterd_volinfo_delete (orig_vol); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to delete volinfo"); - (void)glusterd_volinfo_delete (new_volinfo); - goto out; - } - /* New volinfo always shows the status as created. Therefore - * set the status to stop. */ - glusterd_set_volume_status (new_volinfo, GLUSTERD_STATUS_STOPPED); - - list_add_tail (&new_volinfo->vol_list, &conf->volumes); - - /* Now delete the snap entry. As a first step delete the snap - * volume information stored in store. 
*/ - ret = glusterd_snap_remove (rsp_dict, snap, _gf_false, _gf_true); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Failed to delete " - "snap %s", snap->snapname); - goto out; - } - - ret = glusterd_store_volinfo (new_volinfo, - GLUSTERD_VOLINFO_VER_AC_INCREMENT); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Failed to store volinfo"); - goto out; - } - - ret = 0; -out: - - return ret; -} diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index dcba11f32..f4703c288 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -176,7 +176,4 @@ gd_is_xlator_option (char *key); gf_boolean_t gd_is_boolean_option (char *key); -int gd_restore_snap_volume (dict_t *rsp_dict, - glusterd_volinfo_t *orig_vol, - glusterd_volinfo_t *snap_vol); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index eac926d95..504aeb839 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1213,7 +1213,7 @@ glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr) if (volinfo->snap_count > 0 || !list_empty(&volinfo->snap_volumes)) { snprintf (msg, sizeof (msg), "Cannot delete Volume %s ," - "as it has %ld snapshots. " + "as it has %"PRIu64" snapshots. 
" "To delete the volume, " "first delete all the snapshots under it.", volname, volinfo->snap_count); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 3f1d7d539..1374e82cd 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -116,8 +116,8 @@ validate_cache_max_min_size (dict_t *dict, char *key, char *value, "performance.cache-max-file-size", ¤t_max_value); if (current_max_value) { - gf_string2bytesize (current_max_value, &max_value); - gf_string2bytesize (value, &min_value); + gf_string2bytesize_uint64 (current_max_value, &max_value); + gf_string2bytesize_uint64 (value, &min_value); current_min_value = value; } } else if ((!strcmp (key, "performance.cache-max-file-size")) || @@ -126,8 +126,8 @@ validate_cache_max_min_size (dict_t *dict, char *key, char *value, "performance.cache-min-file-size", ¤t_min_value); if (current_min_value) { - gf_string2bytesize (current_min_value, &min_value); - gf_string2bytesize (value, &max_value); + gf_string2bytesize_uint64 (current_min_value, &min_value); + gf_string2bytesize_uint64 (value, &max_value); current_max_value = value; } } @@ -1389,6 +1389,18 @@ struct volopt_map_entry glusterd_volopt_map[] = { .type = GLOBAL_DOC, .op_version = 1 }, + { .key = "nfs.rpc-statd", + .voltype = "nfs/server", + .option = "nfs.rpc-statd", + .type = NO_DOC, + .op_version = 4, + }, + { .key = "nfs.log-level", + .voltype = "nfs/server", + .option = "nfs.log-level", + .type = NO_DOC, + .op_version = 4, + }, { .key = "nfs.server-aux-gids", .voltype = "nfs/server", .option = "nfs.server-aux-gids", @@ -1453,6 +1465,10 @@ struct volopt_map_entry glusterd_volopt_map[] = { .voltype = "storage/posix", .op_version = 3 }, + { .key = "storage.xattr-user-namespace-mode", + .voltype = "storage/posix", + .op_version = 4 + }, { .key = "storage.owner-uid", .voltype = "storage/posix", .option = "brick-uid", @@ -1527,6 +1543,16 @@ 
struct volopt_map_entry glusterd_volopt_map[] = { .type = NO_DOC, .op_version = 3 }, + { .key = "features.barrier", + .voltype = "features/barrier", + .value = "disable", + .op_version = 4 + }, + { .key = "features.barrier-timeout", + .voltype = "features/barrier", + .value = "120", + .op_version = 4 + }, { .key = NULL } }; diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 6e7a9278d..4d09d7fd9 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -30,6 +30,7 @@ #include "dict.h" #include "compat.h" #include "compat-errno.h" +#include "syscall.h" #include "statedump.h" #include "glusterd-sm.h" #include "glusterd-op-sm.h" @@ -311,7 +312,7 @@ out: } -inline int32_t +static inline int32_t glusterd_program_register (xlator_t *this, rpcsvc_t *svc, rpcsvc_program_t *prog) { @@ -804,7 +805,7 @@ check_prepare_mountbroker_root (char *mountbroker_root) dfd0 = dup (dfd); for (;;) { - ret = openat (dfd, "..", O_RDONLY); + ret = sys_openat (dfd, "..", O_RDONLY); if (ret != -1) { dfd2 = ret; ret = fstat (dfd2, &st2); @@ -839,11 +840,11 @@ check_prepare_mountbroker_root (char *mountbroker_root) st = st2; } - ret = mkdirat (dfd0, MB_HIVE, 0711); + ret = sys_mkdirat (dfd0, MB_HIVE, 0711); if (ret == -1 && errno == EEXIST) ret = 0; if (ret != -1) - ret = fstatat (dfd0, MB_HIVE, &st, AT_SYMLINK_NOFOLLOW); + ret = sys_fstatat (dfd0, MB_HIVE, &st, AT_SYMLINK_NOFOLLOW); if (ret == -1 || st.st_mode != (S_IFDIR|0711)) { gf_log ("", GF_LOG_ERROR, "failed to set up mountbroker-root directory %s", @@ -1255,6 +1256,17 @@ init (xlator_t *this) exit (1); } + snprintf (storedir, PATH_MAX, "%s/snaps", workdir); + + ret = mkdir (storedir, 0777); + + if ((-1 == ret) && (errno != EEXIST)) { + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to create snaps directory %s" + " ,errno = %d", storedir, errno); + exit (1); + } + snprintf (storedir, PATH_MAX, "%s/peers", workdir); ret = mkdir (storedir, 0777); diff --git 
a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 79461fb5d..7157bee64 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -310,7 +310,7 @@ struct glusterd_volinfo_ { char volname[GLUSTERD_MAX_VOLUME_NAME]; gf_boolean_t is_snap_volume; glusterd_snap_t *snapshot; - gf_boolean_t is_volume_restored; + uuid_t restored_from_snap; char parent_volname[GLUSTERD_MAX_VOLUME_NAME]; /* In case of a snap volume i.e (is_snap_volume == TRUE) this @@ -405,6 +405,7 @@ struct glusterd_snap_ { }; typedef struct glusterd_snap_op_ { + char *snap_vol_id; int32_t brick_num; char *brick_path; int32_t op; @@ -413,7 +414,8 @@ typedef struct glusterd_snap_op_ { } glusterd_snap_op_t; typedef struct glusterd_missed_snap_ { - char *node_snap_info; + char *node_uuid; + char *snap_uuid; struct list_head missed_snaps; struct list_head snap_ops; } glusterd_missed_snap_info; @@ -1010,8 +1012,8 @@ int32_t glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count); int32_t -glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, - char *brick_path, int32_t snap_op, - int32_t snap_status); +glusterd_add_new_entry_to_list (char *missed_info, char *snap_vol_id, + int32_t brick_num, char *brick_path, + int32_t snap_op, int32_t snap_status); #endif diff --git a/xlators/mount/fuse/src/Makefile.am b/xlators/mount/fuse/src/Makefile.am index 653121d18..7d1f93447 100644 --- a/xlators/mount/fuse/src/Makefile.am +++ b/xlators/mount/fuse/src/Makefile.am @@ -1,7 +1,9 @@ noinst_HEADERS_linux = $(CONTRIBDIR)/fuse-include/fuse_kernel.h\ $(CONTRIBDIR)/fuse-include/mount_util.h\ $(CONTRIBDIR)/fuse-lib/mount-gluster-compat.h -noinst_HEADERS_darwin = $(CONTRIBDIR)/fuse-include/fuse_kernel_macfuse.h +noinst_HEADERS_darwin = $(CONTRIBDIR)/fuse-include/fuse_kernel_macfuse.h\ + $(CONTRIBDIR)/macfuse/fuse_param.h\ + $(CONTRIBDIR)/macfuse/fuse_ioctl.h noinst_HEADERS_common = $(CONTRIBDIR)/fuse-include/fuse-mount.h\ 
$(CONTRIBDIR)/fuse-include/fuse-misc.h fuse-mem-types.h \ fuse-bridge.h diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index fd44c4fb5..d5ca4d146 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -29,6 +29,7 @@ static void fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino); * Send an invalidate notification up to fuse to purge the file from local * page cache. */ + static int32_t fuse_invalidate(xlator_t *this, inode_t *inode) { @@ -45,8 +46,8 @@ fuse_invalidate(xlator_t *this, inode_t *inode) return 0; nodeid = inode_to_fuse_nodeid(inode); - gf_log(this->name, GF_LOG_DEBUG, "Invalidate inode id %lu.", nodeid); - fuse_log_eh (this, "Sending invalidate inode id: %lu gfid: %s", nodeid, + gf_log(this->name, GF_LOG_DEBUG, "Invalidate inode id %"GF_PRI_INODE"." , nodeid); + fuse_log_eh (this, "Sending invalidate inode id: %"GF_PRI_INODE" gfid: %s", nodeid, uuid_utoa (inode->gfid)); fuse_invalidate_inode(this, nodeid); @@ -236,6 +237,7 @@ send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) send_fuse_data (this, finh, obj, sizeof (*(obj))) +#if FUSE_KERNEL_MINOR_VERSION >= 11 static void fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino) { @@ -293,6 +295,7 @@ fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino) if (inode) inode_unref (inode); } +#endif /* * Send an inval inode notification to fuse. 
This causes an invalidation of the @@ -301,6 +304,7 @@ fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino) static void fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) { +#if FUSE_KERNEL_MINOR_VERSION >= 11 struct fuse_out_header *fouh = NULL; struct fuse_notify_inval_inode_out *fniio = NULL; fuse_private_t *priv = NULL; @@ -346,8 +350,13 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) if (inode) inode_unref (inode); +#else + gf_log ("glusterfs-fuse", GF_LOG_WARNING, + "fuse_invalidate_inode not implemented on OS X due to missing FUSE notification"); +#endif } + int send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error) { @@ -368,7 +377,7 @@ send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error) uuid_utoa (inode->gfid)); } else { fuse_log_eh (this, "Sending %s for operation %d on " - "inode %ld", strerror (error), + "inode %" GF_PRI_INODE, strerror (error), finh->opcode, finh->nodeid); } } @@ -624,6 +633,7 @@ fuse_forget (xlator_t *this, fuse_in_header_t *finh, void *msg) GF_FREE (finh); } +#if FUSE_KERNEL_MINOR_VERSION >= 16 static void fuse_batch_forget(xlator_t *this, fuse_in_header_t *finh, void *msg) { @@ -640,9 +650,9 @@ fuse_batch_forget(xlator_t *this, fuse_in_header_t *finh, void *msg) continue; do_forget(this, finh->unique, ffo[i].nodeid, ffo[i].nlookup); } - GF_FREE(finh); } +#endif static int fuse_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -1142,7 +1152,9 @@ fuse_setattr (xlator_t *this, fuse_in_header_t *finh, void *msg) { struct fuse_setattr_in *fsi = msg; +#if FUSE_KERNEL_MINOR_VERSION >= 9 fuse_private_t *priv = NULL; +#endif fuse_state_t *state = NULL; GET_STATE (this, finh, state); @@ -1170,8 +1182,8 @@ fuse_setattr (xlator_t *this, fuse_in_header_t *finh, void *msg) * http://git.kernel.org/?p=linux/kernel/git/torvalds/ * linux-2.6.git;a=commit;h=v2.6.23-5896-gf333211 */ - priv = this->private; #if FUSE_KERNEL_MINOR_VERSION >= 9 + priv = this->private; if (priv->proto_minor 
>= 9 && fsi->valid & FATTR_LOCKOWNER) state->lk_owner = fsi->lock_owner; #endif @@ -1448,11 +1460,11 @@ fuse_mknod (xlator_t *this, fuse_in_header_t *finh, void *msg) char *name = (char *)(fmi + 1); fuse_state_t *state = NULL; +#if FUSE_KERNEL_MINOR_VERSION >= 12 fuse_private_t *priv = NULL; - int32_t ret = -1; + int32_t ret = -1; priv = this->private; -#if FUSE_KERNEL_MINOR_VERSION >= 12 if (priv->proto_minor < 12) name = (char *)msg + FUSE_COMPAT_MKNOD_IN_SIZE; #endif @@ -1466,8 +1478,8 @@ fuse_mknod (xlator_t *this, fuse_in_header_t *finh, void *msg) state->mode = fmi->mode; state->rdev = fmi->rdev; - priv = this->private; #if FUSE_KERNEL_MINOR_VERSION >=12 + priv = this->private; FUSE_ENTRY_CREATE(this, priv, finh, state, fmi, "MKNOD"); #endif @@ -1515,10 +1527,12 @@ fuse_mkdir (xlator_t *this, fuse_in_header_t *finh, void *msg) { struct fuse_mkdir_in *fmi = msg; char *name = (char *)(fmi + 1); +#if FUSE_KERNEL_MINOR_VERSION >=12 fuse_private_t *priv = NULL; + int32_t ret = -1; +#endif fuse_state_t *state; - int32_t ret = -1; GET_STATE (this, finh, state); @@ -1528,8 +1542,8 @@ fuse_mkdir (xlator_t *this, fuse_in_header_t *finh, void *msg) state->mode = fmi->mode; - priv = this->private; #if FUSE_KERNEL_MINOR_VERSION >=12 + priv = this->private; FUSE_ENTRY_CREATE(this, priv, finh, state, fmi, "MKDIR"); #endif @@ -2004,17 +2018,17 @@ fuse_create (xlator_t *this, fuse_in_header_t *finh, void *msg) { #if FUSE_KERNEL_MINOR_VERSION >= 12 struct fuse_create_in *fci = msg; + fuse_private_t *priv = NULL; + int32_t ret = -1; #else struct fuse_open_in *fci = msg; #endif char *name = (char *)(fci + 1); - fuse_private_t *priv = NULL; fuse_state_t *state = NULL; - int32_t ret = -1; - priv = this->private; #if FUSE_KERNEL_MINOR_VERSION >= 12 + priv = this->private; if (priv->proto_minor < 12) name = (char *)((struct fuse_open_in *)msg + 1); #endif @@ -2028,8 +2042,8 @@ fuse_create (xlator_t *this, fuse_in_header_t *finh, void *msg) state->mode = fci->mode; state->flags = 
fci->flags; - priv = this->private; #if FUSE_KERNEL_MINOR_VERSION >=12 + priv = this->private; FUSE_ENTRY_CREATE(this, priv, finh, state, fci, "CREATE"); #endif fuse_resolve_and_resume (state, fuse_create_resume); @@ -2167,7 +2181,9 @@ fuse_readv (xlator_t *this, fuse_in_header_t *finh, void *msg) { struct fuse_read_in *fri = msg; +#if FUSE_KERNEL_MINOR_VERSION >= 9 fuse_private_t *priv = NULL; +#endif fuse_state_t *state = NULL; fd_t *fd = NULL; @@ -2179,8 +2195,8 @@ fuse_readv (xlator_t *this, fuse_in_header_t *finh, void *msg) fuse_resolve_fd_init (state, &state->resolve, fd); /* See comment by similar code in fuse_settatr */ - priv = this->private; #if FUSE_KERNEL_MINOR_VERSION >= 9 + priv = this->private; if (priv->proto_minor >= 9 && fri->read_flags & FUSE_READ_LOCKOWNER) state->lk_owner = fri->lock_owner; #endif @@ -2188,8 +2204,9 @@ fuse_readv (xlator_t *this, fuse_in_header_t *finh, void *msg) state->size = fri->size; state->off = fri->offset; /* lets ignore 'fri->read_flags', but just consider 'fri->flags' */ +#if FUSE_KERNEL_MINOR_VERSION >= 9 state->io_flags = fri->flags; - +#endif fuse_resolve_and_resume (state, fuse_readv_resume); } @@ -2270,11 +2287,12 @@ fuse_write (xlator_t *this, fuse_in_header_t *finh, void *msg) struct fuse_write_in *fwi = (struct fuse_write_in *) (finh + 1); - fuse_private_t *priv = NULL; fuse_state_t *state = NULL; fd_t *fd = NULL; - +#if FUSE_KERNEL_MINOR_VERSION >= 9 + fuse_private_t *priv = NULL; priv = this->private; +#endif GET_STATE (this, finh, state); fd = FH_TO_FD (fwi->fh); @@ -2283,7 +2301,11 @@ fuse_write (xlator_t *this, fuse_in_header_t *finh, void *msg) state->off = fwi->offset; /* lets ignore 'fwi->write_flags', but just consider 'fwi->flags' */ +#if FUSE_KERNEL_MINOR_VERSION >= 9 state->io_flags = fwi->flags; +#else + state->io_flags = fwi->write_flags; +#endif /* TODO: may need to handle below flag (fwi->write_flags & FUSE_WRITE_CACHE); */ @@ -2292,8 +2314,8 @@ fuse_write (xlator_t *this, fuse_in_header_t 
*finh, void *msg) fuse_resolve_fd_init (state, &state->resolve, fd); /* See comment by similar code in fuse_settatr */ - priv = this->private; #if FUSE_KERNEL_MINOR_VERSION >= 9 + priv = this->private; if (priv->proto_minor >= 9 && fwi->write_flags & FUSE_WRITE_LOCKOWNER) state->lk_owner = fwi->lock_owner; #endif @@ -2531,8 +2553,8 @@ fuse_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { fuse_state_t *state = NULL; fuse_in_header_t *finh = NULL; - int size = 0; - int max_size = 0; + size_t size = 0; + size_t max_size = 0; char *buf = NULL; gf_dirent_t *entry = NULL; struct fuse_dirent *fde = NULL; @@ -2558,18 +2580,18 @@ fuse_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, frame->root->unique, op_ret, state->size, state->off); list_for_each_entry (entry, &entries->list, list) { - max_size += FUSE_DIRENT_ALIGN (FUSE_NAME_OFFSET + - strlen (entry->d_name)); + size_t fde_size = FUSE_DIRENT_ALIGN (FUSE_NAME_OFFSET + + strlen (entry->d_name)); + max_size += fde_size; if (max_size > state->size) { - /* we received to many entries to fit in the request */ - max_size -= FUSE_DIRENT_ALIGN (FUSE_NAME_OFFSET + - strlen (entry->d_name)); + /* we received too many entries to fit in the reply */ + max_size -= fde_size; break; } } - if (max_size <= 0) { + if (max_size == 0) { send_fuse_data (this, finh, 0, 0); goto out; } @@ -2636,7 +2658,7 @@ fuse_readdir (xlator_t *this, fuse_in_header_t *finh, void *msg) fuse_resolve_and_resume (state, fuse_readdir_resume); } - +#if FUSE_KERNEL_MINOR_VERSION >= 20 static int fuse_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, @@ -2644,8 +2666,8 @@ fuse_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { fuse_state_t *state = NULL; fuse_in_header_t *finh = NULL; - int max_size = 0; - int size = 0; + size_t max_size = 0; + size_t size = 0; char *buf = NULL; gf_dirent_t *entry = NULL; struct fuse_direntplus *fde = NULL; @@ 
-2670,19 +2692,18 @@ fuse_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, frame->root->unique, op_ret, state->size, state->off); list_for_each_entry (entry, &entries->list, list) { - max_size += FUSE_DIRENT_ALIGN (FUSE_NAME_OFFSET_DIRENTPLUS + - strlen (entry->d_name)); + size_t fdes = FUSE_DIRENT_ALIGN (FUSE_NAME_OFFSET_DIRENTPLUS + + strlen (entry->d_name)); + max_size += fdes; if (max_size > state->size) { - /* we received to many entries to fit in the reply */ - max_size -= FUSE_DIRENT_ALIGN ( - FUSE_NAME_OFFSET_DIRENTPLUS + - strlen (entry->d_name)); + /* we received too many entries to fit in the reply */ + max_size -= fdes; break; } } - if (max_size <= 0) { + if (max_size == 0) { send_fuse_data (this, finh, 0, 0); goto out; } @@ -2751,7 +2772,6 @@ out: } - void fuse_readdirp_resume (fuse_state_t *state) { @@ -2782,7 +2802,9 @@ fuse_readdirp (xlator_t *this, fuse_in_header_t *finh, void *msg) fuse_resolve_and_resume (state, fuse_readdirp_resume); } +#endif +#if FUSE_KERNEL_MINOR_VERSION >= 19 #ifdef FALLOC_FL_KEEP_SIZE static int fuse_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, @@ -2825,7 +2847,7 @@ fuse_fallocate(xlator_t *this, fuse_in_header_t *finh, void *msg) fuse_resolve_and_resume(state, fuse_fallocate_resume); } #endif /* FALLOC_FL_KEEP_SIZE */ - +#endif /* FUSE minor version >= 19 */ static void fuse_releasedir (xlator_t *this, fuse_in_header_t *finh, void *msg) @@ -3097,7 +3119,9 @@ fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) gf_log ("fuse", GF_LOG_TRACE, "got request to invalidate %"PRIu64, finh->nodeid); send_fuse_err (this, finh, 0); +#if FUSE_KERNEL_MINOR_VERSION >= 11 fuse_invalidate_entry (this, finh->nodeid); +#endif GF_FREE (finh); return; } @@ -3746,7 +3770,7 @@ fuse_setlk (xlator_t *this, fuse_in_header_t *finh, void *msg) return; } - +#if FUSE_KERNEL_MINOR_VERSION >= 11 static void * notify_kernel_loop (void *data) { @@ -3782,7 +3806,7 @@ notify_kernel_loop (void *data) return 
NULL; } - +#endif static void fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg) @@ -3791,8 +3815,10 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg) struct fuse_init_out fino = {0,}; fuse_private_t *priv = NULL; int ret = 0; +#if FUSE_KERNEL_MINOR_VERSION >= 9 int pfd[2] = {0,}; pthread_t messenger; +#endif priv = this->private; @@ -3885,16 +3911,17 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg) } if (fini->minor < 9) *priv->msg0_len_p = sizeof(*finh) + FUSE_COMPAT_WRITE_IN_SIZE; -#endif + if (priv->use_readdirp) { if (fini->flags & FUSE_DO_READDIRPLUS) fino.flags |= FUSE_DO_READDIRPLUS; } - +#endif if (priv->fopen_keep_cache == 2) { /* If user did not explicitly set --fopen-keep-cache[=off], then check if kernel support FUSE_AUTO_INVAL_DATA and ... */ +#if FUSE_KERNEL_MINOR_VERSION >= 20 if (fini->flags & FUSE_AUTO_INVAL_DATA) { /* ... enable fopen_keep_cache mode if supported. */ @@ -3903,7 +3930,10 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg) "fopen_keep_cache automatically."); fino.flags |= FUSE_AUTO_INVAL_DATA; priv->fopen_keep_cache = 1; - } else { + } else +#endif + { + gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "No support " "for FUSE_AUTO_INVAL_DATA. Disabling " "fopen_keep_cache."); @@ -3914,20 +3944,24 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg) /* If user explicitly set --fopen-keep-cache[=on], then enable FUSE_AUTO_INVAL_DATA if possible. */ +#if FUSE_KERNEL_MINOR_VERSION >= 20 if (fini->flags & FUSE_AUTO_INVAL_DATA) { gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "fopen_keep_cache " "is explicitly set. Enabling FUSE_AUTO_INVAL_DATA"); fino.flags |= FUSE_AUTO_INVAL_DATA; - } else { + } else +#endif + { gf_log ("glusterfs-fuse", GF_LOG_WARNING, "fopen_keep_cache " "is explicitly set. 
Support for " "FUSE_AUTO_INVAL_DATA is missing"); } } +#if FUSE_KERNEL_MINOR_VERSION >= 22 if (fini->flags & FUSE_ASYNC_DIO) fino.flags |= FUSE_ASYNC_DIO; - +#endif ret = send_fuse_obj (this, finh, &fino); if (ret == 0) gf_log ("glusterfs-fuse", GF_LOG_INFO, @@ -5142,11 +5176,20 @@ static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = { /* [FUSE_IOCTL] */ /* [FUSE_POLL] */ /* [FUSE_NOTIFY_REPLY] */ + +#if FUSE_KERNEL_MINOR_VERSION >= 16 [FUSE_BATCH_FORGET]= fuse_batch_forget, +#endif + +#if FUSE_KERNEL_MINOR_VERSION >= 19 #ifdef FALLOC_FL_KEEP_SIZE [FUSE_FALLOCATE] = fuse_fallocate, #endif /* FALLOC_FL_KEEP_SIZE */ +#endif + +#if FUSE_KERNEL_MINOR_VERSION >= 21 [FUSE_READDIRPLUS] = fuse_readdirp, +#endif }; @@ -5279,13 +5322,13 @@ init (xlator_t *this_xl) goto cleanup_exit; } - GF_OPTION_INIT ("attribute-timeout", priv->attribute_timeout, double, + GF_OPTION_INIT (ZR_ATTR_TIMEOUT_OPT, priv->attribute_timeout, double, cleanup_exit); - GF_OPTION_INIT ("entry-timeout", priv->entry_timeout, double, + GF_OPTION_INIT (ZR_ENTRY_TIMEOUT_OPT, priv->entry_timeout, double, cleanup_exit); - GF_OPTION_INIT ("negative-timeout", priv->negative_timeout, double, + GF_OPTION_INIT (ZR_NEGATIVE_TIMEOUT_OPT, priv->negative_timeout, double, cleanup_exit); GF_OPTION_INIT ("client-pid", priv->client_pid, int32, cleanup_exit); diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c index 2774bdaa8..0936d6311 100644 --- a/xlators/mount/fuse/src/fuse-helpers.c +++ b/xlators/mount/fuse/src/fuse-helpers.c @@ -235,7 +235,7 @@ out: if (sysctl(name, namelen, &kp, &kplen, NULL, 0) != 0) return; - ngroups = MIN(kp.kp_eproc.e_ucred.cr_ngroups, GF_MAX_AUX_GROUPS); + ngroups = MIN(kp.kp_eproc.e_ucred.cr_ngroups, NGROUPS_MAX); if (call_stack_alloc_groups (frame->root, ngroups) != 0) return; for (i = 0; i < ngroups; i++) diff --git a/xlators/mount/fuse/utils/mount_glusterfs.in b/xlators/mount/fuse/utils/mount_glusterfs.in index b12b4e04e..539b0f558 100755 --- 
a/xlators/mount/fuse/utils/mount_glusterfs.in +++ b/xlators/mount/fuse/utils/mount_glusterfs.in @@ -1,188 +1,538 @@ #!/bin/sh -# (C) 2008 Gluster Inc. <http://www.gluster.com> -# +# (C) 2014 Red Hat Inc. <http://www.redhat.com> +# # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of # the License, or (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free # Software Foundation, Inc., 51 Franklin Street, Fifth Floor, # Boston, MA 02110-1301 USA +warn () +{ + echo "$@" >/dev/stderr +} _init () { + # log level definitions LOG_NONE=NONE; LOG_CRITICAL=CRITICAL; LOG_ERROR=ERROR; LOG_WARNING=WARNING; - LOG_INFO=INFO; + LOG_INFO=INFO LOG_DEBUG=DEBUG; + LOG_TRACE=TRACE; - # set default log level to ERROR - log_level=$LOG_INFO; -} + HOST_NAME_MAX=64; -start_glusterfs () -{ prefix="@prefix@"; exec_prefix=@exec_prefix@; cmd_line=$(echo "@sbindir@/glusterfs"); - + + alias lsL='ls -L' + uname_s=`uname -s` + case ${uname_s} in + Darwin) + getinode="stat -f %i" + getdev="stat -f %d" + ;; + esac +} + +is_valid_hostname () +{ + local server=$1 + + length=$(echo $server | wc -c) + if [ ${length} -gt ${HOST_NAME_MAX} ]; then + return 1 + fi +} + +parse_backup_volfile_servers () +{ + local server_list=$1 + local servers="" + local new_servers="" + + servers=$(echo ${server_list} | sed 's/\:/ /g') + for server in ${servers}; do + is_valid_hostname ${server} + if [ $? 
-eq 1 ]; then + continue + fi + new_servers=$(echo "${new_servers} ${server}") + done + + echo ${new_servers} +} + +parse_volfile_servers () +{ + local server_list=$1 + local servers="" + local new_servers="" + + servers=$(echo ${server_list} | sed 's/,/ /g') + for server in ${servers}; do + is_valid_hostname ${server} + if [ $? -eq 1 ]; then + continue + fi + new_servers=$(echo "${new_servers} ${server}") + done + + echo ${new_servers} +} + +start_glusterfs () +{ if [ -n "$log_level_str" ]; then - case "$log_level_str" in - "ERROR") - log_level=$LOG_ERROR; - ;; + case "$( echo $log_level_str | awk '{print toupper($0)}')" in + "ERROR") + log_level=$LOG_ERROR; + ;; "INFO") log_level=$LOG_INFO; ;; - "DEBUG") - log_level=$LOG_DEBUG; - ;; - "CRITICAL") - log_level=$LOG_CRITICAL; - ;; - "WARNING") - log_level=$LOG_WARNING; - ;; - "NONE") - log_level=$LOG_NONE; - ;; - *) - echo "invalid log level $log_level_str, using INFO"; - log_level=$LOG_INFO; - ;; - esac - fi - cmd_line=$(echo "$cmd_line --log-level=$log_level"); - - if [ -n "$log_file" ]; then - cmd_line=$(echo "$cmd_line --log-file=$log_file"); + "DEBUG") + log_level=$LOG_DEBUG; + ;; + "CRITICAL") + log_level=$LOG_CRITICAL; + ;; + "WARNING") + log_level=$LOG_WARNING; + ;; + "TRACE") + log_level=$LOG_TRACE; + ;; + "NONE") + log_level=$LOG_NONE; + ;; + *) + warn "invalid log level $log_level_str, using INFO"; + log_level=$LOG_INFO; + ;; + esac + fi + + # options without values start here + if [ -n "$read_only" ]; then + cmd_line=$(echo "$cmd_line --read-only"); + fi + + if [ -n "$acl" ]; then + cmd_line=$(echo "$cmd_line --acl"); + fi + + if [ -n "$selinux" ]; then + cmd_line=$(echo "$cmd_line --selinux"); + fi + + if [ -n "$enable_ino32" ]; then + cmd_line=$(echo "$cmd_line --enable-ino32"); + fi + + if [ -n "$worm" ]; then + cmd_line=$(echo "$cmd_line --worm"); + fi + if [ -n "$volfile_max_fetch_attempts" ]; then + cmd_line=$(echo "$cmd_line --volfile-max-fetch-attempts=$volfile_max_fetch_attempts") + fi + + if [ 
-n "$fopen_keep_cache" ]; then + cmd_line=$(echo "$cmd_line --fopen-keep-cache"); fi if [ -n "$volfile_check" ]; then - cmd_line=$(echo "$cmd_line --volfile-check"); + cmd_line=$(echo "$cmd_line --volfile-check"); + fi + + if [ -n "$mem_accounting" ]; then + cmd_line=$(echo "$cmd_line --mem-accounting"); + fi + + if [ -n "$aux_gfid_mount" ]; then + cmd_line=$(echo "$cmd_line --aux-gfid-mount"); + fi + + if [ -n "$no_root_squash" ]; then + cmd_line=$(echo "$cmd_line --no-root-squash"); + fi + +#options with values start here + if [ -n "$log_level" ]; then + cmd_line=$(echo "$cmd_line --log-level=$log_level"); + fi + + if [ -n "$log_file" ]; then + cmd_line=$(echo "$cmd_line --log-file=$log_file"); fi if [ -n "$direct_io_mode" ]; then - cmd_line=$(echo "$cmd_line --disable-direct-io-mode"); + cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode"); fi - - if [ -z "$volfile_loc" ]; then - if [ -n "$transport" ]; then - cmd_line=$(echo "$cmd_line \ ---volfile-server=$server_ip \ ---volfile-server-transport=$transport"); - else - cmd_line=$(echo "$cmd_line \ ---volfile-server=$server_ip"); - fi - else - cmd_line=$(echo "$cmd_line --volfile=$volfile_loc"); + + if [ -n "$mac_compat" ]; then + cmd_line=$(echo "$cmd_line --mac-compat=$mac_compat"); + fi + + if [ -n "$use_readdirp" ]; then + cmd_line=$(echo "$cmd_line --use-readdirp=$use_readdirp"); fi if [ -n "$volume_name" ]; then cmd_line=$(echo "$cmd_line --volume-name=$volume_name"); fi - - if [ -n "$volume_id" ]; then - cmd_line=$(echo "$cmd_line --volfile-id=$volume_id"); + + if [ -n "$attribute_timeout" ]; then + cmd_line=$(echo "$cmd_line --attribute-timeout=$attribute_timeout"); + fi + + if [ -n "$entry_timeout" ]; then + cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout"); + fi + + if [ -n "$negative_timeout" ]; then + cmd_line=$(echo "$cmd_line --negative-timeout=$negative_timeout"); + fi + + if [ -n "$gid_timeout" ]; then + cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout"); + fi + + if [ -n 
"$bg_qlen" ]; then + cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); + fi + + if [ -n "$cong_threshold" ]; then + cmd_line=$(echo "$cmd_line --congestion-threshold=$cong_threshold"); + fi + + if [ -n "$fuse_mountopts" ]; then + cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts"); + fi + + if [ -n "$xlator_option" ]; then + cmd_line=$(echo "$cmd_line --xlator-option=$xlator_option"); + fi + + # for rdma volume, we have to fetch volfile with '.rdma' added + # to volume name, so that it fetches the right client vol file + volume_id_rdma=""; + + if [ -z "$volfile_loc" ]; then + if [ -n "$server_ip" ]; then + + servers=$(parse_volfile_servers ${server_ip}); + if [ -n "$servers" ]; then + for i in $(echo ${servers}); do + cmd_line=$(echo "$cmd_line --volfile-server=$i"); + done + else + warn "ERROR: No valid servers found on command line.. exiting" + print_usage + exit 1 + fi + + if [ -n "$backupvolfile_server" ]; then + if [ -z "$backup_volfile_servers" ]; then + is_valid_hostname ${backupvolfile_server}; + if [ $? -eq 1 ]; then + warn "ERROR: Invalid backup server specified.. 
exiting" + exit 1 + fi + cmd_line=$(echo "$cmd_line --volfile-server=$backupvolfile_server"); + fi + fi + + if [ -n "$backup_volfile_servers" ]; then + backup_servers=$(parse_backup_volfile_servers ${backup_volfile_servers}) + for i in $(echo ${backup_servers}); do + cmd_line=$(echo "$cmd_line --volfile-server=$i"); + done + fi + + if [ -n "$server_port" ]; then + cmd_line=$(echo "$cmd_line --volfile-server-port=$server_port"); + fi + + if [ -n "$transport" ]; then + cmd_line=$(echo "$cmd_line --volfile-server-transport=$transport"); + if [ "$transport" = "rdma" ]; then + volume_id_rdma=".rdma"; + fi + fi + + if [ -n "$volume_id" ]; then + if [ -n "$volume_id_rdma" ]; then + volume_id="$volume_id$volume_id_rdma"; + fi + cmd_line=$(echo "$cmd_line --volfile-id=$volume_id"); + fi + fi + else + cmd_line=$(echo "$cmd_line --volfile=$volfile_loc"); + fi + + if [ -n "$fuse_mountopts" ]; then + cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts"); fi cmd_line=$(echo "$cmd_line $mount_point"); - exec $cmd_line; + $cmd_line; + + if [ $? 
-ne 0 ]; then + exit 1; + fi } +print_usage () +{ +cat << EOF >/dev/stderr +Usage: $0 <volumeserver>:<volumeid/volumeport> -o<options> <mountpoint> +Options: +man 8 $0 +To display the version number of the mount helper: $0 -V +EOF +} -main () +with_options() { - - new_log_level="" - log_file="" - transport="" - direct_io_mode="" - volume_name="" - new_fs_options="" - volfile_check="" - - while getopts o: opt; do - case "$opt" in - o) - options=$(echo $OPTARG | sed -n 's/.*\-o[ ]*\([^ ]*\).*/\1/p'); - [ -z $new_log_level ] && { - new_log_level=$(echo "$options" | sed -n 's/.*log-level=\([^,]*\).*/\1/p'); - } - - [ -z $log_file ] && { - log_file=$(echo "$options" | sed -n 's/.*log-file=\([^,]*\).*/\1/p'); - } - - [ -z $transport ] && { - transport=$(echo "$options" | sed -n 's/.*transport=\([^,]*\).*/\1/p'); - } - - [ -z $direct_io_mode ] && { - direct_io_mode=$(echo "$options" | sed -n 's/.*direct-io-mode=\([^,]*\).*/\1/p'); - } - - [ -z $volfile_check ] && { - volfile_check=$(echo "$options" | sed -n 's/.*volfile-check=\([^,]*\).*/\1/p'); - } - - [ -z $volume_name ] && { - volume_name=$(echo "$options" | sed -n 's/.*volume-name=\([^,]*\).*/\1/p'); - } - - [ -z $volume_id ] && { - volume_id=$(echo "$options" | sed -n 's/.*volume-id=\([^,]*\).*/\1/p'); - } - - this_option=$(echo "$options" | sed -e 's/[,]*log-file=[^,]*//' \ - -e 's/[,]*log-level=[^,]*//' \ - -e 's/[,]*volume-name=[^,]*//' \ - -e 's/[,]*volfile-check=[^,]*//' \ - -e 's/[,]*direct-io-mode=[^,]*//' \ - -e 's/[,]*transport=[^,]*//' \ - -e 's/[,]*volume-id=[^,]*//'); - new_fs_options="$new_fs_options $this_option"; - ;; - esac + local key=$1 + local value=$2 + + # Handle options with values. 
+ case "$key" in + "log-level") + log_level_str=$value + ;; + "log-file") + log_file=$value + ;; + "transport") + transport=$value + ;; + "direct-io-mode") + direct_io_mode=$value + ;; + "mac-compat") + mac_compat=$value + ;; + "volume-name") + volume_name=$value + ;; + "volume-id") + volume_id=$value + ;; + "volfile-check") + volfile_check=$value + ;; + "server-port") + server_port=$value + ;; + "attribute-timeout") + attribute_timeout=$value + ;; + "entry-timeout") + entry_timeout=$value + ;; + "negative-timeout") + negative_timeout=$value + ;; + "gid-timeout") + gid_timeout=$value + ;; + "background-qlen") + bg_qlen=$value + ;; + "backup-volfile-servers") + backup_volfile_servers=$value + ;; + "backupvolfile-server") + backupvolfile_server=$value + ;; + "fetch-attempts") + volfile_max_fetch_attempts=$value + ;; + "congestion-threshold") + cong_threshold=$value + ;; + "xlator-option") + xlator_option=$value + ;; + "fuse-mountopts") + fuse_mountopts=$value + ;; + "use-readdirp") + use_readdirp=$value + ;; + "no-root-squash") + if [ $value == "yes" ] || + [ $value == "on" ] || + [ $value == "enable" ] || + [ $value == "true" ] ; then + no_root_squash=1; + fi ;; + "root-squash") + if [ $value == "no" ] || + [ $value == "off" ] || + [ $value == "disable" ] || + [ $value == "false" ] ; then + no_root_squash=1; + fi ;; + *) + warn "Invalid option: $key" + exit 1 + ;; + esac +} + +without_options() +{ + local option=$1 + # Handle options without values. + case "$option" in + "ro") + read_only=1 + ;; + "acl") + acl=1 + ;; + "selinux") + selinux=1 + ;; + "worm") + worm=1 + ;; + "fopen-keep-cache") + fopen_keep_cache=1 + ;; + "enable-ino32") + enable_ino32=1 + ;; + "mem-accounting") + mem_accounting=1 + ;; + "aux-gfid-mount") + if [ ${uname_s} = "Linux" ]; then + aux_gfid_mount=1 + fi + ;; + # "mount -t glusterfs" sends this, but it's useless. 
+ "rw") + ;; + # these ones are interpreted during system initialization + "noauto") + ;; + "_netdev") + ;; + *) + warn "Invalid option $option"; + exit 1 + ;; + esac +} + +parse_options() +{ + local optarg=${1} + for pair in $(echo ${optarg//,/ }); do + key=$(echo "$pair" | cut -f1 -d'='); + value=$(echo "$pair" | cut -f2- -d'='); + if [ "$key" = "$value" ]; then + without_options $pair; + else + with_options $key $value; + fi done +} - [ -n "$new_log_level" ] && { - log_level_str="$new_log_level"; - } +main () +{ + ## `mount` on OSX specifies options as first argument + if [[ $1 =~ "-o" ]]; then + volfile_loc=$3 + mount_point=$4 + else + volfile_loc=$1 + mount_point=$2 + fi - # TODO: use getopt. This is very much darwin specific - volfile_loc="$1"; - while [ "$volfile_loc" = "-o" ] ; do - shift ; - shift ; - volfile_loc="$1"; + while getopts "Vo:h" opt; do + case "${opt}" in + o) + parse_options ${OPTARG}; + ;; + V) + ${cmd_line} -V; + exit 0; + ;; + h) + print_usage; + exit 0; + ;; + ?) + print_usage; + exit 0; + ;; + esac done - + [ -r "$volfile_loc" ] || { server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:.\-]*\):.*/\1/p'); - volume_id=$(echo "$volfile_loc" | sed -n 's/[a-zA-Z0-9:.\-]*:\(.*\)/\1/p'); - volfile_loc=""; + volume_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p'); + [ -n "$volume_str" ] && { + volume_id="$volume_str"; + } + volfile_loc=""; + } + + [ -z "$volume_id" -o -z "$server_ip" ] && { + cat <<EOF >/dev/stderr +ERROR: Server name/volume name unspecified cannot proceed further.. +Please specify correct format +Usage: +man 8 $0 +EOF + exit 1; + } + + grep_ret=$(echo ${mount_point} | grep '^\-o'); + [ "x" != "x${grep_ret}" ] && { + cat <<EOF >/dev/stderr +ERROR: -o options cannot be specified in either first two arguments.. +Please specify correct style +Usage: +man 8 $0 +EOF + exit 1; + } + + # No need to do a ! -d test, it is taken care while initializing the + # variable mount_point + [ -z "$mount_point" -o ! 
-d "$mount_point" ] && { + cat <<EOF >/dev/stderr +ERROR: Mount point does not exist +Please specify a mount point +Usage: +man 8 $0 +EOF + exit 1; } - # following line is product of love towards sed - # $2=$(echo "$@" | sed -n 's/[^ ]* \([^ ]*\).*/\1/p'); - - mount_point="$2"; - fs_options=$(echo "$fs_options,$new_fs_options"); - start_glusterfs; } diff --git a/xlators/nfs/server/src/acl3.h b/xlators/nfs/server/src/acl3.h index 03d626f3e..220bc9e78 100644 --- a/xlators/nfs/server/src/acl3.h +++ b/xlators/nfs/server/src/acl3.h @@ -13,6 +13,11 @@ #include "glusterfs-acl.h" +#define ACL3_NULL 0 +#define ACL3_GETACL 1 +#define ACL3_SETACL 2 +#define ACL3_PROC_COUNT 3 + #define GF_ACL3_PORT 38469 #define GF_ACL GF_NFS"-ACL" diff --git a/xlators/nfs/server/src/nfs-fops.c b/xlators/nfs/server/src/nfs-fops.c index b91f73a53..56d4cba47 100644 --- a/xlators/nfs/server/src/nfs-fops.c +++ b/xlators/nfs/server/src/nfs-fops.c @@ -38,20 +38,25 @@ nfs_fix_groups (xlator_t *this, call_stack_t *root) struct passwd mypw; char mystrs[1024]; struct passwd *result; +#ifdef GF_DARWIN_HOST_OS + /* BSD/DARWIN does not correctly uses gid_t in getgrouplist */ + int mygroups[GF_MAX_AUX_GROUPS]; +#else gid_t mygroups[GF_MAX_AUX_GROUPS]; +#endif int ngroups; int i; int max_groups; struct nfs_state *priv = this->private; const gid_list_t *agl; - gid_list_t gl; + gid_list_t gl; if (!priv->server_aux_gids) { return; } - /* RPC enforces the GF_AUTH_GLUSTERFS_MAX_GROUPS limit */ - max_groups = GF_AUTH_GLUSTERFS_MAX_GROUPS(root->lk_owner.len); + /* RPC enforces the GF_AUTH_GLUSTERFS_MAX_GROUPS limit */ + max_groups = GF_AUTH_GLUSTERFS_MAX_GROUPS(root->lk_owner.len); agl = gid_cache_lookup(&priv->gid_cache, root->uid, 0, 0); if (agl) { diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c index 04cf030dc..918e86312 100644 --- a/xlators/nfs/server/src/nfs.c +++ b/xlators/nfs/server/src/nfs.c @@ -35,12 +35,16 @@ #include "options.h" #include "acl3.h" #include "rpc-drc.h" +#include 
"syscall.h" #define STRINGIFY(val) #val #define TOSTRING(val) STRINGIFY(val) #define OPT_SERVER_AUX_GIDS "nfs.server-aux-gids" #define OPT_SERVER_GID_CACHE_TIMEOUT "nfs.server.aux-gid-timeout" +#define OPT_SERVER_RPC_STATD "nfs.rpc-statd" +#define OPT_SERVER_RPC_STATD_PIDFILE "nfs.rpc-statd-pidfile" +#define OPT_SERVER_RPC_STATD_NOTIFY_PIDFILE "nfs.rpc-statd-notify-pidfile" /* TODO: DATADIR should be based on configure's $(localstatedir) */ #define DATADIR "/var/lib/glusterd" @@ -942,10 +946,14 @@ nfs_init_state (xlator_t *this) goto free_foppool; } } + GF_OPTION_INIT (OPT_SERVER_RPC_STATD, nfs->rpc_statd, path, free_foppool); + + GF_OPTION_INIT (OPT_SERVER_RPC_STATD_PIDFILE, nfs->rpc_statd_pid_file, path, free_foppool); GF_OPTION_INIT (OPT_SERVER_AUX_GIDS, nfs->server_aux_gids, bool, free_foppool); - GF_OPTION_INIT (OPT_SERVER_GID_CACHE_TIMEOUT, nfs->server_aux_gids_max_age, + GF_OPTION_INIT (OPT_SERVER_GID_CACHE_TIMEOUT, + nfs->server_aux_gids_max_age, uint32, free_foppool); if (gid_cache_init(&nfs->gid_cache, nfs->server_aux_gids_max_age) < 0) { @@ -953,9 +961,17 @@ nfs_init_state (xlator_t *this) goto free_foppool; } - if (stat("/sbin/rpc.statd", &stbuf) == -1) { - gf_log (GF_NFS, GF_LOG_WARNING, "/sbin/rpc.statd not found. " - "Disabling NLM"); + ret = sys_access (nfs->rpc_statd, X_OK); + if (ret) { + gf_log (GF_NFS, GF_LOG_WARNING, "%s not enough permissions to" + " access. Disabling NLM", nfs->rpc_statd); + nfs->enable_nlm = _gf_false; + } + + ret = sys_stat (nfs->rpc_statd, &stbuf); + if (ret || !S_ISREG (stbuf.st_mode)) { + gf_log (GF_NFS, GF_LOG_WARNING, "%s not a regular file." 
+ " Disabling NLM", nfs->rpc_statd); nfs->enable_nlm = _gf_false; } @@ -968,8 +984,8 @@ nfs_init_state (xlator_t *this) } ret = rpcsvc_set_outstanding_rpc_limit (nfs->rpcsvc, - this->options, - RPCSVC_DEF_NFS_OUTSTANDING_RPC_LIMIT); + this->options, + RPCSVC_DEF_NFS_OUTSTANDING_RPC_LIMIT); if (ret < 0) { gf_log (GF_NFS, GF_LOG_ERROR, "Failed to configure outstanding-rpc-limit"); @@ -1023,7 +1039,8 @@ nfs_reconfigure_state (xlator_t *this, dict_t *options) { int ret = 0; int keyindx = 0; - char *optstr = NULL; + char *rmtab = NULL; + char *rpc_statd = NULL; gf_boolean_t optbool; uint32_t optuint32; struct nfs_state *nfs = NULL; @@ -1068,19 +1085,36 @@ nfs_reconfigure_state (xlator_t *this, dict_t *options) goto out; } + /* reconfig nfs.rpc-statd... */ + rpc_statd = GF_RPC_STATD_PROG; + if (dict_get (options, OPT_SERVER_RPC_STATD_PIDFILE)) { + ret = dict_get_str (options, "nfs.rpc-statd", &rpc_statd); + if (ret < 0) { + gf_log (GF_NFS, GF_LOG_ERROR, "Failed to read " + "reconfigured option: nfs.rpc-statd"); + goto out; + } + } + + if (strcmp(nfs->rpc_statd, rpc_statd) != 0) { + gf_log (GF_NFS, GF_LOG_INFO, + "Reconfiguring nfs.rpc-statd needs NFS restart"); + goto out; + } + /* reconfig nfs.mount-rmtab */ - optstr = NFS_DATADIR "/rmtab"; + rmtab = NFS_DATADIR "/rmtab"; if (dict_get (options, "nfs.mount-rmtab")) { - ret = dict_get_str (options, "nfs.mount-rmtab", &optstr); + ret = dict_get_str (options, "nfs.mount-rmtab", &rmtab); if (ret < 0) { gf_log (GF_NFS, GF_LOG_ERROR, "Failed to read " "reconfigured option: nfs.mount-rmtab"); goto out; } - gf_path_strip_trailing_slashes (optstr); + gf_path_strip_trailing_slashes (rmtab); } - if (strcmp (nfs->rmtab, optstr) != 0) { - mount_rewrite_rmtab (nfs->mstate, optstr); + if (strcmp (nfs->rmtab, rmtab) != 0) { + mount_rewrite_rmtab (nfs->mstate, rmtab); gf_log (GF_NFS, GF_LOG_INFO, "Reconfigured nfs.mount-rmtab path: %s", nfs->rmtab); @@ -1665,7 +1699,7 @@ struct volume_options options[] = { "unrecognized option warnings." 
}, { .key = {"rpc-auth.addr.allow"}, - .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST, + .type = GF_OPTION_TYPE_CLIENT_AUTH_ADDR, .default_value = "all", .description = "Allow a comma separated list of addresses and/or" " hostnames to connect to the server. By default, all" @@ -1673,7 +1707,7 @@ struct volume_options options[] = { "define a general rule for all exported volumes." }, { .key = {"rpc-auth.addr.reject"}, - .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST, + .type = GF_OPTION_TYPE_CLIENT_AUTH_ADDR, .default_value = "none", .description = "Reject a comma separated list of addresses and/or" " hostnames from connecting to the server. By default," @@ -1681,7 +1715,7 @@ struct volume_options options[] = { "define a general rule for all exported volumes." }, { .key = {"rpc-auth.addr.*.allow"}, - .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST, + .type = GF_OPTION_TYPE_CLIENT_AUTH_ADDR, .default_value = "all", .description = "Allow a comma separated list of addresses and/or" " hostnames to connect to the server. By default, all" @@ -1689,7 +1723,7 @@ struct volume_options options[] = { "define a rule for a specific exported volume." }, { .key = {"rpc-auth.addr.*.reject"}, - .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST, + .type = GF_OPTION_TYPE_CLIENT_AUTH_ADDR, .default_value = "none", .description = "Reject a comma separated list of addresses and/or" " hostnames from connecting to the server. By default," @@ -1819,6 +1853,18 @@ struct volume_options options[] = { "storage, all GlusterFS servers will update and " "output (with 'showmount') the same list." }, + { .key = {OPT_SERVER_RPC_STATD}, + .type = GF_OPTION_TYPE_PATH, + .default_value = GF_RPC_STATD_PROG, + .description = "The executable of RPC statd utility. " + "Defaults to " GF_RPC_STATD_PROG + }, + { .key = {OPT_SERVER_RPC_STATD_PIDFILE}, + .type = GF_OPTION_TYPE_PATH, + .default_value = GF_RPC_STATD_PIDFILE, + .description = "The pid file of RPC statd utility. 
" + "Defaults to " GF_RPC_STATD_PIDFILE + }, { .key = {OPT_SERVER_AUX_GIDS}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", diff --git a/xlators/nfs/server/src/nfs.h b/xlators/nfs/server/src/nfs.h index 00c7f8046..fc745fbbd 100644 --- a/xlators/nfs/server/src/nfs.h +++ b/xlators/nfs/server/src/nfs.h @@ -86,6 +86,8 @@ struct nfs_state { gid_cache_t gid_cache; uint32_t generation; gf_boolean_t register_portmap; + char *rpc_statd; + char *rpc_statd_pid_file; }; struct nfs_inode_ctx { diff --git a/xlators/nfs/server/src/nfs3-helpers.c b/xlators/nfs/server/src/nfs3-helpers.c index 9059fc341..f67cccf1a 100644 --- a/xlators/nfs/server/src/nfs3-helpers.c +++ b/xlators/nfs/server/src/nfs3-helpers.c @@ -85,7 +85,7 @@ struct nfs3stat_strerror nfs3stat_strerror_table[] = { { NFS3ERR_SERVERFAULT, "Error occurred on the server or IO Error" }, { NFS3ERR_BADTYPE, "Type not supported by the server" }, { NFS3ERR_JUKEBOX, "Cannot complete server initiated request" }, - { -1, "IO Error" }, + { NFS3ERR_END_OF_LIST, "IO Error" }, }; @@ -543,7 +543,7 @@ char * nfsstat3_strerror(int stat) { int i; - for(i = 0; nfs3stat_strerror_table[i].stat != -1; i++) { + for(i = 0; nfs3stat_strerror_table[i].stat != NFS3ERR_END_OF_LIST ; i++) { if (nfs3stat_strerror_table[i].stat == stat) return nfs3stat_strerror_table[i].strerror; } diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c index 0fea135c7..6361f9e20 100644 --- a/xlators/nfs/server/src/nfs3.c +++ b/xlators/nfs/server/src/nfs3.c @@ -5186,7 +5186,7 @@ nfs3_init_options (struct nfs3_state *nfs3, dict_t *options) goto err; } - ret = gf_string2bytesize (optstr, &size64); + ret = gf_string2uint64 (optstr, &size64); if (ret == -1) { gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to format" " option: nfs3.read-size"); @@ -5209,7 +5209,7 @@ nfs3_init_options (struct nfs3_state *nfs3, dict_t *options) goto err; } - ret = gf_string2bytesize (optstr, &size64); + ret = gf_string2uint64 (optstr, &size64); if (ret == -1) { gf_log 
(GF_NFS3, GF_LOG_ERROR, "Failed to format" " option: nfs3.write-size"); @@ -5232,7 +5232,7 @@ nfs3_init_options (struct nfs3_state *nfs3, dict_t *options) goto err; } - ret = gf_string2bytesize (optstr, &size64); + ret = gf_string2uint64 (optstr, &size64); if (ret == -1) { gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to format" " option: nfs3.readdir-size"); diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c index c186537ea..4d0083fe2 100644 --- a/xlators/nfs/server/src/nlm4.c +++ b/xlators/nfs/server/src/nlm4.c @@ -33,7 +33,6 @@ #include "nfs-generics.h" #include "rpc-clnt.h" #include "nsm-xdr.h" -#include "nlmcbk-xdr.h" #include "run.h" #include <unistd.h> #include <rpc/pmap_clnt.h> @@ -157,9 +156,9 @@ nlm4_prep_nlm4_testargs (nlm4_testargs *args, struct nfs3_fh *fh, nlm4_lkowner_t *oh, char *cookiebytes) { memset (args, 0, sizeof (*args)); - args->alock.fh.n_bytes = (void *)fh; - args->alock.oh.n_bytes = (void *)oh; - args->cookie.n_bytes = (void *)cookiebytes; + args->alock.fh.nlm4_netobj_val = (void *)fh; + args->alock.oh.nlm4_netobj_val = (void *)oh; + args->cookie.nlm4_netobj_val = (void *)cookiebytes; } void @@ -167,9 +166,9 @@ nlm4_prep_nlm4_lockargs (nlm4_lockargs *args, struct nfs3_fh *fh, nlm4_lkowner_t *oh, char *cookiebytes) { memset (args, 0, sizeof (*args)); - args->alock.fh.n_bytes = (void *)fh; - args->alock.oh.n_bytes = (void *)oh; - args->cookie.n_bytes = (void *)cookiebytes; + args->alock.fh.nlm4_netobj_val = (void *)fh; + args->alock.oh.nlm4_netobj_val = (void *)oh; + args->cookie.nlm4_netobj_val = (void *)cookiebytes; } void @@ -177,9 +176,9 @@ nlm4_prep_nlm4_cancargs (nlm4_cancargs *args, struct nfs3_fh *fh, nlm4_lkowner_t *oh, char *cookiebytes) { memset (args, 0, sizeof (*args)); - args->alock.fh.n_bytes = (void *)fh; - args->alock.oh.n_bytes = (void *)oh; - args->cookie.n_bytes = (void *)cookiebytes; + args->alock.fh.nlm4_netobj_val = (void *)fh; + args->alock.oh.nlm4_netobj_val = (void *)oh; + 
args->cookie.nlm4_netobj_val = (void *)cookiebytes; } void @@ -187,9 +186,9 @@ nlm4_prep_nlm4_unlockargs (nlm4_unlockargs *args, struct nfs3_fh *fh, nlm4_lkowner_t *oh, char *cookiebytes) { memset (args, 0, sizeof (*args)); - args->alock.fh.n_bytes = (void *)fh; - args->alock.oh.n_bytes = (void *)oh; - args->cookie.n_bytes = (void *)cookiebytes; + args->alock.fh.nlm4_netobj_val = (void *)fh; + args->alock.oh.nlm4_netobj_val = (void *)oh; + args->cookie.nlm4_netobj_val = (void *)cookiebytes; } void @@ -197,9 +196,9 @@ nlm4_prep_shareargs (nlm4_shareargs *args, struct nfs3_fh *fh, nlm4_lkowner_t *oh, char *cookiebytes) { memset (args, 0, sizeof (*args)); - args->share.fh.n_bytes = (void *)fh; - args->share.oh.n_bytes = (void *)oh; - args->cookie.n_bytes = (void *)cookiebytes; + args->share.fh.nlm4_netobj_val = (void *)fh; + args->share.oh.nlm4_netobj_val = (void *)oh; + args->cookie.nlm4_netobj_val = (void *)cookiebytes; } void @@ -210,22 +209,22 @@ nlm4_prep_freeallargs (nlm4_freeallargs *args, nlm4_lkowner_t *oh) } void -nlm_copy_lkowner (gf_lkowner_t *dst, netobj *src) +nlm_copy_lkowner (gf_lkowner_t *dst, nlm4_netobj *src) { - dst->len = src->n_len; - memcpy (dst->data, src->n_bytes, dst->len); + dst->len = src->nlm4_netobj_len; + memcpy (dst->data, src->nlm4_netobj_val, dst->len); } int -nlm_is_oh_same_lkowner (gf_lkowner_t *a, netobj *b) +nlm_is_oh_same_lkowner (gf_lkowner_t *a, nlm4_netobj *b) { if (!a || !b) { gf_log (GF_NLM, GF_LOG_ERROR, "invalid args"); return -1; } - return (a->len == b->n_len && - !memcmp (a->data, b->n_bytes, a->len)); + return (a->len == b->nlm4_netobj_len && + !memcmp (a->data, b->nlm4_netobj_val, a->len)); } nlm4_stats @@ -653,7 +652,7 @@ err: } int -nlm4_generic_reply (rpcsvc_request_t *req, netobj cookie, nlm4_stats stat) +nlm4_generic_reply (rpcsvc_request_t *req, nlm4_netobj cookie, nlm4_stats stat) { nlm4_res res; @@ -975,8 +974,10 @@ nlm4_establish_callback (void *csarg) NLM_V4, IPPROTO_TCP); if (port == 0) { - gf_log (GF_NLM, 
GF_LOG_ERROR, "Unable to get NLM port of the " - "client. Is the firewall running on client?"); + gf_log (GF_NLM, GF_LOG_ERROR, + "Unable to get NLM port of the client." + " Is the firewall running on client?" + " OR Are RPC services running (rpcinfo -p)?"); goto err; } @@ -2423,9 +2424,21 @@ nlm4svc_init(xlator_t *nfsx) /* unlink sm-notify.pid so that when we restart rpc.statd/sm-notify * it thinks that the machine has restarted and sends NOTIFY to clients. */ - ret = unlink ("/var/run/sm-notify.pid"); + + /* TODO: + notify/rpc.statd is done differently on OSX + + On OSX rpc.statd is controlled by rpc.lockd and are part for launchd + (unified service management framework) + + A runcmd() should be invoking "launchctl start com.apple.lockd" + instead. This is still a theory but we need to thoroughly test it + out. Until then NLM support is non-existent on OSX. + */ + ret = unlink (GF_SM_NOTIFY_PIDFILE); if (ret == -1 && errno != ENOENT) { - gf_log (GF_NLM, GF_LOG_ERROR, "unable to unlink sm-notify"); + gf_log (GF_NLM, GF_LOG_ERROR, "unable to unlink %s: %d", + GF_SM_NOTIFY_PIDFILE, errno); goto err; } /* temporary work around to restart statd, not distro/OS independant. @@ -2433,37 +2446,43 @@ nlm4svc_init(xlator_t *nfsx) * killall will cause problems on solaris. 
*/ - pidfile = fopen ("/var/run/rpc.statd.pid", "r"); + char *pid_file = GF_RPC_STATD_PIDFILE; + if (nfs->rpc_statd_pid_file) + pid_file = nfs->rpc_statd_pid_file; + pidfile = fopen (pid_file, "r"); if (pidfile) { ret = fscanf (pidfile, "%d", &pid); if (ret <= 0) { gf_log (GF_NLM, GF_LOG_WARNING, "unable to get pid of " - "rpc.statd"); + "rpc.statd from %s ", GF_RPC_STATD_PIDFILE); ret = runcmd ("killall", "-9", "rpc.statd", NULL); } else kill (pid, SIGKILL); fclose (pidfile); } else { - gf_log (GF_NLM, GF_LOG_WARNING, "opening the pid file of " - "rpc.statd failed (%s)", strerror (errno)); + gf_log (GF_NLM, GF_LOG_WARNING, "opening %s of " + "rpc.statd failed (%s)", pid_file, strerror (errno)); /* if ret == -1, do nothing - case either statd was not * running or was running in valgrind mode */ ret = runcmd ("killall", "-9", "rpc.statd", NULL); } - ret = unlink ("/var/run/rpc.statd.pid"); + ret = unlink (GF_RPC_STATD_PIDFILE); if (ret == -1 && errno != ENOENT) { - gf_log (GF_NLM, GF_LOG_ERROR, "unable to unlink rpc.statd"); + gf_log (GF_NLM, GF_LOG_ERROR, "unable to unlink %s", pid_file); goto err; } - ret = runcmd ("/sbin/rpc.statd", NULL); + ret = runcmd (nfs->rpc_statd, NULL); if (ret == -1) { - gf_log (GF_NLM, GF_LOG_ERROR, "unable to start rpc.statd"); + gf_log (GF_NLM, GF_LOG_ERROR, "unable to start %s", + nfs->rpc_statd); goto err; } + + pthread_create (&thr, NULL, nsm_thread, (void*)NULL); timeout.tv_sec = nlm_grace_period; diff --git a/xlators/nfs/server/src/nlm4.h b/xlators/nfs/server/src/nlm4.h index 9b5d54081..e234b6944 100644 --- a/xlators/nfs/server/src/nlm4.h +++ b/xlators/nfs/server/src/nlm4.h @@ -31,9 +31,44 @@ #include "nlm4-xdr.h" #include "lkowner.h" +#define NLM4_NULL 0 +#define NLM4_TEST 1 +#define NLM4_LOCK 2 +#define NLM4_CANCEL 3 +#define NLM4_UNLOCK 4 +#define NLM4_GRANTED 5 +#define NLM4_TEST_MSG 6 +#define NLM4_LOCK_MSG 7 +#define NLM4_CANCEL_MSG 8 +#define NLM4_UNLOCK_MSG 9 +#define NLM4_GRANTED_MSG 10 +#define NLM4_TEST_RES 11 +#define 
NLM4_LOCK_RES 12 +#define NLM4_CANCEL_RES 13 +#define NLM4_UNLOCK_RES 14 +#define NLM4_GRANTED_RES 15 +#define NLM4_SM_NOTIFY 16 +#define NLM4_SEVENTEEN 17 +#define NLM4_EIGHTEEN 18 +#define NLM4_NINETEEN 19 +#define NLM4_SHARE 20 +#define NLM4_UNSHARE 21 +#define NLM4_NM_LOCK 22 +#define NLM4_FREE_ALL 23 +#define NLM4_PROC_COUNT 24 + /* Registered with portmap */ #define GF_NLM4_PORT 38468 #define GF_NLM GF_NFS"-NLM" +#ifdef GF_DARWIN_HOST_OS +#define GF_RPC_STATD_PROG "/usr/sbin/rpc.statd" +#define GF_RPC_STATD_PIDFILE "/var/run/statd.pid" +#define GF_SM_NOTIFY_PIDFILE "/var/run/statd.notify.pid" +#else +#define GF_RPC_STATD_PROG "/sbin/rpc.stat" +#define GF_RPC_STATD_PIDFILE "/var/run/rpc.statd.pid" +#define GF_SM_NOTIFY_PIDFILE "/var/run/sm-notify.pid" +#endif extern rpcsvc_program_t * nlm4svc_init (xlator_t *nfsx); diff --git a/xlators/nfs/server/src/nlmcbk_svc.c b/xlators/nfs/server/src/nlmcbk_svc.c index e1b588765..20d3728d0 100644 --- a/xlators/nfs/server/src/nlmcbk_svc.c +++ b/xlators/nfs/server/src/nlmcbk_svc.c @@ -13,7 +13,6 @@ * It was generated using rpcgen. */ -#include "nlmcbk-xdr.h" #include "nlm4.h" #include "logging.h" #include <stdio.h> diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index 8febfc8fb..facff5038 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -41,6 +41,9 @@ ioc_hashfn (void *data, int len) return (offset >> ioc_log2_page_size); } +/* TODO: This function is not used, uncomment when we find a + usage for this function. 
+ static inline ioc_inode_t * ioc_inode_reupdate (ioc_inode_t *ioc_inode) { @@ -54,6 +57,7 @@ ioc_inode_reupdate (ioc_inode_t *ioc_inode) return ioc_inode; } + static inline ioc_inode_t * ioc_get_inode (dict_t *dict, char *name) { @@ -77,6 +81,7 @@ ioc_get_inode (dict_t *dict, char *name) return ioc_inode; } +*/ int32_t ioc_inode_need_revalidate (ioc_inode_t *ioc_inode) @@ -1663,12 +1668,12 @@ reconfigure (xlator_t *this, dict_t *options) } GF_OPTION_RECONF ("max-file-size", table->max_file_size, - options, size, unlock); + options, size_uint64, unlock); GF_OPTION_RECONF ("min-file-size", table->min_file_size, - options, size, unlock); + options, size_uint64, unlock); - if ((table->max_file_size >= 0) && + if ((table->max_file_size <= UINT64_MAX) && (table->min_file_size > table->max_file_size)) { gf_log (this->name, GF_LOG_ERROR, "minimum size (%" PRIu64") of a file that can be cached is " @@ -1679,7 +1684,7 @@ reconfigure (xlator_t *this, dict_t *options) } GF_OPTION_RECONF ("cache-size", cache_size_new, - options, size, unlock); + options, size_uint64, unlock); if (!check_cache_size_ok (this, cache_size_new)) { ret = -1; gf_log (this->name, GF_LOG_ERROR, @@ -1736,13 +1741,13 @@ init (xlator_t *this) table->xl = this; table->page_size = this->ctx->page_size; - GF_OPTION_INIT ("cache-size", table->cache_size, size, out); + GF_OPTION_INIT ("cache-size", table->cache_size, size_uint64, out); GF_OPTION_INIT ("cache-timeout", table->cache_timeout, int32, out); - GF_OPTION_INIT ("min-file-size", table->min_file_size, size, out); + GF_OPTION_INIT ("min-file-size", table->min_file_size, size_uint64, out); - GF_OPTION_INIT ("max-file-size", table->max_file_size, size, out); + GF_OPTION_INIT ("max-file-size", table->max_file_size, size_uint64, out); if (!check_cache_size_ok (this, table->cache_size)) { ret = -1; @@ -1768,7 +1773,7 @@ init (xlator_t *this) INIT_LIST_HEAD (&table->inodes); - if ((table->max_file_size >= 0) + if ((table->max_file_size <= UINT64_MAX) && 
(table->min_file_size > table->max_file_size)) { gf_log ("io-cache", GF_LOG_ERROR, "minimum size (%" PRIu64") of a file that can be cached is " diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c index 402da886f..b8b4c5326 100644 --- a/xlators/performance/quick-read/src/quick-read.c +++ b/xlators/performance/quick-read/src/quick-read.c @@ -854,7 +854,7 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("cache-timeout", conf->cache_timeout, options, int32, out); - GF_OPTION_RECONF ("cache-size", cache_size_new, options, size, out); + GF_OPTION_RECONF ("cache-size", cache_size_new, options, size_uint64, out); if (!check_cache_size_ok (this, cache_size_new)) { ret = -1; gf_log (this->name, GF_LOG_ERROR, @@ -995,11 +995,11 @@ init (xlator_t *this) LOCK_INIT (&priv->table.lock); conf = &priv->conf; - GF_OPTION_INIT ("max-file-size", conf->max_file_size, size, out); + GF_OPTION_INIT ("max-file-size", conf->max_file_size, size_uint64, out); GF_OPTION_INIT ("cache-timeout", conf->cache_timeout, int32, out); - GF_OPTION_INIT ("cache-size", conf->cache_size, size, out); + GF_OPTION_INIT ("cache-size", conf->cache_size, size_uint64, out); if (!check_cache_size_ok (this, conf->cache_size)) { ret = -1; goto out; diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c index 6e2d84591..01c861d52 100644 --- a/xlators/performance/read-ahead/src/read-ahead.c +++ b/xlators/performance/read-ahead/src/read-ahead.c @@ -1124,7 +1124,8 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("page-count", conf->page_count, options, uint32, out); - GF_OPTION_RECONF ("page-size", conf->page_size, options, size, out); + GF_OPTION_RECONF ("page-size", conf->page_size, options, size_uint64, + out); ret = 0; out: @@ -1158,7 +1159,7 @@ init (xlator_t *this) conf->page_size = this->ctx->page_size; - GF_OPTION_INIT ("page-size", conf->page_size, size, out); + 
GF_OPTION_INIT ("page-size", conf->page_size, size_uint64, out); GF_OPTION_INIT ("page-count", conf->page_count, uint32, out); diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c index 53e6756f0..ba96bfcd3 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead.c +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c @@ -459,9 +459,9 @@ reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options, uint32, err); - GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size, + GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size_uint64, err); - GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, size, + GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, size_uint64, err); return 0; @@ -498,8 +498,8 @@ init(xlator_t *this) goto err; GF_OPTION_INIT("rda-request-size", priv->rda_req_size, uint32, err); - GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size, err); - GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size, err); + GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size_uint64, err); + GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, err); return 0; diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index 00457338d..3cb0d449b 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -2116,7 +2116,7 @@ reconfigure (xlator_t *this, dict_t *options) conf = this->private; - GF_OPTION_RECONF ("cache-size", conf->window_size, options, size, out); + GF_OPTION_RECONF ("cache-size", conf->window_size, options, size_uint64, out); GF_OPTION_RECONF ("flush-behind", conf->flush_behind, options, bool, out); @@ -2163,7 +2163,7 @@ init (xlator_t *this) conf->aggregate_size = WB_AGGREGATE_SIZE; /* configure 'option 
window-size <size>' */ - GF_OPTION_INIT ("cache-size", conf->window_size, size, out); + GF_OPTION_INIT ("cache-size", conf->window_size, size_uint64, out); if (!conf->window_size && conf->aggregate_size) { gf_log (this->name, GF_LOG_WARNING, diff --git a/xlators/protocol/client/src/client-lk.c b/xlators/protocol/client/src/client-lk.c index 1fd8f0d50..b3c36a420 100644 --- a/xlators/protocol/client/src/client-lk.c +++ b/xlators/protocol/client/src/client-lk.c @@ -227,7 +227,7 @@ subtract_locks (client_posix_lock_t *big, client_posix_lock_t *small) /* LOG-TODO : decide what more info is required here*/ gf_log ("client-protocol", GF_LOG_CRITICAL, "Unexpected case in subtract_locks. Please send " - "a bug report to gluster-devel@nongnu.org"); + "a bug report to gluster-devel@gluster.org"); } return v; diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 85bdc591b..f1415899e 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -21,6 +21,7 @@ #include "statedump.h" #include "compat-errno.h" +#include "xdr-rpc.h" #include "glusterfs3.h" extern rpc_clnt_prog_t clnt_handshake_prog; @@ -2490,7 +2491,7 @@ client_init_grace_timer (xlator_t *this, dict_t *options, conf->grace_ts.tv_nsec = 0; gf_log (this->name, GF_LOG_DEBUG, "Client grace timeout " - "value = %"PRIu64, conf->grace_ts.tv_sec); + "value = %"GF_PRI_SECOND, conf->grace_ts.tv_sec); ret = 0; out: diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c index 708acd936..a459f89e1 100644 --- a/xlators/protocol/server/src/server-handshake.c +++ b/xlators/protocol/server/src/server-handshake.c @@ -645,7 +645,7 @@ server_setvolume (rpcsvc_request_t *req) fail: rsp.dict.dict_len = dict_serialized_length (reply); - if (rsp.dict.dict_len < 0) { + if (rsp.dict.dict_len > UINT_MAX) { gf_log ("server-handshake", GF_LOG_DEBUG, "failed to get serialized length of reply dict"); op_ret = -1; diff 
--git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index 76c0036e0..600a311c3 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -33,6 +33,7 @@ server_decode_groups (call_frame_t *frame, rpcsvc_request_t *req) if (frame->root->ngrps == 0) return 0; + /* ngrps cannot be bigger than USHRT_MAX(65535) */ if (frame->root->ngrps > GF_MAX_AUX_GROUPS) return -1; @@ -744,7 +745,7 @@ serialize_rsp_direntp (gf_dirent_t *entries, gfs3_readdirp_rsp *rsp) /* if 'dict' is present, pack it */ if (entry->dict) { trav->dict.dict_len = dict_serialized_length (entry->dict); - if (trav->dict.dict_len < 0) { + if (trav->dict.dict_len > UINT_MAX) { gf_log (THIS->name, GF_LOG_ERROR, "failed to get serialized length " "of reply dict"); diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c index bf674271c..70b8ab3a6 100644 --- a/xlators/protocol/server/src/server-rpc-fops.c +++ b/xlators/protocol/server/src/server-rpc-fops.c @@ -1611,8 +1611,8 @@ server_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, fd_no = gf_fd_unused_get (serv_ctx->fdtable, fd); fd_ref (fd); - if ((fd_no < 0) || (fd == 0)) { - op_ret = fd_no; + if ((fd_no > UINT64_MAX) || (fd == 0)) { + op_ret = -1; op_errno = errno; } diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index 785517058..6bd00cac0 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -688,7 +688,7 @@ server_init_grace_timer (xlator_t *this, dict_t *options, conf->grace_ts.tv_sec = 10; gf_log (this->name, GF_LOG_DEBUG, "Server grace timeout " - "value = %"PRIu64, conf->grace_ts.tv_sec); + "value = %"GF_PRI_SECOND, conf->grace_ts.tv_sec); conf->grace_ts.tv_nsec = 0; diff --git a/xlators/storage/bd/src/bd-helper.c b/xlators/storage/bd/src/bd-helper.c index 4bd1d6111..d598e5755 100644 --- 
a/xlators/storage/bd/src/bd-helper.c +++ b/xlators/storage/bd/src/bd-helper.c @@ -11,6 +11,7 @@ #include "bd.h" #include "bd-mem-types.h" #include "run.h" +#include "lvm-defaults.h" int bd_inode_ctx_set (inode_t *inode, xlator_t *this, bd_attr_t *ctx) @@ -1018,4 +1019,3 @@ out: return ret; } - diff --git a/xlators/storage/bd/src/bd.c b/xlators/storage/bd/src/bd.c index 0954b553d..750b00943 100644 --- a/xlators/storage/bd/src/bd.c +++ b/xlators/storage/bd/src/bd.c @@ -38,6 +38,8 @@ #include "run.h" #include "protocol-common.h" #include "checksum.h" +#include "syscall.h" +#include "lvm-defaults.h" /* * Call back function for setxattr and removexattr. @@ -719,9 +721,8 @@ bd_do_fsync (int fd, int datasync) { int op_errno = 0; -#ifdef HAVE_FDATASYNC if (datasync) { - if (fdatasync (fd)) { + if (sys_fdatasync (fd)) { op_errno = errno; gf_log (THIS->name, GF_LOG_ERROR, "fdatasync on fd=%d failed: %s", @@ -729,9 +730,9 @@ bd_do_fsync (int fd, int datasync) } } else -#endif + { - if (fsync (fd)) { + if (sys_fsync (fd)) { op_errno = errno; gf_log (THIS->name, GF_LOG_ERROR, "fsync on fd=%d failed: %s", diff --git a/xlators/storage/bd/src/bd.h b/xlators/storage/bd/src/bd.h index 5307ca407..62add16cd 100644 --- a/xlators/storage/bd/src/bd.h +++ b/xlators/storage/bd/src/bd.h @@ -38,10 +38,6 @@ #define BD_LV "lv" #define BD_THIN "thin" -#define LVM_RESIZE "/sbin/lvresize" -#define LVM_CREATE "/sbin/lvcreate" -#define LVM_CONVERT "/sbin/lvconvert" - #define VOL_TYPE "volume.type" #define VOL_CAPS "volume.caps" @@ -165,7 +161,7 @@ int bd_clone (bd_local_t *local, bd_priv_t *priv); int bd_merge (bd_priv_t *priv, uuid_t gfid); int bd_get_origin (bd_priv_t *priv, loc_t *loc, fd_t *fd, dict_t *dict); -inline void bd_update_amtime(struct iatt *iatt, int flag); +void bd_update_amtime(struct iatt *iatt, int flag); int bd_snapshot_create (bd_local_t *local, bd_priv_t *priv); int bd_clone (bd_local_t *local, bd_priv_t *priv); int bd_merge (bd_priv_t *priv, uuid_t gfid); diff --git 
a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 5725cad7d..ab46f7f7e 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -784,6 +784,27 @@ out: return op_ret; } +#ifdef GF_DARWIN_HOST_OS +static +void posix_dump_buffer (xlator_t *this, const char *real_path, const char *key, + data_t *value, int flags) +{ + char buffer[3*value->len+1]; + int index = 0; + buffer[0] = 0; + gf_loglevel_t log_level = gf_log_get_loglevel (); + if (log_level == GF_LOG_TRACE) { + char *data = (char *) value->data; + for (index = 0; index < value->len; index++) + sprintf(buffer+3*index, " %02x", data[index]); + } + gf_log (this->name, GF_LOG_DEBUG, + "Dump %s: key:%s flags: %u length:%u data:%s ", + real_path, key, flags, value->len, + (log_level == GF_LOG_TRACE ? buffer : "<skipped in DEBUG>")); +} +#endif + static int gf_xattr_enotsup_log; int @@ -802,7 +823,9 @@ posix_handle_pair (xlator_t *this, const char *real_path, } else { sys_ret = sys_lsetxattr (real_path, key, value->data, value->len, flags); - +#ifdef GF_DARWIN_HOST_OS + posix_dump_buffer(this, real_path, key, value, flags); +#endif if (sys_ret < 0) { ret = -errno; if (errno == ENOTSUP) { @@ -825,13 +848,13 @@ posix_handle_pair (xlator_t *this, const char *real_path, gf_log (this->name, ((errno == EINVAL) ? GF_LOG_DEBUG : GF_LOG_ERROR), - "%s: key:%s error:%s", - real_path, key, + "%s: key:%s flags: %u length:%d error:%s", + real_path, key, flags, value->len, strerror (errno)); #else /* ! 
DARWIN */ gf_log (this->name, GF_LOG_ERROR, - "%s: key:%s error:%s", - real_path, key, + "%s: key:%s flags: %u length:%d error:%s", + real_path, key, flags, value->len, strerror (errno)); #endif /* DARWIN */ } @@ -1430,12 +1453,10 @@ posix_fsyncer_process (xlator_t *this, call_stub_t *stub, gf_boolean_t do_fsync) } if (do_fsync) { -#ifdef HAVE_FDATASYNC if (stub->args.datasync) - ret = fdatasync (pfd->fd); + ret = sys_fdatasync (pfd->fd); else -#endif - ret = fsync (pfd->fd); + ret = sys_fsync (pfd->fd); } else { ret = 0; } diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 2997a07dd..f7800184e 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -761,7 +761,7 @@ posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, if (ret < 0) { ret = -errno; gf_log(this->name, GF_LOG_ERROR, - "zerofill failed on fd %d length %ld %s", + "zerofill failed on fd %d length %" PRId64 " %s", pfd->fd, len, strerror(errno)); goto out; } @@ -2873,16 +2873,14 @@ posix_fsync (call_frame_t *frame, xlator_t *this, if (datasync) { ; -#ifdef HAVE_FDATASYNC - op_ret = fdatasync (_fd); + op_ret = sys_fdatasync (_fd); if (op_ret == -1) { gf_log (this->name, GF_LOG_ERROR, "fdatasync on fd=%p failed: %s", fd, strerror (errno)); } -#endif } else { - op_ret = fsync (_fd); + op_ret = sys_fsync (_fd); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -2925,6 +2923,23 @@ _handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, filler->flags); } +#ifdef GF_DARWIN_HOST_OS +static inline int +map_xattr_flags(int flags) +{ + /* DARWIN has different defines on XATTR_ flags. + There do not seem to be a POSIX standard + Parse any other flags over. 
+ */ + int darwinflags = flags & ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE); + if (GF_XATTR_CREATE & flags) + darwinflags |= XATTR_CREATE; + if (GF_XATTR_REPLACE & flags) + darwinflags |= XATTR_REPLACE; + return darwinflags; +} +#endif + int32_t posix_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int flags, dict_t *xdata) @@ -2951,7 +2966,11 @@ posix_setxattr (call_frame_t *frame, xlator_t *this, filler.real_path = real_path; filler.this = this; +#ifdef GF_DARWIN_HOST_OS + filler.flags = map_xattr_flags(flags); +#else filler.flags = flags; +#endif op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair, &filler); if (op_ret < 0) { @@ -3368,7 +3387,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, char *list = NULL; int32_t list_offset = 0; size_t remaining_size = 0; - char key[4096] = {0,}; + char keybuffer[4096] = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -3536,8 +3555,20 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, } if (name) { - strcpy (key, name); - + strcpy (keybuffer, name); + char *key = keybuffer; +#if defined(GF_DARWIN_HOST_OS_DISABLED) + if (priv->xattr_user_namespace == XATTR_STRIP) { + if (strncmp(key, "user.",5) == 0) { + key += 5; + gf_log (this->name, + GF_LOG_DEBUG, + "getxattr for file %s" + " stripping user key: %s -> %s", + real_path, keybuffer, key); + } + } +#endif size = sys_lgetxattr (real_path, key, NULL, 0); if (size <= 0) { op_errno = errno; @@ -3625,14 +3656,13 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, while (remaining_size > 0) { if (*(list + list_offset) == '\0') break; - - strcpy (key, list + list_offset); - size = sys_lgetxattr (real_path, key, NULL, 0); + strcpy (keybuffer, list + list_offset); + size = sys_lgetxattr (real_path, keybuffer, NULL, 0); if (size == -1) { op_ret = -1; op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " - "%s: key = %s (%s)", real_path, key, + "%s: key = %s (%s)", real_path, keybuffer, strerror (op_errno)); break; } @@ 
-3644,29 +3674,37 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, goto out; } - size = sys_lgetxattr (real_path, key, value, size); + size = sys_lgetxattr (real_path, keybuffer, value, size); if (size == -1) { op_ret = -1; op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " - "%s: key = %s (%s)", real_path, key, + "%s: key = %s (%s)", real_path, keybuffer, strerror (op_errno)); GF_FREE (value); break; } value [size] = '\0'; - op_ret = dict_set_dynptr (dict, key, value, size); +#ifdef GF_DARWIN_HOST_OS + /* The protocol expect namespace for now */ + char *newkey = NULL; + gf_add_prefix (XATTR_USER_PREFIX, keybuffer, &newkey); + strcpy (keybuffer, newkey); + GF_FREE (newkey); +#endif + op_ret = dict_set_dynptr (dict, keybuffer, value, size); if (op_ret < 0) { op_errno = -op_ret; gf_log (this->name, GF_LOG_ERROR, "dict set operation " - "on %s for the key %s failed.", real_path, key); + "on %s for the key %s failed.", real_path, + keybuffer); GF_FREE (value); goto out; } - remaining_size -= strlen (key) + 1; - list_offset += strlen (key) + 1; + remaining_size -= strlen (keybuffer) + 1; + list_offset += strlen (keybuffer) + 1; } /* while (remaining_size > 0) */ @@ -3743,7 +3781,16 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, if (name) { strcpy (key, name); - +#ifdef GF_DARWIN_HOST_OS + struct posix_private *priv = NULL; + priv = this->private; + if (priv->xattr_user_namespace == XATTR_STRIP) { + char *newkey = NULL; + gf_add_prefix (XATTR_USER_PREFIX, key, &newkey); + strcpy (key, newkey); + GF_FREE (newkey); + } +#endif size = sys_fgetxattr (_fd, key, NULL, 0); if (size <= 0) { op_errno = errno; @@ -3846,6 +3893,7 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, } value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "dict set operation " @@ -3924,7 +3972,11 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this, filler.fd = _fd; filler.this = this; 
+#ifdef GF_DARWIN_HOST_OS + filler.flags = map_xattr_flags(flags); +#else filler.flags = flags; +#endif op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair, &filler); if (op_ret < 0) { @@ -3949,7 +4001,17 @@ _posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data) filler = (posix_xattr_filler_t *) data; this = filler->this; - +#ifdef GF_DARWIN_HOST_OS + struct posix_private *priv = NULL; + priv = (struct posix_private *) this->private; + char *newkey = NULL; + if (priv->xattr_user_namespace == XATTR_STRIP) { + gf_remove_prefix (XATTR_USER_PREFIX, key, &newkey); + gf_log("remove_xattr", GF_LOG_DEBUG, "key %s => %s" , key, + newkey); + key = newkey; + } +#endif op_ret = sys_lremovexattr (filler->real_path, key); if (op_ret == -1) { filler->op_errno = errno; @@ -3958,7 +4020,9 @@ _posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data) "removexattr failed on %s (for %s): %s", filler->real_path, key, strerror (errno)); } - +#ifdef GF_DARWIN_HOST_OS + GF_FREE(newkey); +#endif return op_ret; } @@ -4190,10 +4254,19 @@ _posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v, optype = (gf_xattrop_flags_t)(filler->flags); this = filler->this; inode = filler->inode; - count = v->len; array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char); +#ifdef GF_DARWIN_HOST_OS + struct posix_private *priv = NULL; + priv = this->private; + if (priv->xattr_user_namespace == XATTR_STRIP) { + if (strncmp(k, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) { + k += XATTR_USER_PREFIX_LEN; + } + } +#endif + LOCK (&inode->lock); { if (filler->real_path) { @@ -4260,7 +4333,7 @@ _posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v, default: gf_log (this->name, GF_LOG_ERROR, "Unknown xattrop type (%d) on %s. 
Please send " - "a bug report to gluster-devel@nongnu.org", + "a bug report to gluster-devel@gluster.org", optype, filler->real_path); op_ret = -1; op_errno = EINVAL; @@ -5149,6 +5222,23 @@ set_batch_fsync_mode (struct posix_private *priv, const char *str) return 0; } +#ifdef GF_DARWIN_HOST_OS +static int +set_xattr_user_namespace_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->xattr_user_namespace = XATTR_NONE; + else if (strcmp (str, "strip") == 0) + priv->xattr_user_namespace = XATTR_STRIP; + else if (strcmp (str, "append") == 0) + priv->xattr_user_namespace = XATTR_APPEND; + else if (strcmp (str, "both") == 0) + priv->xattr_user_namespace = XATTR_BOTH; + else + return -1; + return 0; +} +#endif int reconfigure (xlator_t *this, dict_t *options) @@ -5178,6 +5268,21 @@ reconfigure (xlator_t *this, dict_t *options) goto out; } +#ifdef GF_DARWIN_HOST_OS + + char *xattr_user_namespace_mode_str = NULL; + + GF_OPTION_RECONF ("xattr-user-namespace-mode", xattr_user_namespace_mode_str, + options, str, out); + + if (set_xattr_user_namespace_mode (priv, xattr_user_namespace_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown xattr user namespace mode string: %s", + xattr_user_namespace_mode_str); + goto out; + } + +#endif + GF_OPTION_RECONF ("linux-aio", priv->aio_configured, options, bool, out); @@ -5365,7 +5470,8 @@ init (xlator_t *this) dir_data->data); ret = -1; goto out; - } else if ((size == -1) && (errno != ENODATA)) { + } else if ((size == -1) && (errno != ENODATA) && + (errno != ENOATTR)) { /* Wrong 'gfid' is set, it should be error */ gf_log (this->name, GF_LOG_WARNING, "%s: failed to fetch gfid (%s)", @@ -5624,8 +5730,24 @@ init (xlator_t *this) goto out; } - GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, - uint32, out); +#ifdef GF_DARWIN_HOST_OS + + char *xattr_user_namespace_mode_str = NULL; + + GF_OPTION_INIT ("xattr-user-namespace-mode", + xattr_user_namespace_mode_str, 
str, out); + + if (set_xattr_user_namespace_mode (_private, + xattr_user_namespace_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "Unknown xattr user namespace mode string: %s", + xattr_user_namespace_mode_str); + goto out; + } +#endif + + GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); out: return ret; } @@ -5784,5 +5906,15 @@ struct volume_options options[] = { .default_value = "off", .description = "Enable placeholders for gfid to path conversion" }, +#if GF_DARWIN_HOST_OS + { .key = {"xattr-user-namespace-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "none", + .description = "Option to control XATTR user namespace on the raw filesystem: " + "\t- None: Will use the user namespace, so files will be exchangable with Linux.\n" + " The raw filesystem will not be compatible with OS X Finder.\n" + "\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n" + }, +#endif { .key = {NULL} } }; diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index 91e0664ed..c9bfc984d 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -158,6 +158,16 @@ struct posix_private { uint32_t health_check_interval; pthread_t health_check; gf_boolean_t health_check_active; + +#ifdef GF_DARWIN_HOST_OS + enum { + XATTR_NONE = 0, + XATTR_STRIP, + XATTR_APPEND, + XATTR_BOTH, + } xattr_user_namespace; +#endif + }; typedef struct { |