diff options
Diffstat (limited to 'xlators')
65 files changed, 13948 insertions, 786 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 2bab0f853..164a651ba 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -861,6 +861,8 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this) GF_FREE (local->transaction.pre_op); GF_FREE (local->transaction.eager_lock); + GF_FREE (local->transaction.fop_subvols); + GF_FREE (local->transaction.failed_subvols); GF_FREE (local->transaction.basename); GF_FREE (local->transaction.new_basename); @@ -889,6 +891,15 @@ afr_replies_wipe (afr_local_t *local, afr_private_t *priv) memset (local->replies, 0, sizeof(*local->replies) * priv->child_count); } +void +afr_remove_eager_lock_stub (afr_local_t *local) +{ + LOCK (&local->fd->lock); + { + list_del_init (&local->transaction.eager_locked); + } + UNLOCK (&local->fd->lock); +} void afr_local_cleanup (afr_local_t *local, xlator_t *this) @@ -900,6 +911,10 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) syncbarrier_destroy (&local->barrier); + if (local->transaction.eager_lock_on && + !list_empty (&local->transaction.eager_locked)) + afr_remove_eager_lock_stub (local); + afr_local_transaction_cleanup (local, this); priv = this->private; @@ -969,6 +984,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) { /* writev */ GF_FREE (local->cont.writev.vector); + if (local->cont.writev.iobref) + iobref_unref (local->cont.writev.iobref); } { /* setxattr */ @@ -2106,6 +2123,12 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd) fd_ctx = (afr_fd_ctx_t *)(long) ctx; if (fd_ctx) { + //no need to take any locks + if (!list_empty (&fd_ctx->eager_locked)) + gf_log (this->name, GF_LOG_WARNING, "%s: Stale " + "Eager-lock stubs found", + uuid_utoa (fd->inode->gfid)); + for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) GF_FREE (fd_ctx->pre_op_done[i]); diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 01e078c13..4cb219246 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -1487,7 +1487,7 @@ afr_fgetxattr_wind (call_frame_t *frame, xlator_t *this, int subvol) return 0; } - STACK_WIND_COOKIE (frame, (void *) (long) subvol, afr_fgetxattr_cbk, + STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk, (void *) (long) subvol, priv->children[subvol], priv->children[subvol]->fops->fgetxattr, local->fd, local->cont.getxattr.name, diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 3dacfc8dd..3013ae730 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -1588,7 +1588,7 @@ afr_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, if (!transaction_frame) goto out; - local = AFR_FRAME_INIT (frame, op_errno); + local = AFR_FRAME_INIT (transaction_frame, op_errno); if (!local) goto out; @@ -1702,7 +1702,7 @@ afr_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, if (!transaction_frame) goto out; - local = AFR_FRAME_INIT (frame, op_errno); + local = AFR_FRAME_INIT (transaction_frame, op_errno); if (!local) goto out; diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index c0385153f..c0548d995 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -159,7 +159,7 @@ __afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd, */ #define is_last_block(o,b,s) ((s >= o) && (s <= (o + b))) if (HAS_HOLES ((&replies[source].poststat)) && - offset > replies[i].poststat.ia_size && + offset >= replies[i].poststat.ia_size && !is_last_block (offset, size, replies[source].poststat.ia_size) && (iov_0filled (iovec, count) == 0)) @@ -268,6 +268,31 @@ afr_selfheal_data_restore_time (call_frame_t *frame, xlator_t *this, } static int +afr_data_self_heal_type_get (afr_private_t *priv, unsigned char *healed_sinks, + int source, struct afr_reply *replies) +{ + int type = AFR_SELFHEAL_DATA_FULL; + int i = 0; + + if (priv->data_self_heal_algorithm == NULL) { + type = AFR_SELFHEAL_DATA_FULL; + for (i = 0; i < priv->child_count; i++) { + if (!healed_sinks[i] && i != source) + continue; + if (replies[i].poststat.ia_size) { + type = AFR_SELFHEAL_DATA_DIFF; + break; + } + } + } else if (strcmp (priv->data_self_heal_algorithm, "full") == 0) { + type = AFR_SELFHEAL_DATA_FULL; + } else if (strcmp (priv->data_self_heal_algorithm, "diff") == 0) { + type = AFR_SELFHEAL_DATA_DIFF; + } + return type; +} + +static int afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd, int source, unsigned char *healed_sinks, struct afr_reply *replies) @@ -296,14 +321,8 @@ afr_selfheal_data_do (call_frame_t *frame, xlator_t *this, fd_t *fd, "source=%d sinks=%s", uuid_utoa (fd->inode->gfid), source, sinks_str); - for (i = 0; i < priv->child_count; i++) { - if (!healed_sinks[i] && i != source) - continue; - if (replies[i].poststat.ia_size) { - type = AFR_SELFHEAL_DATA_DIFF; - break; - } - } + type = afr_data_self_heal_type_get (priv, healed_sinks, source, + replies); iter_frame = afr_copy_frame (frame); if (!iter_frame) diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index f974fdb59..205ff759e 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -1544,7 +1544,7 @@ afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd) } - int +int afr_transaction_resume (call_frame_t *frame, xlator_t *this) { afr_local_t *local = NULL; @@ -1555,11 +1555,7 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this) /* We don't need to retain "local" in the fd list anymore, writes to all subvols are finished by now */ - LOCK (&local->fd->lock); - { - list_del_init (&local->transaction.eager_locked); - } - UNLOCK (&local->fd->lock); + afr_remove_eager_lock_stub (local); } afr_restore_lk_owner (frame); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 2e1b78d1c..36042f7b2 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -971,4 +971,6 @@ afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this); int afr_local_pathinfo (char *pathinfo, gf_boolean_t *is_local); +void +afr_remove_eager_lock_stub (afr_local_t *local); #endif /* __AFR_H__ */ diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index d391b87d5..d74d0dfd4 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -753,6 +753,7 @@ dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this, int dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat, int32_t update_ctx); +void dht_inode_ctx_time_set (inode_t *inode, xlator_t *this, struct iatt *stat); int dht_inode_ctx_get (inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx); int dht_inode_ctx_set (inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx); diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 18a501f04..f1dc5072f 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -1109,6 +1109,34 @@ dht_inode_ctx_layout_set (inode_t *inode, xlator_t *this, return ret; } + +void +dht_inode_ctx_time_set (inode_t *inode, xlator_t *this, struct iatt *stat) +{ + dht_inode_ctx_t *ctx = NULL; + dht_stat_time_t *time = 0; + int ret = -1; + + ret = dht_inode_ctx_get (inode, this, &ctx); + + if (ret) + return; + + time = &ctx->time; + + time->mtime = stat->ia_mtime; + time->mtime_nsec = stat->ia_mtime_nsec; + + time->ctime = stat->ia_ctime; + time->ctime_nsec = stat->ia_ctime_nsec; + + time->atime = stat->ia_atime; + time->atime_nsec = stat->ia_atime_nsec; + + return; +} + + int dht_inode_ctx_time_update (inode_t *inode, xlator_t *this, struct iatt *stat, int32_t post) diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index 363bff3bf..576f007e5 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -862,9 +862,13 @@ unlock: UNLOCK (&frame->lock); this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) + if (is_last_call (this_call_cnt)) { + if (local->op_ret == 0) + dht_inode_ctx_time_set (local->loc.inode, this, + &local->stbuf); DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno, &local->prebuf, &local->stbuf, xdata); + } return 0; } diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index a17319ba6..4f78f5203 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -94,6 +94,41 @@ out: } +/* + return values: + -1 : failure + -2 : success + +Hard link migration is carried out in three stages. + +(Say there are n hardlinks) +Stage 1: Setting the new hashed subvol information on the 1st hardlink + encountered (linkto setxattr) + +Stage 2: Creating hardlinks on new hashed subvol for the 2nd to (n-1)th + hardlink + +Stage 3: Physical migration of the data file for nth hardlink + +Why to deem "-2" as success and not "0": + + dht_migrate_file expects return value "0" from _is_file_migratable if +the file has to be migrated. + + _is_file_migratable returns zero only when it is called with the +flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS". + + gf_defrag_handle_hardlink calls dht_migrate_file for physical migration +of the data file with the flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS" + +Hence, gf_defrag_handle_hardlink returning "0" for success will force +"dht_migrate_file" to migrate each of the hardlink which is not intended. + +For each of the three stage mentioned above "-2" will be returned and will +be converted to "0" in dht_migrate_file. + +*/ + int32_t gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, struct iatt *stbuf) @@ -164,6 +199,7 @@ gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, ret = -1; goto out; } + ret = -2; goto out; } else { linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs); @@ -200,12 +236,19 @@ gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs, if (ret) goto out; } - ret = 0; + ret = -2; out: return ret; } - +/* + return values + 0 : File will be migrated + -2 : File will not be migrated + (This is the return value from gf_defrag_handle_hardlink. Checkout + gf_defrag_handle_hardlink for description of "returning -2") + -1 : failure +*/ static inline int __is_file_migratable (xlator_t *this, loc_t *loc, struct iatt *stbuf, dict_t *xattrs, int flags) @@ -228,7 +271,12 @@ __is_file_migratable (xlator_t *this, loc_t *loc, if (flags == GF_DHT_MIGRATE_HARDLINK) { ret = gf_defrag_handle_hardlink (this, loc, xattrs, stbuf); - if (ret) { + + /* + Returning zero will force the file to be remigrated. + Checkout gf_defrag_handle_hardlink for more information. + */ + if (ret && ret != -2) { gf_log (this->name, GF_LOG_WARNING, "%s: failed to migrate file with link", loc->path); @@ -236,8 +284,8 @@ __is_file_migratable (xlator_t *this, loc_t *loc, } else { gf_log (this->name, GF_LOG_WARNING, "%s: file has hardlinks", loc->path); + ret = -ENOTSUP; } - ret = ENOTSUP; goto out; } @@ -743,9 +791,11 @@ dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to, /* Check if file can be migrated */ ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag); - if (ret) + if (ret) { + if (ret == -2) + ret = 0; goto out; - + } /* Take care of the special files */ if (!IA_ISREG (stbuf.ia_type)) { /* Special files */ diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 9e48a7c6e..d63fbb26c 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -2666,9 +2666,13 @@ reconfigure (xlator_t *this, dict_t *options) struct ios_conf *conf = NULL; int ret = -1; char *sys_log_str = NULL; + char *log_format_str = NULL; + char *logger_str = NULL; int sys_log_level = -1; char *log_str = NULL; int log_level = -1; + int log_format = -1; + int logger = -1; if (!this || !this->private) goto out; @@ -2696,6 +2700,18 @@ reconfigure (xlator_t *this, dict_t *options) gf_log_set_loglevel (log_level); } + GF_OPTION_RECONF ("logger", logger_str, options, str, out); + if (logger_str) { + logger = gf_check_logger (logger_str); + gf_log_set_logger (logger); + } + + GF_OPTION_RECONF ("log-format", log_format_str, options, str, out); + if (log_format_str) { + log_format = gf_check_log_format (log_format_str); + gf_log_set_logformat (log_format); + } + ret = 0; out: gf_log (this->name, GF_LOG_DEBUG, "reconfigure returning %d", ret); @@ -2727,6 +2743,10 @@ init (xlator_t *this) { struct ios_conf *conf = NULL; char *sys_log_str = NULL; + char *logger_str = NULL; + char *log_format_str = NULL; + int logger = -1; + int log_format = -1; int sys_log_level = -1; char *log_str = NULL; int log_level = -1; @@ -2785,6 +2805,19 @@ init (xlator_t *this) gf_log_set_loglevel (log_level); } + GF_OPTION_INIT ("logger", logger_str, str, out); + if (logger_str) { + logger = gf_check_logger (logger_str); + gf_log_set_logger (logger); + } + + GF_OPTION_INIT ("log-format", log_format_str, str, out); + if (log_format_str) { + log_format = gf_check_log_format (log_format_str); + gf_log_set_logformat (log_format); + } + + this->private = conf; ret = 0; out: @@ -3027,6 +3060,40 @@ struct volume_options options[] = { .value = { "DEBUG", "WARNING", "ERROR", "INFO", "CRITICAL", "NONE", "TRACE"} }, + { .key = {"logger"}, + .type = GF_OPTION_TYPE_STR, + .value = { GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG} + }, + { .key = {"client-logger"}, + .type = GF_OPTION_TYPE_STR, + .default_value = GF_LOGGER_GLUSTER_LOG, + .description = "Changes the logging sub-system to log to, for the " + "clients", + .value = { GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG} + }, + { .key = {"brick-logger"}, + .type = GF_OPTION_TYPE_STR, + .default_value = GF_LOGGER_GLUSTER_LOG, + .description = "Changes the logging sub-system to log to, for the " + "bricks", + .value = { GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG} + }, + { .key = {"log-format"}, + .type = GF_OPTION_TYPE_STR, + .value = { GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID} + }, + { .key = {"client-log-format"}, + .type = GF_OPTION_TYPE_STR, + .default_value = GF_LOG_FORMAT_WITH_MSG_ID, + .description = "Changes log format for the clients", + .value = { GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID} + }, + { .key = {"brick-log-format"}, + .type = GF_OPTION_TYPE_STR, + .default_value = GF_LOG_FORMAT_WITH_MSG_ID, + .description = "Changes the log format for the bricks", + .value = { GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID} + }, { .key = {NULL} }, }; diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am index d2f5ef192..1fdd474c2 100644 --- a/xlators/features/Makefile.am +++ b/xlators/features/Makefile.am @@ -1,4 +1,4 @@ -SUBDIRS = locks quota read-only mac-compat quiesce marker index \ +SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier \ protect compress changelog gfid-access $(GLUPY_SUBDIR) qemu-block # trash path-converter # filter CLEANFILES = diff --git a/xlators/features/barrier/Makefile.am b/xlators/features/barrier/Makefile.am new file mode 100644 index 000000000..a985f42a8 --- /dev/null +++ b/xlators/features/barrier/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/features/barrier/src/Makefile.am b/xlators/features/barrier/src/Makefile.am new file mode 100644 index 000000000..8859be328 --- /dev/null +++ b/xlators/features/barrier/src/Makefile.am @@ -0,0 +1,16 @@ +xlator_LTLIBRARIES = barrier.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +barrier_la_LDFLAGS = -module -avoid-version + +barrier_la_SOURCES = barrier.c + +barrier_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = barrier.h barrier-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/features/barrier/src/barrier-mem-types.h b/xlators/features/barrier/src/barrier-mem-types.h new file mode 100644 index 000000000..36647a669 --- /dev/null +++ b/xlators/features/barrier/src/barrier-mem-types.h @@ -0,0 +1,20 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __BARRIER_MEM_TYPES_H__ +#define __BARRIER_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_barrier_mem_types_ { + gf_barrier_mt_priv_t = gf_common_mt_end + 1, + gf_barrier_mt_end +}; +#endif diff --git a/xlators/features/barrier/src/barrier.c b/xlators/features/barrier/src/barrier.c new file mode 100644 index 000000000..e5465d1b4 --- /dev/null +++ b/xlators/features/barrier/src/barrier.c @@ -0,0 +1,600 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "barrier.h" +#include "defaults.h" +#include "call-stub.h" + +#include "statedump.h" + +int32_t +barrier_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, + dict_t *xdata) +{ + BARRIER_FOP_CBK (writev, out, frame, this, op_ret, op_errno, + prebuf, postbuf, xdata); +out: + return 0; +} + +int32_t +barrier_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + BARRIER_FOP_CBK (fremovexattr, out, frame, this, op_ret, op_errno, + xdata); +out: + return 0; +} + +int32_t +barrier_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + BARRIER_FOP_CBK (removexattr, out, frame, this, op_ret, op_errno, + xdata); +out: + return 0; +} + +int32_t +barrier_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + BARRIER_FOP_CBK (truncate, out, frame, this, op_ret, op_errno, prebuf, + postbuf, xdata); +out: + return 0; +} + +int32_t +barrier_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + BARRIER_FOP_CBK (ftruncate, out, frame, this, op_ret, op_errno, prebuf, + postbuf, xdata); +out: + return 0; +} + +int32_t +barrier_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + BARRIER_FOP_CBK (rename, out, frame, this, op_ret, op_errno, buf, + preoldparent, postoldparent, prenewparent, + postnewparent, xdata); +out: + return 0; +} + +int32_t +barrier_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + BARRIER_FOP_CBK (rmdir, out, frame, this, op_ret, op_errno, preparent, + postparent, xdata); +out: + return 0; +} + +int32_t +barrier_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + BARRIER_FOP_CBK (unlink, out, frame, this, op_ret, op_errno, preparent, + postparent, xdata); +out: + return 0; +} + +int32_t +barrier_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + BARRIER_FOP_CBK (fsync, out, frame, this, op_ret, op_errno, + prebuf, postbuf, xdata); +out: + return 0; +} + +int32_t +barrier_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t off, uint32_t flags, + struct iobref *iobref, dict_t *xdata) +{ + if (!(flags & O_SYNC)) { + STACK_WIND_TAIL (frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, + fd, vector, count, off, flags, iobref, xdata); + + return 0; + } + + STACK_WIND (frame, barrier_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, + off, flags, iobref, xdata); + return 0; +} + +int32_t +barrier_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + STACK_WIND (frame, barrier_fremovexattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fremovexattr, + fd, name, xdata); + return 0; +} + +int32_t +barrier_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + STACK_WIND (frame, barrier_removexattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->removexattr, + loc, name, xdata); + return 0; +} + +int32_t +barrier_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, + off_t offset, dict_t *xdata) +{ + STACK_WIND (frame, barrier_truncate_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->truncate, + loc, offset, xdata); + return 0; +} + +int32_t +barrier_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) +{ + STACK_WIND (frame, barrier_rename_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->rename, + oldloc, newloc, xdata); + return 0; +} + +int +barrier_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + dict_t *xdata) +{ + STACK_WIND (frame, barrier_rmdir_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->rmdir, + loc, flags, xdata); + return 0; +} + +int32_t +barrier_unlink (call_frame_t *frame, xlator_t *this, + loc_t *loc, int xflag, dict_t *xdata) +{ + STACK_WIND (frame, barrier_unlink_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->unlink, + loc, xflag, xdata); + return 0; +} + +int32_t +barrier_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset, dict_t *xdata) +{ + STACK_WIND (frame, barrier_ftruncate_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->ftruncate, + fd, offset, xdata); + return 0; +} + +int32_t +barrier_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t flags, dict_t *xdata) +{ + STACK_WIND (frame, barrier_fsync_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fsync, + fd, flags, xdata); + return 0; +} + +call_stub_t * +__barrier_dequeue (xlator_t *this, struct list_head *queue) +{ + call_stub_t *stub = NULL; + barrier_priv_t *priv = NULL; + + priv = this->private; + GF_ASSERT (priv); + + if (list_empty (queue)) + goto out; + + stub = list_entry (queue->next, call_stub_t, list); + list_del_init (&stub->list); + +out: + return stub; +} + +void +barrier_dequeue_all (xlator_t *this, struct list_head *queue) +{ + call_stub_t *stub = NULL; + + gf_log (this->name, GF_LOG_INFO, "Dequeuing all the barriered fops"); + + /* TODO: Start the below task in a new thread */ + while ((stub = __barrier_dequeue (this, queue))) + call_resume (stub); + + gf_log (this->name, GF_LOG_INFO, "Dequeuing the barriered fops is " + "finished"); + return; +} + +void +barrier_timeout (void *data) +{ + xlator_t *this = NULL; + barrier_priv_t *priv = NULL; + struct list_head queue = {0,}; + + this = data; + THIS = this; + priv = this->private; + + INIT_LIST_HEAD (&queue); + + gf_log (this->name, GF_LOG_CRITICAL, "Disabling barrier because of " + "the barrier timeout."); + + LOCK (&priv->lock); + { + __barrier_disable (this, &queue); + } + UNLOCK (&priv->lock); + + barrier_dequeue_all (this, &queue); + + return; +} + +void +__barrier_enqueue (xlator_t *this, call_stub_t *stub) +{ + barrier_priv_t *priv = NULL; + + priv = this->private; + GF_ASSERT (priv); + + list_add_tail (&stub->list, &priv->queue); + priv->queue_size++; + + return; +} + +void +__barrier_disable (xlator_t *this, struct list_head *queue) +{ + GF_UNUSED int ret = 0; + barrier_priv_t *priv = NULL; + + priv = this->private; + GF_ASSERT (priv); + + if (priv->timer) { + ret = gf_timer_call_cancel (this->ctx, priv->timer); + priv->timer = NULL; + } + + list_splice_init (&priv->queue, queue); + priv->queue_size = 0; + priv->barrier_enabled = _gf_false; +} + +int +__barrier_enable (xlator_t *this, barrier_priv_t *priv) +{ + int ret = -1; + + priv->timer = gf_timer_call_after (this->ctx, priv->timeout, + barrier_timeout, (void *) this); + if (!priv->timer) { + gf_log (this->name, GF_LOG_CRITICAL, "Couldn't add barrier " + "timeout event."); + goto out; + } + + priv->barrier_enabled = _gf_true; + ret = 0; +out: + return ret; +} + +int +reconfigure (xlator_t *this, dict_t *options) +{ + barrier_priv_t *priv = NULL; + gf_boolean_t past = _gf_false; + int ret = -1; + gf_boolean_t barrier_enabled = _gf_false; + uint32_t timeout = {0,}; + struct list_head queue = {0,}; + + priv = this->private; + GF_ASSERT (priv); + + GF_OPTION_RECONF ("barrier", barrier_enabled, options, bool, out); + GF_OPTION_RECONF ("timeout", timeout, options, time, out); + + INIT_LIST_HEAD (&queue); + + LOCK (&priv->lock); + { + past = priv->barrier_enabled; + + switch (past) { + case _gf_false: + if (barrier_enabled) { + ret = __barrier_enable (this, priv); + if (ret) + goto unlock; + + } else { + gf_log (this->name, GF_LOG_ERROR, + "Already disabled"); + goto unlock; + } + break; + + case _gf_true: + if (!barrier_enabled) { + __barrier_disable (this, &queue); + + } else { + gf_log (this->name, GF_LOG_ERROR, + "Already enabled"); + goto unlock; + } + break; + } + + priv->timeout.tv_sec = timeout; + + ret = 0; + } +unlock: + UNLOCK (&priv->lock); + + if (!list_empty (&queue)) + barrier_dequeue_all (this, &queue); + +out: + return ret; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + ret = xlator_mem_acct_init (this, gf_barrier_mt_end + 1); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Memory accounting " + "initialization failed."); + + return ret; +} + +int +init (xlator_t *this) +{ + int ret = -1; + barrier_priv_t *priv = NULL; + uint32_t timeout = {0,}; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "'barrier' not configured with exactly one child"); + goto out; + } + + if (!this->parents) + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + + priv = GF_CALLOC (1, sizeof (*priv), gf_barrier_mt_priv_t); + if (!priv) + goto out; + + LOCK_INIT (&priv->lock); + + GF_OPTION_INIT ("barrier", priv->barrier_enabled, bool, out); + GF_OPTION_INIT ("timeout", timeout, time, out); + priv->timeout.tv_sec = timeout; + + INIT_LIST_HEAD (&priv->queue); + + if (priv->barrier_enabled) { + ret = __barrier_enable (this, priv); + if (ret == -1) + goto out; + } + + this->private = priv; + ret = 0; +out: + return ret; +} + +void +fini (xlator_t *this) +{ + barrier_priv_t *priv = NULL; + struct list_head queue = {0,}; + + priv = this->private; + if (!priv) + goto out; + + INIT_LIST_HEAD (&queue); + + gf_log (this->name, GF_LOG_INFO, "Disabling barriering and dequeuing " + "all the queued fops"); + LOCK (&priv->lock); + { + __barrier_disable (this, &queue); + } + UNLOCK (&priv->lock); + + if (!list_empty (&queue)) + barrier_dequeue_all (this, &queue); + + this->private = NULL; + + LOCK_DESTROY (&priv->lock); + GF_FREE (priv); +out: + return; +} + +static void +barrier_dump_stub (call_stub_t *stub, char *prefix) +{ + char key[GF_DUMP_MAX_BUF_LEN] = {0,}; + + gf_proc_dump_build_key (key, prefix, "fop"); + gf_proc_dump_write (key, "%s", gf_fop_list[stub->fop]); + + gf_proc_dump_build_key (key, prefix, "gfid"); + gf_proc_dump_write (key, "%s", uuid_utoa (stub->args.loc.gfid)); + + if (stub->args.loc.path) { + gf_proc_dump_build_key (key, prefix, "path"); + gf_proc_dump_write (key, "%s", stub->args.loc.path); + } + if (stub->args.loc.name) { + gf_proc_dump_build_key (key, prefix, "name"); + gf_proc_dump_write (key, "%s", stub->args.loc.name); + } + + return; +} + +static void +__barrier_dump_queue (barrier_priv_t *priv) +{ + call_stub_t *stub = NULL; + char key[GF_DUMP_MAX_BUF_LEN] = {0,}; + int i = 0; + + GF_VALIDATE_OR_GOTO ("barrier", priv, out); + + list_for_each_entry (stub, &priv->queue, list) { + snprintf (key, sizeof (key), "stub.%d", i++); + gf_proc_dump_add_section (key); + barrier_dump_stub(stub, key); + } + +out: + return; +} + +int +barrier_dump_priv (xlator_t *this) +{ + int ret = -1; + char key[GF_DUMP_MAX_BUF_LEN] = {0,}; + barrier_priv_t *priv = NULL; + + GF_VALIDATE_OR_GOTO ("barrier", this, out); + + priv = this->private; + if (!priv) + return 0; + + gf_proc_dump_build_key (key, "xlator.features.barrier", "priv"); + gf_proc_dump_add_section (key); + + LOCK (&priv->lock); + { + gf_proc_dump_build_key (key, "barrier", "enabled"); + gf_proc_dump_write (key, "%d", priv->barrier_enabled); + gf_proc_dump_build_key (key, "barrier", "timeout"); + gf_proc_dump_write (key, "%"PRId64, priv->timeout.tv_sec); + if (priv->barrier_enabled) { + gf_proc_dump_build_key (key, "barrier", "queue_size"); + gf_proc_dump_write (key, "%d", priv->queue_size); + __barrier_dump_queue (priv); + } + } + UNLOCK (&priv->lock); + +out: + return ret; +} + +struct xlator_fops fops = { + + /* Barrier Class fops */ + .rmdir = barrier_rmdir, + .unlink = barrier_unlink, + .rename = barrier_rename, + .removexattr = barrier_removexattr, + .fremovexattr = barrier_fremovexattr, + .truncate = barrier_truncate, + .ftruncate = barrier_ftruncate, + .fsync = barrier_fsync, + + /* Writes with only O_SYNC flag */ + .writev = barrier_writev, +}; + +struct xlator_dumpops dumpops = { + .priv = barrier_dump_priv, +}; + +struct xlator_cbks cbks; + +struct volume_options options[] = { + { .key = {"barrier"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "When \"on\", blocks acknowledgements to application " + "for file operations such as rmdir, rename, unlink, " + "removexattr, fremovexattr, truncate, ftruncate, " + "write (with O_SYNC), fsync. It is turned \"off\" by " + "default." + }, + { .key = {"timeout"}, + .type = GF_OPTION_TYPE_TIME, + .default_value = "120", + .description = "After 'timeout' seconds since the time 'barrier' " + "option was set to \"on\", acknowledgements to file " + "operations are no longer blocked and previously " + "blocked acknowledgements are sent to the application" + }, + { .key = {NULL} }, +}; diff --git a/xlators/features/barrier/src/barrier.h b/xlators/features/barrier/src/barrier.h new file mode 100644 index 000000000..8face9f65 --- /dev/null +++ b/xlators/features/barrier/src/barrier.h @@ -0,0 +1,91 @@ +/* + Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __BARRIER_H__ +#define __BARRIER_H__ + +#include "barrier-mem-types.h" +#include "xlator.h" +#include "timer.h" +#include "call-stub.h" + +#define BARRIER_SAFE_ASSIGN(lock, to, value) \ + do { \ + LOCK (&(lock)); \ + { \ + to = value; \ + } \ + UNLOCK (&(lock)); \ + } while (0) + +#define BARRIER_FOP_CBK(fop_name, label, frame, this, params ...) \ + do { \ + barrier_priv_t *_priv = NULL; \ + call_stub_t *_stub = NULL; \ + gf_boolean_t _barrier_enabled= _gf_false; \ + struct list_head queue = {0, }; \ + \ + INIT_LIST_HEAD (&queue); \ + \ + _priv = this->private; \ + GF_ASSERT (_priv); \ + \ + LOCK (&_priv->lock); \ + { \ + if (_priv->barrier_enabled) { \ + _barrier_enabled = _priv->barrier_enabled;\ + \ + _stub = fop_##fop_name##_cbk_stub \ + (frame, \ + default_##fop_name##_cbk_resume,\ + params); \ + if (!_stub) { \ + __barrier_disable (this, &queue);\ + goto unlock; \ + } \ + \ + __barrier_enqueue (this, _stub); \ + } \ + } \ +unlock: \ + UNLOCK (&_priv->lock); \ + \ + if (_stub) \ + goto label; \ + \ + if (_barrier_enabled && !_stub) { \ + gf_log (this->name, GF_LOG_CRITICAL, \ + "Failed to barrier FOPs, disabling " \ + "barrier. FOP: %s, ERROR: %s", \ + #fop_name, strerror (ENOMEM)); \ + barrier_dequeue_all (this, &queue); \ + } \ + \ + STACK_UNWIND_STRICT (fop_name, frame, params); \ + goto label; \ + } while (0) + +typedef struct { + gf_timer_t *timer; + gf_boolean_t barrier_enabled; + gf_lock_t lock; + struct list_head queue; + struct timespec timeout; + uint32_t queue_size; +} barrier_priv_t; + +int __barrier_enable (xlator_t *this, barrier_priv_t *priv); +void __barrier_enqueue (xlator_t *this, call_stub_t *stub); +void __barrier_disable (xlator_t *this, struct list_head *queue); +void barrier_timeout (void *data); +void barrier_dequeue_all (xlator_t *this, struct list_head *queue); +call_stub_t *__barrier_dequeue (xlator_t *this, struct list_head *queue); + +#endif diff --git a/xlators/features/compress/src/cdc-mem-types.h b/xlators/features/compress/src/cdc-mem-types.h index efa008059..ead2c70ba 100644 --- a/xlators/features/compress/src/cdc-mem-types.h +++ b/xlators/features/compress/src/cdc-mem-types.h @@ -17,6 +17,7 @@ enum gf_cdc_mem_types { gf_cdc_mt_priv_t = gf_common_mt_end + 1, gf_cdc_mt_vec_t = gf_common_mt_end + 2, gf_cdc_mt_gzip_trailer_t = gf_common_mt_end + 3, + gf_cdc_mt_end = gf_common_mt_end + 4, }; #endif diff --git a/xlators/features/compress/src/cdc.c b/xlators/features/compress/src/cdc.c index a334c7e06..67fc52505 100644 --- a/xlators/features/compress/src/cdc.c +++ b/xlators/features/compress/src/cdc.c @@ -190,6 +190,25 @@ cdc_writev (call_frame_t *frame, } int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_cdc_mt_end); + + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } + + return ret; +} + +int32_t init (xlator_t *this) { int ret = -1; diff --git a/xlators/features/gfid-access/src/gfid-access.c b/xlators/features/gfid-access/src/gfid-access.c index 8e614397c..5cb6ecfbd 100644 --- a/xlators/features/gfid-access/src/gfid-access.c +++ b/xlators/features/gfid-access/src/gfid-access.c @@ -593,18 +593,19 @@ ga_virtual_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - int j = 0; - int i = 0; - int ret = 0; - uint64_t temp_ino = 0; - inode_t *cbk_inode = NULL; - inode_t *true_inode = NULL; - uuid_t random_gfid = {0,}; + int j = 0; + int i = 0; + int ret = 0; + uint64_t temp_ino = 0; + inode_t *cbk_inode = NULL; + inode_t *true_inode = NULL; + uuid_t random_gfid = {0,}; + inode_t *linked_inode = NULL; if (frame->local) cbk_inode = frame->local; else - cbk_inode = inode; + cbk_inode = inode_ref (inode); frame->local = NULL; if (op_ret) @@ -619,6 +620,10 @@ ga_virtual_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if its just previously discover()'d inode */ true_inode = inode_find (inode->table, buf->ia_gfid); if (!true_inode) { + /* This unref is for 'inode_ref()' done in beginning. + This is needed as cbk_inode is allocated new inode + whose unref is taken at the end*/ + inode_unref (cbk_inode); cbk_inode = inode_new (inode->table); if (!cbk_inode) { @@ -630,7 +635,8 @@ ga_virtual_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, path is not yet looked up. Use the current inode itself for now */ - inode_link (inode, NULL, NULL, buf); + linked_inode = inode_link (inode, NULL, NULL, buf); + inode = linked_inode; } else { /* 'inode_ref()' has been done in inode_find() */ inode = true_inode; @@ -674,6 +680,10 @@ unwind: STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, cbk_inode, buf, xdata, postparent); + /* Also handles inode_unref of frame->local if done in ga_lookup */ + if (cbk_inode) + inode_unref (cbk_inode); + return 0; } @@ -819,6 +829,8 @@ ga_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) /* time do another lookup and update the context with proper inode */ op_errno = ESTALE; + /* 'inode_ref()' done in inode_find */ + inode_unref (true_inode); goto err; } @@ -833,7 +845,7 @@ discover: /* if revalidate, then we need to have the proper reference */ if (inode) { tmp_loc.inode = inode_ref (inode); - frame->local = loc->inode; + frame->local = inode_ref (loc->inode); } else { tmp_loc.inode = inode_ref (loc->inode); } diff --git a/xlators/features/glupy/Makefile.am b/xlators/features/glupy/Makefile.am index a985f42a8..060429ecf 100644 --- a/xlators/features/glupy/Makefile.am +++ b/xlators/features/glupy/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = src +SUBDIRS = src examples CLEANFILES = diff --git a/xlators/features/glupy/examples/Makefile.am b/xlators/features/glupy/examples/Makefile.am new file mode 100644 index 000000000..c26abeaaf --- /dev/null +++ b/xlators/features/glupy/examples/Makefile.am @@ -0,0 +1,5 @@ +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +glupyexamplesdir = $(xlatordir)/glupy + +glupyexamples_PYTHON = negative.py helloworld.py debug-trace.py diff --git a/xlators/features/glupy/src/debug-trace.py b/xlators/features/glupy/examples/debug-trace.py index 53e76546b..6eef1b58b 100644 --- a/xlators/features/glupy/src/debug-trace.py +++ b/xlators/features/glupy/examples/debug-trace.py @@ -2,12 +2,13 @@ import sys import stat from uuid import UUID from time import strftime, localtime -from gluster import * +from gluster.glupy import * + # This translator was written primarily to test the fop entry point definitions -# and structure definitions in 'gluster.py'. -# It is similar to the debug-trace translator, one of the already available -# translator types written in C, that logs the arguments passed to the fops and -# their corresponding cbk functions. +# and structure definitions in 'glupy.py'. + +# It is similar to the C language debug-trace translator, which logs the +# arguments passed to the fops and their corresponding cbk functions. dl.get_id.restype = c_long dl.get_id.argtypes = [ POINTER(call_frame_t) ] diff --git a/xlators/features/glupy/src/helloworld.py b/xlators/features/glupy/examples/helloworld.py index 8fe403711..b565a4e5b 100644 --- a/xlators/features/glupy/src/helloworld.py +++ b/xlators/features/glupy/examples/helloworld.py @@ -1,5 +1,5 @@ import sys -from gluster import * +from gluster.glupy import * class xlator (Translator): diff --git a/xlators/features/glupy/src/negative.py b/xlators/features/glupy/examples/negative.py index 1023602b9..e7a4fc07c 100644 --- a/xlators/features/glupy/src/negative.py +++ b/xlators/features/glupy/examples/negative.py @@ -1,6 +1,6 @@ import sys from uuid import UUID -from gluster import * +from gluster.glupy import * # Negative-lookup-caching example. If a file wasn't there the last time we # looked, it's probably still not there. This translator keeps track of @@ -89,4 +89,3 @@ class xlator (Translator): dl.unwind_create(frame,cookie,this,op_ret,op_errno,fd,inode,buf, preparent,postparent,xdata) return 0 - diff --git a/xlators/features/glupy/src/Makefile.am b/xlators/features/glupy/src/Makefile.am index 21b91a164..ae7b6d14d 100644 --- a/xlators/features/glupy/src/Makefile.am +++ b/xlators/features/glupy/src/Makefile.am @@ -1,11 +1,12 @@ xlator_LTLIBRARIES = glupy.la +# Ensure GLUSTER_PYTHON_PATH is passed to glupy.so xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features - glupydir = $(xlatordir)/glupy +AM_CPPFLAGS = $(PYTHONDEV_CPPFLAGS) $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -isystem $(BUILD_PYTHON_INC) +AM_CFLAGS = $(PYTHONDEV_CFLAGS) -Wall -fno-strict-aliasing -DGLUSTER_PYTHON_PATH=\"$(glupydir)\" $(GF_CFLAGS) -glupy_PYTHON = gluster.py negative.py helloworld.py debug-trace.py - +# Flags to build glupy.so with glupy_la_LDFLAGS = $(PYTHONDEV_LDFLAGS) -module -avoid-version -shared -nostartfiles glupy_la_SOURCES = glupy.c glupy_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ @@ -13,8 +14,8 @@ glupy_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ noinst_HEADERS = glupy.h -AM_CPPFLAGS = $(PYTHONDEV_CPPFLAGS) $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -isystem $(BUILD_PYTHON_INC) - -AM_CFLAGS = $(PYTHONDEV_CFLAGS) -Wall -fno-strict-aliasing -DGLUSTER_PYTHON_PATH=\"$(glupydir)\" $(GF_CFLAGS) +# Install glupy.py into the Python site-packages area +pyglupydir = $(pythondir)/gluster +pyglupy_PYTHON = glupy.py CLEANFILES = diff --git a/xlators/features/glupy/src/glupy.c b/xlators/features/glupy/src/glupy.c index dc86c0071..7492124dd 100644 --- a/xlators/features/glupy/src/glupy.c +++ b/xlators/features/glupy/src/glupy.c @@ -2314,6 +2314,25 @@ get_rootunique (call_frame_t *frame) } int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_glupy_mt_end); + + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" + " failed"); + return ret; + } + + return ret; +} + +int32_t init (xlator_t *this) { glupy_private_t *priv = NULL; @@ -2365,7 +2384,7 @@ init (xlator_t *this) goto *err_cleanup; } - gf_log (this->name, GF_LOG_ERROR, "py_mod_name = %s", module_name); + gf_log (this->name, GF_LOG_DEBUG, "py_mod_name = %s", module_name); priv->py_module = PyImport_Import(py_mod_name); Py_DECREF(py_mod_name); if (!priv->py_module) { @@ -2375,6 +2394,7 @@ init (xlator_t *this) } goto *err_cleanup; } + gf_log (this->name, GF_LOG_INFO, "Import of %s succeeded", module_name); err_cleanup = &&err_deref_module; py_init_func = PyObject_GetAttrString(priv->py_module, "xlator"); @@ -2407,7 +2427,7 @@ init (xlator_t *this) } goto *err_cleanup; } - gf_log (this->name, GF_LOG_INFO, "init returned %p", priv->py_xlator); + gf_log (this->name, GF_LOG_DEBUG, "init returned %p", priv->py_xlator); return 0; diff --git a/xlators/features/glupy/src/gluster.py b/xlators/features/glupy/src/glupy.py index a5daa77d3..a5daa77d3 100644 --- a/xlators/features/glupy/src/gluster.py +++ b/xlators/features/glupy/src/glupy.py diff --git a/xlators/features/glupy/src/setup.py.in b/xlators/features/glupy/src/setup.py.in new file mode 100644 index 000000000..1aea9875f --- /dev/null +++ b/xlators/features/glupy/src/setup.py.in @@ -0,0 +1,24 @@ +from distutils.core import setup + +DESC = """GlusterFS is a clustered file-system capable of scaling to +several petabytes. It aggregates various storage bricks over Infiniband +RDMA or TCP/IP interconnect into one large parallel network file system. +GlusterFS is one of the most sophisticated file systems in terms of +features and extensibility. It borrows a powerful concept called +Translators from GNU Hurd kernel. Much of the code in GlusterFS is in +user space and easily manageable. + +This package contains Glupy, the Python translator interface for GlusterFS.""" + +setup( + name='glusterfs-glupy', + version='@PACKAGE_VERSION@', + description='Glupy is the Python translator interface for GlusterFS', + long_description=DESC, + author='Gluster Community', + author_email='gluster-devel@nongnu.org', + license='LGPLv3', + url='http://gluster.org/', + package_dir={'gluster':''}, + packages=['gluster'] +) diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index ea6995627..dc86512be 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -62,6 +62,7 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type, newlock->volume = domain; newlock->owner = frame->root->lk_owner; newlock->frame = frame; + newlock->this = frame->this; if (conn_id) { newlock->connection_id = gf_strdup (conn_id); diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index 25f2ed051..ac53e67ee 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -12,7 +12,8 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ glusterd-geo-rep.c glusterd-replace-brick.c glusterd-log-ops.c \ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \ - glusterd-locks.c glusterd-etcd.c + glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \ + glusterd-mgmt.c glusterd-etcd.c glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la \ @@ -22,7 +23,8 @@ glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \ glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \ - glusterd-syncop.h glusterd-hooks.h glusterd-locks.h glusterd-etcd.h + glusterd-syncop.h glusterd-hooks.h glusterd-locks.h \ + glusterd-mgmt.h glusterd-etcd.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \ diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index f15ec7b18..1804dd02e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1207,6 +1207,7 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) int ret = 0; char *volname = NULL; int count = 0; + int replica_count = 0; int i = 0; char *bricks = NULL; char *brick_list = NULL; @@ -1215,17 +1216,31 @@ glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) char *brick = NULL; glusterd_brickinfo_t *brickinfo = NULL; glusterd_volinfo_t *volinfo = NULL; - glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; char msg[2048] = {0,}; gf_boolean_t brick_alloc = _gf_false; char *all_bricks = NULL; char *str_ret = NULL; gf_boolean_t is_force = _gf_false; - priv = THIS->private; - if (!priv) - goto out; + this = THIS; + GF_ASSERT (this); + + ret = dict_get_int32 (dict, "replica-count", &replica_count); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to get replica count"); + } + if (replica_count > 0) { + ret = op_version_check (this, GD_OP_VER_PERSISTENT_AFR_XATTRS, + msg, sizeof(msg)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + } ret = dict_get_str (dict, "volname", &volname); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, @@ -1376,6 +1391,14 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) this = THIS; GF_ASSERT (this); + ret = op_version_check (this, GD_OP_VER_PERSISTENT_AFR_XATTRS, + msg, sizeof(msg)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + ret = dict_get_str (dict, "volname", &volname); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 34dd20545..b111d2e54 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -377,6 +377,21 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + /* As of now, the snap volumes are also displayed as part of + volume info command. So this change is to display whether + the volume is original volume or the snap_volume. If + displaying of snap volumes in volume info o/p is not needed + this should be removed. + */ + snprintf (key, 256, "volume%d.snap_volume", count); + ret = dict_set_int32 (volumes, key, volinfo->is_snap_volume); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "failed to set whether " + "the volume is a snap volume or actual volume (%s)", + volinfo->volname); + goto out; + } + snprintf (key, 256, "volume%d.brick_count", count); ret = dict_set_int32 (volumes, key, volinfo->brick_count); if (ret) @@ -635,7 +650,7 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, goto out; } - /* Based on the op_version, acquire a cluster or volume lock */ + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ if (priv->op_version < GD_OP_VERSION_4) { ret = glusterd_lock (MY_UUID); if (ret) { @@ -665,7 +680,7 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, goto out; } - ret = glusterd_volume_lock (volname, MY_UUID); + ret = glusterd_mgmt_v3_lock (volname, MY_UUID, "vol"); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to acquire lock for %s", volname); @@ -712,11 +727,12 @@ local_locking_done: out: if (locked && ret) { /* Based on the op-version, we release the - * cluster or volume lock */ + * cluster or mgmt_v3 lock */ if (priv->op_version < GD_OP_VERSION_4) glusterd_unlock (MY_UUID); else { - ret = glusterd_volume_unlock (volname, MY_UUID); + ret = glusterd_mgmt_v3_unlock (volname, MY_UUID, + "vol"); if (ret) gf_log (this->name, GF_LOG_ERROR, "Unable to release lock for %s", @@ -823,188 +839,6 @@ glusterd_handle_cluster_lock (rpcsvc_request_t *req) __glusterd_handle_cluster_lock); } -static int -glusterd_handle_volume_lock_fn (rpcsvc_request_t *req) -{ - gd1_mgmt_volume_lock_req lock_req = {{0},}; - int32_t ret = -1; - glusterd_op_lock_ctx_t *ctx = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - xlator_t *this = NULL; - glusterd_op_info_t txn_op_info = {{0},}; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (req); - - ret = xdr_to_generic (req->msg[0], &lock_req, - (xdrproc_t)xdr_gd1_mgmt_volume_lock_req); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock " - "request received from peer"); - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log (this->name, GF_LOG_DEBUG, "Received volume lock req " - "from uuid: %s txn_id: %s", uuid_utoa (lock_req.uuid), - uuid_utoa (lock_req.txn_id)); - - if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { - gf_log (this->name, GF_LOG_WARNING, "%s doesn't " - "belong to the cluster. Ignoring request.", - uuid_utoa (lock_req.uuid)); - ret = -1; - goto out; - } - - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); - if (!ctx) { - ret = -1; - goto out; - } - - uuid_copy (ctx->uuid, lock_req.uuid); - ctx->req = req; - - ctx->dict = dict_new (); - if (!ctx->dict) { - ret = -1; - goto out; - } - - ret = dict_unserialize (lock_req.dict.dict_val, - lock_req.dict.dict_len, &ctx->dict); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "failed to unserialize the dictionary"); - goto out; - } - - glusterd_txn_opinfo_init (&txn_op_info, NULL, &lock_req.op, - ctx->dict, req); - - ret = glusterd_set_txn_opinfo (&lock_req.txn_id, &txn_op_info); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Unable to set transaction's opinfo"); - goto out; - } - - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, - &lock_req.txn_id, ctx); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "Failed to inject event GD_OP_EVENT_LOCK"); - -out: - if (ret) { - if (ctx) { - if (ctx->dict) - dict_destroy (ctx->dict); - GF_FREE (ctx); - } - } - - glusterd_friend_sm (); - glusterd_op_sm (); - - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - -int -glusterd_handle_volume_lock (rpcsvc_request_t *req) -{ - return glusterd_big_locked_handler (req, - glusterd_handle_volume_lock_fn); -} - -static int -glusterd_handle_volume_unlock_fn (rpcsvc_request_t *req) -{ - gd1_mgmt_volume_unlock_req lock_req = {{0},}; - int32_t ret = -1; - glusterd_op_lock_ctx_t *ctx = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - xlator_t *this = NULL; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (req); - - ret = xdr_to_generic (req->msg[0], &lock_req, - (xdrproc_t)xdr_gd1_mgmt_volume_unlock_req); - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock " - "request received from peer"); - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log (this->name, GF_LOG_DEBUG, "Received volume unlock req " - "from uuid: %s", uuid_utoa (lock_req.uuid)); - - if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { - gf_log (this->name, GF_LOG_WARNING, "%s doesn't " - "belong to the cluster. Ignoring request.", - uuid_utoa (lock_req.uuid)); - ret = -1; - goto out; - } - - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); - if (!ctx) { - ret = -1; - goto out; - } - - uuid_copy (ctx->uuid, lock_req.uuid); - ctx->req = req; - - ctx->dict = dict_new (); - if (!ctx->dict) { - ret = -1; - goto out; - } - - ret = dict_unserialize (lock_req.dict.dict_val, - lock_req.dict.dict_len, &ctx->dict); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, - "failed to unserialize the dictionary"); - goto out; - } - - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, - &lock_req.txn_id, ctx); - if (ret) - gf_log (this->name, GF_LOG_ERROR, - "Failed to inject event GD_OP_EVENT_UNLOCK"); - -out: - if (ret) { - if (ctx) { - if (ctx->dict) - dict_destroy (ctx->dict); - GF_FREE (ctx); - } - } - - glusterd_friend_sm (); - glusterd_op_sm (); - - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - -int -glusterd_handle_volume_unlock (rpcsvc_request_t *req) -{ - return glusterd_big_locked_handler (req, - glusterd_handle_volume_unlock_fn); -} - int glusterd_req_ctx_create (rpcsvc_request_t *rpc_req, glusterd_op_t op, uuid_t uuid, @@ -1334,6 +1168,8 @@ __glusterd_handle_cli_deprobe (rpcsvc_request_t *req) char *hostname = NULL; int port = 0; int flags = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp = NULL; this = THIS; GF_ASSERT (this); @@ -1374,7 +1210,6 @@ __glusterd_handle_cli_deprobe (rpcsvc_request_t *req) gf_log (this->name, GF_LOG_ERROR, "Failed to get port"); goto out; } - ret = dict_get_int32 (dict, "flags", &flags); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to get flags"); @@ -1394,22 +1229,30 @@ __glusterd_handle_cli_deprobe (rpcsvc_request_t *req) } if (!(flags & GF_CLI_FLAG_OP_FORCE)) { - if (!uuid_is_null (uuid)) { - /* Check if peers are connected, except peer being detached*/ - if (!glusterd_chk_peers_connected_befriended (uuid)) { - ret = -1; - op_errno = GF_DEPROBE_FRIEND_DOWN; - goto out; - } - ret = glusterd_all_volume_cond_check ( - glusterd_friend_brick_belongs, - -1, &uuid); - if (ret) { - op_errno = GF_DEPROBE_BRICK_EXIST; - goto out; - } + /* Check if peers are connected, except peer being + * detached*/ + if (!glusterd_chk_peers_connected_befriended (uuid)) { + ret = -1; + op_errno = GF_DEPROBE_FRIEND_DOWN; + goto out; } + } + /* Check for if volumes exist with some bricks on the peer being + * detached. It's not a problem if a volume contains none or all + * of its bricks on the peer being detached + */ + list_for_each_entry_safe (volinfo, tmp, &priv->volumes, + vol_list) { + ret = glusterd_friend_contains_vol_bricks (volinfo, + uuid); + if (ret == 1) { + op_errno = GF_DEPROBE_BRICK_EXIST; + goto out; + } + } + + if (!(flags & GF_CLI_FLAG_OP_FORCE)) { if (glusterd_is_any_volume_in_server_quorum (this) && !does_gd_meet_server_quorum (this)) { gf_log (this->name, GF_LOG_ERROR, "Quorum does not " @@ -2244,12 +2087,12 @@ glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status) } int -glusterd_op_volume_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, +glusterd_op_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, int32_t status) { - gd1_mgmt_volume_lock_rsp rsp = {{0},}; - int ret = -1; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + int ret = -1; GF_ASSERT (req); GF_ASSERT (txn_id); @@ -2260,20 +2103,20 @@ glusterd_op_volume_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, uuid_copy (rsp.txn_id, *txn_id); ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); - gf_log (THIS->name, GF_LOG_DEBUG, "Responded to volume lock, ret: %d", + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to mgmt_v3 lock, ret: %d", ret); return ret; } int -glusterd_op_volume_unlock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, +glusterd_op_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, int32_t status) { - gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; int ret = -1; GF_ASSERT (req); @@ -2285,9 +2128,9 @@ glusterd_op_volume_unlock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, uuid_copy (rsp.txn_id, *txn_id); ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); - gf_log (THIS->name, GF_LOG_DEBUG, "Responded to volume unlock, ret: %d", + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to mgmt_v3 unlock, ret: %d", ret); return ret; @@ -4081,8 +3924,12 @@ get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo) brick++; uuid_parse (volid_str, volid); ret = glusterd_volinfo_find_by_volume_id (volid, &volinfo); - if (ret) - goto out; + if (ret) { + /* Check if it a snapshot volume */ + ret = glusterd_snap_volinfo_find_by_volume_id (volid, &volinfo); + if (ret) + goto out; + } ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, brickinfo); @@ -4296,9 +4143,10 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, } else { list_for_each_entry (volinfo, &conf->volumes, vol_list) { - ret = glusterd_volume_unlock + ret = glusterd_mgmt_v3_unlock (volinfo->volname, - peerinfo->uuid); + peerinfo->uuid, + "vol"); if (ret) gf_log (this->name, GF_LOG_TRACE, @@ -4426,8 +4274,9 @@ rpcsvc_actor_t gd_svc_cli_actors[] = { [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", GLUSTER_CLI_STATEDUMP_VOLUME, glusterd_handle_cli_statedump_volume, NULL, 0, DRC_NA}, [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME, glusterd_handle_cli_list_volume, NULL, 0, DRC_NA}, [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", GLUSTER_CLI_CLRLOCKS_VOLUME, glusterd_handle_cli_clearlocks_volume, NULL, 0, DRC_NA}, - [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", GLUSTER_CLI_COPY_FILE, glusterd_handle_copy_file, NULL, 0, DRC_NA}, - [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", GLUSTER_CLI_SYS_EXEC, glusterd_handle_sys_exec, NULL, 0, DRC_NA}, + [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", GLUSTER_CLI_COPY_FILE, glusterd_handle_copy_file, NULL, 0, DRC_NA}, + [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", GLUSTER_CLI_SYS_EXEC, glusterd_handle_sys_exec, NULL, 0, DRC_NA}, + [GLUSTER_CLI_SNAP] = {"SNAP", GLUSTER_CLI_SNAP, glusterd_handle_snapshot, NULL, 0, DRC_NA}, }; struct rpcsvc_program gd_svc_cli_prog = { @@ -4459,18 +4308,3 @@ struct rpcsvc_program gd_svc_cli_prog_ro = { .actors = gd_svc_cli_actors_ro, .synctask = _gf_true, }; - -rpcsvc_actor_t gd_svc_mgmt_v3_actors[] = { - [GLUSTERD_MGMT_V3_NULL] = { "NULL", GLUSTERD_MGMT_V3_NULL, glusterd_null, NULL, 0, DRC_NA}, - [GLUSTERD_MGMT_V3_VOLUME_LOCK] = { "VOL_LOCK", GLUSTERD_MGMT_V3_VOLUME_LOCK, glusterd_handle_volume_lock, NULL, 0, DRC_NA}, - [GLUSTERD_MGMT_V3_VOLUME_UNLOCK] = { "VOL_UNLOCK", GLUSTERD_MGMT_V3_VOLUME_UNLOCK, glusterd_handle_volume_unlock, NULL, 0, DRC_NA}, -}; - -struct rpcsvc_program gd_svc_mgmt_v3_prog = { - .progname = "GlusterD svc mgmt v3", - .prognum = GD_MGMT_PROGRAM, - .progver = GD_MGMT_V3_VERSION, - .numactors = GLUSTERD_MGMT_V3_MAXVALUE, - .actors = gd_svc_mgmt_v3_actors, - .synctask = _gf_true, -}; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index e0508faf6..5078526e9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -36,21 +36,139 @@ extern struct rpc_clnt_program gd_mgmt_v3_prog; typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *data); +static int +get_snap_volname_and_volinfo (const char *volpath, char **volname, + glusterd_volinfo_t **volinfo) +{ + int ret = -1; + char *save_ptr = NULL; + char *str_token = NULL; + char *snapname = NULL; + char *volname_token = NULL; + char *vol = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (volpath); + GF_ASSERT (volinfo); + + str_token = gf_strdup (volpath); + if (NULL == str_token) { + goto out; + } + + /* Input volname will have below formats: + * /snaps/<snapname>/<volname>.<hostname> + * or + * /snaps/<snapname>/<parent-volname> + * We need to extract snapname and parent_volname */ + + /*split string by "/" */ + strtok_r (str_token, "/", &save_ptr); + snapname = strtok_r(NULL, "/", &save_ptr); + if (!snapname) { + gf_log(this->name, GF_LOG_ERROR, "Invalid path: %s", volpath); + goto out; + } + + volname_token = strtok_r(NULL, "/", &save_ptr); + if (!volname_token) { + gf_log(this->name, GF_LOG_ERROR, "Invalid path: %s", volpath); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log(this->name, GF_LOG_ERROR, "Failed to " + "fetch snap %s", snapname); + goto out; + } + + /* Find if its a parent volume name or snap volume + * name. This function will succeed if volname_token + * is a parent volname + */ + ret = glusterd_volinfo_find (volname_token, volinfo); + if (ret) { + *volname = gf_strdup (volname_token); + if (NULL == *volname) { + ret = -1; + goto out; + } + + ret = glusterd_snap_volinfo_find (volname_token, snap, + volinfo); + if (ret) { + /* Split the volume name */ + vol = strtok_r (volname_token, ".", &save_ptr); + if (!vol) { + gf_log(this->name, GF_LOG_ERROR, "Invalid " + "volname (%s)", volname_token); + goto out; + } + + ret = glusterd_snap_volinfo_find (vol, snap, volinfo); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Failed to " + "fetch snap volume from volname (%s)", + vol); + goto out; + } + } + } else { + /*volname_token is parent volname*/ + ret = glusterd_snap_volinfo_find_from_parent_volname ( + volname_token, snap, volinfo); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Failed to " + "fetch snap volume from parent " + "volname (%s)", volname_token); + goto out; + } + + /* Since volname_token is a parent volname we should + * get the snap volname here*/ + *volname = gf_strdup ((*volinfo)->volname); + if (NULL == *volname) { + ret = -1; + goto out; + } + } + +out: + if (ret && NULL != *volname) { + GF_FREE (*volname); + *volname = NULL; + } + return ret; +} + static size_t build_volfile_path (const char *volname, char *path, size_t path_len, char *trusted_str) { - struct stat stbuf = {0,}; - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - char *vol = NULL; - char *dup_volname = NULL; - char *free_ptr = NULL; - char *tmp = NULL; - glusterd_volinfo_t *volinfo = NULL; - char *server = NULL; - - priv = THIS->private; + struct stat stbuf = {0,}; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + char *vol = NULL; + char *dup_volname = NULL; + char *free_ptr = NULL; + char *save_ptr = NULL; + char *str_token = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *server = NULL; + const char *volname_ptr = NULL; + char path_prefix [PATH_MAX] = {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volname); + GF_ASSERT (path); if (strstr (volname, "gluster/")) { server = strchr (volname, '/') + 1; @@ -58,6 +176,22 @@ build_volfile_path (const char *volname, char *path, path, path_len); ret = 1; goto out; + } else if ((str_token = strstr (volname, "/snaps/"))) { + ret = get_snap_volname_and_volinfo (str_token, &dup_volname, + &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get snap" + " volinfo from path (%s)", volname); + ret = -1; + goto out; + } + + snprintf (path_prefix, sizeof (path_prefix), "%s/snaps/%s", + priv->workdir, volinfo->snapshot->snapname); + + free_ptr = dup_volname; + volname_ptr = dup_volname; + goto gotvolinfo; } else if (volname[0] != '/') { /* Normal behavior */ dup_volname = gf_strdup (volname); @@ -68,39 +202,53 @@ build_volfile_path (const char *volname, char *path, dup_volname = gf_strdup (&volname[1]); } + if (!dup_volname) { + gf_log(THIS->name, GF_LOG_ERROR, "strdup failed"); + ret = -1; + goto out; + } free_ptr = dup_volname; + volname_ptr = volname; + + snprintf (path_prefix, sizeof (path_prefix), "%s/vols", + priv->workdir); ret = glusterd_volinfo_find (dup_volname, &volinfo); + if (ret) { /* Split the volume name */ - vol = strtok_r (dup_volname, ".", &tmp); + vol = strtok_r (dup_volname, ".", &save_ptr); if (!vol) goto out; + ret = glusterd_volinfo_find (vol, &volinfo); if (ret) goto out; } +gotvolinfo: if (!glusterd_auth_get_username (volinfo)) trusted_str = NULL; - ret = snprintf (path, path_len, "%s/vols/%s/%s.vol", - priv->workdir, volinfo->volname, volname); + ret = snprintf (path, path_len, "%s/%s/%s.vol", path_prefix, + volinfo->volname, volname_ptr); if (ret == -1) goto out; ret = stat (path, &stbuf); if ((ret == -1) && (errno == ENOENT)) { - snprintf (path, path_len, "%s/vols/%s/%s%s-fuse.vol", - priv->workdir, volinfo->volname, - (trusted_str ? trusted_str : ""), dup_volname); + snprintf (path, path_len, "%s/%s/%s%s-fuse.vol", + path_prefix, volinfo->volname, + (trusted_str ? trusted_str : ""), + dup_volname); + ret = stat (path, &stbuf); } if ((ret == -1) && (errno == ENOENT)) { - snprintf (path, path_len, "%s/vols/%s/%s-tcp.vol", - priv->workdir, volinfo->volname, volname); + snprintf (path, path_len, "%s/%s/%s-tcp.vol", + path_prefix, volinfo->volname, volname_ptr); } ret = 1; diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c index 9e8bbc21b..28358aa55 100644 --- a/xlators/mgmt/glusterd/src/glusterd-locks.c +++ b/xlators/mgmt/glusterd/src/glusterd-locks.c @@ -26,10 +26,40 @@ #include <signal.h> -/* Initialize the global vol-lock list(dict) when +#define GF_MAX_LOCKING_ENTITIES 2 + +/* Valid entities that the mgmt_v3 lock can hold locks upon * + * To add newer entities to be locked, we can just add more * + * entries to this table along with the type and default value */ +glusterd_valid_entities valid_types[] = { + { "vol", _gf_true }, + { "snap", _gf_false }, + { NULL }, +}; + +/* Checks if the lock request is for a valid entity */ +gf_boolean_t +glusterd_mgmt_v3_is_type_valid (char *type) +{ + int32_t i = 0; + gf_boolean_t ret = _gf_false; + + GF_ASSERT (type); + + for (i = 0; valid_types[i].type; i++) { + if (!strcmp (type, valid_types[i].type)) { + ret = _gf_true; + break; + } + } + + return ret; +} + +/* Initialize the global mgmt_v3 lock list(dict) when * glusterd is spawned */ int32_t -glusterd_vol_lock_init () +glusterd_mgmt_v3_lock_init () { int32_t ret = -1; xlator_t *this = NULL; @@ -40,8 +70,8 @@ glusterd_vol_lock_init () priv = this->private; GF_ASSERT (priv); - priv->vol_lock = dict_new (); - if (!priv->vol_lock) + priv->mgmt_v3_lock = dict_new (); + if (!priv->mgmt_v3_lock) goto out; ret = 0; @@ -49,10 +79,10 @@ out: return ret; } -/* Destroy the global vol-lock list(dict) when +/* Destroy the global mgmt_v3 lock list(dict) when * glusterd cleanup is performed */ void -glusterd_vol_lock_fini () +glusterd_mgmt_v3_lock_fini () { xlator_t *this = NULL; glusterd_conf_t *priv = NULL; @@ -62,31 +92,31 @@ glusterd_vol_lock_fini () priv = this->private; GF_ASSERT (priv); - if (priv->vol_lock) - dict_unref (priv->vol_lock); + if (priv->mgmt_v3_lock) + dict_unref (priv->mgmt_v3_lock); } int32_t -glusterd_get_vol_lock_owner (char *volname, uuid_t *uuid) +glusterd_get_mgmt_v3_lock_owner (char *key, uuid_t *uuid) { - int32_t ret = -1; - glusterd_vol_lock_obj *lock_obj = NULL; - glusterd_conf_t *priv = NULL; - uuid_t no_owner = {0,}; - xlator_t *this = NULL; + int32_t ret = -1; + glusterd_mgmt_v3_lock_obj *lock_obj = NULL; + glusterd_conf_t *priv = NULL; + uuid_t no_owner = {0,}; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); priv = this->private; GF_ASSERT (priv); - if (!volname || !uuid) { - gf_log ("", GF_LOG_ERROR, "volname or uuid is null."); + if (!key || !uuid) { + gf_log (this->name, GF_LOG_ERROR, "key or uuid is null."); ret = -1; goto out; } - ret = dict_get_bin (priv->vol_lock, volname, (void **) &lock_obj); + ret = dict_get_bin (priv->mgmt_v3_lock, key, (void **) &lock_obj); if (!ret) uuid_copy (*uuid, lock_obj->lock_owner); else @@ -94,49 +124,433 @@ glusterd_get_vol_lock_owner (char *volname, uuid_t *uuid) ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* This function is called with the locked_count and type, to * + * release all the acquired locks. */ +static int32_t +glusterd_release_multiple_locks_per_entity (dict_t *dict, uuid_t uuid, + int32_t locked_count, + char *type) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t i = -1; + int32_t op_ret = 0; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + if (locked_count == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "No %s locked as part of this transaction", + type); + goto out; + } + + /* Release all the locks held */ + for (i = 0; i < locked_count; i++) { + snprintf (name_buf, sizeof(name_buf), + "%sname%d", type, i+1); + + /* Looking for volname1, volname2 or snapname1, * + * as key in the dict snapname2 */ + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get %s locked_count = %d", + name_buf, locked_count); + op_ret = ret; + continue; + } + + ret = glusterd_mgmt_v3_unlock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release lock for %s.", + name); + op_ret = ret; + } + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", op_ret); + return op_ret; +} + +/* Given the count and type of the entity this function acquires * + * locks on multiple elements of the same entity. For example: * + * If type is "vol" this function tries to acquire locks on multiple * + * volumes */ +static int32_t +glusterd_acquire_multiple_locks_per_entity (dict_t *dict, uuid_t uuid, + int32_t count, char *type) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t i = -1; + int32_t ret = -1; + int32_t locked_count = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + /* Locking one element after other */ + for (i = 0; i < count; i++) { + snprintf (name_buf, sizeof(name_buf), + "%sname%d", type, i+1); + + /* Looking for volname1, volname2 or snapname1, * + * as key in the dict snapname2 */ + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get %s count = %d", + name_buf, count); + break; + } + + ret = glusterd_mgmt_v3_lock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire lock for %s %s " + "on behalf of %s. Reversing " + "this transaction", type, name, + uuid_utoa(uuid)); + break; + } + locked_count++; + } + + if (count == locked_count) { + /* If all locking ops went successfuly, return as success */ + ret = 0; + goto out; + } + + /* If we failed to lock one element, unlock others and return failure */ + ret = glusterd_release_multiple_locks_per_entity (dict, uuid, + locked_count, + type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release multiple %s locks", + type); + } + ret = -1; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Given the type of entity, this function figures out if it should unlock a * + * single element of multiple elements of the said entity. For example: * + * if the type is "vol", this function will accordingly unlock a single volume * + * or multiple volumes */ +static int32_t +glusterd_mgmt_v3_unlock_entity (dict_t *dict, uuid_t uuid, char *type, + gf_boolean_t default_value) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t count = -1; + int32_t ret = -1; + gf_boolean_t hold_locks = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + snprintf (name_buf, sizeof(name_buf), "hold_%s_locks", type); + hold_locks = dict_get_str_boolean (dict, name_buf, default_value); + + if (hold_locks == _gf_false) { + /* Locks were not held for this particular entity * + * Hence nothing to release */ + ret = 0; + goto out; + } + + /* Looking for volcount or snapcount in the dict */ + snprintf (name_buf, sizeof(name_buf), "%scount", type); + ret = dict_get_int32 (dict, name_buf, &count); + if (ret) { + /* count is not present. Only one * + * element name needs to be unlocked */ + snprintf (name_buf, sizeof(name_buf), "%sname", + type); + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %sname", type); + goto out; + } + + ret = glusterd_mgmt_v3_unlock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release lock for %s %s " + "on behalf of %s.", type, name, + uuid_utoa(uuid)); + goto out; + } + } else { + /* Unlocking one element name after another */ + ret = glusterd_release_multiple_locks_per_entity (dict, + uuid, + count, + type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release all %s locks", type); + goto out; + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } +/* Given the type of entity, this function figures out if it should lock a * + * single element or multiple elements of the said entity. For example: * + * if the type is "vol", this function will accordingly lock a single volume * + * or multiple volumes */ +static int32_t +glusterd_mgmt_v3_lock_entity (dict_t *dict, uuid_t uuid, char *type, + gf_boolean_t default_value) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t count = -1; + int32_t ret = -1; + gf_boolean_t hold_locks = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + snprintf (name_buf, sizeof(name_buf), "hold_%s_locks", type); + hold_locks = dict_get_str_boolean (dict, name_buf, default_value); + + if (hold_locks == _gf_false) { + /* Not holding locks for this particular entity */ + ret = 0; + goto out; + } + + /* Looking for volcount or snapcount in the dict */ + snprintf (name_buf, sizeof(name_buf), "%scount", type); + ret = dict_get_int32 (dict, name_buf, &count); + if (ret) { + /* count is not present. Only one * + * element name needs to be locked */ + snprintf (name_buf, sizeof(name_buf), "%sname", + type); + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %sname", type); + goto out; + } + + ret = glusterd_mgmt_v3_lock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire lock for %s %s " + "on behalf of %s.", type, name, + uuid_utoa(uuid)); + goto out; + } + } else { + /* Locking one element name after another */ + ret = glusterd_acquire_multiple_locks_per_entity (dict, + uuid, + count, + type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire all %s locks", type); + goto out; + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Try to release locks of multiple entities like * + * volume, snaps etc. */ +int32_t +glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid) +{ + int32_t i = -1; + int32_t ret = -1; + int32_t op_ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "dict is null."); + ret = -1; + goto out; + } + + for (i = 0; valid_types[i].type; i++) { + ret = glusterd_mgmt_v3_unlock_entity + (dict, uuid, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to unlock all %s", + valid_types[i].type); + op_ret = ret; + } + } + + ret = op_ret; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Try to acquire locks on multiple entities like * + * volume, snaps etc. */ int32_t -glusterd_volume_lock (char *volname, uuid_t uuid) +glusterd_multiple_mgmt_v3_lock (dict_t *dict, uuid_t uuid) { - int32_t ret = -1; - glusterd_vol_lock_obj *lock_obj = NULL; - glusterd_conf_t *priv = NULL; - uuid_t owner = {0}; - xlator_t *this = NULL; + int32_t i = -1; + int32_t ret = -1; + int32_t locked_count = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "dict is null."); + ret = -1; + goto out; + } + + /* Locking one entity after other */ + for (i = 0; valid_types[i].type; i++) { + ret = glusterd_mgmt_v3_lock_entity + (dict, uuid, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to lock all %s", + valid_types[i].type); + break; + } + locked_count++; + } + + if (locked_count == GF_MAX_LOCKING_ENTITIES) { + /* If all locking ops went successfuly, return as success */ + ret = 0; + goto out; + } + + /* If we failed to lock one entity, unlock others and return failure */ + for (i = 0; i < locked_count; i++) { + ret = glusterd_mgmt_v3_unlock_entity + (dict, uuid, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to unlock all %s", + valid_types[i].type); + } + } + ret = -1; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_mgmt_v3_lock (const char *name, uuid_t uuid, char *type) +{ + char key[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_mgmt_v3_lock_obj *lock_obj = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t is_valid = _gf_true; + uuid_t owner = {0}; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); priv = this->private; GF_ASSERT (priv); - if (!volname) { - gf_log ("", GF_LOG_ERROR, "volname is null."); + if (!name || !type) { + gf_log (this->name, GF_LOG_ERROR, "name or type is null."); ret = -1; goto out; } - ret = glusterd_get_vol_lock_owner (volname, &owner); + is_valid = glusterd_mgmt_v3_is_type_valid (type); + if (is_valid != _gf_true) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid entity. Cannot perform locking " + "operation on %s types", type); + ret = -1; + goto out; + } + + ret = snprintf (key, sizeof(key), "%s_%s", name, type); + if (ret != strlen(name) + 1 + strlen(type)) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Unable to create key"); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Trying to acquire lock of %s %s for %s as %s", + type, name, uuid_utoa (uuid), key); + + ret = glusterd_get_mgmt_v3_lock_owner (key, &owner); if (ret) { - gf_log ("", GF_LOG_WARNING, - "Unable to get volume lock owner"); + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get mgmt_v3 lock owner"); goto out; } /* If the lock has already been held for the given volume * we fail */ if (!uuid_is_null (owner)) { - gf_log ("", GF_LOG_WARNING, "Unable to acquire lock. " - "Lock for %s held by %s", volname, - uuid_utoa (owner)); + gf_log (this->name, GF_LOG_WARNING, "Lock for %s held by %s", + name, uuid_utoa (owner)); ret = -1; goto out; } - lock_obj = GF_CALLOC (1, sizeof(glusterd_vol_lock_obj), - gf_common_mt_vol_lock_obj_t); + lock_obj = GF_CALLOC (1, sizeof(glusterd_mgmt_v3_lock_obj), + gf_common_mt_mgmt_v3_lock_obj_t); if (!lock_obj) { ret = -1; goto out; @@ -144,70 +558,99 @@ glusterd_volume_lock (char *volname, uuid_t uuid) uuid_copy (lock_obj->lock_owner, uuid); - ret = dict_set_bin (priv->vol_lock, volname, lock_obj, - sizeof(glusterd_vol_lock_obj)); + ret = dict_set_bin (priv->mgmt_v3_lock, key, lock_obj, + sizeof(glusterd_mgmt_v3_lock_obj)); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set lock owner " - "in volume lock"); + gf_log (this->name, GF_LOG_ERROR, + "Unable to set lock owner in mgmt_v3 lock"); if (lock_obj) GF_FREE (lock_obj); goto out; } - gf_log ("", GF_LOG_DEBUG, "Lock for %s successfully held by %s", - volname, uuid_utoa (uuid)); + gf_log (this->name, GF_LOG_DEBUG, + "Lock for %s %s successfully held by %s", + type, name, uuid_utoa (uuid)); ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } int32_t -glusterd_volume_unlock (char *volname, uuid_t uuid) +glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type) { - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - uuid_t owner = {0}; - xlator_t *this = NULL; + char key[PATH_MAX] = ""; + int32_t ret = -1; + gf_boolean_t is_valid = _gf_true; + glusterd_conf_t *priv = NULL; + uuid_t owner = {0}; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); priv = this->private; GF_ASSERT (priv); - if (!volname) { - gf_log ("", GF_LOG_ERROR, "volname is null."); + if (!name || !type) { + gf_log (this->name, GF_LOG_ERROR, "name is null."); + ret = -1; + goto out; + } + + is_valid = glusterd_mgmt_v3_is_type_valid (type); + if (is_valid != _gf_true) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid entity. Cannot perform unlocking " + "operation on %s types", type); ret = -1; goto out; } - ret = glusterd_get_vol_lock_owner (volname, &owner); - if (ret) + ret = snprintf (key, sizeof(key), "%s_%s", + name, type); + if (ret != strlen(name) + 1 + strlen(type)) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create key"); + ret = -1; goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Trying to release lock of %s %s for %s as %s", + type, name, uuid_utoa (uuid), key); + + ret = glusterd_get_mgmt_v3_lock_owner (key, &owner); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get mgmt_v3 lock owner"); + goto out; + } if (uuid_is_null (owner)) { - gf_log ("", GF_LOG_WARNING, "Lock for %s not held", volname); + gf_log (this->name, GF_LOG_WARNING, + "Lock for %s %s not held", type, name); ret = -1; goto out; } ret = uuid_compare (uuid, owner); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "Lock owner mismatch. " - "Lock for %s held by %s", - volname, uuid_utoa (owner)); + gf_log (this->name, GF_LOG_WARNING, "Lock owner mismatch. " + "Lock for %s %s held by %s", + type, name, uuid_utoa (owner)); goto out; } - /* Removing the volume lock from the global list */ - dict_del (priv->vol_lock, volname); + /* Removing the mgmt_v3 lock from the global list */ + dict_del (priv->mgmt_v3_lock, key); - gf_log ("", GF_LOG_DEBUG, "Lock for %s successfully released", - volname); + gf_log (this->name, GF_LOG_DEBUG, + "Lock for %s %s successfully released", + type, name); ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.h b/xlators/mgmt/glusterd/src/glusterd-locks.h index 6e3f56f9c..b9cc8c0d1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-locks.h +++ b/xlators/mgmt/glusterd/src/glusterd-locks.h @@ -15,23 +15,37 @@ #include "config.h" #endif -typedef struct glusterd_volume_lock_object_ { +typedef struct glusterd_mgmt_v3_lock_object_ { uuid_t lock_owner; -} glusterd_vol_lock_obj; +} glusterd_mgmt_v3_lock_obj; + +typedef struct glusterd_mgmt_v3_lock_valid_entities { + char *type; /* Entity type like vol, snap */ + gf_boolean_t default_value; /* The default value that * + * determines if the locks * + * should be held for that * + * entity */ +} glusterd_valid_entities; int32_t -glusterd_vol_lock_init (); +glusterd_mgmt_v3_lock_init (); void -glusterd_vol_lock_fini (); +glusterd_mgmt_v3_lock_fini (); + +int32_t +glusterd_get_mgmt_v3_lock_owner (char *volname, uuid_t *uuid); + +int32_t +glusterd_mgmt_v3_lock (const char *key, uuid_t uuid, char *type); int32_t -glusterd_get_vol_lock_owner (char *volname, uuid_t *uuid); +glusterd_mgmt_v3_unlock (const char *key, uuid_t uuid, char *type); int32_t -glusterd_volume_lock (char *volname, uuid_t uuid); +glusterd_multiple_mgmt_v3_lock (dict_t *dict, uuid_t uuid); int32_t -glusterd_volume_unlock (char *volname, uuid_t uuid); +glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index 51057c274..e6f6a0333 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -67,7 +67,9 @@ typedef enum gf_gld_mem_types_ { gf_gld_mt_hooks_priv_t = gf_common_mt_end + 51, gf_gld_mt_mop_commit_req_t = gf_common_mt_end + 52, gf_gld_mt_int = gf_common_mt_end + 53, - gf_gld_mt_end = gf_common_mt_end + 54, + gf_gld_mt_snap_t = gf_common_mt_end + 54, + gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55, + gf_gld_mt_end = gf_common_mt_end + 56, } gf_gld_mem_types_t; #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c new file mode 100644 index 000000000..81c5aa579 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c @@ -0,0 +1,936 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +/* rpc related syncops */ +#include "rpc-clnt.h" +#include "protocol-common.h" +#include "xdr-generic.h" +#include "glusterd1-xdr.h" +#include "glusterd-syncop.h" + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-locks.h" +#include "glusterd-mgmt.h" +#include "glusterd-op-sm.h" + +static int +glusterd_mgmt_v3_null (rpcsvc_request_t *req) +{ + return 0; +} + +static int +glusterd_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, int32_t status) +{ + + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + + glusterd_get_uuid (&rsp.uuid); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + + gf_log (this->name, GF_LOG_DEBUG, + "Responded to mgmt_v3 lock, ret: %d", ret); + + return ret; +} + +static int +glusterd_synctasked_mgmt_v3_lock (rpcsvc_request_t *req, + gd1_mgmt_v3_lock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (ctx); + GF_ASSERT (ctx->dict); + + /* Trying to acquire multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_lock (ctx->dict, ctx->uuid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire mgmt_v3 locks for %s", + uuid_utoa (ctx->uuid)); + + ret = glusterd_mgmt_v3_lock_send_resp (req, ret); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_op_state_machine_mgmt_v3_lock (rpcsvc_request_t *req, + gd1_mgmt_v3_lock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + glusterd_txn_opinfo_init (&txn_op_info, NULL, &lock_req->op, + ctx->dict, req); + + ret = glusterd_set_txn_opinfo (&lock_req->txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, + &lock_req->txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_LOCK"); + +out: + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_handle_mgmt_v3_lock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_v3_lock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_op_lock_ctx_t *ctx = NULL; + xlator_t *this = NULL; + gf_boolean_t is_synctasked = _gf_false; + gf_boolean_t free_ctx = _gf_false; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received mgmt_v3 lock req " + "from uuid: %s", uuid_utoa (lock_req.uuid)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + is_synctasked = dict_get_str_boolean (ctx->dict, + "is_synctasked", _gf_false); + if (is_synctasked) { + ret = glusterd_synctasked_mgmt_v3_lock (req, &lock_req, ctx); + /* The above function does not take ownership of ctx. + * Therefore we need to free the ctx explicitly. */ + free_ctx = _gf_true; + } + else { + ret = glusterd_op_state_machine_mgmt_v3_lock (req, &lock_req, + ctx); + } + +out: + + if (ret || free_ctx) { + if (ctx->dict) + dict_unref (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + free (lock_req.dict.dict_val); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_pre_validate_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_pre_val_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, + "Responded to pre validation, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_pre_validate_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_pre_val_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode pre validation " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_pre_validate_fn (op_req.op, dict, &op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pre Validation failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_pre_validate_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send Pre Validation " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_brick_op_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_brick_op_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, + "Responded to brick op, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_brick_op_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_brick_op_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode brick op " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_brick_op_fn (op_req.op, dict, &op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Brick Op failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_brick_op_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send brick op " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_commit_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_commit_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, "Responded to commit, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_commit_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_commit_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode commit " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_commit_fn (op_req.op, dict, &op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "commit failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_commit_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send commit " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_post_validate_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_post_val_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, + "Responded to post validation, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_post_validate_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_val_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode post validation " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_post_validate_fn (op_req.op, op_req.op_ret, dict, + &op_errstr, rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Post Validation failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_post_validate_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send Post Validation " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, int32_t status) +{ + + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + + glusterd_get_uuid (&rsp.uuid); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + + gf_log (this->name, GF_LOG_DEBUG, + "Responded to mgmt_v3 unlock, ret: %d", ret); + + return ret; +} + +static int +glusterd_syctasked_mgmt_v3_unlock (rpcsvc_request_t *req, + gd1_mgmt_v3_unlock_req *unlock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (ctx); + + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock (ctx->dict, ctx->uuid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release mgmt_v3 locks for %s", + uuid_utoa(ctx->uuid)); + } + + ret = glusterd_mgmt_v3_unlock_send_resp (req, ret); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + + +static int +glusterd_op_state_machine_mgmt_v3_unlock (rpcsvc_request_t *req, + gd1_mgmt_v3_unlock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, + &lock_req->txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_UNLOCK"); + + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_handle_mgmt_v3_unlock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_v3_unlock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + gf_boolean_t is_synctasked = _gf_false; + gf_boolean_t free_ctx = _gf_false; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received volume unlock req " + "from uuid: %s", uuid_utoa (lock_req.uuid)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + is_synctasked = dict_get_str_boolean (ctx->dict, + "is_synctasked", _gf_false); + if (is_synctasked) { + ret = glusterd_syctasked_mgmt_v3_unlock (req, &lock_req, ctx); + /* The above function does not take ownership of ctx. + * Therefore we need to free the ctx explicitly. */ + free_ctx = _gf_true; + } + else { + ret = glusterd_op_state_machine_mgmt_v3_unlock (req, &lock_req, + ctx); + } + +out: + + if (ret || free_ctx) { + if (ctx->dict) + dict_unref (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + free (lock_req.dict.dict_val); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_handle_mgmt_v3_lock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_mgmt_v3_lock_fn); +} + +static int +glusterd_handle_pre_validate (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_pre_validate_fn); +} + +static int +glusterd_handle_brick_op (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_brick_op_fn); +} + +static int +glusterd_handle_commit (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_commit_fn); +} + +static int +glusterd_handle_post_validate (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_post_validate_fn); +} + +int +glusterd_handle_mgmt_v3_unlock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_mgmt_v3_unlock_fn); +} + +rpcsvc_actor_t gd_svc_mgmt_v3_actors[] = { + [GLUSTERD_MGMT_V3_NULL] = { "NULL", GLUSTERD_MGMT_V3_NULL, glusterd_mgmt_v3_null, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_LOCK] = { "MGMT_V3_LOCK", GLUSTERD_MGMT_V3_LOCK, glusterd_handle_mgmt_v3_lock, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_PRE_VALIDATE] = { "PRE_VAL", GLUSTERD_MGMT_V3_PRE_VALIDATE, glusterd_handle_pre_validate, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_BRICK_OP] = { "BRCK_OP", GLUSTERD_MGMT_V3_BRICK_OP, glusterd_handle_brick_op, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_COMMIT] = { "COMMIT", GLUSTERD_MGMT_V3_COMMIT, glusterd_handle_commit, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_POST_VALIDATE] = { "POST_VAL", GLUSTERD_MGMT_V3_POST_VALIDATE, glusterd_handle_post_validate, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_UNLOCK] = { "MGMT_V3_UNLOCK", GLUSTERD_MGMT_V3_UNLOCK, glusterd_handle_mgmt_v3_unlock, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gd_svc_mgmt_v3_prog = { + .progname = "GlusterD svc mgmt v3", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .numactors = GLUSTERD_MGMT_V3_MAXVALUE, + .actors = gd_svc_mgmt_v3_actors, + .synctask = _gf_true, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c new file mode 100644 index 000000000..5295f889e --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c @@ -0,0 +1,1899 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +/* rpc related syncops */ +#include "rpc-clnt.h" +#include "protocol-common.h" +#include "xdr-generic.h" +#include "glusterd1-xdr.h" +#include "glusterd-syncop.h" + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-locks.h" +#include "glusterd-mgmt.h" +#include "glusterd-op-sm.h" + +extern struct rpc_clnt_program gd_mgmt_v3_prog; + + +static void +gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, + glusterd_peerinfo_t *peerinfo, u_char *uuid) +{ + char *peer_str = NULL; + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + int32_t len = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (args); + GF_ASSERT (uuid); + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (uuid); + + if (op_errstr && strcmp (op_errstr, "")) { + len = snprintf (err_str, sizeof(err_str) - 1, + "Error: %s", op_errstr); + err_str[len] = '\0'; + } + + switch (op_code) { + case GLUSTERD_MGMT_V3_LOCK: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Locking failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_PRE_VALIDATE: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Pre Validation failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_BRICK_OP: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Brick ops failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_COMMIT: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Commit failed on %s. %s", + peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_POST_VALIDATE: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Post Validation failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_UNLOCK: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Unlocking failed " + "on %s. %s", peer_str, err_str); + break; + } + } + op_err[len] = '\0'; + + if (args->errstr) { + len = snprintf (err_str, sizeof(err_str) - 1, + "%s\n%s", args->errstr, + op_err); + GF_FREE (args->errstr); + args->errstr = NULL; + } else + len = snprintf (err_str, sizeof(err_str) - 1, + "%s", op_err); + err_str[len] = '\0'; + + gf_log (this->name, GF_LOG_ERROR, "%s", op_err); + args->errstr = gf_strdup (err_str); + } + + return; +} + +int32_t +gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + ret = glusterd_snapshot_prevalidate (dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Snapshot Prevalidate Failed"); + goto out; + } + + break; + + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot_brickop (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "snapshot brickop " + "failed"); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Snapshot Commit Failed"); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot_postvalidate (dict, op_ret, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "postvalidate operation failed"); + goto out; + } + break; + } + default: + break; + } + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + /* Even though the lock command has failed, while collating the errors + (gd_mgmt_v3_collate_errors), args->op_ret and args->op_errno will be + used. @args is obtained from frame->local. So before checking the + status of the request and going out if its a failure, args should be + set to frame->local. Otherwise, while collating args will be NULL. + This applies to other phases such as prevalidate, brickop, commit and + postvalidate also. + */ + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_LOCK, + peerinfo, rsp.uuid); + if (rsp.dict.dict_val) + free (rsp.dict.dict_val); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_lock_cbk_fn); +} + +int +gd_mgmt_v3_lock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + gd1_mgmt_v3_lock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_LOCK, + gd_mgmt_v3_lock_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_lock_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_initiate_lockdown (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *dict, char **op_errstr, int npeers, + gf_boolean_t *is_acquired) +{ + char *volname = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + int32_t ret = -1; + int32_t peer_cnt = 0; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (is_acquired); + + peers = &conf->xaction_peers; + + /* Trying to acquire multiple mgmt_v3 locks on local node */ + ret = glusterd_multiple_mgmt_v3_lock (dict, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire mgmt_v3 locks on localhost"); + goto out; + } + + *is_acquired = _gf_true; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending mgmt_v3 lock req to other nodes in the cluster */ + gd_syncargs_init (&args, NULL); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_lock (op, dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent lock op req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + if (ret) { + if (*op_errstr) + gf_log (this->name, GF_LOG_ERROR, "%s", + *op_errstr); + + if (volname) + ret = gf_asprintf (op_errstr, + "Another transaction is in progress " + "for %s. Please try again after " + "sometime.", volname); + else + ret = gf_asprintf (op_errstr, + "Another transaction is in progress " + "Please try again after sometime."); + + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + + return ret; +} + +int +glusterd_pre_validate_aggr_rsp_dict (glusterd_op_t op, + dict_t *aggr, dict_t *rsp) +{ + int32_t ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (aggr); + GF_ASSERT (rsp); + + switch (op) { + case GD_OP_SNAP: + ret = glusterd_snap_pre_validate_use_rsp_dict (aggr, rsp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to aggregate prevalidate " + "response dictionaries."); + goto out; + } + break; + default: + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Invalid op (%s)", + gd_op_list[op]); + + break; + } +out: + return ret; +} + +int32_t +gd_mgmt_v3_pre_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_pre_val_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &rsp_dict); + if (ret < 0) { + free (rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + uuid_copy (args->uuid, rsp.uuid); + pthread_mutex_lock (&args->lock_dict); + { + ret = glusterd_pre_validate_aggr_rsp_dict (rsp.op, args->dict, + rsp_dict); + } + pthread_mutex_unlock (&args->lock_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref (rsp_dict); + + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_PRE_VALIDATE, + peerinfo, rsp.uuid); + + if (rsp.op_errstr) + free (rsp.op_errstr); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_pre_validate_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_pre_validate_cbk_fn); +} + +int +gd_mgmt_v3_pre_validate_req (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_pre_val_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_PRE_VALIDATE, + gd_mgmt_v3_pre_validate_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_pre_val_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_pre_validate (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *req_dict, char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Pre Validation on local node */ + ret = gd_mgmt_v3_pre_validate_fn (op, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pre Validation failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Pre-validation failed " + "on localhost. Please " + "check log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + ret = glusterd_pre_validate_aggr_rsp_dict (op, req_dict, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending Pre Validation req to other nodes in the cluster */ + gd_syncargs_init (&args, req_dict); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_pre_validate_req (op, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pre Validation failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent pre valaidation req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int +glusterd_mgmt_v3_build_payload (dict_t **req, char **op_errstr, dict_t *dict, + glusterd_op_t op) +{ + int32_t ret = -1; + dict_t *req_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (op_errstr); + GF_ASSERT (dict); + + req_dict = dict_new (); + if (!req_dict) + goto out; + + switch (op) { + case GD_OP_SNAP: + dict_copy (dict, req_dict); + break; + default: + break; + } + + *req = req_dict; + ret = 0; +out: + return ret; +} + +int32_t +gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_brick_op_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + /* If the operation failed, then iov can be NULL. So better check the + status of the operation and then worry about iov (if the status of + the command is success) + */ + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_BRICK_OP, + peerinfo, rsp.uuid); + + if (rsp.op_errstr) + free (rsp.op_errstr); + + if (rsp.dict.dict_val) + free (rsp.dict.dict_val); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_brick_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_brick_op_cbk_fn); +} + +int +gd_mgmt_v3_brick_op_req (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_brick_op_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_BRICK_OP, + gd_mgmt_v3_brick_op_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_brick_op_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_brick_op (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *req_dict, char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Perform brick op on local node */ + ret = gd_mgmt_v3_brick_op_fn (op, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Brick ops failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Brick ops failed " + "on localhost. Please " + "check log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending brick op req to other nodes in the cluster */ + gd_syncargs_init (&args, NULL); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_brick_op_req (op, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Brick ops failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent brick op req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_commit_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_commit_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &rsp_dict); + if (ret < 0) { + free (rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + uuid_copy (args->uuid, rsp.uuid); + pthread_mutex_lock (&args->lock_dict); + { + ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict, + rsp_dict); + } + pthread_mutex_unlock (&args->lock_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref (rsp_dict); + + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_COMMIT, + peerinfo, rsp.uuid); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_commit_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_commit_cbk_fn); +} + +int +gd_mgmt_v3_commit_req (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_commit_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_COMMIT, + gd_mgmt_v3_commit_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_commit_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_commit (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (op_ctx); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Commit on local node */ + ret = gd_mgmt_v3_commit_fn (op, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Commit failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Commit failed " + "on localhost. Please " + "check log file for details."); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending commit req to other nodes in the cluster */ + gd_syncargs_init (&args, op_ctx); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_commit_req (op, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Commit failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent commit req for %s to %d " + "peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_post_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_post_val_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_POST_VALIDATE, + peerinfo, rsp.uuid); + if (rsp.op_errstr) + free (rsp.op_errstr); + + if (rsp.dict.dict_val) + free (rsp.dict.dict_val); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_post_validate_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_post_validate_cbk_fn); +} + +int +gd_mgmt_v3_post_validate_req (glusterd_op_t op, int32_t op_ret, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_val_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + req.op_ret = op_ret; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_POST_VALIDATE, + gd_mgmt_v3_post_validate_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_post_val_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op, + int32_t op_ret, dict_t *dict, dict_t *req_dict, + char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + GF_ASSERT (peers); + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Copy the contents of dict like missed snaps info to req_dict */ + dict_copy (dict, req_dict); + + /* Post Validation on local node */ + ret = gd_mgmt_v3_post_validate_fn (op, op_ret, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Post Validation failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Post-validation failed " + "on localhost. Please check " + "log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending Post Validation req to other nodes in the cluster */ + gd_syncargs_init (&args, req_dict); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_post_validate_req (op, op_ret, req_dict, peerinfo, + &args, MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Post Validation failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent post valaidation req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_UNLOCK, + peerinfo, rsp.uuid); + if (rsp.dict.dict_val) + free (rsp.dict.dict_val); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_unlock_cbk_fn); +} + +int +gd_mgmt_v3_unlock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_unlock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_UNLOCK, + gd_mgmt_v3_unlock_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_unlock_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_release_peer_locks (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *dict, int32_t op_ret, + char **op_errstr, int npeers, + gf_boolean_t is_acquired) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + /* If the lock has not been held during this + * transaction, do not send unlock requests */ + if (!is_acquired) + goto out; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending mgmt_v3 unlock req to other nodes in the cluster */ + gd_syncargs_init (&args, NULL); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_unlock (op, dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unlock failed on peers"); + + if (!op_ret && args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent unlock op req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); + +out: + return ret; +} + +int32_t +glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict) +{ + int32_t ret = -1; + int32_t op_ret = -1; + int32_t npeers = 0; + dict_t *req_dict = NULL; + dict_t *tmp_dict = NULL; + glusterd_conf_t *conf = NULL; + char *op_errstr = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *originator_uuid = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (dict); + conf = this->private; + GF_ASSERT (conf); + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + + /* Marking the operation as complete synctasked */ + ret = dict_set_int32 (dict, "is_synctasked", _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set synctasked flag."); + goto out; + } + + /* Use a copy at local unlock as cli response will be sent before + * the unlock and the volname in the dict might be removed */ + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create dict"); + goto out; + } + dict_copy (dict, tmp_dict); + + /* BUILD PEERS LIST */ + INIT_LIST_HEAD (&conf->xaction_peers); + npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op); + + /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ + ret = glusterd_mgmt_v3_initiate_lockdown (conf, op, dict, &op_errstr, + npeers, &is_acquired); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed."); + goto out; + } + + /* BUILD PAYLOAD */ + ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD, + gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + /* PRE-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed"); + goto out; + } + + /* COMMIT OP PHASE */ + ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed"); + goto out; + } + + /* POST-COMMIT VALIDATE PHASE */ + /* As of now, post_validate is not handling any other + commands other than snapshot. So as of now, I am + sending 0 (op_ret as 0). + */ + ret = glusterd_mgmt_v3_post_validate (conf, op, 0, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); + goto out; + } + + ret = 0; +out: + op_ret = ret; + /* UNLOCK PHASE FOR PEERS*/ + (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict, + op_ret, &op_errstr, + npeers, is_acquired); + + /* LOCAL VOLUME(S) UNLOCK */ + if (is_acquired) { + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release mgmt_v3 locks on localhost"); + op_ret = ret; + } + } + + /* SEND CLI RESPONSE */ + glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr); + + if (req_dict) + dict_unref (req_dict); + + if (tmp_dict) + dict_unref (tmp_dict); + + if (op_errstr) { + GF_FREE (op_errstr); + op_errstr = NULL; + } + + return 0; +} + +int32_t +glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict) +{ + int32_t ret = -1; + int32_t op_ret = -1; + int32_t npeers = 0; + dict_t *req_dict = NULL; + dict_t *tmp_dict = NULL; + glusterd_conf_t *conf = NULL; + char *op_errstr = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *originator_uuid = NULL; + gf_boolean_t success = _gf_false; + char *tmp_errstr = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (dict); + conf = this->private; + GF_ASSERT (conf); + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + + /* Marking the operation as complete synctasked */ + ret = dict_set_int32 (dict, "is_synctasked", _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set synctasked flag."); + goto out; + } + + /* Use a copy at local unlock as cli response will be sent before + * the unlock and the volname in the dict might be removed */ + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create dict"); + goto out; + } + dict_copy (dict, tmp_dict); + + /* BUILD PEERS LIST */ + INIT_LIST_HEAD (&conf->xaction_peers); + npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op); + + /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ + ret = glusterd_mgmt_v3_initiate_lockdown (conf, op, dict, &op_errstr, + npeers, &is_acquired); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed."); + goto out; + } + + /* BUILD PAYLOAD */ + ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD, + gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + /* PRE-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed"); + goto out; + } + + /* BRICK OP PHASE for initiating barrier*/ + ret = dict_set_int32 (req_dict, "barrier", 1); + if (ret) + goto out; + ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed"); + goto unbarrier; + } + + /* COMMIT OP PHASE */ + /* TODO: As of now, the plan is to do quorum check before sending the + commit fop and if the quorum succeeds, then commit is sent to all + the other glusterds. + snap create functionality now creates the in memory and on disk + objects for the snapshot (marking them as incomplete), takes the lvm + snapshot and then updates the status of the in memory and on disk + snap objects as complete. Suppose one of the glusterds goes down + after taking the lvm snapshot, but before updating the snap object, + then treat it as a snapshot create failure and trigger cleanup. + i.e the number of commit responses received by the originator + glusterd shold be the same as the number of peers it has sent the + request to (i.e npeers variable). If not, then originator glusterd + will initiate cleanup in post-validate fop. + Question: What if one of the other glusterds goes down as explained + above and along with it the originator glusterd also goes down? + Who will initiate the cleanup? + */ + ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed"); + /* If the main op fails, we should save the error string. + Because, op_errstr will be used for unbarrier and + unlock ops also. We might lose the actual error that + caused the failure. + */ + tmp_errstr = op_errstr; + op_errstr = NULL; + goto unbarrier; + } + + success = _gf_true; +unbarrier: + /* BRICK OP PHASE for removing the barrier*/ + ret = dict_set_int32 (req_dict, "barrier", 0); + if (ret) + goto out; + ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict, + &op_errstr, npeers); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed"); + goto out; + } + + ret = 0; + +out: + op_ret = ret; + + if (success == _gf_false) + op_ret = -1; + + /* POST-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_post_validate (conf, op, op_ret, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); + op_ret = -1; + } + + /* UNLOCK PHASE FOR PEERS*/ + (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict, + op_ret, &op_errstr, + npeers, is_acquired); + + /* If the commit op (snapshot taking) failed, then the error is stored + in tmp_errstr and unbarrier is called. Suppose, if unbarrier also + fails, then the error happened in unbarrier is logged and freed. + The error happened in commit op, which is stored in tmp_errstr + is sent to cli. + */ + if (tmp_errstr) { + if (op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "unbarrier brick op" + "failed with the error %s", op_errstr); + GF_FREE (op_errstr); + op_errstr = NULL; + } + op_errstr = tmp_errstr; + } + + /* LOCAL VOLUME(S) UNLOCK */ + if (is_acquired) { + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release mgmt_v3 locks on localhost"); + op_ret = ret; + } + } + + /* SEND CLI RESPONSE */ + glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr); + + if (req_dict) + dict_unref (req_dict); + + if (tmp_dict) + dict_unref (tmp_dict); + + if (op_errstr) { + GF_FREE (op_errstr); + op_errstr = NULL; + } + + return 0; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h new file mode 100644 index 000000000..b185a9bec --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_MGMT_H_ +#define _GLUSTERD_MGMT_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +int32_t +gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + +int32_t +glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + +int +glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src); + +#endif /* _GLUSTERD_MGMT_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index e3ae369e4..fb5e097d9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -158,7 +158,7 @@ glusterd_generate_txn_id (dict_t *dict, uuid_t **txn_id) uuid_generate (**txn_id); ret = dict_set_bin (dict, "transaction_id", - *txn_id, sizeof (uuid_t)); + *txn_id, sizeof (**txn_id)); if (ret) { gf_log ("", GF_LOG_ERROR, "Failed to set transaction id."); @@ -517,7 +517,20 @@ glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickin brick_req->name = gf_strdup (name); break; + case GD_OP_SNAP: + brick_req = GF_CALLOC (1, sizeof (*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) + goto out; + + brick_req->op = GLUSTERD_VOLUME_BARRIER_OP; + ret = dict_get_str (dict, "volname", &volname); + if (ret) + goto out; + snprintf (name, 1024, "%s-server",volname); + brick_req->name = gf_strdup (name); + break; default: goto out; break; @@ -1659,14 +1672,25 @@ glusterd_stop_bricks (glusterd_volinfo_t *volinfo) int glusterd_start_bricks (glusterd_volinfo_t *volinfo) { - glusterd_brickinfo_t *brickinfo = NULL; + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT (volinfo); list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (glusterd_brick_start (volinfo, brickinfo, _gf_false)) - return -1; + ret = glusterd_brick_start (volinfo, brickinfo, _gf_false); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Failed to start %s:%s for %s", + brickinfo->hostname, brickinfo->path, + volinfo->volname); + goto out; + } } - return 0; + ret = 0; +out: + return ret; } static int @@ -2687,7 +2711,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) continue; - /* Based on the op_version, acquire a cluster or volume lock */ + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ if (priv->op_version < GD_OP_VERSION_4) { proc = &peerinfo->mgmt->proctable [GLUSTERD_MGMT_CLUSTER_LOCK]; @@ -2709,7 +2733,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) dict_ref (dict); proc = &peerinfo->mgmt_v3->proctable - [GLUSTERD_MGMT_V3_VOLUME_LOCK]; + [GLUSTERD_MGMT_V3_LOCK]; if (proc->fn) { ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); @@ -2723,7 +2747,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) ret = proc->fn (NULL, this, dict); if (ret) { gf_log (this->name, GF_LOG_WARNING, - "Failed to send volume lock " + "Failed to send mgmt_v3 lock " "request for operation " "'Volume %s' to peer %s", gd_op_list[opinfo.op], @@ -2770,7 +2794,7 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) continue; /* Based on the op_version, - * release the cluster or volume lock */ + * release the cluster or mgmt_v3 lock */ if (priv->op_version < GD_OP_VERSION_4) { proc = &peerinfo->mgmt->proctable [GLUSTERD_MGMT_CLUSTER_UNLOCK]; @@ -2792,7 +2816,7 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) dict_ref (dict); proc = &peerinfo->mgmt_v3->proctable - [GLUSTERD_MGMT_V3_VOLUME_UNLOCK]; + [GLUSTERD_MGMT_V3_UNLOCK]; if (proc->fn) { ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); @@ -2870,7 +2894,7 @@ glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) /* If the req came from a node running on older op_version * the dict won't be present. Based on it acquiring a cluster - * or volume lock */ + * or mgmt_v3 lock */ if (lock_ctx->dict == NULL) { ret = glusterd_lock (lock_ctx->uuid); glusterd_op_lock_send_resp (lock_ctx->req, ret); @@ -2880,14 +2904,15 @@ glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) gf_log (this->name, GF_LOG_ERROR, "Unable to acquire volname"); else { - ret = glusterd_volume_lock (volname, lock_ctx->uuid); + ret = glusterd_mgmt_v3_lock (volname, lock_ctx->uuid, + "vol"); if (ret) gf_log (this->name, GF_LOG_ERROR, "Unable to acquire lock for %s", volname); } - glusterd_op_volume_lock_send_resp (lock_ctx->req, + glusterd_op_mgmt_v3_lock_send_resp (lock_ctx->req, &event->txn_id, ret); dict_unref (lock_ctx->dict); @@ -2917,7 +2942,7 @@ glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) /* If the req came from a node running on older op_version * the dict won't be present. Based on it releasing the cluster - * or volume lock */ + * or mgmt_v3 lock */ if (lock_ctx->dict == NULL) { ret = glusterd_unlock (lock_ctx->uuid); glusterd_op_unlock_send_resp (lock_ctx->req, ret); @@ -2927,14 +2952,15 @@ glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) gf_log (this->name, GF_LOG_ERROR, "Unable to acquire volname"); else { - ret = glusterd_volume_unlock (volname, lock_ctx->uuid); + ret = glusterd_mgmt_v3_unlock (volname, lock_ctx->uuid, + "vol"); if (ret) gf_log (this->name, GF_LOG_ERROR, "Unable to release lock for %s", volname); } - glusterd_op_volume_unlock_send_resp (lock_ctx->req, + glusterd_op_mgmt_v3_unlock_send_resp (lock_ctx->req, &event->txn_id, ret); dict_unref (lock_ctx->dict); @@ -3466,7 +3492,7 @@ glusterd_op_start_rb_timer (dict_t *dict, uuid_t *txn_id) } ret = dict_set_bin (rb_ctx, "transaction_id", - txn_id, sizeof (uuid_t)); + txn_id, sizeof (*txn_id)); if (ret) { gf_log ("", GF_LOG_ERROR, "Failed to set transaction id."); @@ -4243,7 +4269,7 @@ glusterd_op_txn_complete (uuid_t *txn_id) glusterd_op_reset_ctx (); glusterd_op_clear_errstr (); - /* Based on the op-version, we release the cluster or volume lock */ + /* Based on the op-version, we release the cluster or mgmt_v3 lock */ if (priv->op_version < GD_OP_VERSION_4) { ret = glusterd_unlock (MY_UUID); /* unlock cant/shouldnt fail here!! */ @@ -4260,7 +4286,8 @@ glusterd_op_txn_complete (uuid_t *txn_id) "Locks have not been held."); if (volname) { - ret = glusterd_volume_unlock (volname, MY_UUID); + ret = glusterd_mgmt_v3_unlock (volname, MY_UUID, + "vol"); if (ret) gf_log (this->name, GF_LOG_ERROR, "Unable to release lock for %s", @@ -4355,7 +4382,7 @@ glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx) } ret = dict_set_bin (rsp_dict, "transaction_id", - txn_id, sizeof(uuid_t)); + txn_id, sizeof(*txn_id)); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to set transaction id."); @@ -4467,7 +4494,7 @@ glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) } ret = dict_set_bin (rsp_dict, "transaction_id", - txn_id, sizeof(uuid_t)); + txn_id, sizeof(*txn_id)); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to set transaction id."); @@ -5225,6 +5252,61 @@ _select_rxlators_for_full_self_heal (xlator_t *this, static int +glusterd_bricks_select_snap (dict_t *dict, char **op_errstr, + struct list_head *selected) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_pending_node_t *pending_node = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int brick_index = -1; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get" + " volname"); + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + brick_index++; + if (uuid_compare (brickinfo->uuid, MY_UUID) || + !glusterd_is_brick_started (brickinfo)) { + continue; + } + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + pending_node->index = brick_index; + list_add_tail (&pending_node->list, + selected); + pending_node = NULL; + } + + ret = 0; + +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret); + return ret; +} + +static int fill_shd_status_for_local_bricks (dict_t *dict, glusterd_volinfo_t *volinfo, cli_cmd_type type, dict_t *req_dict) { @@ -5795,7 +5877,9 @@ glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr, ret = glusterd_bricks_select_rebalance_volume (dict, op_errstr, selected); break; - + case GD_OP_SNAP: + ret = glusterd_bricks_select_snap (dict, op_errstr, selected); + break; default: break; } diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index e78eff44d..ed6d7fd57 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -688,7 +688,7 @@ rb_src_brick_restart (glusterd_volinfo_t *volinfo, sleep (2); ret = glusterd_volume_start_glusterfs (volinfo, src_brickinfo, - _gf_false); + _gf_false); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to start " "glusterfs, ret: %d", ret); diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index 18f37c190..27910d132 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -137,6 +137,7 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, case GD_OP_CLEARLOCKS_VOLUME: case GD_OP_HEAL_VOLUME: case GD_OP_QUOTA: + case GD_OP_SNAP: { /*nothing specific to be done*/ break; @@ -645,10 +646,10 @@ glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, } static int32_t -glusterd_vol_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, +glusterd_mgmt_v3_lock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { - gd1_mgmt_volume_lock_rsp rsp = {{0},}; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; int ret = -1; int32_t op_ret = -1; glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; @@ -667,10 +668,10 @@ glusterd_vol_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, } ret = xdr_to_generic (*iov, &rsp, - (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, - "Failed to decode volume lock " + "Failed to decode mgmt_v3 lock " "response received from peer"); rsp.op_ret = -1; rsp.op_errno = EINVAL; @@ -682,13 +683,13 @@ glusterd_vol_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, txn_id = &rsp.txn_id; gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, - "Received volume lock %s from uuid: %s", + "Received mgmt_v3 lock %s from uuid: %s", (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); if (ret) { gf_log (this->name, GF_LOG_CRITICAL, - "Volume lock response received " + "mgmt_v3 lock response received " "from unknown peer: %s. Ignoring response", uuid_utoa (rsp.uuid)); goto out; @@ -717,18 +718,18 @@ out: } int32_t -glusterd_vol_lock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +glusterd_mgmt_v3_lock_peers_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { return glusterd_big_locked_cbk (req, iov, count, myframe, - glusterd_vol_lock_cbk_fn); + glusterd_mgmt_v3_lock_peers_cbk_fn); } static int32_t -glusterd_vol_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +glusterd_mgmt_v3_unlock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { - gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; int ret = -1; int32_t op_ret = -1; glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; @@ -747,10 +748,10 @@ glusterd_vol_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, } ret = xdr_to_generic (*iov, &rsp, - (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, - "Failed to decode volume unlock " + "Failed to decode mgmt_v3 unlock " "response received from peer"); rsp.op_ret = -1; rsp.op_errno = EINVAL; @@ -762,7 +763,7 @@ glusterd_vol_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, txn_id = &rsp.txn_id; gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, - "Received volume unlock %s from uuid: %s", + "Received mgmt_v3 unlock %s from uuid: %s", (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); @@ -770,7 +771,7 @@ glusterd_vol_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, if (ret) { gf_log (this->name, GF_LOG_CRITICAL, - "Volume unlock response received " + "mgmt_v3 unlock response received " "from unknown peer: %s. Ignoring response", uuid_utoa (rsp.uuid)); goto out; @@ -799,11 +800,11 @@ out: } int32_t -glusterd_vol_unlock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +glusterd_mgmt_v3_unlock_peers_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { return glusterd_big_locked_cbk (req, iov, count, myframe, - glusterd_vol_unlock_cbk_fn); + glusterd_mgmt_v3_unlock_peers_cbk_fn); } int32_t @@ -1404,10 +1405,10 @@ out: } int32_t -glusterd_vol_lock (call_frame_t *frame, xlator_t *this, - void *data) +glusterd_mgmt_v3_lock_peers (call_frame_t *frame, xlator_t *this, + void *data) { - gd1_mgmt_volume_lock_req req = {{0},}; + gd1_mgmt_v3_lock_req req = {{0},}; int ret = -1; glusterd_peerinfo_t *peerinfo = NULL; glusterd_conf_t *priv = NULL; @@ -1440,6 +1441,7 @@ glusterd_vol_lock (call_frame_t *frame, xlator_t *this, goto out; } + /* Sending valid transaction ID to peers */ ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); if (ret) { @@ -1460,19 +1462,19 @@ glusterd_vol_lock (call_frame_t *frame, xlator_t *this, ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, peerinfo->mgmt_v3, - GLUSTERD_MGMT_V3_VOLUME_LOCK, NULL, - this, glusterd_vol_lock_cbk, - (xdrproc_t)xdr_gd1_mgmt_volume_lock_req); + GLUSTERD_MGMT_V3_LOCK, NULL, + this, glusterd_mgmt_v3_lock_peers_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_req); out: gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } int32_t -glusterd_vol_unlock (call_frame_t *frame, xlator_t *this, +glusterd_mgmt_v3_unlock_peers (call_frame_t *frame, xlator_t *this, void *data) { - gd1_mgmt_volume_unlock_req req = {{0},}; + gd1_mgmt_v3_unlock_req req = {{0},}; int ret = -1; glusterd_peerinfo_t *peerinfo = NULL; glusterd_conf_t *priv = NULL; @@ -1505,6 +1507,7 @@ glusterd_vol_unlock (call_frame_t *frame, xlator_t *this, goto out; } + /* Sending valid transaction ID to peers */ ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); if (ret) { @@ -1525,10 +1528,10 @@ glusterd_vol_unlock (call_frame_t *frame, xlator_t *this, ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, peerinfo->mgmt_v3, - GLUSTERD_MGMT_V3_VOLUME_UNLOCK, NULL, - this, glusterd_vol_unlock_cbk, + GLUSTERD_MGMT_V3_UNLOCK, NULL, + this, glusterd_mgmt_v3_unlock_peers_cbk, (xdrproc_t) - xdr_gd1_mgmt_volume_unlock_req); + xdr_gd1_mgmt_v3_unlock_req); out: gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; @@ -1955,9 +1958,9 @@ struct rpc_clnt_procedure gd_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = { }; struct rpc_clnt_procedure gd_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = { - [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL }, - [GLUSTERD_MGMT_V3_VOLUME_LOCK] = {"VOLUME_LOCK", glusterd_vol_lock}, - [GLUSTERD_MGMT_V3_VOLUME_UNLOCK] = {"VOLUME_UNLOCK", glusterd_vol_unlock}, + [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL }, + [GLUSTERD_MGMT_V3_LOCK] = {"MGMT_V3_LOCK", glusterd_mgmt_v3_lock_peers}, + [GLUSTERD_MGMT_V3_UNLOCK] = {"MGMT_V3_UNLOCK", glusterd_mgmt_v3_unlock_peers}, }; struct rpc_clnt_program gd_mgmt_prog = { diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index 2490ba665..3aaf359ac 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -528,6 +528,9 @@ out: return ret; } +/* Clean up stale volumes on the peer being detached. The volumes which have + * bricks on other peers are stale with respect to the detached peer. + */ static int glusterd_peer_detach_cleanup (glusterd_conf_t *priv) { @@ -539,6 +542,12 @@ glusterd_peer_detach_cleanup (glusterd_conf_t *priv) list_for_each_entry_safe (volinfo,tmp_volinfo, &priv->volumes, vol_list) { + /* The peer detach checks make sure that, at this point in the + * detach process, there are only volumes contained completely + * within or completely outside the detached peer. + * The only stale volumes at this point are the ones + * completely outside the peer and can be safely deleted. + */ if (!glusterd_friend_contains_vol_bricks (volinfo, MY_UUID)) { gf_log (THIS->name, GF_LOG_INFO, diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c new file mode 100644 index 000000000..d11abee70 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -0,0 +1,5594 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <inttypes.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/resource.h> +#include <sys/statvfs.h> +#include <sys/mount.h> + +#include "globals.h" +#include "compat.h" +#include "protocol-common.h" +#include "xlator.h" +#include "logging.h" +#include "timer.h" +#include "glusterd-mem-types.h" +#include "glusterd.h" +#include "glusterd-sm.h" +#include "glusterd-op-sm.h" +#include "glusterd-utils.h" +#include "glusterd-store.h" +#include "run.h" +#include "glusterd-volgen.h" +#include "glusterd-mgmt.h" +#include "glusterd-syncop.h" + +#include "syscall.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" + +#ifdef GF_LINUX_HOST_OS +#include <mntent.h> +#endif + +char snap_mount_folder[PATH_MAX]; + +static int32_t +glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, + char *snap_uuid, struct list_head *peers, + int32_t op); + +/* This function will restore a snapshot volumes + * + * @param dict dictionary containing snapshot restore request + * @param op_errstr In case of any failure error message will be returned + * in this variable + * @return Negative value on Failure and 0 in success + */ +int +glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (NULL == snap) { + ret = gf_asprintf (op_errstr, "Snap (%s) not found", + snapname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + /* TODO : As of now there is only volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = list_entry (snap->volumes.next, glusterd_volinfo_t, + vol_list); + + ret = glusterd_volinfo_find (snap_volinfo->parent_volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not get volinfo of " + "%s", snap_volinfo->parent_volname); + goto out; + } + + if (is_origin_glusterd (dict) == _gf_true) { + /* From origin glusterd check if * + * any peers with snap bricks is down */ + ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, + snap_volinfo->volname, + &priv->peers, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to find missed snap restores"); + goto out; + } + } + + ret = gd_restore_snap_volume (rsp_dict, volinfo, snap_volinfo); + if (ret) { + /* No need to update op_errstr because it is assumed + * that the called function will do that in case of + * failure. + */ + gf_log (this->name, GF_LOG_ERROR, "Failed to restore " + "snap for %s volume", volname); + goto out; + } + + ret = 0; + + /* TODO: Need to check if we need to delete the snap after the + * operation is successful or not. Also need to persist the state + * of restore operation in the store. + */ +out: + return ret; +} + +/* This function is called before actual restore is taken place. This function + * will validate whether the snapshot volumes are ready to be restored or not. + * + * @param dict dictionary containing snapshot restore request + * @param op_errstr In case of any failure error message will be returned + * in this variable + * @param rsp_dict response dictionary + * @return Negative value on Failure and 0 in success + */ +int +glusterd_snapshot_restore_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + int32_t i = 0; + int32_t volcount = 0; + gf_boolean_t snap_restored = _gf_false; + char key[PATH_MAX] = {0, }; + char *volname = NULL; + char *snapname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (NULL == snap) { + ret = gf_asprintf (op_errstr, "Snap (%s) not found", + snapname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + snap_restored = snap->snap_restored; + + if (snap_restored) { + ret = gf_asprintf (op_errstr, "Snap (%s) is already " + "restored", snapname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + ret = dict_set_str (rsp_dict, "snapname", snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap name(%s)", snapname); + goto out; + } + + ret = dict_get_int32 (dict, "volcount", &volcount); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume count"); + goto out; + } + + /* Snapshot restore will only work if all the volumes, + that are part of the snapshot, are stopped. */ + for (i = 1; i <= volcount; ++i) { + snprintf (key, sizeof (key), "volname%d", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + ret = gf_asprintf (op_errstr, "Volume (%s) not found", + volname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + if (glusterd_is_volume_started (volinfo)) { + ret = gf_asprintf (op_errstr, "Volume (%s) has been " + "started. Volume needs to be stopped before restoring " + "a snapshot.", volname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + } + + ret = 0; +out: + return ret; +} + +int +snap_max_hard_limits_validate (dict_t *dict, char *volname, + uint64_t value, char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + if (volname) { + ret = glusterd_volinfo_find (volname, &volinfo); + if (!ret) { + if (volinfo->is_snap_volume) { + ret = -1; + snprintf (err_str, PATH_MAX, + "%s is a snap volume. Configuring " + "snap-max-hard-limit for a snap " + "volume is prohibited.", volname); + goto out; + } + } + } + + if (value) { + /* Max limit for the system is GLUSTERD_SNAPS_MAX_HARD_LIMIT + * but max limit for a volume is conf->snap_max_hard_limit. + */ + if (volname) { + max_limit = conf->snap_max_hard_limit; + } else { + max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + } + } + + if ((value < 0) || (value > max_limit)) { + ret = -1; + snprintf (err_str, PATH_MAX, "Invalid snap-max-hard-limit" + "%"PRIu64 ". Expected range 0 - %"PRIu64, + value, max_limit); + goto out; + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + } + return ret; +} + +int +glusterd_snapshot_config_prevalidate (dict_t *dict, char **op_errstr) +{ + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + int ret = -1; + int config_command = 0; + char err_str[PATH_MAX] = {0,}; + glusterd_conf_t *conf = NULL; + uint64_t value = 0; + uint64_t hard_limit = 0; + uint64_t soft_limit = 0; + gf_loglevel_t loglevel = GF_LOG_ERROR; + uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + ret = dict_get_int32 (dict, "config-command", &config_command); + if (ret) { + snprintf (err_str, sizeof (err_str), + "failed to get config-command type"); + goto out; + } + + ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit); + + ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit); + + ret = dict_get_str (dict, "volname", &volname); + + if (volname) { + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), + "Volume %s does not exist.", volname); + goto out; + } + } + + switch (config_command) { + case GF_SNAP_CONFIG_TYPE_SET: + if (hard_limit) { + /* Validations for snap-max-hard-limits */ + ret = snap_max_hard_limits_validate (dict, volname, + hard_limit, op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "snap-max-hard-limit validation " + "failed."); + goto out; + } + } + + if (soft_limit) { + max_limit = GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT; + if ((soft_limit < 0) || (soft_limit > max_limit)) { + ret = -1; + snprintf (err_str, PATH_MAX, "Invalid " + "snap-max-soft-limit ""%" + PRIu64 ". Expected range 0 - %"PRIu64, + value, max_limit); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + + if (ret && err_str[0] != '\0') { + gf_log (this->name, loglevel, "%s", err_str); + *op_errstr = gf_strdup (err_str); + } + + return ret; +} + +int +glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char *snap_brick_dir = NULL; + char *snap_device = NULL; + char *tmpstr = NULL; + char key[PATH_MAX] = ""; + char snapbrckcnt[PATH_MAX] = ""; + char snapbrckord[PATH_MAX] = ""; + int ret = -1; + int64_t i = -1; + int64_t j = -1; + int64_t volume_count = 0; + int64_t brick_count = 0; + int64_t brick_order = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dst); + GF_ASSERT (src); + + ret = dict_get_int64 (src, "volcount", &volume_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + + for (i = 0; i < volume_count; i++) { + memset (snapbrckcnt, '\0', sizeof(snapbrckcnt)); + ret = snprintf (snapbrckcnt, sizeof(snapbrckcnt) - 1, + "vol%ld_brickcount", i+1); + ret = dict_get_int64 (src, snapbrckcnt, &brick_count); + if (ret) { + gf_log (this->name, GF_LOG_TRACE, + "No bricks for this volume in this dict"); + continue; + } + + for (j = 0; j < brick_count; j++) { + /* Fetching data from source dict */ + snprintf (key, sizeof(key) - 1, + "vol%ld.brickdir%ld", i+1, j); + + ret = dict_get_ptr (src, key, + (void **)&snap_brick_dir); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to fetch %s", key); + continue; + } + + snprintf (key, sizeof(key) - 1, + "vol%ld.brick_snapdevice%ld", i+1, j); + + ret = dict_get_ptr (src, key, + (void **)&snap_device); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_device"); + goto out; + } + + snprintf (snapbrckord, sizeof(snapbrckord) - 1, + "vol%ld.brick%ld.order", i+1, j); + + ret = dict_get_int64 (src, snapbrckord, &brick_order); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get brick order"); + goto out; + } + + /* Adding the data in the dst dict */ + snprintf (key, sizeof(key) - 1, + "vol%ld.brickdir%ld", i+1, brick_order); + + tmpstr = gf_strdup (snap_brick_dir); + if (!tmpstr) { + gf_log (this->name, GF_LOG_ERROR, + "Out Of Memory"); + ret = -1; + goto out; + } + ret = dict_set_dynstr (dst, key, tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + GF_FREE (tmpstr); + goto out; + } + + snprintf (key, sizeof(key) - 1, + "vol%ld.brick_snapdevice%ld", + i+1, brick_order); + + tmpstr = gf_strdup (snap_device); + if (!tmpstr) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dst, key, tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + GF_FREE (tmpstr); + goto out; + } + } + } + + ret = 0; +out: + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src) +{ + int ret = -1; + int32_t snap_command = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!dst || !src) { + gf_log (this->name, GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_snap_create_pre_val_use_rsp_dict (dst, src); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to use " + "rsp dict"); + goto out; + } + break; + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + char *volname = NULL; + char *snapname = NULL; + char *device = NULL; + char *tmpstr = NULL; + char *brick_dir = NULL; + char snap_brick_dir[PATH_MAX] = ""; + char *mnt_pt = NULL; + char key[PATH_MAX] = ""; + char snap_mount[PATH_MAX] = ""; + char snap_volname[64] = ""; + char err_str[PATH_MAX] = ""; + int ret = -1; + int64_t i = 0; + int64_t volcount = 0; + int64_t brick_count = 0; + int64_t brick_order = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + uuid_t *snap_volid = NULL; + gf_loglevel_t loglevel = GF_LOG_ERROR; + glusterd_conf_t *conf = NULL; + int64_t effective_max_limit = 0; + + this = THIS; + GF_ASSERT (op_errstr); + conf = this->private; + GF_ASSERT (conf); + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to " + "get the volume count"); + goto out; + } + if (volcount <= 0) { + snprintf (err_str, sizeof (err_str), "Invalid volume count %ld " + "supplied", volcount); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to get snapname"); + goto out; + } + + if (glusterd_find_snap_by_name (snapname)) { + ret = -1; + snprintf (err_str, sizeof (err_str), "Snap %s already exists", + snapname); + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "volname%ld", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + snprintf (err_str, sizeof (err_str), + "failed to get volume name"); + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), + "Volume (%s) does not exist ", volname); + goto out; + } + + ret = -1; + if (!glusterd_is_volume_started (volinfo)) { + snprintf (err_str, sizeof (err_str), "volume %s is " + "not started", volinfo->volname); + loglevel = GF_LOG_WARNING; + goto out; + } + if (glusterd_is_defrag_on (volinfo)) { + snprintf (err_str, sizeof (err_str), + "rebalance process is running for the " + "volume %s", volname); + loglevel = GF_LOG_WARNING; + goto out; + } + /* TODO: Also check whether geo replication is running */ + + if (volinfo->is_snap_volume == _gf_true) { + snprintf (err_str, sizeof (err_str), + "Volume %s is a snap volume", volname); + loglevel = GF_LOG_WARNING; + goto out; + } + + if (volinfo->snap_max_hard_limit < conf->snap_max_hard_limit) + effective_max_limit = volinfo->snap_max_hard_limit; + else + effective_max_limit = conf->snap_max_hard_limit; + + if (volinfo->snap_count >= effective_max_limit) { + snprintf (err_str, sizeof (err_str), + "The number of existing snaps has reached " + "the effective maximum limit of %"PRIu64" ," + "for the volume %s", effective_max_limit, + volname); + loglevel = GF_LOG_WARNING; + goto out; + } + + snprintf (key, sizeof(key) - 1, "vol%ld_volid", i); + ret = dict_get_bin (dict, key, (void **)&snap_volid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volid"); + goto out; + } + + /* snap volume uuid is used as lvm snapshot name. + This will avoid restrictions on snapshot names + provided by user */ + GLUSTERD_GET_UUID_NOHYPHEN (snap_volname, *snap_volid); + + brick_count = 0; + brick_order = 0; + /* Adding snap bricks mount paths to the dict */ + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) { + brick_order++; + continue; + } + + if (!glusterd_is_brick_started (brickinfo)) { + gf_log (this->name, GF_LOG_WARNING, + "brick %s:%s is not started", + brickinfo->hostname, + brickinfo->path); + brick_order++; + brick_count++; + continue; + } + + device = glusterd_get_brick_mount_details (brickinfo); + if (!device) { + snprintf (err_str, sizeof (err_str), + "getting device name for the brick " + "%s:%s failed", brickinfo->hostname, + brickinfo->path); + ret = -1; + goto out; + } + + device = glusterd_build_snap_device_path (device, + snap_volname); + if (!device) { + snprintf (err_str, sizeof (err_str), + "cannot copy the snapshot device " + "name (volname: %s, snapname: %s)", + volinfo->volname, snapname); + loglevel = GF_LOG_WARNING; + ret = -1; + goto out; + } + + snprintf (key, sizeof(key), + "vol%ld.brick_snapdevice%ld", i, + brick_count); + ret = dict_set_dynstr (rsp_dict, key, device); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + GF_FREE (device); + goto out; + } + + ret = glusterd_get_brick_root (brickinfo->path, + &mnt_pt); + if (ret) { + snprintf (err_str, sizeof (err_str), + "could not get the root of the brick path %s", + brickinfo->path); + loglevel = GF_LOG_WARNING; + goto out; + } + if (strncmp (brickinfo->path, mnt_pt, strlen(mnt_pt))) { + snprintf (err_str, sizeof (err_str), + "brick: %s brick mount: %s", + brickinfo->path, mnt_pt); + loglevel = GF_LOG_WARNING; + goto out; + } + + brick_dir = &brickinfo->path[strlen (mnt_pt)]; + brick_dir++; + + snprintf (snap_brick_dir, sizeof (snap_brick_dir), + "/%s", brick_dir); + + tmpstr = gf_strdup (snap_brick_dir); + if (!tmpstr) { + ret = -1; + goto out; + } + snprintf (key, sizeof(key), "vol%ld.brickdir%ld", i, + brick_count); + ret = dict_set_dynstr (rsp_dict, key, tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", snap_mount); + goto out; + } + tmpstr = NULL; + + snprintf (key, sizeof(key) - 1, "vol%ld.brick%ld.order", + i, brick_count); + ret = dict_set_int64 (rsp_dict, key, brick_order); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + goto out; + } + + brick_count++; + brick_order++; + } + snprintf (key, sizeof(key) - 1, "vol%ld_brickcount", i); + ret = dict_set_int64 (rsp_dict, key, brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s", + key); + goto out; + } + } + + ret = dict_set_int64 (rsp_dict, "volcount", volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volcount"); + goto out; + } + + ret = 0; +out: + if (ret) + GF_FREE (tmpstr); + + if (ret && err_str[0] != '\0') { + gf_log (this->name, loglevel, "%s", err_str); + *op_errstr = gf_strdup (err_str); + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +glusterd_snap_t* +glusterd_new_snap_object() +{ + glusterd_snap_t *snap = NULL; + + snap = GF_CALLOC (1, sizeof (*snap), gf_gld_mt_snap_t); + + if (snap) { + if (LOCK_INIT (&snap->lock)) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed initiating" + " snap lock"); + GF_FREE (snap); + return NULL; + } + + INIT_LIST_HEAD (&snap->snap_list); + INIT_LIST_HEAD (&snap->volumes); + snap->snapname[0] = 0; + snap->snap_status = GD_SNAP_STATUS_INIT; + } + + return snap; + +}; + +/* Function glusterd_list_add_snapvol adds the volinfo object (snapshot volume) + to the snapshot object list and to the parent volume list */ +int32_t +glusterd_list_add_snapvol (glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol) +{ + int ret = -1; + glusterd_snap_t *snap = NULL; + + GF_VALIDATE_OR_GOTO ("glusterd", origin_vol, out); + GF_VALIDATE_OR_GOTO ("glusterd", snap_vol, out); + + snap = snap_vol->snapshot; + GF_ASSERT (snap); + + list_add_tail (&snap_vol->vol_list, &snap->volumes); + LOCK (&origin_vol->lock); + { + list_add_order (&snap_vol->snapvol_list, + &origin_vol->snap_volumes, + glusterd_compare_snap_vol_time); + origin_vol->snap_count++; + } + UNLOCK (&origin_vol->lock); + + gf_log (THIS->name, GF_LOG_DEBUG, "Snap %s added to the list", + snap->snapname); + ret = 0; + out: + return ret; +} + +glusterd_snap_t* +glusterd_find_snap_by_name (char *snapname) +{ + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (snapname); + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (!strcmp (snap->snapname, snapname)) { + gf_log (THIS->name, GF_LOG_DEBUG, "Found " + "snap %s (%s)", snap->snapname, + uuid_utoa (snap->snap_id)); + goto out; + } + } + snap = NULL; +out: + return snap; +} + +glusterd_snap_t* +glusterd_find_snap_by_id (uuid_t snap_id) +{ + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + + if (uuid_is_null(snap_id)) + goto out; + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (!uuid_compare (snap->snap_id, snap_id)) { + gf_log (THIS->name, GF_LOG_DEBUG, "Found " + "snap %s (%s)", snap->snapname, + uuid_utoa (snap->snap_id)); + goto out; + } + } + snap = NULL; +out: + return snap; +} + +int +glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *brickinfo, + const char *mount_pt, const char *snap_device) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + char msg[1024] = {0, }; + char pidfile[PATH_MAX] = {0, }; + pid_t pid = -1; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + if (!brickinfo) { + gf_log (this->name, GF_LOG_ERROR, "brickinfo NULL"); + goto out; + } + GF_ASSERT (snap_vol); + GF_ASSERT (mount_pt); + GF_ASSERT (snap_device); + + GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_vol, brickinfo, priv); + if (gf_is_service_running (pidfile, &pid)) { + ret = kill (pid, SIGKILL); + if (ret && errno != ESRCH) { + gf_log (this->name, GF_LOG_ERROR, "Unable to kill pid " + "%d reason : %s", pid, strerror(errno)); + goto out; + } + } + + runinit (&runner); + snprintf (msg, sizeof (msg), "umount the snapshot mounted path %s", + mount_pt); + runner_add_args (&runner, "umount", mount_pt, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + + /* We need not do synclock_unlock => runner_run => synclock_lock here. + Because it is needed if we are running a glusterfs process in + runner_run, so that when the glusterfs process started wants to + communicate to glusterd, glusterd wont be able to respond if it + has held the big lock. So we do unlock, run glusterfs process + (thus communicate to glusterd), lock. But since this is not a + glusterfs command that is being run, unlocking and then relocking + is not needed. + */ + ret = runner_run (&runner); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "unmounting the " + "path %s (brick: %s) failed (%s)", mount_pt, + brickinfo->path, strerror (errno)); + goto out; + } + + runinit (&runner); + snprintf (msg, sizeof(msg), "remove snapshot of the brick %s:%s, " + "device: %s", brickinfo->hostname, brickinfo->path, + snap_device); + runner_add_args (&runner, "/sbin/lvremove", "-f", snap_device, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + + ret = runner_run (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "removing snapshot of the " + "brick (%s:%s) of device %s failed", + brickinfo->hostname, brickinfo->path, snap_device); + goto out; + } + +out: + return ret; +} + +int32_t +glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol) +{ + char *mnt_pt = NULL; + struct mntent *entry = NULL; + int32_t brick_count = -1; + int32_t ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + FILE *mtab = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); + + if (!snap_vol) { + gf_log (this->name, GF_LOG_ERROR, "snap volinfo is NULL"); + goto out; + } + + brick_count = -1; + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + brick_count++; + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "snapshot was pending. lvm not present " + "for brick %s:%s of the snap %s.", + brickinfo->hostname, brickinfo->path, + snap_vol->snapshot->snapname); + + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_vol->volname, + brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_DELETE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + brickinfo->hostname, + brickinfo->path); + goto out; + } + + continue; + } + + ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "getting the root " + "of the brick for volume %s (snap %s) failed ", + snap_vol->volname, snap_vol->snapshot->snapname); + goto out; + } + + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (!entry) { + gf_log (this->name, GF_LOG_WARNING, "getting the mount" + " entry for the brick %s:%s of the snap %s " + "(volume: %s) failed", brickinfo->hostname, + brickinfo->path, snap_vol->snapshot->snapname, + snap_vol->volname); + ret = -1; + goto out; + } + ret = glusterd_do_lvm_snapshot_remove (snap_vol, brickinfo, + mnt_pt, + entry->mnt_fsname); + if (mtab) + endmntent (mtab); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "remove the snapshot %s (%s)", + brickinfo->path, entry->mnt_fsname); + goto out; + } + + } + + ret = 0; +out: + return ret; +} + + +int32_t +glusterd_snap_volume_remove (dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, + gf_boolean_t remove_lvm, + gf_boolean_t force) +{ + int ret = -1; + int save_ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *origin_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); + + if (!snap_vol) { + gf_log(this->name, GF_LOG_WARNING, "snap_vol in NULL"); + ret = -1; + goto out; + } + + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + ret = glusterd_brick_stop (snap_vol, brickinfo, _gf_false); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to stop " + "brick for volume %s", snap_vol->volname); + save_ret = ret; + + /* Continue to cleaning up the snap in case of error + if force flag is enabled */ + if (!force) + goto out; + } + } + + /* Only remove the backend lvm when required */ + if (remove_lvm) { + ret = glusterd_lvm_snapshot_remove (rsp_dict, snap_vol); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove " + "lvm snapshot volume %s", snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + } + + ret = glusterd_store_delete_volume (snap_vol); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove volume %s " + "from store", snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + + if (!list_empty(&snap_vol->snapvol_list)) { + ret = glusterd_volinfo_find (snap_vol->parent_volname, + &origin_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "parent volinfo %s for volume %s", + snap_vol->parent_volname, snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + origin_vol->snap_count--; + } + + ret = glusterd_volinfo_delete (snap_vol); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove volinfo " + "%s ", snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + + if (save_ret) + ret = save_ret; +out: + gf_log (this->name, GF_LOG_TRACE, "returning %d", ret); + return ret; +} + +int32_t +glusterd_snapobject_delete (glusterd_snap_t *snap) +{ + if (snap == NULL) { + gf_log(THIS->name, GF_LOG_WARNING, "snap is NULL"); + return -1; + } + + list_del_init (&snap->snap_list); + list_del_init (&snap->volumes); + if (LOCK_DESTROY(&snap->lock)) + gf_log (THIS->name, GF_LOG_WARNING, "Failed destroying lock" + "of snap %s", snap->snapname); + + GF_FREE (snap->description); + GF_FREE (snap); + + return 0; +} + +int32_t +glusterd_snap_remove (dict_t *rsp_dict, + glusterd_snap_t *snap, + gf_boolean_t remove_lvm, + gf_boolean_t force) +{ + int ret = -1; + int save_ret = 0; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap); + + if (!snap) { + gf_log(this->name, GF_LOG_WARNING, "snap is NULL"); + ret = -1; + goto out; + } + + list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) { + ret = glusterd_snap_volume_remove (rsp_dict, snap_vol, + remove_lvm, force); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove " + "volinfo %s for snap %s", snap_vol->volname, + snap->snapname); + save_ret = ret; + + /* Continue to cleaning up the snap in case of error + if force flag is enabled */ + if (!force) + goto out; + } + } + + ret = glusterd_store_delete_snap (snap); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove snap %s " + "from store", snap->snapname); + save_ret = ret; + if (!force) + goto out; + } + + ret = glusterd_snapobject_delete (snap); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "Failed to delete " + "snap object %s", snap->snapname); + + if (save_ret) + ret = save_ret; +out: + gf_log (THIS->name, GF_LOG_TRACE, "returning %d", ret); + return ret; +} + +static int +glusterd_snapshot_get_snapvol_detail (dict_t *dict, + glusterd_volinfo_t *snap_vol, + char *keyprefix, int detail) +{ + int ret = -1; + int snap_limit = 0; + char key[PATH_MAX] = {0,}; + char *value = NULL; + glusterd_volinfo_t *origin_vol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + conf = this->private; + GF_ASSERT (conf); + + GF_ASSERT (dict); + GF_ASSERT (snap_vol); + GF_ASSERT (keyprefix); + + /* Volume Name */ + value = gf_strdup (snap_vol->volname); + if (!value) + goto out; + + snprintf (key, sizeof (key), "%s.volname", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "volume name in dictionary: %s", key); + goto out; + } + + /* Volume ID */ + value = gf_strdup (uuid_utoa (snap_vol->volume_id)); + if (NULL == value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.vol-id", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "volume id in dictionary: %s", key); + goto out; + } + value = NULL; + + /* volume status */ + snprintf (key, sizeof (key), "%s.vol-status", keyprefix); + switch (snap_vol->status) { + case GLUSTERD_STATUS_STARTED: + ret = dict_set_str (dict, key, "Started"); + break; + case GLUSTERD_STATUS_STOPPED: + ret = dict_set_str (dict, key, "Stopped"); + break; + case GD_SNAP_STATUS_NONE: + ret = dict_set_str (dict, key, "None"); + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Invalid volume status"); + ret = -1; + goto out; + } + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volume status" + " in dictionary: %s", key); + goto out; + } + + + ret = glusterd_volinfo_find (snap_vol->parent_volname, &origin_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to get the parent " + "volinfo for the volume %s", snap_vol->volname); + goto out; + } + + /* Snaps available */ + if (conf->snap_max_hard_limit < origin_vol->snap_max_hard_limit) { + snap_limit = conf->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "system snap-max-hard-limit is" + " lesser than volume snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } else { + snap_limit = origin_vol->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "volume snap-max-hard-limit is" + " lesser than system snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } + + snprintf (key, sizeof (key), "%s.snaps-available", keyprefix); + if (snap_limit > origin_vol->snap_count) + ret = dict_set_int32 (dict, key, + snap_limit - origin_vol->snap_count); + else + ret = dict_set_int32 (dict, key, 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set available snaps"); + goto out; + } + + snprintf (key, sizeof (key), "%s.snapcount", keyprefix); + ret = dict_set_int32 (dict, key, origin_vol->snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save snapcount"); + goto out; + } + + if (!detail) + goto out; + + /* Parent volume name */ + value = gf_strdup (snap_vol->parent_volname); + if (!value) + goto out; + + snprintf (key, sizeof (key), "%s.origin-volname", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set parent " + "volume name in dictionary: %s", key); + goto out; + } + value = NULL; + + ret = 0; +out: + if (value) + GF_FREE (value); + + return ret; +} + +static int +glusterd_snapshot_get_snap_detail (dict_t *dict, glusterd_snap_t *snap, + char *keyprefix, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int volcount = 0; + char key[PATH_MAX] = {0,}; + char *value = NULL; + char *timestr = NULL; + struct tm *tmptr = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (dict); + GF_ASSERT (snap); + GF_ASSERT (keyprefix); + + /* Snap Name */ + value = gf_strdup (snap->snapname); + if (!value) + goto out; + + snprintf (key, sizeof (key), "%s.snapname", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap name in dictionary"); + goto out; + } + + /* Snap ID */ + value = gf_strdup (uuid_utoa (snap->snap_id)); + if (NULL == value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.snap-id", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap id in dictionary"); + goto out; + } + value = NULL; + + tmptr = localtime (&(snap->time_stamp)); + if (NULL == tmptr) { + gf_log (this->name, GF_LOG_ERROR, "Failed to convert " + "time_t to *tm"); + ret = -1; + goto out; + } + + timestr = GF_CALLOC (1, PATH_MAX, gf_gld_mt_char); + if (NULL == timestr) { + ret = -1; + goto out; + } + + ret = strftime (timestr, PATH_MAX, "%Y-%m-%d %H:%M:%S", tmptr); + if (0 == ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to convert time_t " + "to string"); + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.snap-time", keyprefix); + ret = dict_set_dynstr (dict, key, timestr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap time stamp in dictionary"); + goto out; + } + timestr = NULL; + + /* If snap description is provided then add that into dictionary */ + if (NULL != snap->description) { + value = gf_strdup (snap->description); + if (NULL == value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.snap-desc", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap description in dictionary"); + goto out; + } + value = NULL; + } + + snprintf (key, sizeof (key), "%s.snap-status", keyprefix); + switch (snap->snap_status) { + case GD_SNAP_STATUS_INIT: + ret = dict_set_str (dict, key, "Init"); + break; + case GD_SNAP_STATUS_IN_USE: + ret = dict_set_str (dict, key, "In-use"); + break; + case GD_SNAP_STATUS_DECOMMISSION: + ret = dict_set_str (dict, key, "Decommisioned"); + break; + case GD_SNAP_STATUS_RESTORED: + ret = dict_set_str (dict, key, "Restored"); + break; + case GD_SNAP_STATUS_NONE: + ret = dict_set_str (dict, key, "None"); + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Invalid snap status"); + ret = -1; + goto out; + } + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap status " + "in dictionary"); + goto out; + } + + if (volinfo) { + volcount = 1; + snprintf (key, sizeof (key), "%s.vol%d", keyprefix, volcount); + ret = glusterd_snapshot_get_snapvol_detail (dict, + volinfo, key, 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get volume detail %s for snap %s", + snap_vol->volname, snap->snapname); + goto out; + } + goto done; + } + + list_for_each_entry_safe (snap_vol, tmp_vol, &snap->volumes, vol_list) { + volcount++; + snprintf (key, sizeof (key), "%s.vol%d", keyprefix, volcount); + ret = glusterd_snapshot_get_snapvol_detail (dict, + snap_vol, key, 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get volume detail %s for snap %s", + snap_vol->volname, snap->snapname); + goto out; + } + } + +done: + snprintf (key, sizeof (key), "%s.vol-count", keyprefix); + ret = dict_set_int32 (dict, key, volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s", + key); + goto out; + } + + ret = 0; +out: + if (value) + GF_FREE (value); + + if (timestr) + GF_FREE(timestr); + + return ret; +} + +static int +glusterd_snapshot_get_all_snap_info (dict_t *dict) +{ + int ret = -1; + int snapcount = 0; + char key[PATH_MAX] = {0,}; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + + /* General parameter validation */ + GF_ASSERT (dict); + + list_for_each_entry_safe (snap, tmp_snap, &priv->snapshots, snap_list) { + snapcount++; + snprintf (key, sizeof (key), "snap%d", snapcount); + ret = glusterd_snapshot_get_snap_detail (dict, snap, key, NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snapdetail for snap %s", snap->snapname); + goto out; + } + } + + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_get_info_by_volume (dict_t *dict, char *volname, + char *err_str, size_t len) +{ + int ret = -1; + int snapcount = 0; + int snap_limit = 0; + char *value = NULL; + char key[PATH_MAX] = ""; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + conf = this->private; + GF_ASSERT (conf); + + GF_ASSERT (dict); + GF_ASSERT (volname); + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, len, "Volume (%s) does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + /* Snaps available */ + if (conf->snap_max_hard_limit < volinfo->snap_max_hard_limit) { + snap_limit = conf->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "system snap-max-hard-limit is" + " lesser than volume snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } else { + snap_limit = volinfo->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "volume snap-max-hard-limit is" + " lesser than system snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } + + if (snap_limit > volinfo->snap_count) + ret = dict_set_int32 (dict, "snaps-available", + snap_limit - volinfo->snap_count); + else + ret = dict_set_int32 (dict, "snaps-available", 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set available snaps"); + goto out; + } + + /* Origin volume name */ + value = gf_strdup (volinfo->volname); + if (!value) + goto out; + + ret = dict_set_dynstr (dict, "origin-volname", value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set parent " + "volume name in dictionary: %s", key); + goto out; + } + value = NULL; + + list_for_each_entry_safe (snap_vol, tmp_vol, &volinfo->snap_volumes, + snapvol_list) { + snapcount++; + snprintf (key, sizeof (key), "snap%d", snapcount); + ret = glusterd_snapshot_get_snap_detail (dict, + snap_vol->snapshot, + key, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snapdetail for snap %s", + snap_vol->snapshot->snapname); + goto out; + } + } + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + if (value) + GF_FREE (value); + + return ret; +} + +/* This function will be called from RPC handler routine. + * This function is responsible for getting the requested + * snapshot info into the dictionary. + * + * @param req RPC request object. Required for sending a response back. + * @param op glusterd operation. Required for sending a response back. + * @param dict pointer to dictionary which will contain both + * request and response key-pair values. + * @return -1 on error and 0 on success + */ +int +glusterd_handle_snapshot_info (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + int8_t snap_driven = 1; + char *volname = NULL; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + int32_t cmd = GF_SNAP_INFO_TYPE_ALL; + + this = THIS; + GF_ASSERT (this); + + GF_VALIDATE_OR_GOTO (this->name, req, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + + + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get type " + "of snapshot info"); + goto out; + } + + switch (cmd) { + case GF_SNAP_INFO_TYPE_ALL: + { + ret = glusterd_snapshot_get_all_snap_info (dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get info of all snaps"); + goto out; + } + break; + } + + case GF_SNAP_INFO_TYPE_SNAP: + { + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snap name"); + goto out; + } + + ret = dict_set_int32 (dict, "snap-count", 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snapcount"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, + "Snap (%s) does not exist", snapname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + ret = -1; + goto out; + } + ret = glusterd_snapshot_get_snap_detail (dict, snap, + "snap1", NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snap detail of snap " + "%s", snap->snapname); + goto out; + } + break; + } + + case GF_SNAP_INFO_TYPE_VOL: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volname"); + goto out; + } + ret = glusterd_snapshot_get_info_by_volume (dict, + volname, err_str, len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volume info of volume " + "%s", volname); + goto out; + } + snap_driven = 0; + break; + } + } + + ret = dict_set_int8 (dict, "snap-driven", snap_driven); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap-driven"); + goto out; + } + + /* If everything is successful then send the response back to cli. + * In case of failure the caller of this function will take care + of the response */ + ret = glusterd_op_send_cli_response (op, 0, 0, req, dict, err_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to send cli " + "response"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +/* This function sets all the snapshot names in the dictionary */ +int +glusterd_snapshot_get_all_snapnames (dict_t *dict) +{ + int ret = -1; + int snapcount = 0; + char *snapname = NULL; + char key[PATH_MAX] = {0,}; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (dict); + + list_for_each_entry_safe (snap, tmp_snap, &priv->snapshots, snap_list) { + snapcount++; + snapname = gf_strdup (snap->snapname); + if (!snapname) { + gf_log (this->name, GF_LOG_ERROR, "strdup failed"); + ret = -1; + goto out; + } + snprintf (key, sizeof (key), "snapname%d", snapcount); + ret = dict_set_dynstr (dict, key, snapname); + if (ret) { + GF_FREE (snapname); + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s", + key); + goto out; + } + } + + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + + return ret; +} + +/* This function sets all the snapshot names + under a given volume in the dictionary */ +int +glusterd_snapshot_get_vol_snapnames (dict_t *dict, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int snapcount = 0; + char *snapname = NULL; + char key[PATH_MAX] = {0,}; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (dict); + GF_ASSERT (volinfo); + + list_for_each_entry_safe (snap_vol, tmp_vol, + &volinfo->snap_volumes, snapvol_list) { + snapcount++; + snapname = gf_strdup (snap_vol->snapshot->snapname); + if (!snapname) { + gf_log (this->name, GF_LOG_ERROR, + "strdup failed"); + ret = -1; + goto out; + } + snprintf (key, sizeof (key), "snapname%d", snapcount); + ret = dict_set_dynstr (dict, key, snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "set %s", key); + GF_FREE (snapname); + goto out; + } + } + + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + + return ret; +} + +int +glusterd_handle_snapshot_list (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_VALIDATE_OR_GOTO (this->name, req, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + + /* Ignore error for getting volname as it is optional */ + ret = dict_get_str (dict, "volname", &volname); + + if (NULL == volname) { + ret = glusterd_snapshot_get_all_snapnames (dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snapshot list"); + goto out; + } + } else { + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, len, + "Volume (%s) does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + goto out; + } + + ret = glusterd_snapshot_get_vol_snapnames (dict, volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snapshot list for volume %s", + volname); + goto out; + } + } + + /* If everything is successful then send the response back to cli. + In case of failure the caller of this function will take of response.*/ + ret = glusterd_op_send_cli_response (op, 0, 0, req, dict, err_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to send cli " + "response"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +/* This is a snapshot create handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt_v3 framework to do the actual snap creation on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot restore request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_create (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + int64_t volcount = 0; + xlator_t *this = NULL; + char key[PATH_MAX] = ""; + char *username = NULL; + char *password = NULL; + uuid_t *uuid_ptr = NULL; + uuid_t tmp_uuid = {0}; + int i = 0; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + if (volcount <= 0) { + gf_log (this->name, GF_LOG_ERROR, "Invalid volume count %ld " + "supplied", volcount); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to get the snapname"); + goto out; + } + + if (strlen(snapname) >= GLUSTERD_MAX_SNAP_NAME) { + snprintf (err_str, len, "snapname cannot exceed 255 " + "characters"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + + uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!uuid_ptr) { + gf_log (this->name, GF_LOG_ERROR, "Out Of Memory"); + ret = -1; + goto out; + } + + uuid_generate (*uuid_ptr); + ret = dict_set_bin (dict, "snap-id", uuid_ptr, sizeof(uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to set snap-id"); + GF_FREE (uuid_ptr); + goto out; + } + uuid_ptr = NULL; + + ret = dict_set_int64 (dict, "snap-time", (int64_t)time(NULL)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to set snap-time"); + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "volname%d", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volume name"); + goto out; + } + + /* generate internal username and password for the snap*/ + uuid_generate (tmp_uuid); + username = gf_strdup (uuid_utoa (tmp_uuid)); + snprintf (key, sizeof(key), "volume%d_username", i); + ret = dict_set_dynstr (dict, key, username); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap " + "username for volume %s", volname); + GF_FREE (username); + goto out; + } + + uuid_generate (tmp_uuid); + password = gf_strdup (uuid_utoa (tmp_uuid)); + snprintf (key, sizeof(key), "volume%d_password", i); + ret = dict_set_dynstr (dict, key, password); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap " + "password for volume %s", volname); + GF_FREE (password); + goto out; + } + + uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!uuid_ptr) { + gf_log (this->name, GF_LOG_ERROR, "Out Of Memory"); + ret = -1; + goto out; + } + + snprintf (key, sizeof(key) - 1, "vol%d_volid", i); + uuid_generate (*uuid_ptr); + ret = dict_set_bin (dict, key, uuid_ptr, sizeof(uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set snap_volid"); + GF_FREE (uuid_ptr); + goto out; + } + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap " + "phases"); + } + +out: + return ret; +} + +/* This is a snapshot status handler function. This function will be + * executed in a originator node. This function is responsible for + * calling mgmt v3 framework to get the actual snapshot status from + * all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot status request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * return : 0 in case of success. + * -1 in case of failure. + * + */ +int +glusterd_handle_snapshot_status (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + char *buf = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + int32_t cmd = -1; + int i = 0; + dict_t *voldict = NULL; + char key[PATH_MAX] = ""; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not get status type"); + goto out; + } + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: + { + /* IF we give "gluster snapshot status" + * then lock is held on all snaps. + * This is the place where necessary information + * (snapname and snapcount)is populated in dictionary + * for locking. + */ + ++i; + list_for_each_entry (snap, &conf->snapshots, snap_list) + { + snprintf (key, sizeof (key), "snapname%d", i); + buf = gf_strdup (snap->snapname); + if (!buf) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, key, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save snapname (%s) " + "in the dictionary", + snap->snapname); + GF_FREE (buf); + goto out; + } + + buf = NULL; + i++; + } + + ret = dict_set_int32 (dict, "snapcount", i - 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not " + "save snapcount in the dictionary"); + goto out; + } + break; + } + + case GF_SNAP_STATUS_TYPE_SNAP: + { + /* IF we give "gluster snapshot status <snapname>" + * then lock is held on single snap. + * This is the place where necessary information + * (snapname)is populated in dictionary + * for locking. + */ + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, "Snap (%s)" + "does not exist", snapname); + gf_log(this->name, GF_LOG_ERROR, + "%s", err_str); + ret = -1; + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch volname"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, len, "Volume (%s) " + "does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + goto out; + } + + i = 1; + list_for_each_entry (snap_volinfo, + &volinfo->snap_volumes, snapvol_list) { + snprintf (key, sizeof (key), "snapname%d", i); + + buf = gf_strdup + (snap_volinfo->snapshot->snapname); + if (!buf) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, key, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save snapname"); + GF_FREE (buf); + goto out; + } + + buf = NULL; + i++; + } + + ret = dict_set_int32 (dict, "snapcount", i-1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save snapcount"); + goto out; + } + break; + default: + { + gf_log (this->name, GF_LOG_ERROR, "Unknown type"); + ret = -1; + goto out; + } + } + + /* Volume lock is not necessary for snapshot status, hence + * turning it off + */ + ret = dict_set_int8 (dict, "hold_vol_locks", 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Setting volume lock " + "flag failed"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate " + "snap phases"); + goto out; + } + + ret = 0; + +out: + if (voldict) { + dict_unref (voldict); + } + return ret; +} + + +/* This is a snapshot restore handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt_v3 framework to do the actual restore on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot restore request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_restore (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *snapname = NULL; + char *buf = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + int32_t i = 0; + char key[PATH_MAX] = ""; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, "Snap (%s) does not exist", snapname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + + list_for_each_entry (snap_volinfo, &snap->volumes, vol_list) { + i++; + snprintf (key, sizeof (key), "volname%d", i); + buf = gf_strdup (snap_volinfo->parent_volname); + if (!buf) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, key, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not set " + "parent volume name %s in the dict", + snap_volinfo->parent_volname); + GF_FREE (buf); + goto out; + } + buf = NULL; + } + + ret = dict_set_int32 (dict, "volcount", i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save volume count"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap " + "phases"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +glusterd_snap_t* +glusterd_create_snap_object (dict_t *dict, dict_t *rsp_dict) +{ + char *snapname = NULL; + uuid_t *snap_id = NULL; + char *description = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + int64_t time_stamp = 0; + + this = THIS; + priv = this->private; + + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + /* Fetch snapname, description, id and time from dict */ + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snapname"); + goto out; + } + + /* Ignore ret value for description*/ + ret = dict_get_str (dict, "description", &description); + + ret = dict_get_bin (dict, "snap-id", (void **)&snap_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snap_id"); + goto out; + } + + ret = dict_get_int64 (dict, "snap-time", &time_stamp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snap-time"); + goto out; + } + if (time_stamp <= 0) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Invalid time-stamp: %ld", + time_stamp); + goto out; + } + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (!strcmp (snap->snapname, snapname) || + !uuid_compare (snap->snap_id, *snap_id)) { + gf_log (THIS->name, GF_LOG_ERROR, + "Found duplicate snap %s (%s)", + snap->snapname, uuid_utoa (snap->snap_id)); + ret = -1; + break; + } + } + if (ret) { + snap = NULL; + goto out; + } + + snap = glusterd_new_snap_object (); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Could not create " + "the snap object for snap %s", snapname); + goto out; + } + + strcpy (snap->snapname, snapname); + uuid_copy (snap->snap_id, *snap_id); + snap->time_stamp = (time_t)time_stamp; + /* Set the status as GD_SNAP_STATUS_INIT and once the backend snapshot + is taken and snap is really ready to use, set the status to + GD_SNAP_STATUS_IN_USE. This helps in identifying the incomplete + snapshots and cleaning them up. + */ + snap->snap_status = GD_SNAP_STATUS_INIT; + if (description) { + snap->description = gf_strdup (description); + if (snap->description == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "Saving the Snap Description Failed"); + ret = -1; + goto out; + } + } + + ret = glusterd_store_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Could not store snap" + "object %s", snap->snapname); + goto out; + } + + list_add_order (&snap->snap_list, &priv->snapshots, + glusterd_compare_snap_time); + + gf_log (this->name, GF_LOG_TRACE, "Snap %s added to the list", + snap->snapname); + + ret = 0; + +out: + if (ret) { + if (snap) + glusterd_snap_remove (rsp_dict, snap, + _gf_true, _gf_true); + snap = NULL; + } + + return snap; +} + +/* Added missed_snap_entry to rsp_dict */ +int32_t +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, + glusterd_brickinfo_t *brickinfo, + int32_t brick_number, int32_t op) +{ + char *buf = NULL; + char missed_snap_entry[PATH_MAX] = ""; + char name_buf[PATH_MAX] = ""; + int32_t missed_snap_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_uuid); + GF_ASSERT (brickinfo); + + snprintf (missed_snap_entry, sizeof(missed_snap_entry), + "%s:%s=%d:%s:%d:%d", uuid_utoa(brickinfo->uuid), + snap_uuid, brick_number, brickinfo->path, op, + GD_MISSED_SNAP_PENDING); + + buf = gf_strdup (missed_snap_entry); + if (!buf) { + ret = -1; + goto out; + } + + /* Fetch the missed_snap_count from the dict */ + ret = dict_get_int32 (rsp_dict, "missed_snap_count", + &missed_snap_count); + if (ret) { + /* Initialize the missed_snap_count for the first time */ + missed_snap_count = 0; + } + + /* Setting the missed_snap_entry in the rsp_dict */ + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + missed_snap_count); + ret = dict_set_dynstr (rsp_dict, name_buf, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set missed_snap_entry (%s) " + "in the rsp_dict.", buf); + GF_FREE (buf); + goto out; + } + missed_snap_count++; + + /* Setting the new missed_snap_count in the dict */ + ret = dict_set_int32 (rsp_dict, "missed_snap_count", + missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set missed_snap_count for %s " + "in the rsp_dict.", missed_snap_entry); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* This function is called to get the device path of the snap lvm. Usually + if /dev/mapper/<group-name>-<lvm-name> is the device for the lvm, + then the snap device will be /dev/<group-name>/<snapname>. + This function takes care of building the path for the snap device. +*/ +char * +glusterd_build_snap_device_path (char *device, char *snapname) +{ + char snap[PATH_MAX] = ""; + char msg[1024] = ""; + char volgroup[PATH_MAX] = ""; + char *snap_device = NULL; + xlator_t *this = NULL; + runner_t runner = {0,}; + char *ptr = NULL; + int ret = -1; + + this = THIS; + GF_ASSERT (this); + if (!device) { + gf_log (this->name, GF_LOG_ERROR, "device is NULL"); + goto out; + } + if (!snapname) { + gf_log (this->name, GF_LOG_ERROR, "snapname is NULL"); + goto out; + } + + runinit (&runner); + runner_add_args (&runner, "/sbin/lvs", "--noheadings", "-o", "vg_name", + device, NULL); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + snprintf (msg, sizeof (msg), "Get volume group for device %s", device); + runner_log (&runner, this->name, GF_LOG_DEBUG, msg); + ret = runner_start (&runner); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume group " + "for device %s", device); + runner_end (&runner); + goto out; + } + ptr = fgets(volgroup, sizeof(volgroup), + runner_chio (&runner, STDOUT_FILENO)); + if (!ptr || !strlen(volgroup)) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume group " + "for snap %s", snapname); + runner_end (&runner); + ret = -1; + goto out; + } + runner_end (&runner); + + snprintf (snap, sizeof(snap), "/dev/%s/%s", gf_trim(volgroup), + snapname); + snap_device = gf_strdup (snap); + if (!snap_device) { + gf_log (this->name, GF_LOG_WARNING, "Cannot copy the " + "snapshot device name for snapname: %s)", snapname); + } + +out: + return snap_device; +} + +/* This function actually calls the command (or the API) for taking the + snapshot of the backend brick filesystem. If this is successful, + then call the glusterd_snap_create function to create the snap object + for glusterd +*/ +char * +glusterd_take_lvm_snapshot (glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *brickinfo) +{ + char msg[NAME_MAX] = ""; + char buf[PATH_MAX] = ""; + char *snap_device = NULL; + char *ptr = NULL; + char *device = NULL; + int ret = -1; + gf_boolean_t match = _gf_false; + runner_t runner = {0,}; + xlator_t *this = NULL; + + this = THIS; + + if (!brickinfo) { + gf_log (this->name, GF_LOG_ERROR, "brickinfo NULL"); + goto out; + } + + device = glusterd_get_brick_mount_details (brickinfo); + if (!device) { + gf_log (this->name, GF_LOG_ERROR, "getting device name for " + "the brick %s:%s failed", brickinfo->hostname, + brickinfo->path); + goto out; + } + + /* Figuring out if setactivationskip flag is supported or not */ + runinit (&runner); + snprintf (msg, sizeof (msg), "running lvcreate help"); + runner_add_args (&runner, "/sbin/lvcreate", "--help", NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + ret = runner_start (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to run lvcreate help"); + runner_end (&runner); + goto out; + } + + /* Looking for setactivationskip in lvcreate --help */ + do { + ptr = fgets(buf, sizeof(buf), + runner_chio (&runner, STDOUT_FILENO)); + if (ptr) { + if (strstr(buf, "setactivationskip")) { + match = _gf_true; + break; + } + } + } while (ptr != NULL); + runner_end (&runner); + + /* Takng the actual snapshot */ + runinit (&runner); + snprintf (msg, sizeof (msg), "taking snapshot of the brick %s:%s", + brickinfo->hostname, brickinfo->path); + if (match == _gf_true) + runner_add_args (&runner, "/sbin/lvcreate", "-s", device, + "--setactivationskip", "n", "--name", + snap_vol->volname, NULL); + else + runner_add_args (&runner, "/sbin/lvcreate", "-s", device, + "--name", snap_vol->volname, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + ret = runner_start (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "taking snapshot of the " + "brick (%s:%s) of device %s failed", + brickinfo->hostname, brickinfo->path, device); + runner_end (&runner); + goto out; + } + runner_end (&runner); + + snap_device = glusterd_build_snap_device_path (device, + snap_vol->volname); + if (!snap_device) { + gf_log (this->name, GF_LOG_WARNING, "Cannot copy the snapshot " + "device name for snap %s (volume id: %s)", + snap_vol->snapshot->snapname, snap_vol->volname); + ret = -1; + goto out; + } + +out: + return snap_device; +} + +int32_t +glusterd_snap_brick_create (char *device, glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *original_brickinfo, + int32_t brick_count, char *snap_brick_dir) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char snap_brick_mount_path[PATH_MAX] = ""; + char snap_brick_path[PATH_MAX] = ""; + char msg[1024] = ""; + struct stat statbuf = {0, }; + runner_t runner = {0, }; + + this = THIS; + priv = this->private; + + GF_ASSERT (device); + GF_ASSERT (snap_volinfo); + GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brick_dir); + + snprintf (snap_brick_mount_path, sizeof (snap_brick_mount_path), + "%s/%s/brick%d", snap_mount_folder, snap_volinfo->volname, + brick_count+1); + + snprintf (snap_brick_path, sizeof (snap_brick_path), "%s%s", + snap_brick_mount_path, snap_brick_dir); + + ret = mkdir_p (snap_brick_mount_path, 0777, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "creating the brick directory" + " %s for the snapshot %s(device: %s) failed", + snap_brick_mount_path, snap_volinfo->volname, device); + goto out; + } + /* mount the snap logical device on the directory inside + /run/gluster/snaps/<snapname>/@snap_brick_mount_path + Way to mount the snap brick via mount api is this. + ret = mount (device, snap_brick_mount_path, entry->mnt_type, + MS_MGC_VAL, "nouuid"); + But for now, mounting using runner apis. + */ + runinit (&runner); + snprintf (msg, sizeof (msg), "mounting snapshot of the brick %s:%s", + original_brickinfo->hostname, original_brickinfo->path); + runner_add_args (&runner, "mount", "-o", "nouuid", device, + snap_brick_mount_path, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + + /* let glusterd get blocked till snapshot is over */ + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mounting the snapshot " + "logical device %s failed (error: %s)", device, + strerror (errno)); + goto out; + } else + gf_log (this->name, GF_LOG_DEBUG, "mounting the snapshot " + "logical device %s successful", device); + + ret = stat (snap_brick_path, &statbuf); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "stat of the brick %s" + "(brick mount: %s) failed (%s)", snap_brick_path, + snap_brick_mount_path, strerror (errno)); + goto out; + } + ret = sys_lsetxattr (snap_brick_path, + GF_XATTR_VOL_ID_KEY, + snap_volinfo->volume_id, 16, + XATTR_REPLACE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "extended attribute %s on %s. Reason: " + "%s, snap: %s", GF_XATTR_VOL_ID_KEY, + snap_brick_path, strerror (errno), + snap_volinfo->volname); + goto out; + } + +out: + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "unmounting the snap brick" + " mount %s", snap_brick_mount_path); + umount (snap_brick_mount_path); + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int32_t +glusterd_add_bricks_to_snap_volume (dict_t *dict, dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *original_brickinfo, + glusterd_brickinfo_t *snap_brickinfo, + char **snap_brick_dir, int64_t volcount, + int32_t brick_count) +{ + char key[PATH_MAX] = ""; + char snap_brick_path[PATH_MAX] = ""; + char *snap_device = NULL; + gf_boolean_t add_missed_snap = _gf_false; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); + GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brickinfo); + GF_ASSERT (snap_brick_dir); + + snprintf (key, sizeof(key) - 1, "vol%ld.brickdir%d", volcount, + brick_count); + ret = dict_get_ptr (dict, key, (void **)snap_brick_dir); + if (ret) { + /* Using original brickinfo here because it will be a + * pending snapshot and storing the original brickinfo + * will help in mapping while recreating the missed snapshot + */ + gf_log (this->name, GF_LOG_WARNING, "Unable to fetch " + "snap mount path (%s). Using original brickinfo", key); + snap_brickinfo->snap_status = -1; + strcpy (snap_brick_path, original_brickinfo->path); + + /* In origiator node add snaps missed + * from different nodes to the dict + */ + if (is_origin_glusterd (dict) == _gf_true) + add_missed_snap = _gf_true; + } else { + /* Create brick-path in the format /var/run/gluster/snaps/ * + * <snap-uuid>/<original-brick#>/snap-brick-dir * + */ + snprintf (snap_brick_path, sizeof(snap_brick_path), + "%s/%s/brick%d%s", snap_mount_folder, + snap_vol->volname, brick_count+1, + *snap_brick_dir); + } + + if ((snap_brickinfo->snap_status != -1) && + (!uuid_compare (original_brickinfo->uuid, MY_UUID)) && + (!glusterd_is_brick_started (original_brickinfo))) { + /* In case if the brick goes down after prevalidate. */ + gf_log (this->name, GF_LOG_WARNING, "brick %s:%s is not" + " started (snap: %s)", + original_brickinfo->hostname, + original_brickinfo->path, + snap_vol->snapshot->snapname); + + snap_brickinfo->snap_status = -1; + strcpy (snap_brick_path, original_brickinfo->path); + add_missed_snap = _gf_true; + } + + if (add_missed_snap) { + ret = glusterd_add_missed_snaps_to_dict (rsp_dict, + snap_vol->volname, + original_brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_CREATE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to add missed" + " snapshot info for %s:%s in the rsp_dict", + original_brickinfo->hostname, + original_brickinfo->path); + goto out; + } + } + + snprintf (key, sizeof(key), "vol%ld.brick_snapdevice%d", + volcount, brick_count); + ret = dict_get_ptr (dict, key, (void **)&snap_device); + if (ret) { + /* If the device name is empty, so will be the brick path + * Hence the missed snap has already been added above + */ + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch " + "snap device (%s). Leaving empty", key); + } else + strcpy (snap_brickinfo->device_path, snap_device); + + ret = gf_canonicalize_path (snap_brick_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to canonicalize path"); + goto out; + } + + strcpy (snap_brickinfo->hostname, original_brickinfo->hostname); + strcpy (snap_brickinfo->path, snap_brick_path); + uuid_copy (snap_brickinfo->uuid, original_brickinfo->uuid); + /* AFR changelog names are based on brick_id and hence the snap + * volume's bricks must retain the same ID */ + strcpy (snap_brickinfo->brick_id, original_brickinfo->brick_id); + list_add_tail (&snap_brickinfo->brick_list, &snap_vol->bricks); + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int32_t +glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol, dict_t *rsp_dict, + glusterd_brickinfo_t *original_brickinfo, + glusterd_brickinfo_t *snap_brickinfo, + char *snap_brick_dir, int32_t brick_count) +{ + char *device = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (origin_vol); + GF_ASSERT (snap_vol); + GF_ASSERT (rsp_dict); + GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brickinfo); + GF_ASSERT (snap_brick_dir); + + device = glusterd_take_lvm_snapshot (snap_vol, original_brickinfo); + /* Fail the snapshot even though snapshot on one of + the bricks fails. At the end when we check whether + the snapshot volume meets quorum or not, then the + the snapshot can either be treated as success, or + in case of failure we can undo the changes and return + failure to cli. */ + if (!device) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to take snapshot of %s:%s", + original_brickinfo->hostname, + original_brickinfo->path); + goto out; + } + + /* create the complete brick here */ + ret = glusterd_snap_brick_create (device, snap_vol, + original_brickinfo, + brick_count, snap_brick_dir); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "not able to" + " create the brickinfo for the snap %s" + ", volume %s", snap_vol->snapshot->snapname, + origin_vol->volname); + goto out; + } + +out: + if (device) + GF_FREE (device); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Look for disconnected peers, for missed snap creates or deletes */ +static int32_t +glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, + char *snap_uuid, struct list_head *peers, + int32_t op) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (peers); + GF_ASSERT (vol); + GF_ASSERT (snap_uuid); + + brick_count = 0; + list_for_each_entry (brickinfo, &vol->bricks, brick_list) { + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + /* If the brick belongs to the same node */ + brick_count++; + continue; + } + + list_for_each_entry (peerinfo, peers, uuid_list) { + if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) { + /* If the brick doesnt belong to this peer */ + continue; + } + + /* Found peer who owns the brick, * + * if peer is not connected or not * + * friend add it to missed snap list */ + if (!(peerinfo->connected) || + (peerinfo->state.state != + GD_FRIEND_STATE_BEFRIENDED)) { + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_uuid, + brickinfo, + brick_count + 1, + op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot " + "info for %s:%s in the " + "rsp_dict", brickinfo->hostname, + brickinfo->path); + goto out; + } + } + } + brick_count++; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +glusterd_volinfo_t * +glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, + dict_t *dict, dict_t *rsp_dict, int64_t volcount) +{ + char key[PATH_MAX] = ""; + char *snap_brick_dir = NULL; + char *username = NULL; + char *password = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_vol = NULL; + uuid_t *snap_volid = NULL; + int32_t ret = -1; + int32_t brick_count = 0; + glusterd_brickinfo_t *snap_brickinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (dict); + GF_ASSERT (origin_vol); + GF_ASSERT (rsp_dict); + + /* fetch username, password and vol_id from dict*/ + snprintf (key, sizeof(key), "volume%ld_username", volcount); + ret = dict_get_str (dict, key, &username); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get %s for " + "snap %s", key, snap->snapname); + goto out; + } + + snprintf (key, sizeof(key), "volume%ld_password", volcount); + ret = dict_get_str (dict, key, &password); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get %s for " + "snap %s", key, snap->snapname); + goto out; + } + + snprintf (key, sizeof(key) - 1, "vol%ld_volid", volcount); + ret = dict_get_bin (dict, key, (void **)&snap_volid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volid"); + goto out; + } + + /* We are not setting the username and password here as + * we need to set the user name and password passed in + * the dictionary + */ + ret = glusterd_volinfo_dup (origin_vol, &snap_vol, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to duplicate volinfo " + "for the snapshot %s", snap->snapname); + goto out; + } + + /* uuid is used as lvm snapshot name. + This will avoid restrictions on snapshot names provided by user */ + GLUSTERD_GET_UUID_NOHYPHEN (snap_vol->volname, *snap_volid); + uuid_copy (snap_vol->volume_id, *snap_volid); + snap_vol->is_snap_volume = _gf_true; + strcpy (snap_vol->parent_volname, origin_vol->volname); + snap_vol->snapshot = snap; + + glusterd_auth_set_username (snap_vol, username); + glusterd_auth_set_password (snap_vol, password); + + /* Adding snap brickinfos to the snap volinfo */ + brick_count = 0; + list_for_each_entry (brickinfo, &origin_vol->bricks, brick_list) { + snap_brickinfo = NULL; + + ret = glusterd_brickinfo_new (&snap_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "initializing the brick for the snap " + "volume failed (snapname: %s)", snap->snapname); + goto out; + } + + ret = glusterd_add_bricks_to_snap_volume (dict, rsp_dict, + snap_vol, + brickinfo, + snap_brickinfo, + &snap_brick_dir, + volcount, + brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add the snap brick for " + "%s:%s to the snap volume", + brickinfo->hostname, brickinfo->path); + GF_FREE (snap_brickinfo); + goto out; + } + + /* Take snapshot of the brick */ + if ((uuid_compare (brickinfo->uuid, MY_UUID)) || + (snap_brickinfo->snap_status == -1)) { + brick_count++; + continue; + } + + ret = glusterd_take_brick_snapshot (origin_vol, snap_vol, + rsp_dict, brickinfo, + snap_brickinfo, + snap_brick_dir, + brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to take snapshot for %s:%s", + brickinfo->hostname, brickinfo->path); + goto out; + } + brick_count++; + } + + /*TODO: the quorum check of the snap volume here */ + + ret = glusterd_store_volinfo (snap_vol, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store snapshot " + "volinfo (%s) for snap %s", snap_vol->volname, + snap->snapname); + goto out; + } + + ret = generate_brick_volfiles (snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "generating the brick " + "volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto out; + } + + ret = generate_client_volfiles (snap_vol, GF_CLIENT_TRUSTED); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "generating the trusted " + "client volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto out; + } + + ret = generate_client_volfiles (snap_vol, GF_CLIENT_OTHER); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "generating the client " + "volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto out; + } + + ret = glusterd_list_add_snapvol (origin_vol, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "could not add the snap " + "volume %s to the list", snap_vol->volname); + goto out; + } + + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "not starting snap brick %s:%s for " + "for the snap %s (volume: %s)", + brickinfo->hostname, brickinfo->path, + snap->snapname, origin_vol->volname); + continue; + } + + ret = glusterd_brick_start (snap_vol, brickinfo, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "starting the " + "brick %s:%s for the snap %s (volume: %s) " + "failed", brickinfo->hostname, brickinfo->path, + snap->snapname, origin_vol->volname); + goto out; + } + } + + snap_vol->status = GLUSTERD_STATUS_STARTED; + ret = glusterd_store_volinfo (snap_vol, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store snap volinfo"); + goto out; + } + +out: + if (ret) { + if (snap_vol) + glusterd_snap_volume_remove (rsp_dict, snap_vol, + _gf_true, _gf_true); + snap_vol = NULL; + } + + return snap_vol; +} + +/* This is a snapshot remove handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt v3 framework to do the actual remove on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot remove request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_remove (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + int64_t volcount = 0; + char *snapname = NULL; + char *volname = NULL; + char key[PATH_MAX] = ""; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, "Snap (%s) does not exist", snapname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + ret = -1; + goto out; + } + + /* Set volnames in the dict to get mgmt_v3 lock */ + list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) { + volcount++; + volname = gf_strdup (snap_vol->parent_volname); + if (!volname) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "strdup failed"); + goto out; + } + + snprintf (key, sizeof (key), "volname%ld", volcount); + ret = dict_set_dynstr (dict, key, volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "volume name in dictionary"); + GF_FREE (volname); + goto out; + } + volname = NULL; + } + ret = dict_set_int64 (dict, "volcount", volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volcount"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap " + "phases"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_remove_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + char *snapname = NULL; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Getting the snap name " + "failed"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Snap %s does not exist", + snapname); + ret = -1; + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_status_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + char *snapname = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + int32_t cmd = -1; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + GF_ASSERT (op_errstr); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "Input dict is NULL"); + goto out; + } + + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not fetch status cmd"); + goto out; + } + + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: + { + break; + } + case GF_SNAP_STATUS_TYPE_SNAP: + { + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not fetch snapname"); + goto out; + } + + if (!glusterd_find_snap_by_name (snapname)) { + ret = gf_asprintf (op_errstr, "Snap (%s) " + "not found", snapname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Snap (%s) " + "not found", snapname); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not fetch volname"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + ret = gf_asprintf (op_errstr, "Volume (%s)" + "not found", volname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Volume " + "%s not present", volname); + goto out; + } + break; + + } + default: + { + gf_log (this->name, GF_LOG_ERROR, "Invalid command"); + break; + } + } + ret = 0; + +out: + return ret; +} + +int32_t +glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + char *snapname = NULL; + char *dup_snapname = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (op_errstr); + + priv = this->private; + GF_ASSERT (priv); + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Getting the snap name " + "failed"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Snap %s does not exist", + snapname); + ret = -1; + goto out; + } + + if (is_origin_glusterd (dict) == _gf_true) { + /* TODO : As of now there is only volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = list_entry (snap->volumes.next, + glusterd_volinfo_t, + vol_list); + if (!snap_volinfo) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volinfo"); + ret = -1; + goto out; + } + + /* From origin glusterd check if * + * any peers with snap bricks is down */ + ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, + snap_volinfo->volname, + &priv->peers, + GF_SNAP_OPTION_TYPE_DELETE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to find missed snap deletes"); + goto out; + } + } + + ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to remove snap %s", + snapname); + goto out; + } + + dup_snapname = gf_strdup (snapname); + if (!dup_snapname) { + gf_log (this->name, GF_LOG_ERROR, "Strdup failed"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, "snapname", dup_snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set the snapname"); + GF_FREE (dup_snapname); + goto out; + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_do_snap_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + char *name = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "getting the snap " + "name failed (volume: %s)", volinfo->volname); + goto out; + } + + /* + If the snapname is not found that means the failure happened at + staging, or in commit, before the snap object is created, in which + case there is nothing to cleanup. So set ret to 0. + */ + snap = glusterd_find_snap_by_name (name); + if (!snap) { + gf_log (this->name, GF_LOG_INFO, "snap %s is not found", name); + ret = 0; + goto out; + } + + ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "removing the snap %s failed", + name); + goto out; + } + + name = NULL; + + ret = 0; + +out: + + return ret; +} + +/* In case of a successful, delete or create operation, during post_validate * + * look for missed snap operations and update the missed snap lists */ +int32_t +glusterd_snapshot_update_snaps_post_validate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + int32_t missed_snap_count = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (op_errstr); + + ret = dict_get_int32 (dict, "missed_snap_count", + &missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_store_update_missed_snaps (dict, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to update missed_snaps_list"); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + int64_t i = 0; + int64_t volcount = 0; + char *snapname = NULL; + char *volname = NULL; + char *tmp_name = NULL; + char key[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *origin_vol = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snapname"); + goto out; + } + tmp_name = gf_strdup (snapname); + if (!tmp_name) { + gf_log (this->name, GF_LOG_ERROR, "Out of memory"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, "snapname", tmp_name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set snapname in rsp_dict"); + GF_FREE (tmp_name); + goto out; + } + tmp_name = NULL; + + snap = glusterd_create_snap_object (dict, rsp_dict); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "creating the" + "snap object %s failed", snapname); + ret = -1; + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "volname%ld", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &origin_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get the volinfo for " + "the volume %s", volname); + goto out; + } + + /* TODO: Create a stub where the bricks are + added parallely by worker threads so that + the snap creating happens parallely. */ + snap_vol = glusterd_do_snap_vol (origin_vol, snap, dict, + rsp_dict, i); + if (!snap_vol) { + ret = -1; + gf_log (this->name, GF_LOG_WARNING, "taking the " + "snapshot of the volume %s failed", volname); + goto out; + } + } + + snap->snap_status = GD_SNAP_STATUS_IN_USE; + ret = glusterd_store_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Could not store snap" + "object %s", snap->snapname); + goto out; + } + + ret = 0; + +out: + if (ret) { + if (snap) + glusterd_snap_remove (rsp_dict, snap, + _gf_true, _gf_true); + snap = NULL; + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +snap_max_hard_limit_set_commit (dict_t *dict, uint64_t value, + char *volname, char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (volname); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + /* TODO: Initiate auto deletion when there is a limit change */ + if (!volname) { + /* For system limit */ + conf->snap_max_hard_limit = value; + + ret = glusterd_store_global_info (this); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to store " + "snap-max-hard-limit for system"); + goto out; + } + } else { + /* For one volume */ + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to get the" + " volinfo for volume %s", volname); + goto out; + } + + volinfo->snap_max_hard_limit = value; + + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to store " + "snap-max-hard-limit for volume %s", volname); + goto out; + } + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + } + return ret; +} + +int +snap_max_limits_display_commit (dict_t *rsp_dict, char *volname, + char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + char buf[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + uint64_t active_hard_limit = 0; + uint64_t snap_max_limit = 0; + uint64_t soft_limit_value = -1; + uint64_t count = 0; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (volname); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + if (!volname) { + /* For system limit */ + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + if (volinfo->is_snap_volume == _gf_true) + continue; + snap_max_limit = volinfo->snap_max_hard_limit; + if (snap_max_limit > conf->snap_max_hard_limit) + active_hard_limit = conf->snap_max_hard_limit; + else + active_hard_limit = snap_max_limit; + soft_limit_value = (active_hard_limit * + conf->snap_max_soft_limit) / 100; + + snprintf (buf, sizeof(buf), "volume%ld-volname", count); + ret = dict_set_str (rsp_dict, buf, volinfo->volname); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, snap_max_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-active-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, + active_hard_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-soft-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, soft_limit_value); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + count++; + } + + ret = dict_set_uint64 (rsp_dict, "voldisplaycount", count); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set voldisplaycount"); + goto out; + } + } else { + /* For one volume */ + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to get the" + " volinfo for volume %s", volname); + goto out; + } + + snap_max_limit = volinfo->snap_max_hard_limit; + if (snap_max_limit > conf->snap_max_hard_limit) + active_hard_limit = conf->snap_max_hard_limit; + else + active_hard_limit = snap_max_limit; + + soft_limit_value = (active_hard_limit * + conf->snap_max_soft_limit) / 100; + + snprintf (buf, sizeof(buf), "volume%ld-volname", count); + ret = dict_set_str (rsp_dict, buf, volinfo->volname); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, snap_max_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-active-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, active_hard_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-soft-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, soft_limit_value); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + count++; + + ret = dict_set_uint64 (rsp_dict, "voldisplaycount", count); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set voldisplaycount"); + goto out; + } + + } + + ret = dict_set_uint64 (rsp_dict, "snap-max-hard-limit", + conf->snap_max_hard_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set sys-snap-max-hard-limit "); + goto out; + } + + ret = dict_set_uint64 (rsp_dict, "snap-max-soft-limit", + conf->snap_max_soft_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set sys-snap-max-hard-limit "); + goto out; + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + } + return ret; +} + +int +glusterd_snapshot_config_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + char *volname = NULL; + xlator_t *this = NULL; + int ret = -1; + char err_str[PATH_MAX] = {0,}; + glusterd_conf_t *conf = NULL; + int config_command = 0; + uint64_t hard_limit = 0; + uint64_t soft_limit = 0; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + ret = dict_get_int32 (dict, "config-command", &config_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get config-command type"); + goto out; + } + + /* Ignore the return value of the following dict_get, + * as they are optional + */ + ret = dict_get_str (dict, "volname", &volname); + + ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit); + + ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit); + + switch (config_command) { + case GF_SNAP_CONFIG_TYPE_SET: + if (hard_limit) { + /* Commit ops for snap-max-hard-limit */ + ret = snap_max_hard_limit_set_commit (dict, hard_limit, + volname, + op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "snap-max-hard-limit set " + "commit failed."); + goto out; + } + } + + if (soft_limit) { + /* For system limit */ + conf->snap_max_soft_limit = soft_limit; + + ret = glusterd_store_global_info (this); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to store " + "snap-max-soft-limit for system"); + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", + err_str); + goto out; + } + } + break; + + case GF_SNAP_CONFIG_DISPLAY: + /* Reading data from local node only */ + if (!is_origin_glusterd (dict)) { + ret = 0; + break; + } + + ret = snap_max_limits_display_commit (rsp_dict, volname, + op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "snap-max-limit " + "display commit failed."); + goto out; + } + break; + default: + break; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_get_brick_lvm_details (dict_t *rsp_dict, + glusterd_brickinfo_t *brickinfo, char *volname, + char *device, char *key_prefix) +{ + + int ret = -1; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + xlator_t *this = NULL; + char msg[PATH_MAX] = ""; + char buf[PATH_MAX] = ""; + char *ptr = NULL; + char *token = NULL; + char key[PATH_MAX] = ""; + char *value = NULL; + + GF_ASSERT (rsp_dict); + GF_ASSERT (brickinfo); + GF_ASSERT (volname); + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + device = glusterd_get_brick_mount_details (brickinfo); + if (!device) { + gf_log (this->name, GF_LOG_ERROR, "Getting device name for " + "the brick %s:%s failed", brickinfo->hostname, + brickinfo->path); + goto out; + } + runinit (&runner); + snprintf (msg, sizeof (msg), "running lvs command, " + "for getting snap status"); + /* Using lvs command fetch the Volume Group name, + * Percentage of data filled and Logical Volume size + * + * "-o" argument is used to get the desired information, + * example : "lvs /dev/VolGroup/thin_vol -o vgname,lv_size", + * will get us Volume Group name and Logical Volume size. + * + * Here separator used is ":", + * for the above given command with separator ":", + * The output will be "vgname:lvsize" + */ + runner_add_args (&runner, "lvs", device, "--noheading", "-o", + "vg_name,data_percent,lv_size", + "--separator", ":", NULL); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + ret = runner_start (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not perform lvs action"); + goto end; + } + do { + ptr = fgets (buf, sizeof (buf), + runner_chio (&runner, STDOUT_FILENO)); + + if (ptr == NULL) + break; + token = strtok (buf, ":"); + if (token != NULL) { + while (token && token[0] == ' ') + token++; + if (!token) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Invalid vg entry"); + goto end; + } + value = gf_strdup (token); + if (!value) { + ret = -1; + goto end; + } + ret = snprintf (key, sizeof (key), "%s.vgname", + key_prefix); + if (ret < 0) { + goto end; + } + + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save vgname "); + goto end; + } + } + + token = strtok (NULL, ":"); + if (token != NULL) { + value = gf_strdup (token); + if (!value) { + ret = -1; + goto end; + } + ret = snprintf (key, sizeof (key), "%s.data", + key_prefix); + if (ret < 0) { + goto end; + } + + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save data percent "); + goto end; + } + } + token = strtok (NULL, ":"); + if (token != NULL) { + value = gf_strdup (token); + if (!value) { + ret = -1; + goto end; + } + ret = snprintf (key, sizeof (key), "%s.lvsize", + key_prefix); + if (ret < 0) { + goto end; + } + + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save meta data percent "); + goto end; + } + } + + } while (ptr != NULL); + + ret = 0; + +end: + runner_end (&runner); + +out: + if (ret && value) { + GF_FREE (value); + } + + return ret; +} + +int +glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict, + char *keyprefix, int index, + glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char key[PATH_MAX] = ""; + char *device = NULL; + char *value = NULL; + char brick_path[PATH_MAX] = ""; + char pidfile[PATH_MAX] = ""; + pid_t pid = -1; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (keyprefix); + GF_ASSERT (snap_volinfo); + GF_ASSERT (brickinfo); + + ret = snprintf (key, sizeof (key), "%s.brick%d.path", keyprefix, + index); + if (ret < 0) { + goto out; + } + + ret = snprintf (brick_path, sizeof (brick_path), + "%s:%s", brickinfo->hostname, brickinfo->path); + if (ret < 0) { + goto out; + } + + value = gf_strdup (brick_path); + if (!value) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to store " + "brick_path %s", brickinfo->path); + goto out; + } + + if (brickinfo->snap_status == -1) { + /* Setting vgname as "Pending Snapshot" */ + value = gf_strdup ("Pending Snapshot"); + if (!value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.brick%d.vgname", + keyprefix, index); + ret = dict_set_dynstr (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save vgname "); + goto out; + } + + ret = 0; + goto out; + } + value = NULL; + + ret = snprintf (key, sizeof (key), "%s.brick%d.status", + keyprefix, index); + if (ret < 0) { + goto out; + } + + if (brickinfo->status == GF_BRICK_STOPPED) { + value = gf_strdup ("No"); + if (!value) { + ret = -1; + goto out; + } + ret = dict_set_str (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save brick status"); + goto out; + } + value = NULL; + } else { + value = gf_strdup ("Yes"); + if (!value) { + ret = -1; + goto out; + } + ret = dict_set_str (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save brick status"); + goto out; + } + value = NULL; + + GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_volinfo, + brickinfo, priv); + ret = gf_is_service_running (pidfile, &pid); + + ret = snprintf (key, sizeof (key), "%s.brick%d.pid", + keyprefix, index); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, key, pid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save pid %d", pid); + goto out; + } + } + + ret = snprintf (key, sizeof (key), "%s.brick%d", + keyprefix, index); + if (ret < 0) { + goto out; + } + + ret = glusterd_get_brick_lvm_details (rsp_dict, brickinfo, + snap_volinfo->volname, + device, key); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "brick LVM details"); + goto out; + } +out: + if (ret && value) { + GF_FREE (value); + } + + return ret; +} + +int +glusterd_get_single_snap_status (char **op_errstr, dict_t *rsp_dict, + char *keyprefix, glusterd_snap_t *snap) +{ + int ret = -1; + xlator_t *this = NULL; + char key[PATH_MAX] = ""; + char brickkey[PATH_MAX] = ""; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *tmp_volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int volcount = 0; + int brickcount = 0; + + this = THIS; + GF_ASSERT (this); + + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (keyprefix); + GF_ASSERT (snap); + + list_for_each_entry_safe (snap_volinfo, tmp_volinfo, &snap->volumes, + vol_list) { + ret = snprintf (key, sizeof (key), "%s.vol%d", keyprefix, + volcount); + if (ret < 0) { + goto out; + } + list_for_each_entry (brickinfo, &snap_volinfo->bricks, + brick_list) { + if (!glusterd_is_local_brick (this, snap_volinfo, + brickinfo)) { + brickcount++; + continue; + } + + ret = glusterd_get_single_brick_status (op_errstr, + rsp_dict, key, brickcount, + snap_volinfo, brickinfo); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Getting " + "single snap status failed"); + goto out; + } + brickcount++; + } + ret = snprintf (brickkey, sizeof (brickkey), "%s.brickcount", + key); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, brickkey, brickcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save brick count"); + goto out; + } + volcount++; + } + + ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, key, volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save volcount"); + goto out; + } + +out: + + return ret; +} + +int +glusterd_get_each_snap_object_status (char **op_errstr, dict_t *rsp_dict, + glusterd_snap_t *snap, char *keyprefix) +{ + int ret = -1; + char key[PATH_MAX] = ""; + char *temp = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap); + GF_ASSERT (keyprefix); + + /* TODO : Get all the snap volume info present in snap object, + * as of now, There will be only one snapvolinfo per snap object + */ + ret = snprintf (key, sizeof (key), "%s.snapname", keyprefix); + if (ret < 0) { + goto out; + } + + temp = gf_strdup (snap->snapname); + if (temp == NULL) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (rsp_dict, key, temp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save " + "snap name"); + goto out; + } + + temp = NULL; + + ret = snprintf (key, sizeof (key), "%s.uuid", keyprefix); + if (ret < 0) { + goto out; + } + + temp = gf_strdup (uuid_utoa (snap->snap_id)); + if (temp == NULL) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, key, temp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save " + "snap UUID"); + goto out; + } + + temp = NULL; + + ret = glusterd_get_single_snap_status (op_errstr, rsp_dict, keyprefix, + snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not get single snap status"); + goto out; + } + + ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, key, 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save volcount"); + goto out; + } +out: + if (ret && temp) + GF_FREE (temp); + + return ret; +} + +int +glusterd_get_snap_status_of_volume (char **op_errstr, dict_t *rsp_dict, + char *volname, char *keyprefix) { + int ret = -1; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + char key[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int i = 0; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (volname); + GF_ASSERT (keyprefix); + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volinfo of " + "volume %s", volname); + goto out; + } + + list_for_each_entry_safe (snap_volinfo, temp_volinfo, + &volinfo->snap_volumes, snapvol_list) { + ret = snprintf (key, sizeof (key), "status.snap%d", i); + if (ret < 0) { + goto out; + } + + ret = glusterd_get_each_snap_object_status (op_errstr, + rsp_dict, snap_volinfo->snapshot, key); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Function : " + "glusterd_get_single_snap_status failed"); + goto out; + } + i++; + } + + ret = dict_set_int32 (rsp_dict, "status.snapcount", i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to save snapcount"); + ret = -1; + goto out; + } +out: + return ret; +} + +int +glusterd_get_all_snapshot_status (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t i = 0; + int ret = -1; + char key[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + list_for_each_entry_safe (snap, tmp_snap, + &priv->snapshots, snap_list) { + ret = snprintf (key, sizeof (key), "status.snap%d", i); + if (ret < 0) { + goto out; + } + + ret = glusterd_get_each_snap_object_status (op_errstr, + rsp_dict, snap, key); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not get " + "the details of a snap object: %s", + snap->snapname); + goto out; + } + i++; + } + + ret = dict_set_int32 (rsp_dict, "status.snapcount", i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save snapcount"); + goto out; + } + + ret = 0; +out: + return ret; +} + + +int +glusterd_snapshot_status_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + xlator_t *this = NULL; + int ret = -1; + glusterd_conf_t *conf = NULL; + char *get_buffer = NULL; + int32_t cmd = -1; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + char *volname = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get status cmd type"); + goto out; + } + + ret = dict_set_int32 (rsp_dict, "cmd", cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save status cmd in rsp dictionary"); + goto out; + } + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: + { + ret = glusterd_get_all_snapshot_status (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get snapshot status"); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_SNAP: + { + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + ret = gf_asprintf (op_errstr, "Snap (%s) " + "not found", snapname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get snap volinfo"); + goto out; + } + ret = glusterd_get_each_snap_object_status (op_errstr, + rsp_dict, snap, "status.snap0"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get status of snap %s", get_buffer); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to" + " get volume name"); + goto out; + } + + ret = glusterd_get_snap_status_of_volume (op_errstr, + rsp_dict, volname, "status.vol0"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Function :" + " glusterd_get_snap_status_of_volume " + "failed"); + goto out; + } + } + } + ret = 0; +out: + return ret; +} + +int32_t +glusterd_snapshot_create_postvalidate (dict_t *dict, int32_t op_ret, + char **op_errstr, dict_t *rsp_dict) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + if (op_ret) { + ret = glusterd_do_snap_cleanup (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "cleanup operation " + "failed"); + goto out; + } + } else { + ret = glusterd_snapshot_update_snaps_post_validate (dict, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "create snapshot"); + goto out; + } + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int32_t snap_command = 0; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case (GF_SNAP_OPTION_TYPE_CREATE): + ret = glusterd_snapshot_create_commit (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "create snapshot"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_CONFIG: + ret = glusterd_snapshot_config_commit (dict, op_errstr, + rsp_dict); + break; + + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snapshot_remove_commit (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "delete snapshot"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_restore (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to " + "restore snapshot"); + goto out; + } + + break; + + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_snapshot_status_commit (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "show snapshot status"); + goto out; + } + break; + + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + break; + } + + ret = 0; + +out: + return ret; +} + +int +glusterd_snapshot_brickop (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + int64_t vol_count = 0; + int64_t count = 1; + char key[1024] = {0,}; + char *volname = NULL; + int32_t snap_command = 0; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = dict_get_int64 (dict, "volcount", &vol_count); + if (ret) + goto out; + while (count <= vol_count) { + snprintf (key, 1024, "volname%"PRId64, count); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get volname"); + goto out; + } + ret = dict_set_str (dict, "volname", volname); + if (ret) + goto out; + + ret = gd_brick_op_phase (GD_OP_SNAP, NULL, dict, + op_errstr); + if (ret) + goto out; + volname = NULL; + count++; + } + + dict_del (dict, "volname"); + ret = 0; + break; + case GF_SNAP_OPTION_TYPE_DELETE: + break; + default: + break; + } + +out: + return ret; +} + +int +glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int snap_command = 0; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case (GF_SNAP_OPTION_TYPE_CREATE): + ret = glusterd_snapshot_create_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "pre-validation failed"); + goto out; + } + break; + + case (GF_SNAP_OPTION_TYPE_CONFIG): + ret = glusterd_snapshot_config_prevalidate (dict, op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot config " + "pre-validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_restore_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot restore " + "validation failed"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snapshot_remove_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot remove " + "validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_snapshot_status_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot status " + "validation failed"); + goto out; + } + break; + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr, + dict_t *rsp_dict) +{ + int snap_command = 0; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_snapshot_create_postvalidate (dict, op_ret, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "post-validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_DELETE: + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_update_snaps_post_validate (dict, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "update missed snaps list"); + goto out; + } + break; + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_handle_snapshot_fn (rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = {{0},}; + glusterd_op_t cli_op = GD_OP_SNAP; + int type = 0; + glusterd_conf_t *conf = NULL; + char *host_uuid = NULL; + char err_str[2048] = {0,}; + xlator_t *this = NULL; + char *volname = NULL; + + GF_ASSERT (req); + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len > 0) { + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } + + dict->extra_stdfree = cli_req.dict.dict_val; + + host_uuid = gf_strdup (uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf (err_str, sizeof (err_str), "Failed to get " + "the uuid of local glusterd"); + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, "host-uuid", host_uuid); + if (ret) { + GF_FREE (host_uuid); + goto out; + } + + + } else { + gf_log (this->name, GF_LOG_ERROR, "request dict length is %d", + cli_req.dict.dict_len); + goto out; + } + + ret = dict_get_int32 (dict, "type", &type); + if (ret < 0) { + snprintf (err_str, sizeof (err_str), "Command type not found"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + switch (type) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_handle_snapshot_create (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "failed: %s", err_str); + } + break; + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_handle_snapshot_restore (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot restore " + "failed: %s", err_str); + } + + break; + case GF_SNAP_OPTION_TYPE_INFO: + ret = glusterd_handle_snapshot_info (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot info " + "failed"); + } + break; + case GF_SNAP_OPTION_TYPE_LIST: + ret = glusterd_handle_snapshot_list (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot list " + "failed"); + } + break; + case GF_SNAP_OPTION_TYPE_CONFIG: + /* TODO : Type of lock to be taken when we are setting + * limits system wide + */ + ret = dict_get_str (dict, "volname", &volname); + if (!volname) { + ret = dict_set_int32 (dict, "hold_vol_locks", + _gf_false); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, + "Unable to set hold_vol_locks value " + "as _gf_false"); + goto out; + } + + } + ret = glusterd_mgmt_v3_initiate_all_phases (req, cli_op, dict); + break; + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_handle_snapshot_remove (req, cli_op, dict, + err_str, + sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot delete " + "failed: %s", err_str); + } + break; + case GF_SNAP_OPTION_TYPE_START: + case GF_SNAP_OPTION_TYPE_STOP: + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_handle_snapshot_status (req, cli_op, dict, + err_str, + sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot status " + "failed: %s", err_str); + } + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Unkown snapshot request " + "type (%d)", type); + ret = -1; /* Failure */ + } + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + + return ret; +} + +int +glusterd_handle_snapshot (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, glusterd_handle_snapshot_fn); +} + +static inline void +glusterd_free_snap_op (glusterd_snap_op_t *snap_op) +{ + if (snap_op) { + if (snap_op->brick_path) + GF_FREE (snap_op->brick_path); + + GF_FREE (snap_op); + } +} + +/* Look for duplicates and accordingly update the list */ +int32_t +glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, + glusterd_snap_op_t *missed_snap_op) +{ + int32_t ret = -1; + glusterd_snap_op_t *snap_opinfo = NULL; + gf_boolean_t match = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_snapinfo); + GF_ASSERT(missed_snap_op); + + list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops, + snap_ops_list) { + if ((!strcmp (snap_opinfo->brick_path, + missed_snap_op->brick_path)) && + (snap_opinfo->op == missed_snap_op->op)) { + /* If two entries have conflicting status + * GD_MISSED_SNAP_DONE takes precedence + */ + if ((snap_opinfo->status == GD_MISSED_SNAP_PENDING) && + (missed_snap_op->status == GD_MISSED_SNAP_DONE)) { + snap_opinfo->status = GD_MISSED_SNAP_DONE; + gf_log (this->name, GF_LOG_INFO, + "Updating missed snap status " + "for %s:%d:%s:%d as DONE", + missed_snapinfo->node_snap_info, + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op); + ret = 0; + glusterd_free_snap_op (missed_snap_op); + goto out; + } + match = _gf_true; + break; + } else if ((snap_opinfo->brick_num == + missed_snap_op->brick_num) && + (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) && + (missed_snap_op->op == + GF_SNAP_OPTION_TYPE_DELETE)) { + /* Optimizing create and delete entries for the same + * brick and same node + */ + gf_log (this->name, GF_LOG_INFO, + "Updating missed snap status " + "for %s:%d:%s:%d as DONE", + missed_snapinfo->node_snap_info, + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op); + snap_opinfo->status = GD_MISSED_SNAP_DONE; + ret = 0; + glusterd_free_snap_op (missed_snap_op); + goto out; + } + } + + if (match == _gf_true) { + gf_log (this->name, GF_LOG_INFO, + "Duplicate entry. Not updating"); + glusterd_free_snap_op (missed_snap_op); + } else { + list_add_tail (&missed_snap_op->snap_ops_list, + &missed_snapinfo->snap_ops); + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Add new missed snap entry to the missed_snaps list. */ +int32_t +glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, + char *brick_path, int32_t snap_op, + int32_t snap_status) +{ + int32_t ret = -1; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *missed_snap_op = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t match = _gf_false; + gf_boolean_t free_missed_snap_info = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_info); + GF_ASSERT(brick_path); + + priv = this->private; + GF_ASSERT (priv); + + /* Create the snap_op object consisting of the * + * snap id and the op */ + ret = glusterd_missed_snap_op_new (&missed_snap_op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create new missed snap object."); + ret = -1; + goto out; + } + + missed_snap_op->brick_path = gf_strdup(brick_path); + if (!missed_snap_op->brick_path) { + ret = -1; + goto out; + } + missed_snap_op->brick_num = brick_num; + missed_snap_op->op = snap_op; + missed_snap_op->status = snap_status; + + /* Look for other entries for the same node and same snap */ + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + if (!strcmp (missed_snapinfo->node_snap_info, + missed_info)) { + /* Found missed snapshot info for * + * the same node and same snap */ + match = _gf_true; + break; + } + } + + if (match == _gf_false) { + /* First snap op missed for the brick */ + ret = glusterd_missed_snapinfo_new (&missed_snapinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create missed snapinfo"); + goto out; + } + free_missed_snap_info = _gf_true; + missed_snapinfo->node_snap_info = gf_strdup(missed_info); + if (!missed_snapinfo->node_snap_info) { + ret = -1; + goto out; + } + + list_add_tail (&missed_snap_op->snap_ops_list, + &missed_snapinfo->snap_ops); + list_add_tail (&missed_snapinfo->missed_snaps, + &priv->missed_snaps_list); + + ret = 0; + goto out; + } else { + ret = glusterd_update_missed_snap_entry (missed_snapinfo, + missed_snap_op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to update existing missed snap entry."); + goto out; + } + } + +out: + if (ret) { + glusterd_free_snap_op (missed_snap_op); + + if (missed_snapinfo && + (free_missed_snap_info == _gf_true)) { + if (missed_snapinfo->node_snap_info) + GF_FREE (missed_snapinfo->node_snap_info); + + GF_FREE (missed_snapinfo); + } + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Add missing snap entries to the in-memory conf->missed_snap_list */ +int32_t +glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) +{ + char *buf = NULL; + char *tmp = NULL; + char *save_ptr = NULL; + char *nodeid = NULL; + char *snap_uuid = NULL; + char *brick_path = NULL; + char missed_info[PATH_MAX] = ""; + char name_buf[PATH_MAX] = ""; + int32_t i = -1; + int32_t ret = -1; + int32_t brick_num = -1; + int32_t snap_op = -1; + int32_t snap_status = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + priv = this->private; + GF_ASSERT (priv); + + /* We can update the missed_snaps_list without acquiring * + * any additional locks as big lock will be held. */ + for (i = 0; i < missed_snap_count; i++) { + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + i); + ret = dict_get_str (dict, name_buf, &buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %s", name_buf); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "missed_snap_entry = %s", + buf); + + /* Need to make a duplicate string coz the same dictionary * + * is resent to the non-originator nodes */ + tmp = gf_strdup (buf); + if (!tmp) { + ret = -1; + goto out; + } + + /* Fetch the node-id, snap-id, brick_num, + * brick_path, snap_op and snap status + */ + nodeid = strtok_r (tmp, ":", &save_ptr); + snap_uuid = strtok_r (NULL, "=", &save_ptr); + brick_num = atoi(strtok_r (NULL, ":", &save_ptr)); + brick_path = strtok_r (NULL, ":", &save_ptr); + snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); + snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); + + if (!nodeid || !snap_uuid || !brick_path || + brick_num < 1 || snap_op < 1 || + snap_status < 1) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid missed_snap_entry"); + ret = -1; + goto out; + } + + snprintf (missed_info, sizeof(missed_info), "%s:%s", + nodeid, snap_uuid); + + ret = glusterd_store_missed_snaps_list (missed_info, + brick_num, + brick_path, + snap_op, + snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store missed snaps_list"); + goto out; + } + + GF_FREE (tmp); + tmp = NULL; + } + +out: + if (tmp) + GF_FREE (tmp); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 37cf98894..e28a30c5a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -46,7 +46,7 @@ #include <dirent.h> void -glusterd_replace_slash_with_hipen (char *str) +glusterd_replace_slash_with_hyphen (char *str) { char *ptr = NULL; @@ -85,7 +85,7 @@ glusterd_store_key_vol_brick_set (glusterd_brickinfo_t *brickinfo, GF_ASSERT (len >= PATH_MAX); snprintf (key_vol_brick, len, "%s", brickinfo->path); - glusterd_replace_slash_with_hipen (key_vol_brick); + glusterd_replace_slash_with_hyphen (key_vol_brick); } static void @@ -246,6 +246,20 @@ glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo) if (ret) goto out; + if (strlen(brickinfo->device_path) > 0) { + snprintf (value, sizeof(value), "%s", brickinfo->device_path); + ret = gf_store_save_value (fd, + GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH, value); + if (ret) + goto out; + } + + snprintf (value, sizeof(value), "%d", brickinfo->snap_status); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, + value); + if (ret) + goto out; + if (!brickinfo->vg[0]) goto out; @@ -511,14 +525,13 @@ int _storeopts (dict_t *this, char *key, data_t *value, void *data) int32_t glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) { - char *str = NULL; + char *str = NULL; + char buf[PATH_MAX] = {0,}; + int32_t ret = -1; GF_ASSERT (fd > 0); GF_ASSERT (volinfo); - char buf[PATH_MAX] = {0,}; - int32_t ret = -1; - snprintf (buf, sizeof (buf), "%d", volinfo->type); ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TYPE, buf); if (ret) @@ -560,6 +573,14 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) if (ret) goto out; + snprintf (buf, sizeof (buf), "%s", volinfo->parent_volname); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PARENT_VOLNAME, buf); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to store " + GLUSTERD_STORE_KEY_PARENT_VOLNAME); + goto out; + } + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_ID, uuid_utoa (volinfo->volume_id)); if (ret) @@ -599,6 +620,23 @@ glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) goto out; } + snprintf (buf, sizeof (buf), "%d", volinfo->is_volume_restored); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_IS_RESTORED, buf); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to write is_volume_restored"); + goto out; + } + + snprintf (buf, sizeof (buf), "%"PRIu64, volinfo->snap_max_hard_limit); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + buf); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to write snap-max-hard-limit"); + goto out; + } + out: if (ret) gf_log (THIS->name, GF_LOG_ERROR, "Unable to write volume " @@ -616,8 +654,7 @@ glusterd_store_voldirpath_set (glusterd_volinfo_t *volinfo, char *voldirpath, priv = THIS->private; GF_ASSERT (priv); - snprintf (voldirpath, len, "%s/%s/%s", priv->workdir, - GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname); + GLUSTERD_GET_VOLUME_DIR (voldirpath, volinfo, priv); } static int32_t @@ -631,10 +668,32 @@ glusterd_store_create_volume_dir (glusterd_volinfo_t *volinfo) glusterd_store_voldirpath_set (volinfo, voldirpath, sizeof (voldirpath)); ret = gf_store_mkdir (voldirpath); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } +static int32_t +glusterd_store_create_snap_dir (glusterd_snap_t *snap) +{ + int32_t ret = -1; + char snapdirpath[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + + GLUSTERD_GET_SNAP_DIR (snapdirpath, snap, priv); + + ret = mkdir_p (snapdirpath, 0755, _gf_true); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snaps dir " + "%s", snapdirpath); + } + return ret; +} + int32_t glusterd_store_volinfo_write (int fd, glusterd_volinfo_t *volinfo) { @@ -659,6 +718,49 @@ out: return ret; } +int32_t +glusterd_store_snapinfo_write (glusterd_snap_t *snap) +{ + int32_t ret = -1; + int fd = 0; + char buf[PATH_MAX] = ""; + + GF_ASSERT (snap); + + fd = gf_store_mkstemp (snap->shandle); + if (fd <= 0) + goto out; + + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_ID, + uuid_utoa (snap->snap_id)); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", snap->snap_status); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_STATUS, buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", snap->snap_restored); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_RESTORED, buf); + if (ret) + goto out; + + if (snap->description) { + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_DESC, + snap->description); + if (ret) + goto out; + } + + snprintf (buf, sizeof (buf), "%ld", snap->time_stamp); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, buf); + +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + static void glusterd_store_rbstatepath_set (glusterd_volinfo_t *volinfo, char *rbstatepath, size_t len) @@ -718,6 +820,36 @@ glusterd_store_quota_conf_path_set (glusterd_volinfo_t *volinfo, GLUSTERD_VOLUME_QUOTA_CONFIG); } +static void +glusterd_store_missed_snaps_list_path_set (char *missed_snaps_list, + size_t len) +{ + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (missed_snaps_list); + GF_ASSERT (len <= PATH_MAX); + + snprintf (missed_snaps_list, len, "%s/snaps/" + GLUSTERD_MISSED_SNAPS_LIST_FILE, priv->workdir); +} + +static void +glusterd_store_snapfpath_set (glusterd_snap_t *snap, char *snap_fpath, + size_t len) +{ + glusterd_conf_t *priv = NULL; + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (snap_fpath); + GF_ASSERT (len <= PATH_MAX); + + snprintf (snap_fpath, len, "%s/snaps/%s/%s", priv->workdir, + snap->snapname, GLUSTERD_SNAP_INFO_FILE); +} + int32_t glusterd_store_create_rbstate_shandle_on_absence (glusterd_volinfo_t *volinfo) { @@ -778,6 +910,43 @@ glusterd_store_create_quota_conf_sh_on_absence (glusterd_volinfo_t *volinfo) return ret; } + +static int32_t +glusterd_store_create_missed_snaps_list_shandle_on_absence () +{ + char missed_snaps_list[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + glusterd_store_missed_snaps_list_path_set (missed_snaps_list, + sizeof(missed_snaps_list)); + + ret = gf_store_handle_create_on_absence + (&priv->missed_snaps_list_shandle, + missed_snaps_list); + return ret; +} + +int32_t +glusterd_store_create_snap_shandle_on_absence (glusterd_snap_t *snap) +{ + char snapfpath[PATH_MAX] = {0}; + int32_t ret = 0; + + GF_ASSERT (snap); + + glusterd_store_snapfpath_set (snap, snapfpath, sizeof (snapfpath)); + ret = gf_store_handle_create_on_absence (&snap->shandle, snapfpath); + return ret; +} + int32_t glusterd_store_brickinfos (glusterd_volinfo_t *volinfo, int vol_fd) { @@ -789,7 +958,7 @@ glusterd_store_brickinfos (glusterd_volinfo_t *volinfo, int vol_fd) list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { ret = glusterd_store_brickinfo (volinfo, brickinfo, - brick_count, vol_fd); + brick_count, vol_fd); if (ret) goto out; brick_count++; @@ -1094,6 +1263,60 @@ out: } int32_t +glusterd_store_snap_atomic_update (glusterd_snap_t *snap) +{ + int ret = -1; + GF_ASSERT (snap); + + ret = gf_store_rename_tmppath (snap->shandle); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "Couldn't rename " + "temporary file(s): Reason %s", strerror (errno)); + + return ret; +} + +int32_t +glusterd_store_snap (glusterd_snap_t *snap) +{ + int32_t ret = -1; + + GF_ASSERT (snap); + + ret = glusterd_store_create_snap_dir (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snap dir"); + goto out; + } + + ret = glusterd_store_create_snap_shandle_on_absence (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snap info " + "file"); + goto out; + } + + ret = glusterd_store_snapinfo_write (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to write snap info"); + goto out; + } + + ret = glusterd_store_snap_atomic_update (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR,"Failed to do automic update"); + goto out; + } + +out: + if (ret && snap->shandle) + gf_store_unlink_tmppath (snap->shandle); + + gf_log (THIS->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac) { int32_t ret = -1; @@ -1150,7 +1373,6 @@ out: return ret; } - int32_t glusterd_store_delete_volume (glusterd_volinfo_t *volinfo) { @@ -1177,8 +1399,8 @@ glusterd_store_delete_volume (glusterd_volinfo_t *volinfo) GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv); snprintf (delete_path, sizeof (delete_path), - "%s/"GLUSTERD_TRASH"/%s.deleted", priv->workdir, - uuid_utoa (volinfo->volume_id)); + "%s/"GLUSTERD_TRASH"/%s.deleted", priv->workdir, + uuid_utoa (volinfo->volume_id)); snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH, priv->workdir); @@ -1270,6 +1492,116 @@ out: return ret; } +/*TODO: cleanup the duplicate code and implement a generic function for + * deleting snap/volume depending on the parameter flag */ +int32_t +glusterd_store_delete_snap (glusterd_snap_t *snap) +{ + char pathname[PATH_MAX] = {0,}; + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + char path[PATH_MAX] = {0,}; + char delete_path[PATH_MAX] = {0,}; + char trashdir[PATH_MAX] = {0,}; + struct stat st = {0, }; + xlator_t *this = NULL; + gf_boolean_t rename_fail = _gf_false; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (snap); + GLUSTERD_GET_SNAP_DIR (pathname, snap, priv); + + snprintf (delete_path, sizeof (delete_path), + "%s/"GLUSTERD_TRASH"/snap-%s.deleted", priv->workdir, + uuid_utoa (snap->snap_id)); + + snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH, + priv->workdir); + + ret = mkdir (trashdir, 0777); + if (ret && errno != EEXIST) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create trash " + "directory, reason : %s", strerror (errno)); + ret = -1; + goto out; + } + + ret = rename (pathname, delete_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to rename snap " + "directory %s to %s", pathname, delete_path); + rename_fail = _gf_true; + goto out; + } + + dir = opendir (delete_path); + if (!dir) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s." + " Reason : %s", delete_path, strerror (errno)); + ret = 0; + goto out; + } + + glusterd_for_each_entry (entry, dir); + while (entry) { + snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name); + ret = stat (path, &st); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to stat " + "entry %s : %s", path, strerror (errno)); + goto stat_failed; + } + + if (S_ISDIR (st.st_mode)) + ret = rmdir (path); + else + ret = unlink (path); + + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, " Failed to remove " + "%s. Reason : %s", path, strerror (errno)); + } + + gf_log (this->name, GF_LOG_DEBUG, "%s %s", + ret ? "Failed to remove":"Removed", + entry->d_name); +stat_failed: + memset (path, 0, sizeof(path)); + glusterd_for_each_entry (entry, dir); + } + + ret = closedir (dir); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. " + "Reason : %s",delete_path, strerror (errno)); + } + + ret = rmdir (delete_path); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s", + delete_path, strerror (errno)); + } + ret = rmdir (trashdir); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s, Reason:" + " %s", trashdir, strerror (errno)); + } + +out: + if (snap->shandle) { + gf_store_handle_destroy (snap->shandle); + snap->shandle = NULL; + } + ret = (rename_fail == _gf_true) ? -1: 0; + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} int glusterd_store_global_info (xlator_t *this) @@ -1280,6 +1612,7 @@ glusterd_store_global_info (xlator_t *this) char path[PATH_MAX] = {0,}; gf_store_handle_t *handle = NULL; char *uuid_str = NULL; + char buf[256] = {0, }; conf = this->private; @@ -1332,6 +1665,24 @@ glusterd_store_global_info (xlator_t *this) goto out; } + snprintf (buf, sizeof (buf), "%"PRIu64, conf->snap_max_hard_limit); + ret = gf_store_save_value (handle->fd, + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Storing snap-max-hard-limit failed ret = %d", ret); + goto out; + } + + snprintf (buf, sizeof (buf), "%"PRIu64, conf->snap_max_soft_limit); + ret = gf_store_save_value (handle->fd, + GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Storing snap-max-soft-limit failed ret = %d", ret); + goto out; + } + ret = gf_store_rename_tmppath (handle); out: if (handle) { @@ -1405,15 +1756,94 @@ out: return ret; } +int +glusterd_retrieve_sys_snap_max_limit (xlator_t *this, uint64_t *limit, + char *key) +{ + char *limit_str = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + uint64_t tmp_limit = 0; + char *tmp = NULL; + char path[PATH_MAX] = {0,}; + gf_store_handle_t *handle = NULL; + + GF_ASSERT (this); + priv = this->private; + + GF_ASSERT (priv); + GF_ASSERT (limit); + GF_ASSERT (key); + + if (!priv->handle) { + snprintf (path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_INFO_FILE); + ret = gf_store_handle_retrieve (path, &handle); + + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to get store " + "handle!"); + goto out; + } + + priv->handle = handle; + } + + ret = gf_store_retrieve_value (priv->handle, + key, + &limit_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "No previous %s present", key); + goto out; + } + + tmp_limit = strtoul (limit_str, &tmp, 10); + if ((tmp_limit <= 0) || (tmp && strlen (tmp) > 1)) { + gf_log (this->name, GF_LOG_WARNING, "invalid version number"); + goto out; + } + + *limit = tmp_limit; + + ret = 0; +out: + if (limit_str) + GF_FREE (limit_str); + + return ret; +} static int glusterd_restore_op_version (xlator_t *this) { - glusterd_conf_t *conf = NULL; - int ret = 0; - int op_version = 0; + glusterd_conf_t *conf = NULL; + int ret = 0; + int op_version = 0; conf = this->private; + ret = glusterd_retrieve_sys_snap_max_limit (this, + &conf->snap_max_hard_limit, + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to retrieve system snap-max-hard-limit, " + "setting it to default value(%d)", + GLUSTERD_SNAPS_MAX_HARD_LIMIT); + conf->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + } + + ret = glusterd_retrieve_sys_snap_max_limit (this, + &conf->snap_max_soft_limit, + GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to retrieve system snap-max-soft-limit, " + "setting it to default value(%d)", + GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT); + conf->snap_max_soft_limit = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT; + } + ret = glusterd_retrieve_op_version (this, &op_version); if (!ret) { if ((op_version < GD_OP_VERSION_MIN) || @@ -1501,7 +1931,6 @@ out: int32_t glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) { - int32_t ret = 0; glusterd_brickinfo_t *brickinfo = NULL; gf_store_iter_t *iter = NULL; @@ -1523,7 +1952,7 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) priv = THIS->private; - GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv) + GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv); ret = gf_store_iter_new (volinfo->shandle, &tmpiter); @@ -1606,6 +2035,13 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, strlen (GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) { gf_string2int (value, &brickinfo->decommissioned); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH, + strlen (GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH))) { + strncpy (brickinfo->device_path, value, + sizeof (brickinfo->device_path)); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, + strlen (GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) { + gf_string2int (value, &brickinfo->snap_status); } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_VGNAME, strlen (GLUSTERD_STORE_KEY_BRICK_VGNAME))) { @@ -1659,10 +2095,9 @@ out: int32_t -glusterd_store_retrieve_rbstate (char *volname) +glusterd_store_retrieve_rbstate (glusterd_volinfo_t *volinfo) { int32_t ret = -1; - glusterd_volinfo_t *volinfo = NULL; gf_store_iter_t *iter = NULL; char *key = NULL; char *value = NULL; @@ -1670,15 +2105,13 @@ glusterd_store_retrieve_rbstate (char *volname) glusterd_conf_t *priv = NULL; char path[PATH_MAX] = {0,}; gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + xlator_t *this = NULL; - priv = THIS->private; - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get" - "volinfo for %s.", volname); - goto out; - } + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volinfo); GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); snprintf (path, sizeof (path), "%s/%s", volpath, @@ -1755,16 +2188,15 @@ glusterd_store_retrieve_rbstate (char *volname) goto out; out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); return ret; } int32_t -glusterd_store_retrieve_node_state (char *volname) +glusterd_store_retrieve_node_state (glusterd_volinfo_t *volinfo) { int32_t ret = -1; - glusterd_volinfo_t *volinfo = NULL; gf_store_iter_t *iter = NULL; char *key = NULL; char *value = NULL; @@ -1779,13 +2211,8 @@ glusterd_store_retrieve_node_state (char *volname) this = THIS; GF_ASSERT (this); priv = this->private; - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get" - "volinfo for %s.", volname); - goto out; - } + GF_ASSERT (priv); + GF_ASSERT (volinfo); GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); snprintf (path, sizeof (path), "%s/%s", volpath, @@ -1866,50 +2293,59 @@ out: dict_unref (volinfo->rebal.dict); if (tmp_dict) dict_unref (tmp_dict); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); return ret; } -int32_t -glusterd_store_retrieve_volume (char *volname) -{ - int32_t ret = -1; - glusterd_volinfo_t *volinfo = NULL; - gf_store_iter_t *iter = NULL; - char *key = NULL; - char *value = NULL; - char volpath[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; - int exists = 0; - gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; - ret = glusterd_volinfo_new (&volinfo); - if (ret) - goto out; +int +glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int exists = 0; + char *key = NULL; + char *value = NULL; + char volpath[PATH_MAX] = {0,}; + char path[PATH_MAX] = {0,}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_store_iter_t *iter = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; - strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME); + this = THIS; + GF_ASSERT (this); + conf = THIS->private; + GF_ASSERT (volinfo); - priv = THIS->private; + GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, conf); - GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); snprintf (path, sizeof (path), "%s/%s", volpath, GLUSTERD_VOLUME_INFO_FILE); ret = gf_store_handle_retrieve (path, &volinfo->shandle); - if (ret) + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "volinfo handle is NULL"); goto out; + } ret = gf_store_iter_new (volinfo->shandle, &iter); - if (ret) + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get new store " + "iter"); goto out; + } ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); - if (ret) + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get next store " + "iter"); goto out; + } while (!ret) { + gf_log ("", GF_LOG_DEBUG, "key = %s value = %s", key, value); if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TYPE, strlen (GLUSTERD_STORE_KEY_VOL_TYPE))) { volinfo->type = atoi (value); @@ -1978,6 +2414,15 @@ glusterd_store_retrieve_volume (char *volname) } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CAPS, strlen (GLUSTERD_STORE_KEY_VOL_CAPS))) { volinfo->caps = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + strlen (GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT))) { + volinfo->snap_max_hard_limit = (uint64_t) atoll (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_IS_RESTORED, + strlen (GLUSTERD_STORE_KEY_VOL_IS_RESTORED))) { + volinfo->is_volume_restored = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_PARENT_VOLNAME, + strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) { + strncpy (volinfo->parent_volname, value, sizeof(volinfo->parent_volname) - 1); } else { if (is_key_glusterd_hooks_friendly (key)) { @@ -1994,8 +2439,11 @@ glusterd_store_retrieve_volume (char *volname) goto out; case 0: - gf_log ("", GF_LOG_ERROR, "Unknown key: %s", - key); + /*Ignore GLUSTERD_STORE_KEY_VOL_BRICK since + glusterd_store_retrieve_bricks gets it later*/ + if (!strstr (key, GLUSTERD_STORE_KEY_VOL_BRICK)) + gf_log ("", GF_LOG_WARNING, + "Unknown key: %s", key); break; case 1: @@ -2073,10 +2521,49 @@ glusterd_store_retrieve_volume (char *volname) goto out; ret = gf_store_iter_destroy (iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to destroy store " + "iter"); + goto out; + } + + ret = 0; +out: + return ret; +} + +glusterd_volinfo_t* +glusterd_store_retrieve_volume (char *volname, glusterd_snap_t *snap) +{ + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *origin_volinfo = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volname); + ret = glusterd_volinfo_new (&volinfo); if (ret) goto out; + strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME); + volinfo->snapshot = snap; + if (snap) + volinfo->is_snap_volume = _gf_true; + + ret = glusterd_store_update_volinfo (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to update volinfo " + "for %s volume", volname); + goto out; + } + ret = glusterd_store_retrieve_bricks (volinfo); if (ret) goto out; @@ -2102,13 +2589,30 @@ glusterd_store_retrieve_volume (char *volname) goto out; - list_add_order (&volinfo->vol_list, &priv->volumes, - glusterd_compare_volume_name); + if (!snap) { + list_add_order (&volinfo->vol_list, &priv->volumes, + glusterd_compare_volume_name); + } else { + ret = glusterd_volinfo_find (volinfo->parent_volname, + &origin_volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Parent volinfo " + "not found for %s volume", volname); + goto out; + } + glusterd_list_add_snapvol (origin_volinfo, volinfo); + } out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (ret) { + if (volinfo) + glusterd_volinfo_delete (volinfo); + volinfo = NULL; + } - return ret; + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + + return volinfo; } inline void @@ -2208,9 +2712,9 @@ out: } int32_t -glusterd_store_retrieve_volumes (xlator_t *this) +glusterd_store_retrieve_volumes (xlator_t *this, glusterd_snap_t *snap) { - int32_t ret = 0; + int32_t ret = -1; char path[PATH_MAX] = {0,}; glusterd_conf_t *priv = NULL; DIR *dir = NULL; @@ -2222,51 +2726,58 @@ glusterd_store_retrieve_volumes (xlator_t *this) GF_ASSERT (priv); - snprintf (path, PATH_MAX, "%s/%s", priv->workdir, - GLUSTERD_VOLUME_DIR_PREFIX); + if (snap) + snprintf (path, PATH_MAX, "%s/snaps/%s", priv->workdir, + snap->snapname); + else + snprintf (path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_VOLUME_DIR_PREFIX); dir = opendir (path); if (!dir) { gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path); - ret = -1; goto out; } glusterd_for_each_entry (entry, dir); while (entry) { - ret = glusterd_store_retrieve_volume (entry->d_name); - if (ret) { + if ( entry->d_type != DT_DIR ) + goto next; + + volinfo = glusterd_store_retrieve_volume (entry->d_name, snap); + if (!volinfo) { gf_log ("", GF_LOG_ERROR, "Unable to restore " "volume: %s", entry->d_name); + ret = -1; goto out; } - ret = glusterd_store_retrieve_rbstate (entry->d_name); + ret = glusterd_store_retrieve_rbstate (volinfo); if (ret) { /* Backward compatibility */ gf_log ("", GF_LOG_INFO, "Creating a new rbstate " "for volume: %s.", entry->d_name); - ret = glusterd_volinfo_find (entry->d_name, &volinfo); ret = glusterd_store_create_rbstate_shandle_on_absence (volinfo); ret = glusterd_store_perform_rbstate_store (volinfo); } - ret = glusterd_store_retrieve_node_state (entry->d_name); + ret = glusterd_store_retrieve_node_state (volinfo); if (ret) { /* Backward compatibility */ gf_log ("", GF_LOG_INFO, "Creating a new node_state " "for volume: %s.", entry->d_name); - ret = glusterd_volinfo_find (entry->d_name, &volinfo); - ret = glusterd_store_create_nodestate_sh_on_absence (volinfo); ret = glusterd_store_perform_node_state_store (volinfo); } +next: glusterd_for_each_entry (entry, dir); } + ret = 0; + out: if (dir) closedir (dir); @@ -2276,6 +2787,481 @@ out: } int32_t +glusterd_resolve_snap_bricks (xlator_t *this, glusterd_snap_t *snap) +{ + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT (this); + GF_VALIDATE_OR_GOTO (this->name, snap, out); + + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "resolve brick failed in restore"); + goto out; + } + } + } + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + + return ret; +} + +int +glusterd_store_update_snap (glusterd_snap_t *snap) +{ + int ret = -1; + char *key = NULL; + char *value = NULL; + char snappath[PATH_MAX] = {0,}; + char path[PATH_MAX] = {0,}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_store_iter_t *iter = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + + this = THIS; + conf = this->private; + GF_ASSERT (snap); + + GLUSTERD_GET_SNAP_DIR (snappath, snap, conf); + + snprintf (path, sizeof (path), "%s/%s", snappath, + GLUSTERD_SNAP_INFO_FILE); + + ret = gf_store_handle_retrieve (path, &snap->shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "snap handle is NULL"); + goto out; + } + + ret = gf_store_iter_new (snap->shandle, &iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get new store " + "iter"); + goto out; + } + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get next store " + "iter"); + goto out; + } + + while (!ret) { + gf_log (this->name, GF_LOG_DEBUG, "key = %s value = %s", + key, value); + + if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_ID, + strlen (GLUSTERD_STORE_KEY_SNAP_ID))) { + ret = uuid_parse (value, snap->snap_id); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "Failed to parse uuid"); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_RESTORED, + strlen (GLUSTERD_STORE_KEY_SNAP_RESTORED))) { + snap->snap_restored = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_STATUS, + strlen (GLUSTERD_STORE_KEY_SNAP_STATUS))) { + snap->snap_status = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_DESC, + strlen (GLUSTERD_STORE_KEY_SNAP_DESC))) { + snap->description = gf_strdup (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, + strlen (GLUSTERD_STORE_KEY_SNAP_TIMESTAMP))) { + snap->time_stamp = atoi (value); + } + + GF_FREE (key); + GF_FREE (value); + key = NULL; + value = NULL; + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + } + + if (op_errno != GD_STORE_EOF) + goto out; + + ret = gf_store_iter_destroy (iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to destroy store " + "iter"); + } + +out: + return ret; +} + +int32_t +glusterd_store_retrieve_snap (char *snapname) +{ + int32_t ret = -1; + dict_t *dict = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snapname); + + dict = dict_new(); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create dict"); + ret = -1; + goto out; + } + + snap = glusterd_new_snap_object (); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create " + " snap object"); + goto out; + } + + strncpy (snap->snapname, snapname, strlen(snapname)); + ret = glusterd_store_update_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to update snapshot " + "for %s snap", snapname); + goto out; + } + + ret = glusterd_store_retrieve_volumes (this, snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to retrieve " + "snap volumes for snap %s", snapname); + goto out; + } + + /* Unlike bricks of normal volumes which are resolved at the end of + the glusterd restore, the bricks belonging to the snap volumes of + each snap should be resolved as part of snapshot restore itself. + Because if the snapshot has to be removed, then resolving bricks + helps glusterd in understanding what all bricks have its own uuid + and killing those bricks. + */ + ret = glusterd_resolve_snap_bricks (this, snap); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "resolving the snap bricks" + " failed (snap: %s)", snap?snap->snapname:""); + + /* When the snapshot command from cli is received, the on disk and + in memory structures for the snapshot are created (with the status) + being marked as GD_SNAP_STATUS_INIT. Once the backend snapshot is + taken, the status is changed to GD_SNAP_STATUS_IN_USE. If glusterd + dies after taking the backend snapshot, but before updating the + status, then when glusterd comes up, it should treat that snapshot + as a failed snapshot and clean it up. + */ + if (snap->snap_status != GD_SNAP_STATUS_IN_USE) { + ret = glusterd_snap_remove (dict, snap, _gf_true, _gf_true); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "failed to remove" + " the snapshot %s", snap->snapname); + goto out; + } + + /* TODO: list_add_order can do 'N-square' comparisions and + is not efficient. Find a better solution to store the snap + in order */ + list_add_order (&snap->snap_list, &priv->snapshots, + glusterd_compare_snap_time); + +out: + if (dict) + dict_unref (dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +/* Read the missed_snap_list and update the in-memory structs */ +int32_t +glusterd_store_retrieve_missed_snaps_list (xlator_t *this) +{ + char buf[PATH_MAX] = ""; + char path[PATH_MAX] = ""; + char *missed_node_info = NULL; + char *brick_path = NULL; + char *value = NULL; + char *save_ptr = NULL; + FILE *fp = NULL; + int32_t brick_num = -1; + int32_t snap_op = -1; + int32_t snap_status = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + gf_store_op_errno_t store_errno = GD_STORE_SUCCESS; + + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + /* Get the path of the missed_snap_list */ + glusterd_store_missed_snaps_list_path_set (path, sizeof(path)); + + fp = fopen (path, "r"); + if (!fp) { + /* If errno is ENOENT then there are no missed snaps yet */ + if (errno != ENOENT) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to open %s. " + "Error: %s", path, strerror(errno)); + } else { + gf_log (this->name, GF_LOG_INFO, + "No missed snaps list."); + ret = 0; + } + goto out; + } + + do { + ret = gf_store_read_and_tokenize (fp, buf, + &missed_node_info, &value, + &store_errno); + if (ret) { + if (store_errno == GD_STORE_EOF) { + gf_log (this->name, + GF_LOG_DEBUG, + "EOF for missed_snap_list"); + ret = 0; + break; + } + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch data from " + "missed_snaps_list. Error: %s", + gf_store_strerror (store_errno)); + goto out; + } + + /* Fetch the brick_num, brick_path, snap_op and snap status */ + brick_num = atoi(strtok_r (value, ":", &save_ptr)); + brick_path = strtok_r (NULL, ":", &save_ptr); + snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); + snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); + + if (!missed_node_info || !brick_path || + brick_num < 1 || snap_op < 1 || + snap_status < 1) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid missed_snap_entry"); + ret = -1; + goto out; + } + + ret = glusterd_store_missed_snaps_list (missed_node_info, + brick_num, + brick_path, + snap_op, + snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store missed snaps_list"); + goto out; + } + + } while (store_errno == GD_STORE_SUCCESS); + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_store_retrieve_snaps (xlator_t *this) +{ + int32_t ret = 0; + char path[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + + GF_ASSERT (this); + priv = this->private; + + GF_ASSERT (priv); + + snprintf (path, PATH_MAX, "%s/snaps", priv->workdir); + + dir = opendir (path); + + if (!dir) { + /* If snaps dir doesn't exists ignore the error for + backward compatibility */ + if (errno != ENOENT) { + ret = -1; + gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path); + } + goto out; + } + + glusterd_for_each_entry (entry, dir); + + while (entry) { + if (entry->d_type == DT_DIR) { + ret = glusterd_store_retrieve_snap (entry->d_name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to restore snapshot: %s", + entry->d_name); + goto out; + } + } + + glusterd_for_each_entry (entry, dir); + } + + /* Retrieve missed_snaps_list */ + ret = glusterd_store_retrieve_missed_snaps_list (this); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to retrieve missed_snaps_list"); + goto out; + } + +out: + if (dir) + closedir (dir); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + + return ret; +} + +/* Writes all the contents of conf->missed_snap_list */ +int32_t +glusterd_store_write_missed_snapinfo (int32_t fd) +{ + char value[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT (priv); + + /* Write the missed_snap_entry */ + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + list_for_each_entry (snap_opinfo, + &missed_snapinfo->snap_ops, + snap_ops_list) { + snprintf (value, sizeof(value), "%d:%s:%d:%d", + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op, snap_opinfo->status); + ret = gf_store_save_value + (fd, + missed_snapinfo->node_snap_info, + value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to write missed snapinfo"); + goto out; + } + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Adds the missed snap entries to the in-memory conf->missed_snap_list * + * and writes them to disk */ +int32_t +glusterd_store_update_missed_snaps (dict_t *dict, int32_t missed_snap_count) +{ + int32_t fd = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + priv = this->private; + GF_ASSERT (priv); + + if (missed_snap_count < 1) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_store_create_missed_snaps_list_shandle_on_absence (); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to obtain " + "missed_snaps_list store handle."); + goto out; + } + + fd = gf_store_mkstemp (priv->missed_snaps_list_shandle); + if (fd <= 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create tmp file"); + ret = -1; + goto out; + } + + ret = glusterd_add_missed_snaps_to_list (dict, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snaps to list"); + goto out; + } + + ret = glusterd_store_write_missed_snapinfo (fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to write missed snaps to disk"); + goto out; + } + + ret = gf_store_rename_tmppath (priv->missed_snaps_list_shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to rename the tmp file"); + goto out; + } +out: + if (ret && (fd > 0)) { + ret = gf_store_unlink_tmppath (priv->missed_snaps_list_shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to unlink the tmp file"); + } + ret = -1; + } + + if (fd > 0) + close (fd); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo) { int32_t ret = -1; @@ -2678,7 +3664,11 @@ glusterd_restore () goto out; } - ret = glusterd_store_retrieve_volumes (this); + ret = glusterd_store_retrieve_volumes (this, NULL); + if (ret) + goto out; + + ret = glusterd_store_retrieve_snaps (this); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 955abb09f..64c073a8a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -35,43 +35,56 @@ typedef enum glusterd_store_ver_ac_{ } glusterd_volinfo_ver_ac_t; -#define GLUSTERD_STORE_UUID_KEY "UUID" - -#define GLUSTERD_STORE_KEY_VOL_TYPE "type" -#define GLUSTERD_STORE_KEY_VOL_COUNT "count" -#define GLUSTERD_STORE_KEY_VOL_STATUS "status" -#define GLUSTERD_STORE_KEY_VOL_PORT "port" -#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count" -#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count" -#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count" -#define GLUSTERD_STORE_KEY_VOL_BRICK "brick" -#define GLUSTERD_STORE_KEY_VOL_VERSION "version" -#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type" -#define GLUSTERD_STORE_KEY_VOL_ID "volume-id" -#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status" -#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src" -#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst" -#define GLUSTERD_STORE_KEY_RB_DST_PORT "rb_port" -#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status" -#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op" -#define GLUSTERD_STORE_KEY_USERNAME "username" -#define GLUSTERD_STORE_KEY_PASSWORD "password" -#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version" +#define GLUSTERD_STORE_UUID_KEY "UUID" + +#define GLUSTERD_STORE_KEY_VOL_TYPE "type" +#define GLUSTERD_STORE_KEY_VOL_COUNT "count" +#define GLUSTERD_STORE_KEY_VOL_STATUS "status" +#define GLUSTERD_STORE_KEY_VOL_PORT "port" +#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count" +#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count" +#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count" +#define GLUSTERD_STORE_KEY_VOL_BRICK "brick" +#define GLUSTERD_STORE_KEY_VOL_VERSION "version" +#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type" +#define GLUSTERD_STORE_KEY_VOL_ID "volume-id" +#define GLUSTERD_STORE_KEY_VOL_IS_RESTORED "is-volume-restored" +#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status" +#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src" +#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst" +#define GLUSTERD_STORE_KEY_RB_DST_PORT "rb_port" +#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status" +#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op" +#define GLUSTERD_STORE_KEY_USERNAME "username" +#define GLUSTERD_STORE_KEY_PASSWORD "password" +#define GLUSTERD_STORE_KEY_PARENT_VOLNAME "parent_volname" +#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version" #define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version" -#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname" -#define GLUSTERD_STORE_KEY_BRICK_PATH "path" -#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port" -#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port" +#define GLUSTERD_STORE_KEY_SNAP_NAME "name" +#define GLUSTERD_STORE_KEY_SNAP_ID "snap-id" +#define GLUSTERD_STORE_KEY_SNAP_DESC "desc" +#define GLUSTERD_STORE_KEY_SNAP_TIMESTAMP "time-stamp" +#define GLUSTERD_STORE_KEY_SNAP_STATUS "status" +#define GLUSTERD_STORE_KEY_SNAP_RESTORED "snap-restored" +#define GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT "snap-max-hard-limit" +#define GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT "snap-max-soft-limit" + +#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname" +#define GLUSTERD_STORE_KEY_BRICK_PATH "path" +#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port" +#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port" #define GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED "decommissioned" -#define GLUSTERD_STORE_KEY_BRICK_VGNAME "vg" -#define GLUSTERD_STORE_KEY_BRICK_ID "brick-id" +#define GLUSTERD_STORE_KEY_BRICK_VGNAME "vg" +#define GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH "device_path" +#define GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS "snap-status" +#define GLUSTERD_STORE_KEY_BRICK_ID "brick-id" -#define GLUSTERD_STORE_KEY_PEER_UUID "uuid" -#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname" -#define GLUSTERD_STORE_KEY_PEER_STATE "state" +#define GLUSTERD_STORE_KEY_PEER_UUID "uuid" +#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname" +#define GLUSTERD_STORE_KEY_PEER_STATE "state" -#define GLUSTERD_STORE_KEY_VOL_CAPS "caps" +#define GLUSTERD_STORE_KEY_VOL_CAPS "caps" #define glusterd_for_each_entry(entry, dir) \ do {\ @@ -94,6 +107,9 @@ int32_t glusterd_store_delete_volume (glusterd_volinfo_t *volinfo); int32_t +glusterd_store_delete_snap (glusterd_snap_t *snap); + +int32_t glusterd_retrieve_uuid (); int32_t @@ -128,8 +144,17 @@ int32_t glusterd_store_retrieve_options (xlator_t *this); int32_t +glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo); + +int32_t glusterd_store_options (xlator_t *this, dict_t *opts); +void +glusterd_replace_slash_with_hyphen (char *str); + +int32_t +glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo); + int32_t glusterd_store_create_quota_conf_sh_on_absence (glusterd_volinfo_t *volinfo); @@ -139,4 +164,11 @@ glusterd_store_retrieve_quota_version (glusterd_volinfo_t *volinfo); int glusterd_store_save_quota_version_and_cksum (glusterd_volinfo_t *volinfo); +int32_t +glusterd_store_snap (glusterd_snap_t *snap); + +int32_t +glusterd_store_update_missed_snaps (dict_t *dict, + int32_t missed_snap_count); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index 578bce897..b36d6f616 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -21,7 +21,7 @@ extern glusterd_op_info_t opinfo; -static inline void +void gd_synctask_barrier_wait (struct syncargs *args, int count) { glusterd_conf_t *conf = THIS->private; @@ -56,17 +56,17 @@ gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno, "Error: %s", op_errstr); switch (op_code) { - case GLUSTERD_MGMT_V3_VOLUME_LOCK: + case GLUSTERD_MGMT_V3_LOCK: { snprintf (op_err, sizeof(op_err) - 1, - "Locking volume failed " + "Locking failed " "on %s. %s", peer_str, err_str); break; } - case GLUSTERD_MGMT_V3_VOLUME_UNLOCK: + case GLUSTERD_MGMT_V3_UNLOCK: { snprintf (op_err, sizeof(op_err) - 1, - "Unlocking volume failed " + "Unlocking failed " "on %s. %s", peer_str, err_str); break; } @@ -164,7 +164,7 @@ gd_collate_errors (struct syncargs *args, int op_ret, int op_errno, return; } -static void +void gd_syncargs_init (struct syncargs *args, dict_t *op_ctx) { args->dict = op_ctx; @@ -268,7 +268,7 @@ extern struct rpc_clnt_program gd_mgmt_prog; extern struct rpc_clnt_program gd_brick_prog; extern struct rpc_clnt_program gd_mgmt_v3_prog; -static int +int glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) { int ret = 0; @@ -339,6 +339,12 @@ glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) goto out; break; + case GD_OP_SNAP: + ret = glusterd_snap_use_rsp_dict (aggr, rsp); + if (ret) + goto out; + break; + default: break; } @@ -347,13 +353,13 @@ out: } int32_t -_gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, +gd_syncop_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { int ret = -1; struct syncargs *args = NULL; glusterd_peerinfo_t *peerinfo = NULL; - gd1_mgmt_volume_lock_rsp rsp = {{0},}; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; call_frame_t *frame = NULL; int op_ret = -1; int op_errno = -1; @@ -374,7 +380,7 @@ _gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, } ret = xdr_to_generic (*iov, &rsp, - (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); if (ret < 0) goto out; @@ -384,7 +390,7 @@ _gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, op_errno = rsp.op_errno; out: gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, - GLUSTERD_MGMT_V3_VOLUME_LOCK, + GLUSTERD_MGMT_V3_LOCK, peerinfo, rsp.uuid); STACK_DESTROY (frame->root); synctask_barrier_wake(args); @@ -392,21 +398,21 @@ out: } int32_t -gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +gd_syncop_mgmt_v3_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { return glusterd_big_locked_cbk (req, iov, count, myframe, - _gd_syncop_mgmt_volume_lock_cbk); + gd_syncop_mgmt_v3_lock_cbk_fn); } int -gd_syncop_mgmt_volume_lock (glusterd_op_t op, dict_t *op_ctx, - glusterd_peerinfo_t *peerinfo, - struct syncargs *args, uuid_t my_uuid, - uuid_t recv_uuid, uuid_t txn_id) +gd_syncop_mgmt_v3_lock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) { int ret = -1; - gd1_mgmt_volume_lock_req req = {{0},}; + gd1_mgmt_v3_lock_req req = {{0},}; glusterd_conf_t *conf = THIS->private; GF_ASSERT(op_ctx); @@ -425,10 +431,10 @@ gd_syncop_mgmt_volume_lock (glusterd_op_t op, dict_t *op_ctx, synclock_unlock (&conf->big_lock); ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, &gd_mgmt_v3_prog, - GLUSTERD_MGMT_V3_VOLUME_LOCK, - gd_syncop_mgmt_volume_lock_cbk, + GLUSTERD_MGMT_V3_LOCK, + gd_syncop_mgmt_v3_lock_cbk, (xdrproc_t) - xdr_gd1_mgmt_volume_lock_req); + xdr_gd1_mgmt_v3_lock_req); synclock_lock (&conf->big_lock); out: gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); @@ -436,13 +442,13 @@ out: } int32_t -_gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +gd_syncop_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { int ret = -1; struct syncargs *args = NULL; glusterd_peerinfo_t *peerinfo = NULL; - gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; call_frame_t *frame = NULL; int op_ret = -1; int op_errno = -1; @@ -463,7 +469,7 @@ _gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, } ret = xdr_to_generic (*iov, &rsp, - (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); if (ret < 0) goto out; @@ -476,7 +482,7 @@ _gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, op_errno = rsp.op_errno; out: gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, - GLUSTERD_MGMT_V3_VOLUME_UNLOCK, + GLUSTERD_MGMT_V3_UNLOCK, peerinfo, rsp.uuid); STACK_DESTROY (frame->root); synctask_barrier_wake(args); @@ -484,20 +490,20 @@ out: } int32_t -gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) +gd_syncop_mgmt_v3_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) { return glusterd_big_locked_cbk (req, iov, count, myframe, - _gd_syncop_mgmt_volume_unlock_cbk); + gd_syncop_mgmt_v3_unlock_cbk_fn); } int -gd_syncop_mgmt_volume_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, - struct syncargs *args, uuid_t my_uuid, - uuid_t recv_uuid, uuid_t txn_id) +gd_syncop_mgmt_v3_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) { int ret = -1; - gd1_mgmt_volume_unlock_req req = {{0},}; + gd1_mgmt_v3_unlock_req req = {{0},}; glusterd_conf_t *conf = THIS->private; GF_ASSERT(op_ctx); @@ -515,10 +521,10 @@ gd_syncop_mgmt_volume_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, synclock_unlock (&conf->big_lock); ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, &gd_mgmt_v3_prog, - GLUSTERD_MGMT_V3_VOLUME_UNLOCK, - gd_syncop_mgmt_volume_unlock_cbk, + GLUSTERD_MGMT_V3_UNLOCK, + gd_syncop_mgmt_v3_unlock_cbk, (xdrproc_t) - xdr_gd1_mgmt_volume_unlock_req); + xdr_gd1_mgmt_v3_unlock_req); synclock_lock (&conf->big_lock); out: gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); @@ -884,10 +890,12 @@ gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, GD_SYNCOP (rpc, (&args), NULL, gd_syncop_brick_op_cbk, req, &gd_brick_prog, req->op, xdr_gd1_mgmt_brick_op_req); - if (args.errstr && errstr) - *errstr = args.errstr; - else - GF_FREE (args.errstr); + if (args.errstr) { + if ((strlen(args.errstr) > 0) && errstr) + *errstr = args.errstr; + else + GF_FREE (args.errstr); + } if (GD_OP_STATUS_VOLUME == op) { ret = dict_set_int32 (args.dict, "index", pnode->index); @@ -1097,8 +1105,8 @@ gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, gd_syncop_mgmt_lock (peerinfo, &args, MY_UUID, peer_uuid); } else - gd_syncop_mgmt_volume_lock (op, op_ctx, peerinfo, &args, - MY_UUID, peer_uuid, txn_id); + gd_syncop_mgmt_v3_lock (op, op_ctx, peerinfo, &args, + MY_UUID, peer_uuid, txn_id); peer_cnt++; } gd_synctask_barrier_wait((&args), peer_cnt); @@ -1321,7 +1329,7 @@ gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, glusterd_peerinfo_t *peerinfo = NULL; glusterd_peerinfo_t *tmp = NULL; uuid_t tmp_uuid = {0}; - int peer_cnt = 0; + int peer_cnt = 0; int ret = -1; xlator_t *this = NULL; struct syncargs args = {0}; @@ -1358,9 +1366,9 @@ gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int *op_ret, if (volname) { list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) { - gd_syncop_mgmt_volume_unlock (op_ctx, peerinfo, - &args, MY_UUID, - tmp_uuid, txn_id); + gd_syncop_mgmt_v3_unlock (op_ctx, peerinfo, + &args, MY_UUID, + tmp_uuid, txn_id); peer_cnt++; list_del_init (&peerinfo->op_peers_list); } @@ -1386,7 +1394,7 @@ out: if (is_acquired) { /* Based on the op-version, - * we release the cluster or volume lock + * we release the cluster or mgmt_v3 lock * and clear the op */ glusterd_op_clear_op (op); @@ -1394,7 +1402,8 @@ out: glusterd_unlock (MY_UUID); else { if (volname) { - ret = glusterd_volume_unlock (volname, MY_UUID); + ret = glusterd_mgmt_v3_unlock (volname, MY_UUID, + "vol"); if (ret) gf_log (this->name, GF_LOG_ERROR, "Unable to release lock for %s", @@ -1551,7 +1560,7 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) goto out; } - /* Based on the op_version, acquire a cluster or volume lock */ + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ if (conf->op_version < GD_OP_VERSION_4) { ret = glusterd_lock (MY_UUID); if (ret) { @@ -1580,7 +1589,7 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) goto out; } - ret = glusterd_volume_lock (volname, MY_UUID); + ret = glusterd_mgmt_v3_lock (volname, MY_UUID, "vol"); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Unable to acquire lock for %s", volname); diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h index 7e4d47f9a..e83ea2f4c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.h +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h @@ -12,6 +12,7 @@ #include "syncop.h" #include "glusterd-sm.h" +#include "glusterd.h" #define GD_SYNC_OPCODE_KEY "sync-mgmt-operation" @@ -51,4 +52,20 @@ int gd_syncop_mgmt_stage_op (struct rpc_clnt *rpc, struct syncargs *arg, int gd_syncop_mgmt_commit_op (struct rpc_clnt *rpc, struct syncargs *arg, uuid_t my_uuid, uuid_t recv_uuid, int op, dict_t *dict_out, dict_t *op_ctx); + +void +gd_synctask_barrier_wait (struct syncargs *args, int count); + +int +gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers, + glusterd_op_t op); +int +gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr); + +int +glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp); + +void +gd_syncargs_init (struct syncargs *args, dict_t *op_ctx); #endif /* __RPC_SYNCOP_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 1f278ca84..8e96be91b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -491,8 +491,11 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo) if (!new_volinfo) goto out; + LOCK_INIT (&new_volinfo->lock); INIT_LIST_HEAD (&new_volinfo->vol_list); + INIT_LIST_HEAD (&new_volinfo->snapvol_list); INIT_LIST_HEAD (&new_volinfo->bricks); + INIT_LIST_HEAD (&new_volinfo->snap_volumes); new_volinfo->dict = dict_new (); if (!new_volinfo->dict) { @@ -508,6 +511,10 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo) goto out; } + snprintf (new_volinfo->parent_volname, GLUSTERD_MAX_VOLUME_NAME, "N/A"); + + new_volinfo->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + new_volinfo->xl = THIS; pthread_mutex_init (&new_volinfo->reflock, NULL); @@ -520,6 +527,220 @@ out: return ret; } +/* This function will create a new volinfo and then + * dup the entries from volinfo to the new_volinfo. + * + * @param volinfo volinfo which will be duplicated + * @param dup_volinfo new volinfo which will be created + * @param set_userauth if this true then auth info is also set + * + * @return 0 on success else -1 + */ +int32_t +glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t **dup_volinfo, + gf_boolean_t set_userauth) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *new_volinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, dup_volinfo, out); + + ret = glusterd_volinfo_new (&new_volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "not able to create the " + "duplicate volinfo for the volume %s", + volinfo->volname); + goto out; + } + + new_volinfo->type = volinfo->type; + new_volinfo->replica_count = volinfo->replica_count; + new_volinfo->stripe_count = volinfo->stripe_count; + new_volinfo->dist_leaf_count = volinfo->dist_leaf_count; + new_volinfo->sub_count = volinfo->sub_count; + new_volinfo->transport_type = volinfo->transport_type; + new_volinfo->nfs_transport_type = volinfo->nfs_transport_type; + new_volinfo->brick_count = volinfo->brick_count; + + dict_copy (volinfo->dict, new_volinfo->dict); + gd_update_volume_op_versions (new_volinfo); + + if (set_userauth) { + glusterd_auth_set_username (new_volinfo, + volinfo->auth.username); + glusterd_auth_set_password (new_volinfo, + volinfo->auth.password); + } + + *dup_volinfo = new_volinfo; + ret = 0; +out: + if (ret && (NULL != new_volinfo)) { + (void) glusterd_volinfo_delete (new_volinfo); + } + return ret; +} + +/* This function will duplicate brickinfo + * + * @param brickinfo Source brickinfo + * @param dup_brickinfo Destination brickinfo + * + * @return 0 on success else -1 + */ +int32_t +glusterd_brickinfo_dup (glusterd_brickinfo_t *brickinfo, + glusterd_brickinfo_t *dup_brickinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + GF_VALIDATE_OR_GOTO (this->name, dup_brickinfo, out); + + strcpy (dup_brickinfo->hostname, brickinfo->hostname); + strcpy (dup_brickinfo->path, brickinfo->path); + strcpy (dup_brickinfo->device_path, brickinfo->device_path); + ret = gf_canonicalize_path (dup_brickinfo->path); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to canonicalize " + "brick path"); + goto out; + } + uuid_copy (dup_brickinfo->uuid, brickinfo->uuid); + + dup_brickinfo->port = brickinfo->port; + dup_brickinfo->rdma_port = brickinfo->rdma_port; + if (NULL != brickinfo->logfile) { + dup_brickinfo->logfile = gf_strdup (brickinfo->logfile); + if (NULL == dup_brickinfo->logfile) { + ret = -1; + goto out; + } + } + strcpy (dup_brickinfo->brick_id, brickinfo->brick_id); + dup_brickinfo->status = brickinfo->status; + dup_brickinfo->snap_status = brickinfo->snap_status; +out: + return ret; +} + +/* This function will copy snap volinfo to the new + * passed volinfo and regenerate backend store files + * for the restored snap. + * + * @param new_volinfo new volinfo + * @param snap_volinfo volinfo of snap volume + * + * @return 0 on success and -1 on failure + * + * TODO: Duplicate all members of volinfo, e.g. geo-rep sync slaves + */ +int32_t +glusterd_snap_volinfo_restore (dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *snap_volinfo) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *new_brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + + GF_VALIDATE_OR_GOTO (this->name, new_volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, snap_volinfo, out); + + brick_count = 0; + list_for_each_entry (brickinfo, &snap_volinfo->bricks, brick_list) { + ret = glusterd_brickinfo_new (&new_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create " + "new brickinfo"); + goto out; + } + + /* Duplicate brickinfo */ + ret = glusterd_brickinfo_dup (brickinfo, new_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to dup " + "brickinfo"); + goto out; + } + + /* If the brick is not of this peer, or snapshot is missed * + * for the brick do not replace the xattr for it */ + if ((!uuid_compare (brickinfo->uuid, MY_UUID)) && + (brickinfo->snap_status != -1)) { + /* We need to replace the volume id of all the bricks + * to the volume id of the origin volume. new_volinfo + * has the origin volume's volume id*/ + ret = sys_lsetxattr (new_brickinfo->path, + GF_XATTR_VOL_ID_KEY, + new_volinfo->volume_id, + sizeof (new_volinfo->volume_id), + XATTR_REPLACE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "set extended attribute %s on %s. " + "Reason: %s, snap: %s", + GF_XATTR_VOL_ID_KEY, + new_brickinfo->path, strerror (errno), + new_volinfo->volname); + goto out; + } + } + + /* If a snapshot is pending for this brick then + * restore should also be pending + */ + if (brickinfo->snap_status == -1) { + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_volinfo->volname, + brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + brickinfo->hostname, + brickinfo->path); + goto out; + } + } + + list_add_tail (&new_brickinfo->brick_list, + &new_volinfo->bricks); + /* ownership of new_brickinfo is passed to new_volinfo */ + new_brickinfo = NULL; + brick_count++; + } + + /* Regenerate all volfiles */ + ret = glusterd_create_volfiles_and_notify_services (new_volinfo); + +out: + if (ret && (NULL != new_brickinfo)) { + (void) glusterd_brickinfo_delete (new_brickinfo); + } + + return ret; +} + void glusterd_auth_cleanup (glusterd_volinfo_t *volinfo) { @@ -620,6 +841,7 @@ glusterd_volinfo_delete (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo); list_del_init (&volinfo->vol_list); + list_del_init (&volinfo->snapvol_list); ret = glusterd_volume_brickinfos_delete (volinfo); if (ret) @@ -645,7 +867,6 @@ out: return ret; } - int32_t glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo) { @@ -1160,6 +1381,42 @@ glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volin return ret; } +int +glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volinfo); + + if (uuid_is_null(volume_id)) { + gf_log (this->name, GF_LOG_WARNING, "Volume UUID is NULL"); + goto out; + } + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + list_for_each_entry (voliter, &snap->volumes, vol_list) { + if (uuid_compare (volume_id, voliter->volume_id)) + continue; + *volinfo = voliter; + ret = 0; + goto out; + } + } + + gf_log (this->name, GF_LOG_WARNING, "Snap volume not found"); +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + int32_t glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) { @@ -1169,7 +1426,6 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) glusterd_conf_t *priv = NULL; GF_ASSERT (volname); - this = THIS; GF_ASSERT (this); @@ -1178,7 +1434,8 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) list_for_each_entry (tmp_volinfo, &priv->volumes, vol_list) { if (!strcmp (tmp_volinfo->volname, volname)) { - gf_log (this->name, GF_LOG_DEBUG, "Volume %s found", volname); + gf_log (this->name, GF_LOG_DEBUG, "Volume %s found", + volname); ret = 0; *volinfo = tmp_volinfo; break; @@ -1190,6 +1447,68 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) } int32_t +glusterd_snap_volinfo_find (char *snap_volname, glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (snap_volname); + + list_for_each_entry (snap_vol, &snap->volumes, vol_list) { + if (!strcmp (snap_vol->volname, snap_volname)) { + ret = 0; + *volinfo = snap_vol; + goto out; + } + } + + gf_log (this->name, GF_LOG_WARNING, "Snap volume %s not found", + snap_volname); +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname, + glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (origin_volname); + + list_for_each_entry (snap_vol, &snap->volumes, vol_list) { + if (!strcmp (snap_vol->parent_volname, origin_volname)) { + ret = 0; + *volinfo = snap_vol; + goto out; + } + } + + gf_log (this->name, GF_LOG_DEBUG, "Snap volume not found(snap: %s, " + "origin-volume: %s", snap->snapname, origin_volname); + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t glusterd_service_stop (const char *service, char *pidfile, int sig, gf_boolean_t force_kill) { @@ -1281,10 +1600,9 @@ glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo, */ int32_t glusterd_brick_connect (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) + glusterd_brickinfo_t *brickinfo, char *socketpath) { int ret = 0; - char socketpath[PATH_MAX] = {0}; char volume_id_str[64]; char *brickid = NULL; dict_t *options = NULL; @@ -1293,12 +1611,9 @@ glusterd_brick_connect (glusterd_volinfo_t *volinfo, GF_ASSERT (volinfo); GF_ASSERT (brickinfo); + GF_ASSERT (socketpath); if (brickinfo->rpc == NULL) { - glusterd_set_brick_socket_filepath (volinfo, brickinfo, - socketpath, - sizeof (socketpath)); - /* Setting frame-timeout to 10mins (600seconds). * Unix domain sockets ensures that the connection is reliable. * The default timeout of 30mins used for unreliable network @@ -1379,9 +1694,23 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, priv = this->private; GF_ASSERT (priv); + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "Snapshot is pending on %s:%s. " + "Hence not starting the brick", + brickinfo->hostname, + brickinfo->path); + ret = 0; + goto out; + } + ret = _mk_rundir_p (volinfo); if (ret) goto out; + + glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath, + sizeof (socketpath)); + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); if (gf_is_service_running (pidfile, NULL)) goto connect; @@ -1417,8 +1746,15 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); } - snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, - brickinfo->hostname, exp_path); + if (volinfo->is_snap_volume) { + snprintf (volfile, PATH_MAX,"/%s/%s/%s.%s.%s", + GLUSTERD_VOL_SNAP_DIR_PREFIX, + volinfo->snapshot->snapname, volinfo->volname, + brickinfo->hostname, exp_path); + } else { + snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, + brickinfo->hostname, exp_path); + } if (volinfo->logdir) { snprintf (logfile, PATH_MAX, "%s/%s.log", @@ -1430,9 +1766,6 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, if (!brickinfo->logfile) brickinfo->logfile = gf_strdup (logfile); - glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath, - sizeof (socketpath)); - (void) snprintf (glusterd_uuid, 1024, "*-posix.glusterd-uuid=%s", uuid_utoa (MY_UUID)); runner_add_args (&runner, SBIN_DIR"/glusterfsd", @@ -1480,9 +1813,13 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, brickinfo->rdma_port = rdma_port; connect: - ret = glusterd_brick_connect (volinfo, brickinfo); - if (ret) + ret = glusterd_brick_connect (volinfo, brickinfo, socketpath); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to connect to brick %s:%s on %s", + brickinfo->hostname, brickinfo->path, socketpath); goto out; + } out: return ret; } @@ -1546,11 +1883,10 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, gf_boolean_t del_brick) { - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - int ret = 0; - glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char pidfile[PATH_MAX] = {0,}; + int ret = 0; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -1806,7 +2142,7 @@ out: } int glusterd_compute_cksum (glusterd_volinfo_t *volinfo, - gf_boolean_t is_quota_conf) + gf_boolean_t is_quota_conf) { int ret = -1; uint32_t cs = 0; @@ -1921,7 +2257,10 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, glusterd_dict_ctx_t ctx = {0}; char *rebalance_id_str = NULL; char *rb_id_str = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); GF_ASSERT (dict); GF_ASSERT (volinfo); @@ -1936,6 +2275,15 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.is_volume_restored", count); + ret = dict_set_int32 (dict, key, volinfo->is_volume_restored); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to set " + "is_volume_restored option for %s volume", + volinfo->volname); + goto out; + } + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.brick_count", count); ret = dict_set_int32 (dict, key, volinfo->brick_count); @@ -1990,6 +2338,20 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.is_snap_volume", count); + ret = dict_set_uint32 (dict, key, volinfo->is_snap_volume); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); + ret = dict_set_uint64 (dict, key, volinfo->snap_max_hard_limit); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key); + goto out; + } + volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id)); if (!volume_id_str) { ret = -1; @@ -2165,6 +2527,28 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", + count, i); + ret = dict_set_int32 (dict, key, brickinfo->snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snap_status for %s:%s", + brickinfo->hostname, + brickinfo->path); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.brick%d.device_path", + count, i); + ret = dict_set_str (dict, key, brickinfo->device_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snap_device for %s:%s", + brickinfo->hostname, + brickinfo->path); + goto out; + } + i++; } @@ -2719,7 +3103,7 @@ glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo, gf_boolean_t meets_quorum) { glusterd_brickinfo_t *brickinfo = NULL; - glusterd_conf_t *conf = NULL; + glusterd_conf_t *conf = NULL; conf = this->private; if (volinfo->status != GLUSTERD_STATUS_STARTED) @@ -2837,6 +3221,8 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, { char key[512] = {0,}; int ret = -1; + int32_t snap_status = 0; + char *snap_device = NULL; char *hostname = NULL; char *path = NULL; char *brick_id = NULL; @@ -2880,12 +3266,30 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, ret = 0; } + snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", + vol_count, brick_count); + ret = dict_get_int32 (vols, key, &snap_status); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.brick%d.device_path", + vol_count, brick_count); + ret = dict_get_str (vols, key, &snap_device); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + ret = glusterd_brickinfo_new (&new_brickinfo); if (ret) goto out; strcpy (new_brickinfo->path, path); strcpy (new_brickinfo->hostname, hostname); + strcpy (new_brickinfo->device_path, snap_device); + new_brickinfo->snap_status = snap_status; new_brickinfo->decommissioned = decommissioned; if (brick_id) strcpy (new_brickinfo->brick_id, brick_id); @@ -3095,6 +3499,7 @@ glusterd_import_volinfo (dict_t *vols, int count, char *rb_id_str = NULL; int op_version = 0; int client_op_version = 0; + uint32_t is_snap_volume = 0; GF_ASSERT (vols); GF_ASSERT (volinfo); @@ -3106,6 +3511,22 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.is_snap_volume", count); + ret = dict_get_uint32 (vols, key, &is_snap_volume); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + + if (is_snap_volume == _gf_true) { + gf_log (THIS->name, GF_LOG_DEBUG, + "Not syncing snap volume %s", volname); + ret = 0; + goto out; + } + ret = glusterd_volinfo_new (&new_volinfo); if (ret) goto out; @@ -3232,6 +3653,25 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } + new_volinfo->is_snap_volume = is_snap_volume; + + snprintf (key, sizeof (key), "volume%d.is_volume_restored", count); + ret = dict_get_uint32 (vols, key, &new_volinfo->is_volume_restored); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get " + "is_volume_restored option for %s", + volname); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); + ret = dict_get_uint64 (vols, key, &new_volinfo->snap_max_hard_limit); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.rebalance", count); ret = dict_get_uint32 (vols, key, &new_volinfo->rebal.defrag_cmd); @@ -3571,6 +4011,12 @@ glusterd_import_friend_volume (dict_t *vols, size_t count) if (ret) goto out; + if (!new_volinfo) { + gf_log (this->name, GF_LOG_DEBUG, + "Not importing snap volume"); + goto out; + } + ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo); if (0 == ret) { (void) gd_check_and_update_rebalance_info (old_volinfo, @@ -4759,20 +5205,42 @@ out: int glusterd_restart_bricks (glusterd_conf_t *conf) { + int ret = 0; glusterd_volinfo_t *volinfo = NULL; glusterd_brickinfo_t *brickinfo = NULL; + glusterd_snap_t *snap = NULL; gf_boolean_t start_nodesvcs = _gf_false; - int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); list_for_each_entry (volinfo, &conf->volumes, vol_list) { if (volinfo->status != GLUSTERD_STATUS_STARTED) continue; start_nodesvcs = _gf_true; + gf_log (this->name, GF_LOG_DEBUG, "starting the volume %s", + volinfo->volname); list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { glusterd_brick_start (volinfo, brickinfo, _gf_false); } } + list_for_each_entry (snap, &conf->snapshots, snap_list) { + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + if (volinfo->status != GLUSTERD_STATUS_STARTED) + continue; + start_nodesvcs = _gf_true; + gf_log (this->name, GF_LOG_DEBUG, "starting the snap " + "volume %s", volinfo->volname); + list_for_each_entry (brickinfo, &volinfo->bricks, + brick_list) { + glusterd_brick_start (volinfo, brickinfo, + _gf_false); + } + } + } + if (start_nodesvcs) glusterd_nodesvcs_handle_graph_change (NULL); @@ -4989,7 +5457,7 @@ out: } #ifdef GF_LINUX_HOST_OS -static int +int glusterd_get_brick_root (char *path, char **mount_point) { char *ptr = NULL; @@ -5113,6 +5581,17 @@ glusterd_add_inode_size_to_dict (dict_t *dict, int count) "size for %s : %s package missing", fs_name, ((strcmp (fs_name, "xfs")) ? "e2fsprogs" : "xfsprogs")); + /* + * Runner_start might return an error after the child has + * been forked, e.g. if the program isn't there. In that + * case, we still need to call runner_end to reap the + * child and free resources. Fortunately, that seems to + * be harmless for other kinds of failures. + */ + if (runner_end(&runner)) { + gf_log (THIS->name, GF_LOG_ERROR, + "double failure calling runner_end"); + } goto out; } @@ -5157,6 +5636,31 @@ glusterd_add_inode_size_to_dict (dict_t *dict, int count) return ret; } +struct mntent * +glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab) +{ + struct mntent *entry = NULL; + + mtab = setmntent (_PATH_MOUNTED, "r"); + if (!mtab) + goto out; + + entry = getmntent (mtab); + + while (1) { + if (!entry) + goto out; + + if (!strcmp (entry->mnt_dir, mnt_pt) && + strcmp (entry->mnt_type, "rootfs")) + break; + entry = getmntent (mtab); + } + +out: + return entry; +} + static int glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, dict_t *dict, int count) @@ -5165,9 +5669,6 @@ glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, char key[1024] = {0}; char base_key[1024] = {0}; char *mnt_pt = NULL; - char *fs_name = NULL; - char *mnt_options = NULL; - char *device = NULL; FILE *mtab = NULL; struct mntent *entry = NULL; @@ -5177,31 +5678,17 @@ glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, if (ret) goto out; - mtab = setmntent (_PATH_MOUNTED, "r"); - if (!mtab) { + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (!entry) { ret = -1; goto out; } - entry = getmntent (mtab); - - while (1) { - if (!entry) { - ret = -1; - goto out; - } - if (!strcmp (entry->mnt_dir, mnt_pt) && - strcmp (entry->mnt_type, "rootfs")) - break; - entry = getmntent (mtab); - } - /* get device file */ memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "%s.device", base_key); - device = gf_strdup (entry->mnt_fsname); - ret = dict_set_dynstr (dict, key, device); + ret = dict_set_dynstr_with_alloc (dict, key, entry->mnt_fsname); if (ret) goto out; @@ -5209,8 +5696,7 @@ glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "%s.fs_name", base_key); - fs_name = gf_strdup (entry->mnt_type); - ret = dict_set_dynstr (dict, key, fs_name); + ret = dict_set_dynstr_with_alloc (dict, key, entry->mnt_type); if (ret) goto out; @@ -5218,8 +5704,7 @@ glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "%s.mnt_options", base_key); - mnt_options = gf_strdup (entry->mnt_opts); - ret = dict_set_dynstr (dict, key, mnt_options); + ret = dict_set_dynstr_with_alloc (dict, key, entry->mnt_opts); out: GF_FREE (mnt_pt); @@ -5228,6 +5713,45 @@ glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, return ret; } + +char* +glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + char *mnt_pt = NULL; + char *device = NULL; + FILE *mtab = NULL; + struct mntent *entry = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (brickinfo); + + ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get mount point " + "for %s brick", brickinfo->path); + goto out; + } + + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (NULL == entry) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get mnt entry " + "for %s mount path", mnt_pt); + goto out; + } + + /* get the fs_name/device */ + device = gf_strdup (entry->mnt_fsname); + +out: + if (NULL != mtab) { + endmntent (mtab); + } + + return device; +} #endif int @@ -5325,7 +5849,6 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, int ret = -1; int32_t pid = -1; int32_t brick_online = -1; - char *peer_id_str = NULL; char key[1024] = {0}; char base_key[1024] = {0}; char pidfile[PATH_MAX] = {0}; @@ -5356,16 +5879,11 @@ glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, goto out; /* add peer uuid */ - peer_id_str = gf_strdup (uuid_utoa (brickinfo->uuid)); - if (!peer_id_str) { - ret = -1; - goto out; - } memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "%s.peerid", base_key); - ret = dict_set_dynstr (dict, key, peer_id_str); + ret = dict_set_dynstr_with_alloc (dict, key, + uuid_utoa (brickinfo->uuid)); if (ret) { - GF_FREE (peer_id_str); goto out; } @@ -6027,8 +6545,7 @@ glusterd_sm_tr_log_transition_add_to_dict (dict_t *dict, snprintf (key, sizeof (key), "log%d-time", count); gf_time_fmt (timestr, sizeof timestr, log->transitions[i].time, gf_timefmt_FT); - str = gf_strdup (timestr); - ret = dict_set_dynstr (dict, key, str); + ret = dict_set_dynstr_with_alloc (dict, key, timestr); if (ret) goto out; @@ -6855,29 +7372,41 @@ out: return ret; } -/* Checks if the given peer contains all the bricks belonging to the - * given volume. Returns true if it does else returns false +/* Checks if the given peer contains bricks belonging to the given volume. + * Returns, + * 2 - if peer contains all the bricks + * 1 - if peer contains at least 1 brick + * 0 - if peer contains no bricks */ -gf_boolean_t +int glusterd_friend_contains_vol_bricks (glusterd_volinfo_t *volinfo, uuid_t friend_uuid) { - gf_boolean_t ret = _gf_true; + int ret = 0; glusterd_brickinfo_t *brickinfo = NULL; + int count = 0; GF_ASSERT (volinfo); list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (uuid_compare (friend_uuid, brickinfo->uuid)) { - ret = _gf_false; - break; + if (!uuid_compare (brickinfo->uuid, friend_uuid)) { + count++; } } + + if (count) { + if (count == volinfo->brick_count) + ret = 2; + else + ret = 1; + } gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } -/* Remove all volumes which completely belong to given friend +/* Cleanup the stale volumes left behind in the cluster. The volumes which are + * contained completely within the detached peer are stale with respect to the + * cluster. */ int glusterd_friend_remove_cleanup_vols (uuid_t uuid) @@ -6892,7 +7421,7 @@ glusterd_friend_remove_cleanup_vols (uuid_t uuid) list_for_each_entry_safe (volinfo, tmp_volinfo, &priv->volumes, vol_list) { - if (glusterd_friend_contains_vol_bricks (volinfo, uuid)) { + if (glusterd_friend_contains_vol_bricks (volinfo, uuid) == 2) { gf_log (THIS->name, GF_LOG_INFO, "Deleting stale volume %s", volinfo->volname); ret = glusterd_delete_volume (volinfo); @@ -7435,7 +7964,7 @@ glusterd_append_gsync_status (dict_t *dst, dict_t *src) goto out; } - ret = dict_set_dynstr (dst, "gsync-status", gf_strdup (stop_msg)); + ret = dict_set_dynstr_with_alloc (dst, "gsync-status", stop_msg); if (ret) { gf_log ("glusterd", GF_LOG_WARNING, "Unable to set the stop" "message in the ctx dictionary"); @@ -7539,8 +8068,8 @@ glusterd_gsync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict, char *op_errstr) ret = dict_get_str (rsp_dict, "conf_path", &conf_path); if (!ret && conf_path) { - ret = dict_set_dynstr (ctx, "conf_path", - gf_strdup(conf_path)); + ret = dict_set_dynstr_with_alloc (ctx, "conf_path", + conf_path); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to store conf path."); @@ -7549,7 +8078,8 @@ glusterd_gsync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict, char *op_errstr) } } if ((op_errstr) && (strcmp ("", op_errstr))) { - ret = dict_set_dynstr (ctx, "errstr", gf_strdup(op_errstr)); + ret = dict_set_dynstr_with_alloc (ctx, "errstr", + op_errstr); if (ret) goto out; } @@ -8230,6 +8760,279 @@ out: } int +glusterd_snap_config_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char buf[PATH_MAX] = ""; + char *volname = NULL; + int ret = -1; + int config_command = 0; + uint64_t i = 0; + uint64_t value = 0; + uint64_t voldisplaycount = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "config-command", &config_command); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "failed to get config-command type"); + goto out; + } + + switch (config_command) { + case GF_SNAP_CONFIG_DISPLAY: + ret = dict_get_uint64 (src, "snap-max-hard-limit", &value); + if (!ret) { + ret = dict_set_uint64 (dst, "snap-max-hard-limit", value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set snap_max_hard_limit"); + goto out; + } + } else { + /* Received dummy response from other nodes */ + ret = 0; + goto out; + } + + ret = dict_get_uint64 (src, "snap-max-soft-limit", &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get snap_max_soft_limit"); + goto out; + } + + ret = dict_set_uint64 (dst, "snap-max-soft-limit", value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set snap_max_soft_limit"); + goto out; + } + + ret = dict_get_uint64 (src, "voldisplaycount", + &voldisplaycount); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get voldisplaycount"); + goto out; + } + + ret = dict_set_uint64 (dst, "voldisplaycount", + voldisplaycount); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set voldisplaycount"); + goto out; + } + + for (i = 0; i < voldisplaycount; i++) { + snprintf (buf, sizeof(buf), "volume%ld-volname", i); + ret = dict_get_str (src, buf, &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_str (dst, buf, volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-hard-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-active-hard-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-soft-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + } + + break; + default: + break; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Aggregate missed_snap_counts from different nodes and save it * + * in the req_dict of the originator node */ +int +glusterd_snap_create_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char *buf = NULL; + char *tmp_str = NULL; + char name_buf[PATH_MAX] = ""; + int32_t i = -1; + int32_t ret = -1; + int32_t src_missed_snap_count = -1; + int32_t dst_missed_snap_count = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!dst || !src) { + gf_log (this->name, GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (src, "missed_snap_count", + &src_missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = dict_get_int32 (dst, "missed_snap_count", + &dst_missed_snap_count); + if (ret) { + /* Initialize dst_missed_count for the first time */ + dst_missed_snap_count = 0; + } + + for (i = 0; i < src_missed_snap_count; i++) { + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + i); + ret = dict_get_str (src, name_buf, &buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %s", name_buf); + goto out; + } + + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + dst_missed_snap_count); + + tmp_str = gf_strdup (buf); + if (!tmp_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dst, name_buf, tmp_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set %s", name_buf); + goto out; + } + + tmp_str = NULL; + dst_missed_snap_count++; + } + + ret = dict_set_int32 (dst, "missed_snap_count", dst_missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set dst_missed_snap_count"); + goto out; + } + +out: + if (ret && tmp_str) + GF_FREE(tmp_str); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_snap_use_rsp_dict (dict_t *dst, dict_t *src) +{ + int ret = -1; + int32_t snap_command = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "type", &snap_command); + if (ret) { + gf_log ("", GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snap_create_use_rsp_dict (dst, src); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_CONFIG: + ret = glusterd_snap_config_use_rsp_dict (dst, src); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict"); + goto out; + } + break; + default: + // copy the response dictinary's contents to the dict to be + // sent back to the cli + dict_copy (src, dst); + break; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int glusterd_sys_exec_output_rsp_dict (dict_t *dst, dict_t *src) { char output_name[PATH_MAX] = ""; @@ -9045,6 +9848,27 @@ gd_update_volume_op_versions (glusterd_volinfo_t *volinfo) return; } +int +op_version_check (xlator_t *this, int min_op_version, char *msg, int msglen) +{ + int ret = 0; + glusterd_conf_t *priv = NULL; + + GF_ASSERT (this); + GF_ASSERT (msg); + + priv = this->private; + if (priv->op_version < min_op_version) { + snprintf (msg, msglen, "One or more nodes do not support " + "the required op-version. Cluster op-version must " + "atleast be %d.", min_op_version); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + ret = -1; + } + return ret; +} + + /* A task is committed/completed once the task-id for it is cleared */ gf_boolean_t gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo) @@ -9117,6 +9941,102 @@ out: return is_status_tasks; } +int +glusterd_compare_snap_time(struct list_head *list1, struct list_head *list2) +{ + glusterd_snap_t *snap1 = NULL; + glusterd_snap_t *snap2 = NULL; + double diff_time = 0; + + GF_ASSERT (list1); + GF_ASSERT (list2); + + snap1 = list_entry(list1, glusterd_snap_t, snap_list); + snap2 = list_entry(list2, glusterd_snap_t, snap_list); + diff_time = difftime(snap1->time_stamp, snap2->time_stamp); + + return ((int)diff_time); +} + +int +glusterd_compare_snap_vol_time(struct list_head *list1, struct list_head *list2) +{ + glusterd_volinfo_t *snapvol1 = NULL; + glusterd_volinfo_t *snapvol2 = NULL; + double diff_time = 0; + + GF_ASSERT (list1); + GF_ASSERT (list2); + + snapvol1 = list_entry(list1, glusterd_volinfo_t, snapvol_list); + snapvol2 = list_entry(list2, glusterd_volinfo_t, snapvol_list); + diff_time = difftime(snapvol1->snapshot->time_stamp, + snapvol2->snapshot->time_stamp); + + return ((int)diff_time); +} + +int32_t +glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo) +{ + glusterd_missed_snap_info *new_missed_snapinfo = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (missed_snapinfo); + + new_missed_snapinfo = GF_CALLOC (1, sizeof(*new_missed_snapinfo), + gf_gld_mt_missed_snapinfo_t); + + if (!new_missed_snapinfo) + goto out; + + new_missed_snapinfo->node_snap_info = NULL; + INIT_LIST_HEAD (&new_missed_snapinfo->missed_snaps); + INIT_LIST_HEAD (&new_missed_snapinfo->snap_ops); + + *missed_snapinfo = new_missed_snapinfo; + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op) +{ + glusterd_snap_op_t *new_snap_op = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (snap_op); + + new_snap_op = GF_CALLOC (1, sizeof(*new_snap_op), + gf_gld_mt_missed_snapinfo_t); + + if (!new_snap_op) + goto out; + + new_snap_op->brick_path = NULL; + new_snap_op->brick_num = -1; + new_snap_op->op = -1; + new_snap_op->status = -1; + INIT_LIST_HEAD (&new_snap_op->snap_ops_list); + + *snap_op = new_snap_op; + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + /* Tells if rebalance needs to be started for the given volume on the peer * * Rebalance should be started on a peer only if an involved brick is present on diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index aebf5fcef..23f8ad7f6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -85,6 +85,11 @@ glusterd_submit_request (struct rpc_clnt *rpc, void *req, int32_t glusterd_volinfo_new (glusterd_volinfo_t **volinfo); +int32_t +glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t **dup_volinfo, + gf_boolean_t set_userauth); + char * glusterd_auth_get_username (glusterd_volinfo_t *volinfo); @@ -119,11 +124,23 @@ int32_t glusterd_peer_hostname_new (char *hostname, glusterd_peer_hostname_t **name); int32_t +glusterd_snap_volinfo_find (char *volname, glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo); +int32_t +glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname, + glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo); + +int32_t glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo); int glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo); +int +glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id, + glusterd_volinfo_t **volinfo); + int32_t glusterd_service_stop(const char *service, char *pidfile, int sig, gf_boolean_t force_kill); @@ -302,6 +319,7 @@ glusterd_is_defrag_on (glusterd_volinfo_t *volinfo); int32_t glusterd_volinfo_bricks_delete (glusterd_volinfo_t *volinfo); + int glusterd_friend_find_by_uuid (uuid_t uuid, glusterd_peerinfo_t **peerinfo); @@ -371,7 +389,7 @@ gf_boolean_t glusterd_peerinfo_is_uuid_unknown (glusterd_peerinfo_t *peerinfo); int32_t glusterd_brick_connect (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); + glusterd_brickinfo_t *brickinfo, char *socketpath); int32_t glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo); int32_t @@ -379,12 +397,16 @@ glusterd_delete_volume (glusterd_volinfo_t *volinfo); int32_t glusterd_delete_brick (glusterd_volinfo_t* volinfo, glusterd_brickinfo_t *brickinfo); + int32_t glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo); + int glusterd_spawn_daemons (void *opaque); + int glusterd_restart_gsyncds (glusterd_conf_t *conf); + int glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, char *path_list, char *conf_path, @@ -431,7 +453,7 @@ glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo); gf_boolean_t glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname, char *path); -gf_boolean_t +int glusterd_friend_contains_vol_bricks (glusterd_volinfo_t *volinfo, uuid_t friend_uuid); int @@ -510,6 +532,8 @@ int glusterd_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); int glusterd_sys_exec_output_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int +glusterd_snap_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); int32_t glusterd_handle_node_rsp (dict_t *req_ctx, void *pending_entry, glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx, @@ -582,6 +606,9 @@ glusterd_is_same_address (char *name1, char *name2); void gd_update_volume_op_versions (glusterd_volinfo_t *volinfo); +int +op_version_check (xlator_t *this, int min_op_version, char *msg, int msglen); + char* gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo); @@ -657,4 +684,37 @@ glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc); int32_t glusterd_compare_volume_name(struct list_head *, struct list_head *); +#ifdef GF_LINUX_HOST_OS +char* +glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo); +struct mntent * +glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab); +int +glusterd_get_brick_root (char *path, char **mount_point); +#endif //LINUX_HOST + +int +glusterd_compare_snap_time(struct list_head *, struct list_head *); + +int +glusterd_compare_snap_vol_time(struct list_head *, struct list_head *); + +int32_t +glusterd_snap_volinfo_restore (dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *snap_volinfo); +int32_t +glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol); + +int32_t +glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo); + +int32_t +glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op); + +int32_t +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, + glusterd_brickinfo_t *brickinfo, + int32_t brick_number, int32_t op); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 8a94309ed..332c3d359 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -28,6 +28,8 @@ #include "logging.h" #include "dict.h" #include "graph-utils.h" +#include "glusterd-store.h" +#include "glusterd-hooks.h" #include "trie.h" #include "glusterd-mem-types.h" #include "cli1-xdr.h" @@ -1324,6 +1326,44 @@ sys_loglevel_option_handler (volgen_graph_t *graph, } static int +logger_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + char *role = NULL; + struct volopt_map_entry vme2 = {0,}; + + role = (char *) param; + + if (strcmp (vme->option, "!logger") != 0 || + !strstr (vme->key, role)) + return 0; + + memcpy (&vme2, vme, sizeof (vme2)); + vme2.option = "logger"; + + return basic_option_handler (graph, &vme2, NULL); +} + +static int +log_format_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + char *role = NULL; + struct volopt_map_entry vme2 = {0,}; + + role = (char *) param; + + if (strcmp (vme->option, "!log-format") != 0 || + !strstr (vme->key, role)) + return 0; + + memcpy (&vme2, vme, sizeof (vme2)); + vme2.option = "log-format"; + + return basic_option_handler (graph, &vme2, NULL); +} + +static int volgen_graph_set_xl_options (volgen_graph_t *graph, dict_t *dict) { int32_t ret = -1; @@ -1375,6 +1415,12 @@ server_spec_option_handler (volgen_graph_t *graph, if (!ret) ret = sys_loglevel_option_handler (graph, vme, "brick"); + if (!ret) + ret = logger_option_handler (graph, vme, "brick"); + + if (!ret) + ret = log_format_option_handler (graph, vme, "brick"); + return ret; } @@ -1969,7 +2015,8 @@ server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, } /* Check for read-only volume option, and add it to the graph */ - if (dict_get_str_boolean (set_dict, "features.read-only", 0)) { + if (dict_get_str_boolean (set_dict, "features.read-only", 0) + || volinfo -> is_snap_volume) { xl = volgen_graph_add (graph, "features/read-only", volname); if (!xl) { ret = -1; @@ -2839,7 +2886,7 @@ static int client_graph_set_perf_options(volgen_graph_t *graph, tmp_data = dict_get (set_dict, "nfs-volume-file"); if (!tmp_data) return volgen_graph_set_options_generic(graph, set_dict, - volname, + volinfo, &perfxl_option_handler); else return volgen_graph_set_options_generic(graph, set_dict, @@ -2858,7 +2905,9 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, char *tmp = NULL; gf_boolean_t var = _gf_false; gf_boolean_t ob = _gf_false; + xlator_t *this = THIS; + GF_ASSERT (this); GF_ASSERT (conf); volname = volinfo->volname; @@ -2935,7 +2984,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, ob = _gf_false; ret = gf_string2boolean (tmp, &ob); if (!ret && ob) { - gf_log (THIS->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "root-squash is enabled. Please turn it" " off to change read-after-open " "option"); @@ -3004,7 +3053,7 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, ret = 0; } if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "setting " + gf_log (this->name, GF_LOG_WARNING, "setting " "open behind option as part of root " "squash failed"); goto out; @@ -3030,14 +3079,28 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, &loglevel_option_handler); if (ret) - gf_log (THIS->name, GF_LOG_WARNING, "changing client log level" + gf_log (this->name, GF_LOG_WARNING, "changing client log level" " failed"); ret = volgen_graph_set_options_generic (graph, set_dict, "client", &sys_loglevel_option_handler); if (ret) - gf_log (THIS->name, GF_LOG_WARNING, "changing client syslog " + gf_log (this->name, GF_LOG_WARNING, "changing client syslog " "level failed"); + + ret = volgen_graph_set_options_generic (graph, set_dict, "client", + &logger_option_handler); + + if (ret) + gf_log (this->name, GF_LOG_WARNING, "changing client logger" + " failed"); + + ret = volgen_graph_set_options_generic (graph, set_dict, "client", + &log_format_option_handler); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "changing client log format" + " failed"); + out: return ret; } @@ -3374,16 +3437,31 @@ build_shd_graph (volgen_graph_t *graph, dict_t *mod_dict) &loglevel_option_handler); if (ret) - gf_log (THIS->name, GF_LOG_WARNING, "changing loglevel " + gf_log (this->name, GF_LOG_WARNING, "changing loglevel " "of self-heal daemon failed"); ret = volgen_graph_set_options_generic (graph, set_dict, "client", &sys_loglevel_option_handler); if (ret) - gf_log (THIS->name, GF_LOG_WARNING, "changing syslog " + gf_log (this->name, GF_LOG_WARNING, "changing syslog " "level of self-heal daemon failed"); + ret = volgen_graph_set_options_generic (graph, set_dict, + "client", + &logger_option_handler); + + if (ret) + gf_log (this->name, GF_LOG_WARNING, "changing logger " + "of self-heal daemon failed"); + + ret = volgen_graph_set_options_generic (graph, set_dict, + "client", + &log_format_option_handler); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "changing log " + "format level of self-heal daemon failed"); + ret = dict_reset (set_dict); if (ret) goto out; @@ -3882,6 +3960,8 @@ generate_client_volfiles (glusterd_volinfo_t *volinfo, out: if (dict) dict_unref (dict); + + gf_log ("", GF_LOG_TRACE, "Returning %d", ret); return ret; } @@ -4511,3 +4591,128 @@ gd_is_boolean_option (char *key) return _gf_false; } + +/* This function will restore origin volume to it's snap. + * The restore operation will simply replace the Gluster origin + * volume with the snap volume. + * TODO: Multi-volume delete to be done. + * Cleanup in case of restore failure is pending. + * + * @param orig_vol volinfo of origin volume + * @param snap_vol volinfo of snapshot volume + * + * @return 0 on success and negative value on error + */ +int +gd_restore_snap_volume (dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, + glusterd_volinfo_t *snap_vol) +{ + int ret = -1; + glusterd_volinfo_t *new_volinfo = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *voliter = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + conf = this->private; + GF_ASSERT (conf); + + GF_VALIDATE_OR_GOTO (this->name, orig_vol, out); + GF_VALIDATE_OR_GOTO (this->name, snap_vol, out); + snap = snap_vol->snapshot; + GF_VALIDATE_OR_GOTO (this->name, snap, out); + + /* Snap volume must be stoped before performing the + * restore operation. + */ + ret = glusterd_stop_volume (snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop " + "snap volume(%s)", snap_vol->volname); + goto out; + } + + /* Create a new volinfo for the restored volume */ + ret = glusterd_volinfo_dup (snap_vol, &new_volinfo, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create volinfo"); + goto out; + } + + /* Following entries need to be derived from origin volume. */ + strcpy (new_volinfo->volname, orig_vol->volname); + uuid_copy (new_volinfo->volume_id, orig_vol->volume_id); + new_volinfo->snap_count = orig_vol->snap_count; + new_volinfo->snap_max_hard_limit = orig_vol->snap_max_hard_limit; + new_volinfo->is_volume_restored = _gf_true; + + /* Bump the version of the restored volume, so that nodes * + * which are done can sync during handshake */ + new_volinfo->version = orig_vol->version; + + list_for_each_entry_safe (voliter, temp_volinfo, + &orig_vol->snap_volumes, snapvol_list) { + list_add_tail (&voliter->snapvol_list, + &new_volinfo->snap_volumes); + } + /* Copy the snap vol info to the new_volinfo.*/ + ret = glusterd_snap_volinfo_restore (rsp_dict, new_volinfo, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to restore snap"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + + /* If the orig_vol is already restored then we should delete + * the backend LVMs */ + if (orig_vol->is_volume_restored) { + ret = glusterd_lvm_snapshot_remove (rsp_dict, orig_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to remove " + "LVM backend"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + } + + /* Once the new_volinfo is completely constructed then delete + * the orinal volinfo + */ + ret = glusterd_volinfo_delete (orig_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to delete volinfo"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + /* New volinfo always shows the status as created. Therefore + * set the status to stop. */ + glusterd_set_volume_status (new_volinfo, GLUSTERD_STATUS_STOPPED); + + list_add_tail (&new_volinfo->vol_list, &conf->volumes); + + /* Now delete the snap entry. As a first step delete the snap + * volume information stored in store. */ + ret = glusterd_snap_remove (rsp_dict, snap, _gf_false, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to delete " + "snap %s", snap->snapname); + goto out; + } + + ret = glusterd_store_volinfo (new_volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store volinfo"); + goto out; + } + + ret = 0; +out: + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index 4411bc4de..dcba11f32 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -130,6 +130,10 @@ int glusterd_create_quotad_volfile (); int glusterd_delete_volfile (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo); +int +glusterd_delete_snap_volfile (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *brickinfo); int glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value); int glusterd_volinfo_get_boolean (glusterd_volinfo_t *volinfo, char *key); @@ -143,8 +147,15 @@ glusterd_check_voloption_flags (char *key, int32_t flags); gf_boolean_t glusterd_is_valid_volfpath (char *volname, char *brick); int generate_brick_volfiles (glusterd_volinfo_t *volinfo); +int generate_snap_brick_volfiles (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t *snap_volinfo); int generate_client_volfiles (glusterd_volinfo_t *volinfo, glusterd_client_type_t client_type); +int +generate_snap_client_volfiles (glusterd_volinfo_t *actual_volinfo, + glusterd_volinfo_t *snap_volinfo, + glusterd_client_type_t client_type, + gf_boolean_t vol_restore); int glusterd_get_volopt_content (dict_t *dict, gf_boolean_t xml_out); char* glusterd_get_trans_type_rb (gf_transport_type ttype); @@ -165,4 +176,7 @@ gd_is_xlator_option (char *key); gf_boolean_t gd_is_boolean_option (char *key); +int gd_restore_snap_volume (dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, + glusterd_volinfo_t *snap_vol); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 135faa40a..eac926d95 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1211,6 +1211,16 @@ glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr) goto out; } + if (volinfo->snap_count > 0 || !list_empty(&volinfo->snap_volumes)) { + snprintf (msg, sizeof (msg), "Cannot delete Volume %s ," + "as it has %ld snapshots. " + "To delete the volume, " + "first delete all the snapshots under it.", + volname, volinfo->snap_count); + ret = -1; + goto out; + } + ret = 0; out: @@ -1772,54 +1782,48 @@ out: return ret; } - int -glusterd_op_stop_volume (dict_t *dict) +glusterd_stop_volume (glusterd_volinfo_t *volinfo) { - int ret = 0; - int flags = 0; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - xlator_t *this = NULL; - char mountdir[PATH_MAX] = {0,}; - runner_t runner = {0,}; - char pidfile[PATH_MAX] = {0,}; + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + char mountdir[PATH_MAX] = {0,}; + runner_t runner = {0,}; + char pidfile[PATH_MAX] = {0,}; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); - ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags); - if (ret) - goto out; - - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS, - volname); - goto out; - } + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { ret = glusterd_brick_stop (volinfo, brickinfo, _gf_false); - if (ret) + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop " + "brick (%s)", brickinfo->path); goto out; + } } glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STOPPED); ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); - if (ret) + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store volinfo of " + "%s volume", volinfo->volname); goto out; + } /* If quota auxiliary mount is present, unmount it */ - GLUSTERFS_GET_AUX_MOUNT_PIDFILE (pidfile, volname); + GLUSTERFS_GET_AUX_MOUNT_PIDFILE (pidfile, volinfo->volname); if (!gf_is_service_running (pidfile, NULL)) { gf_log (this->name, GF_LOG_DEBUG, "Aux mount of volume %s " - "absent", volname); + "absent", volinfo->volname); } else { - GLUSTERD_GET_QUOTA_AUX_MOUNT_PATH (mountdir, volname, "/"); + GLUSTERD_GET_QUOTA_AUX_MOUNT_PATH (mountdir, volinfo->volname, + "/"); runinit (&runner); runner_add_args (&runner, "umount", @@ -1837,6 +1841,45 @@ glusterd_op_stop_volume (dict_t *dict) } ret = glusterd_nodesvcs_handle_graph_change (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to notify graph " + "change for %s volume", volinfo->volname); + goto out; + } + +out: + return ret; +} + +int +glusterd_op_stop_volume (dict_t *dict) +{ + int ret = 0; + int flags = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags); + if (ret) + goto out; + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS, + volname); + goto out; + } + + ret = glusterd_stop_volume (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop %s volume", + volname); + goto out; + } out: return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 24a6ed7cd..3f1d7d539 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -610,6 +610,28 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = 1, .flags = OPT_FLAG_CLIENT_OPT }, + { .key = "diagnostics.brick-logger", + .voltype = "debug/io-stats", + .option = "!logger", + .op_version = 4 + }, + { .key = "diagnostics.client-logger", + .voltype = "debug/io-stats", + .option = "!logger", + .op_version = 4, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "diagnostics.brick-log-format", + .voltype = "debug/io-stats", + .option = "!log-format", + .op_version = 4 + }, + { .key = "diagnostics.client-log-format", + .voltype = "debug/io-stats", + .option = "!log-format", + .op_version = 4, + .flags = OPT_FLAG_CLIENT_OPT + }, /* IO-cache xlator options */ { .key = "performance.cache-max-file-size", diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index bb2800683..6e7a9278d 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -56,6 +56,8 @@ extern struct rpcsvc_program gd_svc_cli_prog_ro; extern struct rpc_clnt_program gd_brick_prog; extern struct rpcsvc_program glusterd_mgmt_hndsk_prog; +extern char snap_mount_folder[PATH_MAX]; + rpcsvc_cbk_program_t glusterd_cbk_prog = { .progname = "Gluster Callback", .prognum = GLUSTER_CBK_PROGRAM, @@ -110,6 +112,7 @@ const char *gd_op_list[GD_OP_MAX + 1] = { [GD_OP_COPY_FILE] = "Copy File", [GD_OP_SYS_EXEC] = "Execute system commands", [GD_OP_GSYNC_CREATE] = "Geo-replication Create", + [GD_OP_SNAP] = "Snapshot", [GD_OP_MAX] = "Invalid op" }; @@ -1092,6 +1095,71 @@ glusterd_stop_uds_listener (xlator_t *this) return; } +static int +glusterd_init_snap_folder (xlator_t *this) +{ + int ret = -1; + struct stat buf = {0,}; + + GF_ASSERT (this); + + /* Snapshot volumes are mounted under /var/run/gluster/snaps folder. + * But /var/run is normally a symbolic link to /run folder, which + * creates problems as the entry point in the mtab for the mount point + * and glusterd maintained entry point will be different. Therefore + * identify the correct run folder and use it for snap volume mounting. + */ + ret = lstat (GLUSTERD_VAR_RUN_DIR, &buf); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "stat fails on %s, exiting. (errno = %d)", + GLUSTERD_VAR_RUN_DIR, errno); + goto out; + } + + /* If /var/run is symlink then use /run folder */ + if (S_ISLNK (buf.st_mode)) { + strcpy (snap_mount_folder, GLUSTERD_RUN_DIR); + } else { + strcpy (snap_mount_folder, GLUSTERD_VAR_RUN_DIR); + } + + strcat (snap_mount_folder, GLUSTERD_DEFAULT_SNAPS_BRICK_DIR); + + ret = stat (snap_mount_folder, &buf); + if ((ret != 0) && (ENOENT != errno)) { + gf_log (this->name, GF_LOG_ERROR, + "stat fails on %s, exiting. (errno = %d)", + snap_mount_folder, errno); + ret = -1; + goto out; + } + + if ((!ret) && (!S_ISDIR(buf.st_mode))) { + gf_log (this->name, GF_LOG_CRITICAL, + "Provided snap path %s is not a directory," + "exiting", snap_mount_folder); + ret = -1; + goto out; + } + + if ((-1 == ret) && (ENOENT == errno)) { + /* Create missing folders */ + ret = mkdir_p (snap_mount_folder, 0777, _gf_false); + + if (-1 == ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to create directory %s" + " ,errno = %d", snap_mount_folder, errno); + goto out; + } + } + +out: + return ret; +} + + /* * init - called during glusterd initialization * @@ -1158,6 +1226,14 @@ init (xlator_t *this) gf_log (this->name, GF_LOG_INFO, "Using %s as working directory", workdir); + ret = glusterd_init_snap_folder (this); + + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Unable to create " + "snap backend folder"); + exit (1); + } + snprintf (cmd_log_filename, PATH_MAX,"%s/.cmd_log_history", DEFAULT_LOG_FILE_DIRECTORY); ret = gf_cmd_log_init (cmd_log_filename); @@ -1301,6 +1377,9 @@ init (xlator_t *this) INIT_LIST_HEAD (&conf->peers); INIT_LIST_HEAD (&conf->volumes); + INIT_LIST_HEAD (&conf->snapshots); + INIT_LIST_HEAD (&conf->missed_snaps_list); + pthread_mutex_init (&conf->mutex, NULL); conf->rpc = rpc; conf->uds_rpc = uds_rpc; @@ -1342,7 +1421,7 @@ init (xlator_t *this) } this->private = conf; - glusterd_vol_lock_init (); + glusterd_mgmt_v3_lock_init (); glusterd_txn_opinfo_dict_init (); (void) glusterd_nodesvc_set_online_status ("glustershd", _gf_false); @@ -1451,7 +1530,7 @@ fini (xlator_t *this) if (conf->handle) gf_store_handle_destroy (conf->handle); glusterd_sm_tr_log_delete (&conf->op_sm_log); - glusterd_vol_lock_fini (); + glusterd_mgmt_v3_lock_fini (); glusterd_txn_opinfo_dict_fini (); GF_FREE (conf); @@ -1569,5 +1648,9 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_INT, .description = "Sets the base port for portmap query" }, + { .key = {"snap-brick-path"}, + .type = GF_OPTION_TYPE_STR, + .description = "directory where the bricks for the snapshots will be created" + }, { .key = {NULL} }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 3d810ed97..79461fb5d 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -53,6 +53,9 @@ "S56glusterd-geo-rep-create-post.sh" +#define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256 +#define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90 +#define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100 #define GLUSTERD_SERVER_QUORUM "server" #define FMTSTR_CHECK_VOL_EXISTS "Volume %s does not exist" @@ -73,6 +76,9 @@ struct glusterd_volinfo_; typedef struct glusterd_volinfo_ glusterd_volinfo_t; +struct glusterd_snap_; +typedef struct glusterd_snap_ glusterd_snap_t; + typedef enum glusterd_op_ { GD_OP_NONE = 0, GD_OP_CREATE_VOLUME, @@ -102,6 +108,7 @@ typedef enum glusterd_op_ { GD_OP_COPY_FILE, GD_OP_SYS_EXEC, GD_OP_GSYNC_CREATE, + GD_OP_SNAP, GD_OP_MAX, } glusterd_op_t; @@ -137,13 +144,15 @@ typedef struct { nodesrv_t *quotad; struct pmap_registry *pmap; struct list_head volumes; + struct list_head snapshots; /*List of snap volumes */ pthread_mutex_t xprt_lock; struct list_head xprt_list; gf_store_handle_t *handle; gf_timer_t *timer; glusterd_sm_tr_log_t op_sm_log; struct rpc_clnt_program *gfs_mgmt; - dict_t *vol_lock; /* Dict for saving vol locks */ + dict_t *mgmt_v3_lock; /* Dict for saving + * mgmt_v3 locks */ dict_t *glusterd_txn_opinfo; /* Dict for saving * transaction opinfos */ uuid_t global_txn_id; /* To be used in @@ -151,21 +160,26 @@ typedef struct { * cluster with no * transaction ids */ - struct list_head mount_specs; - gf_boolean_t valgrind; - pthread_t brick_thread; - void *hooks_priv; + struct list_head mount_specs; + gf_boolean_t valgrind; + pthread_t brick_thread; + void *hooks_priv; /* need for proper handshake_t */ - int op_version; /* Starts with 1 for 3.3.0 */ - xlator_t *xl; /* Should be set to 'THIS' before creating thread */ - gf_boolean_t pending_quorum_action; - dict_t *opts; - synclock_t big_lock; - gf_boolean_t restart_done; - rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */ - uint32_t base_port; - pid_t etcd_pid; + int op_version; /* Starts with 1 for 3.3.0 */ + xlator_t *xl; /* Should be set to 'THIS' before creating thread */ + gf_boolean_t pending_quorum_action; + dict_t *opts; + synclock_t big_lock; + gf_boolean_t restart_done; + rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */ + uint32_t base_port; + uint64_t snap_max_hard_limit; + uint64_t snap_max_soft_limit; + char *snap_bricks_directory; + gf_store_handle_t *missed_snaps_list_shandle; + struct list_head missed_snaps_list; + pid_t etcd_pid; } glusterd_conf_t; @@ -177,6 +191,7 @@ typedef enum gf_brick_status { struct glusterd_brickinfo { char hostname[1024]; char path[PATH_MAX]; + char device_path[PATH_MAX]; char brick_id[1024];/*Client xlator name, AFR changelog name*/ struct list_head brick_list; uuid_t uuid; @@ -190,6 +205,7 @@ struct glusterd_brickinfo { int decommissioned; char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */ int caps; /* Capability */ + int32_t snap_status; /* * The group is used to identify which bricks are part of the same * replica set during brick-volfile generation, so that NSR volfiles @@ -290,11 +306,36 @@ struct glusterd_replace_brick_ { typedef struct glusterd_replace_brick_ glusterd_replace_brick_t; struct glusterd_volinfo_ { + gf_lock_t lock; char volname[GLUSTERD_MAX_VOLUME_NAME]; + gf_boolean_t is_snap_volume; + glusterd_snap_t *snapshot; + gf_boolean_t is_volume_restored; + char parent_volname[GLUSTERD_MAX_VOLUME_NAME]; + /* In case of a snap volume + i.e (is_snap_volume == TRUE) this + field will contain the name of + the volume which is snapped. In + case of a non-snap volume, this + field will be initialized as N/A */ int type; int brick_count; + uint64_t snap_count; + uint64_t snap_max_hard_limit; struct list_head vol_list; + /* In case of a snap volume |