Diffstat (limited to 'xlators/mgmt')
40 files changed, 52441 insertions, 9906 deletions
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index 326d8e63e..933c44019 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -1,22 +1,50 @@ xlator_LTLIBRARIES = glusterd.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt -glusterd_la_LDFLAGS = -module -avoidversion -glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c glusterd-op-sm.c \ - glusterd-utils.c glusterd3_1-mops.c glusterd-store.c glusterd-handshake.c \ - glusterd-pmap.c glusterd-volgen.c glusterd-rebalance.c +glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) "-DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\"" +glusterd_la_LDFLAGS = -module -avoid-version +if ENABLE_BD_XLATOR +glusterd_la_LDFLAGS += -llvm2app +endif +glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ + glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \ + glusterd-store.c glusterd-handshake.c glusterd-pmap.c \ + glusterd-volgen.c glusterd-rebalance.c glusterd-quota.c \ + glusterd-geo-rep.c glusterd-replace-brick.c glusterd-log-ops.c \ + glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ + glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \ + glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \ + glusterd-mgmt.c -glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ - $(top_builddir)/rpc/xdr/src/libgfxdr.la\ - $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la +glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ + $(top_builddir)/rpc/xdr/src/libgfxdr.la \ + $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \ + $(XML_LIBS) -lcrypto -noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h glusterd-sm.h \ - glusterd-store.h glusterd-mem-types.h glusterd-pmap.h glusterd-volgen.h +noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ + glusterd-sm.h glusterd-store.h glusterd-mem-types.h \ + glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \ + glusterd-syncop.h glusterd-hooks.h glusterd-locks.h \ + glusterd-mgmt.h -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)\ - -I$(rpclibdir) -L$(xlatordir)/ -I$(CONTRIBDIR)/rbtree -I$(top_srcdir)/rpc/xdr/src\ - -I$(top_srcdir)/rpc/rpc-lib/src -I$(CONTRIBDIR)/uuid -DGFS_PREFIX=\"$(prefix)\" \ - -DDATADIR=\"$(localstatedir)\" +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src \ + -I$(CONTRIBDIR)/uuid \ + -DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \ + -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\ + -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) $(XML_CPPFLAGS) +AM_CFLAGS = -Wall $(GF_CFLAGS) + +AM_LDFLAGS = -L$(xlatordir) CLEANFILES = + +install-data-hook: + +if GF_INSTALL_VAR_LIB_GLUSTERD + $(mkdir_p) $(localstatedir)/lib/ + (stat $(sysconfdir)/glusterd && \ + mv $(sysconfdir)/glusterd $(localstatedir)/lib/) || true; + (ln -sf $(localstatedir)/lib/glusterd $(sysconfdir)/glusterd) || true; +endif diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c new file mode 100644 index 000000000..596503c21 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -0,0 +1,1953 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "common-utils.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "run.h" +#include <sys/signal.h> + +/* misc */ + +/* In this function, we decide, based on the 'count' of the brick, + where to add it in the current volume. 'count' tells us already + how many of the given bricks are added. other argument are self- + descriptive. */ +int +add_brick_at_right_order (glusterd_brickinfo_t *brickinfo, + glusterd_volinfo_t *volinfo, int count, + int32_t stripe_cnt, int32_t replica_cnt) +{ + int idx = 0; + int i = 0; + int sub_cnt = 0; + glusterd_brickinfo_t *brick = NULL; + + /* The complexity of the function is in deciding at which index + to add new brick. Even though it can be defined with a complex + single formula for all volume, it is seperated out to make it + more readable */ + if (stripe_cnt) { + /* common formula when 'stripe_count' is set */ + /* idx = ((count / ((stripe_cnt * volinfo->replica_count) - + volinfo->dist_leaf_count)) * volinfo->dist_leaf_count) + + (count + volinfo->dist_leaf_count); + */ + + sub_cnt = volinfo->dist_leaf_count; + + idx = ((count / ((stripe_cnt * volinfo->replica_count) - + sub_cnt)) * sub_cnt) + + (count + sub_cnt); + + goto insert_brick; + } + + /* replica count is set */ + /* common formula when 'replica_count' is set */ + /* idx = ((count / (replica_cnt - existing_replica_count)) * + existing_replica_count) + + (count + existing_replica_count); + */ + + sub_cnt = volinfo->replica_count; + idx = (count / (replica_cnt - sub_cnt) * sub_cnt) + + (count + sub_cnt); + +insert_brick: + i = 0; + list_for_each_entry (brick, &volinfo->bricks, brick_list) { + i++; + if (i < idx) + continue; + gf_log (THIS->name, GF_LOG_DEBUG, "brick:%s index=%d, count=%d", + brick->path, idx, count); + + list_add (&brickinfo->brick_list, &brick->brick_list); + break; + } + + return 0; +} + + +static int +gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count, + int total_bricks, int *type, char *err_str, + size_t err_len) +{ + int ret = -1; + + switch (volinfo->type) { + case GF_CLUSTER_TYPE_NONE: + if ((volinfo->brick_count * stripe_count) == total_bricks) { + /* Change the volume type */ + *type = GF_CLUSTER_TYPE_STRIPE; + gf_log (THIS->name, GF_LOG_INFO, + "Changing the type of volume %s from " + "'distribute' to 'stripe'", volinfo->volname); + ret = 0; + goto out; + } else { + snprintf (err_str, err_len, "Incorrect number of " + "bricks (%d) supplied for stripe count (%d).", + (total_bricks - volinfo->brick_count), + stripe_count); + gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + break; + case GF_CLUSTER_TYPE_REPLICATE: + if (!(total_bricks % (volinfo->replica_count * stripe_count))) { + /* Change the volume type */ + *type = GF_CLUSTER_TYPE_STRIPE_REPLICATE; + gf_log (THIS->name, GF_LOG_INFO, + "Changing the type of volume %s from " + "'replicate' to 'replicate-stripe'", + volinfo->volname); + ret = 0; + goto out; + } else { + snprintf (err_str, err_len, "Incorrect number of " + "bricks (%d) supplied for changing volume's " + "stripe count to %d, 
need at least %d bricks", + (total_bricks - volinfo->brick_count), + stripe_count, + (volinfo->replica_count * stripe_count)); + gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + break; + case GF_CLUSTER_TYPE_STRIPE: + case GF_CLUSTER_TYPE_STRIPE_REPLICATE: + if (stripe_count < volinfo->stripe_count) { + snprintf (err_str, err_len, + "Incorrect stripe count (%d) supplied. " + "Volume already has stripe count (%d)", + stripe_count, volinfo->stripe_count); + gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + if (stripe_count == volinfo->stripe_count) { + if (!(total_bricks % volinfo->dist_leaf_count)) { + /* its same as the one which exists */ + ret = 1; + goto out; + } + } + if (stripe_count > volinfo->stripe_count) { + /* We have to make sure before and after 'add-brick', + the number or subvolumes for distribute will remain + same, when stripe count is given */ + if ((volinfo->brick_count * (stripe_count * + volinfo->replica_count)) == + (total_bricks * volinfo->dist_leaf_count)) { + /* Change the dist_leaf_count */ + gf_log (THIS->name, GF_LOG_INFO, + "Changing the stripe count of " + "volume %s from %d to %d", + volinfo->volname, + volinfo->stripe_count, stripe_count); + ret = 0; + goto out; + } + } + break; + } + +out: + return ret; +} + +static int +gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count, + int total_bricks, int *type, char *err_str, + int err_len) +{ + int ret = -1; + + /* replica count is set */ + switch (volinfo->type) { + case GF_CLUSTER_TYPE_NONE: + if ((volinfo->brick_count * replica_count) == total_bricks) { + /* Change the volume type */ + *type = GF_CLUSTER_TYPE_REPLICATE; + gf_log (THIS->name, GF_LOG_INFO, + "Changing the type of volume %s from " + "'distribute' to 'replica'", volinfo->volname); + ret = 0; + goto out; + + } else { + snprintf (err_str, err_len, "Incorrect number of " + "bricks (%d) supplied for replica count (%d).", + (total_bricks - volinfo->brick_count), + replica_count); + gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + break; + case GF_CLUSTER_TYPE_STRIPE: + if (!(total_bricks % (volinfo->dist_leaf_count * replica_count))) { + /* Change the volume type */ + *type = GF_CLUSTER_TYPE_STRIPE_REPLICATE; + gf_log (THIS->name, GF_LOG_INFO, + "Changing the type of volume %s from " + "'stripe' to 'replicate-stripe'", + volinfo->volname); + ret = 0; + goto out; + } else { + snprintf (err_str, err_len, "Incorrect number of " + "bricks (%d) supplied for changing volume's " + "replica count to %d, need at least %d " + "bricks", + (total_bricks - volinfo->brick_count), + replica_count, (volinfo->dist_leaf_count * + replica_count)); + gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + break; + case GF_CLUSTER_TYPE_REPLICATE: + case GF_CLUSTER_TYPE_STRIPE_REPLICATE: + if (replica_count < volinfo->replica_count) { + snprintf (err_str, err_len, + "Incorrect replica count (%d) supplied. 
" + "Volume already has (%d)", + replica_count, volinfo->replica_count); + gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + if (replica_count == volinfo->replica_count) { + if (!(total_bricks % volinfo->dist_leaf_count)) { + ret = 1; + goto out; + } + } + if (replica_count > volinfo->replica_count) { + /* We have to make sure before and after 'add-brick', + the number or subvolumes for distribute will remain + same, when replica count is given */ + if ((total_bricks * volinfo->dist_leaf_count) == + (volinfo->brick_count * (replica_count * + volinfo->stripe_count))) { + /* Change the dist_leaf_count */ + gf_log (THIS->name, GF_LOG_INFO, + "Changing the replica count of " + "volume %s from %d to %d", + volinfo->volname, volinfo->replica_count, + replica_count); + ret = 0; + goto out; + } + } + break; + } +out: + return ret; +} + +static int +gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo, + int32_t replica_count, + int32_t brick_count, char *err_str, + size_t err_len) +{ + int ret = -1; + int replica_nodes = 0; + + switch (volinfo->type) { + case GF_CLUSTER_TYPE_NONE: + case GF_CLUSTER_TYPE_STRIPE: + snprintf (err_str, err_len, + "replica count (%d) option given for non replicate " + "volume %s", replica_count, volinfo->volname); + gf_log (THIS->name, GF_LOG_WARNING, "%s", err_str); + goto out; + + case GF_CLUSTER_TYPE_REPLICATE: + case GF_CLUSTER_TYPE_STRIPE_REPLICATE: + /* in remove brick, you can only reduce the replica count */ + if (replica_count > volinfo->replica_count) { + snprintf (err_str, err_len, + "given replica count (%d) option is more " + "than volume %s's replica count (%d)", + replica_count, volinfo->volname, + volinfo->replica_count); + gf_log (THIS->name, GF_LOG_WARNING, "%s", err_str); + goto out; + } + if (replica_count == volinfo->replica_count) { + /* This means the 'replica N' option on CLI was + redundant. Check if the total number of bricks given + for removal is same as 'dist_leaf_count' */ + if (brick_count % volinfo->dist_leaf_count) { + snprintf (err_str, err_len, + "number of bricks provided (%d) is " + "not valid. 
need at least %d " + "(or %dxN)", brick_count, + volinfo->dist_leaf_count, + volinfo->dist_leaf_count); + gf_log (THIS->name, GF_LOG_WARNING, "%s", + err_str); + goto out; + } + ret = 1; + goto out; + } + + replica_nodes = ((volinfo->brick_count / + volinfo->replica_count) * + (volinfo->replica_count - replica_count)); + + if (brick_count % replica_nodes) { + snprintf (err_str, err_len, + "need %d(xN) bricks for reducing replica " + "count of the volume from %d to %d", + replica_nodes, volinfo->replica_count, + replica_count); + goto out; + } + break; + } + + ret = 0; +out: + return ret; +} + +/* Handler functions */ +int +__glusterd_handle_add_brick (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + dict_t *dict = NULL; + char *bricks = NULL; + char *volname = NULL; + int brick_count = 0; + void *cli_rsp = NULL; + char err_str[2048] = {0,}; + gf_cli_rsp rsp = {0,}; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + int total_bricks = 0; + int32_t replica_count = 0; + int32_t stripe_count = 0; + int type = 0; + + this = THIS; + GF_ASSERT(this); + + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + snprintf (err_str, sizeof (err_str), "Garbage args received"); + goto out; + } + + gf_log (this->name, GF_LOG_INFO, "Received add brick req"); + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } + } + + ret = dict_get_str (dict, "volname", &volname); + + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get volume " + "name"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + if (!(ret = glusterd_check_volume_exists (volname))) { + ret = -1; + snprintf (err_str, sizeof (err_str), "Volume %s does not exist", + volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + ret = dict_get_int32 (dict, "count", &brick_count); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get volume " + "brick count"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + ret = dict_get_int32 (dict, "replica-count", &replica_count); + if (!ret) { + gf_log (this->name, GF_LOG_INFO, "replica-count is %d", + replica_count); + } + + ret = dict_get_int32 (dict, "stripe-count", &stripe_count); + if (!ret) { + gf_log (this->name, GF_LOG_INFO, "stripe-count is %d", + stripe_count); + } + + if (!dict_get (dict, "force")) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get flag"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get volinfo " + "for volume name %s", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + + } + + total_bricks = volinfo->brick_count + brick_count; + + if (!stripe_count && !replica_count) { + if (volinfo->type == GF_CLUSTER_TYPE_NONE) + goto brick_val; + + if ((volinfo->brick_count < volinfo->dist_leaf_count) && + (total_bricks <= volinfo->dist_leaf_count)) + goto brick_val; + + if ((brick_count % volinfo->dist_leaf_count) != 0) { + snprintf (err_str, sizeof (err_str), "Incorrect number " + "of bricks supplied %d with count 
%d", + brick_count, volinfo->dist_leaf_count); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + goto brick_val; + /* done with validation.. below section is if stripe|replica + count is given */ + } + + /* These bricks needs to be added one per a replica or stripe volume */ + if (stripe_count) { + ret = gd_addbr_validate_stripe_count (volinfo, stripe_count, + total_bricks, &type, + err_str, + sizeof (err_str)); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + /* if stripe count is same as earlier, set it back to 0 */ + if (ret == 1) + stripe_count = 0; + + ret = dict_set_int32 (dict, "stripe-count", stripe_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set the stripe-count in dict"); + goto out; + } + goto brick_val; + } + + ret = gd_addbr_validate_replica_count (volinfo, replica_count, + total_bricks, + &type, err_str, + sizeof (err_str)); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + /* if replica count is same as earlier, set it back to 0 */ + if (ret == 1) + replica_count = 0; + + ret = dict_set_int32 (dict, "replica-count", replica_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set the replica-count in dict"); + goto out; + } + +brick_val: + ret = dict_get_str (dict, "bricks", &bricks); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get volume " + "bricks"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + if (type != volinfo->type) { + ret = dict_set_int32 (dict, "type", type); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "failed to set the new type in dict"); + } + + ret = glusterd_op_begin_synctask (req, GD_OP_ADD_BRICK, dict); + +out: + if (ret) { + rsp.op_ret = -1; + rsp.op_errno = 0; + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), "Operation failed"); + rsp.op_errstr = err_str; + cli_rsp = &rsp; + glusterd_to_cli (req, cli_rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp, dict); + ret = 0; //sent error to cli, prevent second reply + } + + free (cli_req.dict.dict_val); //its malloced by xdr + + return ret; +} + +int +glusterd_handle_add_brick (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_add_brick); +} + +static int +subvol_matcher_init (int **subvols, int count) +{ + int ret = -1; + + *subvols = GF_CALLOC (count, sizeof(int), gf_gld_mt_int); + if (*subvols) + ret = 0; + + return ret; +} + +static void +subvol_matcher_update (int *subvols, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + glusterd_brickinfo_t *tmp = NULL; + int32_t sub_volume = 0; + int pos = 0; + + list_for_each_entry (tmp, &volinfo->bricks, brick_list) { + + if (strcmp (tmp->hostname, brickinfo->hostname) || + strcmp (tmp->path, brickinfo->path)) { + pos++; + continue; + } + gf_log (THIS->name, GF_LOG_DEBUG, LOGSTR_FOUND_BRICK, + brickinfo->hostname, brickinfo->path, + volinfo->volname); + sub_volume = (pos / volinfo->dist_leaf_count); + subvols[sub_volume]++; + break; + } + +} + +static int +subvol_matcher_verify (int *subvols, glusterd_volinfo_t *volinfo, char *err_str, + size_t err_len, char *vol_type) +{ + int i = 0; + int ret = 0; + + do { + + if (subvols[i] % volinfo->dist_leaf_count == 0) { + continue; + } else { + ret = -1; + snprintf (err_str, err_len, + "Bricks not from same subvol for %s", vol_type); + gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str); + break; + } + } while (++i < volinfo->subvol_count); + + return ret; 
+} + +static void +subvol_matcher_destroy (int *subvols) +{ + GF_FREE (subvols); +} + +int +__glusterd_handle_remove_brick (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + dict_t *dict = NULL; + int32_t count = 0; + char *brick = NULL; + char key[256] = {0,}; + char *brick_list = NULL; + int i = 1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int *subvols = NULL; + glusterd_brickinfo_t *tmp = NULL; + char err_str[2048] = {0}; + gf_cli_rsp rsp = {0,}; + void *cli_rsp = NULL; + char vol_type[256] = {0,}; + int32_t replica_count = 0; + int32_t brick_index = 0; + int32_t tmp_brick_idx = 0; + int found = 0; + int diff_count = 0; + char *volname = 0; + xlator_t *this = NULL; + + GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + snprintf (err_str, sizeof (err_str), "Received garbage args"); + goto out; + } + + + gf_log (this->name, GF_LOG_INFO, "Received rem brick req"); + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } + } + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get volume " + "name"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + ret = dict_get_int32 (dict, "count", &count); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get brick " + "count"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str),"Volume %s does not exist", + volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + ret = dict_get_int32 (dict, "replica-count", &replica_count); + if (!ret) { + gf_log (this->name, GF_LOG_INFO, + "request to change replica-count to %d", replica_count); + ret = gd_rmbr_validate_replica_count (volinfo, replica_count, + count, err_str, + sizeof (err_str)); + if (ret < 0) { + /* logging and error msg are done in above function + itself */ + goto out; + } + dict_del (dict, "replica-count"); + if (ret) { + replica_count = 0; + } else { + ret = dict_set_int32 (dict, "replica-count", + replica_count); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to set the replica_count " + "in dict"); + goto out; + } + } + } + + /* 'vol_type' is used for giving the meaning full error msg for user */ + if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { + strcpy (vol_type, "replica"); + } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE) { + strcpy (vol_type, "stripe"); + } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) { + strcpy (vol_type, "stripe-replicate"); + } else { + strcpy (vol_type, "distribute"); + } + + /* Do not allow remove-brick if the volume is a stripe volume*/ + if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) && + (volinfo->brick_count == volinfo->stripe_count)) { + snprintf (err_str, sizeof (err_str), + "Removing brick from a stripe volume is not allowed"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + + if (!replica_count && + (volinfo->type 
== GF_CLUSTER_TYPE_STRIPE_REPLICATE) && + (volinfo->brick_count == volinfo->dist_leaf_count)) { + snprintf (err_str, sizeof(err_str), + "Removing bricks from stripe-replicate" + " configuration is not allowed without reducing " + "replica or stripe count explicitly."); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + + if (!replica_count && + (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) && + (volinfo->brick_count == volinfo->dist_leaf_count)) { + snprintf (err_str, sizeof (err_str), + "Removing bricks from replicate configuration " + "is not allowed without reducing replica count " + "explicitly."); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + + /* Do not allow remove-brick if the bricks given is less than + the replica count or stripe count */ + if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) { + if (volinfo->dist_leaf_count && + (count % volinfo->dist_leaf_count)) { + snprintf (err_str, sizeof (err_str), "Remove brick " + "incorrect brick count of %d for %s %d", + count, vol_type, volinfo->dist_leaf_count); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + } + + brick_list = GF_MALLOC (120000 * sizeof(*brick_list),gf_common_mt_char); + + if (!brick_list) { + ret = -1; + goto out; + } + + strcpy (brick_list, " "); + + if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && + (volinfo->subvol_count > 1)) { + ret = subvol_matcher_init (&subvols, volinfo->subvol_count); + if (ret) + goto out; + } + + while ( i <= count) { + snprintf (key, sizeof (key), "brick%d", i); + ret = dict_get_str (dict, key, &brick); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get %s", + key); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + gf_log (this->name, GF_LOG_DEBUG, "Remove brick count %d brick:" + " %s", i, brick); + + ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, + &brickinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), "Incorrect brick " + "%s for volume %s", brick, volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + strcat(brick_list, brick); + strcat(brick_list, " "); + + i++; + if ((volinfo->type == GF_CLUSTER_TYPE_NONE) || + (volinfo->brick_count <= volinfo->dist_leaf_count)) + continue; + + if (replica_count) { + /* do the validation of bricks here */ + /* -2 because i++ is already done, and i starts with 1, + instead of 0 */ + diff_count = (volinfo->replica_count - replica_count); + brick_index = (((i -2) / diff_count) * volinfo->replica_count); + tmp_brick_idx = 0; + found = 0; + list_for_each_entry (tmp, &volinfo->bricks, brick_list) { + tmp_brick_idx++; + gf_log (this->name, GF_LOG_TRACE, + "validate brick %s:%s (%d %d %d)", + tmp->hostname, tmp->path, tmp_brick_idx, + brick_index, volinfo->replica_count); + if (tmp_brick_idx <= brick_index) + continue; + if (tmp_brick_idx > + (brick_index + volinfo->replica_count)) + break; + if ((!strcmp (tmp->hostname,brickinfo->hostname)) && + !strcmp (tmp->path, brickinfo->path)) { + found = 1; + break; + } + } + if (found) + continue; + + snprintf (err_str, sizeof (err_str), "Bricks are from " + "same subvol"); + gf_log (this->name, GF_LOG_INFO, + "failed to validate brick %s:%s (%d %d %d)", + tmp->hostname, tmp->path, tmp_brick_idx, + brick_index, volinfo->replica_count); + ret = -1; + /* brick order is not valid */ + goto out; + } + + /* Find which subvolume the brick belongs to */ + subvol_matcher_update (subvols, volinfo, brickinfo); + } + + /* Check if the 
bricks belong to the same subvolumes.*/ + if ((volinfo->type != GF_CLUSTER_TYPE_NONE) && + (volinfo->subvol_count > 1)) { + ret = subvol_matcher_verify (subvols, volinfo, + err_str, sizeof(err_str), + vol_type); + if (ret) + goto out; + } + + ret = glusterd_op_begin_synctask (req, GD_OP_REMOVE_BRICK, dict); + +out: + if (ret) { + rsp.op_ret = -1; + rsp.op_errno = 0; + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + rsp.op_errstr = err_str; + cli_rsp = &rsp; + glusterd_to_cli (req, cli_rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp, dict); + + ret = 0; //sent error to cli, prevent second reply + + } + + GF_FREE (brick_list); + subvol_matcher_destroy (subvols); + free (cli_req.dict.dict_val); //its malloced by xdr + + return ret; +} + +int +glusterd_handle_remove_brick (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_remove_brick); +} + +static int +_glusterd_restart_gsync_session (dict_t *this, char *key, + data_t *value, void *data) +{ + char *slave = NULL; + char *slave_buf = NULL; + char *path_list = NULL; + char *slave_vol = NULL; + char *slave_ip = NULL; + char *conf_path = NULL; + char **errmsg = NULL; + int ret = -1; + glusterd_gsync_status_temp_t *param = NULL; + gf_boolean_t is_running = _gf_false; + + param = (glusterd_gsync_status_temp_t *)data; + + GF_ASSERT (param); + GF_ASSERT (param->volinfo); + + slave = strchr(value->data, ':'); + if (slave) { + slave++; + slave_buf = gf_strdup (slave); + if (!slave_buf) { + gf_log ("", GF_LOG_ERROR, + "Failed to gf_strdup"); + ret = -1; + goto out; + } + } + else + return 0; + + ret = dict_set_dynstr (param->rsp_dict, "slave", slave_buf); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store slave"); + if (slave_buf) + GF_FREE(slave_buf); + goto out; + } + + ret = glusterd_get_slave_details_confpath (param->volinfo, + param->rsp_dict, + &slave_ip, &slave_vol, + &conf_path, errmsg); + if (ret) { + if (*errmsg) + gf_log ("", GF_LOG_ERROR, "%s", *errmsg); + else + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave or confpath details."); + goto out; + } + + /* In cases that gsyncd is not running, we will not invoke it + * because of add-brick. */ + ret = glusterd_check_gsync_running_local (param->volinfo->volname, + slave, conf_path, + &is_running); + if (ret) { + gf_log ("", GF_LOG_ERROR, "gsync running validation failed."); + goto out; + } + if (_gf_false == is_running) { + gf_log ("", GF_LOG_DEBUG, "gsync session for %s and %s is" + " not running on this node. Hence not restarting.", + param->volinfo->volname, slave); + ret = 0; + goto out; + } + + ret = glusterd_get_local_brickpaths (param->volinfo, &path_list); + if (!path_list) { + gf_log ("", GF_LOG_DEBUG, "This node not being part of" + " volume should not be running gsyncd. 
Hence" + " no gsyncd process to restart."); + ret = 0; + goto out; + } + + ret = glusterd_check_restart_gsync_session (param->volinfo, slave, + param->rsp_dict, path_list, + conf_path, 0); + if (ret) + gf_log ("", GF_LOG_ERROR, + "Unable to restart gsync session."); + +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret); + return ret; +} + +/* op-sm */ + +int +glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, + char *bricks, dict_t *dict) +{ + char *brick = NULL; + int32_t i = 1; + char *brick_list = NULL; + char *free_ptr1 = NULL; + char *free_ptr2 = NULL; + char *saveptr = NULL; + int32_t ret = -1; + int32_t stripe_count = 0; + int32_t replica_count = 0; + int32_t type = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_gsync_status_temp_t param = {0, }; + gf_boolean_t restart_needed = 0; + char msg[1024] __attribute__((unused)) = {0, }; + int caps = 0; + + GF_ASSERT (volinfo); + + if (bricks) { + brick_list = gf_strdup (bricks); + free_ptr1 = brick_list; + } + + if (count) + brick = strtok_r (brick_list+1, " \n", &saveptr); + + if (dict) { + ret = dict_get_int32 (dict, "stripe-count", &stripe_count); + if (!ret) + gf_log (THIS->name, GF_LOG_INFO, + "stripe-count is set %d", stripe_count); + + ret = dict_get_int32 (dict, "replica-count", &replica_count); + if (!ret) + gf_log (THIS->name, GF_LOG_INFO, + "replica-count is set %d", replica_count); + ret = dict_get_int32 (dict, "type", &type); + if (!ret) + gf_log (THIS->name, GF_LOG_INFO, + "type is set %d, need to change it", type); + } + + while ( i <= count) { + ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo); + if (ret) + goto out; + + ret = glusterd_resolve_brick (brickinfo); + if (ret) + goto out; + if (stripe_count || replica_count) { + add_brick_at_right_order (brickinfo, volinfo, (i - 1), + stripe_count, replica_count); + } else { + list_add_tail (&brickinfo->brick_list, &volinfo->bricks); + } + brick = strtok_r (NULL, " \n", &saveptr); + i++; + volinfo->brick_count++; + + } + + + /* Gets changed only if the options are given in add-brick cli */ + if (type) + volinfo->type = type; + if (replica_count) { + volinfo->replica_count = replica_count; + } + if (stripe_count) { + volinfo->stripe_count = stripe_count; + } + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo); + + /* backward compatibility */ + volinfo->sub_count = ((volinfo->dist_leaf_count == 1) ? 
0: + volinfo->dist_leaf_count); + + volinfo->subvol_count = (volinfo->brick_count / + volinfo->dist_leaf_count); + + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) + goto out; + + ret = 0; + if (GLUSTERD_STATUS_STARTED != volinfo->status) + goto out; + + brick_list = gf_strdup (bricks); + free_ptr2 = brick_list; + i = 1; + + if (count) + brick = strtok_r (brick_list+1, " \n", &saveptr); +#ifdef HAVE_BD_XLATOR + if (brickinfo->vg[0]) + caps = CAPS_BD | CAPS_THIN | + CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; +#endif + + while (i <= count) { + ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, + &brickinfo); + if (ret) + goto out; +#ifdef HAVE_BD_XLATOR + /* Check for VG/thin pool if its BD volume */ + if (brickinfo->vg[0]) { + ret = glusterd_is_valid_vg (brickinfo, 0, msg); + if (ret) { + gf_log (THIS->name, GF_LOG_CRITICAL, "%s", msg); + goto out; + } + /* if anyone of the brick does not have thin support, + disable it for entire volume */ + caps &= brickinfo->caps; + } else + caps = 0; +#endif + + if (uuid_is_null (brickinfo->uuid)) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK, + brickinfo->hostname, brickinfo->path); + goto out; + } + } + + ret = glusterd_brick_start (volinfo, brickinfo, + _gf_true); + if (ret) + goto out; + i++; + brick = strtok_r (NULL, " \n", &saveptr); + + /* Check if the brick is added in this node, and set + * the restart_needed flag. */ + if ((!uuid_compare (brickinfo->uuid, MY_UUID)) && + !restart_needed) { + restart_needed = 1; + gf_log ("", GF_LOG_DEBUG, + "Restart gsyncd session, if it's already " + "running."); + } + } + + /* If the restart_needed flag is set, restart gsyncd sessions for that + * particular master with all the slaves. 
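       dict_foreach() below walks volinfo->gsync_slaves and invokes
       _glusterd_restart_gsync_session() once for every configured
       slave entry.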
*/ + if (restart_needed) { + param.rsp_dict = dict; + param.volinfo = volinfo; + dict_foreach (volinfo->gsync_slaves, + _glusterd_restart_gsync_session, ¶m); + } + volinfo->caps = caps; +out: + GF_FREE (free_ptr1); + GF_FREE (free_ptr2); + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + + +int +glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick, + int force, int *need_migrate) +{ + glusterd_brickinfo_t *brickinfo = NULL; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + + GF_ASSERT (volinfo); + GF_ASSERT (brick); + + priv = THIS->private; + GF_ASSERT (priv); + + ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, + &brickinfo); + if (ret) + goto out; + + ret = glusterd_resolve_brick (brickinfo); + if (ret) + goto out; + + glusterd_volinfo_reset_defrag_stats (volinfo); + + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + /* Only if the brick is in this glusterd, do the rebalance */ + if (need_migrate) + *need_migrate = 1; + } + + if (force) { + ret = glusterd_brick_stop (volinfo, brickinfo, + _gf_true); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unable to stop " + "glusterfs, ret: %d", ret); + } + goto out; + } + + brickinfo->decommissioned = 1; + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + int count = 0; + int i = 0; + char *bricks = NULL; + char *brick_list = NULL; + char *saveptr = NULL; + char *free_ptr = NULL; + char *brick = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + char msg[2048] = {0,}; + gf_boolean_t brick_alloc = _gf_false; + char *all_bricks = NULL; + char *str_ret = NULL; + gf_boolean_t is_force = _gf_false; + + priv = THIS->private; + if (!priv) + goto out; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to find volume: %s", volname); + goto out; + } + + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + + if (glusterd_is_rb_ongoing (volinfo)) { + snprintf (msg, sizeof (msg), "Replace brick is in progress on " + "volume %s. Please retry after replace-brick " + "operation is committed or aborted", volname); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + if (glusterd_is_defrag_on(volinfo)) { + snprintf (msg, sizeof(msg), "Volume name %s rebalance is in " + "progress. 
Please retry after completion", volname); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + ret = dict_get_int32 (dict, "count", &count); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get count"); + goto out; + } + + ret = dict_get_str (dict, "bricks", &bricks); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unable to get bricks"); + goto out; + } + + is_force = dict_get_str_boolean (dict, "force", _gf_false); + + if (bricks) { + brick_list = gf_strdup (bricks); + all_bricks = gf_strdup (bricks); + free_ptr = brick_list; + } + + if (count) + brick = strtok_r (brick_list+1, " \n", &saveptr); + + + while ( i < count) { + if (!glusterd_store_is_valid_brickpath (volname, brick) || + !glusterd_is_valid_volfpath (volname, brick)) { + snprintf (msg, sizeof (msg), "brick path %s is " + "too long", brick); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + + ret = -1; + goto out; + + } + + ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Add-brick: Unable" + " to get brickinfo"); + goto out; + } + brick_alloc = _gf_true; + + ret = glusterd_new_brick_validate (brick, brickinfo, msg, + sizeof (msg)); + if (ret) { + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { +#ifdef HAVE_BD_XLATOR + if (brickinfo->vg[0]) { + ret = glusterd_is_valid_vg (brickinfo, 1, msg); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "%s", + msg); + *op_errstr = gf_strdup (msg); + goto out; + } + } +#endif + + ret = glusterd_validate_and_create_brickpath (brickinfo, + volinfo->volume_id, + op_errstr, is_force); + if (ret) + goto out; + } + + glusterd_brickinfo_delete (brickinfo); + brick_alloc = _gf_false; + brickinfo = NULL; + brick = strtok_r (NULL, " \n", &saveptr); + i++; + } + +out: + GF_FREE (free_ptr); + if (brick_alloc && brickinfo) + glusterd_brickinfo_delete (brickinfo); + GF_FREE (str_ret); + GF_FREE (all_bricks); + + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + +int +glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *errstr = NULL; + int32_t brick_count = 0; + char msg[2048] = {0,}; + int32_t flag = 0; + gf1_op_commands cmd = GF_OP_CMD_NONE; + char *task_id_str = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Volume %s does not exist", volname); + goto out; + } + + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + + if (glusterd_is_rb_ongoing (volinfo)) { + snprintf (msg, sizeof (msg), "Replace brick is in progress on " + "volume %s. 
Please retry after replace-brick " + "operation is committed or aborted", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + ret = dict_get_int32 (dict, "command", &flag); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count"); + goto out; + } + cmd = flag; + + ret = -1; + switch (cmd) { + case GF_OP_CMD_NONE: + errstr = gf_strdup ("no remove-brick command issued"); + goto out; + + case GF_OP_CMD_STATUS: + ret = 0; + goto out; + + case GF_OP_CMD_START: + { + if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) && + dict_get (dict, "replica-count")) { + snprintf (msg, sizeof(msg), "Migration of data is not " + "needed when reducing replica count. Use the" + " 'force' option"); + errstr = gf_strdup (msg); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + goto out; + } + + if (GLUSTERD_STATUS_STARTED != volinfo->status) { + snprintf (msg, sizeof (msg), "Volume %s needs to be " + "started before remove-brick (you can use " + "'force' or 'commit' to override this " + "behavior)", volinfo->volname); + errstr = gf_strdup (msg); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + goto out; + } + if (!gd_is_remove_brick_committed (volinfo)) { + snprintf (msg, sizeof (msg), "An earlier remove-brick " + "task exists for volume %s. Either commit it" + " or stop it before starting a new task.", + volinfo->volname); + errstr = gf_strdup (msg); + gf_log (this->name, GF_LOG_ERROR, "Earlier remove-brick" + " task exists for volume %s.", + volinfo->volname); + goto out; + } + if (glusterd_is_defrag_on(volinfo)) { + errstr = gf_strdup("Rebalance is in progress. Please " + "retry after completion"); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + goto out; + } + + if (is_origin_glusterd (dict)) { + ret = glusterd_generate_and_set_task_id + (dict, GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Missing remove-brick-id"); + ret = 0; + } + } + break; + } + + case GF_OP_CMD_STOP: + ret = 0; + break; + + case GF_OP_CMD_COMMIT: + if (volinfo->decommission_in_progress) { + errstr = gf_strdup ("use 'force' option as migration " + "is in progress"); + goto out; + } + break; + + case GF_OP_CMD_COMMIT_FORCE: + break; + } + + ret = dict_get_int32 (dict, "count", &brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count"); + goto out; + } + + ret = 0; + if (volinfo->brick_count == brick_count) { + errstr = gf_strdup ("Deleting all the bricks of the " + "volume is not allowed"); + ret = -1; + goto out; + } + +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + if (ret && errstr) { + if (op_errstr) + *op_errstr = errstr; + } + + return ret; +} + +int +glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo, + gf_defrag_status_t status) +{ + int ret = 0; + +#if 0 /* TODO: enable this behavior once cluster-wide awareness comes for + defrag cbk function */ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + + switch (status) { + case GF_DEFRAG_STATUS_PAUSED: + case GF_DEFRAG_STATUS_FAILED: + /* No changes required in the volume file. 
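           The decommissioned flags set on the bricks stay as they are,
           so a later commit can still act on them;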
+ everything should remain as is */ + break; + case GF_DEFRAG_STATUS_STOPPED: + /* Fall back to the old volume file */ + list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) { + if (!brickinfo->decommissioned) + continue; + brickinfo->decommissioned = 0; + } + break; + + case GF_DEFRAG_STATUS_COMPLETE: + /* Done with the task, you can remove the brick from the + volume file */ + list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) { + if (!brickinfo->decommissioned) + continue; + gf_log (THIS->name, GF_LOG_INFO, "removing the brick %s", + brickinfo->path); + brickinfo->decommissioned = 0; + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + /*TODO: use the 'atomic' flavour of brick_stop*/ + ret = glusterd_brick_stop (volinfo, brickinfo); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to stop glusterfs (%d)", ret); + } + } + glusterd_delete_brick (volinfo, brickinfo); + } + break; + + default: + GF_ASSERT (!"cbk function called with wrong status"); + break; + } + + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to write volume files (%d)", ret); + + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to store volume info (%d)", ret); + + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + ret = glusterd_check_generate_start_nfs (); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to start nfs process (%d)", ret); + } + +#endif + + volinfo->decommission_in_progress = 0; + return ret; +} + + +int +glusterd_op_add_brick (dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char *bricks = NULL; + int32_t count = 0; + + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "volname", &volname); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + goto out; + } + + ret = dict_get_int32 (dict, "count", &count); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get count"); + goto out; + } + + + ret = dict_get_str (dict, "bricks", &bricks); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get bricks"); + goto out; + } + + ret = glusterd_op_perform_add_bricks (volinfo, count, bricks, dict); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to add bricks"); + goto out; + } + + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) + ret = glusterd_nodesvcs_handle_graph_change (volinfo); + +out: + return ret; +} + +int +glusterd_op_remove_brick (dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *brick = NULL; + int32_t count = 0; + int32_t i = 1; + char key[256] = {0,}; + int32_t flag = 0; + char err_str[4096] = {0,}; + int need_rebalance = 0; + int force = 0; + gf1_op_commands cmd = 0; + int32_t replica_count = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + char *task_id_str = NULL; + xlator_t *this = NULL; + dict_t *bricks_dict = NULL; + char *brick_tmpstr = NULL; + + this = THIS; + GF_ASSERT (this); + + ret = dict_get_str (dict, "volname", &volname); + + if 
(ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); + goto out; + } + + ret = dict_get_int32 (dict, "command", &flag); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get command"); + goto out; + } + cmd = flag; + + /* Set task-id, if available, in ctx dict for operations other than + * start + */ + if (is_origin_glusterd (dict) && (cmd != GF_OP_CMD_START)) { + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, dict, + GF_REMOVE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set remove-brick-id"); + goto out; + } + } + } + + /* Clear task-id, rebal.op and stored bricks on commmitting/stopping + * remove-brick */ + if ((cmd != GF_OP_CMD_START) || (cmd != GF_OP_CMD_STATUS)) { + uuid_clear (volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + dict_unref (volinfo->rebal.dict); + volinfo->rebal.dict = NULL; + } + + ret = -1; + switch (cmd) { + case GF_OP_CMD_NONE: + goto out; + + case GF_OP_CMD_STATUS: + ret = 0; + goto out; + + case GF_OP_CMD_STOP: + { + /* Fall back to the old volume file */ + list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, + brick_list) { + if (!brickinfo->decommissioned) + continue; + brickinfo->decommissioned = 0; + } + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to create volfiles"); + goto out; + } + + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to store volinfo"); + goto out; + } + + ret = 0; + goto out; + } + + case GF_OP_CMD_START: + ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Missing remove-brick-id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + volinfo->rebal.op = GD_OP_REMOVE_BRICK; + } + force = 0; + break; + + case GF_OP_CMD_COMMIT: + force = 1; + break; + + case GF_OP_CMD_COMMIT_FORCE: + + if (volinfo->decommission_in_progress) { + if (volinfo->rebal.defrag) { + LOCK (&volinfo->rebal.defrag->lock); + /* Fake 'rebalance-complete' so the graph change + happens right away */ + volinfo->rebal.defrag_status = + GF_DEFRAG_STATUS_COMPLETE; + + UNLOCK (&volinfo->rebal.defrag->lock); + } + /* Graph change happens in rebalance _cbk function, + no need to do anything here */ + /* TODO: '_cbk' function is not doing anything for now */ + } + + ret = 0; + force = 1; + break; + } + + ret = dict_get_int32 (dict, "count", &count); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get count"); + goto out; + } + + /* Save the list of bricks for later usage. Right now this is required + * for displaying the task parameters with task status in volume status. 
+ */ + bricks_dict = dict_new (); + if (!bricks_dict) { + ret = -1; + goto out; + } + ret = dict_set_int32 (bricks_dict, "count", count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to save remove-brick count"); + goto out; + } + while ( i <= count) { + snprintf (key, 256, "brick%d", i); + ret = dict_get_str (dict, key, &brick); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get %s", + key); + goto out; + } + + brick_tmpstr = gf_strdup (brick); + if (!brick_tmpstr) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to duplicate brick name"); + goto out; + } + ret = dict_set_dynstr (bricks_dict, key, brick_tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add brick to dict"); + goto out; + } + brick_tmpstr = NULL; + + ret = glusterd_op_perform_remove_brick (volinfo, brick, force, + &need_rebalance); + if (ret) + goto out; + i++; + } + ret = dict_get_int32 (dict, "replica-count", &replica_count); + if (!ret) { + gf_log (this->name, GF_LOG_INFO, + "changing replica count %d to %d on volume %s", + volinfo->replica_count, replica_count, + volinfo->volname); + volinfo->replica_count = replica_count; + volinfo->sub_count = replica_count; + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo); + volinfo->subvol_count = (volinfo->brick_count / + volinfo->dist_leaf_count); + + if (replica_count == 1) { + if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) { + volinfo->type = GF_CLUSTER_TYPE_NONE; + /* backward compatibility */ + volinfo->sub_count = 0; + } else { + volinfo->type = GF_CLUSTER_TYPE_STRIPE; + /* backward compatibility */ + volinfo->sub_count = volinfo->dist_leaf_count; + } + } + } + volinfo->rebal.dict = bricks_dict; + bricks_dict = NULL; + + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "failed to create volfiles"); + goto out; + } + + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "failed to store volinfo"); + goto out; + } + + /* Need to reset the defrag/rebalance status accordingly */ + switch (volinfo->rebal.defrag_status) { + case GF_DEFRAG_STATUS_FAILED: + case GF_DEFRAG_STATUS_COMPLETE: + volinfo->rebal.defrag_status = 0; + default: + break; + } + if (!force && need_rebalance) { + /* perform the rebalance operations */ + ret = glusterd_handle_defrag_start + (volinfo, err_str, sizeof (err_str), + GF_DEFRAG_CMD_START_FORCE, + glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK); + + if (!ret) + volinfo->decommission_in_progress = 1; + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to start the rebalance"); + } + } else { + if (GLUSTERD_STATUS_STARTED == volinfo->status) + ret = glusterd_nodesvcs_handle_graph_change (volinfo); + } + +out: + if (ret && err_str[0] && op_errstr) + *op_errstr = gf_strdup (err_str); + + GF_FREE (brick_tmpstr); + if (bricks_dict) + dict_unref (bricks_dict); + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c new file mode 100644 index 000000000..5786694bd --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -0,0 +1,4236 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "common-utils.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "run.h" +#include "syscall.h" + +#include <signal.h> + +static int +dict_get_param (dict_t *dict, char *key, char **param); + +static int +glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, char **statefile); + +static int +glusterd_get_slave_info (char *slave, char **slave_ip, + char **slave_vol, char **op_errstr); + +static int +glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen); + +struct gsync_config_opt_vals_ gsync_confopt_vals[] = { + {.op_name = "change_detector", + .no_of_pos_vals = 2, + .case_sensitive = _gf_true, + .values = {"xsync", "changelog"}, + }, + {.op_name = "special_sync_mode", + .no_of_pos_vals = 2, + .case_sensitive = _gf_true, + .values = {"partial", "recover"} + }, + {.op_name = "log-level", + .no_of_pos_vals = 5, + .case_sensitive = _gf_false, + .values = {"critical", "error", "warning", "info", "debug"} + }, + {.op_name = NULL, + }, +}; + +static char *gsync_reserved_opts[] = { + "gluster-command-dir", + "pid-file", + "remote-gsyncd" + "state-file", + "session-owner", + "state-socket-unencoded", + "socketdir", + "ignore-deletes", + "local-id", + "local-path", + "slave-id", + NULL +}; + +int +__glusterd_handle_sys_exec (rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = {{0},}; + glusterd_op_t cli_op = GD_OP_SYS_EXEC; + glusterd_conf_t *priv = NULL; + char *host_uuid = NULL; + char err_str[2048] = {0,}; + xlator_t *this = NULL; + + GF_ASSERT (req); + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + dict = dict_new (); + if (!dict) + goto out; + + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + + host_uuid = gf_strdup (uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf (err_str, sizeof (err_str), "Failed to get " + "the uuid of local glusterd"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, "host-uuid", host_uuid); + if (ret) + goto out; + } + + ret = glusterd_op_begin_synctask (req, cli_op, dict); + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + return ret; +} + +int +__glusterd_handle_copy_file (rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = {{0},}; + glusterd_op_t cli_op = GD_OP_COPY_FILE; + glusterd_conf_t *priv = NULL; + char *host_uuid = NULL; + char err_str[2048] = {0,}; + xlator_t *this = NULL; + + GF_ASSERT 
(req); + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + dict = dict_new (); + if (!dict) + goto out; + + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + + host_uuid = gf_strdup (uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf (err_str, sizeof (err_str), "Failed to get " + "the uuid of local glusterd"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, "host-uuid", host_uuid); + if (ret) + goto out; + } + + ret = glusterd_op_begin_synctask (req, cli_op, dict); + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + return ret; +} + +int +__glusterd_handle_gsync_set (rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = {{0},}; + glusterd_op_t cli_op = GD_OP_GSYNC_SET; + char *master = NULL; + char *slave = NULL; + char operation[256] = {0,}; + int type = 0; + glusterd_conf_t *priv = NULL; + char *host_uuid = NULL; + char err_str[2048] = {0,}; + xlator_t *this = NULL; + + GF_ASSERT (req); + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + + host_uuid = gf_strdup (uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf (err_str, sizeof (err_str), "Failed to get " + "the uuid of local glusterd"); + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, "host-uuid", host_uuid); + if (ret) + goto out; + + } + + ret = dict_get_str (dict, "master", &master); + if (ret < 0) { + gf_log (this->name, GF_LOG_INFO, "master not found, while " + "handling "GEOREP" options"); + master = "(No Master)"; + } + + ret = dict_get_str (dict, "slave", &slave); + if (ret < 0) { + gf_log (this->name, GF_LOG_INFO, "slave not found, while " + "handling "GEOREP" options"); + slave = "(No Slave)"; + } + + ret = dict_get_int32 (dict, "type", &type); + if (ret < 0) { + snprintf (err_str, sizeof (err_str), "Command type not found " + "while handling "GEOREP" options"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + switch (type) { + case GF_GSYNC_OPTION_TYPE_CREATE: + strncpy (operation, "create", sizeof (operation)); + cli_op = GD_OP_GSYNC_CREATE; + break; + + case GF_GSYNC_OPTION_TYPE_START: + strncpy (operation, "start", sizeof (operation)); + break; + + case GF_GSYNC_OPTION_TYPE_STOP: + strncpy (operation, "stop", sizeof (operation)); + break; + + case GF_GSYNC_OPTION_TYPE_CONFIG: + strncpy (operation, 
"config", sizeof (operation)); + break; + + case GF_GSYNC_OPTION_TYPE_STATUS: + strncpy (operation, "status", sizeof (operation)); + break; + } + + ret = glusterd_op_begin_synctask (req, cli_op, dict); + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + return ret; +} + +int +glusterd_handle_sys_exec (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_sys_exec); +} + +int +glusterd_handle_copy_file (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_copy_file); +} + +int +glusterd_handle_gsync_set (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_gsync_set); +} + +/***** + * + * glusterd_urltransform* internal API + * + *****/ + +static void +glusterd_urltransform_init (runner_t *runner, const char *transname) +{ + runinit (runner); + runner_add_arg (runner, GSYNCD_PREFIX"/gsyncd"); + runner_argprintf (runner, "--%s-url", transname); +} + +static void +glusterd_urltransform_add (runner_t *runner, const char *url) +{ + runner_add_arg (runner, url); +} + +static int +_glusterd_urltransform_add_iter (dict_t *dict, char *key, data_t *value, void *data) +{ + runner_t *runner = (runner_t *)data; + char *slave = NULL; + + slave = strchr (value->data, ':'); + GF_ASSERT (slave); + slave++; + runner_add_arg (runner, slave); + + return 0; +} + +static void +glusterd_urltransform_free (char **linearr, unsigned n) +{ + int i = 0; + + for (; i < n; i++) + GF_FREE (linearr[i]); + + GF_FREE (linearr); +} + +static int +glusterd_urltransform (runner_t *runner, char ***linearrp) +{ + char **linearr = NULL; + char *line = NULL; + unsigned arr_len = 32; + unsigned arr_idx = 0; + gf_boolean_t error = _gf_false; + + linearr = GF_CALLOC (arr_len, sizeof (char *), gf_gld_mt_linearr); + if (!linearr) { + error = _gf_true; + goto out; + } + + runner_redir (runner, STDOUT_FILENO, RUN_PIPE); + if (runner_start (runner) != 0) { + gf_log ("", GF_LOG_ERROR, "spawning child failed"); + + error = _gf_true; + goto out; + } + + arr_idx = 0; + for (;;) { + size_t len; + line = GF_MALLOC (1024, gf_gld_mt_linebuf); + if (!line) { + error = _gf_true; + goto out; + } + + if (fgets (line, 1024, runner_chio (runner, STDOUT_FILENO)) == + NULL) + break; + + len = strlen (line); + if (len == 0 || line[len - 1] != '\n') { + GF_FREE (line); + error = _gf_true; + goto out; + } + line[len - 1] = '\0'; + + if (arr_idx == arr_len) { + void *p = linearr; + arr_len <<= 1; + p = GF_REALLOC (linearr, arr_len); + if (!p) { + GF_FREE (line); + error = _gf_true; + goto out; + } + linearr = p; + } + linearr[arr_idx] = line; + + arr_idx++; + } + + out: + + /* XXX chpid field is not exported by run API + * but runner_end() does not abort the invoked + * process (ie. 
+static int
+glusterd_urltransform (runner_t *runner, char ***linearrp)
+{
+        char **linearr = NULL;
+        char *line = NULL;
+        unsigned arr_len = 32;
+        unsigned arr_idx = 0;
+        gf_boolean_t error = _gf_false;
+
+        linearr = GF_CALLOC (arr_len, sizeof (char *), gf_gld_mt_linearr);
+        if (!linearr) {
+                error = _gf_true;
+                goto out;
+        }
+
+        runner_redir (runner, STDOUT_FILENO, RUN_PIPE);
+        if (runner_start (runner) != 0) {
+                gf_log ("", GF_LOG_ERROR, "spawning child failed");
+
+                error = _gf_true;
+                goto out;
+        }
+
+        arr_idx = 0;
+        for (;;) {
+                size_t len;
+                line = GF_MALLOC (1024, gf_gld_mt_linebuf);
+                if (!line) {
+                        error = _gf_true;
+                        goto out;
+                }
+
+                if (fgets (line, 1024, runner_chio (runner, STDOUT_FILENO)) ==
+                    NULL)
+                        break;
+
+                len = strlen (line);
+                if (len == 0 || line[len - 1] != '\n') {
+                        GF_FREE (line);
+                        error = _gf_true;
+                        goto out;
+                }
+                line[len - 1] = '\0';
+
+                if (arr_idx == arr_len) {
+                        void *p = linearr;
+                        arr_len <<= 1;
+                        p = GF_REALLOC (linearr, arr_len * sizeof (char *));
+                        if (!p) {
+                                GF_FREE (line);
+                                error = _gf_true;
+                                goto out;
+                        }
+                        linearr = p;
+                }
+                linearr[arr_idx] = line;
+
+                arr_idx++;
+        }
+
+ out:
+
+        /* XXX chpid field is not exported by run API
+         * but runner_end() does not abort the invoked
+         * process (ie. it might block in waitpid(2))
+         * so we resort to a manual kill via the private field
+         */
+        if (error && runner->chpid > 0)
+                kill (runner->chpid, SIGKILL);
+
+        if (runner_end (runner) != 0)
+                error = _gf_true;
+
+        if (error) {
+                gf_log ("", GF_LOG_ERROR, "reading data from child failed");
+                glusterd_urltransform_free (linearr, arr_idx);
+                return -1;
+        }
+
+        *linearrp = linearr;
+        return arr_idx;
+}
+
+static int
+glusterd_urltransform_single (const char *url, const char *transname,
+                              char ***linearrp)
+{
+        runner_t runner = {0,};
+
+        glusterd_urltransform_init (&runner, transname);
+        glusterd_urltransform_add (&runner, url);
+        return glusterd_urltransform (&runner, linearrp);
+}
+
+
+struct dictidxmark {
+        unsigned isrch;
+        unsigned ithis;
+        char *ikey;
+};
+
+static int
+_dict_mark_atindex (dict_t *dict, char *key, data_t *value, void *data)
+{
+        struct dictidxmark *dim = data;
+
+        if (dim->isrch == dim->ithis)
+                dim->ikey = key;
+
+        dim->ithis++;
+        return 0;
+}
+
+static char *
+dict_get_by_index (dict_t *dict, unsigned i)
+{
+        struct dictidxmark dim = {0,};
+
+        dim.isrch = i;
+        dict_foreach (dict, _dict_mark_atindex, &dim);
+
+        return dim.ikey;
+}
+
+static int
+glusterd_get_slave (glusterd_volinfo_t *vol, const char *slaveurl, char **slavekey)
+{
+        runner_t runner = {0,};
+        int n = 0;
+        int i = 0;
+        char **linearr = NULL;
+
+        glusterd_urltransform_init (&runner, "canonicalize");
+        dict_foreach (vol->gsync_slaves, _glusterd_urltransform_add_iter, &runner);
+        glusterd_urltransform_add (&runner, slaveurl);
+
+        n = glusterd_urltransform (&runner, &linearr);
+        if (n == -1)
+                return -2;
+
+        for (i = 0; i < n - 1; i++) {
+                if (strcmp (linearr[i], linearr[n - 1]) == 0)
+                        break;
+        }
+        glusterd_urltransform_free (linearr, i);
+
+        if (i < n - 1)
+                *slavekey = dict_get_by_index (vol->gsync_slaves, i);
+        else
+                i = -1;
+
+        return i;
+}
+
+
+static int
+glusterd_query_extutil_generic (char *resbuf, size_t blen, runner_t *runner, void *data,
+                                int (*fcbk)(char *resbuf, size_t blen, FILE *fp, void *data))
+{
+        int ret = 0;
+
+        runner_redir (runner, STDOUT_FILENO, RUN_PIPE);
+        if (runner_start (runner) != 0) {
+                gf_log ("", GF_LOG_ERROR, "spawning child failed");
+
+                return -1;
+        }
+
+        ret = fcbk (resbuf, blen, runner_chio (runner, STDOUT_FILENO), data);
+
+        ret |= runner_end (runner);
+        if (ret)
+                gf_log ("", GF_LOG_ERROR, "reading data from child failed");
+
+        return ret ? -1 : 0;
+}
+
+static int
+_fcbk_singleline(char *resbuf, size_t blen, FILE *fp, void *data)
+{
+        char *ptr = NULL;
+
+        errno = 0;
+        ptr = fgets (resbuf, blen, fp);
+        if (ptr) {
+                size_t len = strlen(resbuf);
+                if (len && resbuf[len-1] == '\n')
+                        resbuf[len-1] = '\0'; //strip off \n
+        }
+
+        return errno ? -1 : 0;
+}
+
+static int
+glusterd_query_extutil (char *resbuf, runner_t *runner)
+{
+        return glusterd_query_extutil_generic (resbuf, PATH_MAX, runner, NULL,
+                                               _fcbk_singleline);
+}
+
+static int
+_fcbk_conftodict (char *resbuf, size_t blen, FILE *fp, void *data)
+{
+        char *ptr = NULL;
+        dict_t *dict = data;
+        char *v = NULL;
+
+        for (;;) {
+                errno = 0;
+                ptr = fgets (resbuf, blen, fp);
+                if (!ptr)
+                        break;
+                v = resbuf + strlen(resbuf) - 1;
+                while (isspace (*v))
+                        /* strip trailing space */
+                        *v-- = '\0';
+                if (v == resbuf)
+                        /* skip empty line */
+                        continue;
+                v = strchr (resbuf, ':');
+                if (!v)
+                        return -1;
+                *v++ = '\0';
+                while (isspace (*v))
+                        v++;
+                v = gf_strdup (v);
+                if (!v)
+                        return -1;
+                if (dict_set_dynstr (dict, resbuf, v) != 0) {
+                        GF_FREE (v);
+                        return -1;
+                }
+        }
+
+        return errno ?
-1 : 0; +} + +static int +glusterd_gsync_get_config (char *master, char *slave, char *conf_path, dict_t *dict) +{ + /* key + value, where value must be able to accommodate a path */ + char resbuf[256 + PATH_MAX] = {0,}; + runner_t runner = {0,}; + + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); + runner_argprintf (&runner, "%s", conf_path); + runner_argprintf (&runner, ":%s", master); + runner_add_args (&runner, slave, "--config-get-all", NULL); + + return glusterd_query_extutil_generic (resbuf, sizeof (resbuf), + &runner, dict, _fcbk_conftodict); +} + +static int +glusterd_gsync_get_param_file (char *prmfile, const char *param, char *master, + char *slave, char *conf_path) +{ + runner_t runner = {0,}; + + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); + runner_argprintf (&runner, "%s", conf_path); + runner_argprintf (&runner, ":%s", master); + runner_add_args (&runner, slave, "--config-get", NULL); + runner_argprintf (&runner, "%s-file", param); + + return glusterd_query_extutil (prmfile, &runner); +} + +static int +gsyncd_getpidfile (char *master, char *slave, char *pidfile, char *conf_path) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + char *confpath = NULL; + char conf_buf[PATH_MAX] = ""; + struct stat stbuf = {0,}; + + + GF_ASSERT (THIS); + GF_ASSERT (THIS->private); + + priv = THIS->private; + + GF_VALIDATE_OR_GOTO ("gsync", master, out); + GF_VALIDATE_OR_GOTO ("gsync", slave, out); + + ret = lstat (conf_path, &stbuf); + if (!ret) { + gf_log ("", GF_LOG_DEBUG, "Using passed config template(%s).", + conf_path); + confpath = conf_path; + } else { + ret = snprintf (conf_buf, sizeof(conf_buf) - 1, + "%s/"GSYNC_CONF_TEMPLATE, priv->workdir); + conf_buf[ret] = '\0'; + confpath = conf_buf; + gf_log ("", GF_LOG_DEBUG, "Using default config template(%s).", + confpath); + } + + ret = glusterd_gsync_get_param_file (pidfile, "pid", master, + slave, confpath); + if (ret == -1) { + ret = -2; + gf_log ("", GF_LOG_WARNING, "failed to create the pidfile string"); + goto out; + } + + ret = open (pidfile, O_RDWR); + + out: + return ret; +} + +static int +gsync_status_byfd (int fd) +{ + GF_ASSERT (fd >= -1); + + if (lockf (fd, F_TEST, 0) == -1 && + (errno == EAGAIN || errno == EACCES)) + /* gsyncd keeps the pidfile locked */ + return 0; + + return -1; +} + +/* status: return 0 when gsync is running + * return -1 when not running + */ +int +gsync_status (char *master, char *slave, char *conf_path, int *status) +{ + char pidfile[PATH_MAX] = {0,}; + int fd = -1; + + fd = gsyncd_getpidfile (master, slave, pidfile, conf_path); + if (fd == -2) + return -1; + + *status = gsync_status_byfd (fd); + + sys_close (fd); + + return 0; +} + + +static int32_t +glusterd_gsync_volinfo_dict_set (glusterd_volinfo_t *volinfo, + char *key, char *value) +{ + int32_t ret = -1; + char *gsync_status = NULL; + + gsync_status = gf_strdup (value); + if (!gsync_status) { + gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + goto out; + } + + ret = dict_set_dynstr (volinfo->dict, key, gsync_status); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to set dict"); + goto out; + } + + ret = 0; +out: + return 0; +} + +static int +glusterd_verify_gsyncd_spawn (char *master, char *slave) +{ + int ret = 0; + runner_t runner = {0,}; + + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", + "--verify", "spawning", NULL); + runner_argprintf (&runner, ":%s", master); + runner_add_args (&runner, slave, NULL); + runner_redir (&runner, 
STDOUT_FILENO, RUN_PIPE); + ret = runner_start (&runner); + if (ret) { + gf_log ("", GF_LOG_ERROR, "spawning child failed"); + ret = -1; + goto out; + } + + if (runner_end (&runner) != 0) + ret = -1; + +out: + gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +static int +gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname) +{ + char **resopt = NULL; + int i = 0; + int ret = -1; + char *subop = NULL; + char *slave = NULL; + char *op_name = NULL; + char *op_value = NULL; + char *t = NULL; + char errmsg[PATH_MAX] = ""; + gf_boolean_t banned = _gf_true; + gf_boolean_t op_match = _gf_true; + gf_boolean_t val_match = _gf_true; + struct gsync_config_opt_vals_ *conf_vals = NULL; + + if (dict_get_str (dict, "subop", &subop) != 0) { + gf_log ("", GF_LOG_WARNING, "missing subop"); + *op_errstr = gf_strdup ("Invalid config request"); + return -1; + } + + if (dict_get_str (dict, "slave", &slave) != 0) { + gf_log ("", GF_LOG_WARNING, GEOREP" CONFIG: no slave given"); + *op_errstr = gf_strdup ("Slave required"); + return -1; + } + + if (strcmp (subop, "get-all") == 0) + return 0; + + if (dict_get_str (dict, "op_name", &op_name) != 0) { + gf_log ("", GF_LOG_WARNING, "option name missing"); + *op_errstr = gf_strdup ("Option name missing"); + return -1; + } + + if (runcmd (GSYNCD_PREFIX"/gsyncd", "--config-check", op_name, NULL)) { + ret = glusterd_verify_gsyncd_spawn (volname, slave); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to spawn gsyncd"); + return 0; + } + + gf_log ("", GF_LOG_WARNING, "Invalid option %s", op_name); + *op_errstr = gf_strdup ("Invalid option"); + + return -1; + } + + if (strcmp (subop, "get") == 0) + return 0; + + t = strtail (subop, "set"); + if (!t) + t = strtail (subop, "del"); + if (!t || (t[0] && strcmp (t, "-glob") != 0)) { + gf_log ("", GF_LOG_WARNING, "unknown subop %s", subop); + *op_errstr = gf_strdup ("Invalid config request"); + return -1; + } + + if (strtail (subop, "set") && + dict_get_str (dict, "op_value", &op_value) != 0) { + gf_log ("", GF_LOG_WARNING, "missing value for set"); + *op_errstr = gf_strdup ("missing value"); + } + + /* match option name against reserved options, modulo -/_ + * difference + */ + for (resopt = gsync_reserved_opts; *resopt; resopt++) { + banned = _gf_true; + for (i = 0; (*resopt)[i] && op_name[i]; i++) { + if ((*resopt)[i] == op_name[i] || + ((*resopt)[i] == '-' && op_name[i] == '_')) + continue; + banned = _gf_false; + } + if (banned) { + gf_log ("", GF_LOG_WARNING, "Reserved option %s", op_name); + *op_errstr = gf_strdup ("Reserved option"); + + return -1; + break; + } + } + + /* Check options in gsync_confopt_vals for invalid values */ + for (conf_vals = gsync_confopt_vals; conf_vals->op_name; conf_vals++) { + op_match = _gf_true; + for (i = 0; conf_vals->op_name[i] && op_name[i]; i++) { + if (conf_vals->op_name[i] == op_name[i] || + (conf_vals->op_name[i] == '_' && op_name[i] == '-')) + continue; + op_match = _gf_false; + } + + if (op_match) { + val_match = _gf_false; + for (i = 0; i < conf_vals->no_of_pos_vals; i++) { + if(conf_vals->case_sensitive){ + if (!strcmp (conf_vals->values[i], op_value)) + val_match = _gf_true; + } else { + if (!strcasecmp (conf_vals->values[i], op_value)) + val_match = _gf_true; + } + } + + if (!val_match) { + ret = snprintf (errmsg, sizeof(errmsg) - 1, + "Invalid values (%s) for" + " option %s", op_value, + op_name); + errmsg[ret] = '\0'; + + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); + return -1; + } + } + } + + return 0; +} + 
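+/* Illustrative sketch, not part of the original change: the reserved-
+ * option scan above treats '-' and '_' in option names as
+ * interchangeable, much like dict_get_param() further down. As a
+ * stand-alone comparator (gf_gsync_optname_eq is a hypothetical
+ * name), the rule would read roughly:
+ *
+ *     static gf_boolean_t
+ *     gf_gsync_optname_eq (const char *a, const char *b)
+ *     {
+ *             while (*a && *b) {
+ *                     char ca = (*a == '_') ? '-' : *a;
+ *                     char cb = (*b == '_') ? '-' : *b;
+ *                     if (ca != cb)
+ *                             return _gf_false;
+ *                     a++;
+ *                     b++;
+ *             }
+ *             return *a == *b;
+ *     }
+ *
+ * Unlike this sketch, the scan above also counts a mere common prefix
+ * as a match, so e.g. "pid" would already be rejected as reserved. */
+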
+static int +glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo, + char *slave, char *conf_path, + dict_t *rsp_dict, char *node); + +static int +_get_status_mst_slv (dict_t *this, char *key, data_t *value, void *data) +{ + glusterd_gsync_status_temp_t *param = NULL; + char *slave = NULL; + char *slave_buf = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + char *errmsg = NULL; + char conf_path[PATH_MAX] = ""; + int ret = -1; + glusterd_conf_t *priv = NULL; + + param = (glusterd_gsync_status_temp_t *)data; + + GF_ASSERT (param); + GF_ASSERT (param->volinfo); + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + goto out; + } + + slave = strchr(value->data, ':'); + if (!slave) + return 0; + slave++; + + ret = glusterd_get_slave_info (slave, &slave_ip, &slave_vol, &errmsg); + if (ret) { + if (errmsg) + gf_log ("", GF_LOG_ERROR, "Unable to fetch " + "slave details. Error: %s", errmsg); + else + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave details."); + ret = -1; + goto out; + } + + ret = snprintf (conf_path, sizeof(conf_path) - 1, + "%s/"GEOREP"/%s_%s_%s/gsyncd.conf", + priv->workdir, param->volinfo->volname, + slave_ip, slave_vol); + conf_path[ret] = '\0'; + + ret = glusterd_get_gsync_status_mst_slv(param->volinfo, + slave, conf_path, + param->rsp_dict, + param->node); +out: + + if (slave_buf) + GF_FREE(slave_buf); + + gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret); + return ret; +} + + +static int +_get_max_gsync_slave_num (dict_t *this, char *key, data_t *value, void *data) +{ + int tmp_slvnum = 0; + int *slvnum = (int *)data; + + sscanf (key, "slave%d", &tmp_slvnum); + if (tmp_slvnum > *slvnum) + *slvnum = tmp_slvnum; + + return 0; +} + +static int +glusterd_remove_slave_in_info (glusterd_volinfo_t *volinfo, char *slave, + char **op_errstr) +{ + int zero_slave_entries = _gf_true; + int ret = 0; + char *slavekey = NULL; + + GF_ASSERT (volinfo); + GF_ASSERT (slave); + + do { + ret = glusterd_get_slave (volinfo, slave, &slavekey); + if (ret < 0 && zero_slave_entries) { + ret++; + goto out; + } + zero_slave_entries = _gf_false; + dict_del (volinfo->gsync_slaves, slavekey); + } while (ret >= 0); + + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + *op_errstr = gf_strdup ("Failed to store the Volume" + "information"); + goto out; + } + out: + gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + return ret; + +} + +static int +glusterd_gsync_get_uuid (char *slave, glusterd_volinfo_t *vol, + uuid_t uuid) +{ + int ret = 0; + char *slavekey = NULL; + char *slaveentry = NULL; + char *t = NULL; + + GF_ASSERT (vol); + GF_ASSERT (slave); + + ret = glusterd_get_slave (vol, slave, &slavekey); + if (ret < 0) { + /* XXX colliding cases of failure and non-extant + * slave... now just doing this as callers of this + * function can make sense only of -1 and 0 as retvals; + * getting at the proper semanticals will involve + * fixing callers as well. 
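+         * (glusterd_get_slave() does distinguish the two cases: it
+         * returns -2 on a canonicalization failure and -1 when no
+         * matching slave entry exists; both collapse to -1 here.)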
+ */ + ret = -1; + goto out; + } + + ret = dict_get_str (vol->gsync_slaves, slavekey, &slaveentry); + GF_ASSERT (ret == 0); + + t = strchr (slaveentry, ':'); + GF_ASSERT (t); + *t = '\0'; + ret = uuid_parse (slaveentry, uuid); + *t = ':'; + + out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_check_gsync_running_local (char *master, char *slave, + char *conf_path, + gf_boolean_t *is_run) +{ + int ret = -1; + int ret_status = 0; + + GF_ASSERT (master); + GF_ASSERT (slave); + GF_ASSERT (is_run); + + *is_run = _gf_false; + ret = gsync_status (master, slave, conf_path, &ret_status); + if (ret == 0 && ret_status == 0) { + *is_run = _gf_true; + } else if (ret == -1) { + gf_log ("", GF_LOG_WARNING, GEOREP" validation " + " failed"); + goto out; + } + ret = 0; + out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; + +} + +static int +glusterd_store_slave_in_info (glusterd_volinfo_t *volinfo, char *slave, + char *host_uuid, char **op_errstr, + gf_boolean_t is_force) +{ + int ret = 0; + int maxslv = 0; + char **linearr = NULL; + char *value = NULL; + char *slavekey = NULL; + char *slaveentry = NULL; + char key[512] = {0, }; + char *t = NULL; + + GF_ASSERT (volinfo); + GF_ASSERT (slave); + GF_ASSERT (host_uuid); + + ret = glusterd_get_slave (volinfo, slave, &slavekey); + switch (ret) { + case -2: + ret = -1; + goto out; + case -1: + break; + default: + if (!is_force) + GF_ASSERT (ret > 0); + ret = dict_get_str (volinfo->gsync_slaves, slavekey, &slaveentry); + GF_ASSERT (ret == 0); + + /* same-name + same-uuid slave entries should have been filtered + * out in glusterd_op_verify_gsync_start_options(), so we can + * assert an uuid mismatch + */ + t = strtail (slaveentry, host_uuid); + if (!is_force) + GF_ASSERT (!t || *t != ':'); + + if (is_force) { + gf_log ("", GF_LOG_DEBUG, GEOREP" has already been " + "invoked for the %s (master) and %s (slave)." 
+ " Allowing without saving info again due to" + " force command.", volinfo->volname, slave); + ret = 0; + goto out; + } + + gf_log ("", GF_LOG_ERROR, GEOREP" has already been invoked for " + "the %s (master) and %s (slave) " + "from a different machine", + volinfo->volname, slave); + *op_errstr = gf_strdup (GEOREP" already running in " + "another machine"); + ret = -1; + goto out; + } + + ret = glusterd_urltransform_single (slave, "normalize", &linearr); + if (ret == -1) + goto out; + + ret = gf_asprintf (&value, "%s:%s", host_uuid, linearr[0]); + glusterd_urltransform_free (linearr, 1); + if (ret == -1) + goto out; + + dict_foreach (volinfo->gsync_slaves, _get_max_gsync_slave_num, &maxslv); + snprintf (key, 512, "slave%d", maxslv + 1); + ret = dict_set_dynstr (volinfo->gsync_slaves, key, value); + if (ret) + goto out; + + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + *op_errstr = gf_strdup ("Failed to store the Volume " + "information"); + goto out; + } + ret = 0; + out: + return ret; +} + +static int +glusterd_op_verify_gsync_start_options (glusterd_volinfo_t *volinfo, + char *slave, char *conf_path, + char *statefile, char **op_errstr, + gf_boolean_t is_force) +{ + int ret = -1; + gf_boolean_t is_running = _gf_false; + char msg[2048] = {0}; + uuid_t uuid = {0}; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + struct stat stbuf = {0,}; + + this = THIS; + + GF_ASSERT (volinfo); + GF_ASSERT (slave); + GF_ASSERT (op_errstr); + GF_ASSERT (conf_path); + GF_ASSERT (this && this->private); + + priv = this->private; + + if (GLUSTERD_STATUS_STARTED != volinfo->status) { + snprintf (msg, sizeof (msg), "Volume %s needs to be started " + "before "GEOREP" start", volinfo->volname); + goto out; + } + + ret = lstat (statefile, &stbuf); + if (ret) { + snprintf (msg, sizeof (msg), "Session between %s and %s has" + " not been created. Please create session and retry.", + volinfo->volname, slave); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + + /* Check if the gsync slave info is stored. If not + * session has not been created */ + ret = glusterd_gsync_get_uuid (slave, volinfo, uuid); + if (ret) { + snprintf (msg, sizeof (msg), "Session between %s and %s has" + " not been created. Please create session and retry.", + volinfo->volname, slave); + gf_log ("", GF_LOG_ERROR, "%s", msg); + goto out; + } + + if (is_force) { + ret = 0; + goto out; + } + + /*Check if the gsync is already started in cmd. 
inited host + * If so initiate add it into the glusterd's priv*/ + ret = glusterd_check_gsync_running_local (volinfo->volname, + slave, conf_path, + &is_running); + if (ret) { + snprintf (msg, sizeof (msg), GEOREP" start option " + "validation failed "); + goto out; + } + if (_gf_true == is_running) { + snprintf (msg, sizeof (msg), GEOREP " session between" + " %s & %s already started", volinfo->volname, + slave); + ret = -1; + goto out; + } + + ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave); + if (ret) { + snprintf (msg, sizeof (msg), "Unable to spawn gsyncd"); + gf_log ("", GF_LOG_ERROR, "%s", msg); + } +out: + if (ret && (msg[0] != '\0')) { + *op_errstr = gf_strdup (msg); + } + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_check_gsync_running (glusterd_volinfo_t *volinfo, gf_boolean_t *flag) +{ + + GF_ASSERT (volinfo); + GF_ASSERT (flag); + + if (volinfo->gsync_slaves->count) + *flag = _gf_true; + else + *flag = _gf_false; + + return 0; +} + +static int +glusterd_op_verify_gsync_running (glusterd_volinfo_t *volinfo, + char *slave, char *conf_path, + char **op_errstr) +{ + int pfd = -1; + int ret = -1; + char msg[2048] = {0}; + char pidfile[PATH_MAX] = {0,}; + + GF_ASSERT (THIS && THIS->private); + GF_ASSERT (volinfo); + GF_ASSERT (slave); + GF_ASSERT (op_errstr); + + if (GLUSTERD_STATUS_STARTED != volinfo->status) { + snprintf (msg, sizeof (msg), "Volume %s needs to be started " + "before "GEOREP" start", volinfo->volname); + + goto out; + } + + pfd = gsyncd_getpidfile (volinfo->volname, slave, pidfile, conf_path); + if (pfd == -2) { + gf_log ("", GF_LOG_ERROR, GEOREP" stop validation " + "failed for %s & %s", volinfo->volname, slave); + ret = -1; + goto out; + } + if (gsync_status_byfd (pfd) == -1) { + snprintf (msg, sizeof (msg), GEOREP" session b/w %s & %s is not" + " running on this node.", volinfo->volname, slave); + gf_log ("", GF_LOG_ERROR, "%s", msg); + ret = -1; + /* monitor gsyncd already dead */ + goto out; + } + + if (pfd < 0) + goto out; + + ret = 0; +out: + if (ret && (msg[0] != '\0')) { + *op_errstr = gf_strdup (msg); + } + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static int +glusterd_verify_gsync_status_opts (dict_t *dict, char **op_errstr) +{ + char *slave = NULL; + char *volname = NULL; + char errmsg[PATH_MAX] = {0, }; + gf_boolean_t exists = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + char *conf_path = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + glusterd_conf_t *priv = NULL; + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + *op_errstr = gf_strdup ("glusterd defunct"); + goto out; + } + + ret = dict_get_str (dict, "master", &volname); + if (ret < 0) { + ret = 0; + goto out; + } + + exists = glusterd_check_volume_exists (volname); + ret = glusterd_volinfo_find (volname, &volinfo); + if ((ret) || (!exists)) { + gf_log ("", GF_LOG_WARNING, "volume name does not exist"); + snprintf (errmsg, sizeof(errmsg), "Volume name %s does not" + " exist", volname); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "slave", &slave); + if (ret < 0) { + ret = 0; + goto out; + } + + ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_ip, + &slave_vol, &conf_path, + op_errstr); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave or confpath details."); + ret = -1; + goto out; + } + +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); 
+ return ret; +} + + +int +glusterd_op_gsync_args_get (dict_t *dict, char **op_errstr, + char **master, char **slave, char **host_uuid) +{ + + int ret = -1; + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + if (master) { + ret = dict_get_str (dict, "master", master); + if (ret < 0) { + gf_log ("", GF_LOG_WARNING, "master not found"); + *op_errstr = gf_strdup ("master not found"); + goto out; + } + } + + if (slave) { + ret = dict_get_str (dict, "slave", slave); + if (ret < 0) { + gf_log ("", GF_LOG_WARNING, "slave not found"); + *op_errstr = gf_strdup ("slave not found"); + goto out; + } + } + + if (host_uuid) { + ret = dict_get_str (dict, "host-uuid", host_uuid); + if (ret < 0) { + gf_log ("", GF_LOG_WARNING, "host_uuid not found"); + *op_errstr = gf_strdup ("host_uuid not found"); + goto out; + } + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_stage_sys_exec (dict_t *dict, char **op_errstr) +{ + char errmsg[PATH_MAX] = ""; + char *command = NULL; + char command_path[PATH_MAX] = ""; + struct stat st = {0,}; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + if (conf->op_version < 2) { + gf_log ("", GF_LOG_ERROR, "Op Version not supported."); + snprintf (errmsg, sizeof(errmsg), "One or more nodes do not" + " support the required op version."); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "command", &command); + if (ret) { + strcpy (errmsg, "internal error"); + gf_log ("", GF_LOG_ERROR, + "Unable to get command from dict"); + goto out; + } + + /* enforce local occurrence of the command */ + if (strchr (command, '/')) { + strcpy (errmsg, "invalid command name"); + ret = -1; + goto out; + } + + sprintf (command_path, GSYNCD_PREFIX"/peer_%s", command); + /* check if it's executable */ + ret = access (command_path, X_OK); + if (!ret) + /* check if it's a regular file */ + ret = stat (command_path, &st); + if (!ret && !S_ISREG (st.st_mode)) + ret = -1; + +out: + if (ret) { + if (errmsg[0] == '\0') + snprintf (errmsg, sizeof (errmsg), "%s not found.", + command); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + } + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_stage_copy_file (dict_t *dict, char **op_errstr) +{ + char abs_filename[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + char *filename = NULL; + char *host_uuid = NULL; + char uuid_str [64] = {0}; + int ret = -1; + glusterd_conf_t *priv = NULL; + struct stat stbuf = {0,}; + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + *op_errstr = gf_strdup ("glusterd defunct"); + goto out; + } + + if (priv->op_version < 2) { + gf_log ("", GF_LOG_ERROR, "Op Version not supported."); + snprintf (errmsg, sizeof(errmsg), "One or more nodes do not" + " support the required op version."); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "host-uuid", &host_uuid); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch" + " host-uuid from dict."); + goto out; + } + + uuid_utoa_r (MY_UUID, uuid_str); + if (!strcmp (uuid_str, host_uuid)) { + ret = dict_get_str (dict, "source", &filename); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch" + " filename from dict."); + *op_errstr = gf_strdup ("command unsuccessful"); + goto out; + } + snprintf 
(abs_filename, sizeof(abs_filename), + "%s/%s", priv->workdir, filename); + + ret = lstat (abs_filename, &stbuf); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Source file" + " does not exist in %s", priv->workdir); + *op_errstr = gf_strdup (errmsg); + goto out; + } + + if (!S_ISREG(stbuf.st_mode)) { + snprintf (errmsg, sizeof (errmsg), "Source file" + " is not a regular file."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static int +glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, + char *conf_path, char **statefile) +{ + glusterd_conf_t *priv = NULL; + int ret = -1; + char *master = NULL; + char *buf = NULL; + dict_t *confd = NULL; + char *confpath = NULL; + char conf_buf[PATH_MAX] = ""; + struct stat stbuf = {0,}; + + GF_ASSERT (THIS); + GF_ASSERT (THIS->private); + GF_ASSERT (volinfo); + + master = volinfo->volname; + + confd = dict_new (); + if (!confd) { + gf_log ("", GF_LOG_ERROR, "Unable to create new dict"); + goto out; + } + + priv = THIS->private; + + ret = lstat (conf_path, &stbuf); + if (!ret) { + gf_log ("", GF_LOG_INFO, "Using passed config template(%s).", + conf_path); + confpath = conf_path; + } else { + ret = snprintf (conf_buf, sizeof(conf_buf) - 1, + "%s/"GSYNC_CONF_TEMPLATE, priv->workdir); + conf_buf[ret] = '\0'; + confpath = conf_buf; + gf_log ("", GF_LOG_INFO, "Using default config template(%s).", + confpath); + } + + ret = glusterd_gsync_get_config (master, slave, confpath, + confd); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get configuration data" + "for %s(master), %s(slave)", master, slave); + goto out; + + } + + ret = dict_get_param (confd, "state_file", &buf); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name."); + goto out; + } + + *statefile = gf_strdup(buf); + if (!*statefile) { + gf_log ("", GF_LOG_ERROR, "Unable to gf_strdup."); + ret = -1; + goto out; + } + + ret = 0; + out: + if (confd) + dict_destroy (confd); + + gf_log ("", GF_LOG_DEBUG, "Returning %d ", ret); + return ret; +} + +static int +glusterd_create_status_file (char *master, char *slave, char *slave_ip, + char *slave_vol, char *status) +{ + int ret = -1; + runner_t runner = {0,}; + glusterd_conf_t *priv = NULL; + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + goto out; + } + + if (!status) { + gf_log ("", GF_LOG_ERROR, "Status Empty"); + goto out; + } + gf_log ("", GF_LOG_DEBUG, "slave = %s", slave); + + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--create", + status, "-c", NULL); + runner_argprintf (&runner, "%s/"GEOREP"/%s_%s_%s/gsyncd.conf", + priv->workdir, master, slave_ip, slave_vol); + runner_argprintf (&runner, ":%s", master); + runner_add_args (&runner, slave, NULL); + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Creating status file failed."); + ret = -1; + goto out; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +static int +glusterd_verify_slave (char *volname, char *slave_ip, char *slave, + char **op_errstr, gf_boolean_t *is_force_blocker) +{ + int32_t ret = -1; + runner_t runner = {0,}; + char log_file_path[PATH_MAX] = ""; + char buf[PATH_MAX] = ""; + char *tmp = NULL; + char *save_ptr = NULL; + glusterd_conf_t *priv = NULL; + 
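+        /* gverify.sh validates the slave end: that the host is
+         * reachable over passwordless ssh, that the slave volume
+         * exists and is empty, and that it has enough memory. On
+         * failure it leaves either "FORCE_BLOCKER|<message>" or just
+         * "<message>" in the log file, which is parsed below to
+         * decide whether a forced create may override the error. */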
+ GF_ASSERT (volname); + GF_ASSERT (slave_ip); + GF_ASSERT (slave); + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + goto out; + } + + snprintf (log_file_path, sizeof(log_file_path), + DEFAULT_LOG_FILE_DIRECTORY"/create_verify_log"); + + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gverify.sh", NULL); + runner_argprintf (&runner, "%s", volname); + runner_argprintf (&runner, "%s", slave_ip); + runner_argprintf (&runner, "%s", slave); + runner_argprintf (&runner, "%s", log_file_path); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Not a valid slave"); + ret = glusterd_gsync_read_frm_status (log_file_path, + buf, sizeof(buf)); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to read from %s", + log_file_path); + goto out; + } + + /* Tokenize the error message from gverify.sh to figure out + * if the error is a force blocker or not. */ + tmp = strtok_r (buf, "|", &save_ptr); + if (!strcmp (tmp, "FORCE_BLOCKER")) + *is_force_blocker = 1; + else { + /* No FORCE_BLOCKER flag present so all that is + * present is the error message. */ + *is_force_blocker = 0; + if (tmp) + *op_errstr = gf_strdup (tmp); + ret = -1; + goto out; + } + + /* Copy rest of the error message to op_errstr */ + tmp = strtok_r (NULL, "|", &save_ptr); + if (tmp) + *op_errstr = gf_strdup (tmp); + ret = -1; + goto out; + } + ret = 0; +out: + unlink (log_file_path); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_mountbroker_check (char **slave_ip, char **op_errstr) +{ + int ret = -1; + char *tmp = NULL; + char *save_ptr = NULL; + char *username = NULL; + char *host = NULL; + char errmsg[PATH_MAX] = ""; + + GF_ASSERT (slave_ip); + GF_ASSERT (*slave_ip); + + /* Checking if hostname has user specified */ + host = strstr (*slave_ip, "@"); + if (!host) { + gf_log ("", GF_LOG_DEBUG, "No username provided."); + ret = 0; + goto out; + } else { + /* Moving the host past the '@' and checking if the + * actual hostname also has '@' */ + host++; + if (strstr (host, "@")) { + gf_log ("", GF_LOG_DEBUG, "host = %s", host); + ret = snprintf (errmsg, sizeof(errmsg) - 1, + "Invalid Hostname (%s).", host); + errmsg[ret] = '\0'; + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + if (op_errstr) + *op_errstr = gf_strdup (errmsg); + goto out; + } + + /* Fetching the username and hostname + * and checking if the username is non-root */ + username = strtok_r (*slave_ip, "@", &save_ptr); + tmp = strtok_r (NULL, "@", &save_ptr); + if (strcmp (username, "root")) { + ret = snprintf (errmsg, sizeof(errmsg) - 1, + "Non-root username (%s@%s) not allowed.", + username, tmp); + errmsg[ret] = '\0'; + if (op_errstr) + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, + "Non-Root username not allowed."); + ret = -1; + goto out; + } + + *slave_ip = gf_strdup (tmp); + if (!*slave_ip) { + gf_log ("", GF_LOG_ERROR, "Out of memory"); + ret = -1; + goto out; + } + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_stage_gsync_create (dict_t *dict, char **op_errstr) +{ + char *down_peerstr = NULL; + char *slave = NULL; + char *volname = NULL; + char *host_uuid = NULL; + char *statefile = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + char *conf_path = NULL; + char errmsg[PATH_MAX] = ""; + char 
common_pem_file[PATH_MAX] = ""; + char hook_script[PATH_MAX] = ""; + char uuid_str [64] = ""; + int ret = -1; + int is_pem_push = -1; + gf_boolean_t is_force = -1; + gf_boolean_t is_force_blocker = -1; + gf_boolean_t exists = _gf_false; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + struct stat stbuf = {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + ret = glusterd_op_gsync_args_get (dict, op_errstr, &volname, + &slave, &host_uuid); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch arguments"); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return -1; + } + + if (conf->op_version < 2) { + gf_log ("", GF_LOG_ERROR, "Op Version not supported."); + snprintf (errmsg, sizeof(errmsg), "One or more nodes do not" + " support the required op version."); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + exists = glusterd_check_volume_exists (volname); + ret = glusterd_volinfo_find (volname, &volinfo); + if ((ret) || (!exists)) { + gf_log ("", GF_LOG_WARNING, "volume name does not exist"); + snprintf (errmsg, sizeof(errmsg), "Volume name %s does not" + " exist", volname); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return -1; + } + + ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_ip, + &slave_vol, &conf_path, + op_errstr); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave or confpath details."); + ret = -1; + goto out; + } + + is_force = dict_get_str_boolean (dict, "force", _gf_false); + + uuid_utoa_r (MY_UUID, uuid_str); + if (!strcmp (uuid_str, host_uuid)) { + ret = glusterd_are_vol_all_peers_up (volinfo, + &conf->peers, + &down_peerstr); + if ((ret == _gf_false) && !is_force) { + snprintf (errmsg, sizeof (errmsg), "Peer %s," + " which is a part of %s volume, is" + " down. Please bring up the peer and" + " retry.", down_peerstr, + volinfo->volname); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); + GF_FREE (down_peerstr); + down_peerstr = NULL; + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return -1; + } else if (ret == _gf_false) { + gf_log ("", GF_LOG_INFO, "Peer %s," + " which is a part of %s volume, is" + " down. Force creating geo-rep session." + " On bringing up the peer, re-run" + " \"gluster system:: execute" + " gsec_create\" and \"gluster volume" + " geo-replication %s %s create push-pem" + " force\"", down_peerstr, volinfo->volname, + volinfo->volname, slave); + } + + /* Checking if slave host is pingable, has proper passwordless + * ssh login setup, slave volume is created, slave vol is empty, + * and if it has enough memory and bypass in case of force if + * the error is not a force blocker */ + ret = glusterd_verify_slave (volname, slave_ip, slave_vol, + op_errstr, &is_force_blocker); + if (ret) { + if (is_force && !is_force_blocker) { + gf_log ("", GF_LOG_INFO, "%s is not a valid slave" + " volume. Error: %s. Force creating geo-rep" + " session.", slave, *op_errstr); + } else { + gf_log ("", GF_LOG_ERROR, + "%s is not a valid slave volume. 
Error: %s", + slave, *op_errstr); + ret = -1; + goto out; + } + } + + ret = dict_get_int32 (dict, "push_pem", &is_pem_push); + if (!ret && is_pem_push) { + ret = snprintf (common_pem_file, + sizeof(common_pem_file) - 1, + "%s"GLUSTERD_COMMON_PEM_PUB_FILE, + conf->workdir); + common_pem_file[ret] = '\0'; + + ret = snprintf (hook_script, sizeof(hook_script) - 1, + "%s"GLUSTERD_CREATE_HOOK_SCRIPT, + conf->workdir); + hook_script[ret] = '\0'; + + ret = lstat (common_pem_file, &stbuf); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "%s" + " required for push-pem is" + " not present. Please run" + " \"gluster system:: execute" + " gsec_create\"", common_pem_file); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + ret = lstat (hook_script, &stbuf); + if (ret) { + snprintf (errmsg, sizeof (errmsg), + "The hook-script (%s) required " + "for push-pem is not present. " + "Please install the hook-script " + "and retry", hook_script); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + if (!S_ISREG(stbuf.st_mode)) { + snprintf (errmsg, sizeof (errmsg), "%s" + " required for push-pem is" + " not a regular file. Please run" + " \"gluster system:: execute" + " gsec_create\"", common_pem_file); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + } + } + + ret = glusterd_get_statefile_name (volinfo, slave, conf_path, &statefile); + if (ret) { + if (!strstr(slave, "::")) + snprintf (errmsg, sizeof (errmsg), + "%s is not a valid slave url.", slave); + else + snprintf (errmsg, sizeof (errmsg), "Please check gsync " + "config file. Unable to get statefile's name"); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + + ret = dict_set_str (dict, "statefile", statefile); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store statefile path"); + goto out; + } + + ret = lstat (statefile, &stbuf); + if (!ret && !is_force) { + snprintf (errmsg, sizeof (errmsg), "Session between %s" + " and %s is already created.", + volinfo->volname, slave); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } else if (!ret) + gf_log ("", GF_LOG_INFO, "Session between %s" + " and %s is already created. 
Force" + " creating again.", volinfo->volname, slave); + + ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to spawn gsyncd."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = 0; +out: + + if (ret && errmsg[0] != '\0') + *op_errstr = gf_strdup (errmsg); + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr) +{ + int ret = 0; + int type = 0; + char *volname = NULL; + char *slave = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + char *down_peerstr = NULL; + char *statefile = NULL; + char *path_list = NULL; + char *conf_path = NULL; + gf_boolean_t exists = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + char errmsg[PATH_MAX] = {0,}; + dict_t *ctx = NULL; + gf_boolean_t is_force = 0; + gf_boolean_t is_force_blocker = -1; + gf_boolean_t is_running = _gf_false; + uuid_t uuid = {0}; + char uuid_str [64] = {0}; + char *host_uuid = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + struct stat stbuf = {0,}; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + ret = dict_get_int32 (dict, "type", &type); + if (ret < 0) { + gf_log ("", GF_LOG_WARNING, "command type not found"); + *op_errstr = gf_strdup ("command unsuccessful"); + goto out; + } + + if (type == GF_GSYNC_OPTION_TYPE_STATUS) { + ret = glusterd_verify_gsync_status_opts (dict, op_errstr); + goto out; + } + + ret = glusterd_op_gsync_args_get (dict, op_errstr, + &volname, &slave, &host_uuid); + if (ret) + goto out; + + uuid_utoa_r (MY_UUID, uuid_str); + + if (conf->op_version < 2) { + gf_log ("", GF_LOG_ERROR, "Op Version not supported."); + snprintf (errmsg, sizeof(errmsg), "One or more nodes do not" + " support the required op version."); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + exists = glusterd_check_volume_exists (volname); + ret = glusterd_volinfo_find (volname, &volinfo); + if ((ret) || (!exists)) { + gf_log ("", GF_LOG_WARNING, "volume name does not exist"); + snprintf (errmsg, sizeof(errmsg), "Volume name %s does not" + " exist", volname); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + ret = glusterd_get_slave_details_confpath (volinfo, dict, &slave_ip, + &slave_vol, &conf_path, + op_errstr); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave or confpath details."); + ret = -1; + goto out; + } + + ret = glusterd_get_statefile_name (volinfo, slave, conf_path, &statefile); + if (ret) { + /* Checking if slave host is pingable, has proper passwordless + * ssh login setup */ + ret = glusterd_verify_slave (volname, slave_ip, slave_vol, + op_errstr, &is_force_blocker); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "%s is not a valid slave volume. Error: %s", + slave, *op_errstr); + goto out; + } + + if (!strstr(slave, "::")) + snprintf (errmsg, sizeof (errmsg), + "%s is not a valid slave url.", slave); + else + snprintf (errmsg, sizeof (errmsg), + "Unable to get statefile's name"); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + + ret = dict_set_str (dict, "statefile", statefile); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store statefile path"); + goto out; + } + + is_force = dict_get_str_boolean (dict, "force", _gf_false); + + /* Allowing stop force to bypass the statefile check + * as this command acts as a fail safe method to stop geo-rep + * session. 
+         */
+        if ((type == GF_GSYNC_OPTION_TYPE_CONFIG) ||
+            ((type == GF_GSYNC_OPTION_TYPE_STOP) && !is_force) ||
+            (type == GF_GSYNC_OPTION_TYPE_DELETE)) {
+                ret = lstat (statefile, &stbuf);
+                if (ret) {
+                        snprintf (errmsg, sizeof(errmsg), "Geo-replication"
+                                  " session between %s and %s does not exist.",
+                                  volinfo->volname, slave);
+                        gf_log ("", GF_LOG_ERROR, "%s. statefile = %s",
+                                errmsg, statefile);
+                        *op_errstr = gf_strdup (errmsg);
+                        ret = -1;
+                        goto out;
+                }
+        }
+
+        /* Check if all peers that are a part of the volume are up or not */
+        if ((type == GF_GSYNC_OPTION_TYPE_DELETE) ||
+            ((type == GF_GSYNC_OPTION_TYPE_STOP) && !is_force)) {
+                if (!strcmp (uuid_str, host_uuid)) {
+                        ret = glusterd_are_vol_all_peers_up (volinfo,
+                                                             &conf->peers,
+                                                             &down_peerstr);
+                        if (ret == _gf_false) {
+                                snprintf (errmsg, sizeof (errmsg), "Peer %s,"
+                                          " which is a part of %s volume, is"
+                                          " down. Please bring up the peer and"
+                                          " retry.", down_peerstr,
+                                          volinfo->volname);
+                                *op_errstr = gf_strdup (errmsg);
+                                ret = -1;
+                                GF_FREE (down_peerstr);
+                                down_peerstr = NULL;
+                                goto out;
+                        }
+                }
+        }
+
+        switch (type) {
+        case GF_GSYNC_OPTION_TYPE_START:
+                /* don't attempt to start gsync if replace-brick is
+                 * in progress */
+                if (glusterd_is_rb_ongoing (volinfo)) {
+                        snprintf (errmsg, sizeof(errmsg), "replace-brick is in"
+                                  " progress, not starting geo-replication");
+                        *op_errstr = gf_strdup (errmsg);
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_op_verify_gsync_start_options (volinfo, slave,
+                                                              conf_path, statefile,
+                                                              op_errstr, is_force);
+                if (ret)
+                        goto out;
+                ctx = glusterd_op_get_ctx();
+                if (ctx) {
+                        /* gsyncd does a fuse mount to start
+                         * the geo-rep session */
+                        if (!glusterd_is_fuse_available ()) {
+                                gf_log ("glusterd", GF_LOG_ERROR, "Unable to "
+                                        "open /dev/fuse (%s), geo-replication "
+                                        "start failed", strerror (errno));
+                                snprintf (errmsg, sizeof(errmsg),
+                                          "fuse unavailable");
+                                *op_errstr = gf_strdup (errmsg);
+                                ret = -1;
+                                goto out;
+                        }
+                }
+                break;
+
+        case GF_GSYNC_OPTION_TYPE_STOP:
+                if (!is_force) {
+                        ret = glusterd_op_verify_gsync_running (volinfo, slave,
+                                                                conf_path,
+                                                                op_errstr);
+                        if (ret) {
+                                ret = glusterd_get_local_brickpaths (volinfo,
+                                                                     &path_list);
+                                if (path_list)
+                                        ret = -1;
+                        }
+                }
+                break;
+
+        case GF_GSYNC_OPTION_TYPE_CONFIG:
+                ret = gsync_verify_config_options (dict, op_errstr, volname);
+                goto out;
+                break;
+
+        case GF_GSYNC_OPTION_TYPE_DELETE:
+                /* Check if the gsync session is still running.
+                 * If so, ask the user to stop geo-replication first. */
+                ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
+                if (ret) {
+                        snprintf (errmsg, sizeof(errmsg), "Geo-replication"
+                                  " session between %s and %s does not exist.",
+                                  volinfo->volname, slave);
+                        gf_log ("", GF_LOG_ERROR, "%s", errmsg);
+                        *op_errstr = gf_strdup (errmsg);
+                        ret = -1;
+                        goto out;
+                } else {
+                        ret = glusterd_check_gsync_running_local (volinfo->volname,
+                                                                  slave, conf_path,
+                                                                  &is_running);
+                        if (_gf_true == is_running) {
+                                snprintf (errmsg, sizeof (errmsg), GEOREP
+                                          " session between %s & %s is "
+                                          "still active. 
Please stop the " + "session and retry.", + volinfo->volname, slave); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + } + + ret = glusterd_verify_gsyncd_spawn (volinfo->volname, slave); + if (ret) { + snprintf (errmsg, sizeof (errmsg), + "Unable to spawn gsyncd"); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + } + + break; + } + +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static int +stop_gsync (char *master, char *slave, char **msg, + char *conf_path, gf_boolean_t is_force) +{ + int32_t ret = 0; + int pfd = -1; + pid_t pid = 0; + char pidfile[PATH_MAX] = {0,}; + char buf [1024] = {0,}; + int i = 0; + + GF_ASSERT (THIS); + GF_ASSERT (THIS->private); + + pfd = gsyncd_getpidfile (master, slave, pidfile, conf_path); + if (pfd == -2 && !is_force) { + gf_log ("", GF_LOG_ERROR, GEOREP" stop validation " + " failed for %s & %s", master, slave); + ret = -1; + goto out; + } + if (gsync_status_byfd (pfd) == -1 && !is_force) { + gf_log ("", GF_LOG_ERROR, "gsyncd b/w %s & %s is not" + " running", master, slave); + /* monitor gsyncd already dead */ + goto out; + } + + if (pfd < 0) + goto out; + + ret = read (pfd, buf, 1024); + if (ret > 0) { + pid = strtol (buf, NULL, 10); + ret = kill (-pid, SIGTERM); + if (ret) { + gf_log ("", GF_LOG_WARNING, + "failed to kill gsyncd"); + goto out; + } + for (i = 0; i < 20; i++) { + if (gsync_status_byfd (pfd) == -1) { + /* monitor gsyncd is dead but worker may + * still be alive, give some more time + * before SIGKILL (hack) + */ + usleep (50000); + break; + } + usleep (50000); + } + kill (-pid, SIGKILL); + unlink (pidfile); + } + ret = 0; + +out: + sys_close (pfd); + + if (is_force) + ret = 0; + return ret; +} + +static int +glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave, + char *path_list, dict_t *dict, + dict_t *resp_dict, char **op_errstr) +{ + int32_t ret = -1; + char *op_name = NULL; + char *op_value = NULL; + runner_t runner = {0,}; + glusterd_conf_t *priv = NULL; + char *subop = NULL; + char *master = NULL; + char *conf_path = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + struct stat stbuf = {0, }; + + GF_ASSERT (slave); + GF_ASSERT (op_errstr); + GF_ASSERT (dict); + GF_ASSERT (resp_dict); + + ret = dict_get_str (dict, "subop", &subop); + if (ret != 0) + goto out; + + if (strcmp (subop, "get") == 0 || strcmp (subop, "get-all") == 0) { + /* deferred to cli */ + gf_log ("", GF_LOG_DEBUG, "Returning 0"); + return 0; + } + + ret = dict_get_str (dict, "op_name", &op_name); + if (ret != 0) + goto out; + + if (strtail (subop, "set")) { + ret = dict_get_str (dict, "op_value", &op_value); + if (ret != 0) + goto out; + } + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + *op_errstr = gf_strdup ("glusterd defunct"); + goto out; + } + + ret = dict_get_str (dict, "conf_path", &conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch conf file path."); + goto out; + } + + master = ""; + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); + runner_argprintf (&runner, "%s", conf_path); + if (volinfo) { + master = volinfo->volname; + runner_argprintf (&runner, ":%s", master); + } + runner_add_arg (&runner, slave); + runner_argprintf (&runner, "--config-%s", subop); + runner_add_arg (&runner, op_name); + if (op_value) + runner_add_arg (&runner, op_value); + synclock_unlock (&priv->big_lock); + ret = 
runner_run (&runner);
+        synclock_lock (&priv->big_lock);
+        if (ret) {
+                gf_log ("", GF_LOG_WARNING, "gsyncd failed to "
+                        "%s %s option for %s %s peers",
+                        subop, op_name, master, slave);
+
+                gf_asprintf (op_errstr, GEOREP" config-%s failed for %s %s",
+                             subop, master, slave);
+
+                goto out;
+        }
+
+        if (!strcmp (op_name, "state_file")) {
+
+                ret = lstat (op_value, &stbuf);
+                if (ret) {
+                        ret = dict_get_str (dict, "slave_ip", &slave_ip);
+                        if (ret) {
+                                gf_log ("", GF_LOG_ERROR,
+                                        "Unable to fetch slave IP.");
+                                goto out;
+                        }
+
+                        ret = dict_get_str (dict, "slave_vol", &slave_vol);
+                        if (ret) {
+                                gf_log ("", GF_LOG_ERROR,
+                                        "Unable to fetch slave volume name.");
+                                goto out;
+                        }
+
+                        ret = glusterd_create_status_file (volinfo->volname, slave,
+                                                           slave_ip, slave_vol,
+                                                           "Switching Status File");
+                        if (ret || lstat (op_value, &stbuf)) {
+                                gf_log ("", GF_LOG_ERROR, "Unable to create %s"
+                                        ". Error : %s", op_value,
+                                        strerror (errno));
+                                ret = -1;
+                                goto out;
+                        }
+                }
+        }
+
+        ret = 0;
+        gf_asprintf (op_errstr, "config-%s successful", subop);
+
+out:
+        if (!ret && volinfo) {
+                ret = glusterd_check_restart_gsync_session (volinfo, slave,
+                                                            resp_dict, path_list,
+                                                            conf_path, 0);
+                if (ret)
+                        *op_errstr = gf_strdup ("internal error");
+        }
+
+        gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+        return ret;
+}
+
+static int
+glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen)
+{
+        int ret = 0;
+        int status_fd = -1;
+
+        GF_ASSERT (path);
+        GF_ASSERT (buf);
+        status_fd = open (path, O_RDONLY);
+        if (status_fd == -1) {
+                gf_log ("", GF_LOG_ERROR, "Unable to read gsyncd status"
+                        " file");
+                return -1;
+        }
+        ret = read (status_fd, buf, blen - 1);
+        if (ret > 0) {
+                size_t len = strnlen (buf, ret);
+                /* Ensure there is a NUL byte and that it's not the first. */
+                if (len == 0 || len == blen - 1) {
+                        ret = -1;
+                } else {
+                        char *p = buf + len - 1;
+                        while (isspace (*p))
+                                *p-- = '\0';
+                        ret = 0;
+                }
+        } else if (ret < 0)
+                gf_log ("", GF_LOG_ERROR, "Status file of gsyncd is corrupt");
+
+        close (status_fd);
+        return ret;
+}
+
+static int
+glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen)
+{
+        char sockpath[PATH_MAX] = {0,};
+        struct sockaddr_un sa = {0,};
+        size_t l = 0;
+        int s = -1;
+        struct pollfd pfd = {0,};
+        int ret = 0;
+
+        l = strlen (buf);
+        /* seek to end of data in buf */
+        buf += l;
+        blen -= l;
+
+        glusterd_set_socket_filepath (path, sockpath, sizeof (sockpath));
+
+        strncpy(sa.sun_path, sockpath, sizeof(sa.sun_path));
+        if (sa.sun_path[sizeof (sa.sun_path) - 1])
+                return -1;
+        sa.sun_family = AF_UNIX;
+
+        s = socket(AF_UNIX, SOCK_STREAM, 0);
+        if (s == -1)
+                return -1;
+        ret = fcntl (s, F_GETFL);
+        if (ret != -1)
+                ret = fcntl (s, F_SETFL, ret | O_NONBLOCK);
+        if (ret == -1)
+                goto out;
+
+        ret = connect (s, (struct sockaddr *)&sa, sizeof (sa));
+        if (ret == -1)
+                goto out;
+        pfd.fd = s;
+        pfd.events = POLLIN;
+        /* we don't want to hang on gsyncd */
+        if (poll (&pfd, 1, 5000) < 1 ||
+            !(pfd.revents & POLLIN)) {
+                ret = -1;
+                goto out;
+        }
+        ret = read(s, buf, blen);
+        /* we expect a terminating 0 byte */
+        if (ret == 0 || (ret > 0 && buf[ret - 1]))
+                ret = -1;
+        if (ret > 0)
+                ret = 0;
+
+ out:
+        close (s);
+        return ret;
+}
+
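+/* dict_get_param() first looks a key up verbatim and, failing that,
+ * retries with every '-'/'_' separator flipped to the style opposite
+ * that of the first separator found, so a gsyncd parameter may be
+ * queried as either, e.g., "state_file" or "state-file". */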
+static int
+dict_get_param (dict_t *dict, char *key, char **param)
+{
+        char *dk = NULL;
+        char *s = NULL;
+        char x = '\0';
+        int ret = 0;
+
+        if (dict_get_str (dict, key, param) == 0)
+                return 0;
+
+        dk = gf_strdup (key);
+        if (!dk)
+                return -1;
+
+        s = strpbrk (dk, "-_");
+        if (!s) {
+                GF_FREE (dk);
+                return -1;
+        }
+        x = (*s == '-') ? '_' : '-';
+        *s++ = x;
+        while ((s = strpbrk (s, "-_")))
+                *s++ = x;
+
+        ret = dict_get_str (dict, dk, param);
+
+        GF_FREE (dk);
+        return ret;
+}
+
+static int
+glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave,
+                           char *conf_path, dict_t *dict, char *node)
+{
+        glusterd_conf_t *priv = NULL;
+        int ret = 0;
+        char *statefile = NULL;
+        char *master = NULL;
+        char buf[1024] = "defunct";
+        char nds[1024] = {0, };
+        char mst[1024] = {0, };
+        char slv[1024] = {0, };
+        char sts[1024] = {0, };
+        char *bufp = NULL;
+        dict_t *confd = NULL;
+        int gsync_count = 0;
+        int status = 0;
+        char *dyn_node = NULL;
+        char *path_list = NULL;
+
+        GF_ASSERT (THIS);
+        GF_ASSERT (THIS->private);
+        GF_ASSERT (volinfo);
+
+        master = volinfo->volname;
+
+        confd = dict_new ();
+        if (!confd) {
+                gf_log ("", GF_LOG_ERROR, "Not able to create dict.");
+                return -1;
+        }
+
+        priv = THIS->private;
+
+        ret = glusterd_gsync_get_config (master, slave, conf_path,
+                                         confd);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to get configuration data "
+                        "for %s(master), %s(slave)", master, slave);
+                goto done;
+
+        }
+
+        ret = dict_get_param (confd, "state_file", &statefile);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name "
+                        "for %s(master), %s(slave). Please check gsync "
+                        "config file.", master, slave);
+                goto done;
+        }
+        ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf));
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to read the status "
+                        "file for %s(master), %s(slave)", master, slave);
+                strncpy (buf, "defunct", sizeof (buf));
+                goto done;
+        }
+
+        ret = gsync_status (master, slave, conf_path, &status);
+        if (ret == 0 && status == -1) {
+                if ((strcmp (buf, "Not Started")) &&
+                    (strcmp (buf, "Stopped")))
+                        strncpy (buf, "defunct", sizeof (buf));
+                goto done;
+        } else if (ret == -1) {
+                gf_log ("", GF_LOG_ERROR, "Unable to get gsync status");
+                goto done;
+        }
+
+        if (strcmp (buf, "Stable") != 0)
+                goto done;
+
+        ret = dict_get_param (confd, "state_socket_unencoded", &statefile);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to get state_socket_unencoded"
+                        " filepath. Please check gsync config file.");
+                goto done;
+        }
+        ret = glusterd_gsync_fetch_status_extra (statefile, buf, sizeof (buf));
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to fetch extra status "
+                        "for %s(master), %s(slave)", master, slave);
+                /* there is a slight chance that this occurs due to race
+                 * -- in that case, the following options all seem bad:
+                 *
+                 * - suppress irregular behavior by just leaving status
+                 *   on "OK"
+                 * - freak out users with a misleading "defunct"
+                 * - overload the meaning of the regular error signal
+                 *   mechanism of gsyncd, that is, when status is "faulty"
+                 *
+                 * -- so we just come up with something new...
+                 */
+                strncpy (buf, "N/A", sizeof (buf));
+                goto done;
+        }
+
+ done:
+        if ((!strcmp (buf, "defunct")) ||
+            (!strcmp (buf, "Not Started")) ||
+            (!strcmp (buf, "Stopped"))) {
+                ret = glusterd_get_local_brickpaths (volinfo, &path_list);
+                if (!path_list) {
+                        gf_log ("", GF_LOG_DEBUG, "This node, not being part"
+                                " of the volume, should not be running gsyncd. 
Hence" + " shouldn't display status for this node."); + ret = 0; + goto out; + } + } + + ret = dict_get_int32 (dict, "gsync-count", &gsync_count); + + if (ret) + gsync_count = 1; + else + gsync_count++; + + (void) snprintf (nds, sizeof (nds), "node%d", gsync_count); + dyn_node = gf_strdup (node); + if (!dyn_node) + goto out; + ret = dict_set_dynstr (dict, nds, dyn_node); + if (ret) { + GF_FREE (dyn_node); + goto out; + } + + snprintf (mst, sizeof (mst), "master%d", gsync_count); + master = gf_strdup (master); + if (!master) + goto out; + ret = dict_set_dynstr (dict, mst, master); + if (ret) { + GF_FREE (master); + goto out; + } + + snprintf (slv, sizeof (slv), "slave%d", gsync_count); + slave = gf_strdup (slave); + if (!slave) + goto out; + ret = dict_set_dynstr (dict, slv, slave); + if (ret) { + GF_FREE (slave); + goto out; + } + + snprintf (sts, sizeof (slv), "status%d", gsync_count); + bufp = gf_strdup (buf); + if (!bufp) + goto out; + ret = dict_set_dynstr (dict, sts, bufp); + if (ret) { + GF_FREE (bufp); + goto out; + } + ret = dict_set_int32 (dict, "gsync-count", gsync_count); + if (ret) + goto out; + + out: + dict_destroy (confd); + + return 0; +} + +int +glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, + dict_t *resp_dict, char *path_list, + char *conf_path, gf_boolean_t is_force) +{ + + int ret = 0; + glusterd_conf_t *priv = NULL; + char *status_msg = NULL; + gf_boolean_t is_running = _gf_false; + + GF_ASSERT (volinfo); + GF_ASSERT (slave); + GF_ASSERT (THIS); + GF_ASSERT (THIS->private); + + priv = THIS->private; + + ret = glusterd_check_gsync_running_local (volinfo->volname, + slave, conf_path, + &is_running); + if (!ret && (_gf_true != is_running)) + /* gsynd not running, nothing to do */ + goto out; + + ret = stop_gsync (volinfo->volname, slave, &status_msg, + conf_path, is_force); + if (ret == 0 && status_msg) + ret = dict_set_str (resp_dict, "gsync-status", + status_msg); + if (ret == 0) + ret = glusterd_start_gsync (volinfo, slave, path_list, + conf_path, uuid_utoa(MY_UUID), + NULL); + + out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static int32_t +glusterd_marker_changelog_create_volfile (glusterd_volinfo_t *volinfo) +{ + int32_t ret = 0; + + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to create volfile" + " for setting of marker while '"GEOREP" start'"); + ret = -1; + goto out; + } + + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) + ret = glusterd_nodesvcs_handle_graph_change (volinfo); + ret = 0; +out: + return ret; +} + +static int +glusterd_set_gsync_knob (glusterd_volinfo_t *volinfo, char *key, int *vc) +{ + int ret = -1; + int conf_enabled = _gf_false; + char *knob_on = NULL; + + GF_ASSERT (THIS); + GF_ASSERT (THIS->private); + + conf_enabled = glusterd_volinfo_get_boolean (volinfo, key); + if (conf_enabled == -1) { + gf_log ("", GF_LOG_ERROR, + "failed to get key %s from volinfo", key); + goto out; + } + + ret = 0; + if (conf_enabled == _gf_false) { + *vc = 1; + knob_on = gf_strdup ("on"); + if (knob_on == NULL) { + ret = -1; + goto out; + } + + ret = glusterd_gsync_volinfo_dict_set (volinfo, + key, knob_on); + } + + out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static int +glusterd_set_gsync_confs (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int volfile_changed = 0; + + ret = glusterd_set_gsync_knob 
(volinfo, + VKEY_MARKER_XTIME, &volfile_changed); + if (ret) + goto out; + + /** + * enable ignore-pid-check blindly as it could be needed for + * cascading setups. + */ + ret = glusterd_set_gsync_knob (volinfo, VKEY_MARKER_XTIME_FORCE, + &volfile_changed); + if (ret) + goto out; + + ret = glusterd_set_gsync_knob (volinfo, + VKEY_CHANGELOG, &volfile_changed); + if (ret) + goto out; + + if (volfile_changed) + ret = glusterd_marker_changelog_create_volfile (volinfo); + + out: + return ret; +} + +static int +glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo, + char *slave, char *conf_path, + dict_t *rsp_dict, char *node) +{ + char *statefile = NULL; + uuid_t uuid = {0, }; + glusterd_conf_t *priv = NULL; + int ret = 0; + struct stat stbuf = {0, }; + + GF_ASSERT (volinfo); + GF_ASSERT (slave); + GF_ASSERT (THIS); + GF_ASSERT (THIS->private); + + priv = THIS->private; + + ret = glusterd_gsync_get_uuid (slave, volinfo, uuid); + if (ret) { + gf_log ("", GF_LOG_INFO, "geo-replication status %s %s :" + "session is not active", volinfo->volname, slave); + + ret = glusterd_get_statefile_name (volinfo, slave, + conf_path, &statefile); + if (ret) { + if (!strstr(slave, "::")) + gf_log ("", GF_LOG_INFO, + "%s is not a valid slave url.", slave); + else + gf_log ("", GF_LOG_INFO, "Unable to get" + " statefile's name"); + ret = 0; + goto out; + } + + ret = lstat (statefile, &stbuf); + if (ret) { + gf_log ("", GF_LOG_INFO, "%s statefile not present.", + statefile); + ret = 0; + goto out; + } + } + + ret = glusterd_read_status_file (volinfo, slave, conf_path, + rsp_dict, node); +out: + if (statefile) + GF_FREE (statefile); + + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +static int +glusterd_get_gsync_status_mst (glusterd_volinfo_t *volinfo, dict_t *rsp_dict, + char *node) +{ + glusterd_gsync_status_temp_t param = {0, }; + + GF_ASSERT (volinfo); + + param.rsp_dict = rsp_dict; + param.volinfo = volinfo; + param.node = node; + dict_foreach (volinfo->gsync_slaves, _get_status_mst_slv, ¶m); + + return 0; +} + +static int +glusterd_get_gsync_status_all (dict_t *rsp_dict, char *node) +{ + + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + + GF_ASSERT (THIS); + priv = THIS->private; + + GF_ASSERT (priv); + + list_for_each_entry (volinfo, &priv->volumes, vol_list) { + ret = glusterd_get_gsync_status_mst (volinfo, rsp_dict, node); + if (ret) + goto out; + } + +out: + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + return ret; + +} + +static int +glusterd_get_gsync_status (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + char *slave = NULL; + char *volname = NULL; + char *conf_path = NULL; + char errmsg[PATH_MAX] = {0, }; + gf_boolean_t exists = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + char my_hostname[256] = {0,}; + + ret = gethostname(my_hostname, 256); + if (ret) { + /* stick to N/A */ + (void) strcpy (my_hostname, "N/A"); + } + + ret = dict_get_str (dict, "master", &volname); + if (ret < 0){ + ret = glusterd_get_gsync_status_all (rsp_dict, my_hostname); + goto out; + } + + exists = glusterd_check_volume_exists (volname); + ret = glusterd_volinfo_find (volname, &volinfo); + if ((ret) || (!exists)) { + gf_log ("", GF_LOG_WARNING, "volume name does not exist"); + snprintf (errmsg, sizeof(errmsg), "Volume name %s does not" + " exist", volname); + *op_errstr = gf_strdup (errmsg); + ret = -1; + goto out; + } + + + ret = dict_get_str (dict, "slave", &slave); + if (ret < 0) { + ret = 
glusterd_get_gsync_status_mst (volinfo, + rsp_dict, my_hostname); + goto out; + } + + ret = dict_get_str (dict, "conf_path", &conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch conf file path."); + goto out; + } + + ret = glusterd_get_gsync_status_mst_slv (volinfo, slave, conf_path, + rsp_dict, my_hostname); + + out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static int +glusterd_gsync_delete (glusterd_volinfo_t *volinfo, char *slave, char *slave_ip, + char *slave_vol, char *path_list, dict_t *dict, + dict_t *resp_dict, char **op_errstr) +{ + int32_t ret = -1; + runner_t runner = {0,}; + glusterd_conf_t *priv = NULL; + char *master = NULL; + char *gl_workdir = NULL; + char geo_rep_dir[PATH_MAX] = ""; + char *conf_path = NULL; + + GF_ASSERT (slave); + GF_ASSERT (slave_ip); + GF_ASSERT (slave_vol); + GF_ASSERT (op_errstr); + GF_ASSERT (dict); + GF_ASSERT (resp_dict); + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + *op_errstr = gf_strdup ("glusterd defunct"); + goto out; + } + + ret = dict_get_str (dict, "conf_path", &conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch conf file path."); + goto out; + } + + gl_workdir = priv->workdir; + master = ""; + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", + "--delete", "-c", NULL); + runner_argprintf (&runner, "%s", conf_path); + + if (volinfo) { + master = volinfo->volname; + runner_argprintf (&runner, ":%s", master); + } + runner_add_arg (&runner, slave); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log ("", GF_LOG_ERROR, "gsyncd failed to " + "delete session info for %s and %s peers", + master, slave); + + gf_asprintf (op_errstr, "gsyncd failed to " + "delete session info for %s and %s peers", + master, slave); + + goto out; + } + + ret = snprintf (geo_rep_dir, sizeof(geo_rep_dir) - 1, + "%s/"GEOREP"/%s_%s_%s", gl_workdir, + volinfo->volname, slave_ip, slave_vol); + geo_rep_dir[ret] = '\0'; + + ret = rmdir (geo_rep_dir); + if (ret) { + if (errno == ENOENT) + gf_log ("", GF_LOG_DEBUG, "Geo Rep Dir(%s) Not Present.", + geo_rep_dir); + else { + gf_log ("", GF_LOG_ERROR, "Unable to delete " + "Geo Rep Dir(%s). 
Error: %s", geo_rep_dir, + strerror (errno)); + goto out; + } + } + + ret = 0; + + gf_asprintf (op_errstr, "delete successful"); + +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + char buf[PATH_MAX] = ""; + char cmd_arg_name[PATH_MAX] = ""; + char output_name[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + char *ptr = NULL; + char *bufp = NULL; + char *command = NULL; + char **cmd_args = NULL; + int ret = -1; + int i = -1; + int cmd_args_count = 0; + int output_count = 0; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + *op_errstr = gf_strdup ("glusterd defunct"); + goto out; + } + + ret = dict_get_str (dict, "command", &command); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get command from dict"); + goto out; + } + + ret = dict_get_int32 (dict, "cmd_args_count", &cmd_args_count); + if (ret) + gf_log ("", GF_LOG_INFO, "No cmd_args_count"); + + if (cmd_args_count) { + cmd_args = GF_CALLOC (cmd_args_count, sizeof (char*), + gf_common_mt_char); + if (!cmd_args) { + gf_log ("", GF_LOG_ERROR, "Unable to calloc. " + "Errno = %s", strerror(errno)); + goto out; + } + + for (i=1; i <= cmd_args_count; i++) { + memset (cmd_arg_name, '\0', sizeof(cmd_arg_name)); + snprintf (cmd_arg_name, sizeof(cmd_arg_name), + "cmd_arg_%d", i); + ret = dict_get_str (dict, cmd_arg_name, &cmd_args[i-1]); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s in dict", + cmd_arg_name); + goto out; + } + } + } + + runinit (&runner); + runner_argprintf (&runner, GSYNCD_PREFIX"/peer_%s", command); + for (i=0; i < cmd_args_count; i++) + runner_add_arg (&runner, cmd_args[i]); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + synclock_unlock (&priv->big_lock); + ret = runner_start (&runner); + if (ret == -1) { + snprintf (errmsg, sizeof (errmsg), "Unable to " + "execute command. Error : %s", + strerror (errno)); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + synclock_lock (&priv->big_lock); + goto out; + } + + ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO)); + if (ptr) { + ret = dict_get_int32 (rsp_dict, "output_count", &output_count); + if (ret) + output_count = 1; + else + output_count++; + memset (output_name, '\0', sizeof (output_name)); + snprintf (output_name, sizeof (output_name), + "output_%d", output_count); + if (buf[strlen(buf) - 1] == '\n') + buf[strlen(buf) - 1] = '\0'; + bufp = gf_strdup (buf); + if (!bufp) + gf_log ("", GF_LOG_ERROR, "gf_strdup failed."); + ret = dict_set_dynstr (rsp_dict, output_name, bufp); + if (ret) { + GF_FREE (bufp); + gf_log ("", GF_LOG_ERROR, "output set failed."); + } + ret = dict_set_int32 (rsp_dict, "output_count", output_count); + if (ret) + gf_log ("", GF_LOG_ERROR, "output_count set failed."); + } + + ret = runner_end (&runner); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to " + "end. 
Error : %s", + strerror (errno)); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + synclock_lock (&priv->big_lock); + goto out; + } + synclock_lock (&priv->big_lock); + + ret = 0; +out: + if (cmd_args) { + GF_FREE (cmd_args); + cmd_args = NULL; + } + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_copy_file (dict_t *dict, char **op_errstr) +{ + char abs_filename[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + char *filename = NULL; + char *host_uuid = NULL; + char uuid_str [64] = {0}; + char *contents = NULL; + char buf[1024] = ""; + int ret = -1; + int fd = -1; + int bytes_writen = 0; + int bytes_read = 0; + int contents_size = -1; + int file_mode = -1; + glusterd_conf_t *priv = NULL; + struct stat stbuf = {0,}; + + + if (THIS) + priv = THIS->private; + if (priv == NULL) { + gf_log ("", GF_LOG_ERROR, "priv of glusterd not present"); + *op_errstr = gf_strdup ("glusterd defunct"); + goto out; + } + + ret = dict_get_str (dict, "host-uuid", &host_uuid); + if (ret < 0) + goto out; + + ret = dict_get_str (dict, "source", &filename); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch" + " filename from dict."); + *op_errstr = gf_strdup ("command unsuccessful"); + goto out; + } + snprintf (abs_filename, sizeof(abs_filename), + "%s/%s", priv->workdir, filename); + + uuid_utoa_r (MY_UUID, uuid_str); + if (!strcmp (uuid_str, host_uuid)) { + ret = lstat (abs_filename, &stbuf); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Source file" + " does not exist in %s", priv->workdir); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + contents = GF_CALLOC(1, stbuf.st_size+1, gf_common_mt_char); + if (!contents) { + snprintf (errmsg, sizeof (errmsg), + "Unable to allocate memory"); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + + fd = open (abs_filename, O_RDONLY); + if (fd < 0) { + snprintf (errmsg, sizeof (errmsg), "Unable to open %s", + abs_filename); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + + do { + ret = read (fd, buf, sizeof(buf)); + if (ret > 0) { + memcpy (contents+bytes_read, buf, ret); + bytes_read += ret; + memset (buf, '\0', sizeof(buf)); + } + } while (ret > 0); + + if (bytes_read != stbuf.st_size) { + snprintf (errmsg, sizeof (errmsg), "Unable to read all " + "the data from %s", abs_filename); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + + ret = dict_set_int32 (dict, "contents_size", stbuf.st_size); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to set" + " contents size in dict."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = dict_set_int32 (dict, "file_mode", + (int32_t)stbuf.st_mode); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to set" + " file mode in dict."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = dict_set_bin (dict, "common_pem_contents", + contents, stbuf.st_size); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to set" + " pem contents in dict."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + close (fd); + } else { + ret = dict_get_bin (dict, "common_pem_contents", + (void **) &contents); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to get" 
+ " pem contents in dict."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = dict_get_int32 (dict, "contents_size", &contents_size); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to set" + " contents size in dict."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = dict_get_int32 (dict, "file_mode", &file_mode); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to get" + " file mode in dict."); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + fd = open (abs_filename, O_WRONLY | O_TRUNC | O_CREAT, 0600); + if (fd < 0) { + snprintf (errmsg, sizeof (errmsg), "Unable to open %s", + abs_filename); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + + bytes_writen = write (fd, contents, contents_size); + + if (bytes_writen != contents_size) { + snprintf (errmsg, sizeof (errmsg), "Failed to write" + " to %s", abs_filename); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + + fchmod (fd, file_mode); + close (fd); + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + int32_t type = -1; + dict_t *ctx = NULL; + dict_t *resp_dict = NULL; + char *host_uuid = NULL; + char *slave = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + char *volname = NULL; + char *path_list = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t is_force = _gf_false; + char *status_msg = NULL; + gf_boolean_t is_running = _gf_false; + char *conf_path = NULL; + + GF_ASSERT (THIS); + GF_ASSERT (THIS->private); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + priv = THIS->private; + + ret = dict_get_int32 (dict, "type", &type); + if (ret < 0) + goto out; + + ret = dict_get_str (dict, "host-uuid", &host_uuid); + if (ret < 0) + goto out; + + ctx = glusterd_op_get_ctx (); + resp_dict = ctx ? 
ctx : rsp_dict;
+        GF_ASSERT (resp_dict);
+
+        if (type == GF_GSYNC_OPTION_TYPE_STATUS) {
+                ret = glusterd_get_gsync_status (dict, op_errstr, resp_dict);
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "slave", &slave);
+        if (ret < 0)
+                goto out;
+
+        ret = dict_get_str (dict, "slave_ip", &slave_ip);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to fetch slave IP.");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "slave_vol", &slave_vol);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR, "Unable to fetch slave volume name.");
+                goto out;
+        }
+
+        ret = dict_get_str (dict, "conf_path", &conf_path);
+        if (ret) {
+                gf_log ("", GF_LOG_ERROR,
+                        "Unable to fetch conf file path.");
+                goto out;
+        }
+
+        if (dict_get_str (dict, "master", &volname) == 0) {
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        gf_log ("", GF_LOG_WARNING, "Volinfo for %s (master) not found",
+                                volname);
+                        goto out;
+                }
+
+                ret = glusterd_get_local_brickpaths (volinfo, &path_list);
+        }
+
+        if (type == GF_GSYNC_OPTION_TYPE_CONFIG) {
+                ret = glusterd_gsync_configure (volinfo, slave, path_list,
+                                                dict, resp_dict, op_errstr);
+
+                ret = dict_set_str (resp_dict, "conf_path", conf_path);
+                if (ret) {
+                        gf_log ("", GF_LOG_ERROR,
+                                "Unable to store conf_file_path.");
+                        goto out;
+                }
+                goto out;
+        }
+
+        if (type == GF_GSYNC_OPTION_TYPE_DELETE) {
+                ret = glusterd_remove_slave_in_info (volinfo, slave, op_errstr);
+                if (ret && !is_force && path_list)
+                        goto out;
+
+                ret = glusterd_gsync_delete (volinfo, slave, slave_ip,
+                                             slave_vol, path_list, dict,
+                                             resp_dict, op_errstr);
+                goto out;
+        }
+
+        if (!volinfo) {
+                ret = -1;
+                goto out;
+        }
+
+        is_force = dict_get_str_boolean (dict, "force", _gf_false);
+
+        if (type == GF_GSYNC_OPTION_TYPE_START) {
+                ret = glusterd_set_gsync_confs (volinfo);
+                if (ret != 0) {
+                        gf_log ("", GF_LOG_WARNING, "marker/changelog start failed");
+                        *op_errstr = gf_strdup ("failed to initialize indexing");
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = glusterd_start_gsync (volinfo, slave, path_list,
+                                            conf_path, host_uuid, op_errstr);
+        }
+
+        if (type == GF_GSYNC_OPTION_TYPE_STOP) {
+                ret = glusterd_check_gsync_running_local (volinfo->volname,
+                                                          slave, conf_path,
+                                                          &is_running);
+                if (!ret && !is_force && path_list &&
+                    (_gf_true != is_running)) {
+                        gf_log ("", GF_LOG_WARNING, GEOREP" is not set up for "
+                                "%s(master) and %s(slave)", volname, slave);
+                        *op_errstr = gf_strdup (GEOREP" is not set up");
+                        goto out;
+                }
+
+                ret = stop_gsync (volname, slave, &status_msg, conf_path, is_force);
+                if (ret == 0 && status_msg)
+                        ret = dict_set_str (resp_dict, "gsync-status",
+                                            status_msg);
+                if (ret != 0 && !is_force && path_list)
+                        *op_errstr = gf_strdup ("internal error");
+
+                if (!ret) {
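+                        /* Record "Stopped" in the session's state file
+                         * so that later status queries report the
+                         * session as stopped. */
+                        ret = glusterd_create_status_file (volinfo->volname,
+                                                           slave, slave_ip,
+                                                           slave_vol, "Stopped");
+                        if (ret) {
+                                gf_log ("", GF_LOG_ERROR, "Unable to update "
+                                        "state_file. 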
Error : %s", + strerror (errno)); + } + } + } + +out: + if (path_list) { + GF_FREE (path_list); + path_list = NULL; + } + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo, dict_t *dict, + char **slave_ip, char **slave_vol, + char **conf_path, char **op_errstr) +{ + int ret = -1; + char confpath[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + char *slave = NULL; + + GF_ASSERT (THIS); + priv = THIS->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "slave", &slave); + if (ret || !slave) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch slave from dict"); + ret = -1; + goto out; + } + + ret = glusterd_get_slave_info (slave, slave_ip, slave_vol, op_errstr); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch slave details."); + ret = -1; + goto out; + } + + ret = dict_set_str (dict, "slave_ip", *slave_ip); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store slave IP."); + goto out; + } + + ret = dict_set_str (dict, "slave_vol", *slave_vol); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store slave volume name."); + goto out; + } + + ret = snprintf (confpath, sizeof(confpath) - 1, + "%s/"GEOREP"/%s_%s_%s/gsyncd.conf", + priv->workdir, volinfo->volname, + *slave_ip, *slave_vol); + confpath[ret] = '\0'; + *conf_path = gf_strdup (confpath); + if (!(*conf_path)) { + gf_log ("", GF_LOG_ERROR, + "Unable to gf_strdup. Error: %s", strerror (errno)); + ret = -1; + goto out; + } + + ret = dict_set_str (dict, "conf_path", *conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store conf_path"); + goto out; + } + +out: + gf_log ("", GF_LOG_DEBUG,"Returning %d", ret); + return ret; + +} + +static int +glusterd_get_slave_info (char *slave, char **slave_ip, + char **slave_vol, char **op_errstr) +{ + char *tmp = NULL; + char *save_ptr = NULL; + char **linearr = NULL; + int32_t ret = -1; + char errmsg[PATH_MAX] = ""; + + ret = glusterd_urltransform_single (slave, "normalize", + &linearr); + if (ret == -1) { + ret = snprintf (errmsg, sizeof(errmsg) - 1, + "Invalid Url: %s", slave); + errmsg[ret] = '\0'; + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "Failed to normalize url"); + goto out; + } + + tmp = strtok_r (linearr[0], "/", &save_ptr); + tmp = strtok_r (NULL, "/", &save_ptr); + slave = strtok_r (tmp, ":", &save_ptr); + if (slave) { + ret = glusterd_mountbroker_check (&slave, op_errstr); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Invalid slave url: %s", *op_errstr); + goto out; + } + + *slave_ip = gf_strdup (slave); + if (!*slave_ip) { + gf_log ("", GF_LOG_ERROR, + "Failed to gf_strdup"); + ret = -1; + goto out; + } + gf_log ("", GF_LOG_DEBUG, "Slave IP : %s", *slave_ip); + ret = 0; + } else { + gf_log ("", GF_LOG_ERROR, "Invalid slave name"); + goto out; + } + + slave = strtok_r (NULL, ":", &save_ptr); + if (slave) { + *slave_vol = gf_strdup (slave); + if (!*slave_vol) { + gf_log ("", GF_LOG_ERROR, + "Failed to gf_strdup"); + ret = -1; + goto out; + } + gf_log ("", GF_LOG_DEBUG, "Slave Vol : %s", *slave_vol); + ret = 0; + } else { + gf_log ("", GF_LOG_ERROR, "Invalid slave name"); + goto out; + } + +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static void +runinit_gsyncd_setrx (runner_t *runner, char *conf_path) +{ + runinit (runner); + runner_add_args (runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); + runner_argprintf (runner, "%s", conf_path); + runner_add_arg (runner, "--config-set-rx"); +} + +static int 
+glusterd_check_gsync_present (int *valid_state) +{ + char buff[PATH_MAX] = {0, }; + runner_t runner = {0,}; + char *ptr = NULL; + int ret = 0; + + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--version", NULL); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + ret = runner_start (&runner); + if (ret == -1) { + if (errno == ENOENT) { + gf_log ("glusterd", GF_LOG_INFO, GEOREP + " module not installed in the system"); + *valid_state = 0; + } + else { + gf_log ("glusterd", GF_LOG_ERROR, GEOREP + " module not working as desired"); + *valid_state = -1; + } + goto out; + } + + ptr = fgets(buff, sizeof(buff), runner_chio (&runner, STDOUT_FILENO)); + if (ptr) { + if (!strstr (buff, "gsyncd")) { + ret = -1; + gf_log ("glusterd", GF_LOG_ERROR, GEOREP" module not " + "working as desired"); + *valid_state = -1; + goto out; + } + } else { + ret = -1; + gf_log ("glusterd", GF_LOG_ERROR, GEOREP" module not " + "working as desired"); + *valid_state = -1; + goto out; + } + + ret = 0; + out: + + runner_end (&runner); + + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; + +} + +static int +create_conf_file (glusterd_conf_t *conf, char *conf_path) +#define RUN_GSYNCD_CMD do { \ + ret = runner_run_reuse (&runner); \ + if (ret == -1) { \ + runner_log (&runner, "glusterd", GF_LOG_ERROR, "command failed"); \ + runner_end (&runner); \ + goto out; \ + } \ + runner_end (&runner); \ +} while (0) +{ + int ret = 0; + runner_t runner = {0,}; + char georepdir[PATH_MAX] = {0,}; + int valid_state = 0; + + valid_state = -1; + ret = glusterd_check_gsync_present (&valid_state); + if (-1 == ret) { + ret = valid_state; + goto out; + } + + ret = snprintf (georepdir, sizeof(georepdir) - 1, "%s/"GEOREP, + conf->workdir); + georepdir[ret] = '\0'; + + /************ + * master pre-configuration + ************/ + + /* remote-gsyncd */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "remote-gsyncd", GSYNCD_PREFIX"/gsyncd", ".", ".", NULL); + RUN_GSYNCD_CMD; + + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "remote-gsyncd", "/nonexistent/gsyncd", + ".", "^ssh:", NULL); + RUN_GSYNCD_CMD; + + /* gluster-command-dir */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/", + ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-params */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "gluster-params", + "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* ssh-command */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "ssh-command"); + runner_argprintf (&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/secret.pem", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* pid-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "pid-file"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.pid", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "state-file"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.status", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-detail-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "state-detail-file"); + 
runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-socket */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "state-socket-unencoded"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* socketdir */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "socketdir", GLUSTERD_SOCK_DIR, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* log-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, + "log-file", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}.log", + ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-log-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, + "gluster-log-file", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}${local_id}.gluster.log", + ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* ignore-deletes */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "ignore-deletes", "true", ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* special-sync-mode */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "special-sync-mode", "partial", ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* change-detector == changelog */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args(&runner, "change-detector", "changelog", ".", ".", NULL); + RUN_GSYNCD_CMD; + + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg(&runner, "working-dir"); + runner_argprintf(&runner, "%s/${mastervol}/${eSlave}", + DEFAULT_VAR_RUN_DIRECTORY); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /************ + * slave pre-configuration + ************/ + + /* gluster-command-dir */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/", + ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-params */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, "gluster-params", + "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + ".", NULL); + RUN_GSYNCD_CMD; + + /* log-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, + "log-file", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.log", + ".", NULL); + RUN_GSYNCD_CMD; + + /* MountBroker log-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, + "log-file-mbr", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr/${session_owner}:${eSlave}.log", + ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-log-file */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_args (&runner, + "gluster-log-file", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.gluster.log", + ".", NULL); + RUN_GSYNCD_CMD; + + out: + return ret ? 
-1 : 0; +} + +static int +glusterd_create_essential_dir_files (glusterd_volinfo_t *volinfo, dict_t *dict, + char *slave, char *slave_ip, + char *slave_vol, char **op_errstr) +{ + int ret = -1; + char *conf_path = NULL; + char *statefile = NULL; + char buf[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + struct stat stbuf = {0,}; + + GF_ASSERT (THIS); + conf = THIS->private; + + ret = dict_get_str (dict, "conf_path", &conf_path); + if (ret) { + snprintf (errmsg, sizeof (errmsg), + "Unable to fetch conf file path."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = dict_get_str (dict, "statefile", &statefile); + if (ret) { + snprintf (errmsg, sizeof (errmsg), + "Unable to fetch statefile path."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = snprintf (buf, sizeof(buf) - 1, "%s/"GEOREP"/%s_%s_%s", + conf->workdir, volinfo->volname, slave_ip, slave_vol); + buf[ret] = '\0'; + ret = mkdir_p (buf, 0777, _gf_true); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to create %s" + ". Error : %s", buf, strerror (errno)); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = snprintf (buf, PATH_MAX, DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/%s", + volinfo->volname); + buf[ret] = '\0'; + ret = mkdir_p (buf, 0777, _gf_true); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to create %s" + ". Error : %s", buf, strerror (errno)); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = lstat (conf_path, &stbuf); + if (!ret) { + gf_log ("", GF_LOG_DEBUG, "Session already running." + " Not creating config file again."); + } else { + ret = create_conf_file (conf, conf_path); + if (ret || lstat (conf_path, &stbuf)) { + snprintf (errmsg, sizeof (errmsg), "Failed to create" + " config file(%s).", conf_path); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + } + + ret = lstat (statefile, &stbuf); + if (!ret) { + gf_log ("", GF_LOG_DEBUG, "Session already running." + " Not creating status file again."); + goto out; + } else { + ret = glusterd_create_status_file (volinfo->volname, slave, + slave_ip, slave_vol, + "Not Started"); + if (ret || lstat (statefile, &stbuf)) { + snprintf (errmsg, sizeof (errmsg), "Unable to create %s" + ". 
Error : %s", statefile, strerror (errno)); + *op_errstr = gf_strdup (errmsg); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + } + +out: + gf_log ("", GF_LOG_DEBUG,"Returning %d", ret); + return ret; +} + +int +glusterd_op_gsync_create (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + char common_pem_file[PATH_MAX] = ""; + char errmsg[PATH_MAX] = ""; + char hooks_args[PATH_MAX] = ""; + char uuid_str [64] = ""; + char *host_uuid = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + char *arg_buf = NULL; + char *volname = NULL; + char *slave = NULL; + int32_t ret = -1; + int32_t is_pem_push = -1; + gf_boolean_t is_force = -1; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + + GF_ASSERT (THIS); + conf = THIS->private; + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + ret = glusterd_op_gsync_args_get (dict, op_errstr, + &volname, &slave, &host_uuid); + if (ret) + goto out; + + snprintf (common_pem_file, sizeof(common_pem_file), + "%s"GLUSTERD_COMMON_PEM_PUB_FILE, conf->workdir); + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Volinfo for %s" + " (master) not found", volname); + goto out; + } + + ret = dict_get_str (dict, "slave_vol", &slave_vol); + if (ret) { + snprintf (errmsg, sizeof (errmsg), + "Unable to fetch slave volume name."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + + ret = dict_get_str (dict, "slave_ip", &slave_ip); + if (ret) { + snprintf (errmsg, sizeof (errmsg), + "Unable to fetch slave IP."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + ret = -1; + goto out; + } + + is_force = dict_get_str_boolean (dict, "force", _gf_false); + + uuid_utoa_r (MY_UUID, uuid_str); + if (!strcmp (uuid_str, host_uuid)) { + ret = dict_get_int32 (dict, "push_pem", &is_pem_push); + if (!ret && is_pem_push) { + gf_log ("", GF_LOG_DEBUG, "Trying to setup" + " pem files in slave"); + is_pem_push = 1; + } else + is_pem_push = 0; + + snprintf(hooks_args, sizeof(hooks_args), + "is_push_pem=%d pub_file=%s slave_ip=%s", + is_pem_push, common_pem_file, slave_ip); + + } else + snprintf(hooks_args, sizeof(hooks_args), + "This argument will stop the hooks script"); + + arg_buf = gf_strdup (hooks_args); + if (!arg_buf) { + gf_log ("", GF_LOG_ERROR, "Failed to" + " gf_strdup"); + if (is_force) { + ret = 0; + goto create_essentials; + } + ret = -1; + goto out; + } + + ret = dict_set_str (dict, "hooks_args", arg_buf); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Failed to set" + " hooks_args in dict."); + if (is_force) { + ret = 0; + goto create_essentials; + } + goto out; + } + +create_essentials: + + ret = glusterd_create_essential_dir_files (volinfo, dict, slave, + slave_ip, slave_vol, + op_errstr); + if (ret) + goto out; + + ret = glusterd_store_slave_in_info (volinfo, slave, + host_uuid, op_errstr, + is_force); + if (ret) { + snprintf (errmsg, sizeof (errmsg), "Unable to store" + " slave info."); + gf_log ("", GF_LOG_ERROR, "%s", errmsg); + goto out; + } + +out: + gf_log ("", GF_LOG_DEBUG,"Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 9ef81a4a7..71d076624 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. 
- - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" @@ -36,18 +26,21 @@ #include "compat.h" #include "compat-errno.h" #include "statedump.h" +#include "run.h" #include "glusterd-mem-types.h" #include "glusterd.h" #include "glusterd-sm.h" #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" +#include "glusterd-locks.h" -#include "glusterd1.h" -#include "cli1.h" -#include "rpc-clnt.h" #include "glusterd1-xdr.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "rpc-clnt.h" #include "glusterd-volgen.h" +#include "glusterd-mountbroker.h" #include <sys/resource.h> #include <inttypes.h> @@ -55,11 +48,38 @@ #include "defaults.c" #include "common-utils.h" -#define glusterd_start_volume(req, volname, flags) \ - glusterd_volume_txn (req, volname, flags, GD_OP_START_VOLUME) +#include "globals.h" +#include "glusterd-syncop.h" + +#ifdef HAVE_BD_XLATOR +#include <lvm2app.h> +#endif + +extern uuid_t global_txn_id; + +int glusterd_big_locked_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, + void *data, rpc_clnt_notify_t notify_fn) +{ + glusterd_conf_t *priv = THIS->private; + int ret = -1; + synclock_lock (&priv->big_lock); + ret = notify_fn (rpc, mydata, event, data); + synclock_unlock (&priv->big_lock); + return ret; +} + +int glusterd_big_locked_handler (rpcsvc_request_t *req, rpcsvc_actor actor_fn) +{ + glusterd_conf_t *priv = THIS->private; + int ret = -1; + + synclock_lock (&priv->big_lock); + ret = actor_fn (req); + synclock_unlock (&priv->big_lock); -#define glusterd_stop_volume(req, volname, flags) \ - glusterd_volume_txn (req, volname, flags, GD_OP_STOP_VOLUME) + return ret; +} static int glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid, @@ -72,11 +92,9 @@ glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid, glusterd_friend_req_ctx_t *ctx = NULL; char rhost[UNIX_PATH_MAX + 1] = {0}; uuid_t friend_uuid = {0}; - char uuid_str[50] = {0,}; dict_t *dict = NULL; - uuid_unparse (uuid, uuid_str); - uuid_parse (uuid_str, friend_uuid); + uuid_parse (uuid_utoa (uuid), friend_uuid); if (!port) port = GF_DEFAULT_BASE_PORT; @@ -84,8 +102,12 @@ glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid, ret = glusterd_friend_find (uuid, rhost, &peerinfo); if (ret) { - ret = glusterd_xfer_friend_add_resp (req, rhost, port, -1, - GF_PROBE_UNKNOWN_PEER); + ret = glusterd_xfer_friend_add_resp (req, hostname, rhost, port, + -1, GF_PROBE_UNKNOWN_PEER); + if (friend_req->vols.vols_val) { + free (friend_req->vols.vols_val); + 
friend_req->vols.vols_val = NULL; + } goto out; } @@ -131,7 +153,6 @@ glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid, event->ctx = ctx; ret = glusterd_friend_sm_inject_event (event); - if (ret) { gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject event %d, " "ret = %d", event->event, ret); @@ -144,19 +165,16 @@ out: if (0 != ret) { if (ctx && ctx->hostname) GF_FREE (ctx->hostname); - if (ctx) - GF_FREE (ctx); + GF_FREE (ctx); if (dict) { if ((!dict->extra_stdfree) && friend_req->vols.vols_val) free (friend_req->vols.vols_val); dict_unref (dict); } else { - if (friend_req->vols.vols_val) - free (friend_req->vols.vols_val); + free (friend_req->vols.vols_val); } - if (event) - GF_FREE (event); + GF_FREE (event); } else { if (peerinfo && (0 == peerinfo->connected)) ret = GLUSTERD_CONNECTION_AWAITED; @@ -164,7 +182,6 @@ out: return ret; } - static int glusterd_handle_unfriend_req (rpcsvc_request_t *req, uuid_t uuid, char *hostname, int port) @@ -227,8 +244,7 @@ out: if (0 != ret) { if (ctx && ctx->hostname) GF_FREE (ctx->hostname); - if (ctx) - GF_FREE (ctx); + GF_FREE (ctx); } return ret; @@ -241,13 +257,14 @@ glusterd_add_peer_detail_to_dict (glusterd_peerinfo_t *peerinfo, int ret = -1; char key[256] = {0, }; + char *peer_uuid_str = NULL; GF_ASSERT (peerinfo); GF_ASSERT (friends); snprintf (key, 256, "friend%d.uuid", count); - uuid_unparse (peerinfo->uuid, peerinfo->uuid_str); - ret = dict_set_str (friends, key, peerinfo->uuid_str); + peer_uuid_str = gd_peer_uuid_str (peerinfo); + ret = dict_set_str (friends, key, peer_uuid_str); if (ret) goto out; @@ -261,6 +278,11 @@ glusterd_add_peer_detail_to_dict (glusterd_peerinfo_t *peerinfo, if (ret) goto out; + snprintf (key, 256, "friend%d.stateId", count); + ret = dict_set_int32 (friends, key, peerinfo->state.state); + if (ret) + goto out; + snprintf (key, 256, "friend%d.state", count); ret = dict_set_str (friends, key, glusterd_friend_sm_state_name_get(peerinfo->state.state)); @@ -276,10 +298,34 @@ out: return ret; } +struct args_pack { + dict_t *dict; + int vol_count; + int opt_count; +}; + +static int +_build_option_key (dict_t *d, char *k, data_t *v, void *tmp) +{ + char reconfig_key[256] = {0, }; + struct args_pack *pack = NULL; + int ret = -1; + + pack = tmp; + if (strcmp (k, GLUSTERD_GLOBAL_OPT_VERSION) == 0) + return 0; + snprintf (reconfig_key, 256, "volume%d.option.%s", + pack->vol_count, k); + ret = dict_set_str (pack->dict, reconfig_key, v->data); + if (0 == ret) + pack->opt_count++; + + return 0; +} int glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, - dict_t *volumes, int count) + dict_t *volumes, int count) { int ret = -1; @@ -287,16 +333,23 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo = NULL; char *buf = NULL; int i = 1; - data_pair_t *pairs = NULL; - char reconfig_key[256] = {0, }; dict_t *dict = NULL; - data_t *value = NULL; - int opt_count = 0; - + glusterd_conf_t *priv = NULL; + char *volume_id_str = NULL; + struct args_pack pack = {0,}; + xlator_t *this = NULL; +#ifdef HAVE_BD_XLATOR + int caps = 0; +#endif GF_ASSERT (volinfo); GF_ASSERT (volumes); + this = THIS; + priv = this->private; + + GF_ASSERT (priv); + snprintf (key, 256, "volume%d.name", count); ret = dict_set_str (volumes, key, volinfo->volname); if (ret) @@ -312,13 +365,38 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + /* As of now, the snap volumes are also displayed as part of + volume info command. 
So this change is to display whether + the volume is original volume or the snap_volume. If + displaying of snap volumes in volume info o/p is not needed + this should be removed. + */ + snprintf (key, 256, "volume%d.snap_volume", count); + ret = dict_set_int32 (volumes, key, volinfo->is_snap_volume); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "failed to set whether " + "the volume is a snap volume or actual volume (%s)", + volinfo->volname); + goto out; + } + snprintf (key, 256, "volume%d.brick_count", count); ret = dict_set_int32 (volumes, key, volinfo->brick_count); if (ret) goto out; - snprintf (key, 256, "volume%d.sub_count", count); - ret = dict_set_int32 (volumes, key, volinfo->sub_count); + snprintf (key, 256, "volume%d.dist_count", count); + ret = dict_set_int32 (volumes, key, volinfo->dist_leaf_count); + if (ret) + goto out; + + snprintf (key, 256, "volume%d.stripe_count", count); + ret = dict_set_int32 (volumes, key, volinfo->stripe_count); + if (ret) + goto out; + + snprintf (key, 256, "volume%d.replica_count", count); + ret = dict_set_int32 (volumes, key, volinfo->replica_count); if (ret) goto out; @@ -327,8 +405,91 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id)); + if (!volume_id_str) + goto out; + + snprintf (key, sizeof (key), "volume%d.volume_id", count); + ret = dict_set_dynstr (volumes, key, volume_id_str); + if (ret) + goto out; + + snprintf (key, 256, "volume%d.rebalance", count); + ret = dict_set_int32 (volumes, key, volinfo->rebal.defrag_cmd); + if (ret) + goto out; + +#ifdef HAVE_BD_XLATOR + if (volinfo->caps) { + caps = 0; + snprintf (key, 256, "volume%d.xlator0", count); + buf = GF_MALLOC (256, gf_common_mt_char); + if (!buf) { + ret = ENOMEM; + goto out; + } + if (volinfo->caps & CAPS_BD) + snprintf (buf, 256, "BD"); + ret = dict_set_dynstr (volumes, key, buf); + if (ret) { + GF_FREE (buf); + goto out; + } + + if (volinfo->caps & CAPS_THIN) { + snprintf (key, 256, "volume%d.xlator0.caps%d", count, + caps++); + buf = GF_MALLOC (256, gf_common_mt_char); + if (!buf) { + ret = ENOMEM; + goto out; + } + snprintf (buf, 256, "thin"); + ret = dict_set_dynstr (volumes, key, buf); + if (ret) { + GF_FREE (buf); + goto out; + } + } + + if (volinfo->caps & CAPS_OFFLOAD_COPY) { + snprintf (key, 256, "volume%d.xlator0.caps%d", count, + caps++); + buf = GF_MALLOC (256, gf_common_mt_char); + if (!buf) { + ret = ENOMEM; + goto out; + } + snprintf (buf, 256, "offload_copy"); + ret = dict_set_dynstr (volumes, key, buf); + if (ret) { + GF_FREE (buf); + goto out; + } + } + + if (volinfo->caps & CAPS_OFFLOAD_SNAPSHOT) { + snprintf (key, 256, "volume%d.xlator0.caps%d", count, + caps++); + buf = GF_MALLOC (256, gf_common_mt_char); + if (!buf) { + ret = ENOMEM; + goto out; + } + snprintf (buf, 256, "offload_snapshot"); + ret = dict_set_dynstr (volumes, key, buf); + if (ret) { + GF_FREE (buf); + goto out; + } + } + + } +#endif + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { char brick[1024] = {0,}; + char brick_uuid[64] = {0,}; snprintf (key, 256, "volume%d.brick%d", count, i); snprintf (brick, 1024, "%s:%s", brickinfo->hostname, brickinfo->path); @@ -336,6 +497,25 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, ret = dict_set_dynstr (volumes, key, buf); if (ret) goto out; + snprintf (key, 256, "volume%d.brick%d.uuid", count, i); + snprintf (brick_uuid, 64, "%s", uuid_utoa (brickinfo->uuid)); + buf = gf_strdup (brick_uuid); + if (!buf) + goto 
out; + ret = dict_set_dynstr (volumes, key, buf); + if (ret) + goto out; + +#ifdef HAVE_BD_XLATOR + if (volinfo->caps & CAPS_BD) { + snprintf (key, 256, "volume%d.vg%d", count, i); + snprintf (brick, 1024, "%s", brickinfo->vg); + buf = gf_strdup (brick); + ret = dict_set_dynstr (volumes, key, buf); + if (ret) + goto out; + } +#endif i++; } @@ -345,28 +525,14 @@ glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, goto out; } - pairs = dict->members_list; - - while (pairs) { - if (1 == glusterd_check_option_exists (pairs->key, NULL)) { - value = pairs->value; - if (!value) - continue; - - snprintf (reconfig_key, 256, "volume%d.option.%s", count, - pairs->key); - ret = dict_set_str (volumes, reconfig_key, value->data); - if (!ret) - opt_count++; - } - pairs = pairs->next; - } - - snprintf (key, 256, "volume%d.opt_count", count); - ret = dict_set_int32 (volumes, key, opt_count); - if (ret) - goto out; + pack.dict = volumes; + pack.vol_count = count; + pack.opt_count = 0; + dict_foreach (dict, _build_option_key, (void *) &pack); + dict_foreach (priv->opts, _build_option_key, &pack); + snprintf (key, 256, "volume%d.opt_count", pack.vol_count); + ret = dict_set_int32 (volumes, key, pack.opt_count); out: return ret; } @@ -376,13 +542,18 @@ glusterd_friend_find (uuid_t uuid, char *hostname, glusterd_peerinfo_t **peerinfo) { int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); if (uuid) { ret = glusterd_friend_find_by_uuid (uuid, peerinfo); if (ret) { - gf_log ("glusterd", GF_LOG_NORMAL, - "Unable to find peer by uuid"); + gf_log (this->name, GF_LOG_DEBUG, + "Unable to find peer by uuid: %s", + uuid_utoa (uuid)); } else { goto out; } @@ -393,7 +564,7 @@ glusterd_friend_find (uuid_t uuid, char *hostname, ret = glusterd_friend_find_by_hostname (hostname, peerinfo); if (ret) { - gf_log ("glusterd", GF_LOG_NORMAL, + gf_log (this->name, GF_LOG_DEBUG, "Unable to find hostname: %s", hostname); } else { goto out; @@ -404,199 +575,575 @@ out: return ret; } -int -glusterd_handle_cluster_lock (rpcsvc_request_t *req) +int32_t +glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + char *err_str, size_t err_len) { - gd1_mgmt_cluster_lock_req lock_req = {{0},}; - int32_t ret = -1; - char str[50] = {0,}; - glusterd_op_lock_ctx_t *ctx = NULL; + int32_t ret = -1; + dict_t *dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int32_t locked = 0; + char *tmp = NULL; + char *volname = NULL; + uuid_t *txn_id = NULL; + uuid_t *originator_uuid = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; GF_ASSERT (req); + GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX)); + GF_ASSERT (NULL != ctx); - if (!gd_xdr_to_mgmt_cluster_lock_req (req->msg[0], &lock_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + dict = ctx; + + /* Generate a transaction-id for this operation and + * save it in the dict. This transaction id distinguishes + * each transaction, and helps separate opinfos in the + * op state machine. 
*/ + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!txn_id) + goto out; + + uuid_generate (*txn_id); + + ret = dict_set_bin (dict, "transaction_id", + txn_id, sizeof(*txn_id)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); goto out; } - uuid_unparse (lock_req.uuid, str); - gf_log ("glusterd", GF_LOG_NORMAL, - "Received LOCK from uuid: %s", str); + gf_log (this->name, GF_LOG_DEBUG, + "Transaction_id = %s", uuid_utoa (*txn_id)); + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator uuid."); + goto out; + } - if (!ctx) { - //respond here - return -1; + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ + if (priv->op_version < 3) { + ret = glusterd_lock (MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock on localhost, ret: %d", + ret); + snprintf (err_str, err_len, + "Another transaction is in progress. " + "Please try again after sometime."); + goto out; + } + } else { + /* If no volname is given as a part of the command, locks will + * not be held */ + ret = dict_get_str (dict, "volname", &tmp); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Failed to get volume " + "name"); + goto local_locking_done; + } else { + /* Use a copy of volname, as cli response will be + * sent before the unlock, and the volname in the + * dict, might be removed */ + volname = gf_strdup (tmp); + if (!volname) + goto out; + } + + ret = glusterd_mgmt_v3_lock (volname, MY_UUID, "vol"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", volname); + snprintf (err_str, err_len, + "Another transaction is in progress for %s. " + "Please try again after sometime.", volname); + goto out; + } } - uuid_copy (ctx->uuid, lock_req.uuid); - ctx->req = req; + locked = 1; + gf_log (this->name, GF_LOG_DEBUG, "Acquired lock on localhost"); + +local_locking_done: + + /* If no volname is given as a part of the command, locks will + * not be held, hence sending stage event. 
*/ + if (volname) + event_type = GD_OP_EVENT_START_LOCK; + else { + txn_op_info.state.state = GD_OP_STATE_LOCK_SENT; + event_type = GD_OP_EVENT_ALL_ACC; + } + + /* Save opinfo for this transaction with the transaction id */ + glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, ctx, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + if (ctx) + dict_unref (ctx); + goto out; + } - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, ctx); + ret = glusterd_op_sm_inject_event (event_type, txn_id, ctx); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to acquire cluster" + " lock."); + goto out; + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + if (locked && ret) { + /* Based on the op-version, we release the + * cluster or mgmt_v3 lock */ + if (priv->op_version < 3) + glusterd_unlock (MY_UUID); + else { + ret = glusterd_mgmt_v3_unlock (volname, MY_UUID, + "vol"); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + ret = -1; + } + } + + if (volname) + GF_FREE (volname); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } int -glusterd_handle_stage_op (rpcsvc_request_t *req) +__glusterd_handle_cluster_lock (rpcsvc_request_t *req) { - int32_t ret = -1; - char str[50] = {0,}; - gd1_mgmt_stage_op_req stage_req = {{0,}}; - glusterd_op_stage_ctx_t *ctx = NULL; + dict_t *op_ctx = NULL; + int32_t ret = -1; + gd1_mgmt_cluster_lock_req lock_req = {{0},}; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_op_t op = GD_OP_EVENT_LOCK; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + uuid_t *txn_id = &global_txn_id; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); GF_ASSERT (req); - if (!gd_xdr_to_mgmt_stage_op_req (req->msg[0], &stage_req)) { - //failed to decode msg; + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock " + "request received from peer"); req->rpc_err = GARBAGE_ARGS; goto out; } - uuid_unparse (stage_req.uuid, str); - gf_log ("glusterd", GF_LOG_NORMAL, - "Received stage op from uuid: %s", str); + gf_log (this->name, GF_LOG_DEBUG, "Received LOCK from uuid: %s", + uuid_utoa (lock_req.uuid)); - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_stage_ctx_t); + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); if (!ctx) { //respond here return -1; } - //CHANGE THIS - uuid_copy (ctx->stage_req.uuid, stage_req.uuid); - ctx->stage_req.op = stage_req.op; - ctx->stage_req.buf.buf_len = stage_req.buf.buf_len; - ctx->stage_req.buf.buf_val = GF_CALLOC (1, stage_req.buf.buf_len, - gf_gld_mt_string); - if (!ctx->stage_req.buf.buf_val) + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + ctx->dict = NULL; + + op_ctx = dict_new (); + if (!op_ctx) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set new dict"); goto out; + } - memcpy (ctx->stage_req.buf.buf_val, stage_req.buf.buf_val, - stage_req.buf.buf_len); + glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, op_ctx, req); - ctx->req = req; + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + dict_unref (txn_op_info.op_ctx); + goto out; + } - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP, ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_LOCK"); out: - if (stage_req.buf.buf_val) - free (stage_req.buf.buf_val);//malloced by xdr + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + + glusterd_friend_sm (); + glusterd_op_sm (); + return ret; } int -glusterd_handle_commit_op (rpcsvc_request_t *req) +glusterd_handle_cluster_lock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cluster_lock); +} + +int +glusterd_req_ctx_create (rpcsvc_request_t *rpc_req, + glusterd_op_t op, uuid_t uuid, + char *buf_val, size_t buf_len, + gf_gld_mem_types_t mem_type, + glusterd_req_ctx_t **req_ctx_out) +{ + int ret = -1; + char str[50] = {0,}; + glusterd_req_ctx_t *req_ctx = NULL; + dict_t *dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + uuid_unparse (uuid, str); + gf_log (this->name, GF_LOG_DEBUG, "Received op from uuid %s", str); + + dict = dict_new (); + if (!dict) + goto out; + + req_ctx = GF_CALLOC (1, sizeof (*req_ctx), mem_type); + if (!req_ctx) { + goto out; + } + + uuid_copy (req_ctx->uuid, uuid); + req_ctx->op = op; + ret = dict_unserialize (buf_val, buf_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + req_ctx->dict = dict; + req_ctx->req = rpc_req; + *req_ctx_out = req_ctx; + ret = 0; +out: + if (ret) { + if (dict) + dict_unref (dict); + GF_FREE (req_ctx); + } + return ret; +} + +int +__glusterd_handle_stage_op (rpcsvc_request_t *req) { int32_t ret = -1; - char str[50] = {0,}; - gd1_mgmt_commit_op_req commit_req = {{0},}; - glusterd_op_commit_ctx_t *ctx = NULL; + glusterd_req_ctx_t *req_ctx = NULL; + gd1_mgmt_stage_op_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; + glusterd_op_info_t txn_op_info = {{0},}; + glusterd_op_sm_state_info_t state; + this = THIS; + GF_ASSERT (this); GF_ASSERT (req); - if (!gd_xdr_to_mgmt_commit_op_req (req->msg[0], &commit_req)) { - //failed to decode msg; + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_stage_op_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode stage " + "request received from peer"); req->rpc_err = GARBAGE_ARGS; goto out; } - uuid_unparse (commit_req.uuid, str); + if 
(glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } - gf_log ("glusterd", GF_LOG_NORMAL, - "Received commit op from uuid: %s", str); + ret = glusterd_req_ctx_create (req, op_req.op, op_req.uuid, + op_req.buf.buf_val, op_req.buf.buf_len, + gf_gld_mt_op_stage_ctx_t, &req_ctx); + if (ret) + goto out; - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_commit_ctx_t); + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); - if (!ctx) { - //respond here - return -1; + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + /* In cases where there is no volname, the receivers won't have a + * transaction opinfo created, as for those operations, the locking + * phase where the transaction opinfos are created, won't be called. */ + ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "No transaction's opinfo set"); + + state.state = GD_OP_STATE_LOCKED; + glusterd_txn_opinfo_init (&txn_op_info, &state, + &op_req.op, req_ctx->dict, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + dict_unref (req_ctx->dict); + goto out; + } } - ctx->req = req; - //CHANGE THIS - uuid_copy (ctx->stage_req.uuid, commit_req.uuid); - ctx->stage_req.op = commit_req.op; - ctx->stage_req.buf.buf_len = commit_req.buf.buf_len; - ctx->stage_req.buf.buf_val = GF_CALLOC (1, commit_req.buf.buf_len, - gf_gld_mt_string); - if (!ctx->stage_req.buf.buf_val) + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP, + txn_id, req_ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_STAGE_OP"); + + out: + free (op_req.buf.buf_val);//malloced by xdr + glusterd_friend_sm (); + glusterd_op_sm (); + return ret; +} + +int +glusterd_handle_stage_op (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_stage_op); +} + + +int +__glusterd_handle_commit_op (rpcsvc_request_t *req) +{ + int32_t ret = -1; + glusterd_req_ctx_t *req_ctx = NULL; + gd1_mgmt_commit_op_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_commit_op_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode commit " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
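The stage handler above defaults txn_id to &global_txn_id and overrides it only when the peer's dict carries a "transaction_id"; when no opinfo exists yet (volname-less ops skip the locking phase that would create it), it seeds one in GD_OP_STATE_LOCKED. A minimal model of the opinfo lookup this relies on, assuming a flat list keyed by the raw 16-byte transaction id — the real table lives in glusterd's private state, not here:

#include <stdlib.h>
#include <string.h>

typedef unsigned char model_uuid_t[16];     /* stand-in for uuid_t */

struct model_txn {
        model_uuid_t      txn_id;
        void             *opinfo;           /* glusterd_op_info_t in reality */
        struct model_txn *next;
};

static struct model_txn *model_txns;

static int
model_set_txn_opinfo (const model_uuid_t txn_id, void *opinfo)
{
        struct model_txn *e = calloc (1, sizeof (*e));

        if (!e)
                return -1;
        memcpy (e->txn_id, txn_id, sizeof (model_uuid_t));
        e->opinfo = opinfo;
        e->next = model_txns;
        model_txns = e;
        return 0;
}

static void *
model_get_txn_opinfo (const model_uuid_t txn_id)
{
        struct model_txn *e;

        for (e = model_txns; e; e = e->next)
                if (!memcmp (e->txn_id, txn_id, sizeof (model_uuid_t)))
                        return e->opinfo;
        return NULL;    /* caller falls back to creating one, as above */
}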
Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; goto out; + } - memcpy (ctx->stage_req.buf.buf_val, commit_req.buf.buf_val, - commit_req.buf.buf_len); + //the structures should always be equal + GF_ASSERT (sizeof (gd1_mgmt_commit_op_req) == sizeof (gd1_mgmt_stage_op_req)); + ret = glusterd_req_ctx_create (req, op_req.op, op_req.uuid, + op_req.buf.buf_val, op_req.buf.buf_len, + gf_gld_mt_op_commit_ctx_t, &req_ctx); + if (ret) + goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP, ctx); + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP, + txn_id, req_ctx); out: - if (commit_req.buf.buf_val) - free (commit_req.buf.buf_val);//malloced by xdr + free (op_req.buf.buf_val);//malloced by xdr + glusterd_friend_sm (); + glusterd_op_sm (); return ret; } int -glusterd_handle_cli_probe (rpcsvc_request_t *req) +glusterd_handle_commit_op (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_commit_op); +} + +int +__glusterd_handle_cli_probe (rpcsvc_request_t *req) { int32_t ret = -1; - gf1_cli_probe_req cli_req = {0,}; - glusterd_peerinfo_t *peerinfo = NULL; + gf_cli_req cli_req = {{0,},}; + glusterd_peerinfo_t *peerinfo = NULL; + gf_boolean_t run_fsm = _gf_true; + xlator_t *this = NULL; + char *bind_name = NULL; + dict_t *dict = NULL; + char *hostname = NULL; + int port = 0; + GF_ASSERT (req); + this = THIS; - if (!gf_xdr_to_cli_probe_req (req->msg[0], &cli_req)) { + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { //failed to decode msg; gf_log ("", GF_LOG_ERROR, "xdr decoding error"); req->rpc_err = GARBAGE_ARGS; goto out; } - gf_cmd_log ("peer probe", " on host %s:%d", cli_req.hostname, - cli_req.port); - gf_log ("glusterd", GF_LOG_NORMAL, "Received CLI probe req %s %d", - cli_req.hostname, cli_req.port); + if (cli_req.dict.dict_len) { + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "unserialize req-buffer to dictionary"); + goto out; + } + } + + ret = dict_get_str (dict, "hostname", &hostname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get hostname"); + goto out; + } + + ret = dict_get_int32 (dict, "port", &port); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get port"); + goto out; + } - if (!(ret = glusterd_is_local_addr(cli_req.hostname))) { + if (glusterd_is_any_volume_in_server_quorum (this) && + !does_gd_meet_server_quorum (this)) { + glusterd_xfer_cli_probe_resp (req, -1, GF_PROBE_QUORUM_NOT_MET, + NULL, hostname, port, dict); + gf_log (this->name, GF_LOG_ERROR, "Quorum does not meet, " + "rejecting operation"); + ret = 0; + goto out; + } + + gf_log ("glusterd", GF_LOG_INFO, "Received CLI probe req %s %d", + hostname, port); + + if (dict_get_str(this->options,"transport.socket.bind-address", + &bind_name) == 0) { + gf_log ("glusterd", GF_LOG_DEBUG, + "only checking probe address vs. 
bind address"); + ret = gf_is_same_address (bind_name, hostname); + } + else { + ret = gf_is_local_addr (hostname); + } + if (ret) { glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_LOCALHOST, - cli_req.hostname, cli_req.port); + NULL, hostname, port, dict); + ret = 0; goto out; } - if (!(ret = glusterd_friend_find_by_hostname(cli_req.hostname, - &peerinfo))) { - if ((peerinfo->state.state != GD_FRIEND_STATE_REQ_RCVD) - || (peerinfo->state.state != GD_FRIEND_STATE_DEFAULT)) { - gf_log ("glusterd", GF_LOG_NORMAL, "Probe host %s port %d" - "already a friend", cli_req.hostname, cli_req.port); + if (!(ret = glusterd_friend_find_by_hostname (hostname, &peerinfo))) { + if (strcmp (peerinfo->hostname, hostname) == 0) { + + gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port " + "%d already a peer", hostname, port); glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, - cli_req.hostname, cli_req.port); + NULL, hostname, port, + dict); goto out; } } - ret = glusterd_probe_begin (req, cli_req.hostname, cli_req.port); + ret = glusterd_probe_begin (req, hostname, port, dict); + + if (ret == GLUSTERD_CONNECTION_AWAITED) { + //fsm should be run after connection establishes + run_fsm = _gf_false; + ret = 0; + } - gf_cmd_log ("peer probe","on host %s:%d %s",cli_req.hostname, cli_req.port, - (ret) ? "FAILED" : "SUCCESS"); out: - if (cli_req.hostname) - free (cli_req.hostname);//its malloced by xdr + free (cli_req.dict.dict_val); + + if (run_fsm) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + return ret; } int -glusterd_handle_cli_deprobe (rpcsvc_request_t *req) +glusterd_handle_cli_probe (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_cli_probe); +} + +int +__glusterd_handle_cli_deprobe (rpcsvc_request_t *req) { int32_t ret = -1; - gf1_cli_probe_req cli_req = {0,}; - uuid_t uuid = {0}; - int op_errno = 0; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + gf_cli_req cli_req = {{0,},}; + uuid_t uuid = {0}; + int op_errno = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + char *hostname = NULL; + int port = 0; + int flags = 0; this = THIS; GF_ASSERT (this); @@ -604,59 +1151,113 @@ glusterd_handle_cli_deprobe (rpcsvc_request_t *req) GF_ASSERT (priv); GF_ASSERT (req); - if (!gf_xdr_to_cli_probe_req (req->msg[0], &cli_req)) { + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto out; } - gf_log ("glusterd", GF_LOG_NORMAL, "Received CLI deprobe req"); + if (cli_req.dict.dict_len) { + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "unserialize req-buffer to dictionary"); + goto out; + } + } + + gf_log ("glusterd", GF_LOG_INFO, "Received CLI deprobe req"); + + ret = dict_get_str (dict, "hostname", &hostname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get hostname"); + goto out; + } + + ret = dict_get_int32 (dict, "port", &port); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get port"); + goto out; + } + + ret = dict_get_int32 (dict, "flags", &flags); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get flags"); + goto out; + } - ret = glusterd_hostname_to_uuid (cli_req.hostname, uuid); + ret = glusterd_hostname_to_uuid (hostname, uuid); if (ret) { op_errno = GF_DEPROBE_NOT_FRIEND; goto out; } - if (!uuid_compare (uuid, priv->uuid)) { + if (!uuid_compare (uuid, 
MY_UUID)) { op_errno = GF_DEPROBE_LOCALHOST; ret = -1; goto out; } - if (!uuid_is_null (uuid)) { - ret = glusterd_all_volume_cond_check ( - glusterd_friend_brick_belongs, - -1, &uuid); - if (ret) { - op_errno = GF_DEPROBE_BRICK_EXIST; + if (!(flags & GF_CLI_FLAG_OP_FORCE)) { + if (!uuid_is_null (uuid)) { + /* Check if peers are connected, except peer being detached*/ + if (!glusterd_chk_peers_connected_befriended (uuid)) { + ret = -1; + op_errno = GF_DEPROBE_FRIEND_DOWN; + goto out; + } + ret = glusterd_all_volume_cond_check ( + glusterd_friend_brick_belongs, + -1, &uuid); + if (ret) { + op_errno = GF_DEPROBE_BRICK_EXIST; + goto out; + } + } + + if (glusterd_is_any_volume_in_server_quorum (this) && + !does_gd_meet_server_quorum (this)) { + gf_log (this->name, GF_LOG_ERROR, "Quorum does not " + "meet, rejecting operation"); + ret = -1; + op_errno = GF_DEPROBE_QUORUM_NOT_MET; goto out; } } if (!uuid_is_null (uuid)) { - ret = glusterd_deprobe_begin (req, cli_req.hostname, - cli_req.port, uuid); + ret = glusterd_deprobe_begin (req, hostname, port, uuid, dict); } else { - ret = glusterd_deprobe_begin (req, cli_req.hostname, - cli_req.port, NULL); + ret = glusterd_deprobe_begin (req, hostname, port, NULL, dict); } - gf_cmd_log ("peer deprobe", "on host %s:%d %s", cli_req.hostname, - cli_req.port, (ret) ? "FAILED" : "SUCCESS"); out: + free (cli_req.dict.dict_val); + if (ret) { - ret = glusterd_xfer_cli_deprobe_resp (req, ret, op_errno, - cli_req.hostname); + ret = glusterd_xfer_cli_deprobe_resp (req, ret, op_errno, NULL, + hostname, dict); } - if (cli_req.hostname) - free (cli_req.hostname);//malloced by xdr + glusterd_friend_sm (); + glusterd_op_sm (); + return ret; } int -glusterd_handle_cli_list_friends (rpcsvc_request_t *req) +glusterd_handle_cli_deprobe (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_cli_deprobe); +} + +int +__glusterd_handle_cli_list_friends (rpcsvc_request_t *req) { int32_t ret = -1; gf1_cli_peer_list_req cli_req = {0,}; @@ -664,13 +1265,15 @@ glusterd_handle_cli_list_friends (rpcsvc_request_t *req) GF_ASSERT (req); - if (!gf_xdr_to_cli_peer_list_req (req->msg[0], &cli_req)) { + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf1_cli_peer_list_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto out; } - gf_log ("glusterd", GF_LOG_NORMAL, "Received cli list req"); + gf_log ("glusterd", GF_LOG_INFO, "Received cli list req"); if (cli_req.dict.dict_len) { /* Unserialize the dictionary */ @@ -694,25 +1297,38 @@ glusterd_handle_cli_list_friends (rpcsvc_request_t *req) out: if (dict) dict_unref (dict); + + glusterd_friend_sm (); + glusterd_op_sm (); + return ret; } int -glusterd_handle_cli_get_volume (rpcsvc_request_t *req) +glusterd_handle_cli_list_friends (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_list_friends); +} + +int +__glusterd_handle_cli_get_volume (rpcsvc_request_t *req) { int32_t ret = -1; - gf1_cli_get_vol_req cli_req = {0,}; + gf_cli_req cli_req = {{0,}}; dict_t *dict = NULL; + int32_t flags = 0; GF_ASSERT (req); - if (!gf_xdr_to_cli_get_vol_req (req->msg[0], &cli_req)) { + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto out; } - gf_log ("glusterd", GF_LOG_NORMAL, "Received get vol req"); + gf_log ("glusterd", GF_LOG_INFO, "Received get vol req"); if (cli_req.dict.dict_len) { /* Unserialize the dictionary */ @@ -731,589 +1347,331 @@ 
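Worth spelling out: every safety check in the detach path above is skipped once GF_CLI_FLAG_OP_FORCE is set. Condensed below, with reject() as a hypothetical shorthand for the set-op_errno-and-goto-out sequence, and the !uuid_is_null guard elided:

        if (!(flags & GF_CLI_FLAG_OP_FORCE)) {
                if (!glusterd_chk_peers_connected_befriended (uuid))
                        return reject (GF_DEPROBE_FRIEND_DOWN);

                if (glusterd_all_volume_cond_check (
                            glusterd_friend_brick_belongs, -1, &uuid))
                        return reject (GF_DEPROBE_BRICK_EXIST);

                if (glusterd_is_any_volume_in_server_quorum (this) &&
                    !does_gd_meet_server_quorum (this))
                        return reject (GF_DEPROBE_QUORUM_NOT_MET);
        }
        /* a forced detach falls straight through to glusterd_deprobe_begin */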
glusterd_handle_cli_get_volume (rpcsvc_request_t *req) } } - ret = glusterd_get_volumes (req, dict, cli_req.flags); + ret = dict_get_int32 (dict, "flags", &flags); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "failed to get flags"); + goto out; + } + + ret = glusterd_get_volumes (req, dict, flags); out: if (dict) dict_unref (dict); + + glusterd_friend_sm (); + glusterd_op_sm (); + return ret; } int -glusterd_handle_create_volume (rpcsvc_request_t *req) -{ - int32_t ret = -1; - gf1_cli_create_vol_req cli_req = {0,}; - dict_t *dict = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - char *brick = NULL; - char *bricks = NULL; - char *volname = NULL; - int brick_count = 0; - char *tmpptr = NULL; - int i = 0; - char *brick_list = NULL; - void *cli_rsp = NULL; - char err_str[2048] = {0,}; - gf1_cli_create_vol_rsp rsp = {0,}; - glusterd_conf_t *priv = NULL; - int err_ret = 0; - xlator_t *this = NULL; - char *free_ptr = NULL; - char *trans_type = NULL; - uuid_t volume_id = {0,}; - char volid[64] = {0,}; - glusterd_brickinfo_t *tmpbrkinfo = NULL; - glusterd_volinfo_t tmpvolinfo = {{0},}; +glusterd_handle_cli_get_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_get_volume); +} + +int +__glusterd_handle_cli_uuid_reset (rpcsvc_request_t *req) +{ + int ret = -1; + dict_t *dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + uuid_t uuid = {0}; + gf_cli_rsp rsp = {0,}; + gf_cli_req cli_req = {{0,}}; + char msg_str[2048] = {0,}; GF_ASSERT (req); this = THIS; - GF_ASSERT(this); - priv = this->private; + GF_ASSERT (priv); - INIT_LIST_HEAD (&tmpvolinfo.bricks); - - if (!gf_xdr_to_cli_create_vol_req (req->msg[0], &cli_req)) { + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto out; } - gf_log ("glusterd", GF_LOG_NORMAL, "Received create volume req"); + gf_log ("glusterd", GF_LOG_DEBUG, "Received uuid reset req"); - if (cli_req.bricks.bricks_len) { + if (cli_req.dict.dict_len) { /* Unserialize the dictionary */ dict = dict_new (); - ret = dict_unserialize (cli_req.bricks.bricks_val, - cli_req.bricks.bricks_len, + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, &dict); if (ret < 0) { gf_log ("glusterd", GF_LOG_ERROR, "failed to " "unserialize req-buffer to dictionary"); + snprintf (msg_str, sizeof (msg_str), "Unable to decode " + "the buffer"); goto out; } else { - dict->extra_stdfree = cli_req.bricks.bricks_val; + dict->extra_stdfree = cli_req.dict.dict_val; } } - ret = dict_get_str (dict, "volname", &volname); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } - gf_cmd_log ("Volume create", "on volname: %s attempted", volname); - - if ((ret = glusterd_check_volume_exists (volname))) { - snprintf(err_str, 2048, "Volname %s already exists", - volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); - err_ret = 1; + /* In the above section if dict_unserialize is successful, ret is set + * to zero. + */ + ret = -1; + // Do not allow peer reset if there are any volumes in the cluster + if (!list_empty (&priv->volumes)) { + snprintf (msg_str, sizeof (msg_str), "volumes are already " + "present in the cluster. 
Resetting uuid is not " + "allowed"); + gf_log (this->name, GF_LOG_WARNING, "%s", msg_str); goto out; } - ret = dict_get_int32 (dict, "count", &brick_count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); + // Do not allow peer reset if trusted storage pool is already formed + if (!list_empty (&priv->peers)) { + snprintf (msg_str, sizeof (msg_str),"trusted storage pool " + "has been already formed. Please detach this peer " + "from the pool and reset its uuid."); + gf_log (this->name, GF_LOG_WARNING, "%s", msg_str); goto out; } - ret = dict_get_str (dict, "transport", &trans_type); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get transport-type"); - goto out; - } - ret = dict_get_str (dict, "bricks", &bricks); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get bricks"); - goto out; - } + uuid_copy (uuid, priv->uuid); + ret = glusterd_uuid_generate_save (); - uuid_generate (volume_id); - uuid_unparse (volume_id, volid); - free_ptr = gf_strdup (volid); - ret = dict_set_dynstr (dict, "volume-id", free_ptr); - if (ret) { - gf_log ("", GF_LOG_ERROR, "unable to set volume-id"); + if (!uuid_compare (uuid, MY_UUID)) { + snprintf (msg_str, sizeof (msg_str), "old uuid and the new uuid" + " are same. Try gluster peer reset again"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg_str); + ret = -1; goto out; } - free_ptr = NULL; - - if (bricks) { - brick_list = gf_strdup (bricks); - free_ptr = brick_list; - } - - gf_cmd_log ("Volume create", "on volname: %s type:%s count:%d bricks:%s", - cli_req.volname, ((cli_req.type == 0)? "DEFAULT": - ((cli_req.type == 1)? "STRIPE":"REPLICATE")), cli_req.count, - bricks); - - - while ( i < brick_count) { - i++; - brick= strtok_r (brick_list, " \n", &tmpptr); - brick_list = tmpptr; - ret = glusterd_brickinfo_from_brick (brick, &brickinfo); - if (ret) - goto out; - - ret = glusterd_new_brick_validate (brick, brickinfo, err_str, - sizeof (err_str)); - if (ret) { - err_ret = 1; - goto out; - } - ret = glusterd_volume_brickinfo_get (brickinfo->uuid, - brickinfo->hostname, - brickinfo->path, - &tmpvolinfo, &tmpbrkinfo); - if (!ret) { - err_ret = 1; - snprintf (err_str, sizeof (err_str), "Brick: %s:%s, %s" - " in the arguments mean the same", - tmpbrkinfo->hostname, tmpbrkinfo->path, - brick); - goto out; - } - list_add_tail (&brickinfo->brick_list, &tmpvolinfo.bricks); - brickinfo = NULL; - } - - ret = glusterd_create_volume (req, dict); - - gf_cmd_log ("Volume create", "on volname: %s %s", volname, - ((ret || err_ret) != 0) ? 
"FAILED": "SUCCESS"); out: - if ((err_ret || ret) && dict) - dict_unref (dict); - if (err_ret) { + if (ret) { rsp.op_ret = -1; - rsp.op_errno = 0; - rsp.volname = ""; - rsp.op_errstr = err_str; - cli_rsp = &rsp; - glusterd_submit_reply(req, cli_rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_create_vol_rsp); - if (!glusterd_opinfo_unlock()) - gf_log ("glusterd", GF_LOG_ERROR, "Unlock on opinfo" - " failed"); - ret = 0; //Client response sent, prevent second response - } - - if (free_ptr) - GF_FREE(free_ptr); - - glusterd_volume_bricks_delete (&tmpvolinfo); - if (brickinfo) - glusterd_brickinfo_delete (brickinfo); - if (cli_req.volname) - free (cli_req.volname); // its a malloced by xdr - return ret; -} - -int -glusterd_handle_cli_start_volume (rpcsvc_request_t *req) -{ - int32_t ret = -1; - gf1_cli_start_vol_req cli_req = {0,}; - - GF_ASSERT (req); - - if (!gf_xdr_to_cli_start_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; + if (msg_str[0] == '\0') + snprintf (msg_str, sizeof (msg_str), "Operation " + "failed"); + rsp.op_errstr = msg_str; + ret = 0; + } else { + rsp.op_errstr = ""; } - gf_log ("glusterd", GF_LOG_NORMAL, "Received start vol req" - "for volume %s", cli_req.volname); - - ret = glusterd_start_volume (req, cli_req.volname, cli_req.flags); + glusterd_to_cli (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp, dict); - gf_cmd_log ("volume start","on volname: %s %s", cli_req.volname, - ((ret == 0) ? "SUCCESS": "FAILED")); - -out: - if (cli_req.volname) - free (cli_req.volname); //its malloced by xdr return ret; } - int -glusterd_handle_cli_stop_volume (rpcsvc_request_t *req) +glusterd_handle_cli_uuid_reset (rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_stop_vol_req cli_req = {0,}; - - GF_ASSERT (req); - - if (!gf_xdr_to_cli_stop_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log ("glusterd", GF_LOG_NORMAL, "Received stop vol req" - "for volume %s", cli_req.volname); - - ret = glusterd_stop_volume (req, cli_req.volname, cli_req.flags); - - gf_cmd_log ("Volume stop","on volname: %s %s", cli_req.volname, - ((ret)?"FAILED":"SUCCESS")); - -out: - if (cli_req.volname) - free (cli_req.volname); //its malloced by xdr - return ret; + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_uuid_reset); } int -glusterd_handle_cli_delete_volume (rpcsvc_request_t *req) +__glusterd_handle_cli_uuid_get (rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_delete_vol_req cli_req = {0,}; - int32_t flags = 0; + int ret = -1; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + gf_cli_rsp rsp = {0,}; + gf_cli_req cli_req = {{0,}}; + char msg_str[2048] = {0,}; + char uuid_str[64] = {0,}; GF_ASSERT (req); - if (!gf_xdr_to_cli_delete_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - gf_cmd_log ("Volume delete","on volname: %s attempted", cli_req.volname); - - gf_log ("glusterd", GF_LOG_NORMAL, "Received delete vol req" - "for volume %s", cli_req.volname); - - ret = glusterd_delete_volume (req, cli_req.volname, flags); - - gf_cmd_log ("Volume delete", "on volname: %s %s", cli_req.volname, - ((ret) ? 
"FAILED" : "SUCCESS")); - -out: - if (cli_req.volname) - free (cli_req.volname); //its malloced by xdr - return ret; -} - -int -glusterd_handle_add_brick (rpcsvc_request_t *req) -{ - int32_t ret = -1; - gf1_cli_add_brick_req cli_req = {0,}; - dict_t *dict = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - char *brick = NULL; - char *bricks = NULL; - char *volname = NULL; - int brick_count = 0; - char *tmpptr = NULL; - int i = 0; - char *brick_list = NULL; - void *cli_rsp = NULL; - char err_str[2048] = {0,}; - gf1_cli_add_brick_rsp rsp = {0,}; - glusterd_volinfo_t *volinfo = NULL; - int32_t err_ret = 0; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - char *free_ptr = NULL; - glusterd_brickinfo_t *tmpbrkinfo = NULL; - glusterd_volinfo_t tmpvolinfo = {{0},}; - this = THIS; - GF_ASSERT(this); - priv = this->private; + GF_ASSERT (priv); - GF_ASSERT (req); - - INIT_LIST_HEAD (&tmpvolinfo.bricks); - - if (!gf_xdr_to_cli_add_brick_req (req->msg[0], &cli_req)) { - //failed to decode msg; + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { req->rpc_err = GARBAGE_ARGS; goto out; } - gf_cmd_log ("Volume add-brick", "on volname: %s attempted", - cli_req.volname); - gf_log ("glusterd", GF_LOG_NORMAL, "Received add brick req"); + gf_log ("glusterd", GF_LOG_DEBUG, "Received uuid get req"); - if (cli_req.bricks.bricks_len) { - /* Unserialize the dictionary */ + if (cli_req.dict.dict_len) { dict = dict_new (); + if (!dict) { + ret = -1; + goto out; + } - ret = dict_unserialize (cli_req.bricks.bricks_val, - cli_req.bricks.bricks_len, + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, &dict); if (ret < 0) { gf_log ("glusterd", GF_LOG_ERROR, "failed to " "unserialize req-buffer to dictionary"); + snprintf (msg_str, sizeof (msg_str), "Unable to decode " + "the buffer"); goto out; - } else { - dict->extra_stdfree = cli_req.bricks.bricks_val; - } - } - ret = dict_get_str (dict, "volname", &volname); + } else { + dict->extra_stdfree = cli_req.dict.dict_val; - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; + } } - if (!(ret = glusterd_check_volume_exists (volname))) { - snprintf(err_str, 2048, "Volname %s does not exist", - volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str); - err_ret = -1; + rsp_dict = dict_new (); + if (!rsp_dict) { + ret = -1; goto out; } - ret = dict_get_int32 (dict, "count", &brick_count); + uuid_utoa_r (MY_UUID, uuid_str); + ret = dict_set_str (rsp_dict, "uuid", uuid_str); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); + gf_log (this->name, GF_LOG_ERROR, "Failed to set uuid in " + "dictionary."); goto out; } - if (!(ret = glusterd_volinfo_find (volname, &volinfo))) { - if (volinfo->type == GF_CLUSTER_TYPE_NONE) - goto brick_val; - if (!brick_count || !volinfo->sub_count) - goto brick_val; - - /* If the brick count is less than sub_count then, allow add-brick only for - plain replicate volume since in plain stripe brick_count becoming less than - the sub_count is not allowed */ - if (volinfo->brick_count < volinfo->sub_count && (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) ) { - if ((volinfo->sub_count - volinfo->brick_count) == brick_count) - goto brick_val; - } - - if ((brick_count % volinfo->sub_count) != 0) { - snprintf(err_str, 2048, "Incorrect number of bricks" - " supplied %d for type %s with count %d", - brick_count, (volinfo->type == 1)? 
"STRIPE": - "REPLICATE", volinfo->sub_count); - gf_log("glusterd", GF_LOG_ERROR, "%s", err_str); - err_ret = 1; - goto out; - } - } else { - gf_log("", GF_LOG_ERROR, "Unable to get volinfo for volname" - " %s", volname); - goto out; - } - -brick_val: - ret = dict_get_str (dict, "bricks", &bricks); + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get bricks"); + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize " + "dictionary."); goto out; } + ret = 0; +out: + if (ret) { + rsp.op_ret = -1; + if (msg_str[0] == '\0') + snprintf (msg_str, sizeof (msg_str), "Operation " + "failed"); + rsp.op_errstr = msg_str; - if (bricks) - brick_list = gf_strdup (bricks); - if (!brick_list) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); - ret = -1; - goto out; } else { - free_ptr = brick_list; - } - - gf_cmd_log ("Volume add-brick", "volname: %s type %s count:%d bricks:%s" - ,volname, ((volinfo->type == 0)? "DEFAULT" : ((volinfo->type - == 1)? "STRIPE": "REPLICATE")), brick_count, brick_list); - + rsp.op_errstr = ""; - while ( i < brick_count) { - i++; - brick= strtok_r (brick_list, " \n", &tmpptr); - brick_list = tmpptr; - brickinfo = NULL; - ret = glusterd_brickinfo_from_brick (brick, &brickinfo); - if (ret) - goto out; - ret = glusterd_new_brick_validate (brick, brickinfo, err_str, - sizeof (err_str)); - if (ret) { - err_ret = 1; - goto out; - } - ret = glusterd_volume_brickinfo_get (brickinfo->uuid, - brickinfo->hostname, - brickinfo->path, - &tmpvolinfo, &tmpbrkinfo); - if (!ret) { - err_ret = 1; - snprintf (err_str, sizeof (err_str), "Brick: %s:%s, %s" - " in the arguments mean the same", - tmpbrkinfo->hostname, tmpbrkinfo->path, - brick); - goto out; - } - list_add_tail (&brickinfo->brick_list, &tmpvolinfo.bricks); - brickinfo = NULL; } - ret = glusterd_add_brick (req, dict); + glusterd_to_cli (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp, dict); - gf_cmd_log ("Volume add-brick","on volname: %s %s", volname, - ((ret || err_ret) != 0)? 
"FAILED" : "SUCCESS"); - -out: - if ((err_ret || ret) && dict) - dict_unref (dict); - if (err_ret) { - rsp.op_ret = -1; - rsp.op_errno = 0; - rsp.volname = ""; - rsp.op_errstr = err_str; - cli_rsp = &rsp; - glusterd_submit_reply(req, cli_rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_add_brick_rsp); - if (!glusterd_opinfo_unlock()) - gf_log ("glusterd", GF_LOG_ERROR, "Unlock on " - "opinfo failed"); - - ret = 0; //sent error to cli, prevent second reply - } - - if (free_ptr) - GF_FREE (free_ptr); - glusterd_volume_bricks_delete (&tmpvolinfo); - if (brickinfo) - glusterd_brickinfo_delete (brickinfo); - if (cli_req.volname) - free (cli_req.volname); //its malloced by xdr - return ret; + return 0; +} +int +glusterd_handle_cli_uuid_get (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_uuid_get); } int -glusterd_handle_replace_brick (rpcsvc_request_t *req) +__glusterd_handle_cli_list_volume (rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_replace_brick_req cli_req = {0,}; - dict_t *dict = NULL; - char *src_brick = NULL; - char *dst_brick = NULL; - int32_t op = 0; - char operation[256]; + int ret = -1; + dict_t *dict = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + int count = 0; + char key[1024] = {0,}; + gf_cli_rsp rsp = {0,}; GF_ASSERT (req); - if (!gf_xdr_to_cli_replace_brick_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_cmd_log ("Volume replace-brick","on volname: %s attempted", cli_req.volname); - - gf_log ("glusterd", GF_LOG_NORMAL, "Received replace brick req"); + priv = THIS->private; + GF_ASSERT (priv); - if (cli_req.bricks.bricks_len) { - /* Unserialize the dictionary */ - dict = dict_new (); + dict = dict_new (); + if (!dict) + goto out; - ret = dict_unserialize (cli_req.bricks.bricks_val, - cli_req.bricks.bricks_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize req-buffer to dictionary"); + list_for_each_entry (volinfo, &priv->volumes, vol_list) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d", count); + ret = dict_set_str (dict, key, volinfo->volname); + if (ret) goto out; - } else { - dict->extra_stdfree = cli_req.bricks.bricks_val; - } - } - - ret = dict_get_int32 (dict, "operation", &op); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "dict_get on operation failed"); - goto out; + count++; } - ret = dict_get_str (dict, "src-brick", &src_brick); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); + ret = dict_set_int32 (dict, "count", count); + if (ret) goto out; - } - gf_log ("", GF_LOG_DEBUG, - "src brick=%s", src_brick); - ret = dict_get_str (dict, "dst-brick", &dst_brick); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dest brick"); + ret = dict_allocate_and_serialize (dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret) goto out; - } - gf_log ("", GF_LOG_DEBUG, - "dst brick=%s", dst_brick); + ret = 0; - switch (op) { - case GF_REPLACE_OP_START: strcpy (operation, "start"); - break; - case GF_REPLACE_OP_COMMIT: strcpy (operation, "commit"); - break; - case GF_REPLACE_OP_PAUSE: strcpy (operation, "pause"); - break; - case GF_REPLACE_OP_ABORT: strcpy (operation, "abort"); - break; - case GF_REPLACE_OP_STATUS: strcpy (operation, "status"); - break; - case GF_REPLACE_OP_COMMIT_FORCE: strcpy (operation, "commit-force"); - break; - default:strcpy (operation, "unknown"); - break; - } +out: + rsp.op_ret = ret; + if (ret) + rsp.op_errstr 
= "Error listing volumes"; + else + rsp.op_errstr = ""; - gf_cmd_log ("Volume replace-brick","volname: %s src_brick:%s" - " dst_brick:%s op:%s",cli_req.volname, src_brick, dst_brick - ,operation); + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp); + ret = 0; - ret = glusterd_replace_brick (req, dict); + if (dict) + dict_unref (dict); - gf_cmd_log ("Volume replace-brick","on volname: %s %s", cli_req.volname, - (ret) ? "FAILED" : "SUCCESS"); + glusterd_friend_sm (); + glusterd_op_sm (); -out: - if (ret && dict) - dict_unref (dict); - if (cli_req.volname) - free (cli_req.volname);//malloced by xdr return ret; } +int +glusterd_handle_cli_list_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_list_volume); +} +int32_t +glusterd_op_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + char *err_str, size_t err_len) +{ + int ret = -1; + ret = glusterd_op_txn_begin (req, op, ctx, err_str, err_len); + + return ret; +} int -glusterd_handle_reset_volume (rpcsvc_request_t *req) +__glusterd_handle_reset_volume (rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_reset_vol_req cli_req = {0,}; + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_RESET_VOLUME; + char *volname = NULL; + char err_str[2048] = {0,}; + xlator_t *this = NULL; GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); - if (!gf_xdr_to_cli_set_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf (err_str, sizeof (err_str), "Failed to decode request " + "received from cli"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); req->rpc_err = GARBAGE_ARGS; goto out; } @@ -1326,34 +1684,72 @@ glusterd_handle_reset_volume (rpcsvc_request_t *req) cli_req.dict.dict_len, &dict); if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, "failed to " + gf_log (this->name, GF_LOG_ERROR, "failed to " "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); goto out; } else { dict->extra_stdfree = cli_req.dict.dict_val; } } - ret = glusterd_reset_volume (req, dict); + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to get volume " + "name"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + gf_log (this->name, GF_LOG_DEBUG, "Received volume reset request for " + "volume %s", volname); + + ret = glusterd_op_begin_synctask (req, GD_OP_RESET_VOLUME, dict); out: - if (cli_req.volname) - free (cli_req.volname);//malloced by xdr + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } return ret; } int -glusterd_handle_set_volume (rpcsvc_request_t *req) +glusterd_handle_reset_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_reset_volume); +} + +int +__glusterd_handle_set_volume (rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_set_vol_req cli_req = {0,}; + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_SET_VOLUME; + char *key = NULL; + char *value = NULL; + char *volname = NULL; + char *op_errstr = NULL; + gf_boolean_t help = _gf_false; + char err_str[2048] = {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); GF_ASSERT (req); - if 
(!gf_xdr_to_cli_set_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf (err_str, sizeof (err_str), "Failed to decode " + "request received from cli"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); req->rpc_err = GARBAGE_ARGS; goto out; } @@ -1366,542 +1762,382 @@ glusterd_handle_set_volume (rpcsvc_request_t *req) cli_req.dict.dict_len, &dict); if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to " "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); goto out; } else { dict->extra_stdfree = cli_req.dict.dict_val; } } - ret = glusterd_set_volume (req, dict); + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to get volume " + "name while handling volume set command"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + if (strcmp (volname, "help") == 0 || + strcmp (volname, "help-xml") == 0) { + ret = glusterd_volset_help (dict, &op_errstr); + help = _gf_true; + goto out; + } + + ret = dict_get_str (dict, "key1", &key); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to get key while" + " handling volume set for %s", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + ret = dict_get_str (dict, "value1", &value); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to get value while" + " handling volume set for %s", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + gf_log (this->name, GF_LOG_DEBUG, "Received volume set request for " + "volume %s", volname); + + ret = glusterd_op_begin_synctask (req, GD_OP_SET_VOLUME, dict); out: - if (cli_req.volname) - free (cli_req.volname);//malloced by xdr + if (help) + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, dict, + (op_errstr)? 
op_errstr:""); + else if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + if (op_errstr) + GF_FREE (op_errstr); return ret; } int -glusterd_handle_remove_brick (rpcsvc_request_t *req) +glusterd_handle_set_volume (rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_remove_brick_req cli_req = {0,}; - dict_t *dict = NULL; - int32_t count = 0; - char *brick = NULL; - char key[256] = {0,}; - char *brick_list = NULL; - int i = 1; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - int32_t pos = 0; - int32_t sub_volume = 0; - int32_t sub_volume_start = 0; - int32_t sub_volume_end = 0; - glusterd_brickinfo_t *tmp = NULL; - int32_t err_ret = 0; - char *err_str = NULL; - gf1_cli_remove_brick_rsp rsp = {0,}; - void *cli_rsp = NULL; - char vol_type[256] = {0,}; + return glusterd_big_locked_handler (req, __glusterd_handle_set_volume); +} + +int +__glusterd_handle_sync_volume (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + dict_t *dict = NULL; + gf_cli_rsp cli_rsp = {0.}; + char msg[2048] = {0,}; + char *volname = NULL; + gf1_cli_sync_volume flags = 0; + char *hostname = NULL; + xlator_t *this = NULL; GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); - if (!gf_xdr_to_cli_remove_brick_req (req->msg[0], &cli_req)) { + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto out; } - gf_cmd_log ("Volume remove-brick","on volname: %s attempted",cli_req.volname); - gf_log ("glusterd", GF_LOG_NORMAL, "Received rem brick req"); - - if (cli_req.bricks.bricks_len) { + if (cli_req.dict.dict_len) { /* Unserialize the dictionary */ dict = dict_new (); - ret = dict_unserialize (cli_req.bricks.bricks_val, - cli_req.bricks.bricks_len, + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, &dict); if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "failed to " "unserialize req-buffer to dictionary"); + snprintf (msg, sizeof (msg), "Unable to decode the " + "command"); goto out; } else { - dict->extra_stdfree = cli_req.bricks.bricks_val; + dict->extra_stdfree = cli_req.dict.dict_val; } } - ret = dict_get_int32 (dict, "count", &count); + ret = dict_get_str (dict, "hostname", &hostname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); + snprintf (msg, sizeof (msg), "Failed to get hostname"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); goto out; } - err_str = GF_MALLOC (2048 * sizeof(*err_str),gf_common_mt_char); - - if (!err_str) { - gf_log ("",GF_LOG_ERROR,"glusterd_handle_remove_brick: " - "Unable to get memory"); - ret = -1; - goto out; - } - - ret = glusterd_volinfo_find (cli_req.volname, &volinfo); + ret = dict_get_str (dict, "volname", &volname); if (ret) { - snprintf (err_str, 2048, "volname %s not found", - cli_req.volname); - gf_log ("", GF_LOG_ERROR, "%s", err_str); - err_ret = 1; - goto out; - } - - if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) - strcpy (vol_type, "replica"); - else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE) - strcpy (vol_type, "stripe"); - else - strcpy (vol_type, "distribute"); - - /* Do not allow remove-brick if the volume is plain stripe */ - if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) && (volinfo->brick_count == volinfo->sub_count)) { - snprintf (err_str, 2048, "Removing brick from a plain stripe is not allowed"); - gf_log 
("glusterd", GF_LOG_ERROR, "%s", err_str); - err_ret = 1; - ret = -1; - goto out; - } - - /* Do not allow remove-brick if the bricks given is less than the replica count - or stripe count */ - if (((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || (volinfo->type == GF_CLUSTER_TYPE_STRIPE)) - && !(volinfo->brick_count <= volinfo->sub_count)) { - if (volinfo->sub_count && (count % volinfo->sub_count != 0)) { - snprintf (err_str, 2048, "Remove brick incorrect" - " brick count of %d for %s %d", - count, vol_type, volinfo->sub_count); - gf_log ("", GF_LOG_ERROR, "%s", err_str); - err_ret = 1; - ret = -1; + ret = dict_get_int32 (dict, "flags", (int32_t*)&flags); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to get volume name" + " or flags"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); goto out; } } - brick_list = GF_MALLOC (120000 * sizeof(*brick_list),gf_common_mt_char); + gf_log (this->name, GF_LOG_INFO, "Received volume sync req " + "for volume %s", (flags & GF_CLI_SYNC_ALL) ? "all" : volname); - if (!brick_list) { - gf_log ("",GF_LOG_ERROR,"glusterd_handle_remove_brick: " - "Unable to get memory"); + if (gf_is_local_addr (hostname)) { ret = -1; + snprintf (msg, sizeof (msg), "sync from localhost" + " not allowed"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); goto out; } - strcpy (brick_list, " "); - while ( i <= count) { - snprintf (key, 256, "brick%d", i); - ret = dict_get_str (dict, key, &brick); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get %s", key); - goto out; - } - gf_log ("", GF_LOG_DEBUG, "Remove brick count %d brick: %s", - i, brick); - - ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo); - if (ret) { - snprintf(err_str, 2048," Incorrect brick %s for volname" - " %s", brick, cli_req.volname); - gf_log ("", GF_LOG_ERROR, "%s", err_str); - err_ret = 1; - goto out; - } - strcat(brick_list, brick); - strcat(brick_list, " "); - - i++; - if ((volinfo->type == GF_CLUSTER_TYPE_NONE) || - (volinfo->brick_count <= volinfo->sub_count)) - continue; - - pos = 0; - list_for_each_entry (tmp, &volinfo->bricks, brick_list) { - - if ((!strcmp (tmp->hostname,brickinfo->hostname)) && - !strcmp (tmp->path, brickinfo->path)) { - gf_log ("", GF_LOG_NORMAL, "Found brick"); - if (!sub_volume && volinfo->sub_count) { - sub_volume = (pos / volinfo-> - sub_count) + 1; - sub_volume_start = volinfo->sub_count * - (sub_volume - 1); - sub_volume_end = (volinfo->sub_count * - sub_volume) -1 ; - } else { - if (pos < sub_volume_start || - pos >sub_volume_end) { - ret = -1; - snprintf(err_str, 2048,"Bricks" - " not from same subvol" - " for %s", vol_type); - gf_log ("",GF_LOG_ERROR, - "%s", err_str); - err_ret = 1; - goto out; - } - } - break; - } - pos++; - } - } - gf_cmd_log ("Volume remove-brick","volname: %s count:%d bricks:%s", - cli_req.volname, count, brick_list); - - ret = glusterd_remove_brick (req, dict); - - gf_cmd_log ("Volume remove-brick","on volname: %s %s",cli_req.volname, - (ret) ? 
"FAILED" : "SUCCESS"); + ret = glusterd_op_begin_synctask (req, GD_OP_SYNC_VOLUME, dict); out: - if ((ret || err_ret) && dict) - dict_unref (dict); - if (err_ret) { - rsp.op_ret = -1; - rsp.op_errno = 0; - rsp.volname = ""; - rsp.op_errstr = err_str; - cli_rsp = &rsp; - glusterd_submit_reply(req, cli_rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_remove_brick_rsp); - if (!glusterd_opinfo_unlock()) - gf_log ("glusterd", GF_LOG_ERROR, "Unlock on " - "opinfo failed"); + if (ret) { + cli_rsp.op_ret = -1; + cli_rsp.op_errstr = msg; + if (msg[0] == '\0') + snprintf (msg, sizeof (msg), "Operation failed"); + glusterd_to_cli (req, &cli_rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp, dict); ret = 0; //sent error to cli, prevent second reply - } - if (brick_list) - GF_FREE (brick_list); - if (err_str) - GF_FREE (err_str); - if (cli_req.volname) - free (cli_req.volname); //its malloced by xdr + return ret; } int -glusterd_handle_log_filename (rpcsvc_request_t *req) +glusterd_handle_sync_volume (rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_log_filename_req cli_req = {0,}; - dict_t *dict = NULL; + return glusterd_big_locked_handler (req, __glusterd_handle_sync_volume); +} - GF_ASSERT (req); +int +glusterd_fsm_log_send_resp (rpcsvc_request_t *req, int op_ret, + char *op_errstr, dict_t *dict) +{ - if (!gf_xdr_to_cli_log_filename_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } + int ret = -1; + gf1_cli_fsm_log_rsp rsp = {0}; - gf_log ("glusterd", GF_LOG_NORMAL, "Received log filename req " - "for volume %s", cli_req.volname); + GF_ASSERT (req); + GF_ASSERT (op_errstr); - dict = dict_new (); - if (!dict) - goto out; + rsp.op_ret = op_ret; + rsp.op_errstr = op_errstr; + if (rsp.op_ret == 0) + ret = dict_allocate_and_serialize (dict, &rsp.fsm_log.fsm_log_val, + &rsp.fsm_log.fsm_log_len); - ret = dict_set_dynmstr (dict, "volname", cli_req.volname); - if (ret) - goto out; - ret = dict_set_dynmstr (dict, "brick", cli_req.brick); - if (ret) - goto out; - ret = dict_set_dynmstr (dict, "path", cli_req.path); - if (ret) - goto out; + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf1_cli_fsm_log_rsp); + GF_FREE (rsp.fsm_log.fsm_log_val); - ret = glusterd_log_filename (req, dict); + gf_log ("glusterd", GF_LOG_DEBUG, "Responded, ret: %d", ret); -out: - if (ret && dict) - dict_unref (dict); - return ret; + return 0; } int -glusterd_handle_log_locate (rpcsvc_request_t *req) +__glusterd_handle_fsm_log (rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_log_locate_req cli_req = {0,}; - gf1_cli_log_locate_rsp rsp = {0,}; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - char tmp_str[PATH_MAX] = {0,}; + int32_t ret = -1; + gf1_cli_fsm_log_req cli_req = {0,}; + dict_t *dict = NULL; + glusterd_sm_tr_log_t *log = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char msg[2048] = {0}; + glusterd_peerinfo_t *peerinfo = NULL; GF_ASSERT (req); - priv = THIS->private; - - if (!gf_xdr_to_cli_log_locate_req (req->msg[0], &cli_req)) { + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf1_cli_fsm_log_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; + snprintf (msg, sizeof (msg), "Garbage request"); goto out; } - gf_log ("glusterd", GF_LOG_NORMAL, "Received log locate req " - "for volume %s", cli_req.volname); - - if (strchr (cli_req.brick, ':')) { - /* TODO: need to get info of only that brick and then - tell what is the exact 
location */ - gf_log ("", GF_LOG_DEBUG, "brick : %s", cli_req.brick); + if (strcmp ("", cli_req.name) == 0) { + this = THIS; + conf = this->private; + log = &conf->op_sm_log; + } else { + ret = glusterd_friend_find_by_hostname (cli_req.name, + &peerinfo); + if (ret) { + snprintf (msg, sizeof (msg), "%s is not a peer", + cli_req.name); + goto out; + } + log = &peerinfo->sm_log; } - ret = glusterd_volinfo_find (cli_req.volname, &volinfo); - if (ret) { - rsp.path = "request sent on non-existent volume"; + dict = dict_new (); + if (!dict) { + ret = -1; goto out; } - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (brickinfo->logfile) { - strcpy (tmp_str, brickinfo->logfile); - rsp.path = dirname (tmp_str); - } else { - snprintf (tmp_str, PATH_MAX, "%s/logs/bricks/", - priv->workdir); - rsp.path = tmp_str; - } - break; - } - - ret = 0; + ret = glusterd_sm_tr_log_add_to_dict (dict, log); out: - rsp.op_ret = ret; - if (!rsp.path) - rsp.path = ""; + (void)glusterd_fsm_log_send_resp (req, ret, msg, dict); + free (cli_req.name);//malloced by xdr + if (dict) + dict_unref (dict); - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_log_locate_rsp); + glusterd_friend_sm (); + glusterd_op_sm (); - if (cli_req.brick) - free (cli_req.brick); //its malloced by xdr - if (cli_req.volname) - free (cli_req.volname); //its malloced by xdr - return ret; + return 0;//send 0 to avoid double reply } int -glusterd_handle_log_rotate (rpcsvc_request_t *req) +glusterd_handle_fsm_log (rpcsvc_request_t *req) { - int32_t ret = -1; - gf1_cli_log_rotate_req cli_req = {0,}; - dict_t *dict = NULL; - - GF_ASSERT (req); - - if (!gf_xdr_to_cli_log_rotate_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - - gf_log ("glusterd", GF_LOG_NORMAL, "Received log rotate req " - "for volume %s", cli_req.volname); - - dict = dict_new (); - if (!dict) - goto out; - - ret = dict_set_dynmstr (dict, "volname", cli_req.volname); - if (ret) - goto out; - - ret = dict_set_dynmstr (dict, "brick", cli_req.brick); - if (ret) - goto out; - - ret = dict_set_uint64 (dict, "rotate-key", (uint64_t)time (NULL)); - if (ret) - goto out; - - ret = glusterd_log_rotate (req, dict); - -out: - if (ret && dict) - dict_unref (dict); - return ret; + return glusterd_big_locked_handler (req, __glusterd_handle_fsm_log); } int -glusterd_handle_sync_volume (rpcsvc_request_t *req) +glusterd_op_lock_send_resp (rpcsvc_request_t *req, int32_t status) { - int32_t ret = -1; - gf1_cli_sync_volume_req cli_req = {0,}; - dict_t *dict = NULL; - gf1_cli_sync_volume_rsp cli_rsp = {0.}; - char msg[2048] = {0,}; - gf_boolean_t free_hostname = _gf_true; - gf_boolean_t free_volname = _gf_true; - glusterd_volinfo_t *volinfo = NULL; - GF_ASSERT (req); - - if (!gf_xdr_to_cli_sync_volume_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; - goto out; - } - gf_log ("glusterd", GF_LOG_NORMAL, "Received volume sync req " - "for volume %s", - (cli_req.flags & GF_CLI_SYNC_ALL) ? 
"all" : cli_req.volname); + gd1_mgmt_cluster_lock_rsp rsp = {{0},}; + int ret = -1; - dict = dict_new (); - if (!dict) { - gf_log ("", GF_LOG_ERROR, "Can't allocate sync vol dict"); - goto out; - } + GF_ASSERT (req); + glusterd_get_uuid (&rsp.uuid); + rsp.op_ret = status; - if (!glusterd_is_local_addr (cli_req.hostname)) { - ret = -1; - snprintf (msg, sizeof (msg), "sync from localhost" - " not allowed"); - goto out; - } + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp); - ret = dict_set_dynmstr (dict, "hostname", cli_req.hostname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "hostname set failed"); - snprintf (msg, sizeof (msg), "hostname set failed"); - goto out; - } else { - free_hostname = _gf_false; - } + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to lock, ret: %d", ret); - ret = dict_set_int32 (dict, "flags", cli_req.flags); - if (ret) { - gf_log ("", GF_LOG_ERROR, "volume flags set failed"); - snprintf (msg, sizeof (msg), "volume flags set failed"); - goto out; - } + return 0; +} - if (!cli_req.flags) { - ret = glusterd_volinfo_find (cli_req.volname, &volinfo); - if (!ret) { - snprintf (msg, sizeof (msg), "please delete the " - "volume: %s before sync", cli_req.volname); - ret = -1; - goto out; - } +int +glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status) +{ - ret = dict_set_dynmstr (dict, "volname", cli_req.volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "volume name set failed"); - snprintf (msg, sizeof (msg), "volume name set failed"); - goto out; - } else { - free_volname = _gf_false; - } - } else { - free_volname = _gf_false; - if (glusterd_volume_count_get ()) { - snprintf (msg, sizeof (msg), "please delete all the " - "volumes before full sync"); - ret = -1; - goto out; - } - } + gd1_mgmt_cluster_unlock_rsp rsp = {{0},}; + int ret = -1; - ret = glusterd_sync_volume (req, dict); + GF_ASSERT (req); + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); -out: - if (ret) { - cli_rsp.op_ret = -1; - cli_rsp.op_errstr = msg; - glusterd_submit_reply(req, &cli_rsp, NULL, 0, NULL, - gf_xdr_from_cli_sync_volume_rsp); - if (free_hostname && cli_req.hostname) - free (cli_req.hostname); - if (free_volname && cli_req.volname) - free (cli_req.volname); - if (dict) - dict_unref (dict); - if (!glusterd_opinfo_unlock()) - gf_log ("glusterd", GF_LOG_ERROR, "Unlock on " - "opinfo failed"); + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp); - ret = 0; //sent error to cli, prevent second reply - } + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to unlock, ret: %d", ret); return ret; } int -glusterd_op_lock_send_resp (rpcsvc_request_t *req, int32_t status) +glusterd_op_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, + int32_t status) { - gd1_mgmt_cluster_lock_rsp rsp = {{0},}; - int ret = -1; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + int ret = -1; GF_ASSERT (req); + GF_ASSERT (txn_id); glusterd_get_uuid (&rsp.uuid); rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + uuid_copy (rsp.txn_id, *txn_id); ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_cluster_lock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); - gf_log ("glusterd", GF_LOG_NORMAL, - "Responded, ret: %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to mgmt_v3 lock, ret: %d", + ret); - return 0; + return ret; } int -glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status) +glusterd_op_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, uuid_t 
*txn_id, + int32_t status) { - gd1_mgmt_cluster_unlock_rsp rsp = {{0},}; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; int ret = -1; GF_ASSERT (req); + GF_ASSERT (txn_id); rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; glusterd_get_uuid (&rsp.uuid); + uuid_copy (rsp.txn_id, *txn_id); ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_cluster_unlock_rsp); + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); - gf_log ("glusterd", GF_LOG_NORMAL, - "Responded to unlock, ret: %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, + "Responded to mgmt_v3 unlock, ret: %d", + ret); return ret; } int -glusterd_handle_cluster_unlock (rpcsvc_request_t *req) +__glusterd_handle_cluster_unlock (rpcsvc_request_t *req) { gd1_mgmt_cluster_unlock_req unlock_req = {{0}, }; int32_t ret = -1; - char str[50] = {0, }; glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; + this = THIS; + GF_ASSERT (this); GF_ASSERT (req); - if (!gd_xdr_to_mgmt_cluster_unlock_req (req->msg[0], &unlock_req)) { - //failed to decode msg; + ret = xdr_to_generic (req->msg[0], &unlock_req, + (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock " + "request received from peer"); req->rpc_err = GARBAGE_ARGS; goto out; } - uuid_unparse (unlock_req.uuid, str); - gf_log ("glusterd", GF_LOG_NORMAL, - "Received UNLOCK from uuid: %s", str); + gf_log (this->name, GF_LOG_DEBUG, + "Received UNLOCK from uuid: %s", uuid_utoa (unlock_req.uuid)); + + if (glusterd_friend_find_by_uuid (unlock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (unlock_req.uuid)); + ret = -1; + goto out; + } ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); @@ -1911,23 +2147,37 @@ glusterd_handle_cluster_unlock (rpcsvc_request_t *req) } uuid_copy (ctx->uuid, unlock_req.uuid); ctx->req = req; + ctx->dict = NULL; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, txn_id, ctx); out: + glusterd_friend_sm (); + glusterd_op_sm (); + return ret; } int +glusterd_handle_cluster_unlock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cluster_unlock); +} + +int glusterd_op_stage_send_resp (rpcsvc_request_t *req, int32_t op, int32_t status, char *op_errstr, dict_t *rsp_dict) { - gd1_mgmt_stage_op_rsp rsp = {{0},}; int ret = -1; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); GF_ASSERT (req); + rsp.op_ret = status; glusterd_get_uuid (&rsp.uuid); rsp.op = op; @@ -1936,22 +2186,19 @@ glusterd_op_stage_send_resp (rpcsvc_request_t *req, else rsp.op_errstr = ""; - ret = dict_allocate_and_serialize (rsp_dict, - &rsp.dict.dict_val, - (size_t *)&rsp.dict.dict_len); + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); if (ret < 0) { - gf_log ("", GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_ERROR, "failed to get serialized length of dict"); return ret; } ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_stage_op_rsp); + (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp); - gf_log ("glusterd", GF_LOG_NORMAL, - "Responded to stage, ret: %d", ret); - if (rsp.dict.dict_val) - GF_FREE (rsp.dict.dict_val); + gf_log (this->name, GF_LOG_DEBUG, "Responded to stage, ret: %d", ret); + GF_FREE (rsp.dict.dict_val); return ret; } @@ -1963,7 +2210,10 @@ 
glusterd_op_commit_send_resp (rpcsvc_request_t *req, { gd1_mgmt_commit_op_rsp rsp = {{0}, }; int ret = -1; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); GF_ASSERT (req); rsp.op_ret = status; glusterd_get_uuid (&rsp.uuid); @@ -1974,74 +2224,91 @@ glusterd_op_commit_send_resp (rpcsvc_request_t *req, else rsp.op_errstr = ""; - ret = dict_allocate_and_serialize (rsp_dict, - &rsp.dict.dict_val, - (size_t *)&rsp.dict.dict_len); - if (ret < 0) { - gf_log ("", GF_LOG_DEBUG, - "failed to get serialized length of dict"); - goto out; + if (rsp_dict) { + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } } ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_commit_op_rsp); + (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp); - gf_log ("glusterd", GF_LOG_NORMAL, - "Responded to commit, ret: %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Responded to commit, ret: %d", ret); out: - if (rsp.dict.dict_val) - GF_FREE (rsp.dict.dict_val); + GF_FREE (rsp.dict.dict_val); return ret; } int -glusterd_handle_incoming_friend_req (rpcsvc_request_t *req) +__glusterd_handle_incoming_friend_req (rpcsvc_request_t *req) { int32_t ret = -1; gd1_mgmt_friend_req friend_req = {{0},}; - char str[50] = {0,}; + gf_boolean_t run_fsm = _gf_true; GF_ASSERT (req); - if (!gd_xdr_to_mgmt_friend_req (req->msg[0], &friend_req)) { + ret = xdr_to_generic (req->msg[0], &friend_req, + (xdrproc_t)xdr_gd1_mgmt_friend_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto out; } - uuid_unparse (friend_req.uuid, str); - gf_log ("glusterd", GF_LOG_NORMAL, - "Received probe from uuid: %s", str); + gf_log ("glusterd", GF_LOG_INFO, + "Received probe from uuid: %s", uuid_utoa (friend_req.uuid)); ret = glusterd_handle_friend_req (req, friend_req.uuid, friend_req.hostname, friend_req.port, &friend_req); + if (ret == GLUSTERD_CONNECTION_AWAITED) { + //fsm should be run after connection establishes + run_fsm = _gf_false; + ret = 0; + } + out: - if (friend_req.hostname) - free (friend_req.hostname);//malloced by xdr + free (friend_req.hostname);//malloced by xdr + + if (run_fsm) { + glusterd_friend_sm (); + glusterd_op_sm (); + } return ret; } int -glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req) +glusterd_handle_incoming_friend_req (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_incoming_friend_req); +} + +int +__glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req) { int32_t ret = -1; gd1_mgmt_friend_req friend_req = {{0},}; - char str[50]; char remote_hostname[UNIX_PATH_MAX + 1] = {0,}; GF_ASSERT (req); - if (!gd_xdr_to_mgmt_friend_req (req->msg[0], &friend_req)) { + ret = xdr_to_generic (req->msg[0], &friend_req, + (xdrproc_t)xdr_gd1_mgmt_friend_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto out; } - uuid_unparse (friend_req.uuid, str); - gf_log ("glusterd", GF_LOG_NORMAL, - "Received unfriend from uuid: %s", str); + gf_log ("glusterd", GF_LOG_INFO, + "Received unfriend from uuid: %s", uuid_utoa (friend_req.uuid)); ret = glusterd_remote_hostname_get (req, remote_hostname, sizeof (remote_hostname)); @@ -2053,13 +2320,22 @@ glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req) remote_hostname, friend_req.port); out: - if (friend_req.hostname) - free (friend_req.hostname);//malloced by xdr - if (friend_req.vols.vols_val) - free 
(friend_req.vols.vols_val);//malloced by xdr + free (friend_req.hostname);//malloced by xdr + free (friend_req.vols.vols_val);//malloced by xdr + + glusterd_friend_sm (); + glusterd_op_sm (); + return ret; } +int +glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_incoming_unfriend_req); + +} int glusterd_handle_friend_update_delete (dict_t *dict) @@ -2080,13 +2356,37 @@ out: return ret; } +int +glusterd_friend_hostname_update (glusterd_peerinfo_t *peerinfo, + char *hostname, + gf_boolean_t store_update) +{ + char *new_hostname = NULL; + int ret = 0; + + GF_ASSERT (peerinfo); + GF_ASSERT (hostname); + + new_hostname = gf_strdup (hostname); + if (!new_hostname) { + ret = -1; + goto out; + } + + GF_FREE (peerinfo->hostname); + peerinfo->hostname = new_hostname; + if (store_update) + ret = glusterd_store_peerinfo (peerinfo); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} int -glusterd_handle_friend_update (rpcsvc_request_t *req) +__glusterd_handle_friend_update (rpcsvc_request_t *req) { int32_t ret = -1; gd1_mgmt_friend_update friend_req = {{0},}; - char str[50] = {0,}; glusterd_peerinfo_t *peerinfo = NULL; glusterd_conf_t *priv = NULL; xlator_t *this = NULL; @@ -2109,21 +2409,22 @@ glusterd_handle_friend_update (rpcsvc_request_t *req) priv = this->private; GF_ASSERT (priv); - if (!gd_xdr_to_mgmt_friend_update (req->msg[0], &friend_req)) { + ret = xdr_to_generic (req->msg[0], &friend_req, + (xdrproc_t)xdr_gd1_mgmt_friend_update); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto out; } - uuid_unparse (friend_req.uuid, str); ret = glusterd_friend_find (friend_req.uuid, NULL, &tmp); if (ret) { gf_log ("", GF_LOG_CRITICAL, "Received friend update request " - "from unknown peer %s", str); + "from unknown peer %s", uuid_utoa (friend_req.uuid)); goto out; } - gf_log ("glusterd", GF_LOG_NORMAL, - "Received friend update from uuid: %s", str); + gf_log ("glusterd", GF_LOG_INFO, + "Received friend update from uuid: %s", uuid_utoa (friend_req.uuid)); if (friend_req.friends.friends_len) { /* Unserialize the dictionary */ @@ -2167,11 +2468,17 @@ glusterd_handle_friend_update (rpcsvc_request_t *req) if (ret) goto out; - gf_log ("", GF_LOG_NORMAL, "Received uuid: %s, hostname:%s", + gf_log ("", GF_LOG_INFO, "Received uuid: %s, hostname:%s", uuid_buf, hostname); - if (!uuid_compare (uuid, priv->uuid)) { - gf_log ("", GF_LOG_NORMAL, "Received my uuid as Friend"); + if (uuid_is_null (uuid)) { + gf_log (this->name, GF_LOG_WARNING, "Updates mustn't " + "contain peer with 'null' uuid"); + continue; + } + + if (!uuid_compare (uuid, MY_UUID)) { + gf_log ("", GF_LOG_INFO, "Received my uuid as Friend"); i++; continue; } @@ -2179,38 +2486,50 @@ glusterd_handle_friend_update (rpcsvc_request_t *req) ret = glusterd_friend_find (uuid, hostname, &tmp); if (!ret) { + if (strcmp (hostname, tmp->hostname) != 0) { + glusterd_friend_hostname_update (tmp, hostname, + _gf_true); + } i++; continue; } ret = glusterd_friend_add (hostname, friend_req.port, GD_FRIEND_STATE_BEFRIENDED, - &uuid, NULL, &peerinfo, 0, &args); + &uuid, &peerinfo, 0, &args); i++; } out: - uuid_copy (rsp.uuid, priv->uuid); + uuid_copy (rsp.uuid, MY_UUID); ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_friend_update_rsp); + (xdrproc_t)xdr_gd1_mgmt_friend_update_rsp); if (dict) { if (!dict->extra_stdfree && friend_req.friends.friends_val) free (friend_req.friends.friends_val);//malloced by xdr dict_unref 
(dict); } else { - if (friend_req.friends.friends_val) - free (friend_req.friends.friends_val);//malloced by xdr + free (friend_req.friends.friends_val);//malloced by xdr } + glusterd_friend_sm (); + glusterd_op_sm (); + return ret; } int -glusterd_handle_probe_query (rpcsvc_request_t *req) +glusterd_handle_friend_update (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_friend_update); +} + +int +__glusterd_handle_probe_query (rpcsvc_request_t *req) { int32_t ret = -1; - char str[50]; xlator_t *this = NULL; glusterd_conf_t *conf = NULL; gd1_mgmt_probe_req probe_req = {{0},}; @@ -2222,24 +2541,38 @@ glusterd_handle_probe_query (rpcsvc_request_t *req) GF_ASSERT (req); - if (!gd_xdr_to_mgmt_probe_req (req->msg[0], &probe_req)) { + ret = xdr_to_generic (req->msg[0], &probe_req, + (xdrproc_t)xdr_gd1_mgmt_probe_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto out; } - this = THIS; conf = this->private; - uuid_unparse (probe_req.uuid, str); if (probe_req.port) port = probe_req.port; else port = GF_DEFAULT_BASE_PORT; - gf_log ("glusterd", GF_LOG_NORMAL, - "Received probe from uuid: %s", str); + gf_log ("glusterd", GF_LOG_INFO, + "Received probe from uuid: %s", uuid_utoa (probe_req.uuid)); + + /* Check for uuid collision and handle it in a user friendly way by + * sending the error. + */ + if (!uuid_compare (probe_req.uuid, MY_UUID)) { + gf_log (THIS->name, GF_LOG_ERROR, "Peer uuid %s is same as " + "local uuid. Please check the uuid of both the peers " + "from %s/%s", uuid_utoa (probe_req.uuid), + GLUSTERD_DEFAULT_WORKDIR, GLUSTERD_INFO_FILE); + rsp.op_ret = -1; + rsp.op_errno = GF_PROBE_SAME_UUID; + rsp.port = port; + goto respond; + } ret = glusterd_remote_hostname_get (req, remote_hostname, sizeof (remote_hostname)); @@ -2252,12 +2585,12 @@ glusterd_handle_probe_query (rpcsvc_request_t *req) rsp.op_ret = -1; rsp.op_errno = GF_PROBE_ANOTHER_CLUSTER; } else if (ret) { - gf_log ("glusterd", GF_LOG_NORMAL, "Unable to find peerinfo" + gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo" " for host: %s (%d)", remote_hostname, port); - args.mode = GD_MODE_SWITCH_ON; + args.mode = GD_MODE_ON; ret = glusterd_friend_add (remote_hostname, port, - GD_FRIEND_STATE_DEFAULT, - NULL, NULL, &peerinfo, 0, &args); + GD_FRIEND_STATE_PROBE_RCVD, + NULL, &peerinfo, 0, &args); if (ret) { gf_log ("", GF_LOG_ERROR, "Failed to add peer %s", remote_hostname); @@ -2265,221 +2598,591 @@ glusterd_handle_probe_query (rpcsvc_request_t *req) } } - uuid_copy (rsp.uuid, conf->uuid); +respond: + uuid_copy (rsp.uuid, MY_UUID); rsp.hostname = probe_req.hostname; + rsp.op_errstr = ""; - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_probe_rsp); + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_probe_rsp); + ret = 0; - gf_log ("glusterd", GF_LOG_NORMAL, "Responded to %s, op_ret: %d, " - "op_errno: %d, ret: %d", probe_req.hostname, + gf_log ("glusterd", GF_LOG_INFO, "Responded to %s, op_ret: %d, " + "op_errno: %d, ret: %d", remote_hostname, rsp.op_ret, rsp.op_errno, ret); out: - if (probe_req.hostname) - free (probe_req.hostname);//malloced by xdr + free (probe_req.hostname);//malloced by xdr + + glusterd_friend_sm (); + glusterd_op_sm (); + return ret; } +int glusterd_handle_probe_query (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_probe_query); +} + int -glusterd_friend_remove (uuid_t uuid, char *hostname) +__glusterd_handle_cli_profile_volume 
(rpcsvc_request_t *req) { - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_PROFILE_VOLUME; + char *volname = NULL; + int32_t op = 0; + char err_str[2048] = {0,}; + xlator_t *this = NULL; - ret = glusterd_friend_find (uuid, hostname, &peerinfo); - if (ret) + GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; goto out; + } + + if (cli_req.dict.dict_len > 0) { + dict = dict_new(); + if (!dict) + goto out; + dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, &dict); + } + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get volume " + "name"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + gf_log (this->name, GF_LOG_INFO, "Received volume profile req " + "for volume %s", volname); + ret = dict_get_int32 (dict, "op", &op); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get operation"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + ret = glusterd_op_begin (req, cli_op, dict, err_str, sizeof (err_str)); - ret = glusterd_friend_cleanup (peerinfo); out: - gf_log ("", GF_LOG_DEBUG, "returning %d"); + glusterd_friend_sm (); + glusterd_op_sm (); + + free (cli_req.dict.dict_val); + + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } int -glusterd_friend_add (const char *hoststr, int port, - glusterd_friend_sm_state_t state, - uuid_t *uuid, - struct rpc_clnt *rpc, - glusterd_peerinfo_t **friend, - gf_boolean_t restore, - glusterd_peerctx_args_t *args) +glusterd_handle_cli_profile_volume (rpcsvc_request_t *req) { - int ret = 0; + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_profile_volume); +} + +int +__glusterd_handle_getwd (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf1_cli_getwd_rsp rsp = {0,}; glusterd_conf_t *priv = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - dict_t *options = NULL; - struct rpc_clnt_config rpc_cfg = {0,}; - glusterd_peer_hostname_t *name = NULL; - char *hostname = NULL; - glusterd_peerctx_t *peerctx = NULL; - int32_t intvl = 0; + + GF_ASSERT (req); priv = THIS->private; + GF_ASSERT (priv); - peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t); - if (!peerctx) { - ret = -1; - goto out; - } - peerinfo = GF_CALLOC (1, sizeof (*peerinfo), gf_gld_mt_peerinfo_t); + gf_log ("glusterd", GF_LOG_INFO, "Received getwd req"); - if (!peerinfo) { - ret = -1; + rsp.wd = priv->workdir; + + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf1_cli_getwd_rsp); + ret = 0; + + glusterd_friend_sm (); + glusterd_op_sm (); + + return ret; +} + +int +glusterd_handle_getwd (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_getwd); +} + +int +__glusterd_handle_mount (rpcsvc_request_t *req) +{ + gf1_cli_mount_req mnt_req = {0,}; + gf1_cli_mount_rsp rsp = {0,}; + dict_t *dict = NULL; + int ret = 0; + glusterd_conf_t *priv = NULL; + + GF_ASSERT (req); + priv = THIS->private; + + ret = xdr_to_generic (req->msg[0], &mnt_req, + (xdrproc_t)xdr_gf1_cli_mount_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err 
= GARBAGE_ARGS; + rsp.op_ret = -1; + rsp.op_errno = EINVAL; goto out; } - if (args) - peerctx->args = *args; - peerctx->peerinfo = peerinfo; - if (friend) - *friend = peerinfo; + gf_log ("glusterd", GF_LOG_INFO, "Received mount req"); - INIT_LIST_HEAD (&peerinfo->hostnames); - peerinfo->state.state = state; - if (hoststr) { - ret = glusterd_peer_hostname_new ((char *)hoststr, &name); - if (ret) + if (mnt_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (mnt_req.dict.dict_val, + mnt_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + rsp.op_ret = -1; + rsp.op_errno = -EINVAL; goto out; - list_add_tail (&peerinfo->hostnames, &name->hostname_list); - rpc_cfg.remote_host = (char *)hoststr; - peerinfo->hostname = gf_strdup (hoststr); + } else { + dict->extra_stdfree = mnt_req.dict.dict_val; + } } - INIT_LIST_HEAD (&peerinfo->uuid_list); - list_add_tail (&peerinfo->uuid_list, &priv->peers); + synclock_unlock (&priv->big_lock); + rsp.op_ret = glusterd_do_mount (mnt_req.label, dict, + &rsp.path, &rsp.op_errno); + synclock_lock (&priv->big_lock); - if (uuid) { - uuid_copy (peerinfo->uuid, *uuid); + out: + if (!rsp.path) + rsp.path = ""; + + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf1_cli_mount_rsp); + ret = 0; + + if (dict) + dict_unref (dict); + if (*rsp.path) + GF_FREE (rsp.path); + + glusterd_friend_sm (); + glusterd_op_sm (); + + return ret; +} + +int +glusterd_handle_mount (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_mount); +} + +int +__glusterd_handle_umount (rpcsvc_request_t *req) +{ + gf1_cli_umount_req umnt_req = {0,}; + gf1_cli_umount_rsp rsp = {0,}; + char *mountbroker_root = NULL; + char mntp[PATH_MAX] = {0,}; + char *path = NULL; + runner_t runner = {0,}; + int ret = 0; + xlator_t *this = THIS; + gf_boolean_t dir_ok = _gf_false; + char *pdir = NULL; + char *t = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT (req); + GF_ASSERT (this); + priv = this->private; + + ret = xdr_to_generic (req->msg[0], &umnt_req, + (xdrproc_t)xdr_gf1_cli_umount_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + rsp.op_ret = -1; + goto out; } + gf_log ("glusterd", GF_LOG_INFO, "Received umount req"); - if (hoststr) { - options = dict_new (); - if (!options) { - ret = -1; - goto out; - } + if (dict_get_str (this->options, "mountbroker-root", + &mountbroker_root) != 0) { + rsp.op_errno = ENOENT; + goto out; + } - ret = dict_get_int32 (THIS->options, - "transport.socket.keepalive-interval", - &intvl); - if (!ret) { - ret = dict_set_int32 (options, - "transport.socket.keepalive-interval", intvl); - if (ret) - goto out; - } + /* check if it is allowed to umount path */ + path = gf_strdup (umnt_req.path); + if (!path) { + rsp.op_errno = ENOMEM; + goto out; + } + dir_ok = _gf_false; + pdir = dirname (path); + t = strtail (pdir, mountbroker_root); + if (t && *t == '/') { + t = strtail(++t, MB_HIVE); + if (t && !*t) + dir_ok = _gf_true; + } + GF_FREE (path); + if (!dir_ok) { + rsp.op_errno = EACCES; + goto out; + } - ret = dict_get_int32 (THIS->options, - "transport.socket.keepalive-time", - &intvl); - if (!ret) { - ret = dict_set_int32 (options, - "transport.socket.keepalive-time", intvl); - if (ret) - goto out; + runinit (&runner); + runner_add_args (&runner, "umount", umnt_req.path, NULL); + if (umnt_req.lazy) + runner_add_arg (&runner, "-l"); + synclock_unlock 
(&priv->big_lock); + rsp.op_ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (rsp.op_ret == 0) { + if (realpath (umnt_req.path, mntp)) + rmdir (mntp); + else { + rsp.op_ret = -1; + rsp.op_errno = errno; } - - hostname = gf_strdup((char*)hoststr); - if (!hostname) { - ret = -1; - goto out; + if (unlink (umnt_req.path) != 0) { + rsp.op_ret = -1; + rsp.op_errno = errno; } + } - ret = dict_set_dynstr (options, "remote-host", hostname); - if (ret) - goto out; + out: + if (rsp.op_errno) + rsp.op_ret = -1; + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf1_cli_umount_rsp); + ret = 0; - if (!port) - port = GLUSTERD_DEFAULT_PORT; + glusterd_friend_sm (); + glusterd_op_sm (); - rpc_cfg.remote_port = port; + return ret; +} - ret = dict_set_int32 (options, "remote-port", port); - if (ret) - goto out; +int +glusterd_handle_umount (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_umount); +} - ret = dict_set_str (options, "transport.address-family", "inet"); - if (ret) - goto out; +int +glusterd_friend_remove (uuid_t uuid, char *hostname) +{ + int ret = 0; + glusterd_peerinfo_t *peerinfo = NULL; - rpc = rpc_clnt_init (&rpc_cfg, options, THIS->ctx, THIS->name); + ret = glusterd_friend_find (uuid, hostname, &peerinfo); + if (ret) + goto out; - if (!rpc) { - gf_log ("glusterd", GF_LOG_ERROR, - "rpc init failed for peer: %s!", hoststr); - ret = -1; - goto out; + ret = glusterd_friend_remove_cleanup_vols (peerinfo->uuid); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, "Volumes cleanup failed"); + ret = glusterd_friend_cleanup (peerinfo); +out: + gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +int +glusterd_rpc_create (struct rpc_clnt **rpc, + dict_t *options, + rpc_clnt_notify_t notify_fn, + void *notify_data) +{ + struct rpc_clnt *new_rpc = NULL; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + GF_ASSERT (options); + + /* TODO: is 32 enough? or more ? */ + new_rpc = rpc_clnt_new (options, this->ctx, this->name, 16); + if (!new_rpc) + goto out; + + ret = rpc_clnt_register_notify (new_rpc, notify_fn, notify_data); + *rpc = new_rpc; + if (ret) + goto out; + ret = rpc_clnt_start (new_rpc); +out: + if (ret) { + if (new_rpc) { + (void) rpc_clnt_unref (new_rpc); } + } + + gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +int +glusterd_transport_keepalive_options_get (int *interval, int *time) +{ + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + ret = dict_get_int32 (this->options, + "transport.socket.keepalive-interval", + interval); + ret = dict_get_int32 (this->options, + "transport.socket.keepalive-time", + time); + return 0; +} - ret = rpc_clnt_register_notify (rpc, glusterd_rpc_notify, - peerctx); +int +glusterd_transport_inet_options_build (dict_t **options, const char *hostname, + int port) +{ + dict_t *dict = NULL; + int32_t interval = -1; + int32_t time = -1; + int ret = 0; + + GF_ASSERT (options); + GF_ASSERT (hostname); - peerinfo->rpc = rpc; + if (!port) + port = GLUSTERD_DEFAULT_PORT; + + /* Build default transport options */ + ret = rpc_transport_inet_options_build (&dict, hostname, port); + if (ret) + goto out; + /* Set frame-timeout to 10mins. Default timeout of 30 mins is too long + * when compared to 2 mins for cli timeout. 
This ensures users don't + * wait too long after cli timesout before being able to resume normal + * operations + */ + ret = dict_set_int32 (dict, "frame-timeout", 600); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, + "Failed to set frame-timeout"); + goto out; } - if (!restore) - ret = glusterd_store_update_peerinfo (peerinfo); + /* Set keepalive options */ + glusterd_transport_keepalive_options_get (&interval, &time); + if ((interval > 0) || (time > 0)) + ret = rpc_transport_keepalive_options_set (dict, interval, time); + *options = dict; +out: + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, + glusterd_peerctx_args_t *args) +{ + dict_t *options = NULL; + int ret = -1; + glusterd_peerctx_t *peerctx = NULL; + data_t *data = NULL; + + peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t); + if (!peerctx) + goto out; + + if (args) + peerctx->args = *args; + + peerctx->peerinfo = peerinfo; + ret = glusterd_transport_inet_options_build (&options, + peerinfo->hostname, + peerinfo->port); + if (ret) + goto out; + + /* + * For simulated multi-node testing, we need to make sure that we + * create our RPC endpoint with the same address that the peer would + * use to reach us. + */ + if (this->options) { + data = dict_get(this->options,"transport.socket.bind-address"); + if (data) { + ret = dict_set(options, + "transport.socket.source-addr",data); + } + } + + ret = glusterd_rpc_create (&peerinfo->rpc, options, + glusterd_peer_rpc_notify, peerctx); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for" + " peer %s", peerinfo->hostname); + goto out; + } + peerctx = NULL; + ret = 0; out: + GF_FREE (peerctx); + return ret; +} + +int +glusterd_friend_add (const char *hoststr, int port, + glusterd_friend_sm_state_t state, + uuid_t *uuid, + glusterd_peerinfo_t **friend, + gf_boolean_t restore, + glusterd_peerctx_args_t *args) +{ + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + conf = this->private; + GF_ASSERT (conf); + GF_ASSERT (hoststr); + + ret = glusterd_peerinfo_new (friend, state, uuid, hoststr, port); if (ret) { - if (peerctx) - GF_FREE (peerctx); - if (rpc) { - (void) rpc_clnt_unref (rpc); + goto out; + } + + /* + * We can't add to the list after calling glusterd_friend_rpc_create, + * even if it succeeds, because by then the callback to take it back + * off and free might have happened already (notably in the case of an + * invalid peer name). That would mean we're adding something that had + * just been free, and we're likely to crash later. + */ + list_add_tail (&(*friend)->uuid_list, &conf->peers); + + //restore needs to first create the list of peers, then create rpcs + //to keep track of quorum in race-free manner. In restore for each peer + //rpc-create calls rpc_notify when the friend-list is partially + //constructed, leading to wrong quorum calculations. 
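        // [Editor's note, not part of the patch] Net effect of the comment
        // above: a peer is first linked into conf->peers and persisted, and
        // only then is its outgoing rpc created (and never during restore),
        // so rpc_notify and the quorum code always see a fully built
        // friend list.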
+ if (!restore) { + ret = glusterd_store_peerinfo (*friend); + if (ret == 0) { + synclock_unlock (&conf->big_lock); + ret = glusterd_friend_rpc_create (this, *friend, args); + synclock_lock (&conf->big_lock); } - if (peerinfo) { - peerinfo->rpc = NULL; - (void) glusterd_friend_cleanup (peerinfo); + else { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store peerinfo"); } - if (options) - dict_unref (options); } - gf_log ("glusterd", GF_LOG_NORMAL, "connect returned %d", ret); + if (ret) { + (void) glusterd_friend_cleanup (*friend); + *friend = NULL; + } + +out: + gf_log (this->name, GF_LOG_INFO, "connect returned %d", ret); return ret; } - - int -glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port) +glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port, + dict_t *dict) { int ret = -1; glusterd_peerinfo_t *peerinfo = NULL; glusterd_peerctx_args_t args = {0}; + glusterd_friend_sm_event_t *event = NULL; GF_ASSERT (hoststr); ret = glusterd_friend_find (NULL, (char *)hoststr, &peerinfo); if (ret) { - gf_log ("glusterd", GF_LOG_NORMAL, "Unable to find peerinfo" + gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo" " for host: %s (%d)", hoststr, port); args.mode = GD_MODE_ON; args.req = req; + args.dict = dict; ret = glusterd_friend_add ((char *)hoststr, port, GD_FRIEND_STATE_DEFAULT, - NULL, NULL, &peerinfo, 0, &args); - } + NULL, &peerinfo, 0, &args); + if ((!ret) && (!peerinfo->connected)) { + ret = GLUSTERD_CONNECTION_AWAITED; + } - if ((!ret) && (!peerinfo->connected)) { - return GLUSTERD_CONNECTION_AWAITED; + } else if (peerinfo->connected && + (GD_FRIEND_STATE_BEFRIENDED == peerinfo->state.state)) { + ret = glusterd_friend_hostname_update (peerinfo, (char*)hoststr, + _gf_false); + if (ret) + goto out; + //this is just to rename so inject local acc for cluster update + ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_LOCAL_ACC, + &event); + if (!ret) { + event->peerinfo = peerinfo; + ret = glusterd_friend_sm_inject_event (event); + glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_SUCCESS, + NULL, (char*)hoststr, + port, dict); + } + } else { + glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, NULL, + (char*)hoststr, port, dict); } - +out: + gf_log ("", GF_LOG_DEBUG, "returning %d", ret); return ret; } int glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, - uuid_t uuid) + uuid_t uuid, dict_t *dict) { int ret = -1; glusterd_peerinfo_t *peerinfo = NULL; @@ -2492,7 +3195,7 @@ glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, ret = glusterd_friend_find (uuid, (char *)hoststr, &peerinfo); if (ret) { - gf_log ("glusterd", GF_LOG_NORMAL, "Unable to find peerinfo" + gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo" " for host: %s %d", hoststr, port); goto out; } @@ -2520,6 +3223,7 @@ glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, ctx->hostname = gf_strdup (hoststr); ctx->port = port; ctx->req = req; + ctx->dict = dict; event->ctx = ctx; @@ -2554,480 +3258,253 @@ glusterd_xfer_friend_remove_resp (rpcsvc_request_t *req, char *hostname, int por conf = this->private; - uuid_copy (rsp.uuid, conf->uuid); + uuid_copy (rsp.uuid, MY_UUID); rsp.hostname = hostname; rsp.port = port; ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_friend_rsp); + (xdrproc_t)xdr_gd1_mgmt_friend_rsp); - - gf_log ("glusterd", GF_LOG_NORMAL, + gf_log ("glusterd", GF_LOG_INFO, "Responded to %s (%d), ret: %d", hostname, port, ret); return ret; } + int 
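// [Editor's note, not part of the patch] In the signature change below, the
// old single `hostname` parameter is split in two: `myhostname` is what is
// sent back to the peer in rsp.hostname, while `remote_hostname` is used
// only to log which peer was answered.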
-glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *hostname, int port, - int32_t op_ret, int32_t op_errno) +glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *myhostname, + char *remote_hostname, int port, int32_t op_ret, + int32_t op_errno) { gd1_mgmt_friend_rsp rsp = {{0}, }; int32_t ret = -1; xlator_t *this = NULL; glusterd_conf_t *conf = NULL; - GF_ASSERT (hostname); + GF_ASSERT (myhostname); this = THIS; GF_ASSERT (this); conf = this->private; - uuid_copy (rsp.uuid, conf->uuid); + uuid_copy (rsp.uuid, MY_UUID); rsp.op_ret = op_ret; rsp.op_errno = op_errno; - rsp.hostname = gf_strdup (hostname); + rsp.hostname = gf_strdup (myhostname); rsp.port = port; ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gd_xdr_serialize_mgmt_friend_rsp); + (xdrproc_t)xdr_gd1_mgmt_friend_rsp); - gf_log ("glusterd", GF_LOG_NORMAL, - "Responded to %s (%d), ret: %d", hostname, port, ret); - if (rsp.hostname) - GF_FREE (rsp.hostname) + gf_log ("glusterd", GF_LOG_INFO, + "Responded to %s (%d), ret: %d", remote_hostname, port, ret); + GF_FREE (rsp.hostname); return ret; } -int -glusterd_xfer_cli_probe_resp (rpcsvc_request_t *req, int32_t op_ret, - int32_t op_errno, char *hostname, int port) -{ - gf1_cli_probe_rsp rsp = {0, }; - int32_t ret = -1; - - GF_ASSERT (req); - - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.hostname = hostname; - rsp.port = port; - - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_probe_rsp); - - gf_log ("glusterd", GF_LOG_NORMAL, "Responded to CLI, ret: %d",ret); - - return ret; -} - -int -glusterd_xfer_cli_deprobe_resp (rpcsvc_request_t *req, int32_t op_ret, - int32_t op_errno, char *hostname) -{ - gf1_cli_deprobe_rsp rsp = {0, }; - int32_t ret = -1; - - GF_ASSERT (req); - - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.hostname = hostname; - - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_deprobe_rsp); - - gf_log ("glusterd", GF_LOG_NORMAL, "Responded to CLI, ret: %d",ret); - - return ret; -} -int32_t -glusterd_op_txn_begin () +static void +set_probe_error_str (int op_ret, int op_errno, char *op_errstr, char *errstr, + size_t len, char *hostname, int port) { - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - int32_t locked = 0; - - priv = THIS->private; - GF_ASSERT (priv); - - ret = glusterd_lock (priv->uuid); - - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to acquire local lock, ret: %d", ret); - goto out; + if ((op_errstr) && (strcmp (op_errstr, ""))) { + snprintf (errstr, len, "%s", op_errstr); + return; } - locked = 1; - gf_log ("glusterd", GF_LOG_NORMAL, "Acquired local lock"); - - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_START_LOCK, NULL); - - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - -out: - if (locked && ret) - glusterd_unlock (priv->uuid); - return ret; -} - -int32_t -glusterd_create_volume (rpcsvc_request_t *req, dict_t *dict) -{ - int32_t ret = -1; - data_t *data = NULL; - - GF_ASSERT (req); - GF_ASSERT (dict); - - glusterd_op_set_op (GD_OP_CREATE_VOLUME); - - glusterd_op_set_ctx (GD_OP_CREATE_VOLUME, dict); - - glusterd_op_set_ctx_free (GD_OP_CREATE_VOLUME, _gf_true); - - glusterd_op_set_req (req); - - data = dict_get (dict, "volname"); - if (!data) - goto out; - - data = dict_get (dict, "type"); - if (!data) - goto out; - - data = dict_get (dict, "count"); - if (!data) - goto out; - - data = dict_get (dict, "bricks"); - if (!data) - goto out; - - data = dict_get (dict, "transport"); - if (!data) - goto out; - - data = dict_get 
(dict, "volume-id"); - if (!data) - goto out; - - ret = glusterd_op_txn_begin (); - -out: - return ret; -} - -int32_t -glusterd_volume_txn (rpcsvc_request_t *req, char *volname, int flags, - glusterd_op_t op) -{ - int32_t ret = -1; - dict_t *ctx = NULL; - char *dup_volname = NULL; - - GF_ASSERT (req); - GF_ASSERT (volname); - - ctx = dict_new (); - - if (!ctx) - goto out; - - dup_volname = gf_strdup(volname); - if (!dup_volname) - goto out; - - ret = dict_set_dynstr (ctx, "volname", dup_volname); - if (ret) - goto out; - - ret = dict_set_int32 (ctx, "flags", flags); - if (ret) - goto out; - - glusterd_op_set_op (op); - - glusterd_op_set_ctx (op, ctx); - glusterd_op_set_ctx_free (op, _gf_true); - glusterd_op_set_req (req); - - ret = glusterd_op_txn_begin (); - -out: - if (ret && ctx) - dict_unref (ctx); - return ret; -} - -int32_t -glusterd_delete_volume (rpcsvc_request_t *req, char *volname, int flags) -{ - int32_t ret = -1; - glusterd_op_delete_volume_ctx_t *ctx = NULL; - - GF_ASSERT (req); - GF_ASSERT (volname); - - ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_delete_volume_ctx_t); - - if (!ctx) - goto out; - - strncpy (ctx->volume_name, volname, GD_VOLUME_NAME_MAX); - - glusterd_op_set_op (GD_OP_DELETE_VOLUME); - - glusterd_op_set_ctx (GD_OP_DELETE_VOLUME, ctx); - glusterd_op_set_ctx_free (GD_OP_DELETE_VOLUME, _gf_true); - glusterd_op_set_req (req); - - ret = glusterd_op_txn_begin (); - -out: - return ret; -} - -int32_t -glusterd_add_brick (rpcsvc_request_t *req, dict_t *dict) -{ - int32_t ret = -1; - - GF_ASSERT (req); - GF_ASSERT (dict); - - glusterd_op_set_op (GD_OP_ADD_BRICK); - - glusterd_op_set_ctx (GD_OP_ADD_BRICK, dict); - glusterd_op_set_ctx_free (GD_OP_ADD_BRICK, _gf_true); - glusterd_op_set_req (req); - - ret = glusterd_op_txn_begin (); - - return ret; -} + if (!op_ret) { + switch (op_errno) { + case GF_PROBE_LOCALHOST: + snprintf (errstr, len, "Probe on localhost not " + "needed"); + break; -int32_t -glusterd_replace_brick (rpcsvc_request_t *req, dict_t *dict) -{ - int32_t ret = -1; + case GF_PROBE_FRIEND: + snprintf (errstr, len, "Host %s port %d already" + " in peer list", hostname, port); + break; - GF_ASSERT (req); - GF_ASSERT (dict); + default: + if (op_errno != 0) + snprintf (errstr, len, "Probe returned " + "with unknown errno %d", + op_errno); + break; + } + } else { + switch (op_errno) { + case GF_PROBE_ANOTHER_CLUSTER: + snprintf (errstr, len, "%s is already part of " + "another cluster", hostname); + break; - glusterd_op_set_op (GD_OP_REPLACE_BRICK); + case GF_PROBE_VOLUME_CONFLICT: + snprintf (errstr, len, "Atleast one volume on " + "%s conflicts with existing volumes " + "in the cluster", hostname); + break; - glusterd_op_set_ctx (GD_OP_REPLACE_BRICK, dict); + case GF_PROBE_UNKNOWN_PEER: + snprintf (errstr, len, "%s responded with " + "'unknown peer' error, this could " + "happen if %s doesn't have localhost " + "in its peer database", hostname, + hostname); + break; - glusterd_op_set_ctx_free (GD_OP_REPLACE_BRICK, _gf_true); - glusterd_op_set_req (req); + case GF_PROBE_ADD_FAILED: + snprintf (errstr, len, "Failed to add peer " + "information on %s", hostname); + break; - ret = glusterd_op_txn_begin (); + case GF_PROBE_SAME_UUID: + snprintf (errstr, len, "Peer uuid (host %s) is " + "same as local uuid", hostname); + break; - return ret; -} + case GF_PROBE_QUORUM_NOT_MET: + snprintf (errstr, len, "Cluster quorum is not " + "met. 
Changing peers is not allowed " + "in this state"); + break; -static void -_print (dict_t *unused, char *key, data_t *value, void *newdict) -{ - gf_log ("", GF_LOG_DEBUG, "key=%s, value=%s", key, value->data); + default: + snprintf (errstr, len, "Probe returned with " + "unknown errno %d", op_errno); + break; + } + } } int -glusterd_set_volume_history (rpcsvc_request_t *req,dict_t *dict) +glusterd_xfer_cli_probe_resp (rpcsvc_request_t *req, int32_t op_ret, + int32_t op_errno, char *op_errstr, char *hostname, + int port, dict_t *dict) { - glusterd_volinfo_t *volinfo = NULL; - gf1_cli_set_vol_rsp rsp = {0, }; - int ret = -1; - char *volname = NULL; - char vol[256] = {0, }; - - ret = dict_get_str (dict, "volname", &volname); + gf_cli_rsp rsp = {0,}; + int32_t ret = -1; + char errstr[2048] = {0,}; + char *cmd_str = NULL; + xlator_t *this = THIS; - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } + GF_ASSERT (req); + GF_ASSERT (this); - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "'volume set' Volume %s not found", volname); - snprintf (vol, 256, "Volume %s not present", volname); + (void) set_probe_error_str (op_ret, op_errno, op_errstr, errstr, + sizeof (errstr), hostname, port); - rsp.op_errstr = gf_strdup (vol); - if (!rsp.op_errstr) { - rsp.op_errstr = ""; - gf_log ("glusterd", GF_LOG_ERROR, "Out of memory"); - } - goto out; + if (dict) { + ret = dict_get_str (dict, "cmd-str", &cmd_str); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "command string"); } - dict_foreach (volinfo->dict, _print, volinfo->dict); - - ret = dict_allocate_and_serialize (volinfo->dict, &rsp.dict.dict_val, - (size_t *)&rsp.dict.dict_len); + rsp.op_ret = op_ret; + rsp.op_errno = op_errno; + rsp.op_errstr = (errstr[0] != '\0') ? errstr : ""; - - if (ret) { - gf_log ("", GF_LOG_DEBUG, "FAILED: allocatea n serialize dict"); - goto out; - } - -out: - if (!ret) - rsp.op_ret = 1; - else - rsp.op_ret = ret; - if (!rsp.volname) - rsp.volname = ""; - if (!rsp.op_errstr) - rsp.op_errstr = "Error, Validation failed"; + gf_cmd_log ("", "%s : %s %s %s", cmd_str, + (op_ret) ? "FAILED" : "SUCCESS", + (errstr[0] != '\0') ? ":" : " ", + (errstr[0] != '\0') ? 
errstr : " "); ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_set_vol_rsp); + (xdrproc_t)xdr_gf_cli_rsp); + if (dict) + dict_unref (dict); + gf_log (this->name, GF_LOG_DEBUG, "Responded to CLI, ret: %d",ret); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } -int32_t -glusterd_reset_volume (rpcsvc_request_t *req, dict_t *dict) +static void +set_deprobe_error_str (int op_ret, int op_errno, char *op_errstr, char *errstr, + size_t len, char *hostname) { - int ret = -1; - - - glusterd_op_set_op (GD_OP_RESET_VOLUME); - - glusterd_op_set_ctx (GD_OP_RESET_VOLUME, dict); - - glusterd_op_set_ctx_free (GD_OP_RESET_VOLUME, _gf_true); + if ((op_errstr) && (strcmp (op_errstr, ""))) { + snprintf (errstr, len, "%s", op_errstr); + return; + } - glusterd_op_set_cli_op (GD_MGMT_CLI_RESET_VOLUME); + if (op_ret) { + switch (op_errno) { + case GF_DEPROBE_LOCALHOST: + snprintf (errstr, len, "%s is localhost", + hostname); + break; - glusterd_op_set_req (req); + case GF_DEPROBE_NOT_FRIEND: + snprintf (errstr, len, "%s is not part of " + "cluster", hostname); + break; - ret = glusterd_op_txn_begin (); - - return ret; -} + case GF_DEPROBE_BRICK_EXIST: + snprintf (errstr, len, "Brick(s) with the peer " + "%s exist in cluster", hostname); + break; + case GF_DEPROBE_FRIEND_DOWN: + snprintf (errstr, len, "One of the peers is " + "probably down. Check with " + "'peer status'"); + break; + case GF_DEPROBE_QUORUM_NOT_MET: + snprintf (errstr, len, "Cluster quorum is not " + "met. Changing peers is not allowed " + "in this state"); + break; -int32_t -glusterd_set_volume (rpcsvc_request_t *req, dict_t *dict) -{ - int32_t ret = -1; - int32_t dict_count = 0; + default: + snprintf (errstr, len, "Detach returned with " + "unknown errno %d", op_errno); + break; - GF_ASSERT (req); - GF_ASSERT (dict); - - ret = dict_get_int32 (dict, "count", &dict_count); - if (ret) - goto out; - - if (dict_count == 1) { - if (dict_get (dict, "history")) { - ret = glusterd_set_volume_history(req, dict); - goto out; } } - - glusterd_op_set_op (GD_OP_SET_VOLUME); - - glusterd_op_set_ctx (GD_OP_SET_VOLUME, dict); - - glusterd_op_set_ctx_free (GD_OP_SET_VOLUME, _gf_true); - - glusterd_op_set_cli_op (GD_MGMT_CLI_SET_VOLUME); - - glusterd_op_set_req (req); - - ret = glusterd_op_txn_begin (); - -out: - - return ret; } -int32_t -glusterd_remove_brick (rpcsvc_request_t *req, dict_t *dict) -{ - int32_t ret = -1; - - GF_ASSERT (req); - GF_ASSERT (dict); - - glusterd_op_set_op (GD_OP_REMOVE_BRICK); - - glusterd_op_set_ctx (GD_OP_REMOVE_BRICK, dict); - glusterd_op_set_ctx_free (GD_OP_REMOVE_BRICK, _gf_true); - glusterd_op_set_req (req); - ret = glusterd_op_txn_begin (); - - return ret; -} - -int32_t -glusterd_log_filename (rpcsvc_request_t *req, dict_t *dict) +int +glusterd_xfer_cli_deprobe_resp (rpcsvc_request_t *req, int32_t op_ret, + int32_t op_errno, char *op_errstr, + char *hostname, dict_t *dict) { - int32_t ret = -1; + gf_cli_rsp rsp = {0,}; + int32_t ret = -1; + char *cmd_str = NULL; + char errstr[2048] = {0,}; GF_ASSERT (req); - GF_ASSERT (dict); - - glusterd_op_set_op (GD_OP_LOG_FILENAME); - glusterd_op_set_ctx (GD_OP_LOG_FILENAME, dict); - glusterd_op_set_ctx_free (GD_OP_LOG_FILENAME, _gf_true); - glusterd_op_set_req (req); - - ret = glusterd_op_txn_begin (); - - - return ret; -} - -int32_t -glusterd_log_rotate (rpcsvc_request_t *req, dict_t *dict) -{ - int32_t ret = -1; - - GF_ASSERT (req); - GF_ASSERT (dict); + (void) set_deprobe_error_str (op_ret, op_errno, op_errstr, errstr, + sizeof (errstr), 
hostname); - glusterd_op_set_op (GD_OP_LOG_ROTATE); - glusterd_op_set_ctx (GD_OP_LOG_ROTATE, dict); - glusterd_op_set_ctx_free (GD_OP_LOG_ROTATE, _gf_true); - glusterd_op_set_req (req); - - ret = glusterd_op_txn_begin (); - - return ret; -} + if (dict) { + ret = dict_get_str (dict, "cmd-str", &cmd_str); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get " + "command string"); + } -int32_t -glusterd_sync_volume (rpcsvc_request_t *req, dict_t *ctx) -{ - int32_t ret = -1; + rsp.op_ret = op_ret; + rsp.op_errno = op_errno; + rsp.op_errstr = (errstr[0] != '\0') ? errstr : ""; - GF_ASSERT (req); - GF_ASSERT (ctx); + gf_cmd_log ("", "%s : %s %s %s", cmd_str, + (op_ret) ? "FAILED" : "SUCCESS", + (errstr[0] != '\0') ? ":" : " ", + (errstr[0] != '\0') ? errstr : " "); - glusterd_op_set_op (GD_OP_SYNC_VOLUME); - glusterd_op_set_ctx (GD_OP_SYNC_VOLUME, ctx); - glusterd_op_set_ctx_free (GD_OP_SYNC_VOLUME, _gf_true); - glusterd_op_set_req (req); + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp); - ret = glusterd_op_txn_begin (); + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to CLI, ret: %d",ret); return ret; } - int32_t glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags) { @@ -3037,39 +3514,52 @@ glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags) int32_t count = 0; dict_t *friends = NULL; gf1_cli_peer_list_rsp rsp = {0,}; + char my_uuid_str[64] = {0,}; + char key[256] = {0,}; priv = THIS->private; GF_ASSERT (priv); - if (!list_empty (&priv->peers)) { - friends = dict_new (); - if (!friends) { - gf_log ("", GF_LOG_WARNING, "Out of Memory"); - goto out; - } - } else { - ret = 0; + friends = dict_new (); + if (!friends) { + gf_log ("", GF_LOG_WARNING, "Out of Memory"); goto out; } - - if (flags == GF_CLI_LIST_ALL) { - list_for_each_entry (entry, &priv->peers, uuid_list) { - count++; - ret = glusterd_add_peer_detail_to_dict (entry, + if (!list_empty (&priv->peers)) { + list_for_each_entry (entry, &priv->peers, uuid_list) { + count++; + ret = glusterd_add_peer_detail_to_dict (entry, friends, count); - if (ret) - goto out; + if (ret) + goto out; + } + } - } + if (flags == GF_CLI_LIST_POOL_NODES) { + count++; + snprintf (key, 256, "friend%d.uuid", count); + uuid_utoa_r (MY_UUID, my_uuid_str); + ret = dict_set_str (friends, key, my_uuid_str); + if (ret) + goto out; - ret = dict_set_int32 (friends, "count", count); + snprintf (key, 256, "friend%d.hostname", count); + ret = dict_set_str (friends, key, "localhost"); + if (ret) + goto out; - if (ret) - goto out; + snprintf (key, 256, "friend%d.connected", count); + ret = dict_set_int32 (friends, key, 1); + if (ret) + goto out; } + ret = dict_set_int32 (friends, "count", count); + if (ret) + goto out; + ret = dict_allocate_and_serialize (friends, &rsp.friends.friends_val, - (size_t *)&rsp.friends.friends_len); + &rsp.friends.friends_len); if (ret) goto out; @@ -3082,10 +3572,10 @@ out: rsp.op_ret = ret; - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_peer_list_rsp); - if (rsp.friends.friends_val) - GF_FREE (rsp.friends.friends_val); + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf1_cli_peer_list_rsp); + ret = 0; + GF_FREE (rsp.friends.friends_val); return ret; } @@ -3098,7 +3588,7 @@ glusterd_get_volumes (rpcsvc_request_t *req, dict_t *dict, int32_t flags) glusterd_volinfo_t *entry = NULL; int32_t count = 0; dict_t *volumes = NULL; - gf1_cli_get_vol_rsp rsp = {0,}; + gf_cli_rsp rsp = {0,}; char *volname = NULL; priv = 
THIS->private; @@ -3175,9 +3665,8 @@ respond: ret = dict_set_int32 (volumes, "count", count); if (ret) goto out; - - ret = dict_allocate_and_serialize (volumes, &rsp.volumes.volumes_val, - (size_t *)&rsp.volumes.volumes_len); + ret = dict_allocate_and_serialize (volumes, &rsp.dict.dict_val, + &rsp.dict.dict_len); if (ret) goto out; @@ -3186,121 +3675,435 @@ respond: out: rsp.op_ret = ret; - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_peer_list_rsp); + rsp.op_errstr = ""; + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp); + ret = 0; if (volumes) dict_unref (volumes); - if (rsp.volumes.volumes_val) - GF_FREE (rsp.volumes.volumes_val); + GF_FREE (rsp.dict.dict_val); return ret; } -static int -glusterd_event_connected_inject (glusterd_peerctx_t *peerctx) +int +__glusterd_handle_status_volume (rpcsvc_request_t *req) { - GF_ASSERT (peerctx); + int32_t ret = -1; + uint32_t cmd = 0; + dict_t *dict = NULL; + char *volname = 0; + gf_cli_req cli_req = {{0,}}; + glusterd_op_t cli_op = GD_OP_STATUS_VOLUME; + char err_str[2048] = {0,}; + xlator_t *this = NULL; - glusterd_friend_sm_event_t *event = NULL; - glusterd_probe_ctx_t *ctx = NULL; - int ret = -1; - glusterd_peerinfo_t *peerinfo = NULL; + GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + if (cli_req.dict.dict_len > 0) { + dict = dict_new(); + if (!dict) + goto out; + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize buffer"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } - ret = glusterd_friend_sm_new_event - (GD_FRIEND_EVENT_CONNECTED, &event); + } + + ret = dict_get_uint32 (dict, "cmd", &cmd); + if (ret) + goto out; + + if (!(cmd & GF_CLI_STATUS_ALL)) { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get " + "volume name"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + gf_log (this->name, GF_LOG_INFO, + "Received status volume req for volume %s", volname); + + } + + ret = glusterd_op_begin_synctask (req, GD_OP_STATUS_VOLUME, dict); + +out: if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get new event"); + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + free (cli_req.dict.dict_val); + + return ret; +} + +int +glusterd_handle_status_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_status_volume); +} + +int +__glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + glusterd_op_t cli_op = GD_OP_CLEARLOCKS_VOLUME; + char *volname = NULL; + dict_t *dict = NULL; + char err_str[2048] = {0,}; + xlator_t *this = NULL; + + GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + + ret = -1; + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; goto out; } - ctx = GF_CALLOC (1, sizeof(*ctx), gf_gld_mt_probe_ctx_t); + if (cli_req.dict.dict_len) { + dict = dict_new (); - if (!ctx) { + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log 
(this->name, GF_LOG_ERROR, + "failed to unserialize req-buffer to" + " dictionary"); + snprintf (err_str, sizeof (err_str), "unable to decode " + "the command"); + goto out; + } + + } else { ret = -1; - gf_log ("", GF_LOG_ERROR, "Memory not available"); + gf_log (this->name, GF_LOG_ERROR, "Empty cli request."); goto out; } - peerinfo = peerctx->peerinfo; - ctx->hostname = gf_strdup (peerinfo->hostname); - ctx->port = peerinfo->port; - ctx->req = peerctx->args.req; + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get volume " + "name"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } - event->peerinfo = peerinfo; - event->ctx = ctx; + gf_log (this->name, GF_LOG_INFO, "Received clear-locks volume req " + "for volume %s", volname); - ret = glusterd_friend_sm_inject_event (event); + ret = glusterd_op_begin_synctask (req, GD_OP_CLEARLOCKS_VOLUME, dict); +out: if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject " - "EVENT_CONNECTED ret = %d", ret); + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + free (cli_req.dict.dict_val); + + return ret; +} + +int +glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_clearlocks_volume); +} + +static int +get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo) +{ + glusterd_volinfo_t *volinfo = NULL; + char *volid_str = NULL; + char *brick = NULL; + char *brickid_dup = NULL; + uuid_t volid = {0}; + int ret = -1; + + brickid_dup = gf_strdup (brickid); + if (!brickid_dup) goto out; + + volid_str = brickid_dup; + brick = strchr (brickid_dup, ':'); + *brick = '\0'; + brick++; + if (!volid_str || !brick) + goto out; + + uuid_parse (volid_str, volid); + ret = glusterd_volinfo_find_by_volume_id (volid, &volinfo); + if (ret) { + /* Check if it a snapshot volume */ + ret = glusterd_snap_volinfo_find_by_volume_id (volid, &volinfo); + if (ret) + goto out; } + ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, + brickinfo); + if (ret) + goto out; + + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + GF_FREE (brickid_dup); return ret; } int -glusterd_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, - void *data) +__glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) { xlator_t *this = NULL; - char *handshake = "on"; glusterd_conf_t *conf = NULL; int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_peerctx_t *peerctx = NULL; + char *brickid = NULL; + glusterd_brickinfo_t *brickinfo = NULL; - peerctx = mydata; - if (!peerctx) + brickid = mydata; + if (!brickid) return 0; - peerinfo = peerctx->peerinfo; + ret = get_brickinfo_from_brickid (brickid, &brickinfo); + if (ret) + return 0; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + switch (event) { + case RPC_CLNT_CONNECT: + gf_log (this->name, GF_LOG_DEBUG, "Connected to %s:%s", + brickinfo->hostname, brickinfo->path); + glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED); + ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); + + break; + + case RPC_CLNT_DISCONNECT: + if (GF_BRICK_STARTED == brickinfo->status) + gf_log (this->name, GF_LOG_INFO, "Disconnected from " + "%s:%s", brickinfo->hostname, brickinfo->path); + + glusterd_set_brick_status (brickinfo, 
GF_BRICK_STOPPED); + if (rpc_clnt_is_disabled (rpc)) + GF_FREE (brickid); + break; + + default: + gf_log (this->name, GF_LOG_TRACE, + "got some other RPC event %d", event); + break; + } + + return ret; +} + +int +glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify (rpc, mydata, event, data, + __glusterd_brick_rpc_notify); +} + +int +__glusterd_nodesvc_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char *server = NULL; + int ret = 0; + this = THIS; + GF_ASSERT (this); conf = this->private; + GF_ASSERT (conf); + server = mydata; + if (!server) + return 0; switch (event) { case RPC_CLNT_CONNECT: - { gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT"); - peerinfo->connected = 1; + (void) glusterd_nodesvc_set_online_status (server, _gf_true); + ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); - if ((ret < 0) || (strcasecmp (handshake, "on"))) { - //ret = glusterd_handshake (this, peerinfo->rpc); + break; - } else { - //conf->rpc->connected = 1; - ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); - } + case RPC_CLNT_DISCONNECT: + gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT"); + (void) glusterd_nodesvc_set_online_status (server, _gf_false); + break; + + default: + gf_log (this->name, GF_LOG_TRACE, + "got some other RPC event %d", event); + break; + } + + return ret; +} + +int +glusterd_nodesvc_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify (rpc, mydata, event, data, + __glusterd_nodesvc_rpc_notify); +} + +int +glusterd_friend_remove_notify (glusterd_peerctx_t *peerctx) +{ + int ret = -1; + glusterd_friend_sm_event_t *new_event = NULL; + glusterd_peerinfo_t *peerinfo = peerctx->peerinfo; + rpcsvc_request_t *req = peerctx->args.req; + char *errstr = peerctx->errstr; + dict_t *dict = NULL; - if (GD_MODE_ON == peerctx->args.mode) { - ret = glusterd_event_connected_inject (peerctx); - peerctx->args.req = NULL; - } else if (GD_MODE_SWITCH_ON == peerctx->args.mode) { - peerctx->args.mode = GD_MODE_ON; + GF_ASSERT (peerctx); + + peerinfo = peerctx->peerinfo; + req = peerctx->args.req; + dict = peerctx->args.dict; + errstr = peerctx->errstr; + + ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_REMOVE_FRIEND, + &new_event); + if (!ret) { + if (!req) { + gf_log (THIS->name, GF_LOG_WARNING, + "Unable to find the request for responding " + "to User (%s)", peerinfo->hostname); + goto out; } - glusterd_friend_sm (); - glusterd_op_sm (); + glusterd_xfer_cli_probe_resp (req, -1, ENOTCONN, errstr, + peerinfo->hostname, + peerinfo->port, dict); + + new_event->peerinfo = peerinfo; + ret = glusterd_friend_sm_inject_event (new_event); + + } else { + gf_log ("glusterd", GF_LOG_ERROR, + "Unable to create event for removing peer %s", + peerinfo->hostname); + } + +out: + return ret; +} + +int +__glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = 0; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerctx_t *peerctx = NULL; + gf_boolean_t quorum_action = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + + peerctx = mydata; + if (!peerctx) + return 0; + + peerinfo = peerctx->peerinfo; + this = THIS; + conf = this->private; + + switch (event) { + case RPC_CLNT_CONNECT: + { + gf_log (this->name, GF_LOG_DEBUG, 
"got RPC_CLNT_CONNECT"); + peerinfo->connected = 1; + peerinfo->quorum_action = _gf_true; + + ret = glusterd_peer_dump_version (this, rpc, peerctx); + if (ret) + gf_log ("", GF_LOG_ERROR, "glusterd handshake failed"); break; } case RPC_CLNT_DISCONNECT: + { + gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d", + peerinfo->state.state); + + if (peerinfo->connected) { + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + ret = glusterd_mgmt_v3_unlock (volinfo->volname, + peerinfo->uuid, + "vol"); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "Lock not released for %s", + volinfo->volname); + } - //Inject friend disconnected here + ret = 0; + } - gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT"); - peerinfo->connected = 0; + if ((peerinfo->quorum_contrib != QUORUM_DOWN) && + (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) { + peerinfo->quorum_contrib = QUORUM_DOWN; + quorum_action = _gf_true; + peerinfo->quorum_action = _gf_false; + } - //default_notify (this, GF_EVENT_CHILD_DOWN, NULL); - break; + /* Remove peer if it is not a friend and connection/handshake + * fails, and notify cli. Happens only during probe. + */ + if (peerinfo->state.state == GD_FRIEND_STATE_DEFAULT) { + glusterd_friend_remove_notify (peerctx); + goto out; + } + peerinfo->connected = 0; + break; + } default: gf_log (this->name, GF_LOG_TRACE, "got some other RPC event %d", event); @@ -3308,5 +4111,127 @@ glusterd_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, break; } +out: + glusterd_friend_sm (); + glusterd_op_sm (); + if (quorum_action) + glusterd_do_quorum_action (); return ret; } + +int +glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify (rpc, mydata, event, data, + __glusterd_peer_rpc_notify); +} + +int +glusterd_null (rpcsvc_request_t *req) +{ + + return 0; +} + +rpcsvc_actor_t gd_svc_mgmt_actors[] = { + [GLUSTERD_MGMT_NULL] = { "NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_CLUSTER_LOCK] = { "CLUSTER_LOCK", GLUSTERD_MGMT_CLUSTER_LOCK, glusterd_handle_cluster_lock, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_CLUSTER_UNLOCK] = { "CLUSTER_UNLOCK", GLUSTERD_MGMT_CLUSTER_UNLOCK, glusterd_handle_cluster_unlock, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_STAGE_OP] = { "STAGE_OP", GLUSTERD_MGMT_STAGE_OP, glusterd_handle_stage_op, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_COMMIT_OP] = { "COMMIT_OP", GLUSTERD_MGMT_COMMIT_OP, glusterd_handle_commit_op, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gd_svc_mgmt_prog = { + .progname = "GlusterD svc mgmt", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_VERSION, + .numactors = GLUSTERD_MGMT_MAXVALUE, + .actors = gd_svc_mgmt_actors, + .synctask = _gf_true, +}; + +rpcsvc_actor_t gd_svc_peer_actors[] = { + [GLUSTERD_FRIEND_NULL] = { "NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, 0, DRC_NA}, + [GLUSTERD_PROBE_QUERY] = { "PROBE_QUERY", GLUSTERD_PROBE_QUERY, glusterd_handle_probe_query, NULL, 0, DRC_NA}, + [GLUSTERD_FRIEND_ADD] = { "FRIEND_ADD", GLUSTERD_FRIEND_ADD, glusterd_handle_incoming_friend_req, NULL, 0, DRC_NA}, + [GLUSTERD_FRIEND_REMOVE] = { "FRIEND_REMOVE", GLUSTERD_FRIEND_REMOVE, glusterd_handle_incoming_unfriend_req, NULL, 0, DRC_NA}, + [GLUSTERD_FRIEND_UPDATE] = { "FRIEND_UPDATE", GLUSTERD_FRIEND_UPDATE, glusterd_handle_friend_update, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gd_svc_peer_prog = { + .progname = "GlusterD svc peer", + .prognum = GD_FRIEND_PROGRAM, + .progver = GD_FRIEND_VERSION, + 
.numactors = GLUSTERD_FRIEND_MAXVALUE, + .actors = gd_svc_peer_actors, + .synctask = _gf_false, +}; + + + +rpcsvc_actor_t gd_svc_cli_actors[] = { + [GLUSTER_CLI_PROBE] = { "CLI_PROBE", GLUSTER_CLI_PROBE, glusterd_handle_cli_probe, NULL, 0, DRC_NA}, + [GLUSTER_CLI_CREATE_VOLUME] = { "CLI_CREATE_VOLUME", GLUSTER_CLI_CREATE_VOLUME, glusterd_handle_create_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_DEFRAG_VOLUME] = { "CLI_DEFRAG_VOLUME", GLUSTER_CLI_DEFRAG_VOLUME, glusterd_handle_defrag_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_DEPROBE] = { "FRIEND_REMOVE", GLUSTER_CLI_DEPROBE, glusterd_handle_cli_deprobe, NULL, 0, DRC_NA}, + [GLUSTER_CLI_LIST_FRIENDS] = { "LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS, glusterd_handle_cli_list_friends, NULL, 0, DRC_NA}, + [GLUSTER_CLI_UUID_RESET] = { "UUID_RESET", GLUSTER_CLI_UUID_RESET, glusterd_handle_cli_uuid_reset, NULL, 0, DRC_NA}, + [GLUSTER_CLI_UUID_GET] = { "UUID_GET", GLUSTER_CLI_UUID_GET, glusterd_handle_cli_uuid_get, NULL, 0, DRC_NA}, + [GLUSTER_CLI_START_VOLUME] = { "START_VOLUME", GLUSTER_CLI_START_VOLUME, glusterd_handle_cli_start_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_STOP_VOLUME] = { "STOP_VOLUME", GLUSTER_CLI_STOP_VOLUME, glusterd_handle_cli_stop_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, 0, DRC_NA}, + [GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, 0, DRC_NA}, + [GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, 0, DRC_NA}, + [GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, 0, DRC_NA}, + [GLUSTER_CLI_SET_VOLUME] = { "SET_VOLUME", GLUSTER_CLI_SET_VOLUME, glusterd_handle_set_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_SYNC_VOLUME] = { "SYNC_VOLUME", GLUSTER_CLI_SYNC_VOLUME, glusterd_handle_sync_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_RESET_VOLUME] = { "RESET_VOLUME", GLUSTER_CLI_RESET_VOLUME, glusterd_handle_reset_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_FSM_LOG] = { "FSM_LOG", GLUSTER_CLI_FSM_LOG, glusterd_handle_fsm_log, NULL, 0, DRC_NA}, + [GLUSTER_CLI_GSYNC_SET] = { "GSYNC_SET", GLUSTER_CLI_GSYNC_SET, glusterd_handle_gsync_set, NULL, 0, DRC_NA}, + [GLUSTER_CLI_PROFILE_VOLUME] = { "STATS_VOLUME", GLUSTER_CLI_PROFILE_VOLUME, glusterd_handle_cli_profile_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_QUOTA] = { "QUOTA", GLUSTER_CLI_QUOTA, glusterd_handle_quota, NULL, 0, DRC_NA}, + [GLUSTER_CLI_GETWD] = { "GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd, NULL, 1, DRC_NA}, + [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_MOUNT] = { "MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount, NULL, 1, DRC_NA}, + [GLUSTER_CLI_UMOUNT] = { "UMOUNT", GLUSTER_CLI_UMOUNT, glusterd_handle_umount, NULL, 1, DRC_NA}, + [GLUSTER_CLI_HEAL_VOLUME] = { "HEAL_VOLUME", GLUSTER_CLI_HEAL_VOLUME, glusterd_handle_cli_heal_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", GLUSTER_CLI_STATEDUMP_VOLUME, glusterd_handle_cli_statedump_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME, glusterd_handle_cli_list_volume, NULL, 0, 
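The actor arrays in this hunk (gd_svc_mgmt_actors, gd_svc_peer_actors, gd_svc_cli_actors) all share one layout: a sparse array indexed by procedure number, each entry pairing a name with its handler, wrapped in an rpcsvc_program that names the program/version pair. A reduced sketch of that dispatch idiom; the enum, names, and handlers below are hypothetical, not glusterd APIs:

typedef int (*actor_fn_t) (void *req);

enum { PROC_NULL = 0, PROC_PING = 1, PROC_MAX };

static int handle_null (void *req) { (void) req; return 0; }
static int handle_ping (void *req) { (void) req; return 0; }

/* The procedure number doubles as the array index, exactly as in
 * the tables above; unset slots stay NULL. */
static actor_fn_t actors[PROC_MAX] = {
        [PROC_NULL] = handle_null,
        [PROC_PING] = handle_ping,
};

static int
dispatch (int procnum, void *req)
{
        if (procnum < 0 || procnum >= PROC_MAX || !actors[procnum])
                return -1;      /* unknown procedure */
        return actors[procnum] (req);
}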
DRC_NA}, + [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", GLUSTER_CLI_CLRLOCKS_VOLUME, glusterd_handle_cli_clearlocks_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", GLUSTER_CLI_COPY_FILE, glusterd_handle_copy_file, NULL, 0, DRC_NA}, + [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", GLUSTER_CLI_SYS_EXEC, glusterd_handle_sys_exec, NULL, 0, DRC_NA}, + [GLUSTER_CLI_SNAP] = {"SNAP", GLUSTER_CLI_SNAP, glusterd_handle_snapshot, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gd_svc_cli_prog = { + .progname = "GlusterD svc cli", + .prognum = GLUSTER_CLI_PROGRAM, + .progver = GLUSTER_CLI_VERSION, + .numactors = GLUSTER_CLI_MAXVALUE, + .actors = gd_svc_cli_actors, + .synctask = _gf_true, +}; + +/* This is a minimal RPC prog, which contains only the readonly RPC procs from + * the cli rpcsvc + */ +rpcsvc_actor_t gd_svc_cli_actors_ro[] = { + [GLUSTER_CLI_LIST_FRIENDS] = { "LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS, glusterd_handle_cli_list_friends, NULL, 0, DRC_NA}, + [GLUSTER_CLI_UUID_GET] = { "UUID_GET", GLUSTER_CLI_UUID_GET, glusterd_handle_cli_uuid_get, NULL, 0, DRC_NA}, + [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_GETWD] = { "GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd, NULL, 1, DRC_NA}, + [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, 0, DRC_NA}, + [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME, glusterd_handle_cli_list_volume, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gd_svc_cli_prog_ro = { + .progname = "GlusterD svc cli read-only", + .prognum = GLUSTER_CLI_PROGRAM, + .progver = GLUSTER_CLI_VERSION, + .numactors = GLUSTER_CLI_MAXVALUE, + .actors = gd_svc_cli_actors_ro, + .synctask = _gf_true, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 213bcc6fb..0f0357c4c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _CONFIG_H #define _CONFIG_H @@ -24,36 +14,187 @@ #endif #include "xlator.h" +#include "defaults.h" #include "glusterfs.h" #include "compat-errno.h" #include "glusterd.h" #include "glusterd-utils.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" #include "glusterfs3.h" #include "protocol-common.h" #include "rpcsvc.h" +#include "rpc-common-xdr.h" + +extern struct rpc_clnt_program gd_peer_prog; +extern struct rpc_clnt_program gd_mgmt_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; + +#define TRUSTED_PREFIX "trusted-" typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *data); +static int +get_snap_volname_and_volinfo (const char *volpath, char **volname, + glusterd_volinfo_t **volinfo) +{ + int ret = -1; + char *save_ptr = NULL; + char *str_token = NULL; + char *snapname = NULL; + char *volname_token = NULL; + char *vol = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (volpath); + GF_ASSERT (volinfo); + + str_token = gf_strdup (volpath); + if (NULL == str_token) { + goto out; + } + + /* Input volname will have below formats: + * /snaps/<snapname>/<volname>.<hostname> + * or + * /snaps/<snapname>/<parent-volname> + * We need to extract snapname and parent_volname */ + + /*split string by "/" */ + strtok_r (str_token, "/", &save_ptr); + snapname = strtok_r(NULL, "/", &save_ptr); + if (!snapname) { + gf_log(this->name, GF_LOG_ERROR, "Invalid path: %s", volpath); + goto out; + } + + volname_token = strtok_r(NULL, "/", &save_ptr); + if (!volname_token) { + gf_log(this->name, GF_LOG_ERROR, "Invalid path: %s", volpath); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log(this->name, GF_LOG_ERROR, "Failed to " + "fetch snap %s", snapname); + goto out; + } + + /* Find if its a parent volume name or snap volume + * name. 
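The layout documented above (/snaps/<snapname>/<volname>[.<hostname>]) is picked apart with strtok_r. The same tokenization in isolation, on a hypothetical path:

#include <stdio.h>
#include <string.h>

int
main (void)
{
        char  path[] = "/snaps/snap1/vol0.myhost";   /* hypothetical input */
        char *save = NULL;
        char *snapname, *volname_token, *vol;

        strtok_r (path, "/", &save);                  /* consumes "snaps" */
        snapname      = strtok_r (NULL, "/", &save);  /* "snap1" */
        volname_token = strtok_r (NULL, "/", &save);  /* "vol0.myhost" */

        vol = strtok_r (volname_token, ".", &save);   /* "vol0" */

        printf ("snap=%s vol=%s\n", snapname, vol);
        return 0;
}

The second strtok_r pass on '.' recovers the volume name from the per-host form, mirroring the fallback lookup in the function body.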
This function will succeed if volname_token + * is a parent volname + */ + ret = glusterd_volinfo_find (volname_token, volinfo); + if (ret) { + *volname = gf_strdup (volname_token); + if (NULL == *volname) { + ret = -1; + goto out; + } + + ret = glusterd_snap_volinfo_find (volname_token, snap, + volinfo); + if (ret) { + /* Split the volume name */ + vol = strtok_r (volname_token, ".", &save_ptr); + if (!vol) { + gf_log(this->name, GF_LOG_ERROR, "Invalid " + "volname (%s)", volname_token); + goto out; + } + + ret = glusterd_snap_volinfo_find (vol, snap, volinfo); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Failed to " + "fetch snap volume from volname (%s)", + vol); + goto out; + } + } + } else { + /*volname_token is parent volname*/ + ret = glusterd_snap_volinfo_find_from_parent_volname ( + volname_token, snap, volinfo); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, "Failed to " + "fetch snap volume from parent " + "volname (%s)", volname_token); + goto out; + } + + /* Since volname_token is a parent volname we should + * get the snap volname here*/ + *volname = gf_strdup ((*volinfo)->volname); + if (NULL == *volname) { + ret = -1; + goto out; + } + } + +out: + if (ret && NULL != *volname) { + GF_FREE (*volname); + *volname = NULL; + } + return ret; +} static size_t build_volfile_path (const char *volname, char *path, - size_t path_len) + size_t path_len, char *trusted_str) { - struct stat stbuf = {0,}; - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - char *vol = NULL; - char *dup_volname = NULL; - char *free_ptr = NULL; - char *tmp = NULL; - glusterd_volinfo_t *volinfo = NULL; + struct stat stbuf = {0,}; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + char *vol = NULL; + char *dup_volname = NULL; + char *free_ptr = NULL; + char *save_ptr = NULL; + char *str_token = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *server = NULL; + const char *volname_ptr = NULL; + char path_prefix [PATH_MAX] = {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volname); + GF_ASSERT (path); + + if (strstr (volname, "gluster/")) { + server = strchr (volname, '/') + 1; + glusterd_get_nodesvc_volfile (server, priv->workdir, + path, path_len); + ret = 1; + goto out; + } else if ((str_token = strstr (volname, "/snaps/"))) { + ret = get_snap_volname_and_volinfo (str_token, &dup_volname, + &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get snap" + " volinfo from path (%s)", volname); + ret = -1; + goto out; + } - priv = THIS->private; + snprintf (path_prefix, sizeof (path_prefix), "%s/snaps/%s", + priv->workdir, volinfo->snapshot->snapname); - if (volname[0] != '/') { + free_ptr = dup_volname; + volname_ptr = dup_volname; + goto gotvolinfo; + } else if (volname[0] != '/') { /* Normal behavior */ dup_volname = gf_strdup (volname); } else { @@ -63,83 +204,221 @@ build_volfile_path (const char *volname, char *path, dup_volname = gf_strdup (&volname[1]); } + if (!dup_volname) { + gf_log(THIS->name, GF_LOG_ERROR, "strdup failed"); + ret = -1; + goto out; + } free_ptr = dup_volname; + volname_ptr = volname; + + snprintf (path_prefix, sizeof (path_prefix), "%s/vols", + priv->workdir); ret = glusterd_volinfo_find (dup_volname, &volinfo); + if (ret) { /* Split the volume name */ - vol = strtok_r (dup_volname, ".", &tmp); + vol = strtok_r (dup_volname, ".", &save_ptr); if (!vol) goto out; + ret = glusterd_volinfo_find (vol, &volinfo); if (ret) goto out; } - ret = snprintf (path, path_len, 
"%s/vols/%s/%s.vol", - priv->workdir, volinfo->volname, volname); + +gotvolinfo: + if (!glusterd_auth_get_username (volinfo)) + trusted_str = NULL; + + ret = snprintf (path, path_len, "%s/%s/%s.vol", path_prefix, + volinfo->volname, volname_ptr); if (ret == -1) goto out; ret = stat (path, &stbuf); + if ((ret == -1) && (errno == ENOENT)) { - ret = snprintf (path, path_len, "%s/vols/%s/%s-fuse.vol", - priv->workdir, volinfo->volname, volname); + snprintf (path, path_len, "%s/%s/%s%s-fuse.vol", + path_prefix, volinfo->volname, + (trusted_str ? trusted_str : ""), + dup_volname); + ret = stat (path, &stbuf); } + if ((ret == -1) && (errno == ENOENT)) { - ret = snprintf (path, path_len, "%s/vols/%s/%s-tcp.vol", - priv->workdir, volinfo->volname, volname); + snprintf (path, path_len, "%s/%s/%s-tcp.vol", + path_prefix, volinfo->volname, volname_ptr); } ret = 1; out: - if (free_ptr) - GF_FREE (free_ptr); + GF_FREE (free_ptr); return ret; } +/* Get and store op-versions of the clients sending the getspec request + * Clients of versions <= 3.3, don't send op-versions, their op-versions are + * defaulted to 1 + */ static int -xdr_to_glusterfs_req (rpcsvc_request_t *req, void *arg, gfs_serialize_t sfunc) +_get_client_op_versions (gf_getspec_req *args, peer_info_t *peerinfo) { - int ret = -1; + int ret = 0; + int client_max_op_version = 1; + int client_min_op_version = 1; + dict_t *dict = NULL; - if (!req) - return -1; + GF_ASSERT (args); + GF_ASSERT (peerinfo); - ret = sfunc (req->msg[0], arg); + if (args->xdata.xdata_len) { + dict = dict_new (); + if (!dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (args->xdata.xdata_val, + args->xdata.xdata_len, &dict); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, + "Failed to unserialize request dictionary"); + goto out; + } + + ret = dict_get_int32 (dict, "min-op-version", + &client_min_op_version); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, + "Failed to get client-min-op-version"); + goto out; + } + + ret = dict_get_int32 (dict, "max-op-version", + &client_max_op_version); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, + "Failed to get client-max-op-version"); + goto out; + } + } - if (ret > 0) - ret = 0; + peerinfo->max_op_version = client_max_op_version; + peerinfo->min_op_version = client_min_op_version; +out: return ret; } - -int -server_getspec (rpcsvc_request_t *req) +/* Checks if the client supports the volume, ie. client can understand all the + * options in the volfile + */ +static gf_boolean_t +_client_supports_volume (peer_info_t *peerinfo, int32_t *op_errno) { - int32_t ret = -1; - int32_t op_errno = 0; - int32_t spec_fd = -1; - size_t file_len = 0; - char filename[ZR_PATH_MAX] = {0,}; - struct stat stbuf = {0,}; - char *volume = NULL; - int cookie = 0; + gf_boolean_t ret = _gf_true; + glusterd_volinfo_t *volinfo = NULL; + + GF_ASSERT (peerinfo); + GF_ASSERT (op_errno); - gf_getspec_req args = {0,}; - gf_getspec_rsp rsp = {0,}; + /* Only check when the volfile being requested is a volume. Not finding + * a volinfo implies that the volfile requested for is not of a gluster + * volume. A non volume volfile is requested by the local gluster + * services like shd and nfs-server. 
These need not be checked as they + * will be running at the same op-version as glusterd and will be able + * to support all the features + */ + if ((glusterd_volinfo_find (peerinfo->volname, &volinfo) == 0) && + ((peerinfo->min_op_version > volinfo->client_op_version) || + (peerinfo->max_op_version < volinfo->client_op_version))) { + ret = _gf_false; + *op_errno = ENOTSUP; + gf_log ("glusterd", GF_LOG_INFO, + "Client %s (%d -> %d) doesn't support required " + "op-version (%d). Rejecting volfile request.", + peerinfo->identifier, peerinfo->min_op_version, + peerinfo->max_op_version, volinfo->client_op_version); + } + + return ret; +} + +int +__server_getspec (rpcsvc_request_t *req) +{ + int32_t ret = -1; + int32_t op_errno = 0; + int32_t spec_fd = -1; + size_t file_len = 0; + char filename[PATH_MAX] = {0,}; + struct stat stbuf = {0,}; + char *volume = NULL; + char *tmp = NULL; + int cookie = 0; + rpc_transport_t *trans = NULL; + gf_getspec_req args = {0,}; + gf_getspec_rsp rsp = {0,}; + char addrstr[RPCSVC_PEER_STRLEN] = {0}; + peer_info_t *peerinfo = NULL; - if (xdr_to_glusterfs_req (req, &args, xdr_to_getspec_req)) { + ret = xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_gf_getspec_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto fail; } + peerinfo = &req->trans->peerinfo; + volume = args.key; + /* Need to strip leading '/' from volnames. This was introduced to + * support nfs style mount parameters for native gluster mount + */ + if (volume[0] == '/') + strncpy (peerinfo->volname, &volume[1], strlen(&volume[1])); + else + strncpy (peerinfo->volname, volume, strlen(volume)); + + ret = _get_client_op_versions (&args, peerinfo); + if (ret) + goto fail; + + if (!_client_supports_volume (peerinfo, &op_errno)) { + ret = -1; + goto fail; + } + + trans = req->trans; + /* addrstr will be empty for cli socket connections */ + ret = rpcsvc_transport_peername (trans, (char *)&addrstr, + sizeof (addrstr)); + if (ret) + goto fail; + + tmp = strrchr (addrstr, ':'); + if (tmp) + *tmp = '\0'; - ret = build_volfile_path (volume, filename, sizeof (filename)); + /* The trusted volfiles are given to the glusterd owned process like NFS + * server, self-heal daemon etc., so that they are not inadvertently + * blocked by a auth.{allow,reject} setting. The trusted volfile is not + * meant for external users. 
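As the comment says, the trusted volfile is only served to local, glusterd-owned clients. A small sketch of the decision that follows: strip the ':port' suffix from the transport peername, then choose the trusted- prefix only for local addresses (is_local below is a stand-in for the real gf_is_local_addr helper):

#include <stdio.h>
#include <string.h>

static int
is_local (const char *addr)     /* stand-in for gf_is_local_addr */
{
        return (strcmp (addr, "127.0.0.1") == 0);
}

int
main (void)
{
        char  addrstr[] = "127.0.0.1:1023";     /* hypothetical peername */
        char *colon = strrchr (addrstr, ':');

        if (colon)
                *colon = '\0';                  /* drop the port */

        printf ("volfile prefix: \"%s\"\n",
                (addrstr[0] && is_local (addrstr)) ? "trusted-" : "");
        return 0;
}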
+ */ + if (strlen (addrstr) && gf_is_local_addr (addrstr)) { + + ret = build_volfile_path (volume, filename, + sizeof (filename), + TRUSTED_PREFIX); + } else { + ret = build_volfile_path (volume, filename, + sizeof (filename), NULL); + } if (ret > 0) { /* to allocate the proper buffer to hold the file data */ @@ -185,29 +464,903 @@ fail: rsp.op_errno = cookie; if (!rsp.spec) - rsp.spec = ""; + rsp.spec = strdup (""); glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (gd_serialize_t)xdr_serialize_getspec_rsp); - if (args.key) - free (args.key);//malloced by xdr - if (rsp.spec && (strcmp (rsp.spec, ""))) - free (rsp.spec); + (xdrproc_t)xdr_gf_getspec_rsp); + free (args.key);//malloced by xdr + free (rsp.spec); return 0; } +int +server_getspec (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __server_getspec); +} + +int32_t +__server_event_notify (rpcsvc_request_t *req) +{ + int32_t ret = -1; + int32_t op_errno = 0; + gf_event_notify_req args = {0,}; + gf_event_notify_rsp rsp = {0,}; + dict_t *dict = NULL; + gf_boolean_t need_rsp = _gf_true; + + ret = xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_gf_event_notify_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto fail; + } + + if (args.dict.dict_len) { + dict = dict_new (); + if (!dict) + return ret; + ret = dict_unserialize (args.dict.dict_val, + args.dict.dict_len, &dict); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Failed to unserialize req"); + goto fail; + } + } + + switch (args.op) { + case GF_EN_DEFRAG_STATUS: + gf_log ("", GF_LOG_INFO, + "received defrag status updated"); + if (dict) { + glusterd_defrag_event_notify_handle (dict); + need_rsp = _gf_false; + } + break; + default: + gf_log ("", GF_LOG_ERROR, "Unknown op received in event " + "notify"); + ret = -1; + break; + } + +fail: + rsp.op_ret = ret; + + if (op_errno) + rsp.op_errno = gf_errno_to_error (op_errno); + + if (need_rsp) + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_event_notify_rsp); + if (dict) + dict_unref (dict); + free (args.dict.dict_val);//malloced by xdr + + return 0; +} + +int32_t +server_event_notify (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __server_event_notify); +} + +int +gd_validate_cluster_op_version (xlator_t *this, int cluster_op_version, + char *peerid) +{ + int ret = -1; + glusterd_conf_t *conf = NULL; + + conf = this->private; + + if (cluster_op_version > GD_OP_VERSION_MAX) { + gf_log (this->name, GF_LOG_ERROR, + "operating version %d is more than the maximum " + "supported (%d) on the machine (as per peer request " + "from %s)", cluster_op_version, GD_OP_VERSION_MAX, + peerid); + goto out; + } + + /* The peer can only reduce its op-version when it doesn't have any + * volumes. 
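Both constraints of gd_validate_cluster_op_version reduce to two comparisons. A sketch, with an illustrative constant in place of GD_OP_VERSION_MAX and plain parameters in place of conf->op_version and the volume list:

#define OP_VERSION_MAX 4        /* illustrative, not GD_OP_VERSION_MAX */

int
validate_cluster_op_version (int requested, int current, int volume_count)
{
        if (requested > OP_VERSION_MAX)
                return -1;      /* peer wants more than we support */
        if (requested < current && volume_count > 0)
                return -1;      /* never downgrade while volumes exist */
        return 0;
}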
Reducing op-version when it already contains volumes can + * lead to inconsistencies in the cluster + */ + if ((cluster_op_version < conf->op_version) && + !list_empty (&conf->volumes)) { + gf_log (this->name, GF_LOG_ERROR, + "cannot reduce operating version to %d from current " + "version %d as volumes exist (as per peer request from " + "%s)", cluster_op_version, conf->op_version, peerid); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +__glusterd_mgmt_hndsk_versions (rpcsvc_request_t *req) +{ + dict_t *dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + int op_errno = EINVAL; + gf_mgmt_hndsk_req args = {{0,},}; + gf_mgmt_hndsk_rsp rsp = {0,}; + + this = THIS; + conf = this->private; + + ret = xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_gf_mgmt_hndsk_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_set_int32 (dict, GD_OP_VERSION_KEY, conf->op_version); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to set operating version"); + rsp.op_ret = ret; + goto out; + } + + ret = dict_set_int32 (dict, GD_MIN_OP_VERSION_KEY, GD_OP_VERSION_MIN); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to set %s", GD_MIN_OP_VERSION_KEY); + rsp.op_ret = ret; + goto out; + } + + ret = dict_set_int32 (dict, GD_MAX_OP_VERSION_KEY, GD_OP_VERSION_MAX); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to set %s", GD_MAX_OP_VERSION_KEY); + rsp.op_ret = ret; + goto out; + } + + ret = 0; + + GF_PROTOCOL_DICT_SERIALIZE (this, dict, (&rsp.hndsk.hndsk_val), + rsp.hndsk.hndsk_len, op_errno, out); +out: + + rsp.op_ret = ret; + rsp.op_errno = op_errno; + + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + + ret = 0; + + if (dict) + dict_unref (dict); + + if (args.hndsk.hndsk_val) + free (args.hndsk.hndsk_val); + + if (rsp.hndsk.hndsk_val) + GF_FREE (rsp.hndsk.hndsk_val); + + return ret; +} + +int +glusterd_mgmt_hndsk_versions (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_mgmt_hndsk_versions); +} + +int +__glusterd_mgmt_hndsk_versions_ack (rpcsvc_request_t *req) +{ + dict_t *clnt_dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + int op_errno = EINVAL; + int peer_op_version = 0; + gf_mgmt_hndsk_req args = {{0,},}; + gf_mgmt_hndsk_rsp rsp = {0,}; + + this = THIS; + conf = this->private; + + ret = xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_gf_mgmt_hndsk_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + GF_PROTOCOL_DICT_UNSERIALIZE (this, clnt_dict, args.hndsk.hndsk_val, + (args.hndsk.hndsk_len), ret, op_errno, + out); + + ret = dict_get_int32 (clnt_dict, GD_OP_VERSION_KEY, &peer_op_version); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to get the op-version key peer=%s", + req->trans->peerinfo.identifier); + goto out; + } + + ret = gd_validate_cluster_op_version (this, peer_op_version, + req->trans->peerinfo.identifier); + if (ret) + goto out; + + + /* As this is ACK from the Cluster for the versions supported, + can set the op-version of 'this' glusterd to the one + received. 
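For orientation, the VERSIONS / VERSIONS_ACK exchange can be modeled as agreeing on a point inside two [min, max] intervals. This is a toy sketch only: the real handlers validate the peer's advertised range against the current cluster op-version and then adopt the ACKed value, rather than computing a fresh maximum.

/* Toy negotiation over two advertised ranges; returns the highest
 * version both sides can speak, or -1 if the ranges do not overlap. */
int
negotiate (int a_min, int a_max, int b_min, int b_max)
{
        int lo = (a_min > b_min) ? a_min : b_min;
        int hi = (a_max < b_max) ? a_max : b_max;

        return (lo <= hi) ? hi : -1;
}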
*/ + gf_log (this->name, GF_LOG_INFO, "using the op-version %d", + peer_op_version); + conf->op_version = peer_op_version; + ret = glusterd_store_global_info (this); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to store op-version"); + +out: + rsp.op_ret = ret; + rsp.op_errno = op_errno; + + glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + + ret = 0; + + if (clnt_dict) + dict_unref (clnt_dict); + + if (args.hndsk.hndsk_val) + free (args.hndsk.hndsk_val); + + return ret; +} + +int +glusterd_mgmt_hndsk_versions_ack (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_mgmt_hndsk_versions_ack); +} rpcsvc_actor_t gluster_handshake_actors[] = { - [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, NULL }, - [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, NULL }, + [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, 0, DRC_NA}, + [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, 0, DRC_NA}, + [GF_HNDSK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_HNDSK_EVENT_NOTIFY, server_event_notify, NULL, 0, DRC_NA}, }; struct rpcsvc_program gluster_handshake_prog = { - .progname = "GlusterFS Handshake", + .progname = "Gluster Handshake", .prognum = GLUSTER_HNDSK_PROGRAM, .progver = GLUSTER_HNDSK_VERSION, .actors = gluster_handshake_actors, .numactors = GF_HNDSK_MAXVALUE, }; + +/* A minimal RPC program just for the cli getspec command */ +rpcsvc_actor_t gluster_cli_getspec_actors[] = { + [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gluster_cli_getspec_prog = { + .progname = "Gluster Handshake (CLI Getspec)", + .prognum = GLUSTER_HNDSK_PROGRAM, + .progver = GLUSTER_HNDSK_VERSION, + .actors = gluster_cli_getspec_actors, + .numactors = GF_HNDSK_MAXVALUE, +}; + + +char *glusterd_dump_proc[GF_DUMP_MAXVALUE] = { + [GF_DUMP_NULL] = "NULL", + [GF_DUMP_DUMP] = "DUMP", +}; + +rpc_clnt_prog_t glusterd_dump_prog = { + .progname = "GLUSTERD-DUMP", + .prognum = GLUSTER_DUMP_PROGRAM, + .progver = GLUSTER_DUMP_VERSION, + .procnames = glusterd_dump_proc, +}; + + +rpcsvc_actor_t glusterd_mgmt_hndsk_actors[] = { + [GD_MGMT_HNDSK_NULL] = {"NULL", GD_MGMT_HNDSK_NULL, NULL, + NULL, 0}, + [GD_MGMT_HNDSK_VERSIONS] = {"MGMT-VERS", GD_MGMT_HNDSK_VERSIONS, + glusterd_mgmt_hndsk_versions, NULL, + 0}, + [GD_MGMT_HNDSK_VERSIONS_ACK] = {"MGMT-VERS-ACK", + GD_MGMT_HNDSK_VERSIONS_ACK, + glusterd_mgmt_hndsk_versions_ack, + NULL, 0}, +}; + +struct rpcsvc_program glusterd_mgmt_hndsk_prog = { + .progname = "Gluster MGMT Handshake", + .prognum = GD_MGMT_HNDSK_PROGRAM, + .progver = GD_MGMT_HNDSK_VERSION, + .actors = glusterd_mgmt_hndsk_actors, + .numactors = GD_MGMT_HNDSK_MAXVALUE, +}; + +char *glusterd_mgmt_hndsk_proc[GD_MGMT_HNDSK_MAXVALUE] = { + [GD_MGMT_HNDSK_NULL] = "NULL", + [GD_MGMT_HNDSK_VERSIONS] = "MGMT-VERS", + [GD_MGMT_HNDSK_VERSIONS_ACK] = "MGMT-VERS-ACK", +}; + +rpc_clnt_prog_t gd_clnt_mgmt_hndsk_prog = { + .progname = "Gluster MGMT Handshake", + .prognum = GD_MGMT_HNDSK_PROGRAM, + .progver = GD_MGMT_HNDSK_VERSION, + .procnames = glusterd_mgmt_hndsk_proc, +}; + + +static int +glusterd_event_connected_inject (glusterd_peerctx_t *peerctx) +{ + GF_ASSERT (peerctx); + + glusterd_friend_sm_event_t *event = NULL; + glusterd_probe_ctx_t *ctx = NULL; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + + + ret = glusterd_friend_sm_new_event + (GD_FRIEND_EVENT_CONNECTED, &event); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get new event"); + 
goto out; + } + + ctx = GF_CALLOC (1, sizeof(*ctx), gf_gld_mt_probe_ctx_t); + + if (!ctx) { + ret = -1; + gf_log ("", GF_LOG_ERROR, "Memory not available"); + goto out; + } + + peerinfo = peerctx->peerinfo; + ctx->hostname = gf_strdup (peerinfo->hostname); + ctx->port = peerinfo->port; + ctx->req = peerctx->args.req; + ctx->dict = peerctx->args.dict; + + event->peerinfo = peerinfo; + event->ctx = ctx; + + ret = glusterd_friend_sm_inject_event (event); + + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject " + "EVENT_CONNECTED ret = %d", ret); + goto out; + } + +out: + gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + + +int +gd_validate_peer_op_version (xlator_t *this, glusterd_peerinfo_t *peerinfo, + dict_t *dict, char **errstr) +{ + int ret = -1; + glusterd_conf_t *conf = NULL; + int32_t peer_op_version = 0; + int32_t peer_min_op_version = 0; + int32_t peer_max_op_version = 0; + + if (!dict && !this && !peerinfo) + goto out; + + conf = this->private; + + ret = dict_get_int32 (dict, GD_OP_VERSION_KEY, &peer_op_version); + if (ret) + goto out; + + ret = dict_get_int32 (dict, GD_MAX_OP_VERSION_KEY, + &peer_max_op_version); + if (ret) + goto out; + + ret = dict_get_int32 (dict, GD_MIN_OP_VERSION_KEY, + &peer_min_op_version); + if (ret) + goto out; + + ret = -1; + /* Check if peer can support our op_version */ + if ((peer_max_op_version < conf->op_version) || + (peer_min_op_version > conf->op_version)) { + ret = gf_asprintf (errstr, "Peer %s does not support required " + "op-version", peerinfo->hostname); + ret = -1; + goto out; + } + + ret = 0; +out: + gf_log (this->name , GF_LOG_DEBUG, "Peer %s %s", peerinfo->hostname, + ((ret < 0) ? "rejected" : "accepted")); + return ret; +} + +int +__glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + int op_errno = EINVAL; + gf_mgmt_hndsk_rsp rsp = {0,}; + xlator_t *this = NULL; + call_frame_t *frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerctx_t *peerctx = NULL; + char msg[1024] = {0,}; + + this = THIS; + frame = myframe; + peerctx = frame->local; + peerinfo = peerctx->peerinfo; + + if (-1 == req->rpc_status) { + snprintf (msg, sizeof (msg), + "Error through RPC layer, retry again later"); + gf_log ("", GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + if (ret < 0) { + snprintf (msg, sizeof (msg), "Failed to decode XDR"); + gf_log ("", GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + + op_errno = rsp.op_errno; + if (-1 == rsp.op_ret) { + ret = -1; + snprintf (msg, sizeof (msg), + "Failed to get handshake ack from remote server"); + gf_log (frame->this->name, GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + + /* TODO: this is hardcoded as of now, but I don't forsee any problems + * with this as long as we are properly handshaking operating versions + */ + peerinfo->mgmt = &gd_mgmt_prog; + peerinfo->peer = &gd_peer_prog; + peerinfo->mgmt_v3 = &gd_mgmt_v3_prog; + + ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); + + if (GD_MODE_ON == peerctx->args.mode) { + ret = glusterd_event_connected_inject (peerctx); + peerctx->args.req = NULL; + } else if (GD_MODE_SWITCH_ON == peerctx->args.mode) { + peerctx->args.mode = GD_MODE_ON; + } else { + gf_log (this->name, GF_LOG_WARNING, "unknown mode %d", + peerctx->args.mode); + } + + glusterd_friend_sm (); + + ret = 0; +out: 
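glusterd_event_connected_inject above follows a recurring pattern: allocate an event, attach the probe context, and queue it for the friend state machine to consume. A hypothetical miniature of that inject step; the real queue lives inside glusterd's state-machine code, not a bare list:

#include <stdlib.h>

struct sm_event {
        int              type;
        void            *ctx;
        struct sm_event *next;
};

static struct sm_event *event_queue;

/* Allocate and enqueue one event; 0 on success, -1 on OOM. */
int
inject_event (int type, void *ctx)
{
        struct sm_event *ev = calloc (1, sizeof (*ev));

        if (!ev)
                return -1;
        ev->type = type;
        ev->ctx  = ctx;
        ev->next = event_queue;   /* LIFO for brevity; a real queue drains FIFO */
        event_queue = ev;
        return 0;
}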
+ + frame->local = NULL; + STACK_DESTROY (frame->root); + + if (ret != 0) + rpc_transport_disconnect (peerinfo->rpc->conn.trans); + + if (rsp.hndsk.hndsk_val) + free (rsp.hndsk.hndsk_val); + + return 0; +} + +int +glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_mgmt_hndsk_version_ack_cbk); +} + +int +__glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + int op_errno = EINVAL; + gf_mgmt_hndsk_rsp rsp = {0,}; + gf_mgmt_hndsk_req arg = {{0,}}; + xlator_t *this = NULL; + call_frame_t *frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerctx_t *peerctx = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + glusterd_conf_t *conf = NULL; + char msg[1024] = {0,}; + + this = THIS; + conf = this->private; + frame = myframe; + peerctx = frame->local; + peerinfo = peerctx->peerinfo; + + if (-1 == req->rpc_status) { + ret = -1; + snprintf (msg, sizeof (msg), + "Error through RPC layer, retry again later"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp); + if (ret < 0) { + snprintf (msg, sizeof (msg), "Failed to decode management " + "handshake response"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + + GF_PROTOCOL_DICT_UNSERIALIZE (this, dict, rsp.hndsk.hndsk_val, + rsp.hndsk.hndsk_len, ret, op_errno, + out); + + op_errno = rsp.op_errno; + if (-1 == rsp.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get the 'versions' from peer (%s)", + req->conn->trans->peerinfo.identifier); + goto out; + } + + /* Check if peer can be part of cluster */ + ret = gd_validate_peer_op_version (this, peerinfo, dict, + &peerctx->errstr); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to validate the operating version of peer (%s)", + peerinfo->hostname); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) + goto out; + + ret = dict_set_int32 (rsp_dict, GD_OP_VERSION_KEY, conf->op_version); + if (ret) { + gf_log(this->name, GF_LOG_ERROR, + "failed to set operating version in dict"); + goto out; + } + + GF_PROTOCOL_DICT_SERIALIZE (this, rsp_dict, (&arg.hndsk.hndsk_val), + arg.hndsk.hndsk_len, op_errno, out); + + ret = glusterd_submit_request (peerctx->peerinfo->rpc, &arg, frame, + &gd_clnt_mgmt_hndsk_prog, + GD_MGMT_HNDSK_VERSIONS_ACK, NULL, this, + glusterd_mgmt_hndsk_version_ack_cbk, + (xdrproc_t)xdr_gf_mgmt_hndsk_req); + +out: + if (ret) { + frame->local = NULL; + STACK_DESTROY (frame->root); + rpc_transport_disconnect (peerinfo->rpc->conn.trans); + } + + if (rsp.hndsk.hndsk_val) + free (rsp.hndsk.hndsk_val); + + if (arg.hndsk.hndsk_val) + GF_FREE (arg.hndsk.hndsk_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + return 0; +} + +int +glusterd_mgmt_hndsk_version_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_mgmt_hndsk_version_cbk); +} + +int +glusterd_mgmt_handshake (xlator_t *this, glusterd_peerctx_t *peerctx) +{ + call_frame_t *frame = NULL; + gf_mgmt_hndsk_req req = {{0,},}; + int ret = -1; + + frame = create_frame (this, this->ctx->pool); + if (!frame) + goto out; + + frame->local = peerctx; + + ret = glusterd_submit_request 
(peerctx->peerinfo->rpc, &req, frame, + &gd_clnt_mgmt_hndsk_prog, + GD_MGMT_HNDSK_VERSIONS, NULL, this, + glusterd_mgmt_hndsk_version_cbk, + (xdrproc_t)xdr_gf_mgmt_hndsk_req); + ret = 0; +out: + if (ret && frame) + STACK_DESTROY (frame->root); + + return ret; +} + +int +glusterd_set_clnt_mgmt_program (glusterd_peerinfo_t *peerinfo, + gf_prog_detail *prog) +{ + gf_prog_detail *trav = NULL; + int ret = -1; + + if (!peerinfo || !prog) + goto out; + + trav = prog; + + while (trav) { + ret = -1; + if ((gd_mgmt_prog.prognum == trav->prognum) && + (gd_mgmt_prog.progver == trav->progver)) { + peerinfo->mgmt = &gd_mgmt_prog; + ret = 0; + } + + if ((gd_peer_prog.prognum == trav->prognum) && + (gd_peer_prog.progver == trav->progver)) { + peerinfo->peer = &gd_peer_prog; + ret = 0; + } + + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "%s (%"PRId64":%"PRId64") not supported", + trav->progname, trav->prognum, + trav->progver); + } + + trav = trav->next; + } + + if (peerinfo->mgmt) { + gf_log ("", GF_LOG_INFO, + "Using Program %s, Num (%d), Version (%d)", + peerinfo->mgmt->progname, peerinfo->mgmt->prognum, + peerinfo->mgmt->progver); + } + + if (peerinfo->peer) { + gf_log ("", GF_LOG_INFO, + "Using Program %s, Num (%d), Version (%d)", + peerinfo->peer->progname, peerinfo->peer->prognum, + peerinfo->peer->progver); + } + + if (peerinfo->mgmt_v3) { + gf_log ("", GF_LOG_INFO, + "Using Program %s, Num (%d), Version (%d)", + peerinfo->mgmt_v3->progname, peerinfo->mgmt_v3->prognum, + peerinfo->mgmt_v3->progver); + } + + ret = 0; +out: + return ret; + +} + +static gf_boolean_t +_mgmt_hndsk_prog_present (gf_prog_detail *prog) { + gf_boolean_t ret = _gf_false; + gf_prog_detail *trav = NULL; + + GF_ASSERT (prog); + + trav = prog; + + while (trav) { + if ((trav->prognum == GD_MGMT_HNDSK_PROGRAM) && + (trav->progver == GD_MGMT_HNDSK_VERSION)) { + ret = _gf_true; + goto out; + } + trav = trav->next; + } +out: + return ret; +} + +int +__glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + gf_dump_rsp rsp = {0,}; + xlator_t *this = NULL; + gf_prog_detail *trav = NULL; + gf_prog_detail *next = NULL; + call_frame_t *frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerctx_t *peerctx = NULL; + glusterd_conf_t *conf = NULL; + char msg[1024] = {0,}; + + this = THIS; + conf = this->private; + frame = myframe; + peerctx = frame->local; + peerinfo = peerctx->peerinfo; + + if (-1 == req->rpc_status) { + snprintf (msg, sizeof (msg), + "Error through RPC layer, retry again later"); + gf_log ("", GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp); + if (ret < 0) { + snprintf (msg, sizeof (msg), "Failed to decode XDR"); + gf_log ("", GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + if (-1 == rsp.op_ret) { + snprintf (msg, sizeof (msg), + "Failed to get the 'versions' from remote server"); + gf_log (frame->this->name, GF_LOG_ERROR, "%s", msg); + peerctx->errstr = gf_strdup (msg); + goto out; + } + + if (_mgmt_hndsk_prog_present (rsp.prog)) { + gf_log (this->name, GF_LOG_DEBUG, + "Proceeding to op-version handshake with peer %s", + peerinfo->hostname); + ret = glusterd_mgmt_handshake (this, peerctx); + goto out; + } else if (conf->op_version > 1) { + ret = -1; + snprintf (msg, sizeof (msg), + "Peer %s does not support required op-version", + peerinfo->hostname); + peerctx->errstr = gf_strdup (msg); + gf_log (this->name, 
GF_LOG_ERROR, "%s", msg); + goto out; + } + + /* Make sure we assign the proper program to peer */ + ret = glusterd_set_clnt_mgmt_program (peerinfo, rsp.prog); + if (ret) { + gf_log ("", GF_LOG_WARNING, "failed to set the mgmt program"); + goto out; + } + + ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); + + if (GD_MODE_ON == peerctx->args.mode) { + ret = glusterd_event_connected_inject (peerctx); + peerctx->args.req = NULL; + } else if (GD_MODE_SWITCH_ON == peerctx->args.mode) { + peerctx->args.mode = GD_MODE_ON; + } else { + gf_log ("", GF_LOG_WARNING, "unknown mode %d", + peerctx->args.mode); + } + + glusterd_friend_sm(); + glusterd_op_sm(); + + ret = 0; + +out: + + /* don't use GF_FREE, buffer was allocated by libc */ + if (rsp.prog) { + trav = rsp.prog; + while (trav) { + next = trav->next; + free (trav->progname); + free (trav); + trav = next; + } + } + + frame->local = NULL; + STACK_DESTROY (frame->root); + + if (ret != 0) + rpc_transport_disconnect (peerinfo->rpc->conn.trans); + + return 0; +} + + +int +glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_peer_dump_version_cbk); +} + +int +glusterd_peer_dump_version (xlator_t *this, struct rpc_clnt *rpc, + glusterd_peerctx_t *peerctx) +{ + call_frame_t *frame = NULL; + gf_dump_req req = {0,}; + int ret = -1; + + frame = create_frame (this, this->ctx->pool); + if (!frame) + goto out; + + frame->local = peerctx; + + req.gfs_id = 0xcafe; + + ret = glusterd_submit_request (peerctx->peerinfo->rpc, &req, frame, + &glusterd_dump_prog, GF_DUMP_DUMP, + NULL, this, + glusterd_peer_dump_version_cbk, + (xdrproc_t)xdr_gf_dump_req); +out: + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c new file mode 100644 index 000000000..2b43a452e --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c @@ -0,0 +1,531 @@ +/* + Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "globals.h" +#include "glusterfs.h" +#include "dict.h" +#include "xlator.h" +#include "logging.h" +#include "run.h" +#include "defaults.h" +#include "compat.h" +#include "compat-errno.h" +#include "glusterd.h" +#include "glusterd-sm.h" +#include "glusterd-op-sm.h" +#include "glusterd-utils.h" +#include "glusterd-store.h" +#include "glusterd-hooks.h" + +#include <fnmatch.h> + +#define EMPTY "" +char glusterd_hook_dirnames[GD_OP_MAX][256] = +{ + [GD_OP_NONE] = EMPTY, + [GD_OP_CREATE_VOLUME] = "create", + [GD_OP_START_BRICK] = EMPTY, + [GD_OP_STOP_BRICK] = EMPTY, + [GD_OP_DELETE_VOLUME] = "delete", + [GD_OP_START_VOLUME] = "start", + [GD_OP_STOP_VOLUME] = "stop", + [GD_OP_DEFRAG_VOLUME] = EMPTY, + [GD_OP_ADD_BRICK] = "add-brick", + [GD_OP_REMOVE_BRICK] = "remove-brick", + [GD_OP_REPLACE_BRICK] = EMPTY, + [GD_OP_SET_VOLUME] = "set", + [GD_OP_RESET_VOLUME] = EMPTY, + [GD_OP_SYNC_VOLUME] = EMPTY, + [GD_OP_LOG_ROTATE] = EMPTY, + [GD_OP_GSYNC_CREATE] = "gsync-create", + [GD_OP_GSYNC_SET] = EMPTY, + [GD_OP_PROFILE_VOLUME] = EMPTY, + [GD_OP_QUOTA] = EMPTY, + [GD_OP_STATUS_VOLUME] = EMPTY, + [GD_OP_REBALANCE] = EMPTY, + [GD_OP_HEAL_VOLUME] = EMPTY, + [GD_OP_STATEDUMP_VOLUME] = EMPTY, + [GD_OP_LIST_VOLUME] = EMPTY, + [GD_OP_CLEARLOCKS_VOLUME] = EMPTY, + [GD_OP_DEFRAG_BRICK_VOLUME] = EMPTY, +}; +#undef EMPTY + +static inline gf_boolean_t +glusterd_is_hook_enabled (char *script) +{ + return (script[0] == 'S'); +} + +int +glusterd_hooks_create_hooks_directory (char *basedir) +{ + int ret = -1; + int op = GD_OP_NONE; + int type = GD_COMMIT_HOOK_NONE; + char version_dir[PATH_MAX] = {0, }; + char path[PATH_MAX] = {0, }; + char *cmd_subdir = NULL; + char type_subdir[GD_COMMIT_HOOK_MAX][256] = {{0, }, + "pre", + "post"}; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + + snprintf (path, sizeof (path), "%s/hooks", basedir); + ret = mkdir_p (path, 0777, _gf_true); + if (ret) { + gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to create %s due" + "to %s", path, strerror (errno)); + goto out; + } + + GLUSTERD_GET_HOOKS_DIR (version_dir, GLUSTERD_HOOK_VER, priv); + ret = mkdir_p (version_dir, 0777, _gf_true); + if (ret) { + gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to create %s due " + "to %s", version_dir, strerror (errno)); + goto out; + } + + for (op = GD_OP_NONE+1; op < GD_OP_MAX; op++) { + cmd_subdir = glusterd_hooks_get_hooks_cmd_subdir (op); + if (strlen (cmd_subdir) == 0) + continue; + + snprintf (path, sizeof (path), "%s/%s", version_dir, + cmd_subdir); + ret = mkdir_p (path, 0777, _gf_true); + if (ret) { + gf_log (THIS->name, GF_LOG_CRITICAL, + "Unable to create %s due to %s", + path, strerror (errno)); + goto out; + } + + for (type = GD_COMMIT_HOOK_PRE; type < GD_COMMIT_HOOK_MAX; + type++) { + snprintf (path, sizeof (path), "%s/%s/%s", + version_dir, cmd_subdir, type_subdir[type]); + ret = mkdir_p (path, 0777, _gf_true); + if (ret) { + gf_log (THIS->name, GF_LOG_CRITICAL, + "Unable to create %s due to %s", + path, strerror (errno)); + goto out; + } + } + } + + ret = 0; +out: + return ret; +} + +char* +glusterd_hooks_get_hooks_cmd_subdir (glusterd_op_t op) +{ + GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX)); + + return glusterd_hook_dirnames[op]; +} + +int +glusterd_hooks_set_volume_args (dict_t *dict, runner_t *runner) +{ + int i = 0; + int count = 0; + int ret = -1; + char query[1024] = {0,}; + char *key = NULL; + char *value = NULL; + + ret = dict_get_int32 (dict, "count", &count); + if (ret) + goto out; 
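glusterd_hooks_set_volume_args (below) walks numbered key%d/value%d entries out of the operation context and flattens them into a single "-o key=value ..." argument list for the hook script. The same flattening, with plain arrays standing in for the op_ctx dict:

#include <stdio.h>

int
main (void)
{
        /* hypothetical stand-ins for op_ctx's key1/value1, key2/value2 */
        const char *keys[]   = { "performance.cache-size", "nfs.disable" };
        const char *values[] = { "256MB", "on" };
        int         i, count = 2;

        printf ("-o");
        for (i = 0; i < count; i++)
                printf (" %s=%s", keys[i], values[i]);
        printf ("\n");   /* -o performance.cache-size=256MB nfs.disable=on */
        return 0;
}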
+ + /* This will not happen unless op_ctx + * is corrupted*/ + if (!count) + goto out; + + runner_add_arg (runner, "-o"); + for (i = 1; (ret == 0); i++) { + snprintf (query, sizeof (query), "key%d", i); + ret = dict_get_str (dict, query, &key); + if (ret) + continue; + + snprintf (query, sizeof (query), "value%d", i); + ret = dict_get_str (dict, query, &value); + if (ret) + continue; + + runner_argprintf (runner, "%s=%s", key, value); + } + + ret = 0; +out: + return ret; +} + +static int +glusterd_hooks_add_op_args (runner_t *runner, glusterd_op_t op, + dict_t *op_ctx, glusterd_commit_hook_type_t type) +{ + char *hooks_args = NULL; + int vol_count = 0; + gf_boolean_t truth = _gf_false; + glusterd_volinfo_t *voliter = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + + priv = THIS->private; + list_for_each_entry (voliter, &priv->volumes, + vol_list) { + if (glusterd_is_volume_started (voliter)) + vol_count++; + } + + ret = 0; + switch (op) { + case GD_OP_START_VOLUME: + if (type == GD_COMMIT_HOOK_PRE && + vol_count == 0) + truth = _gf_true; + + else if (type == GD_COMMIT_HOOK_POST && + vol_count == 1) + truth = _gf_true; + + else + truth = _gf_false; + + runner_argprintf (runner, "--first=%s", + truth? "yes":"no"); + break; + + case GD_OP_STOP_VOLUME: + if (type == GD_COMMIT_HOOK_PRE && + vol_count == 1) + truth = _gf_true; + + else if (type == GD_COMMIT_HOOK_POST && + vol_count == 0) + truth = _gf_true; + + else + truth = _gf_false; + + runner_argprintf (runner, "--last=%s", + truth? "yes":"no"); + break; + + case GD_OP_SET_VOLUME: + ret = glusterd_hooks_set_volume_args (op_ctx, runner); + break; + + case GD_OP_GSYNC_CREATE: + ret = dict_get_str (op_ctx, "hooks_args", &hooks_args); + if (ret) + gf_log ("", GF_LOG_DEBUG, + "No Hooks Arguments."); + else + gf_log ("", GF_LOG_DEBUG, + "Hooks Args = %s", hooks_args); + if (hooks_args) + runner_argprintf (runner, "%s", hooks_args); + break; + + default: + break; + + } + + return ret; +} + +int +glusterd_hooks_run_hooks (char *hooks_path, glusterd_op_t op, dict_t *op_ctx, + glusterd_commit_hook_type_t type) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + runner_t runner = {0, }; + struct dirent *entry = NULL; + DIR *hookdir = NULL; + char *volname = NULL; + char **lines = NULL; + int N = 8; /*arbitrary*/ + int lineno = 0; + int line_count = 0; + int ret = -1; + + this = THIS; + priv = this->private; + + ret = dict_get_str (op_ctx, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Failed to get volname " + "from operation context"); + goto out; + } + + hookdir = opendir (hooks_path); + if (!hookdir) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s, due " + "to %s", hooks_path, strerror (errno)); + goto out; + } + + lines = GF_CALLOC (1, N * sizeof (*lines), gf_gld_mt_charptr); + if (!lines) { + ret = -1; + goto out; + } + + ret = -1; + line_count = 0; + glusterd_for_each_entry (entry, hookdir); + while (entry) { + if (line_count == N-1) { + N *= 2; + lines = GF_REALLOC (lines, N * sizeof (char *)); + if (!lines) + goto out; + } + + if (glusterd_is_hook_enabled (entry->d_name)) { + lines[line_count] = gf_strdup (entry->d_name); + line_count++; + } + + glusterd_for_each_entry (entry, hookdir); + } + + lines[line_count] = NULL; + lines = GF_REALLOC (lines, (line_count + 1) * sizeof (char *)); + if (!lines) + goto out; + + qsort (lines, line_count, sizeof (*lines), glusterd_compare_lines); + + for (lineno = 0; lineno < line_count; lineno++) { + + runinit (&runner); + 
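Two policies were applied just above before this run loop: only entries whose names begin with 'S' are eligible (glusterd_is_hook_enabled), and the survivors run in qsort order, giving sysv-style S10/S30 sequencing. The selection logic in isolation, with a hypothetical directory listing:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int
cmp_names (const void *a, const void *b)
{
        return strcmp (*(const char * const *) a,
                       *(const char * const *) b);
}

int
main (void)
{
        /* hypothetical hook directory listing */
        const char *entries[]  = { "S30samba-start.sh", "K20disabled.sh",
                                   "S10selinux-label.sh" };
        const char *runnable[3];
        int         i, n = 0;

        for (i = 0; i < 3; i++)
                if (entries[i][0] == 'S')   /* the 'S' prefix enables a hook */
                        runnable[n++] = entries[i];

        qsort (runnable, n, sizeof (*runnable), cmp_names);

        for (i = 0; i < n; i++)
                printf ("%s --volname=<vol>\n", runnable[i]);
        return 0;
}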
runner_argprintf (&runner, "%s/%s", hooks_path, lines[lineno]); + /*Add future command line arguments to hook scripts below*/ + runner_argprintf (&runner, "--volname=%s", volname); + ret = glusterd_hooks_add_op_args (&runner, op, op_ctx, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to add " + "command specific arguments"); + goto out; + } + + ret = runner_run_reuse (&runner); + if (ret) { + runner_log (&runner, this->name, GF_LOG_ERROR, + "Failed to execute script"); + } else { + runner_log (&runner, this->name, GF_LOG_INFO, + "Ran script"); + } + runner_end (&runner); + } + + ret = 0; +out: + if (lines) { + for (lineno = 0; lineno < line_count+1; lineno++) + GF_FREE (lines[lineno]); + + GF_FREE (lines); + } + + if (hookdir) + closedir (hookdir); + + return ret; +} + +int +glusterd_hooks_post_stub_enqueue (char *scriptdir, glusterd_op_t op, + dict_t *op_ctx) +{ + int ret = -1; + glusterd_hooks_stub_t *stub = NULL; + glusterd_hooks_private_t *hooks_priv = NULL; + glusterd_conf_t *conf = NULL; + + conf = THIS->private; + hooks_priv = conf->hooks_priv; + + ret = glusterd_hooks_stub_init (&stub, scriptdir, op, op_ctx); + if (ret) + goto out; + + pthread_mutex_lock (&hooks_priv->mutex); + { + hooks_priv->waitcount++; + list_add_tail (&stub->all_hooks, &hooks_priv->list); + pthread_cond_signal (&hooks_priv->cond); + } + pthread_mutex_unlock (&hooks_priv->mutex); + + ret = 0; +out: + return ret; +} + +int +glusterd_hooks_stub_init (glusterd_hooks_stub_t **stub, char *scriptdir, + glusterd_op_t op, dict_t *op_ctx) +{ + int ret = -1; + glusterd_hooks_stub_t *hooks_stub = NULL; + + GF_ASSERT (stub); + if (!stub) + goto out; + + hooks_stub = GF_CALLOC (1, sizeof (*hooks_stub), + gf_gld_mt_hooks_stub_t); + if (!hooks_stub) + goto out; + + INIT_LIST_HEAD (&hooks_stub->all_hooks); + hooks_stub->op = op; + hooks_stub->scriptdir = gf_strdup (scriptdir); + if (!hooks_stub->scriptdir) + goto out; + + hooks_stub->op_ctx = dict_copy_with_ref (op_ctx, hooks_stub->op_ctx); + if (!hooks_stub->op_ctx) + goto out; + + *stub = hooks_stub; + ret = 0; +out: + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to initialize " + "post hooks stub"); + glusterd_hooks_stub_cleanup (hooks_stub); + } + + return ret; +} + +void +glusterd_hooks_stub_cleanup (glusterd_hooks_stub_t *stub) +{ + if (!stub) { + gf_log_callingfn (THIS->name, GF_LOG_WARNING, + "hooks_stub is NULL"); + return; + } + + if (stub->op_ctx) + dict_unref (stub->op_ctx); + + GF_FREE (stub->scriptdir); + + GF_FREE (stub); +} + +static void* +hooks_worker (void *args) +{ + glusterd_conf_t *conf = NULL; + glusterd_hooks_private_t *hooks_priv = NULL; + glusterd_hooks_stub_t *stub = NULL; + + THIS = args; + conf = THIS->private; + hooks_priv = conf->hooks_priv; + + for (;;) { + pthread_mutex_lock (&hooks_priv->mutex); + { + while (list_empty (&hooks_priv->list)) { + pthread_cond_wait (&hooks_priv->cond, + &hooks_priv->mutex); + } + stub = list_entry (hooks_priv->list.next, + glusterd_hooks_stub_t, + all_hooks); + list_del_init (&stub->all_hooks); + hooks_priv->waitcount--; + + } + pthread_mutex_unlock (&hooks_priv->mutex); + + glusterd_hooks_run_hooks (stub->scriptdir, stub->op, + stub->op_ctx, GD_COMMIT_HOOK_POST); + glusterd_hooks_stub_cleanup (stub); + } + + return NULL; +} + +int +glusterd_hooks_priv_init (glusterd_hooks_private_t **new) +{ + int ret = -1; + glusterd_hooks_private_t *hooks_priv = NULL; + + if (!new) + goto out; + + hooks_priv = GF_CALLOC (1, sizeof (*hooks_priv), + gf_gld_mt_hooks_priv_t); + if (!hooks_priv) + goto 
out; + + pthread_mutex_init (&hooks_priv->mutex, NULL); + pthread_cond_init (&hooks_priv->cond, NULL); + INIT_LIST_HEAD (&hooks_priv->list); + hooks_priv->waitcount = 0; + + *new = hooks_priv; + ret = 0; +out: + return ret; +} + +int +glusterd_hooks_spawn_worker (xlator_t *this) +{ + int ret = -1; + glusterd_conf_t *conf = NULL; + glusterd_hooks_private_t *hooks_priv = NULL; + + + ret = glusterd_hooks_priv_init (&hooks_priv); + if (ret) + goto out; + + conf = this->private; + conf->hooks_priv = hooks_priv; + ret = pthread_create (&hooks_priv->worker, NULL, hooks_worker, + (void *)this); + if (ret) + gf_log (this->name, GF_LOG_CRITICAL, "Failed to spawn post " + "hooks worker thread"); +out: + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.h b/xlators/mgmt/glusterd/src/glusterd-hooks.h new file mode 100644 index 000000000..c597ddd2a --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-hooks.h @@ -0,0 +1,89 @@ +/* + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_HOOKS_H_ +#define _GLUSTERD_HOOKS_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <fnmatch.h> + +#define GLUSTERD_GET_HOOKS_DIR(path, version, priv) \ + snprintf (path, PATH_MAX, "%s/hooks/%d", priv->workdir,\ + version); + +#define GLUSTERD_HOOK_VER 1 + +#define GD_HOOKS_SPECIFIC_KEY "user.*" + +typedef enum glusterd_commit_hook_type { + GD_COMMIT_HOOK_NONE = 0, + GD_COMMIT_HOOK_PRE, + GD_COMMIT_HOOK_POST, + GD_COMMIT_HOOK_MAX +} glusterd_commit_hook_type_t; + +typedef struct hooks_private { + struct list_head list; + int waitcount; //debug purposes + pthread_mutex_t mutex; + pthread_cond_t cond; + pthread_t worker; +} glusterd_hooks_private_t; + +typedef struct hooks_stub { + struct list_head all_hooks; + char *scriptdir; + glusterd_op_t op; + dict_t *op_ctx; + +} glusterd_hooks_stub_t; + + +static inline gf_boolean_t +is_key_glusterd_hooks_friendly (char *key) +{ + gf_boolean_t is_friendly = _gf_false; + + /* This is very specific to hooks friendly behavior */ + if (fnmatch (GD_HOOKS_SPECIFIC_KEY, key, FNM_NOESCAPE) == 0) { + gf_log (THIS->name, GF_LOG_DEBUG, "user namespace key %s", key); + is_friendly = _gf_true; + } + + return is_friendly; +} + +int +glusterd_hooks_create_hooks_directory (char *basedir); + +char * +glusterd_hooks_get_hooks_cmd_subdir (glusterd_op_t op); + +int +glusterd_hooks_run_hooks (char *hooks_path, glusterd_op_t op, dict_t *op_ctx, + glusterd_commit_hook_type_t type); +int +glusterd_hooks_spawn_worker (xlator_t *this); + +int +glusterd_hooks_stub_init (glusterd_hooks_stub_t **stub, char *scriptdir, + glusterd_op_t op, dict_t *op_ctx); +void +glusterd_hooks_stub_cleanup (glusterd_hooks_stub_t *stub); + +int +glusterd_hooks_post_stub_enqueue (char *scriptdir, glusterd_op_t op, + dict_t *op_ctx); +int +glusterd_hooks_priv_init (glusterd_hooks_private_t **new); +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c new file mode 100644 index 000000000..0af2a186f --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-locks.c @@ -0,0 +1,637 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "common-utils.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-locks.h" +#include "run.h" +#include "syscall.h" + +#include <signal.h> + +#define GF_MAX_LOCKING_ENTITIES 2 + +/* Valid entities that the mgmt_v3 lock can hold locks upon * + * To add newer entities to be locked, we can just add more * + * entries to this table along with the type and default value */ +valid_entities valid_types[] = { + { "vol", _gf_true }, + { "snap", _gf_false }, + { NULL }, +}; + +static dict_t *mgmt_v3_lock; + +/* Checks if the lock request is for a valid entity */ +gf_boolean_t +glusterd_mgmt_v3_is_type_valid (char *type) +{ + int32_t i = 0; + gf_boolean_t ret = _gf_false; + + GF_ASSERT (type); + + for (i = 0; valid_types[i].type; i++) { + if (!strcmp (type, valid_types[i].type)) { + ret = _gf_true; + break; + } + } + + return ret; +} + +/* Initialize the global mgmt_v3 lock list(dict) when + * glusterd is spawned */ +int32_t +glusterd_mgmt_v3_lock_init () +{ + int32_t ret = -1; + + mgmt_v3_lock = dict_new (); + if (!mgmt_v3_lock) { + ret = -1; + goto out; + } + + ret = 0; +out: + return ret; +} + +/* Destroy the global mgmt_v3 lock list(dict) when + * glusterd cleanup is performed */ +void +glusterd_mgmt_v3_lock_fini () +{ + if (mgmt_v3_lock) + dict_destroy (mgmt_v3_lock); +} + +int32_t +glusterd_get_mgmt_v3_lock_owner (char *key, uuid_t *uuid) +{ + int32_t ret = -1; + mgmt_v3_lock_obj *lock_obj = NULL; + uuid_t no_owner = {"\0"}; + xlator_t *this = NULL; + + GF_ASSERT(THIS); + this = THIS; + + if (!key || !uuid) { + gf_log (this->name, GF_LOG_ERROR, "key or uuid is null."); + ret = -1; + goto out; + } + + ret = dict_get_bin(mgmt_v3_lock, key, (void **) &lock_obj); + if (!ret) + uuid_copy (*uuid, lock_obj->lock_owner); + else + uuid_copy (*uuid, no_owner); + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* This function is called with the locked_count and type, to * + * release all the acquired locks. 
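The acquire/release helpers that follow form an all-or-nothing protocol: lock the named entities one at a time and, on the first failure, release exactly the ones already taken. A reduced sketch; lock_one and unlock_one are stand-ins for glusterd_mgmt_v3_lock and glusterd_mgmt_v3_unlock:

/* Stand-ins for glusterd_mgmt_v3_lock / glusterd_mgmt_v3_unlock. */
static int  lock_one (int id)   { (void) id; return 0; }
static void unlock_one (int id) { (void) id; }

int
lock_all_or_none (const int *ids, int count)
{
        int i, locked = 0;

        for (i = 0; i < count; i++) {
                if (lock_one (ids[i]) != 0)
                        break;
                locked++;
        }

        if (locked == count)
                return 0;

        while (locked--)                /* roll back the partial acquisition */
                unlock_one (ids[locked]);
        return -1;
}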
*/
+static int32_t
+glusterd_release_multiple_locks_per_entity (dict_t *dict, uuid_t uuid,
+                                            int32_t locked_count,
+                                            char *type)
+{
+        char name_buf[PATH_MAX] = "";
+        char *name = NULL;
+        int32_t i = -1;
+        int32_t op_ret = 0;
+        int32_t ret = -1;
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT(this);
+        GF_ASSERT (dict);
+        GF_ASSERT (type);
+
+        if (locked_count == 0) {
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "No %s locked as part of this transaction",
+                        type);
+                goto out;
+        }
+
+        /* Release all the locks held */
+        for (i = 0; i < locked_count; i++) {
+                snprintf (name_buf, sizeof(name_buf),
+                          "%sname%d", type, i+1);
+
+                /* Looking for volname1, volname2 or snapname1, *
+                 * snapname2 as key in the dict */
+                ret = dict_get_str (dict, name_buf, &name);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Unable to get %s locked_count = %d",
+                                name_buf, locked_count);
+                        op_ret = ret;
+                        continue;
+                }
+
+                ret = glusterd_mgmt_v3_unlock (name, uuid, type);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to release lock for %s.",
+                                name);
+                        op_ret = ret;
+                }
+        }
+
+out:
+        gf_log (this->name, GF_LOG_TRACE, "Returning %d", op_ret);
+        return op_ret;
+}
+
+/* Given the count and type of the entity this function acquires *
+ * locks on multiple elements of the same entity. For example: *
+ * If type is "vol" this function tries to acquire locks on multiple *
+ * volumes */
+static int32_t
+glusterd_acquire_multiple_locks_per_entity (dict_t *dict, uuid_t uuid,
+                                            int32_t count, char *type)
+{
+        char name_buf[PATH_MAX] = "";
+        char *name = NULL;
+        int32_t i = -1;
+        int32_t ret = -1;
+        int32_t locked_count = 0;
+        xlator_t *this = NULL;
+
+        this = THIS;
+        GF_ASSERT(this);
+        GF_ASSERT (dict);
+        GF_ASSERT (type);
+
+        /* Locking one element after the other */
+        for (i = 0; i < count; i++) {
+                snprintf (name_buf, sizeof(name_buf),
+                          "%sname%d", type, i+1);
+
+                /* Looking for volname1, volname2 or snapname1, *
+                 * snapname2 as key in the dict */
+                ret = dict_get_str (dict, name_buf, &name);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Unable to get %s count = %d",
+                                name_buf, count);
+                        break;
+                }
+
+                ret = glusterd_mgmt_v3_lock (name, uuid, type);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Failed to acquire lock for %s %s "
+                                "on behalf of %s. Reversing "
+                                "this transaction", type, name,
+                                uuid_utoa(uuid));
+                        break;
+                }
+                locked_count++;
+        }
+
+        if (count == locked_count) {
+                /* If all locking ops went successfully, return as success */
+                ret = 0;
+                goto out;
+        }
+
+        /* If we failed to lock one element, unlock others and return failure */
+        ret = glusterd_release_multiple_locks_per_entity (dict, uuid,
+                                                          locked_count,
+                                                          type);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to release multiple %s locks",
+                        type);
+        }
+        ret = -1;
+out:
+        gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret);
+        return ret;
+}
+
+/* Given the type of entity, this function figures out if it should unlock a *
+ * single element or multiple elements of the said entity.
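+ * The choice is keyed off the presence of a "<type>count" entry in the dict.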
For example: * + * if the type is "vol", this function will accordingly unlock a single volume * + * or multiple volumes */ +static int32_t +glusterd_mgmt_v3_unlock_entity (dict_t *dict, uuid_t uuid, char *type, + gf_boolean_t default_value) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t count = -1; + int32_t ret = -1; + gf_boolean_t hold_locks = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + snprintf (name_buf, sizeof(name_buf), "hold_%s_locks", type); + hold_locks = dict_get_str_boolean (dict, name_buf, default_value); + + if (hold_locks == _gf_false) { + /* Locks were not held for this particular entity * + * Hence nothing to release */ + ret = 0; + goto out; + } + + /* Looking for volcount or snapcount in the dict */ + snprintf (name_buf, sizeof(name_buf), "%scount", type); + ret = dict_get_int32 (dict, name_buf, &count); + if (ret) { + /* count is not present. Only one * + * element name needs to be unlocked */ + snprintf (name_buf, sizeof(name_buf), "%sname", + type); + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %sname", type); + goto out; + } + + ret = glusterd_mgmt_v3_unlock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release lock for %s %s " + "on behalf of %s.", type, name, + uuid_utoa(uuid)); + goto out; + } + } else { + /* Unlocking one element name after another */ + ret = glusterd_release_multiple_locks_per_entity (dict, + uuid, + count, + type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release all %s locks", type); + goto out; + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Given the type of entity, this function figures out if it should lock a * + * single element or multiple elements of the said entity. For example: * + * if the type is "vol", this function will accordingly lock a single volume * + * or multiple volumes */ +static int32_t +glusterd_mgmt_v3_lock_entity (dict_t *dict, uuid_t uuid, char *type, + gf_boolean_t default_value) +{ + char name_buf[PATH_MAX] = ""; + char *name = NULL; + int32_t count = -1; + int32_t ret = -1; + gf_boolean_t hold_locks = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT (dict); + GF_ASSERT (type); + + snprintf (name_buf, sizeof(name_buf), "hold_%s_locks", type); + hold_locks = dict_get_str_boolean (dict, name_buf, default_value); + + if (hold_locks == _gf_false) { + /* Not holding locks for this particular entity */ + ret = 0; + goto out; + } + + /* Looking for volcount or snapcount in the dict */ + snprintf (name_buf, sizeof(name_buf), "%scount", type); + ret = dict_get_int32 (dict, name_buf, &count); + if (ret) { + /* count is not present. 
Only one * + * element name needs to be locked */ + snprintf (name_buf, sizeof(name_buf), "%sname", + type); + ret = dict_get_str (dict, name_buf, &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %sname", type); + goto out; + } + + ret = glusterd_mgmt_v3_lock (name, uuid, type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire lock for %s %s " + "on behalf of %s.", type, name, + uuid_utoa(uuid)); + goto out; + } + } else { + /* Locking one element name after another */ + ret = glusterd_acquire_multiple_locks_per_entity (dict, + uuid, + count, + type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire all %s locks", type); + goto out; + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Try to release locks of multiple entities like * + * volume, snaps etc. */ +int32_t +glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid) +{ + int32_t i = -1; + int32_t ret = -1; + int32_t op_ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "dict is null."); + ret = -1; + goto out; + } + + for (i = 0; valid_types[i].type; i++) { + ret = glusterd_mgmt_v3_unlock_entity + (dict, uuid, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to unlock all %s", + valid_types[i].type); + op_ret = ret; + } + } + + ret = op_ret; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Try to acquire locks on multiple entities like * + * volume, snaps etc. */ +int32_t +glusterd_multiple_mgmt_v3_lock (dict_t *dict, uuid_t uuid) +{ + int32_t i = -1; + int32_t ret = -1; + int32_t locked_count = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "dict is null."); + ret = -1; + goto out; + } + + /* Locking one entity after other */ + for (i = 0; valid_types[i].type; i++) { + ret = glusterd_mgmt_v3_lock_entity + (dict, uuid, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to lock all %s", + valid_types[i].type); + break; + } + locked_count++; + } + + if (locked_count == GF_MAX_LOCKING_ENTITIES) { + /* If all locking ops went successfuly, return as success */ + ret = 0; + goto out; + } + + /* If we failed to lock one entity, unlock others and return failure */ + for (i = 0; i < locked_count; i++) { + ret = glusterd_mgmt_v3_unlock_entity + (dict, uuid, + valid_types[i].type, + valid_types[i].default_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to unlock all %s", + valid_types[i].type); + } + } + ret = -1; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_mgmt_v3_lock (const char *name, uuid_t uuid, char *type) +{ + char key[PATH_MAX] = ""; + int32_t ret = -1; + mgmt_v3_lock_obj *lock_obj = NULL; + gf_boolean_t is_valid = _gf_true; + uuid_t owner = {0}; + xlator_t *this = NULL; + + GF_ASSERT(THIS); + this = THIS; + + if (!name || !type) { + gf_log (this->name, GF_LOG_ERROR, "name or type is null."); + ret = -1; + goto out; + } + + is_valid = glusterd_mgmt_v3_is_type_valid (type); + if (is_valid != _gf_true) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid entity. 
Cannot perform locking " + "operation on %s types", type); + ret = -1; + goto out; + } + + ret = snprintf (key, sizeof(key), "%s_%s", name, type); + if (ret != strlen(name) + 1 + strlen(type)) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Unable to create key"); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Trying to acquire lock of %s %s for %s as %s", + type, name, uuid_utoa (uuid), key); + + ret = glusterd_get_mgmt_v3_lock_owner (key, &owner); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get mgmt_v3 lock owner"); + goto out; + } + + /* If the lock has already been held for the given volume + * we fail */ + if (!uuid_is_null (owner)) { + gf_log (this->name, GF_LOG_ERROR, "Lock for %s held by %s", + name, uuid_utoa (owner)); + ret = -1; + goto out; + } + + lock_obj = GF_CALLOC (1, sizeof(mgmt_v3_lock_obj), + gf_common_mt_mgmt_v3_lock_obj_t); + if (!lock_obj) { + ret = -1; + goto out; + } + + uuid_copy (lock_obj->lock_owner, uuid); + + ret = dict_set_bin (mgmt_v3_lock, key, lock_obj, + sizeof(*lock_obj)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set lock owner in mgmt_v3 lock"); + if (lock_obj) + GF_FREE (lock_obj); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Lock for %s %s successfully held by %s", + type, name, uuid_utoa (uuid)); + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_mgmt_v3_unlock (const char *name, uuid_t uuid, char *type) +{ + char key[PATH_MAX] = ""; + int32_t ret = -1; + gf_boolean_t is_valid = _gf_true; + uuid_t owner = {0}; + xlator_t *this = NULL; + + GF_ASSERT(THIS); + this = THIS; + + if (!name || !type) { + gf_log (this->name, GF_LOG_ERROR, "name is null."); + ret = -1; + goto out; + } + + is_valid = glusterd_mgmt_v3_is_type_valid (type); + if (is_valid != _gf_true) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid entity. Cannot perform unlocking " + "operation on %s types", type); + ret = -1; + goto out; + } + + ret = snprintf (key, sizeof(key), "%s_%s", + name, type); + if (ret != strlen(name) + 1 + strlen(type)) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create key"); + ret = -1; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, + "Trying to release lock of %s %s for %s as %s", + type, name, uuid_utoa (uuid), key); + + ret = glusterd_get_mgmt_v3_lock_owner (key, &owner); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get mgmt_v3 lock owner"); + goto out; + } + + if (uuid_is_null (owner)) { + gf_log (this->name, GF_LOG_ERROR, + "Lock for %s %s not held", type, name); + ret = -1; + goto out; + } + + ret = uuid_compare (uuid, owner); + if (ret) { + + gf_log (this->name, GF_LOG_ERROR, "Lock owner mismatch. " + "Lock for %s %s held by %s", + type, name, uuid_utoa (owner)); + goto out; + } + + /* Removing the mgmt_v3 lock from the global list */ + dict_del (mgmt_v3_lock, key); + + gf_log (this->name, GF_LOG_DEBUG, + "Lock for %s %s successfully released", + type, name); + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.h b/xlators/mgmt/glusterd/src/glusterd-locks.h new file mode 100644 index 000000000..83eb8c997 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-locks.h @@ -0,0 +1,51 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
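Taken together, mgmt_v3_lock is just a dict whose keys are "<name>_<type>" and whose values record the owner uuid. A minimal sketch of guarding one volume with the two primitives above (the volume name "patchy" and the wrapper are illustrative):

static int32_t
example_with_volume_locked (uuid_t my_uuid)
{
        int32_t ret = glusterd_mgmt_v3_lock ("patchy", my_uuid, "vol");

        if (ret)
                return ret;     /* held by another owner, or invalid type */

        /* ... the guarded operation runs here ... */

        return glusterd_mgmt_v3_unlock ("patchy", my_uuid, "vol");
}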
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_LOCKS_H_ +#define _GLUSTERD_LOCKS_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +typedef struct mgmt_v3_lock_object_ { + uuid_t lock_owner; +} mgmt_v3_lock_obj; + +typedef struct mgmt_v3_lock_valid_entities { + char *type; /* Entity type like vol, snap */ + gf_boolean_t default_value; /* The default value that * + * determines if the locks * + * should be held for that * + * entity */ +} valid_entities; + +int32_t +glusterd_mgmt_v3_lock_init (); + +void +glusterd_mgmt_v3_lock_fini (); + +int32_t +glusterd_get_mgmt_v3_lock_owner (char *volname, uuid_t *uuid); + +int32_t +glusterd_mgmt_v3_lock (const char *key, uuid_t uuid, char *type); + +int32_t +glusterd_mgmt_v3_unlock (const char *key, uuid_t uuid, char *type); + +int32_t +glusterd_multiple_mgmt_v3_lock (dict_t *dict, uuid_t uuid); + +int32_t +glusterd_multiple_mgmt_v3_unlock (dict_t *dict, uuid_t uuid); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-log-ops.c b/xlators/mgmt/glusterd/src/glusterd-log-ops.c new file mode 100644 index 000000000..33bd95c03 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-log-ops.c @@ -0,0 +1,271 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
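For the multiple-entity entry points declared above, the caller describes what to lock inside the request dict itself. A sketch of the expected layout for two volumes, assuming the defaults in valid_types ("vol" locks by default, "snap" does not); the volume names are illustrative:

static int32_t
example_build_lock_dict (dict_t *dict)
{
        int32_t ret = dict_set_int32 (dict, "volcount", 2);

        if (!ret)
                ret = dict_set_str (dict, "volname1", "patchy");
        if (!ret)
                ret = dict_set_str (dict, "volname2", "patchy2");

        /* hold_vol_locks defaults to true, so this dict is enough for
         * glusterd_multiple_mgmt_v3_lock (dict, MY_UUID) to take both */
        return ret;
}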
+*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "common-utils.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" + +#include <signal.h> + +int +__glusterd_handle_log_rotate (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_LOG_ROTATE; + char *volname = NULL; + char msg[2048] = {0,}; + xlator_t *this = NULL; + + GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf (msg, sizeof (msg), "Unable to decode the " + "command"); + goto out; + } + } + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + goto out; + } + + gf_log (this->name, GF_LOG_INFO, "Received log rotate req " + "for volume %s", volname); + + ret = dict_set_uint64 (dict, "rotate-key", (uint64_t)time (NULL)); + if (ret) + goto out; + + ret = glusterd_op_begin_synctask (req, GD_OP_LOG_ROTATE, dict); + +out: + if (ret) { + if (msg[0] == '\0') + snprintf (msg, sizeof (msg), "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, msg); + } + + free (cli_req.dict.dict_val); + return ret; +} + +int +glusterd_handle_log_rotate (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_log_rotate); +} + +/* op-sm */ +int +glusterd_op_stage_log_rotate (dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + gf_boolean_t exists = _gf_false; + char msg[2048] = {0}; + char *brick = NULL; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + exists = glusterd_check_volume_exists (volname); + ret = glusterd_volinfo_find (volname, &volinfo); + if (!exists) { + snprintf (msg, sizeof (msg), "Volume %s does not exist", + volname); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + if (_gf_false == glusterd_is_volume_started (volinfo)) { + snprintf (msg, sizeof (msg), "Volume %s needs to be started before" + " log rotate.", volname); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "brick", &brick); + /* If no brick is specified, do log-rotate for + all the bricks in the volume */ + if (ret) { + ret = 0; + goto out; + } + + ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, NULL); + if (ret) { + snprintf (msg, sizeof (msg), "Incorrect brick %s " + "for volume %s", brick, volname); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + + +int +glusterd_op_log_rotate (dict_t *dict) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = 
NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + char *volname = NULL; + char *brick = NULL; + char logfile[PATH_MAX] = {0,}; + char pidfile[PATH_MAX] = {0,}; + FILE *file = NULL; + pid_t pid = 0; + uint64_t key = 0; + int valid_brick = 0; + glusterd_brickinfo_t *tmpbrkinfo = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "volname not found"); + goto out; + } + + ret = dict_get_uint64 (dict, "rotate-key", &key); + if (ret) { + gf_log ("", GF_LOG_ERROR, "rotate key not found"); + goto out; + } + + ret = dict_get_str (dict, "brick", &brick); + /* If no brick is specified, do log-rotate for + all the bricks in the volume */ + if (ret) + goto cont; + + ret = glusterd_brickinfo_new_from_brick (brick, &tmpbrkinfo); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, + "cannot get brickinfo from brick"); + goto out; + } + +cont: + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; + + ret = -1; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + if (brick && + (strcmp (tmpbrkinfo->hostname, brickinfo->hostname) || + strcmp (tmpbrkinfo->path,brickinfo->path))) + continue; + + valid_brick = 1; + + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); + file = fopen (pidfile, "r+"); + if (!file) { + gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s", + pidfile); + ret = -1; + goto out; + } + + ret = fscanf (file, "%d", &pid); + if (ret <= 0) { + gf_log ("", GF_LOG_ERROR, "Unable to read pidfile: %s", + pidfile); + ret = -1; + goto out; + } + fclose (file); + file = NULL; + + snprintf (logfile, PATH_MAX, "%s.%"PRIu64, + brickinfo->logfile, key); + + ret = rename (brickinfo->logfile, logfile); + if (ret) + gf_log ("", GF_LOG_WARNING, "rename failed"); + + ret = kill (pid, SIGHUP); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to SIGHUP to %d", pid); + goto out; + } + ret = 0; + + /* If request was for brick, only one iteration is enough */ + if (brick) + break; + } + + if (ret && !valid_brick) + ret = 0; + +out: + if (tmpbrkinfo) + glusterd_brickinfo_delete (tmpbrkinfo); + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h index 5ee9375dd..e6f6a0333 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h +++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h @@ -1,29 +1,19 @@ /* - Copyright (c) 2008-2010 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. 
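The per-brick loop above reduces to a rename-plus-signal handshake: the brick keeps writing to the renamed file until SIGHUP makes it reopen the original path. A condensed sketch of that contract (names illustrative; the real loop also filters bricks by MY_UUID and the optional brick argument):

static int
example_rotate_one_log (const char *live_log, pid_t brick_pid, uint64_t key)
{
        char rotated[PATH_MAX] = "";

        snprintf (rotated, sizeof (rotated), "%s.%"PRIu64, live_log, key);
        if (rename (live_log, rotated))
                return -1;

        /* the brick reopens live_log on SIGHUP, completing the rotation */
        return kill (brick_pid, SIGHUP);
}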
+ This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ - #ifndef __GLUSTERD_MEM_TYPES_H__ #define __GLUSTERD_MEM_TYPES_H__ #include "mem-types.h" -enum gf_gld_mem_types_ { +typedef enum gf_gld_mem_types_ { gf_gld_mt_dir_entry_t = gf_common_mt_end + 1, gf_gld_mt_volfile_ctx = gf_common_mt_end + 2, gf_gld_mt_glusterd_state_t = gf_common_mt_end + 3, @@ -59,7 +49,27 @@ enum gf_gld_mem_types_ { gf_gld_mt_log_locate_ctx_t = gf_common_mt_end + 33, gf_gld_mt_log_rotate_ctx_t = gf_common_mt_end + 34, gf_gld_mt_peerctx_t = gf_common_mt_end + 35, - gf_gld_mt_end = gf_common_mt_end + 36 -}; + gf_gld_mt_sm_tr_log_t = gf_common_mt_end + 36, + gf_gld_mt_pending_node_t = gf_common_mt_end + 37, + gf_gld_mt_brick_rsp_ctx_t = gf_common_mt_end + 38, + gf_gld_mt_mop_brick_req_t = gf_common_mt_end + 39, + gf_gld_mt_op_allack_ctx_t = gf_common_mt_end + 40, + gf_gld_mt_linearr = gf_common_mt_end + 41, + gf_gld_mt_linebuf = gf_common_mt_end + 42, + gf_gld_mt_mount_pattern = gf_common_mt_end + 43, + gf_gld_mt_mount_comp_container = gf_common_mt_end + 44, + gf_gld_mt_mount_component = gf_common_mt_end + 45, + gf_gld_mt_mount_spec = gf_common_mt_end + 46, + gf_gld_mt_georep_meet_spec = gf_common_mt_end + 47, + gf_gld_mt_nodesrv_t = gf_common_mt_end + 48, + gf_gld_mt_charptr = gf_common_mt_end + 49, + gf_gld_mt_hooks_stub_t = gf_common_mt_end + 50, + gf_gld_mt_hooks_priv_t = gf_common_mt_end + 51, + gf_gld_mt_mop_commit_req_t = gf_common_mt_end + 52, + gf_gld_mt_int = gf_common_mt_end + 53, + gf_gld_mt_snap_t = gf_common_mt_end + 54, + gf_gld_mt_missed_snapinfo_t = gf_common_mt_end + 55, + gf_gld_mt_end = gf_common_mt_end + 56, +} gf_gld_mem_types_t; #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c new file mode 100644 index 000000000..a5f38ce9c --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c @@ -0,0 +1,924 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
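Every entry in the enum above tags allocations for glusterfs's memory accounting: call sites pass the type to GF_CALLOC/GF_MALLOC so usage can be attributed per structure. A one-line sketch with the hooks stub type added in this change (the wrapper itself is illustrative):

static glusterd_hooks_stub_t *
example_stub_alloc (void)
{
        /* accounted under gf_gld_mt_hooks_stub_t */
        return GF_CALLOC (1, sizeof (glusterd_hooks_stub_t),
                          gf_gld_mt_hooks_stub_t);
}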
+*/ +/* rpc related syncops */ +#include "rpc-clnt.h" +#include "protocol-common.h" +#include "xdr-generic.h" +#include "glusterd1-xdr.h" +#include "glusterd-syncop.h" + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-locks.h" +#include "glusterd-mgmt.h" +#include "glusterd-op-sm.h" + +static int +glusterd_mgmt_v3_null (rpcsvc_request_t *req) +{ + return 0; +} + +static int +glusterd_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, int32_t status) +{ + + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + + glusterd_get_uuid (&rsp.uuid); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + + gf_log (this->name, GF_LOG_DEBUG, + "Responded to mgmt_v3 lock, ret: %d", ret); + + return ret; +} + +static int +glusterd_synctasked_mgmt_v3_lock (rpcsvc_request_t *req, + gd1_mgmt_v3_lock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (ctx); + GF_ASSERT (ctx->dict); + + /* Trying to acquire multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_lock (ctx->dict, ctx->uuid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire mgmt_v3 locks for %s", + uuid_utoa (ctx->uuid)); + + ret = glusterd_mgmt_v3_lock_send_resp (req, ret); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_op_state_machine_mgmt_v3_lock (rpcsvc_request_t *req, + gd1_mgmt_v3_lock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + glusterd_txn_opinfo_init (&txn_op_info, NULL, &lock_req->op, + ctx->dict, req); + + ret = glusterd_set_txn_opinfo (&lock_req->txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, + &lock_req->txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_LOCK"); + +out: + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_handle_mgmt_v3_lock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_v3_lock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_op_lock_ctx_t *ctx = NULL; + xlator_t *this = NULL; + gf_boolean_t is_synctasked = _gf_false; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received mgmt_v3 lock req " + "from uuid: %s", uuid_utoa (lock_req.uuid)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + is_synctasked = dict_get_str_boolean (ctx->dict, + "is_synctasked", _gf_false); + if (is_synctasked) + ret = glusterd_synctasked_mgmt_v3_lock (req, &lock_req, ctx); + else + ret = glusterd_op_state_machine_mgmt_v3_lock (req, &lock_req, + ctx); + +out: + + if (ret) { + if (ctx->dict) + dict_unref (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + free (lock_req.dict.dict_val); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_pre_validate_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_pre_val_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, + "Responded to pre validation, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_pre_validate_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_pre_val_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode pre validation " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_pre_validate_fn (op_req.op, dict, &op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pre Validation failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_pre_validate_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send Pre Validation " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_brick_op_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_brick_op_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, + "Responded to brick op, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_brick_op_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_brick_op_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode brick op " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_brick_op_fn (op_req.op, dict, &op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Brick Op failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_brick_op_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send brick op " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_commit_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_commit_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, "Responded to commit, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_commit_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_commit_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode commit " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_commit_fn (op_req.op, dict, &op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "commit failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_commit_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send commit " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_post_validate_send_resp (rpcsvc_request_t *req, + int32_t op, int32_t status, + char *op_errstr, dict_t *rsp_dict) +{ + gd1_mgmt_v3_post_val_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + glusterd_get_uuid (&rsp.uuid); + rsp.op = op; + if (op_errstr) + rsp.op_errstr = op_errstr; + else + rsp.op_errstr = ""; + + ret = dict_allocate_and_serialize (rsp_dict, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get serialized length of dict"); + goto out; + } + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp); + + GF_FREE (rsp.dict.dict_val); +out: + gf_log (this->name, GF_LOG_DEBUG, + "Responded to post validation, ret: %d", ret); + return ret; +} + +static int +glusterd_handle_post_validate_fn (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_val_req op_req = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &op_req, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode post validation " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. 
Ignoring request.", + uuid_utoa (op_req.uuid)); + ret = -1; + goto out; + } + + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (op_req.dict.dict_val, + op_req.dict.dict_len, &dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; + } + + ret = gd_mgmt_v3_post_validate_fn (op_req.op, op_req.op_ret, dict, + &op_errstr, rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Post Validation failed on operation %s", + gd_op_list[op_req.op]); + } + + ret = glusterd_mgmt_v3_post_validate_send_resp (req, op_req.op, + ret, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to send Post Validation " + "response for operation %s", + gd_op_list[op_req.op]); + goto out; + } + +out: + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + free (op_req.dict.dict_val); + + if (dict) + dict_unref (dict); + + if (rsp_dict) + dict_unref (rsp_dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, int32_t status) +{ + + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + + glusterd_get_uuid (&rsp.uuid); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + + gf_log (this->name, GF_LOG_DEBUG, + "Responded to mgmt_v3 unlock, ret: %d", ret); + + return ret; +} + +static int +glusterd_synctasked_mgmt_v3_unlock (rpcsvc_request_t *req, + gd1_mgmt_v3_unlock_req *unlock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (ctx); + + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock (ctx->dict, ctx->uuid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release mgmt_v3 locks for %s", + uuid_utoa(ctx->uuid)); + } + + ret = glusterd_mgmt_v3_unlock_send_resp (req, ret); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + + +static int +glusterd_op_state_machine_mgmt_v3_unlock (rpcsvc_request_t *req, + gd1_mgmt_v3_unlock_req *lock_req, + glusterd_op_lock_ctx_t *ctx) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, + &lock_req->txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_UNLOCK"); + + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int +glusterd_handle_mgmt_v3_unlock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_v3_unlock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + gf_boolean_t is_synctasked = _gf_false; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, 
GF_LOG_DEBUG, "Received mgmt_v3 unlock req " + "from uuid: %s", uuid_utoa (lock_req.uuid)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + is_synctasked = dict_get_str_boolean (ctx->dict, + "is_synctasked", _gf_false); + if (is_synctasked) + ret = glusterd_synctasked_mgmt_v3_unlock (req, &lock_req, ctx); + else + ret = glusterd_op_state_machine_mgmt_v3_unlock (req, &lock_req, + ctx); + +out: + + if (ret) { + if (ctx->dict) + dict_unref (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + free (lock_req.dict.dict_val); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_handle_mgmt_v3_lock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_mgmt_v3_lock_fn); +} + +static int +glusterd_handle_pre_validate (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_pre_validate_fn); +} + +static int +glusterd_handle_brick_op (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_brick_op_fn); +} + +static int +glusterd_handle_commit (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_commit_fn); +} + +static int +glusterd_handle_post_validate (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_post_validate_fn); +} + +int +glusterd_handle_mgmt_v3_unlock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_mgmt_v3_unlock_fn); +} + +rpcsvc_actor_t gd_svc_mgmt_v3_actors[] = { + [GLUSTERD_MGMT_V3_NULL] = { "NULL", GLUSTERD_MGMT_V3_NULL, glusterd_mgmt_v3_null, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_LOCK] = { "MGMT_V3_LOCK", GLUSTERD_MGMT_V3_LOCK, glusterd_handle_mgmt_v3_lock, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_PRE_VALIDATE] = { "PRE_VAL", GLUSTERD_MGMT_V3_PRE_VALIDATE, glusterd_handle_pre_validate, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_BRICK_OP] = { "BRCK_OP", GLUSTERD_MGMT_V3_BRICK_OP, glusterd_handle_brick_op, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_COMMIT] = { "COMMIT", GLUSTERD_MGMT_V3_COMMIT, glusterd_handle_commit, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_POST_VALIDATE] = { "POST_VAL", GLUSTERD_MGMT_V3_POST_VALIDATE, glusterd_handle_post_validate, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_UNLOCK] = { "MGMT_V3_UNLOCK", GLUSTERD_MGMT_V3_UNLOCK, glusterd_handle_mgmt_v3_unlock, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gd_svc_mgmt_v3_prog = { + .progname = "GlusterD svc mgmt v3", + .prognum = GD_MGMT_V3_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .numactors = GLUSTERD_MGMT_V3_MAXVALUE, + .actors = gd_svc_mgmt_v3_actors, + .synctask = _gf_true, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c new file mode 100644 index 000000000..d52532e54 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c @@ -0,0 +1,1893 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. 
<http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +/* rpc related syncops */ +#include "rpc-clnt.h" +#include "protocol-common.h" +#include "xdr-generic.h" +#include "glusterd1-xdr.h" +#include "glusterd-syncop.h" + +#include "glusterd.h" +#include "glusterd-utils.h" +#include "glusterd-locks.h" +#include "glusterd-mgmt.h" +#include "glusterd-op-sm.h" + +extern struct rpc_clnt_program gd_mgmt_v3_prog; + + +static void +gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, + glusterd_peerinfo_t *peerinfo, u_char *uuid) +{ + char *peer_str = NULL; + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + int32_t len = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (args); + GF_ASSERT (uuid); + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (uuid); + + if (op_errstr && strcmp (op_errstr, "")) { + len = snprintf (err_str, sizeof(err_str) - 1, + "Error: %s", op_errstr); + err_str[len] = '\0'; + } + + switch (op_code){ + case GLUSTERD_MGMT_V3_LOCK: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Locking failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_PRE_VALIDATE: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Pre Validation failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_BRICK_OP: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Brick ops failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_COMMIT: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Commit failed on %s. %s", + peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_POST_VALIDATE: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Post Validation failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_UNLOCK: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Unlocking failed " + "on %s. %s", peer_str, err_str); + break; + } + } + op_err[len] = '\0'; + + if (args->errstr) { + len = snprintf (err_str, sizeof(err_str) - 1, + "%s\n%s", args->errstr, + op_err); + GF_FREE (args->errstr); + args->errstr = NULL; + } else + len = snprintf (err_str, sizeof(err_str) - 1, + "%s", op_err); + err_str[len] = '\0'; + + gf_log (this->name, GF_LOG_ERROR, "%s", op_err); + args->errstr = gf_strdup (err_str); + } + + return; +} + +int32_t +gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Snapshot Prevalidate Failed"); + goto out; + } + + break; + } + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "OP = %d. 
Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot_brickop (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "snapshot brickop " + "failed"); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Snapshot Commit Failed"); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict, + char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + switch (op) { + case GD_OP_SNAP: + { + ret = glusterd_snapshot_postvalidate (dict, op_ret, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "postvalidate operation failed"); + goto out; + } + break; + } + default: + break; + } + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "OP = %d. Returning %d", op, ret); + return ret; +} + +int32_t +gd_mgmt_v3_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + /* Even though the lock command has failed, while collating the errors + (gd_mgmt_v3_collate_errors), args->op_ret and args->op_errno will be + used. @args is obtained from frame->local. So before checking the + status of the request and going out if its a failure, args should be + set to frame->local. Otherwise, while collating args will be NULL. + This applies to other phases such as prevalidate, brickop, commit and + postvalidate also. 
+ */ + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_LOCK, + peerinfo, rsp.uuid); + free (rsp.dict.dict_val); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_lock_cbk_fn); +} + +int +gd_mgmt_v3_lock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + gd1_mgmt_v3_lock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_LOCK, + gd_mgmt_v3_lock_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_lock_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_initiate_lockdown (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *dict, char **op_errstr, int npeers, + gf_boolean_t *is_acquired) +{ + char *volname = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + int32_t ret = -1; + int32_t peer_cnt = 0; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (is_acquired); + + peers = &conf->xaction_peers; + + /* Trying to acquire multiple mgmt_v3 locks on local node */ + ret = glusterd_multiple_mgmt_v3_lock (dict, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire mgmt_v3 locks on localhost"); + goto out; + } + + *is_acquired = _gf_true; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending mgmt_v3 lock req to other nodes in the cluster */ + gd_syncargs_init (&args, NULL); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_lock (op, dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent lock op req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + if (ret) { + if (*op_errstr) + gf_log (this->name, GF_LOG_ERROR, "%s", + *op_errstr); + + if (volname) + ret = gf_asprintf (op_errstr, + "Another transaction is in progress " + "for %s. 
Please try again after " + "sometime.", volname); + else + ret = gf_asprintf (op_errstr, + "Another transaction is in progress " + "Please try again after sometime."); + + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + + return ret; +} + +int +glusterd_pre_validate_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) +{ + int32_t ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (aggr); + GF_ASSERT (rsp); + + switch (op) { + case GD_OP_SNAP: + ret = glusterd_snap_pre_validate_use_rsp_dict (aggr, rsp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to aggregate prevalidate " + "response dictionaries."); + goto out; + } + break; + default: + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Invalid op (%s)", gd_op_list[op]); + + break; + } +out: + return ret; +} + +int32_t +gd_mgmt_v3_pre_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_pre_val_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &rsp_dict); + if (ret < 0) { + GF_FREE (rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + uuid_copy (args->uuid, rsp.uuid); + pthread_mutex_lock (&args->lock_dict); + { + ret = glusterd_pre_validate_aggr_rsp_dict (rsp.op, args->dict, + rsp_dict); + } + pthread_mutex_unlock (&args->lock_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref (rsp_dict); + + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_PRE_VALIDATE, + peerinfo, rsp.uuid); + + if (rsp.op_errstr) + free (rsp.op_errstr); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_pre_validate_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_pre_validate_cbk_fn); +} + +int +gd_mgmt_v3_pre_validate_req (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_pre_val_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock 
(&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_PRE_VALIDATE, + gd_mgmt_v3_pre_validate_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_pre_val_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_pre_validate (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *req_dict, char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Pre Validation on local node */ + ret = gd_mgmt_v3_pre_validate_fn (op, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pre Validation failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Pre-validation failed " + "on localhost. Please " + "check log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + ret = glusterd_pre_validate_aggr_rsp_dict (op, req_dict, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending Pre Validation req to other nodes in the cluster */ + gd_syncargs_init (&args, req_dict); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_pre_validate_req (op, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pre Validation failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent pre validation req for %s " + "to %d peers. 
Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int +glusterd_mgmt_v3_build_payload (dict_t **req, char **op_errstr, dict_t *dict, + glusterd_op_t op) +{ + int32_t ret = -1; + dict_t *req_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (op_errstr); + GF_ASSERT (dict); + + req_dict = dict_new (); + if (!req_dict) + goto out; + + switch (op) { + case GD_OP_SNAP: + dict_copy (dict, req_dict); + break; + default: + break; + } + + *req = req_dict; + ret = 0; +out: + return ret; +} + +int32_t +gd_mgmt_v3_brick_op_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_brick_op_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + /* If the operation failed, then iov can be NULL. So better check the + status of the operation and then worry about iov (if the status of + the command is success) + */ + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_BRICK_OP, + peerinfo, rsp.uuid); + + if (rsp.op_errstr) + free (rsp.op_errstr); + + free (rsp.dict.dict_val); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_brick_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_brick_op_cbk_fn); +} + +int +gd_mgmt_v3_brick_op_req (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_brick_op_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_BRICK_OP, + gd_mgmt_v3_brick_op_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_brick_op_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_brick_op (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *req_dict, char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = 
&conf->xaction_peers; + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Perform brick op on local node */ + ret = gd_mgmt_v3_brick_op_fn (op, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Brick ops failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Brick ops failed " + "on localhost. Please " + "check log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending brick op req to other nodes in the cluster */ + gd_syncargs_init (&args, NULL); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_brick_op_req (op, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Brick ops failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent brick op req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_commit_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_commit_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &rsp_dict); + if (ret < 0) { + GF_FREE (rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + uuid_copy (args->uuid, rsp.uuid); + pthread_mutex_lock (&args->lock_dict); + { + ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict, + rsp_dict); + } + pthread_mutex_unlock (&args->lock_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + if (!rsp.op_ret) + op_ret = ret; + else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + } else { + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + } + +out: + if (rsp_dict) + dict_unref (rsp_dict); + + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_COMMIT, + peerinfo, rsp.uuid); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_commit_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_commit_cbk_fn); +} + +int +gd_mgmt_v3_commit_req (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + 
struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_commit_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_COMMIT, + gd_mgmt_v3_commit_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_commit_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_commit (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *op_ctx, dict_t *req_dict, + char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (op_ctx); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Commit on local node */ + ret = gd_mgmt_v3_commit_fn (op, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Commit failed for " + "operation %s on local node", + gd_op_list[op]); + + if (*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Commit failed " + "on localhost. Please " + "check log file for details."); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending commit req to other nodes in the cluster */ + gd_syncargs_init (&args, op_ctx); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_commit_req (op, req_dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Commit failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent commit req for %s to %d " + "peers. 
Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_post_validate_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_post_val_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_POST_VALIDATE, + peerinfo, rsp.uuid); + if (rsp.op_errstr) + free (rsp.op_errstr); + + free (rsp.dict.dict_val); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_post_validate_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_post_validate_cbk_fn); +} + +int +gd_mgmt_v3_post_validate_req (glusterd_op_t op, int32_t op_ret, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_post_val_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + req.op_ret = op_ret; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_POST_VALIDATE, + gd_mgmt_v3_post_validate_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_post_val_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_post_validate (glusterd_conf_t *conf, glusterd_op_t op, + int32_t op_ret, dict_t *dict, dict_t *req_dict, + char **op_errstr, int npeers) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + dict_t *rsp_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (req_dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + GF_ASSERT (peers); + + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create response dictionary"); + goto out; + } + + /* Copy the contents of dict like missed snaps info to req_dict */ + dict_copy (dict, req_dict); + + /* Post Validation on local node */ + ret = gd_mgmt_v3_post_validate_fn (op, op_ret, req_dict, op_errstr, + rsp_dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Post Validation failed for " + "operation %s on local node", + gd_op_list[op]); + + if 
(*op_errstr == NULL) { + ret = gf_asprintf (op_errstr, + "Post-validation failed " + "on localhost. Please check " + "log file for details"); + if (ret == -1) + *op_errstr = NULL; + + ret = -1; + } + goto out; + } + + dict_unref (rsp_dict); + rsp_dict = NULL; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending Post Validation req to other nodes in the cluster */ + gd_syncargs_init (&args, req_dict); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_post_validate_req (op, op_ret, req_dict, peerinfo, + &args, MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Post Validation failed on peers"); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent post validation req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int32_t +gd_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int32_t ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + if (!iov) { + gf_log (this->name, GF_LOG_ERROR, "iov is NULL"); + op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_UNLOCK, + peerinfo, rsp.uuid); + free (rsp.dict.dict_val); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_mgmt_v3_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_mgmt_v3_unlock_cbk_fn); +} + +int +gd_mgmt_v3_unlock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid) +{ + int32_t ret = -1; + gd1_mgmt_v3_unlock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_ctx); + GF_ASSERT (peerinfo); + GF_ASSERT (args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_UNLOCK, + gd_mgmt_v3_unlock_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_unlock_req); + synclock_lock (&conf->big_lock); +out: + GF_FREE (req.dict.dict_val); + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_mgmt_v3_release_peer_locks (glusterd_conf_t *conf, glusterd_op_t op, + dict_t *dict, int32_t op_ret, + char **op_errstr, int npeers, + gf_boolean_t is_acquired) +{ + int32_t ret = -1; + int32_t peer_cnt = 0; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; 
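/* note: peer unlock failures are collated below, but a successful op_ret of the main transaction is not overwritten by them (see the !op_ret check) */ 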
+ glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (conf); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + peers = &conf->xaction_peers; + + /* If the lock has not been held during this + * transaction, do not send unlock requests */ + if (!is_acquired) + goto out; + + if (!npeers) { + ret = 0; + goto out; + } + + /* Sending mgmt_v3 unlock req to other nodes in the cluster */ + gd_syncargs_init (&args, NULL); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + gd_mgmt_v3_unlock (op, dict, peerinfo, &args, + MY_UUID, peer_uuid); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unlock failed on peers"); + + if (!op_ret && args.errstr) + *op_errstr = gf_strdup (args.errstr); + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent unlock op req for %s " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); + +out: + return ret; +} + +int32_t +glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict) +{ + int32_t ret = -1; + int32_t op_ret = -1; + int32_t npeers = 0; + dict_t *req_dict = NULL; + dict_t *tmp_dict = NULL; + glusterd_conf_t *conf = NULL; + char *op_errstr = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *originator_uuid = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (dict); + conf = this->private; + GF_ASSERT (conf); + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. 
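* The uuid is heap-allocated because dict_set_bin makes the dict take * ownership of the buffer, which is then freed along with the dict. 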
*/ + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + + /* Marking the operation as complete synctasked */ + ret = dict_set_int32 (dict, "is_synctasked", _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set synctasked flag."); + goto out; + } + + /* Use a copy at local unlock as cli response will be sent before + * the unlock and the volname in the dict might be removed */ + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create dict"); + goto out; + } + dict_copy (dict, tmp_dict); + + /* BUILD PEERS LIST */ + INIT_LIST_HEAD (&conf->xaction_peers); + npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op); + + /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ + ret = glusterd_mgmt_v3_initiate_lockdown (conf, op, dict, &op_errstr, + npeers, &is_acquired); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed."); + goto out; + } + + /* BUILD PAYLOAD */ + ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD, + gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + /* PRE-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed"); + goto out; + } + + /* COMMIT OP PHASE */ + ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed"); + goto out; + } + + /* POST-COMMIT VALIDATE PHASE */ + /* As of now, post_validate is not handling any other + commands other than snapshot. So as of now, I am + sending 0 (op_ret as 0). 
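(The snapshot path in glusterd_mgmt_v3_initiate_snap_phases passes the real commit status instead, so that a commit failure can trigger cleanup during post-validate.) 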
+ */ + ret = glusterd_mgmt_v3_post_validate (conf, op, 0, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); + goto out; + } + + ret = 0; +out: + op_ret = ret; + /* UNLOCK PHASE FOR PEERS*/ + (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict, + op_ret, &op_errstr, + npeers, is_acquired); + + /* LOCAL VOLUME(S) UNLOCK */ + if (is_acquired) { + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release mgmt_v3 locks on localhost"); + op_ret = ret; + } + } + + /* SEND CLI RESPONSE */ + glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr); + + if (req_dict) + dict_unref (req_dict); + + if (tmp_dict) + dict_unref (tmp_dict); + + if (op_errstr) { + GF_FREE (op_errstr); + op_errstr = NULL; + } + + return 0; +} + +int32_t +glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict) +{ + int32_t ret = -1; + int32_t op_ret = -1; + int32_t npeers = 0; + dict_t *req_dict = NULL; + dict_t *tmp_dict = NULL; + glusterd_conf_t *conf = NULL; + char *op_errstr = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *originator_uuid = NULL; + gf_boolean_t success = _gf_false; + char *tmp_errstr = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (dict); + conf = this->private; + GF_ASSERT (conf); + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + + /* Marking the operation as complete synctasked */ + ret = dict_set_int32 (dict, "is_synctasked", _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set synctasked flag."); + goto out; + } + + /* Use a copy at local unlock as cli response will be sent before + * the unlock and the volname in the dict might be removed */ + tmp_dict = dict_new(); + if (!tmp_dict) { + gf_log (this->name, GF_LOG_ERROR, "Unable to create dict"); + goto out; + } + dict_copy (dict, tmp_dict); + + /* BUILD PEERS LIST */ + INIT_LIST_HEAD (&conf->xaction_peers); + npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op); + + /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */ + ret = glusterd_mgmt_v3_initiate_lockdown (conf, op, dict, &op_errstr, + npeers, &is_acquired); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mgmt_v3 lockdown failed."); + goto out; + } + + /* BUILD PAYLOAD */ + ret = glusterd_mgmt_v3_build_payload (&req_dict, &op_errstr, dict, op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD, + gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + /* PRE-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_pre_validate (conf, op, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Pre Validation Failed"); + goto out; + } + + /* BRICK OP PHASE for initiating barrier*/ + ret = dict_set_int32 (req_dict, "barrier", 1); + if (ret) + goto out; + ret = glusterd_mgmt_v3_brick_op 
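/* for GD_OP_SNAP the brick payload is built with GLUSTERD_VOLUME_BARRIER_OP (see glusterd_brick_op_build_payload), so the bricks hold I/O until the unbarrier request below */ 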
(conf, op, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed"); + goto unbarrier; + } + + /* COMMIT OP PHASE */ + /* TODO: As of now, the plan is to do quorum check before sending the + commit fop and if the quorum succeeds, then commit is sent to all + the other glusterds. + snap create functionality now creates the in memory and on disk + objects for the snapshot (marking them as incomplete), takes the lvm + snapshot and then updates the status of the in memory and on disk + snap objects as complete. Suppose one of the glusterds goes down + after taking the lvm snapshot, but before updating the snap object, + then treat it as a snapshot create failure and trigger cleanup. + i.e. the number of commit responses received by the originator + glusterd should be the same as the number of peers it has sent the + request to (i.e. the npeers variable). If not, then the originator + glusterd will initiate cleanup in the post-validate fop. + Question: What if one of the other glusterds goes down as explained + above and along with it the originator glusterd also goes down? + Who will initiate the cleanup? + */ + ret = glusterd_mgmt_v3_commit (conf, op, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Commit Op Failed"); + /* If the main op fails, we should save the error string. + Because, op_errstr will be used for unbarrier and + unlock ops also. We might lose the actual error that + caused the failure. + */ + tmp_errstr = op_errstr; + op_errstr = NULL; + goto unbarrier; + } + + success = _gf_true; +unbarrier: + /* BRICK OP PHASE for removing the barrier*/ + ret = dict_set_int32 (req_dict, "barrier", 0); + if (ret) + goto out; + ret = glusterd_mgmt_v3_brick_op (conf, op, req_dict, + &op_errstr, npeers); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Brick Ops Failed"); + goto out; + } + + ret = 0; + +out: + op_ret = ret; + + if (success == _gf_false) + op_ret = -1; + + /* POST-COMMIT VALIDATE PHASE */ + ret = glusterd_mgmt_v3_post_validate (conf, op, op_ret, dict, req_dict, + &op_errstr, npeers); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Post Validation Failed"); + op_ret = -1; + } + + /* UNLOCK PHASE FOR PEERS*/ + (void) glusterd_mgmt_v3_release_peer_locks (conf, op, dict, + op_ret, &op_errstr, + npeers, is_acquired); + + /* If the commit op (snapshot taking) failed, then the error is stored + in tmp_errstr and unbarrier is called. If unbarrier also fails, + the error from unbarrier is logged and freed. The error from the + commit op, which is stored in tmp_errstr, is sent to the cli. 
+ */ + if (tmp_errstr) { + if (op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "unbarrier brick op " + "failed with the error %s", op_errstr); + GF_FREE (op_errstr); + op_errstr = NULL; + } + op_errstr = tmp_errstr; + } + + /* LOCAL VOLUME(S) UNLOCK */ + if (is_acquired) { + /* Trying to release multiple mgmt_v3 locks */ + ret = glusterd_multiple_mgmt_v3_unlock (tmp_dict, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to release mgmt_v3 locks on localhost"); + op_ret = ret; + } + } + + /* SEND CLI RESPONSE */ + glusterd_op_send_cli_response (op, op_ret, 0, req, dict, op_errstr); + + if (req_dict) + dict_unref (req_dict); + + if (tmp_dict) + dict_unref (tmp_dict); + + if (op_errstr) { + GF_FREE (op_errstr); + op_errstr = NULL; + } + + return 0; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h new file mode 100644 index 000000000..b185a9bec --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h @@ -0,0 +1,45 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_MGMT_H_ +#define _GLUSTERD_MGMT_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +int32_t +gd_mgmt_v3_pre_validate_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_brick_op_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_commit_fn (glusterd_op_t op, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +gd_mgmt_v3_post_validate_fn (glusterd_op_t op, int32_t op_ret, dict_t *dict, + char **op_errstr, dict_t *rsp_dict); + +int32_t +glusterd_mgmt_v3_initiate_all_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + +int32_t +glusterd_mgmt_v3_initiate_snap_phases (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict); + +int +glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src); + +#endif /* _GLUSTERD_MGMT_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c new file mode 100644 index 000000000..0d67d1303 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c @@ -0,0 +1,693 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif +#include <inttypes.h> +#include <fnmatch.h> +#include <pwd.h> + +#include "globals.h" +#include "glusterfs.h" +#include "compat.h" +#include "dict.h" +#include "list.h" +#include "logging.h" +#include "defaults.h" +#include "compat.h" +#include "compat-errno.h" +#include "run.h" +#include "glusterd-mem-types.h" +#include "glusterd.h" +#include "glusterd-utils.h" +#include "common-utils.h" +#include "glusterd-mountbroker.h" +#include "glusterd-op-sm.h" + +static int +seq_dict_foreach (dict_t *dict, + int (*fn)(char *str, void *data), + void *data) +{ + char index[] = "4294967296"; // 1<<32 + int i = 0; + char *val = NULL; + int ret = 0; + + for (;;i++) { + snprintf(index, sizeof(index), "%d", i); + ret = dict_get_str (dict, index, &val); + if (ret != 0) + return ret == -ENOENT ? 0 : ret; + ret = fn (val, data); + if (ret != 0) + return ret; + } +} + +int +parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc) +#define SYNTAX_ERR -2 +{ + char *curs = NULL; + char *c2 = NULL; + char sc = '\0'; + char **cc = NULL; + gf_mount_pattern_t *pat = NULL; + int pnum = 0; + int ret = 0; + int lastsup = -1; + int incl = -1; + char **pcc = NULL; + int pnc = 0; + + skipwhite (&pdesc); + + /* a bow to theory */ + if (!*pdesc) + return 0; + + /* count number of components, separated by '&' */ + mspec->len = 0; + for (curs = pdesc; *curs; curs++) { + if (*curs == ')') + mspec->len++; + } + + mspec->patterns = GF_CALLOC (mspec->len, sizeof (*mspec->patterns), + gf_gld_mt_mount_pattern); + if (!mspec->patterns) { + ret = -1; + goto out; + } + + pat = mspec->patterns; + curs = pdesc; + skipwhite (&curs); + for (;;) { + incl = -1; + + /* check for pattern signedness modifier */ + if (*curs == '-') { + pat->negative = _gf_true; + curs++; + } + + /* now should come condition specifier, + * then opening paren + */ + c2 = nwstrtail (curs, "SUB("); + if (c2) { + pat->condition = SET_SUB; + goto got_cond; + } + c2 = nwstrtail (curs, "SUP("); + if (c2) { + pat->condition = SET_SUPER; + lastsup = pat - mspec->patterns; + goto got_cond; + } + c2 = nwstrtail (curs, "EQL("); + if (c2) { + pat->condition = SET_EQUAL; + goto got_cond; + } + c2 = nwstrtail (curs, "MEET("); + if (c2) { + pat->condition = SET_INTERSECT; + goto got_cond; + } + c2 = nwstrtail (curs, "SUB+("); + if (c2) { + pat->condition = SET_SUB; + incl = lastsup; + goto got_cond; + } + + ret = SYNTAX_ERR; + goto out; + + got_cond: + curs = c2; + skipwhite (&curs); + /* count the number of components for pattern */ + pnum = *curs == ')' ? 
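/* an empty "()" pattern has zero components */ 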
0 : 1; + for (c2 = curs ;*c2 != ')';) { + if (strchr ("&|", *c2)) { + ret = SYNTAX_ERR; + goto out; + } + while (!strchr ("|&)", *c2) && !isspace (*c2)) + c2++; + skipwhite (&c2); + switch (*c2) { + case ')': + break; + case '\0': + case '&': + ret = SYNTAX_ERR; + goto out; + case '|': + *c2 = ' '; + skipwhite (&c2); + /* fall through */ + default: + pnum++; + } + } + if (incl >= 0) { + pnc = 0; + for (pcc = mspec->patterns[incl].components; *pcc; pcc++) + pnc++; + pnum += pnc; + } + pat->components = GF_CALLOC (pnum + 1, sizeof (*pat->components), + gf_gld_mt_mount_comp_container); + if (!pat->components) { + ret = -1; + goto out; + } + + cc = pat->components; + /* copy over included component set */ + if (incl >= 0) { + memcpy (pat->components, + mspec->patterns[incl].components, + pnc * sizeof (*pat->components)); + cc += pnc; + } + /* parse and add components */ + c2 = ""; /* reset c2 */ + while (*c2 != ')') { + c2 = curs; + while (!isspace (*c2) && *c2 != ')') + c2++; + sc = *c2; + *c2 = '\0'; + *cc = gf_strdup (curs); + if (!*cc) { + ret = -1; + goto out; + } + *c2 = sc; + skipwhite (&c2); + curs = c2; + cc++; + } + + curs++; + skipwhite (&curs); + if (*curs == '&') { + curs++; + skipwhite (&curs); + } + + if (!*curs) + break; + pat++; + } + + out: + if (ret == SYNTAX_ERR) { + gf_log ("", GF_LOG_ERROR, "cannot parse mount patterns %s", + pdesc); + } + + /* We've allocated a lot of stuff here but don't bother with freeing + * on error; in that case we'll terminate anyway + */ + return ret ? -1 : 0; +} +#undef SYNTAX_ERR + + +const char *georep_mnt_desc_template = + "SUP(" + "xlator-option=\\*-dht.assert-no-child-down=true " + "volfile-server=localhost " + "client-pid=%d " + "user-map-root=%s " + ")" + "SUB+(" + "log-file="DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"*/* " + "log-level=* " + "volfile-id=* " + ")" + "MEET(" + "%s" + ")"; + +const char *hadoop_mnt_desc_template = + "SUP(" + "volfile-server=%s " + "client-pid=%d " + "volfile-id=%s " + "user-map-root=%s " + ")" + "SUB+(" + "log-file="DEFAULT_LOG_FILE_DIRECTORY"/"GHADOOP"*/* " + "log-level=* " + ")"; + +int +make_georep_mountspec (gf_mount_spec_t *mspec, const char *volnames, + char *user) +{ + char *georep_mnt_desc = NULL; + char *meetspec = NULL; + char *vols = NULL; + char *vol = NULL; + char *p = NULL; + char *savetok = NULL; + char *fa[3] = {0,}; + size_t siz = 0; + int vc = 0; + int i = 0; + int ret = 0; + + vols = gf_strdup ((char *)volnames); + if (!vols) + goto out; + + for (vc = 1, p = vols; *p; p++) { + if (*p == ',') + vc++; + } + siz = strlen (volnames) + vc * strlen("volfile-id="); + meetspec = GF_CALLOC (1, siz + 1, gf_gld_mt_georep_meet_spec); + if (!meetspec) + goto out; + + for (p = vols;;) { + vol = strtok_r (p, ",", &savetok); + if (!vol) { + GF_ASSERT (vc == 0); + break; + } + p = NULL; + strcat (meetspec, "volfile-id="); + strcat (meetspec, vol); + if (--vc > 0) + strcat (meetspec, " "); + } + + ret = gf_asprintf (&georep_mnt_desc, georep_mnt_desc_template, + GF_CLIENT_PID_GSYNCD, user, meetspec); + if (ret == -1) { + georep_mnt_desc = NULL; + goto out; + } + + ret = parse_mount_pattern_desc (mspec, georep_mnt_desc); + + out: + fa[0] = meetspec; + fa[1] = vols; + fa[2] = georep_mnt_desc; + + for (i = 0; i < 3; i++) { + if (fa[i] == NULL) + ret = -1; + else + GF_FREE (fa[i]); + } + + return ret; +} + +int +make_ghadoop_mountspec (gf_mount_spec_t *mspec, const char *volname, + char *user, char *server) +{ + char *hadoop_mnt_desc = NULL; + int ret = 0; + + ret = gf_asprintf (&hadoop_mnt_desc, 
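/* template slots, in order: volfile-server, client-pid, volfile-id, user-map-root */ 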
hadoop_mnt_desc_template, + server, GF_CLIENT_PID_HADOOP, volname, user); + if (ret == -1) + return ret; + + return parse_mount_pattern_desc (mspec, hadoop_mnt_desc); +} + +static gf_boolean_t +match_comp (char *str, char *patcomp) +{ + char *c1 = patcomp; + char *c2 = str; + + GF_ASSERT (c1); + GF_ASSERT (c2); + + while (*c1 == *c2) { + if (!*c1) + return _gf_true; + c1++; + c2++; + if (c1[-1] == '=') + break; + } + + return fnmatch (c1, c2, 0) == 0 ? _gf_true : _gf_false; +} + +struct gf_set_descriptor { + gf_boolean_t priv[2]; + gf_boolean_t common; +}; + +static int +_gf_set_dict_iter1 (char *val, void *data) +{ + void **dataa = data; + struct gf_set_descriptor *sd = dataa[0]; + char **curs = dataa[1]; + gf_boolean_t priv = _gf_true; + + while (*curs) { + if (match_comp (val, *curs)) { + priv = _gf_false; + sd->common = _gf_true; + } + curs++; + } + + if (priv) + sd->priv[0] = _gf_true; + + return 0; +} + +static int +_gf_set_dict_iter2 (char *val, void *data) +{ + void **dataa = data; + gf_boolean_t *boo = dataa[0]; + char *comp = dataa[1]; + + if (match_comp (val, comp)) + *boo = _gf_true; + + return 0; +} + +static void +relate_sets (struct gf_set_descriptor *sd, dict_t *argdict, char **complist) +{ + void *dataa[] = {NULL, NULL}; + gf_boolean_t boo = _gf_false; + + memset (sd, 0, sizeof (*sd)); + + dataa[0] = sd; + dataa[1] = complist; + seq_dict_foreach (argdict, _gf_set_dict_iter1, dataa); + + while (*complist) { + boo = _gf_false; + dataa[0] = &boo; + dataa[1] = *complist; + seq_dict_foreach (argdict, _gf_set_dict_iter2, dataa); + + if (boo) + sd->common = _gf_true; + else + sd->priv[1] = _gf_true; + + complist++; + } +} + +static int +_arg_parse_uid (char *val, void *data) +{ + char *user = strtail (val, "user-map-root="); + struct passwd *pw = NULL; + + if (!user) + return 0; + pw = getpwnam (user); + if (!pw) + return -EINVAL; + + if (*(int *)data >= 0) + /* uid ambiguity, already found */ + return -EINVAL; + + *(int *)data = pw->pw_uid; + return 0; +} + +static int +evaluate_mount_request (gf_mount_spec_t *mspec, dict_t *argdict) +{ + struct gf_set_descriptor sd = {{0,},}; + int i = 0; + int uid = -1; + int ret = 0; + gf_boolean_t match = _gf_false; + + for (i = 0; i < mspec->len; i++) { + relate_sets (&sd, argdict, mspec->patterns[i].components); + switch (mspec->patterns[i].condition) { + case SET_SUB: + match = !sd.priv[0]; + break; + case SET_SUPER: + match = !sd.priv[1]; + break; + case SET_EQUAL: + match = (!sd.priv[0] && !sd.priv[1]); + break; + case SET_INTERSECT: + match = sd.common; + break; + default: + GF_ASSERT(!"unreached"); + } + if (mspec->patterns[i].negative) + match = !match; + + if (!match) + return -EPERM; + } + + ret = seq_dict_foreach (argdict, _arg_parse_uid, &uid); + if (ret != 0) + return ret; + + return uid; +} + +static int +_volname_get (char *val, void *data) +{ + char **volname = data; + + *volname = strtail (val, "volfile-id="); + + return *volname ? 
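/* a nonzero return makes seq_dict_foreach stop at the first volfile-id found */ 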
1 : 0; +} + +static int +_runner_add (char *val, void *data) +{ + runner_t *runner = data; + + runner_argprintf (runner, "--%s", val); + + return 0; +} + +int +glusterd_do_mount (char *label, dict_t *argdict, char **path, int *op_errno) +{ + glusterd_conf_t *priv = NULL; + char *mountbroker_root = NULL; + gf_mount_spec_t *mspec = NULL; + int uid = -ENOENT; + char *volname = NULL; + glusterd_volinfo_t *vol = NULL; + char *mtptemp = NULL; + char *mntlink = NULL; + char *cookieswitch = NULL; + char *cookie = NULL; + char *sla = NULL; + struct stat st = {0,}; + runner_t runner = {0,}; + int ret = 0; + xlator_t *this = THIS; + + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (op_errno); + *op_errno = 0; + + if (dict_get_str (this->options, "mountbroker-root", + &mountbroker_root) != 0) { + *op_errno = ENOENT; + goto out; + } + + GF_ASSERT (label); + if (!*label) { + *op_errno = EINVAL; + goto out; + } + + /* look up spec for label */ + list_for_each_entry (mspec, &priv->mount_specs, + speclist) { + if (strcmp (mspec->label, label) != 0) + continue; + uid = evaluate_mount_request (mspec, argdict); + break; + } + if (uid < 0) { + *op_errno = -uid; + goto out; + } + + /* some sanity check on arguments */ + seq_dict_foreach (argdict, _volname_get, &volname); + if (!volname) { + *op_errno = EINVAL; + goto out; + } + if (glusterd_volinfo_find (volname, &vol) != 0 || + !glusterd_is_volume_started (vol)) { + *op_errno = ENOENT; + goto out; + } + + /* go do mount */ + + /** create actual mount dir */ + + /*** "overload" the string so it can also be used for cookie + creation, see below */ + ret = gf_asprintf (&mtptemp, "%s/user%d/mtpt-%s-XXXXXX/cookie", + mountbroker_root, uid, label); + if (ret == -1) { + mtptemp = NULL; + *op_errno = ENOMEM; + goto out; + } + /*** hide cookie part */ + cookieswitch = strrchr (mtptemp, '/'); + *cookieswitch = '\0'; + + sla = strrchr (mtptemp, '/'); + *sla = '\0'; + ret = mkdir (mtptemp, 0700); + if (ret == 0) + ret = chown (mtptemp, uid, 0); + else if (errno == EEXIST) + ret = 0; + if (ret == -1) { + *op_errno = errno; + goto out; + } + ret = lstat (mtptemp, &st); + if (ret == -1) { + *op_errno = errno; + goto out; + } + if (!(S_ISDIR (st.st_mode) && (st.st_mode & ~S_IFMT) == 0700 && + st.st_uid == uid && st.st_gid == 0)) { + *op_errno = EACCES; + goto out; + } + *sla = '/'; + + if (!mkdtemp (mtptemp)) { + *op_errno = errno; + goto out; + } + + /** create private "cookie" symlink */ + + /*** occupy an entry in the hive dir via mkstemp */ + ret = gf_asprintf (&cookie, "%s/"MB_HIVE"/mntXXXXXX", + mountbroker_root); + if (ret == -1) { + cookie = NULL; + *op_errno = ENOMEM; + goto out; + } + ret = mkstemp (cookie); + if (ret == -1) { + *op_errno = errno; + goto out; + } + close (ret); + + /*** assemble the path from cookie to mountpoint */ + sla = strchr (sla - 1, '/'); + GF_ASSERT (sla); + ret = gf_asprintf (&mntlink, "../user%d%s", uid, sla); + if (ret == -1) { + *op_errno = ENOMEM; + goto out; + } + + /*** create cookie link in (to-be) mountpoint, + move it over to the final place */ + *cookieswitch = '/'; + ret = symlink (mntlink, mtptemp); + if (ret != -1) + ret = rename (mtptemp, cookie); + *cookieswitch = '\0'; + if (ret == -1) { + *op_errno = errno; + goto out; + } + + /** invoke glusterfs on the mountpoint */ + + runinit (&runner); + runner_add_arg (&runner, SBIN_DIR"/glusterfs"); + seq_dict_foreach (argdict, _runner_add, &runner); + runner_add_arg (&runner, mtptemp); + ret = runner_run_reuse (&runner); + if (ret == -1) { + *op_errno = EIO; /* XXX hacky 
fake */ + runner_log (&runner, "", GF_LOG_ERROR, "command failed"); + } + runner_end (&runner); + + out: + + if (*op_errno) { + ret = -1; + gf_log ("", GF_LOG_WARNING, "unsuccessful mount request (%s)", + strerror (*op_errno)); + if (mtptemp) { + *cookieswitch = '/'; + unlink (mtptemp); + *cookieswitch = '\0'; + rmdir (mtptemp); + } + if (cookie) { + unlink (cookie); + GF_FREE (cookie); + } + + } else { + ret = 0; + *path = cookie; + } + + GF_FREE (mtptemp); + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.h b/xlators/mgmt/glusterd/src/glusterd-mountbroker.h new file mode 100644 index 000000000..426252ebe --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.h @@ -0,0 +1,42 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#define MB_HIVE "mb_hive" + +typedef enum { + SET_SUB = 1, + SET_SUPER, + SET_EQUAL, + SET_INTERSECT +} gf_setrel_t; + +struct gf_mount_pattern { + char **components; + gf_setrel_t condition; + gf_boolean_t negative; +}; +typedef struct gf_mount_pattern gf_mount_pattern_t; + +struct gf_mount_spec { + struct list_head speclist; + char *label; + gf_mount_pattern_t *patterns; + size_t len; +}; +typedef struct gf_mount_spec gf_mount_spec_t; + + +int parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc); + +int make_georep_mountspec (gf_mount_spec_t *mspec, const char *volname, + char *user); +int make_ghadoop_mountspec (gf_mount_spec_t *mspec, const char *volname, + char *user, char *server); + +int glusterd_do_mount (char *label, dict_t *argdict, char **path, int *op_errno); diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index d19caddc6..1666f5e4d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is GF_FREE software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _CONFIG_H #define _CONFIG_H @@ -25,6 +15,7 @@ #include <time.h> #include <sys/uio.h> #include <sys/resource.h> +#include <sys/mount.h> #include <libgen.h> #include "uuid.h" @@ -44,19 +35,198 @@ #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" -#include "cli1.h" +#include "glusterd-hooks.h" #include "glusterd-volgen.h" +#include "glusterd-locks.h" #include "syscall.h" +#include "cli1-xdr.h" +#include "common-utils.h" +#include "run.h" #include <sys/types.h> #include <signal.h> - -#define glusterd_op_start_volume_args_get(req, dict, volname, flags) \ - glusterd_op_stop_volume_args_get (req, dict, volname, flags) +#include <sys/wait.h> + +#define ALL_VOLUME_OPTION_CHECK(volname, key, ret, op_errstr, label) \ + do { \ + gf_boolean_t _all = !strcmp ("all", volname); \ + gf_boolean_t _ratio = !strcmp (key, \ + GLUSTERD_QUORUM_RATIO_KEY); \ + if (_all && !_ratio) { \ + ret = -1; \ + *op_errstr = gf_strdup ("Not a valid option for all " \ + "volumes"); \ + goto label; \ + } else if (!_all && _ratio) { \ + ret = -1; \ + *op_errstr = gf_strdup ("Not a valid option for " \ + "single volume"); \ + goto label; \ + } \ + } while (0) static struct list_head gd_op_sm_queue; pthread_mutex_t gd_op_sm_lock; glusterd_op_info_t opinfo = {{0},}; +uuid_t global_txn_id = {"\0"}; /* To be used in + * heterogeneous + * cluster with no + * transaction ids */ + +static dict_t *txn_opinfo; + +struct txn_opinfo_object_ { + glusterd_op_info_t opinfo; +}; +typedef struct txn_opinfo_object_ txn_opinfo_obj; + +int32_t +glusterd_txn_opinfo_dict_init () +{ + int32_t ret = -1; + + txn_opinfo = dict_new (); + if (!txn_opinfo) { + ret = -1; + goto out; + } + + ret = 0; +out: + return ret; +} + +void +glusterd_txn_opinfo_dict_fini () +{ + if (txn_opinfo) + dict_destroy (txn_opinfo); +} + +void +glusterd_txn_opinfo_init (glusterd_op_info_t *opinfo, + glusterd_op_sm_state_info_t *state, + glusterd_op_t *op, + dict_t *op_ctx, + rpcsvc_request_t *req) +{ + GF_ASSERT (opinfo); + + if (state) + opinfo->state = *state; + + if (op) + opinfo->op = *op; + + opinfo->op_ctx = dict_ref(op_ctx); + + if (req) + opinfo->req = req; + + return; +} + +int32_t +glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo) +{ + int32_t ret = -1; + txn_opinfo_obj *opinfo_obj = NULL; + + if (!txn_id || !opinfo) { + gf_log ("", GF_LOG_ERROR, + "Empty transaction id or opinfo received."); + ret = -1; + goto out; + } + + ret = dict_get_bin(txn_opinfo, uuid_utoa (*txn_id), + (void **) &opinfo_obj); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get transaction opinfo"); + goto out; + } + + (*opinfo) = opinfo_obj->opinfo; + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo) +{ + int32_t ret = -1; + txn_opinfo_obj *opinfo_obj = NULL; + + if (!txn_id) { + gf_log ("", GF_LOG_ERROR, "Empty transaction id received."); + ret = -1; + goto out; + } + + ret = dict_get_bin(txn_opinfo, uuid_utoa (*txn_id), + (void **) &opinfo_obj); + if (ret) { + opinfo_obj = GF_CALLOC (1, sizeof(txn_opinfo_obj), + gf_common_mt_txn_opinfo_obj_t); + if (!opinfo_obj) { + ret = -1; + goto out; + } + + ret = dict_set_bin(txn_opinfo, uuid_utoa (*txn_id), opinfo_obj, + sizeof(txn_opinfo_obj)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set opinfo for transaction ID : %s", + uuid_utoa (*txn_id)); + goto out; + } + } + + opinfo_obj->opinfo = (*opinfo); + + ret = 0; +out: + if (ret) + if (opinfo_obj) + GF_FREE 
(opinfo_obj); + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_clear_txn_opinfo (uuid_t *txn_id) +{ + int32_t ret = -1; + glusterd_op_info_t txn_op_info = {{0},}; + + if (!txn_id) { + gf_log ("", GF_LOG_ERROR, "Empty transaction id received."); + ret = -1; + goto out; + } + + ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Transaction opinfo not found"); + goto out; + } + + dict_unref (txn_op_info.op_ctx); + + dict_del(txn_opinfo, uuid_utoa (*txn_id)); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + static int glusterfs_port = GLUSTERD_DEFAULT_PORT; static char *glusterd_op_sm_state_names[] = { "Default", @@ -65,8 +235,15 @@ static char *glusterd_op_sm_state_names[] = { "Stage op sent", "Staged", "Commit op sent", - "Commited", + "Committed", "Unlock sent", + "Stage op failed", + "Commit op failed", + "Brick op sent", + "Brick op failed", + "Brick op Committed", + "Brick op Commit failed", + "Ack drain", "Invalid", }; @@ -83,11 +260,15 @@ static char *glusterd_op_sm_event_names[] = { "GD_OP_EVENT_COMMIT_OP", "GD_OP_EVENT_UNLOCK", "GD_OP_EVENT_START_UNLOCK", + "GD_OP_EVENT_ALL_ACK", + "GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP", "GD_OP_EVENT_INVALID" }; +extern struct volopt_map_entry glusterd_volopt_map[]; + char* -glusterd_op_sm_state_name_get (glusterd_op_sm_state_t state) +glusterd_op_sm_state_name_get (int state) { if (state < 0 || state >= GD_OP_STATE_MAX) return glusterd_op_sm_state_names[GD_OP_STATE_MAX]; @@ -95,7 +276,7 @@ glusterd_op_sm_state_name_get (glusterd_op_sm_state_t state) } char* -glusterd_op_sm_event_name_get (glusterd_op_sm_event_type_t event) +glusterd_op_sm_event_name_get (int event) { if (event < 0 || event >= GD_OP_EVENT_MAX) return glusterd_op_sm_event_names[GD_OP_EVENT_MAX]; @@ -111,28 +292,6 @@ glusterd_destroy_lock_ctx (glusterd_op_lock_ctx_t *ctx) } void -glusterd_destroy_stage_ctx (glusterd_op_stage_ctx_t *ctx) -{ - if (!ctx) - return; - - if (ctx->stage_req.buf.buf_val) - GF_FREE (ctx->stage_req.buf.buf_val); - GF_FREE (ctx); -} - -void -glusterd_destroy_commit_ctx (glusterd_op_commit_ctx_t *ctx) -{ - if (!ctx) - return; - - if (ctx->stage_req.buf.buf_val) - GF_FREE (ctx->stage_req.buf.buf_val); - GF_FREE (ctx); -} - -void glusterd_set_volume_status (glusterd_volinfo_t *volinfo, glusterd_volume_status status) { @@ -140,1481 +299,1619 @@ glusterd_set_volume_status (glusterd_volinfo_t *volinfo, volinfo->status = status; } -static int +gf_boolean_t glusterd_is_volume_started (glusterd_volinfo_t *volinfo) { GF_ASSERT (volinfo); - return (!(volinfo->status == GLUSTERD_STATUS_STARTED)); + return (volinfo->status == GLUSTERD_STATUS_STARTED); } -gf_boolean_t -glusterd_are_all_volumes_stopped () +static int +glusterd_op_sm_inject_all_acc (uuid_t *txn_id) { - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - glusterd_volinfo_t *voliter = NULL; + int32_t ret = -1; + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, txn_id, NULL); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo, + gd1_mgmt_brick_op_req **req, dict_t *dict) +{ + int ret = -1; + gd1_mgmt_brick_op_req *brick_req = NULL; + char *volname = NULL; + char name[1024] = {0,}; + gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - list_for_each_entry (voliter, &priv->volumes, 
vol_list) { - if (voliter->status == GLUSTERD_STATUS_STARTED) - return _gf_false; - } - - return _gf_true; - -} - -static int -glusterd_op_get_len (glusterd_op_t op) -{ GF_ASSERT (op < GD_OP_MAX); GF_ASSERT (op > GD_OP_NONE); - int ret = -1; + GF_ASSERT (req); + switch (op) { - case GD_OP_CREATE_VOLUME: - { - dict_t *dict = glusterd_op_get_ctx (op); - ret = dict_serialized_length (dict); - return ret; - } - break; + case GD_OP_REMOVE_BRICK: + case GD_OP_STOP_VOLUME: + brick_req = GF_CALLOC (1, sizeof (*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) + goto out; + brick_req->op = GLUSTERD_BRICK_TERMINATE; + brick_req->name = ""; + break; + case GD_OP_PROFILE_VOLUME: + brick_req = GF_CALLOC (1, sizeof (*brick_req), + gf_gld_mt_mop_brick_req_t); - case GD_OP_START_BRICK: - break; + if (!brick_req) + goto out; - case GD_OP_SET_VOLUME: - case GD_OP_RESET_VOLUME: - case GD_OP_REPLACE_BRICK: - case GD_OP_ADD_BRICK: - { - dict_t *dict = glusterd_op_get_ctx (op); - ret = dict_serialized_length (dict); - return ret; - } - case GD_OP_REMOVE_BRICK: - { - dict_t *dict = glusterd_op_get_ctx (op); - ret = dict_serialized_length (dict); - return ret; - } - break; - break; + brick_req->op = GLUSTERD_BRICK_XLATOR_INFO; + brick_req->name = brickinfo->path; - default: - GF_ASSERT (op); + break; + case GD_OP_HEAL_VOLUME: + { + brick_req = GF_CALLOC (1, sizeof (*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) + goto out; + + brick_req->op = GLUSTERD_BRICK_XLATOR_OP; + brick_req->name = ""; + ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op); + if (ret) + goto out; + ret = dict_set_int32 (dict, "xl-op", heal_op); + } + break; + case GD_OP_STATUS_VOLUME: + { + brick_req = GF_CALLOC (1, sizeof (*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) + goto out; + brick_req->op = GLUSTERD_BRICK_STATUS; + brick_req->name = ""; + } + break; + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + brick_req = GF_CALLOC (1, sizeof (*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) + goto out; + + brick_req->op = GLUSTERD_BRICK_XLATOR_DEFRAG; + ret = dict_get_str (dict, "volname", &volname); + if (ret) + goto out; + snprintf (name, 1024, "%s-dht",volname); + brick_req->name = gf_strdup (name); + + break; + case GD_OP_SNAP: + brick_req = GF_CALLOC (1, sizeof (*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) + goto out; + brick_req->op = GLUSTERD_VOLUME_BARRIER_OP; + ret = dict_get_str (dict, "volname", &volname); + if (ret) + goto out; + snprintf (name, 1024, "%s-server",volname); + brick_req->name = gf_strdup (name); + + break; + default: + goto out; + break; } - return 0; -} + ret = dict_allocate_and_serialize (dict, &brick_req->input.input_val, + &brick_req->input.input_len); + if (ret) + goto out; + *req = brick_req; + ret = 0; -static int -glusterd_op_sm_inject_all_acc () -{ - int32_t ret = -1; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); +out: + if (ret && brick_req) + GF_FREE (brick_req); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } int -glusterd_op_build_payload (glusterd_op_t op, gd1_mgmt_stage_op_req **req) +glusterd_node_op_build_payload (glusterd_op_t op, gd1_mgmt_brick_op_req **req, + dict_t *dict) { - int len = 0; int ret = -1; - gd1_mgmt_stage_op_req *stage_req = NULL; - void *ctx = NULL; + gd1_mgmt_brick_op_req *brick_req = NULL; GF_ASSERT (op < GD_OP_MAX); GF_ASSERT (op > GD_OP_NONE); GF_ASSERT (req); - len = glusterd_op_get_len (op); + switch 
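/* only profile and status are valid node-level (brick-less) operations; everything else is rejected */ 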
(op) { + case GD_OP_PROFILE_VOLUME: + brick_req = GF_CALLOC (1, sizeof (*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) + goto out; - stage_req = GF_CALLOC (1, sizeof (*stage_req), - gf_gld_mt_mop_stage_req_t); + brick_req->op = GLUSTERD_NODE_PROFILE; + brick_req->name = ""; - if (!stage_req) { - gf_log ("", GF_LOG_ERROR, "Out of Memory"); - goto out; - } + break; + case GD_OP_STATUS_VOLUME: + brick_req = GF_CALLOC (1, sizeof (*brick_req), + gf_gld_mt_mop_brick_req_t); + if (!brick_req) + goto out; - glusterd_get_uuid (&stage_req->uuid); - stage_req->op = op; - //stage_req->buf.buf_len = len; + brick_req->op = GLUSTERD_NODE_STATUS; + brick_req->name = ""; - ctx = (void*)glusterd_op_get_ctx (op); - if (!ctx) { - gf_log ("", GF_LOG_ERROR, "Null Context for " - "op %d", op); - ret = -1; + break; + + default: goto out; } - switch (op) { - case GD_OP_CREATE_VOLUME: - { - dict_t *dict = ctx; - ++glusterfs_port; - ret = dict_set_int32 (dict, "port", glusterfs_port); - ret = dict_allocate_and_serialize (dict, - &stage_req->buf.buf_val, - (size_t *)&stage_req->buf.buf_len); - if (ret) { - goto out; - } - } - break; - - case GD_OP_DELETE_VOLUME: - { - glusterd_op_delete_volume_ctx_t *ctx1 = ctx; - stage_req->buf.buf_len = - strlen (ctx1->volume_name); - stage_req->buf.buf_val = - gf_strdup (ctx1->volume_name); - } - break; - - case GD_OP_START_VOLUME: - case GD_OP_STOP_VOLUME: - case GD_OP_ADD_BRICK: - case GD_OP_REPLACE_BRICK: - case GD_OP_SET_VOLUME: - case GD_OP_RESET_VOLUME: - case GD_OP_REMOVE_BRICK: - case GD_OP_LOG_FILENAME: - case GD_OP_LOG_ROTATE: - case GD_OP_SYNC_VOLUME: - { - dict_t *dict = ctx; - ret = dict_allocate_and_serialize (dict, - &stage_req->buf.buf_val, - (size_t *)&stage_req->buf.buf_len); - if (ret) { - goto out; - } - } - break; + ret = dict_allocate_and_serialize (dict, &brick_req->input.input_val, + &brick_req->input.input_len); - default: - break; - } + if (ret) + goto out; - *req = stage_req; + *req = brick_req; ret = 0; out: + if (ret && brick_req) + GF_FREE (brick_req); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } static int -glusterd_op_stage_create_volume (gd1_mgmt_stage_op_req *req, char **op_errstr) +glusterd_validate_quorum_options (xlator_t *this, char *fullkey, char *value, + char **op_errstr) { - int ret = 0; - dict_t *dict = NULL; - char *volname = NULL; - gf_boolean_t exists = _gf_false; - char *bricks = NULL; - char *brick_list = NULL; - char *free_ptr = NULL; - glusterd_brickinfo_t *brick_info = NULL; - int32_t brick_count = 0; - int32_t i = 0; - char *brick = NULL; - char *tmpptr = NULL; - char cmd_str[1024]; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + int ret = 0; + char *key = NULL; + volume_option_t *opt = NULL; - GF_ASSERT (req); + if (!glusterd_is_quorum_option (fullkey)) + goto out; + key = strchr (fullkey, '.'); + key++; + opt = xlator_volume_option_get (this, key); + ret = xlator_option_validate (this, key, value, opt, op_errstr); +out: + return ret; +} + +static int +glusterd_check_client_op_version_support (char *volname, uint32_t op_version, + char **op_errstr) +{ + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + rpc_transport_t *xprt = NULL; this = THIS; - if (!this) { - gf_log ("glusterd", GF_LOG_ERROR, - "this is NULL"); - goto out; + GF_ASSERT(this); + priv = this->private; + GF_ASSERT(priv); + + pthread_mutex_lock (&priv->xprt_lock); + list_for_each_entry (xprt, &priv->xprt_list, list) { + if ((!strcmp(volname, xprt->peerinfo.volname)) && + ((op_version > 
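glusterd_validate_quorum_options above strips the "<domain>." prefix with strchr before handing the bare key to the option table; in the real path the prefix is guaranteed to exist because glusterd_is_quorum_option has already matched it, whereas this tiny sketch adds a NULL guard so it stands alone (the key shown is only an example):

#include <stdio.h>
#include <string.h>

/* returns the part after the first '.', or NULL if there is none */
static const char *
bare_key (const char *fullkey)
{
    const char *dot = strchr (fullkey, '.');
    return dot ? dot + 1 : NULL;
}

int
main (void)
{
    /* e.g. "cluster.server-quorum-type" -> "server-quorum-type" */
    printf ("%s\n", bare_key ("cluster.server-quorum-type"));
    return 0;
}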
xprt->peerinfo.max_op_version) || + (op_version < xprt->peerinfo.min_op_version))) { + ret = -1; + break; + } } + pthread_mutex_unlock (&priv->xprt_lock); - priv = this->private; - if (!priv) { - gf_log ("glusterd", GF_LOG_ERROR, - "priv is NULL"); - goto out; + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "One or more clients " + "don't support the required op-version"); + ret = gf_asprintf (op_errstr, "One or more connected clients " + "cannot support the feature being set. " + "These clients need to be upgraded or " + "disconnected before running this command" + " again"); + return -1; } + return 0; +} - dict = dict_new (); - if (!dict) - goto out; +static int +glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + int exists = 0; + char *key = NULL; + char *key_fixed = NULL; + char *value = NULL; + char str[100] = {0, }; + int count = 0; + int dict_count = 0; + char errstr[2048] = {0, }; + glusterd_volinfo_t *volinfo = NULL; + dict_t *val_dict = NULL; + gf_boolean_t global_opt = _gf_false; + glusterd_volinfo_t *voliter = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + uint32_t new_op_version = 0; + uint32_t local_new_op_version = 0; + uint32_t key_op_version = 0; + uint32_t local_key_op_version = 0; + gf_boolean_t origin_glusterd = _gf_true; + gf_boolean_t check_op_version = _gf_true; + gf_boolean_t all_vol = _gf_false; + struct volopt_map_entry *vme = NULL; - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); + GF_ASSERT (dict); + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); + val_dict = dict_new(); + if (!val_dict) goto out; - } ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } - exists = glusterd_check_volume_exists (volname); + /* Check if we can support the required op-version + * This check is not done on the originator glusterd. The originator + * glusterd sets this value. 
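The client check above walks the transport list under priv->xprt_lock and fails as soon as any connected client's advertised [min, max] op-version window excludes the requested version. A self-contained sketch of that interval test, with a fixed array and mutex as simplified stand-ins for the transport list:

#include <pthread.h>
#include <stdio.h>

struct peer { unsigned min_op_version, max_op_version; };

static pthread_mutex_t peers_lock = PTHREAD_MUTEX_INITIALIZER;
static struct peer     peers[]    = { { 1, 4 }, { 2, 5 } };

static int
clients_support (unsigned op_version)
{
    int    ret = 0;
    size_t i;

    pthread_mutex_lock (&peers_lock);
    for (i = 0; i < sizeof (peers) / sizeof (peers[0]); i++) {
        /* reject as soon as one client cannot speak this version */
        if (op_version > peers[i].max_op_version ||
            op_version < peers[i].min_op_version) {
            ret = -1;
            break;
        }
    }
    pthread_mutex_unlock (&peers_lock);
    return ret;
}

int
main (void)
{
    printf ("v3 ok: %s\n", clients_support (3) == 0 ? "yes" : "no");
    printf ("v5 ok: %s\n", clients_support (5) == 0 ? "yes" : "no");
    return 0;
}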
+ */ + origin_glusterd = is_origin_glusterd (dict); - if (exists) { - gf_log ("", GF_LOG_ERROR, "Volume with name: %s exists", - volname); - ret = -1; - } else { - ret = 0; - } - ret = dict_get_int32 (dict, "count", &brick_count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); - goto out; + if (!origin_glusterd) { + /* Check for v3.3.x origin glusterd */ + check_op_version = dict_get_str_boolean (dict, + "check-op-version", + _gf_false); + + if (check_op_version) { + ret = dict_get_uint32 (dict, "new-op-version", + &new_op_version); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new_op_version"); + goto out; + } + + if ((new_op_version > GD_OP_VERSION_MAX) || + (new_op_version < GD_OP_VERSION_MIN)) { + ret = -1; + snprintf (errstr, sizeof (errstr), + "Required op_version (%d) is not " + "supported", new_op_version); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + goto out; + } + } } - ret = dict_get_str (dict, "bricks", &bricks); + ret = dict_get_int32 (dict, "count", &dict_count); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get bricks"); + gf_log (this->name, GF_LOG_ERROR, + "Count(dict),not set in Volume-Set"); goto out; } - if (bricks) { - brick_list = gf_strdup (bricks); - if (!brick_list) { - ret = -1; - gf_log ("", GF_LOG_ERROR, "Out of memory"); + if (dict_count == 0) { + /*No options would be specified of volume set help */ + if (dict_get (dict, "help" )) { + ret = 0; goto out; - } else { - free_ptr = brick_list; } - } - while ( i < brick_count) { - i++; - brick= strtok_r (brick_list, " \n", &tmpptr); - brick_list = tmpptr; - ret = glusterd_brickinfo_from_brick (brick, &brick_info); - if (ret) + if (dict_get (dict, "help-xml" )) { +#if (HAVE_LIB_XML) + ret = 0; goto out; - snprintf (cmd_str, 1024, "%s", brick_info->path); - ret = glusterd_resolve_brick (brick_info); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "cannot resolve " - "brick: %s:%s", brick_info->hostname, - brick_info->path); +#else + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "libxml not present in the system"); + *op_errstr = gf_strdup ("Error: xml libraries not " + "present to produce xml-output"); goto out; +#endif } - - if (!uuid_compare (brick_info->uuid, priv->uuid)) { - ret = glusterd_brick_create_path (brick_info->hostname, - brick_info->path, - 0777, op_errstr); - if (ret) - goto out; - brick_list = tmpptr; - } - glusterd_brickinfo_delete (brick_info); - brick_info = NULL; - } -out: - if (dict) - dict_unref (dict); - if (free_ptr) - GF_FREE (free_ptr); - if (brick_info) - glusterd_brickinfo_delete (brick_info); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; -} - -static int -glusterd_op_stop_volume_args_get (gd1_mgmt_stage_op_req *req, - dict_t *dict, char** volname, - int *flags) -{ - int ret = -1; - - if (!req || !dict || !volname || !flags) - goto out; - - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); - goto out; - } - - ret = dict_get_str (dict, "volname", volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "No options received "); + *op_errstr = gf_strdup ("Options not specified"); + ret = -1; goto out; } - ret = dict_get_int32 (dict, "flags", flags); + ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get flags"); - goto out; - } -out: - return ret; -} - -static int -glusterd_op_stage_start_volume (gd1_mgmt_stage_op_req 
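Because a dict_t carries only string keys, the CLI flattens the option list into numbered pairs ("key1"/"value1", "key2"/"value2", ...), and the staging loop above rebuilds them until a lookup fails. A sketch of that convention, using a toy string table in place of dict_get_str:

#include <stdio.h>
#include <string.h>

/* toy stand-in for dict_get_str(); a real dict is a hash table */
static const char *toy_dict[][2] = {
    { "key1",   "performance.cache-size" },
    { "value1", "256MB" },
    { "key2",   "nfs.disable" },
    { "value2", "on" },
};

static const char *
get_str (const char *name)
{
    size_t i;
    for (i = 0; i < sizeof (toy_dict) / sizeof (toy_dict[0]); i++)
        if (strcmp (toy_dict[i][0], name) == 0)
            return toy_dict[i][1];
    return NULL;
}

int
main (void)
{
    char str[32];
    int  count;

    for (count = 1; ; count++) {
        const char *key, *value;

        snprintf (str, sizeof (str), "key%d", count);
        key = get_str (str);
        if (!key)
            break;              /* numbering gap == end of list */

        snprintf (str, sizeof (str), "value%d", count);
        value = get_str (str);
        if (!value)
            return 1;           /* a key without a value is an error */

        printf ("%s = %s\n", key, value);
    }
    return 0;
}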
*req, char **op_errstr) -{ - int ret = 0; - dict_t *dict = NULL; - char *volname = NULL; - int flags = 0; - gf_boolean_t exists = _gf_false; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - char msg[2048]; - glusterd_conf_t *priv = NULL; - - GF_ASSERT (req); - - priv = THIS->private; - if (!priv) { - gf_log ("glusterd", GF_LOG_ERROR, - "priv is NULL"); - ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } - dict = dict_new (); - if (!dict) - goto out; - - ret = glusterd_op_start_volume_args_get (req, dict, &volname, &flags); - if (ret) - goto out; + if (strcasecmp (volname, "all") != 0) { + exists = glusterd_check_volume_exists (volname); + if (!exists) { + snprintf (errstr, sizeof (errstr), + FMTSTR_CHECK_VOL_EXISTS, volname); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + ret = -1; + goto out; + } - exists = glusterd_check_volume_exists (volname); + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } - if (!exists) { - snprintf (msg, 2048, "Volume with name %s does not exist", volname); - gf_log ("", GF_LOG_ERROR, "%s", - msg); - *op_errstr = gf_strdup (msg); - ret = -1; + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; } else { - ret = 0; + all_vol = _gf_true; } - ret = glusterd_volinfo_find (volname, &volinfo); + local_new_op_version = priv->op_version; - if (ret) - goto out; + for ( count = 1; ret != 1 ; count++ ) { + global_opt = _gf_false; + sprintf (str, "key%d", count); + ret = dict_get_str (dict, str, &key); + if (ret) + break; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - ret = glusterd_resolve_brick (brickinfo); + sprintf (str, "value%d", count); + ret = dict_get_str (dict, str, &value); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to resolve brick" - " with hostname: %s, export: %s", - brickinfo->hostname,brickinfo->path); + gf_log (this->name, GF_LOG_ERROR, + "invalid key,value pair in 'volume set'"); + ret = -1; goto out; } - if (!uuid_compare (brickinfo->uuid, priv->uuid)) { - ret = glusterd_brick_create_path (brickinfo->hostname, - brickinfo->path, - 0777, op_errstr); - if (ret) - goto out; + if (strcmp (key, "config.memory-accounting") == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "enabling memory accounting for volume %s", + volname); + ret = 0; } - if (!(flags & GF_CLI_FLAG_OP_FORCE)) { - ret = glusterd_is_volume_started (volinfo); - if (!ret) { - snprintf (msg, 2048, "Volume %s already started", - volname); - gf_log ("glusterd", GF_LOG_ERROR, - "%s", msg); - *op_errstr = gf_strdup (msg); + if (strcmp (key, "config.transport") == 0) { + gf_log (this->name, GF_LOG_DEBUG, + "changing transport-type for volume %s", + volname); + ret = 0; + /* if value is none of 'tcp/rdma/tcp,rdma' error out */ + if (!((strcasecmp (value, "rdma") == 0) || + (strcasecmp (value, "tcp") == 0) || + (strcasecmp (value, "tcp,rdma") == 0) || + (strcasecmp (value, "rdma,tcp") == 0))) { + ret = snprintf (errstr, sizeof (errstr), + "transport-type %s does " + "not exist", value); + /* lets not bother about above return value, + its a failure anyways */ ret = -1; goto out; } } - } - - ret = 0; -out: - if (dict) - dict_unref (dict); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} + if (is_key_glusterd_hooks_friendly (key)) + continue; -static int -glusterd_op_stage_stop_volume (gd1_mgmt_stage_op_req *req) -{ - int ret = -1; - dict_t *dict = NULL; - char *volname 
= NULL; - int flags = 0; - gf_boolean_t exists = _gf_false; - glusterd_volinfo_t *volinfo = NULL; + for (vme = &glusterd_volopt_map[0]; vme->key; vme++) { + if ((vme->validate_fn) && + ((!strcmp (key, vme->key)) || + (!strcmp (key, strchr (vme->key, '.') + 1)))) { + ret = vme->validate_fn (dict, key, value, + op_errstr); + if (ret) + goto out; + break; + } + } - dict = dict_new (); - if (!dict) - goto out; + exists = glusterd_check_option_exists (key, &key_fixed); + if (exists == -1) { + ret = -1; + goto out; + } - ret = glusterd_op_stop_volume_args_get (req, dict, &volname, &flags); - if (ret) - goto out; + if (!exists) { + gf_log (this->name, GF_LOG_ERROR, + "Option with name: %s does not exist", key); + ret = snprintf (errstr, sizeof (errstr), + "option : %s does not exist", + key); + if (key_fixed) + snprintf (errstr + ret, sizeof (errstr) - ret, + "\nDid you mean %s?", key_fixed); + ret = -1; + goto out; + } - exists = glusterd_check_volume_exists (volname); + if (key_fixed) + key = key_fixed; + ALL_VOLUME_OPTION_CHECK (volname, key, ret, op_errstr, out); + ret = glusterd_validate_quorum_options (this, key, value, + op_errstr); + if (ret) + goto out; - if (!exists) { - gf_log ("", GF_LOG_ERROR, "Volume with name %s does not exist", - volname); - ret = -1; - } else { - ret = 0; - } + local_key_op_version = glusterd_get_op_version_for_key (key); + if (local_key_op_version > local_new_op_version) + local_new_op_version = local_key_op_version; - ret = glusterd_volinfo_find (volname, &volinfo); + sprintf (str, "op-version%d", count); + if (origin_glusterd) { + ret = dict_set_uint32 (dict, str, local_key_op_version); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set key-op-version in dict"); + goto out; + } + } else if (check_op_version) { + ret = dict_get_uint32 (dict, str, &key_op_version); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get key-op-version from" + " dict"); + goto out; + } + if (local_key_op_version != key_op_version) { + ret = -1; + snprintf (errstr, sizeof (errstr), + "option: %s op-version mismatch", + key); + gf_log (this->name, GF_LOG_ERROR, + "%s, required op-version = %"PRIu32", " + "available op-version = %"PRIu32, + errstr, key_op_version, + local_key_op_version); + goto out; + } + } - if (ret) - goto out; + if (glusterd_check_globaloption (key)) + global_opt = _gf_true; - if (!(flags & GF_CLI_FLAG_OP_FORCE)) { - ret = glusterd_is_volume_started (volinfo); + ret = dict_set_str (val_dict, key, value); if (ret) { - gf_log ("", GF_LOG_ERROR, "Volume %s " - "has not been started", volname); + gf_log (this->name, GF_LOG_ERROR, + "Unable to set the options in 'volume set'"); + ret = -1; goto out; } - } - -out: - if (dict) - dict_unref (dict); - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; -} - -static int -glusterd_op_stage_delete_volume (gd1_mgmt_stage_op_req *req) -{ - int ret = 0; - char volname [1024] = {0,}; - gf_boolean_t exists = _gf_false; - glusterd_volinfo_t *volinfo = NULL; - - GF_ASSERT (req); - - strncpy (volname, req->buf.buf_val, req->buf.buf_len); + *op_errstr = NULL; + if (!global_opt && !all_vol) + ret = glusterd_validate_reconfopts (volinfo, val_dict, op_errstr); + else if (!all_vol) { + voliter = NULL; + list_for_each_entry (voliter, &priv->volumes, vol_list) { + ret = glusterd_validate_globalopts (voliter, val_dict, op_errstr); + if (ret) + break; + } + } - exists = glusterd_check_volume_exists (volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not create " + "temp volfile, some 
option failed: %s", + *op_errstr); + goto out; + } + dict_del (val_dict, key); - if (!exists) { - gf_log ("", GF_LOG_ERROR, "Volume with name %s does not exist", - volname); - ret = -1; - goto out; - } else { - ret = 0; + if (key_fixed) { + GF_FREE (key_fixed); + key_fixed = NULL; + } } - ret = glusterd_volinfo_find (volname, &volinfo); - + // Check if all the connected clients support the new op-version + ret = glusterd_check_client_op_version_support (volname, + local_new_op_version, + op_errstr); if (ret) goto out; - ret = glusterd_is_volume_started (volinfo); - - if (!ret) { - gf_log ("", GF_LOG_ERROR, "Volume %s has been started." - "Volume needs to be stopped before deletion.", - volname); - ret = -1; - goto out; + if (origin_glusterd) { + ret = dict_set_uint32 (dict, "new-op-version", + local_new_op_version); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set new-op-version in dict"); + goto out; + } + /* Set this value in dict so other peers know to check for + * op-version. This is a hack for 3.3.x compatibility + * + * TODO: Remove this and the other places this is referred once + * 3.3.x compatibility is not required + */ + ret = dict_set_uint32 (dict, "check-op-version", + _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set check-op-version in dict"); + goto out; + } } ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + if (val_dict) + dict_unref (val_dict); + GF_FREE (key_fixed); + if (errstr[0] != '\0') + *op_errstr = gf_strdup (errstr); + + if (ret) { + if (!(*op_errstr)) { + *op_errstr = gf_strdup ("Error, Validation Failed"); + gf_log (this->name, GF_LOG_DEBUG, + "Error, Cannot Validate option :%s", + *op_errstr); + } else { + gf_log (this->name, GF_LOG_DEBUG, + "Error, Cannot Validate option"); + } + } return ret; } static int -glusterd_op_stage_add_brick (gd1_mgmt_stage_op_req *req, char **op_errstr) +glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr) { - int ret = 0; - dict_t *dict = NULL; - char *volname = NULL; - int count = 0; - int i = 0; - char *bricks = NULL; - char *brick_list = NULL; - char *saveptr = NULL; - char *free_ptr = NULL; - char *brick = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_volinfo_t *volinfo = NULL; - char cmd_str[1024]; - glusterd_conf_t *priv = NULL; - char msg[2048] = {0,}; - gf_boolean_t brick_alloc = _gf_false; - - GF_ASSERT (req); - - priv = THIS->private; - if (!priv) - goto out; + int ret = 0; + char *volname = NULL; + gf_boolean_t exists = _gf_false; + char msg[2048] = {0}; + char *key = NULL; + char *key_fixed = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; - dict = dict_new (); - if (!dict) - goto out; + this = THIS; + GF_ASSERT (this); - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); + ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); goto out; } - ret = dict_get_str (dict, "volname", &volname); + if (strcasecmp (volname, "all") != 0) { + exists = glusterd_check_volume_exists (volname); + if (!exists) { + snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, + volname); + ret = -1; + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, + volname); + goto out; + } - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; + ret = glusterd_validate_volume_id (dict, volinfo); + 
if (ret) + goto out; } - ret = glusterd_volinfo_find (volname, &volinfo); + ret = dict_get_str (dict, "key", &key); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to find volume: %s", volname); + gf_log (this->name, GF_LOG_ERROR, "Unable to get option key"); goto out; } + if (strcmp(key, "all")) { + exists = glusterd_check_option_exists (key, &key_fixed); + if (exists == -1) { + ret = -1; + goto out; + } + if (!exists) { + ret = snprintf (msg, sizeof (msg), + "Option %s does not exist", key); + if (key_fixed) + snprintf (msg + ret, sizeof (msg) - ret, + "\nDid you mean %s?", key_fixed); + ret = -1; + goto out; + } else if (exists > 0) { + if (key_fixed) + key = key_fixed; + ALL_VOLUME_OPTION_CHECK (volname, key, ret, + op_errstr, out); + } + } - if (glusterd_is_defrag_on(volinfo)) { - snprintf (msg, sizeof(msg), "Volume name %s rebalance is in " - "progress. Please retry after completion", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); +out: + GF_FREE (key_fixed); + + if (msg[0] != '\0') { + gf_log (this->name, GF_LOG_ERROR, "%s", msg); *op_errstr = gf_strdup (msg); - ret = -1; - goto out; - } - ret = dict_get_int32 (dict, "count", &count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); - goto out; } - ret = dict_get_str (dict, "bricks", &bricks); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + + + +static int +glusterd_op_stage_sync_volume (dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + char *hostname = NULL; + gf_boolean_t exists = _gf_false; + glusterd_peerinfo_t *peerinfo = NULL; + char msg[2048] = {0,}; + glusterd_volinfo_t *volinfo = NULL; + + ret = dict_get_str (dict, "hostname", &hostname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get bricks"); + snprintf (msg, sizeof (msg), "hostname couldn't be " + "retrieved from msg"); + *op_errstr = gf_strdup (msg); goto out; } - if (bricks) { - brick_list = gf_strdup (bricks); - free_ptr = brick_list; - } - - if (count) - brick = strtok_r (brick_list+1, " \n", &saveptr); - - while ( i < count) { - ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, - &brickinfo); + if (gf_is_local_addr (hostname)) { + //volname is not present in case of sync all + ret = dict_get_str (dict, "volname", &volname); if (!ret) { - gf_log ("", GF_LOG_ERROR, "Adding duplicate brick: %s", - brick); - ret = -1; - goto out; - } else { - ret = glusterd_brickinfo_from_brick (brick, &brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Add-brick: Unable" - " to get brickinfo"); + exists = glusterd_check_volume_exists (volname); + if (!exists) { + snprintf (msg, sizeof (msg), "Volume %s " + "does not exist", volname); + *op_errstr = gf_strdup (msg); + ret = -1; goto out; } - brick_alloc = _gf_true; + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; + + } else { + ret = 0; } - snprintf (cmd_str, 1024, "%s", brickinfo->path); - ret = glusterd_resolve_brick (brickinfo); + } else { + ret = glusterd_friend_find (NULL, hostname, &peerinfo); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "resolve brick failed"); + snprintf (msg, sizeof (msg), "%s, is not a friend", + hostname); + *op_errstr = gf_strdup (msg); goto out; } - if (!uuid_compare (brickinfo->uuid, priv->uuid)) { - ret = glusterd_brick_create_path (brickinfo->hostname, - brickinfo->path, - 0777, op_errstr); - if (ret) - goto out; + if (!peerinfo->connected) { + snprintf (msg, sizeof (msg), "%s, is not connected at " + "the moment", hostname); + *op_errstr = gf_strdup (msg); + ret = -1; + 
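Several staging paths above build their error string in two steps, using snprintf's return value as the offset at which the "Did you mean %s?" suggestion is appended. A minimal sketch of that append idiom (the option names are illustrative):

#include <stdio.h>

int
main (void)
{
    char        msg[2048] = {0,};
    const char *key       = "nfs.disabel";     /* illustrative typo */
    const char *key_fixed = "nfs.disable";     /* fuzzy-match suggestion */
    int         ret;

    /* snprintf returns the number of characters written (excluding
       the terminating NUL), which is exactly where the suffix starts */
    ret = snprintf (msg, sizeof (msg), "Option %s does not exist", key);
    if (key_fixed)
        snprintf (msg + ret, sizeof (msg) - ret, "\nDid you mean %s?",
                  key_fixed);

    puts (msg);
    return 0;
}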
goto out; } - glusterd_brickinfo_delete (brickinfo); - brick_alloc = _gf_false; - brickinfo = NULL; - brick = strtok_r (NULL, " \n", &saveptr); - i++; } out: - if (dict) - dict_unref (dict); - if (free_ptr) - GF_FREE (free_ptr); - if (brick_alloc && brickinfo) - glusterd_brickinfo_delete (brickinfo); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } static int -glusterd_op_stage_replace_brick (gd1_mgmt_stage_op_req *req, char **op_errstr, - dict_t *rsp_dict) +glusterd_op_stage_status_volume (dict_t *dict, char **op_errstr) { - int ret = 0; - dict_t *dict = NULL; - char *src_brick = NULL; - char *dst_brick = NULL; - char *volname = NULL; - gf1_cli_replace_op replace_op = 0; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *src_brickinfo = NULL; - char *host = NULL; - char *path = NULL; - char msg[2048] = {0}; - char *dup_dstbrick = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_brickinfo_t *dst_brickinfo = NULL; - - GF_ASSERT (req); - - dict = dict_new (); - if (!dict) - goto out; + int ret = -1; + uint32_t cmd = 0; + char msg[2048] = {0,}; + char *volname = NULL; + char *brick = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + dict_t *vol_opts = NULL; + gf_boolean_t nfs_disabled = _gf_false; + gf_boolean_t shd_enabled = _gf_true; - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); - goto out; - } - - ret = dict_get_str (dict, "src-brick", &src_brick); + GF_ASSERT (dict); + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT(priv); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); + ret = dict_get_uint32 (dict, "cmd", &cmd); + if (ret) goto out; - } - - gf_log ("", GF_LOG_DEBUG, - "src brick=%s", src_brick); - - ret = dict_get_str (dict, "dst-brick", &dst_brick); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dest brick"); + if (cmd & GF_CLI_STATUS_ALL) goto out; - } - - gf_log ("", GF_LOG_DEBUG, - "dst brick=%s", dst_brick); ret = dict_get_str (dict, "volname", &volname); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } - - ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "dict get on replace-brick operation failed"); + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to get volume name"); goto out; } ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - snprintf (msg, sizeof (msg), "volume: %s does not exist", + snprintf (msg, sizeof(msg), "Volume %s does not exist", volname); - *op_errstr = gf_strdup (msg); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + ret = -1; goto out; } - if (GLUSTERD_STATUS_STARTED != volinfo->status) { - ret = -1; - snprintf (msg, sizeof (msg), "volume: %s is not started", - volname); - *op_errstr = gf_strdup (msg); + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) goto out; - } - if (glusterd_is_defrag_on(volinfo)) { - snprintf (msg, sizeof(msg), "Volume name %s rebalance is in " - "progress. 
Please retry after completion", volname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); - *op_errstr = gf_strdup (msg); + ret = glusterd_is_volume_started (volinfo); + if (!ret) { + snprintf (msg, sizeof (msg), "Volume %s is not started", + volname); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); ret = -1; goto out; } - switch (replace_op) { - case GF_REPLACE_OP_START: - if (glusterd_is_rb_started (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "started for volume "); + vol_opts = volinfo->dict; + + if ((cmd & GF_CLI_STATUS_NFS) != 0) { + nfs_disabled = dict_get_str_boolean (vol_opts, "nfs.disable", + _gf_false); + if (nfs_disabled) { ret = -1; + snprintf (msg, sizeof (msg), + "NFS server is disabled for volume %s", + volname); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); goto out; } - break; - case GF_REPLACE_OP_PAUSE: - if (glusterd_is_rb_paused (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is already" - " paused for volume "); - ret = -1; - goto out; - } else if (!glusterd_is_rb_started(volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not" - " started for volume "); + } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { + if (!glusterd_is_volume_replicate (volinfo)) { ret = -1; + snprintf (msg, sizeof (msg), + "Volume %s is not of type replicate", + volname); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); goto out; } - break; - case GF_REPLACE_OP_ABORT: - if ((!glusterd_is_rb_paused (volinfo)) && - (!glusterd_is_rb_started (volinfo))) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not " - " started or paused for volume "); + shd_enabled = dict_get_str_boolean (vol_opts, + "cluster.self-heal-daemon", + _gf_true); + if (!shd_enabled) { ret = -1; + snprintf (msg, sizeof (msg), + "Self-heal Daemon is disabled for volume %s", + volname); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); goto out; } - break; - case GF_REPLACE_OP_COMMIT: - if (!glusterd_is_rb_started (volinfo)) { - gf_log ("", GF_LOG_ERROR, "Replace brick is not " - "started for volume "); + } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) { + ret = dict_get_str (dict, "brick", &brick); + if (ret) + goto out; + + ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, + &brickinfo); + if (ret) { + snprintf (msg, sizeof(msg), "No brick %s in" + " volume %s", brick, volname); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + ret = -1; goto out; } - break; - - case GF_REPLACE_OP_COMMIT_FORCE: break; - case GF_REPLACE_OP_STATUS: - break; - default: - ret = -1; - goto out; } - ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, - &src_brickinfo); + ret = 0; + + out: if (ret) { - snprintf (msg, sizeof (msg), "brick: %s does not exist in " - "volume: %s", src_brick, volname); - *op_errstr = gf_strdup (msg); - goto out; + if (msg[0] != '\0') + *op_errstr = gf_strdup (msg); + else + *op_errstr = gf_strdup ("Validation Failed for Status"); } - if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - gf_log ("", GF_LOG_DEBUG, - "I AM THE SOURCE HOST"); - if (src_brickinfo->port) { - ret = dict_set_int32 (rsp_dict, "src-brick-port", - src_brickinfo->port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set src-brick-port=%d", - src_brickinfo->port); - } - } + gf_log (THIS->name, GF_LOG_DEBUG, "Returning: %d", ret); + return ret; +} - } - dup_dstbrick = gf_strdup (dst_brick); - if (!dup_dstbrick) { - ret = -1; - gf_log ("", GF_LOG_ERROR, "Memory allocation failed"); +static gf_boolean_t +glusterd_is_profile_on (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + gf_boolean_t 
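The status staging above treats cmd as a bitmask and validates per-target preconditions: NFS status needs nfs.disable off, self-heal-daemon status needs a replicate volume, brick status needs the named brick to exist. A compact sketch of that bitmask dispatch, with illustrative flag values rather than the real GF_CLI_STATUS_* constants:

#include <stdio.h>

enum {
    STATUS_ALL   = 1 << 0,
    STATUS_NFS   = 1 << 1,
    STATUS_SHD   = 1 << 2,
    STATUS_BRICK = 1 << 3,
};

static int
stage_status (unsigned cmd, int nfs_disabled, int is_replicate)
{
    if (cmd & STATUS_ALL)
        return 0;               /* nothing volume-specific to check */
    if ((cmd & STATUS_NFS) && nfs_disabled)
        return -1;              /* no NFS server to query */
    if ((cmd & STATUS_SHD) && !is_replicate)
        return -1;              /* self-heal daemon needs replicate */
    return 0;
}

int
main (void)
{
    printf ("%d\n", stage_status (STATUS_NFS, 1, 0));   /* -1 */
    printf ("%d\n", stage_status (STATUS_SHD, 0, 1));   /*  0 */
    return 0;
}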
is_latency_on = _gf_false; + gf_boolean_t is_fd_stats_on = _gf_false; + + GF_ASSERT (volinfo); + + ret = glusterd_volinfo_get_boolean (volinfo, VKEY_DIAG_CNT_FOP_HITS); + if (ret != -1) + is_fd_stats_on = ret; + ret = glusterd_volinfo_get_boolean (volinfo, VKEY_DIAG_LAT_MEASUREMENT); + if (ret != -1) + is_latency_on = ret; + if ((_gf_true == is_latency_on) && + (_gf_true == is_fd_stats_on)) + return _gf_true; + return _gf_false; +} + +static int +glusterd_op_stage_stats_volume (dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + gf_boolean_t exists = _gf_false; + char msg[2048] = {0,}; + int32_t stats_op = GF_CLI_STATS_NONE; + glusterd_volinfo_t *volinfo = NULL; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (msg, sizeof (msg), "Volume name get failed"); goto out; } - host = strtok (dup_dstbrick, ":"); - path = strtok (NULL, ":"); - if (!host || !path) { - gf_log ("", GF_LOG_ERROR, - "dst brick %s is not of form <HOSTNAME>:<export-dir>", - dst_brick); + exists = glusterd_check_volume_exists (volname); + ret = glusterd_volinfo_find (volname, &volinfo); + if ((!exists) || (ret < 0)) { + snprintf (msg, sizeof (msg), "Volume %s, " + "doesn't exist", volname); ret = -1; goto out; } - if (!glusterd_brickinfo_get (NULL, host, path, NULL)) { - snprintf(msg, sizeof(msg), "Brick: %s:%s already in use", - host, path); - *op_errstr = gf_strdup (msg); - ret = -1; + + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) goto out; - } - ret = glusterd_brickinfo_from_brick (dst_brick, &dst_brickinfo); - if ((volinfo->rb_status ==GF_RB_STATUS_NONE) && - (replace_op == GF_REPLACE_OP_START)) { - volinfo->src_brick = src_brickinfo; - volinfo->dst_brick = dst_brickinfo; + ret = dict_get_int32 (dict, "op", &stats_op); + if (ret) { + snprintf (msg, sizeof (msg), "Volume profile op get failed"); + goto out; } - if (glusterd_rb_check_bricks (volinfo, src_brickinfo, dst_brickinfo)) { - gf_log ("", GF_LOG_ERROR, "replace brick: incorrect source or" - " destination bricks specified"); - ret = -1; - goto out; - } - if (!glusterd_is_local_addr (host)) { - ret = glusterd_brick_create_path (host, path, 0777, op_errstr); - if (ret) - goto out; - } else { - ret = glusterd_friend_find (NULL, host, &peerinfo); - if (ret) { - snprintf (msg, sizeof (msg), "%s, is not a friend", - host); - *op_errstr = gf_strdup (msg); + if (GF_CLI_STATS_START == stats_op) { + if (_gf_true == glusterd_is_profile_on (volinfo)) { + snprintf (msg, sizeof (msg), "Profile on Volume %s is" + " already started", volinfo->volname); + ret = -1; goto out; } - if (!peerinfo->connected) { - snprintf (msg, sizeof (msg), "%s, is not connected at " - "the moment", host); - *op_errstr = gf_strdup (msg); + } + if ((GF_CLI_STATS_STOP == stats_op) || + (GF_CLI_STATS_INFO == stats_op)) { + if (_gf_false == glusterd_is_profile_on (volinfo)) { + snprintf (msg, sizeof (msg), "Profile on Volume %s is" + " not started", volinfo->volname); ret = -1; + goto out; } - - if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) { - snprintf (msg, sizeof (msg), "%s, is not befriended " - "at the moment", host); - *op_errstr = gf_strdup (msg); + } + if ((GF_CLI_STATS_TOP == stats_op) || + (GF_CLI_STATS_INFO == stats_op)) { + if (_gf_false == glusterd_is_volume_started (volinfo)) { + snprintf (msg, sizeof (msg), "Volume %s is not started.", + volinfo->volname); + gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); ret = -1; goto out; } } ret = 0; - out: - if (dup_dstbrick) - GF_FREE (dup_dstbrick); - if (dict) - 
dict_unref (dict); + if (msg[0] != '\0') { + gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + } gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; } + static int -glusterd_op_stage_log_filename (gd1_mgmt_stage_op_req *req) +_delete_reconfig_opt (dict_t *this, char *key, data_t *value, void *data) { - int ret = -1; - dict_t *dict = NULL; - char *volname = NULL; - gf_boolean_t exists = _gf_false; - - GF_ASSERT (req); - - dict = dict_new (); - if (!dict) - goto out; - - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); - goto out; - } + int32_t *is_force = 0; - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } + GF_ASSERT (data); + is_force = (int32_t*)data; - exists = glusterd_check_volume_exists (volname); - if (!exists) { - gf_log ("", GF_LOG_ERROR, "Volume with name: %s not exists", - volname); - ret = -1; - goto out; + if (*is_force != 1) { + if (_gf_true == glusterd_check_voloption_flags (key, + OPT_FLAG_FORCE)) { + /* indicate to caller that we don't set the option + * due to being protected + */ + *is_force = *is_force | GD_OP_PROTECTED; + goto out; + } else { + *is_force = *is_force | GD_OP_UNPROTECTED; + } } + gf_log ("", GF_LOG_DEBUG, "deleting dict with key=%s,value=%s", + key, value->data); + dict_del (this, key); out: - if (dict) - dict_unref (dict); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; + return 0; } static int -glusterd_op_stage_log_rotate (gd1_mgmt_stage_op_req *req) +_delete_reconfig_global_opt (dict_t *this, char *key, data_t *value, void *data) { - int ret = -1; - dict_t *dict = NULL; - char *volname = NULL; - gf_boolean_t exists = _gf_false; - - GF_ASSERT (req); - - dict = dict_new (); - if (!dict) - goto out; - - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); - goto out; - } + int32_t *is_force = 0; - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } + GF_ASSERT (data); + is_force = (int32_t*)data; - exists = glusterd_check_volume_exists (volname); - if (!exists) { - gf_log ("", GF_LOG_ERROR, "Volume with name: %s not exists", - volname); - ret = -1; + if (strcmp (GLUSTERD_GLOBAL_OPT_VERSION, key) == 0) goto out; - } + _delete_reconfig_opt (this, key, value, data); out: - if (dict) - dict_unref (dict); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - - return ret; + return 0; } static int -glusterd_op_stage_set_volume (gd1_mgmt_stage_op_req *req, char **op_errstr) +glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key, + int32_t *is_force) { - int ret = 0; - dict_t *dict = NULL; - char *volname = NULL; - int exists = 0; - char *key = NULL; - char *value = NULL; - char str[100] = {0, }; - int count = 0; - int dict_count = 0; - char errstr[2048] = {0, }; - glusterd_volinfo_t *volinfo = NULL; - dict_t *val_dict = NULL; - - GF_ASSERT (req); - - dict = dict_new (); - if (!dict) - goto out; + int ret = 0; + data_t *value = NULL; + char *key_fixed = NULL; + xlator_t *this = NULL; - val_dict = dict_new(); - if (!val_dict) - goto out; - - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); + this = THIS; + GF_ASSERT (this); + GF_ASSERT (volinfo->dict); + GF_ASSERT (key); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize 
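_delete_reconfig_opt above threads a single int32_t through the dict_foreach callback and ORs outcome bits into it, so the caller can tell afterwards whether protected options were skipped, unprotected ones deleted, or both, and word the reply accordingly. A sketch of that accumulator pattern over a plain array (flag values are illustrative, and bit 0 stands in for the initial force value of 0 or 1):

#include <stdio.h>

enum { OP_PROTECTED = 1 << 1, OP_UNPROTECTED = 1 << 2 };

struct opt { const char *key; int is_protected; int deleted; };

static void
delete_opt (struct opt *o, int *flags)
{
    if (!(*flags & 1)) {               /* bit 0 is the "force" switch */
        if (o->is_protected) {
            *flags |= OP_PROTECTED;    /* remember we skipped one */
            return;
        }
        *flags |= OP_UNPROTECTED;
    }
    o->deleted = 1;
}

int
main (void)
{
    struct opt opts[] = { { "a", 1, 0 }, { "b", 0, 0 } };
    int        flags  = 0;             /* no force */
    size_t     i;

    for (i = 0; i < sizeof (opts) / sizeof (opts[0]); i++)
        delete_opt (&opts[i], &flags);

    if ((flags & OP_PROTECTED) && (flags & OP_UNPROTECTED))
        puts ("unprotected options reset; use 'force' for the rest");
    return 0;
}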
dict"); - goto out; + if (!strncmp(key, "all", 3)) + dict_foreach (volinfo->dict, _delete_reconfig_opt, is_force); + else { + value = dict_get (volinfo->dict, key); + if (!value) { + gf_log (this->name, GF_LOG_DEBUG, + "no value set for option %s", key); + goto out; + } + _delete_reconfig_opt (volinfo->dict, key, value, is_force); } - ret = dict_get_str (dict, "volname", &volname); + gd_update_volume_op_versions (volinfo); + ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } - - exists = glusterd_check_volume_exists (volname); - - if (!exists) { - gf_log ("", GF_LOG_ERROR, "Volume with name: %s " - "does not exist", volname); - snprintf (errstr, 2048, "Volume : %s does not exist", - volname); - *op_errstr = gf_strdup (errstr); + gf_log (this->name, GF_LOG_ERROR, "Unable to create volfile for" + " 'volume reset'"); ret = -1; goto out; } - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; - } - - ret = dict_get_int32 (dict, "count", &dict_count); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Count(dict),not set in Volume-Set"); + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) goto out; - } - if ( dict_count == 1 ) { - if (dict_get (dict, "history" )) { - ret = 0; + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + ret = glusterd_nodesvcs_handle_reconfigure (volinfo); + if (ret) goto out; - } - - gf_log ("", GF_LOG_ERROR, "No options received "); - *op_errstr = gf_strdup ("Options not specified"); - ret = -1; - goto out; } + ret = 0; +out: + GF_FREE (key_fixed); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} - for ( count = 1; ret != 1 ; count++ ) { - - sprintf (str, "key%d", count); - ret = dict_get_str (dict, str, &key); - - - if (ret) - break; - - - exists = glusterd_check_option_exists (key, NULL); - - if (exists != 1) { - gf_log ("", GF_LOG_ERROR, "Option with name: %s " - "does not exist", key); - snprintf (errstr, 2048, "option : %s does not exist", - key); - *op_errstr = gf_strdup (errstr); - - ret = -1; - goto out; - } - - sprintf (str, "value%d", count); - ret = dict_get_str (dict, str, &value); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "invalid key,value pair" - "in 'volume set'"); - ret = -1; - goto out; - } +static int +glusterd_op_reset_all_volume_options (xlator_t *this, dict_t *dict) +{ + char *key = NULL; + char *key_fixed = NULL; + int ret = -1; + int32_t is_force = 0; + glusterd_conf_t *conf = NULL; + dict_t *dup_opt = NULL; + gf_boolean_t all = _gf_false; + char *next_version = NULL; + gf_boolean_t quorum_action = _gf_false; + + conf = this->private; + ret = dict_get_str (dict, "key", &key); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get key"); + goto out; + } - ret = dict_set_str (val_dict, key, value); + ret = dict_get_int32 (dict, "force", &is_force); + if (ret) + is_force = 0; - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set the options" - "in 'volume set'"); + if (strcmp (key, "all")) { + ret = glusterd_check_option_exists (key, &key_fixed); + if (ret <= 0) { + gf_log (this->name, GF_LOG_ERROR, "Option %s does not " + "exist", key); ret = -1; goto out; } + } else { + all = _gf_true; + } + if (key_fixed) + key = key_fixed; - } - - *op_errstr = NULL; - ret = glusterd_validate_reconfopts (volinfo, val_dict, op_errstr); - if (ret) { - gf_log ("glsuterd", GF_LOG_DEBUG, - "Could not create temp volfile, some 
option failed: %s", - *op_errstr); + ret = -1; + dup_opt = dict_new (); + if (!dup_opt) goto out; + if (!all) { + dict_copy (conf->opts, dup_opt); + dict_del (dup_opt, key); } + ret = glusterd_get_next_global_opt_version_str (conf->opts, + &next_version); + if (ret) + goto out; + ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version); + if (ret) + goto out; - ret = 0; + ret = glusterd_store_options (this, dup_opt); + if (ret) + goto out; -out: - if (dict) - dict_unref (dict); + if (glusterd_is_quorum_changed (conf->opts, key, NULL)) + quorum_action = _gf_true; - if (val_dict) - dict_unref (val_dict); + ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, + next_version); + if (ret) + goto out; + else + next_version = NULL; - if (ret) { - if (!(*op_errstr)) { - *op_errstr = gf_strdup ("Error, Validation Failed"); - gf_log ("glsuterd", GF_LOG_DEBUG, - "Error, Cannot Validate option :%s", - *op_errstr); - } - else - gf_log ("glsuterd", GF_LOG_DEBUG, - "Error, Cannot Validate option"); + if (!all) { + dict_del (conf->opts, key); + } else { + dict_foreach (conf->opts, _delete_reconfig_global_opt, + &is_force); } -return ret; +out: + GF_FREE (key_fixed); + if (dup_opt) + dict_unref (dup_opt); + + gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action (); + GF_FREE (next_version); + return ret; } static int -glusterd_op_stage_reset_volume (gd1_mgmt_stage_op_req *req) +glusterd_op_reset_volume (dict_t *dict, char **op_rspstr) { - int ret = 0; - dict_t *dict = NULL; - char *volname = NULL; - gf_boolean_t exists = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + char *volname = NULL; + char *key = NULL; + char *key_fixed = NULL; + int32_t is_force = 0; + gf_boolean_t quorum_action = _gf_false; + xlator_t *this = NULL; - GF_ASSERT (req); + this = THIS; + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name" ); + goto out; + } - dict = dict_new (); - if (!dict) + if (strcasecmp (volname, "all") == 0) { + ret = glusterd_op_reset_all_volume_options (this, dict); goto out; + } - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); + ret = dict_get_int32 (dict, "force", &is_force); + if (ret) + is_force = 0; + ret = dict_get_str (dict, "key", &key); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get option key"); goto out; } - ret = dict_get_str (dict, "volname", &volname); - + ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS, + volname); goto out; } - exists = glusterd_check_volume_exists (volname); - - if (!exists) { - gf_log ("", GF_LOG_ERROR, "Volume with name: %s " - "does not exist", - volname); + if (strcmp (key, "all") && + glusterd_check_option_exists (key, &key_fixed) != 1) { + gf_log (this->name, GF_LOG_ERROR, + "volinfo dict inconsistency: option %s not found", + key); ret = -1; goto out; } + if (key_fixed) + key = key_fixed; + + if (glusterd_is_quorum_changed (volinfo->dict, key, NULL)) + quorum_action = _gf_true; + ret = glusterd_options_reset (volinfo, key, &is_force); + if (ret == -1) { + gf_asprintf(op_rspstr, "Volume reset : failed"); + } else if (is_force & GD_OP_PROTECTED) { + if (is_force & GD_OP_UNPROTECTED) { + gf_asprintf (op_rspstr, "All unprotected fields were" + " reset. 
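Both the reset-all and set-all paths above follow the same durability ordering: copy the live options into a scratch dict, apply the change plus a bumped GLUSTERD_GLOBAL_OPT_VERSION there, persist the copy, and only then mutate the in-memory dict, so a failed write leaves memory and disk consistent. A sketch of that ordering with a plain struct standing in for dict_t and a printf standing in for glusterd_store_options:

#include <stdio.h>

struct opts { char value[64]; unsigned version; };

static int
store_options (const struct opts *o)
{
    /* stand-in for the real on-disk store */
    printf ("persisted: value=%s version=%u\n", o->value, o->version);
    return 0;
}

static int
set_global_opt (struct opts *live, const char *value)
{
    struct opts scratch = *live;            /* copy, like dict_copy() */

    snprintf (scratch.value, sizeof (scratch.value), "%s", value);
    scratch.version = live->version + 1;    /* next global opt version */

    if (store_options (&scratch) != 0)
        return -1;                          /* disk failed: memory untouched */

    *live = scratch;                        /* only now update in memory */
    return 0;
}

int
main (void)
{
    struct opts live = { "off", 1 };
    set_global_opt (&live, "on");
    printf ("in memory: value=%s version=%u\n", live.value, live.version);
    return 0;
}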
To reset the protected fields," + " use 'force'."); + } else { + ret = -1; + gf_asprintf (op_rspstr, "'%s' is protected. To reset" + " use 'force'.", key); + } + } out: - if (dict) - dict_unref (dict); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + GF_FREE (key_fixed); + if (quorum_action) + glusterd_do_quorum_action (); + gf_log (this->name, GF_LOG_DEBUG, "'volume reset' returning %d", ret); return ret; } - -static int -glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick) +int +glusterd_stop_bricks (glusterd_volinfo_t *volinfo) { + glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - char *dup_brick = NULL; - glusterd_conf_t *priv = NULL; - int32_t ret = -1; - - GF_ASSERT (volinfo); - GF_ASSERT (brick); - - priv = THIS->private; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + /*TODO: Need to change @del_brick in brick_stop to _gf_true + * once we enable synctask in peer rpc prog */ + if (glusterd_brick_stop (volinfo, brickinfo, _gf_false)) + return -1; + } - dup_brick = gf_strdup (brick); - if (!dup_brick) - goto out; + return 0; +} - ret = glusterd_volume_brickinfo_get_by_brick (dup_brick, volinfo, &brickinfo); - if (ret) - goto out; +int +glusterd_start_bricks (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; - ret = glusterd_resolve_brick (brickinfo); - if (ret) - goto out; + GF_ASSERT (volinfo); - if (GLUSTERD_STATUS_STARTED == volinfo->status) { - ret = glusterd_brick_stop (volinfo, brickinfo); + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = glusterd_brick_start (volinfo, brickinfo, _gf_false); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to stop " - "glusterfs, ret: %d", ret); + gf_log (THIS->name, GF_LOG_ERROR, + "Failed to start %s:%s for %s", + brickinfo->hostname, brickinfo->path, + volinfo->volname); goto out; } } - glusterd_delete_volfile (volinfo, brickinfo); - glusterd_store_delete_brick (volinfo, brickinfo); - glusterd_brickinfo_delete (brickinfo); - volinfo->brick_count--; - + ret = 0; out: - if (dup_brick) - GF_FREE (dup_brick); - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } static int -glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo, - char *old_brick, char *new_brick) +glusterd_op_set_all_volume_options (xlator_t *this, dict_t *dict) { - glusterd_brickinfo_t *old_brickinfo = NULL; - glusterd_brickinfo_t *new_brickinfo = NULL; - int32_t ret = -1; - glusterd_conf_t *priv = NULL; + char *key = NULL; + char *key_fixed = NULL; + char *value = NULL; + char *dup_value = NULL; + int ret = -1; + glusterd_conf_t *conf = NULL; + dict_t *dup_opt = NULL; + char *next_version = NULL; + gf_boolean_t quorum_action = _gf_false; + + conf = this->private; + ret = dict_get_str (dict, "key1", &key); + if (ret) + goto out; - priv = THIS->private; + ret = dict_get_str (dict, "value1", &value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "invalid key,value pair in 'volume set'"); + goto out; + } + ret = glusterd_check_option_exists (key, &key_fixed); + if (ret <= 0) { + gf_log (this->name, GF_LOG_ERROR, "Invalid key %s", key); + ret = -1; + goto out; + } - GF_ASSERT (volinfo); + if (key_fixed) + key = key_fixed; - ret = glusterd_brickinfo_from_brick (new_brick, - &new_brickinfo); + ret = -1; + dup_opt = dict_new (); + if (!dup_opt) + goto out; + dict_copy (conf->opts, dup_opt); + ret = dict_set_str (dup_opt, key, value); if (ret) goto out; - ret = glusterd_volume_brickinfo_get_by_brick (old_brick, volinfo, - 
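glusterd_stop_bricks and glusterd_start_bricks above iterate the brick list and bail out on the first failure, leaving the bricks handled so far in their new state for the caller to deal with. A sketch of that first-failure contract (the failing brick id is contrived for the demo):

#include <stdio.h>

static int
brick_start (int id)
{
    return id == 2 ? -1 : 0;    /* pretend brick 2 fails to start */
}

static int
start_bricks (int nbricks)
{
    int id;

    for (id = 0; id < nbricks; id++) {
        if (brick_start (id) != 0) {
            /* first failure aborts; bricks started so far stay up */
            fprintf (stderr, "failed to start brick %d\n", id);
            return -1;
        }
    }
    return 0;
}

int
main (void)
{
    printf ("start all: %d\n", start_bricks (4));
    return 0;
}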
&old_brickinfo); + ret = glusterd_get_next_global_opt_version_str (conf->opts, + &next_version); if (ret) goto out; - ret = glusterd_resolve_brick (new_brickinfo); + ret = dict_set_str (dup_opt, GLUSTERD_GLOBAL_OPT_VERSION, next_version); if (ret) goto out; - list_add_tail (&new_brickinfo->brick_list, - &old_brickinfo->brick_list); - - volinfo->brick_count++; - - ret = glusterd_op_perform_remove_brick (volinfo, old_brick); - if (ret) + dup_value = gf_strdup (value); + if (!dup_value) goto out; - ret = glusterd_create_volfiles (volinfo); + ret = glusterd_store_options (this, dup_opt); if (ret) goto out; - if (GLUSTERD_STATUS_STARTED == volinfo->status) { - ret = glusterd_brick_start (volinfo, new_brickinfo); - if (ret) - goto out; - } + if (glusterd_is_quorum_changed (conf->opts, key, value)) + quorum_action = _gf_true; + ret = dict_set_dynstr (conf->opts, GLUSTERD_GLOBAL_OPT_VERSION, + next_version); + if (ret) + goto out; + else + next_version = NULL; + ret = dict_set_dynstr (conf->opts, key, dup_value); + if (ret) + goto out; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + GF_FREE (key_fixed); + if (dup_opt) + dict_unref (dup_opt); + + gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); + if (quorum_action) + glusterd_do_quorum_action (); + GF_FREE (next_version); return ret; } static int -glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count, - char *bricks) +glusterd_op_set_volume (dict_t *dict) { - glusterd_brickinfo_t *brickinfo = NULL; - char *brick = NULL; - int32_t i = 1; - char *brick_list = NULL; - char *free_ptr1 = NULL; - char *free_ptr2 = NULL; - char *saveptr = NULL; - int32_t ret = -1; + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + xlator_t *this = NULL; glusterd_conf_t *priv = NULL; + int count = 1; + char *key = NULL; + char *key_fixed = NULL; + char *value = NULL; + char str[50] = {0, }; + char *op_errstr = NULL; + gf_boolean_t global_opt = _gf_false; + gf_boolean_t global_opts_set = _gf_false; + glusterd_volinfo_t *voliter = NULL; + int32_t dict_count = 0; + gf_boolean_t check_op_version = _gf_false; + uint32_t new_op_version = 0; + gf_boolean_t quorum_action = _gf_false; - priv = THIS->private; + this = THIS; + GF_ASSERT (this); - GF_ASSERT (volinfo); + priv = this->private; + GF_ASSERT (priv); - if (bricks) { - brick_list = gf_strdup (bricks); - free_ptr1 = brick_list; + ret = dict_get_int32 (dict, "count", &dict_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Count(dict),not set in Volume-Set"); + goto out; } - if (count) - brick = strtok_r (brick_list+1, " \n", &saveptr); - - while ( i <= count) { - ret = glusterd_brickinfo_from_brick (brick, &brickinfo); - if (ret) - goto out; + if (dict_count == 0) { + ret = glusterd_volset_help (NULL, &op_errstr); + if (ret) { + op_errstr = (op_errstr)? 
op_errstr: + "Volume set help internal error"; + gf_log (this->name, GF_LOG_ERROR, "%s", op_errstr); + } + goto out; + } - ret = glusterd_resolve_brick (brickinfo); - if (ret) - goto out; - list_add_tail (&brickinfo->brick_list, &volinfo->bricks); - brick = strtok_r (NULL, " \n", &saveptr); - i++; + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); + goto out; } - brick_list = gf_strdup (bricks); - free_ptr2 = brick_list; - i = 1; - - if (count) - brick = strtok_r (brick_list+1, " \n", &saveptr); + if (strcasecmp (volname, "all") == 0) { + ret = glusterd_op_set_all_volume_options (this, dict); + goto out; + } - ret = glusterd_create_volfiles (volinfo); - if (ret) + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS, + volname); goto out; + } - while (i <= count) { + // TODO: Remove this once v3.3 compatability is not required + check_op_version = dict_get_str_boolean (dict, "check-op-version", + _gf_false); - ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, - &brickinfo); - if (ret) + if (check_op_version) { + ret = dict_get_uint32 (dict, "new-op-version", &new_op_version); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get new op-version from dict"); goto out; - - if (GLUSTERD_STATUS_STARTED == volinfo->status) { - ret = glusterd_brick_start (volinfo, brickinfo); - if (ret) - goto out; } - i++; - brick = strtok_r (NULL, " \n", &saveptr); } - volinfo->brick_count += count; - -out: - if (free_ptr1) - GF_FREE (free_ptr1); - if (free_ptr2) - GF_FREE (free_ptr2); - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} + for (count = 1; ret != -1 ; count++) { + sprintf (str, "key%d", count); + ret = dict_get_str (dict, str, &key); + if (ret) + break; -static int -glusterd_op_stage_remove_brick (gd1_mgmt_stage_op_req *req) -{ - int ret = -1; - dict_t *dict = NULL; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - dict_t *ctx = NULL; - char *errstr = NULL; + sprintf (str, "value%d", count); + ret = dict_get_str (dict, str, &value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "invalid key,value pair in 'volume set'"); + ret = -1; + goto out; + } - GF_ASSERT (req); + if (strcmp (key, "config.memory-accounting") == 0) { + ret = gf_string2boolean (value, + &volinfo->memory_accounting); + } - dict = dict_new (); - if (!dict) - goto out; + if (strcmp (key, "config.transport") == 0) { + gf_log (this->name, GF_LOG_INFO, + "changing transport-type for volume %s to %s", + volname, value); + ret = 0; + if (strcasecmp (value, "rdma") == 0) { + volinfo->transport_type = GF_TRANSPORT_RDMA; + } else if (strcasecmp (value, "tcp") == 0) { + volinfo->transport_type = GF_TRANSPORT_TCP; + } else if ((strcasecmp (value, "tcp,rdma") == 0) || + (strcasecmp (value, "rdma,tcp") == 0)) { + volinfo->transport_type = + GF_TRANSPORT_BOTH_TCP_RDMA; + } else { + ret = -1; + goto out; + } + } - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); + if (!is_key_glusterd_hooks_friendly (key)) { + ret = glusterd_check_option_exists (key, &key_fixed); + GF_ASSERT (ret); + if (ret <= 0) { + key_fixed = NULL; + goto out; + } + } - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); - goto out; - } + global_opt = _gf_false; + if (glusterd_check_globaloption (key)) { + global_opt = _gf_true; + global_opts_set = _gf_true; + } - ret = dict_get_str (dict, "volname", &volname); + if (!global_opt) + 
value = gf_strdup (value); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } + if (!value) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set the options in 'volume set'"); + ret = -1; + goto out; + } - ret = glusterd_volinfo_find (volname, &volinfo); + if (key_fixed) + key = key_fixed; - if (ret) { - gf_log ("", GF_LOG_ERROR, "Volume %s does not exist", volname); - goto out; - } + if (glusterd_is_quorum_changed (volinfo->dict, key, value)) + quorum_action = _gf_true; - if (glusterd_is_defrag_on(volinfo)) { - ctx = glusterd_op_get_ctx (GD_OP_REMOVE_BRICK); - errstr = gf_strdup("Rebalance is in progress. Please retry" - " after completion"); - if (!errstr) { - ret = -1; - goto out; + if (global_opt) { + list_for_each_entry (voliter, &priv->volumes, vol_list) { + value = gf_strdup (value); + ret = dict_set_dynstr (voliter->dict, key, value); + if (ret) + goto out; + } + } else { + ret = dict_set_dynstr (volinfo->dict, key, value); + if (ret) + goto out; } - gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr); - ret = dict_set_dynstr (ctx, "errstr", errstr); - if (ret) { - GF_FREE (errstr); - gf_log ("", GF_LOG_DEBUG, - "failed to set errstr ctx"); - goto out; + + if (key_fixed) { + GF_FREE (key_fixed); + key_fixed = NULL; } + } + if (count == 1) { + gf_log (this->name, GF_LOG_ERROR, "No options received "); ret = -1; goto out; } - if (volinfo->brick_count == 1) { - ctx = glusterd_op_get_ctx (GD_OP_REMOVE_BRICK); - if (!ctx) { - gf_log ("", GF_LOG_ERROR, - "Operation Context is not present"); - ret = -1; + /* Update the cluster op-version before regenerating volfiles so that + * correct volfiles are generated + */ + if (new_op_version > priv->op_version) { + priv->op_version = new_op_version; + ret = glusterd_store_global_info (this); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store op-version"); goto out; } - errstr = gf_strdup ("Deleting the last brick of the " - "volume is not allowed"); - if (!errstr) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); + } + + if (!global_opts_set) { + gd_update_volume_op_versions (volinfo); + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to create volfile for" + " 'volume set'"); ret = -1; goto out; } - ret = dict_set_dynstr (ctx, "errstr", errstr); - if (ret) { - GF_FREE (errstr); - gf_log ("", GF_LOG_DEBUG, - "failed to set pump status in ctx"); + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + ret = glusterd_nodesvcs_handle_reconfigure (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to restart NFS-Server"); + goto out; + } } - ret = -1; - goto out; - } + } else { + list_for_each_entry (voliter, &priv->volumes, vol_list) { + volinfo = voliter; + gd_update_volume_op_versions (volinfo); + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to create volfile for" + " 'volume set'"); + ret = -1; + goto out; + } -out: - if (dict) - dict_unref (dict); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + goto out; + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + ret = glusterd_nodesvcs_handle_reconfigure (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to restart NFS-Server"); + goto out; + } + } + } + } + + out: + GF_FREE 
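Note the value = gf_strdup (value) inside the global-option loop above: dict_set_dynstr takes ownership of the string it is handed, so every volume's dict must receive its own heap copy rather than sharing one pointer. A sketch of that ownership rule, with a one-field struct as a stand-in for dict_t and a lookalike setter that mimics (but is not) the real dict_set_dynstr:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct dict { char *value; };            /* one owned string per "dict" */

/* mimics dict_set_dynstr(): the dict takes ownership of v */
static void
dict_set_dynstr (struct dict *d, char *v)
{
    free (d->value);
    d->value = v;
}

int
main (void)
{
    struct dict vols[2] = { { NULL }, { NULL } };
    const char *value   = "on";
    size_t      i;

    for (i = 0; i < 2; i++)
        /* each dict gets its own copy; sharing one pointer would
           lead to a double free when the dicts are destroyed */
        dict_set_dynstr (&vols[i], strdup (value));

    for (i = 0; i < 2; i++) {
        printf ("vol%zu: %s\n", i, vols[i].value);
        free (vols[i].value);
    }
    return 0;
}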
(key_fixed);
+        gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
+        if (quorum_action)
+                glusterd_do_quorum_action ();
         return ret;
 }
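(A quick reference for the handler above: the CLI hands each option to it as a
numbered "key<n>"/"value<n>" pair alongside "volname", and the loop simply
walks n = 1, 2, ... until a key is missing. A minimal sketch of such a payload,
assembled by hand purely for illustration -- the volume and option names are
made up, error checks are omitted, and in the real flow the dict arrives
already unserialized from the RPC request rather than being built like this:

        dict_t *dict = dict_new ();

        /* what `gluster volume set testvol performance.cache-size 256MB`
         * would roughly look like on the wire */
        dict_set_str (dict, "volname", "testvol");
        dict_set_str (dict, "key1", "performance.cache-size");
        dict_set_str (dict, "value1", "256MB");

A second option would arrive as "key2"/"value2", and so on; the handler stops
at the first absent "key<n>". Note that a "volname" of "all" is routed to
glusterd_op_set_all_volume_options instead of a single volume's dict.)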
+
 static int
-glusterd_op_stage_sync_volume (gd1_mgmt_stage_op_req *req, char **op_errstr)
+glusterd_op_sync_volume (dict_t *dict, char **op_errstr,
+                         dict_t *rsp_dict)
 {
         int                                     ret = -1;
-        dict_t                                  *dict = NULL;
         char                                    *volname = NULL;
         char                                    *hostname = NULL;
-        gf_boolean_t                            exists = _gf_false;
-        glusterd_peerinfo_t                     *peerinfo = NULL;
         char                                    msg[2048] = {0,};
+        int                                     count = 1;
+        int                                     vol_count = 0;
+        glusterd_conf_t                         *priv = NULL;
+        glusterd_volinfo_t                      *volinfo = NULL;
+        xlator_t                                *this = NULL;

-        GF_ASSERT (req);
-
-        dict = dict_new ();
-        if (!dict)
-                goto out;
-
-        ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict);
-        if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict");
-                goto out;
-        }
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);

         ret = dict_get_str (dict, "hostname", &hostname);
         if (ret) {
@@ -1624,409 +1921,405 @@ glusterd_op_stage_sync_volume (gd1_mgmt_stage_op_req *req, char **op_errstr)
                 goto out;
         }

-        ret = glusterd_is_local_addr (hostname);
-        if (ret) {
-                ret = glusterd_friend_find (NULL, hostname, &peerinfo);
+        if (!gf_is_local_addr (hostname)) {
+                ret = 0;
+                goto out;
+        }
+
+        //volname is not present in case of sync all
+        ret = dict_get_str (dict, "volname", &volname);
+        if (!ret) {
+                ret = glusterd_volinfo_find (volname, &volinfo);
                 if (ret) {
-                        snprintf (msg, sizeof (msg), "%s, is not a friend",
-                                  hostname);
-                        *op_errstr = gf_strdup (msg);
+                        gf_log ("", GF_LOG_ERROR, "Volume with name: %s "
+                                "does not exist", volname);
                         goto out;
                 }
+        }

-                if (!peerinfo->connected) {
-                        snprintf (msg, sizeof (msg), "%s, is not connected at "
-                                  "the moment", hostname);
-                        *op_errstr = gf_strdup (msg);
-                        ret = -1;
-                        goto out;
-                }
-        } else {
+        if (!rsp_dict) {
+                //this should happen only on source
+                ret = 0;
+                goto out;
+        }

-                //volname is not present in case of sync all
-                ret = dict_get_str (dict, "volname", &volname);
-                if (!ret) {
-                        exists = glusterd_check_volume_exists (volname);
-                        if (!exists) {
-                                snprintf (msg, sizeof (msg), "volume: %s, "
-                                          "doesn't exist", volname);
-                                *op_errstr = gf_strdup (msg);
-                                ret = -1;
+        if (volname) {
+                ret = glusterd_add_volume_to_dict (volinfo, rsp_dict,
+                                                   1);
+                vol_count = 1;
+        } else {
+                list_for_each_entry (volinfo, &priv->volumes, vol_list) {
+                        ret = glusterd_add_volume_to_dict (volinfo,
+                                                           rsp_dict, count);
+                        if (ret)
                                 goto out;
-                        }
-                } else {
-                        ret = 0;
+
+                        vol_count = count++;
                 }
         }

+        ret = dict_set_int32 (rsp_dict, "count", vol_count);
 out:
-        if (dict)
-                dict_unref (dict);
         gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
         return ret;
 }

 static int
-glusterd_op_create_volume (gd1_mgmt_stage_op_req *req, char **op_errstr)
+glusterd_add_profile_volume_options (glusterd_volinfo_t *volinfo)
 {
-        int                                     ret = 0;
-        dict_t                                  *dict = NULL;
-        char                                    *volname = NULL;
-        glusterd_conf_t                         *priv = NULL;
-        glusterd_volinfo_t                      *volinfo = NULL;
-        glusterd_brickinfo_t                    *brickinfo = NULL;
-        xlator_t                                *this = NULL;
-        char                                    *brick = NULL;
-        int32_t                                 count = 0;
-        int32_t                                 i = 1;
-        char                                    *bricks = NULL;
-        char                                    *brick_list = NULL;
-        char                                    *free_ptr = NULL;
-        char                                    *saveptr = NULL;
-        int32_t                                 sub_count = 0;
-        char                                    *trans_type = NULL;
-        char                                    *str = NULL;
-
-        GF_ASSERT (req);
-
-        this = THIS;
-        GF_ASSERT (this);
-
-        priv = this->private;
-        GF_ASSERT (priv);
+        int                                     ret = -1;
+        char                                    *latency_key = NULL;
+        char                                    *fd_stats_key = NULL;

-        dict = dict_new ();
-        if (!dict)
-                goto out;
+        GF_ASSERT (volinfo);

-        ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict);
+        latency_key = VKEY_DIAG_LAT_MEASUREMENT;
+        fd_stats_key = VKEY_DIAG_CNT_FOP_HITS;

+        ret = dict_set_str (volinfo->dict, latency_key, "on");
         if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict");
+                gf_log ("glusterd", GF_LOG_ERROR, "failed to set the volume %s "
+                        "option %s value %s",
+                        volinfo->volname, latency_key, "on");
                 goto out;
         }

-        ret = glusterd_volinfo_new (&volinfo);
-
+        ret = dict_set_str (volinfo->dict, fd_stats_key, "on");
         if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+                gf_log ("glusterd", GF_LOG_ERROR, "failed to set the volume %s "
+                        "option %s value %s",
+                        volinfo->volname, fd_stats_key, "on");
                 goto out;
         }
+out:
+        gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+        return ret;
+}

-        ret = dict_get_str (dict, "volname", &volname);
+static void
+glusterd_remove_profile_volume_options (glusterd_volinfo_t *volinfo)
+{
+        char                                    *latency_key = NULL;
+        char                                    *fd_stats_key = NULL;

-        if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
-                goto out;
-        }
+        GF_ASSERT (volinfo);

-        strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME);
-        GF_ASSERT (volinfo->volname);
+        latency_key = VKEY_DIAG_LAT_MEASUREMENT;
+        fd_stats_key = VKEY_DIAG_CNT_FOP_HITS;

-        ret = dict_get_int32 (dict, "type", &volinfo->type);
-        if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to get type");
-                goto out;
-        }
+        dict_del (volinfo->dict, latency_key);
+        dict_del (volinfo->dict, fd_stats_key);
+}

-        ret = dict_get_int32 (dict, "count", &volinfo->brick_count);
+static int
+glusterd_op_stats_volume (dict_t *dict, char **op_errstr,
+                          dict_t *rsp_dict)
+{
+        int                                     ret = -1;
+        char                                    *volname = NULL;
+        char                                    msg[2048] = {0,};
+        glusterd_volinfo_t                      *volinfo = NULL;
+        int32_t                                 stats_op = GF_CLI_STATS_NONE;

+        ret = dict_get_str (dict, "volname", &volname);
         if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to get count");
+                gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
                 goto out;
         }

-        ret = dict_get_int32 (dict, "port", &volinfo->port);
+        ret = glusterd_volinfo_find (volname, &volinfo);
         if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to get port");
+                snprintf (msg, sizeof (msg), "Volume %s does not exist",
+                          volname);
+
+                gf_log ("", GF_LOG_ERROR, "%s", msg);
                 goto out;
         }

-        count = volinfo->brick_count;
-
-        ret = dict_get_str (dict, "bricks", &bricks);
+        ret = dict_get_int32 (dict, "op", &stats_op);
         if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to get bricks");
+                gf_log ("glusterd", GF_LOG_ERROR, "volume profile op get failed");
                 goto out;
         }

-        if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) {
-                ret = dict_get_int32 (dict, "replica-count",
-                                      &sub_count);
+        switch (stats_op) {
+        case GF_CLI_STATS_START:
+                ret = glusterd_add_profile_volume_options (volinfo);
                 if (ret)
                         goto out;
-        } else if (GF_CLUSTER_TYPE_STRIPE == volinfo->type) {
-                ret = dict_get_int32 (dict, "stripe-count",
-                                      &sub_count);
-                if (ret)
-                        goto out;
-        }
-
-        ret = dict_get_str (dict, "transport", &trans_type);
-        if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Unable to get transport");
+                break;
+        case GF_CLI_STATS_STOP:
+                glusterd_remove_profile_volume_options (volinfo);
+                break;
+        case GF_CLI_STATS_INFO:
+        case GF_CLI_STATS_TOP:
+                //info is already collected in brick op.
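+                //(the data for INFO and TOP is gathered during the
+                //brick-op phase, so the commit phase has nothing
+                //further to do here;)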
+ //just goto out; + ret = 0; goto out; - } - - ret = dict_get_str (dict, "volume-id", &str); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume-id"); + break; + default: + GF_ASSERT (0); + gf_log ("glusterd", GF_LOG_ERROR, "Invalid profile op: %d", + stats_op); + ret = -1; goto out; + break; } - ret = uuid_parse (str, volinfo->volume_id); + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) { - gf_log ("", GF_LOG_ERROR, "unable to parse uuid %s", str); + gf_log ("", GF_LOG_ERROR, "Unable to create volfile for" + " 'volume set'"); + ret = -1; goto out; } - if (strcasecmp (trans_type, "rdma") == 0) { - volinfo->transport_type = GF_TRANSPORT_RDMA; - } else { - volinfo->transport_type = GF_TRANSPORT_TCP; - } - volinfo->sub_count = sub_count; - - if (bricks) { - brick_list = gf_strdup (bricks); - free_ptr = brick_list; - } - - if (count) - brick = strtok_r (brick_list+1, " \n", &saveptr); - - while ( i <= count) { - ret = glusterd_brickinfo_from_brick (brick, &brickinfo); - if (ret) - goto out; - - ret = glusterd_resolve_brick (brickinfo); - if (ret) - goto out; - list_add_tail (&brickinfo->brick_list, &volinfo->bricks); - brick = strtok_r (NULL, " \n", &saveptr); - i++; - } - list_add_tail (&volinfo->vol_list, &priv->volumes); - volinfo->version++; - volinfo->defrag_status = 0; - - ret = glusterd_store_create_volume (volinfo); - + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) goto out; - ret = glusterd_create_volfiles (volinfo); + if (GLUSTERD_STATUS_STARTED == volinfo->status) + ret = glusterd_nodesvcs_handle_reconfigure (volinfo); - ret = glusterd_volume_compute_cksum (volinfo); - if (ret) - goto out; + ret = 0; out: - if (dict) - dict_unref (dict); - - if (free_ptr) - GF_FREE(free_ptr); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } static int -glusterd_op_add_brick (gd1_mgmt_stage_op_req *req, char **op_errstr) +_add_brick_name_to_dict (dict_t *dict, char *key, glusterd_brickinfo_t *brick) { - int ret = 0; - dict_t *dict = NULL; - char *volname = NULL; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - xlator_t *this = NULL; - char *bricks = NULL; - int32_t count = 0; + int ret = -1; + char tmp[1024] = {0,}; + char *brickname = NULL; + xlator_t *this = NULL; - GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (key); + GF_ASSERT (brick); this = THIS; GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - - dict = dict_new (); - if (!dict) - goto out; - - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); + snprintf (tmp, sizeof (tmp), "%s:%s", brick->hostname, brick->path); + brickname = gf_strdup (tmp); + if (!brickname) { + gf_log (this->name, GF_LOG_ERROR, "Failed to dup brick name"); goto out; } - ret = dict_get_str (dict, "volname", &volname); - + ret = dict_set_dynstr (dict, key, brickname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, + "Failed to add brick name to dict"); goto out; } + brickname = NULL; +out: + if (brickname) + GF_FREE (brickname); + return ret; +} - ret = glusterd_volinfo_find (volname, &volinfo); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; - } +static int +_add_remove_bricks_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, + char *prefix) +{ + int ret = -1; + int count = 0; + int i = 0; + char brick_key[1024] = {0,}; + char dict_key[1024] ={0,}; + char 
*brick = NULL; + xlator_t *this = NULL; - ret = dict_get_int32 (dict, "count", &count); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); - goto out; - } + GF_ASSERT (dict); + GF_ASSERT (volinfo); + GF_ASSERT (prefix); + this = THIS; + GF_ASSERT (this); - ret = dict_get_str (dict, "bricks", &bricks); + ret = dict_get_int32 (volinfo->rebal.dict, "count", &count); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get bricks"); + gf_log (this->name, GF_LOG_ERROR, + "Failed to get brick count"); goto out; } - ret = glusterd_op_perform_add_bricks (volinfo, count, bricks); + snprintf (dict_key, sizeof (dict_key), "%s.count", prefix); + ret = dict_set_int32 (dict, dict_key, count); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to add bricks"); + gf_log (this->name, GF_LOG_ERROR, + "Failed to set brick count in dict"); goto out; } - volinfo->version++; - volinfo->defrag_status = 0; + for (i = 1; i <= count; i++) { + memset (brick_key, 0, sizeof (brick_key)); + snprintf (brick_key, sizeof (brick_key), "brick%d", i); - ret = glusterd_store_update_volume (volinfo); - if (ret) - goto out; - - ret = glusterd_volume_compute_cksum (volinfo); - if (ret) - goto out; + ret = dict_get_str (volinfo->rebal.dict, brick_key, &brick); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get %s", brick_key); + goto out; + } - if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (volinfo); + memset (dict_key, 0, sizeof (dict_key)); + snprintf (dict_key, sizeof (dict_key), "%s.%s", prefix, + brick_key); + ret = dict_set_str (dict, dict_key, brick); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add brick to dict"); + goto out; + } + brick = NULL; + } out: - if (dict) - dict_unref (dict); return ret; } +/* This adds the respective task-id and all available parameters of a task into + * a dictionary + */ static int -rb_regenerate_volfiles (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo, - int32_t pump_needed) +_add_task_to_dict (dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index) { - dict_t *dict = NULL; - int ret = 0; - dict = volinfo->dict; + int ret = -1; + char key[128] = {0,}; + char *uuid_str = NULL; + int status = 0; + xlator_t *this = NULL; - gf_log ("", GF_LOG_DEBUG, - "attempting to set pump value=%d", pump_needed); + GF_ASSERT (dict); + GF_ASSERT (volinfo); - ret = dict_set_int32 (dict, "enable-pump", pump_needed); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "could not dict_set enable-pump"); - goto out; - } + this = THIS; + GF_ASSERT (this); - ret = glusterd_create_rb_volfiles (volinfo, brickinfo); + switch (op) { + case GD_OP_REMOVE_BRICK: + snprintf (key, sizeof (key), "task%d", index); + ret = _add_remove_bricks_to_dict (dict, volinfo, key); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add remove bricks to dict"); + goto out; + } + case GD_OP_REBALANCE: + uuid_str = gf_strdup (uuid_utoa (volinfo->rebal.rebalance_id)); + status = volinfo->rebal.defrag_status; + break; -out: - return ret; -} + case GD_OP_REPLACE_BRICK: + snprintf (key, sizeof (key), "task%d.src-brick", index); + ret = _add_brick_name_to_dict (dict, key, + volinfo->rep_brick.src_brick); + if (ret) + goto out; + memset (key, 0, sizeof (key)); -static int -rb_src_brick_restart (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - int activate_pump) -{ - int ret = 0; + snprintf (key, sizeof (key), "task%d.dst-brick", index); + ret = _add_brick_name_to_dict (dict, key, + volinfo->rep_brick.dst_brick); + if 
(ret) + goto out; + memset (key, 0, sizeof (key)); - gf_log ("", GF_LOG_DEBUG, - "Attempting to kill src"); + uuid_str = gf_strdup (uuid_utoa (volinfo->rep_brick.rb_id)); + status = volinfo->rep_brick.rb_status; + break; - ret = glusterd_volume_stop_glusterfs (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to stop " - "glusterfs, ret: %d", ret); + default: + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "%s operation doesn't have a" + " task_id", gd_op_list[op]); goto out; } - glusterd_delete_volfile (volinfo, src_brickinfo); + snprintf (key, sizeof (key), "task%d.type", index); + ret = dict_set_str (dict, key, (char *)gd_op_list[op]); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting task type in dict"); + goto out; + } - if (activate_pump) { - ret = rb_regenerate_volfiles (volinfo, src_brickinfo, 1); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not regenerate volfiles with pump"); - goto out; - } - } else { - ret = rb_regenerate_volfiles (volinfo, src_brickinfo, 0); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not regenerate volfiles without pump"); - goto out; - } + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.id", index); + if (!uuid_str) + goto out; + ret = dict_set_dynstr (dict, key, uuid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting task id in dict"); + goto out; } + uuid_str = NULL; - sleep (2); - ret = glusterd_volume_start_glusterfs (volinfo, src_brickinfo); + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "task%d.status", index); + ret = dict_set_int32 (dict, key, status); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to start " - "glusterfs, ret: %d", ret); + gf_log (this->name, GF_LOG_ERROR, + "Error setting task status in dict"); goto out; } - out: + if (uuid_str) + GF_FREE (uuid_str); return ret; } static int -rb_send_xattr_command (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - glusterd_brickinfo_t *dst_brickinfo, - const char *xattr_key, - const char *value) +glusterd_aggregate_task_status (dict_t *rsp_dict, glusterd_volinfo_t *volinfo) { - glusterd_conf_t *priv = NULL; - char mount_point_path[PATH_MAX] = {0,}; - struct stat buf; - int ret = -1; - - priv = THIS->private; + int ret = -1; + int tasks = 0; + xlator_t *this = NULL; - snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); + this = THIS; + GF_ASSERT (this); - ret = stat (mount_point_path, &buf); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "stat failed. 
Could not send " - " %s command", xattr_key); - goto out; - } + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ret = _add_task_to_dict (rsp_dict, volinfo, volinfo->rebal.op, + tasks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add task details to dict"); + goto out; + } + tasks++; + } - ret = sys_lsetxattr (mount_point_path, xattr_key, - value, - strlen (value) + 1, - 0); + if (!uuid_is_null (volinfo->rep_brick.rb_id)) { + ret = _add_task_to_dict (rsp_dict, volinfo, GD_OP_REPLACE_BRICK, + tasks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add task details to dict"); + goto out; + } + tasks++; + } + ret = dict_set_int32 (rsp_dict, "tasks", tasks); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "setxattr failed"); + gf_log (this->name, GF_LOG_ERROR, + "Error setting tasks count in dict"); goto out; } - ret = 0; out: @@ -2034,2160 +2327,2314 @@ out: } static int -rb_spawn_dst_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +glusterd_op_status_volume (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) { - glusterd_conf_t *priv = NULL; - char cmd_str[8192] = {0,}; - int ret = -1; - int32_t port = 0; + int ret = -1; + int node_count = 0; + int brick_index = -1; + int other_count = 0; + int other_index = 0; + uint32_t cmd = 0; + char *volname = NULL; + char *brick = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *priv = NULL; + dict_t *vol_opts = NULL; + gf_boolean_t nfs_disabled = _gf_false; + gf_boolean_t shd_enabled = _gf_true; + gf_boolean_t origin_glusterd = _gf_false; - priv = THIS->private; + this = THIS; + GF_ASSERT (this); + priv = this->private; - port = pmap_registry_alloc (THIS); - brickinfo->port = port; + GF_ASSERT (priv); - GF_ASSERT (port); + GF_ASSERT (dict); - snprintf (cmd_str, 8192, - "%s/sbin/glusterfs -f %s/vols/%s/%s -p %s/vols/%s/%s " - "--xlator-option src-server.listen-port=%d", - GFS_PREFIX, priv->workdir, volinfo->volname, - RB_DSTBRICKVOL_FILENAME, - priv->workdir, volinfo->volname, - RB_DSTBRICK_PIDFILE, - port); + origin_glusterd = is_origin_glusterd (dict); - ret = gf_system (cmd_str); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not start glusterfs"); + ret = dict_get_uint32 (dict, "cmd", &cmd); + if (ret) goto out; - } - gf_log ("", GF_LOG_DEBUG, - "Successfully started glusterfs: brick=%s:%s", - brickinfo->hostname, brickinfo->path); - - ret = 0; - -out: - return ret; -} + if (origin_glusterd) { + ret = 0; + if ((cmd & GF_CLI_STATUS_ALL)) { + ret = glusterd_get_all_volnames (rsp_dict); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "failed to get all volume " + "names for status"); + } + } -static int -rb_spawn_glusterfs_client (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) -{ - glusterd_conf_t *priv = NULL; - char cmd_str[8192] = {0,}; - struct stat buf; - int ret = -1; + ret = dict_set_uint32 (rsp_dict, "cmd", cmd); + if (ret) + goto out; - priv = THIS->private; + if (cmd & GF_CLI_STATUS_ALL) + goto out; - snprintf (cmd_str, 4096, - "%s/sbin/glusterfs -f %s/vols/%s/%s %s/vols/%s/%s", - GFS_PREFIX, priv->workdir, volinfo->volname, - RB_CLIENTVOL_FILENAME, - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); + ret = dict_get_str (dict, "volname", &volname); + if (ret) + goto out; - ret = gf_system (cmd_str); + ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not start glusterfs"); + gf_log (this->name, GF_LOG_ERROR, "Volume with name: %s " + 
"does not exist", volname); goto out; } + vol_opts = volinfo->dict; - gf_log ("", GF_LOG_DEBUG, - "Successfully started glusterfs: brick=%s:%s", - brickinfo->hostname, brickinfo->path); + if ((cmd & GF_CLI_STATUS_NFS) != 0) { + ret = glusterd_add_node_to_dict ("nfs", rsp_dict, 0, vol_opts); + if (ret) + goto out; + other_count++; + node_count++; - memset (cmd_str, 0, sizeof (cmd_str)); + } else if ((cmd & GF_CLI_STATUS_SHD) != 0) { + ret = glusterd_add_node_to_dict ("glustershd", rsp_dict, 0, + vol_opts); + if (ret) + goto out; + other_count++; + node_count++; - snprintf (cmd_str, 4096, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); + } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) { + ret = dict_get_str (dict, "brick", &brick); + if (ret) + goto out; - ret = stat (cmd_str, &buf); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "stat on mountpoint failed"); - goto out; - } + ret = glusterd_volume_brickinfo_get_by_brick (brick, + volinfo, + &brickinfo); + if (ret) + goto out; - gf_log ("", GF_LOG_DEBUG, - "stat on mountpoint succeeded"); + if (uuid_compare (brickinfo->uuid, MY_UUID)) + goto out; - ret = 0; + glusterd_add_brick_to_dict (volinfo, brickinfo, rsp_dict, + ++brick_index); + if (cmd & GF_CLI_STATUS_DETAIL) + glusterd_add_brick_detail_to_dict (volinfo, brickinfo, + rsp_dict, + brick_index); + node_count++; -out: - return ret; -} + } else if ((cmd & GF_CLI_STATUS_TASKS) != 0) { + ret = glusterd_aggregate_task_status (rsp_dict, volinfo); + goto out; -static const char *client_volfile_str = "volume mnt-client\n" - " type protocol/client\n" - " option remote-host %s\n" - " option remote-subvolume %s\n" - " option remote-port %d\n" - "end-volume\n" - "volume mnt-wb\n" - " type performance/write-behind\n" - " subvolumes mnt-client\n" - "end-volume\n"; + } else { + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + brick_index++; + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; -static int -rb_generate_client_volfile (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo) -{ - glusterd_conf_t *priv = NULL; - FILE *file = NULL; - char filename[PATH_MAX]; - int ret = -1; + glusterd_add_brick_to_dict (volinfo, brickinfo, + rsp_dict, brick_index); - priv = THIS->private; + if (cmd & GF_CLI_STATUS_DETAIL) { + glusterd_add_brick_detail_to_dict (volinfo, + brickinfo, + rsp_dict, + brick_index); + } + node_count++; + } - gf_log ("", GF_LOG_DEBUG, - "Creating volfile"); + if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) { + other_index = brick_index + 1; + + nfs_disabled = dict_get_str_boolean (vol_opts, + "nfs.disable", + _gf_false); + if (!nfs_disabled) { + ret = glusterd_add_node_to_dict ("nfs", + rsp_dict, + other_index, + vol_opts); + if (ret) + goto out; + other_index++; + other_count++; + node_count++; + } - snprintf (filename, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENTVOL_FILENAME); + shd_enabled = dict_get_str_boolean + (vol_opts, "cluster.self-heal-daemon", + _gf_true); + if (glusterd_is_volume_replicate (volinfo) + && shd_enabled) { + ret = glusterd_add_node_to_dict ("glustershd", + rsp_dict, + other_index, + vol_opts); + if (ret) + goto out; + other_count++; + node_count++; + } + } + } - file = fopen (filename, "w+"); - if (!file) { - gf_log ("", GF_LOG_DEBUG, - "Open of volfile failed"); - ret = -1; + ret = dict_set_int32 (rsp_dict, "brick-index-max", brick_index); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting brick-index-max to dict"); goto out; } - - GF_ASSERT 
(src_brickinfo->port); - - fprintf (file, client_volfile_str, src_brickinfo->hostname, - src_brickinfo->path, src_brickinfo->port); - - fclose (file); - - ret = 0; - -out: - return ret; -} - -static const char *dst_brick_volfile_str = "volume src-posix\n" - " type storage/posix\n" - " option directory %s\n" - "end-volume\n" - "volume %s\n" - " type features/locks\n" - " subvolumes src-posix\n" - "end-volume\n" - "volume src-server\n" - " type protocol/server\n" - " option auth.addr.%s.allow *\n" - " option transport-type tcp\n" - " subvolumes %s\n" - "end-volume\n"; - -static int -rb_generate_dst_brick_volfile (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *dst_brickinfo) -{ - glusterd_conf_t *priv = NULL; - FILE *file = NULL; - char filename[PATH_MAX]; - int ret = -1; - - priv = THIS->private; - - gf_log ("", GF_LOG_DEBUG, - "Creating volfile"); - - snprintf (filename, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_DSTBRICKVOL_FILENAME); - - file = fopen (filename, "w+"); - if (!file) { - gf_log ("", GF_LOG_DEBUG, - "Open of volfile failed"); - ret = -1; + ret = dict_set_int32 (rsp_dict, "other-count", other_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting other-count to dict"); + goto out; + } + ret = dict_set_int32 (rsp_dict, "count", node_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting node count to dict"); goto out; } - fprintf (file, dst_brick_volfile_str, dst_brickinfo->path, - dst_brickinfo->path, dst_brickinfo->path, - dst_brickinfo->path); - - fclose (file); + /* Active tasks */ + /* Tasks are added only for normal volume status request for either a + * single volume or all volumes, and only by the origin glusterd + */ + if (((cmd & GF_CLI_STATUS_MASK) != GF_CLI_STATUS_NONE) || + !(cmd & (GF_CLI_STATUS_VOL | GF_CLI_STATUS_ALL)) || + !origin_glusterd) + goto out; + ret = glusterd_aggregate_task_status (rsp_dict, volinfo); + if (ret) + goto out; ret = 0; out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; } static int -rb_mountpoint_mkdir (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo) +glusterd_op_ac_none (glusterd_op_sm_event_t *event, void *ctx) { - glusterd_conf_t *priv = NULL; - char mount_point_path[PATH_MAX] = {0,}; - int ret = -1; - - priv = THIS->private; - - snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); - - ret = mkdir (mount_point_path, 0777); - if (ret && (errno != EEXIST)) { - gf_log ("", GF_LOG_DEBUG, "mkdir failed, errno: %d", errno); - goto out; - } + int ret = 0; - ret = 0; + gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); -out: return ret; } static int -rb_mountpoint_rmdir (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo) +glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) { - glusterd_conf_t *priv = NULL; - char mount_point_path[PATH_MAX] = {0,}; - int ret = -1; + int ret = 0; + rpc_clnt_procedure_t *proc = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + uint32_t pending_count = 0; + dict_t *dict = NULL; - priv = THIS->private; + this = THIS; + priv = this->private; + GF_ASSERT (priv); - snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); + list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + GF_ASSERT (peerinfo); - ret = rmdir (mount_point_path); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "rmdir 
failed"); - goto out; + if (!peerinfo->connected || !peerinfo->mgmt) + continue; + if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && + (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) + continue; + + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ + if (priv->op_version < 3) { + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK]; + if (proc->fn) { + ret = proc->fn (NULL, this, peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send lock request " + "for operation 'Volume %s' to " + "peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + continue; + } + pending_count++; + } + } else { + dict = glusterd_op_get_ctx (); + dict_ref (dict); + + proc = &peerinfo->mgmt_v3->proctable + [GLUSTERD_MGMT_V3_LOCK]; + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", + peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set peerinfo"); + dict_unref (dict); + goto out; + } + + ret = proc->fn (NULL, this, dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send mgmt_v3 lock " + "request for operation " + "'Volume %s' to peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + dict_unref (dict); + continue; + } + pending_count++; + } + } } - ret = 0; + opinfo.pending_count = pending_count; + if (!opinfo.pending_count) + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); out: + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } static int -rb_destroy_maintainence_client (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo) +glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) { - glusterd_conf_t *priv = NULL; - char cmd_str[8192] = {0,}; - char filename[PATH_MAX] = {0,}; - struct stat buf; - char mount_point_path[PATH_MAX] = {0,}; - int ret = -1; - - priv = THIS->private; - - snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); + int ret = 0; + rpc_clnt_procedure_t *proc = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + uint32_t pending_count = 0; + dict_t *dict = NULL; - ret = stat (mount_point_path, &buf); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "stat failed. 
Cannot destroy maintainence " - "client"); - goto out; - } - - snprintf (cmd_str, 8192, "/bin/umount -f %s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); + this = THIS; + priv = this->private; + GF_ASSERT (priv); - ret = gf_system (cmd_str); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "umount failed on maintainence client"); - goto out; - } + list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + GF_ASSERT (peerinfo); - ret = rb_mountpoint_rmdir (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "rmdir of mountpoint failed"); - goto out; - } + if (!peerinfo->connected || !peerinfo->mgmt) + continue; + if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && + (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) + continue; - snprintf (filename, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENTVOL_FILENAME); + /* Based on the op_version, release the * + * cluster or mgmt_v3 lock */ + if (priv->op_version < 3) { + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK]; + if (proc->fn) { + ret = proc->fn (NULL, this, peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send unlock request " + "for operation 'Volume %s' to " + "peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + continue; + } + pending_count++; + } + } else { + dict = glusterd_op_get_ctx (); + dict_ref (dict); + + proc = &peerinfo->mgmt_v3->proctable + [GLUSTERD_MGMT_V3_UNLOCK]; + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", + peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set peerinfo"); + dict_unref (dict); + goto out; + } - ret = unlink (filename); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "unlink failed"); - goto out; + ret = proc->fn (NULL, this, dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send mgmt_v3 unlock " + "request for operation " + "'Volume %s' to peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + dict_unref (dict); + continue; + } + pending_count++; + } + } } - ret = 0; + opinfo.pending_count = pending_count; + if (!opinfo.pending_count) + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); out: + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } static int -rb_spawn_maintainence_client (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo) +glusterd_op_ac_ack_drain (glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; + int ret = 0; - ret = rb_generate_client_volfile (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to generate client " - "volfile"); - goto out; - } + if (opinfo.pending_count > 0) + opinfo.pending_count--; - ret = rb_mountpoint_mkdir (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to mkdir " - "mountpoint"); - goto out; - } + if (!opinfo.pending_count) + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); - ret = rb_spawn_glusterfs_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to start glusterfs"); - goto out; - } + gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); - ret = 0; -out: return ret; } static int -rb_spawn_destination_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *dst_brickinfo) - +glusterd_op_ac_send_unlock_drain (glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; - - ret = rb_generate_dst_brick_volfile (volinfo, dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to generate client " - 
"volfile"); - goto out; - } - - ret = rb_spawn_dst_brick (volinfo, dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to start glusterfs"); - goto out; - } - - ret = 0; -out: - return ret; + return glusterd_op_ac_ack_drain (event, ctx); } static int -rb_do_operation_start (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - glusterd_brickinfo_t *dst_brickinfo) +glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) { - char start_value[8192] = {0,}; - int ret = -1; - + int32_t ret = 0; + char *volname = NULL; + glusterd_op_lock_ctx_t *lock_ctx = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; - gf_log ("", GF_LOG_DEBUG, - "replace-brick sending start xattr"); - - ret = rb_spawn_maintainence_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not spawn maintainence " - "client"); - goto out; - } + GF_ASSERT (event); + GF_ASSERT (ctx); - gf_log ("", GF_LOG_DEBUG, - "mounted the replace brick client"); + this = THIS; + priv = this->private; - snprintf (start_value, 8192, "%s:%s:%d", - dst_brickinfo->hostname, - dst_brickinfo->path, - dst_brickinfo->port); + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; + /* If the req came from a node running on older op_version + * the dict won't be present. Based on it acquiring a cluster + * or mgmt_v3 lock */ + if (lock_ctx->dict == NULL) { + ret = glusterd_lock (lock_ctx->uuid); + glusterd_op_lock_send_resp (lock_ctx->req, ret); + } else { + ret = dict_get_str (lock_ctx->dict, "volname", &volname); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire volname"); + else { + ret = glusterd_mgmt_v3_lock (volname, lock_ctx->uuid, + "vol"); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", + volname); + } - ret = rb_send_xattr_command (volinfo, src_brickinfo, - dst_brickinfo, RB_PUMP_START_CMD, - start_value); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to send command to pump"); - } + glusterd_op_mgmt_v3_lock_send_resp (lock_ctx->req, + &event->txn_id, ret); - ret = rb_destroy_maintainence_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to destroy maintainence " - "client"); - goto out; + dict_unref (lock_ctx->dict); } - gf_log ("", GF_LOG_DEBUG, - "unmounted the replace brick client"); - ret = 0; - -out: + gf_log (THIS->name, GF_LOG_DEBUG, "Lock Returned %d", ret); return ret; } static int -rb_do_operation_pause (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - glusterd_brickinfo_t *dst_brickinfo) +glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; - - gf_log ("", GF_LOG_NORMAL, - "replace-brick send pause xattr"); + int32_t ret = 0; + char *volname = NULL; + glusterd_op_lock_ctx_t *lock_ctx = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; - ret = rb_spawn_maintainence_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not spawn maintainence " - "client"); - goto out; - } - gf_log ("", GF_LOG_DEBUG, - "mounted the replace brick client"); + GF_ASSERT (event); + GF_ASSERT (ctx); - ret = rb_send_xattr_command (volinfo, src_brickinfo, - dst_brickinfo, RB_PUMP_PAUSE_CMD, - "jargon"); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to send command to pump"); + this = THIS; + priv = this->private; - } + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; - ret = rb_destroy_maintainence_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to destroy maintainence " - 
"client"); - goto out; - } + /* If the req came from a node running on older op_version + * the dict won't be present. Based on it releasing the cluster + * or mgmt_v3 lock */ + if (lock_ctx->dict == NULL) { + ret = glusterd_unlock (lock_ctx->uuid); + glusterd_op_unlock_send_resp (lock_ctx->req, ret); + } else { + ret = dict_get_str (lock_ctx->dict, "volname", &volname); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire volname"); + else { + ret = glusterd_mgmt_v3_unlock (volname, lock_ctx->uuid, + "vol"); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", volname); + } + glusterd_op_mgmt_v3_unlock_send_resp (lock_ctx->req, + &event->txn_id, ret); - gf_log ("", GF_LOG_DEBUG, - "unmounted the replace brick client"); + dict_unref (lock_ctx->dict); + } - ret = 0; + gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret); -out: - if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - ret = rb_src_brick_restart (volinfo, src_brickinfo, - 0); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not restart src-brick"); - } - } + if (priv->pending_quorum_action) + glusterd_do_quorum_action (); return ret; } static int -rb_kill_destination_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *dst_brickinfo) +glusterd_op_ac_local_unlock (glusterd_op_sm_event_t *event, void *ctx) { - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; + int ret = 0; + uuid_t *originator = NULL; - priv = THIS->private; + GF_ASSERT (event); + GF_ASSERT (ctx); + + originator = (uuid_t *) ctx; - snprintf (pidfile, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_DSTBRICK_PIDFILE); + ret = glusterd_unlock (*originator); - return glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_true); + gf_log (THIS->name, GF_LOG_DEBUG, "Unlock Returned %d", ret); + + return ret; } static int -rb_do_operation_abort (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - glusterd_brickinfo_t *dst_brickinfo) +glusterd_op_ac_rcvd_lock_acc (glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; - - gf_log ("", GF_LOG_DEBUG, - "replace-brick sending abort xattr"); - - ret = rb_spawn_maintainence_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not spawn maintainence " - "client"); - goto out; - } + int ret = 0; - gf_log ("", GF_LOG_DEBUG, - "mounted the replace brick client"); + GF_ASSERT (event); - ret = rb_send_xattr_command (volinfo, src_brickinfo, - dst_brickinfo, RB_PUMP_ABORT_CMD, - "jargon"); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to send command to pump"); - } + if (opinfo.pending_count > 0) + opinfo.pending_count--; - ret = rb_destroy_maintainence_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to destroy maintainence " - "client"); + if (opinfo.pending_count > 0) goto out; - } - gf_log ("", GF_LOG_DEBUG, - "unmounted the replace brick client"); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, + &event->txn_id, NULL); - ret = 0; + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); out: - if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - ret = rb_src_brick_restart (volinfo, src_brickinfo, - 0); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not restart src-brick"); - } - } return ret; } - static int -rb_get_xattr_command (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - glusterd_brickinfo_t *dst_brickinfo, - const char *xattr_key, - char *value) +glusterd_dict_set_volid (dict_t *dict, char 
*volname, char **op_errstr) { - glusterd_conf_t *priv = NULL; - char mount_point_path[PATH_MAX] = {0,}; - struct stat buf; - int ret = -1; - - priv = THIS->private; - - snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s", - priv->workdir, volinfo->volname, - RB_CLIENT_MOUNTPOINT); + int ret = -1; + glusterd_volinfo_t *volinfo = NULL; + char *volid = NULL; + char msg[1024] = {0,}; + xlator_t *this = NULL; - ret = stat (mount_point_path, &buf); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "stat failed. Could not send " - " %s command", xattr_key); - goto out; - } + this = THIS; + GF_ASSERT (this); - ret = lgetxattr (mount_point_path, xattr_key, - value, - 8192); + if (!dict || !volname) + goto out; - if (ret < 0) { - gf_log ("", GF_LOG_DEBUG, - "getxattr failed"); + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname); + goto out; + } + volid = gf_strdup (uuid_utoa (volinfo->volume_id)); + if (!volid) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, "vol-id", volid); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to set volume id of volume" + " %s", volname); goto out; } - - ret = 0; - out: + if (msg[0] != '\0') { + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + } return ret; } -static int -rb_do_operation_status (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brickinfo, - glusterd_brickinfo_t *dst_brickinfo) +int +glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx) { - char status[2048] = {0,}; - char *status_reply = NULL; - dict_t *ctx = NULL; - int ret = 0; - gf_boolean_t origin = _gf_false; + int ret = -1; + void *ctx = NULL; + dict_t *dict = NULL; + dict_t *req_dict = NULL; + glusterd_op_t op = GD_OP_NONE; + char *volname = NULL; + uint32_t status_cmd = GF_CLI_STATUS_NONE; + char *errstr = NULL; + xlator_t *this = NULL; - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (!ctx) { - gf_log ("", GF_LOG_ERROR, - "Operation Context is not present"); - goto out; - } + GF_ASSERT (req); + + this = THIS; + GF_ASSERT (this); - origin = _gf_true; + req_dict = dict_new (); + if (!req_dict) + goto out; - if (origin) { - ret = rb_spawn_maintainence_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not spawn maintainence " - "client"); + if (!op_ctx) { + op = glusterd_op_get_op (); + ctx = (void*)glusterd_op_get_ctx (); + if (!ctx) { + gf_log (this->name, GF_LOG_ERROR, "Null Context for " + "op %d", op); + ret = -1; goto out; } - gf_log ("", GF_LOG_DEBUG, - "mounted the replace brick client"); - - ret = rb_get_xattr_command (volinfo, src_brickinfo, - dst_brickinfo, RB_PUMP_STATUS_CMD, - status); + } else { +#define GD_SYNC_OPCODE_KEY "sync-mgmt-operation" + ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, (int32_t*)&op); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to get status from pump"); - goto umount; - } - - gf_log ("", GF_LOG_DEBUG, - "pump status is %s", status); - - status_reply = gf_strdup (status); - if (!status_reply) { - gf_log ("", GF_LOG_ERROR, "Out of memory"); - ret = -1; - goto umount; + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume" + " operation"); + goto out; } + ctx = op_ctx; +#undef GD_SYNC_OPCODE_KEY + } - ret = dict_set_dynstr (ctx, "status-reply", - status_reply); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "failed to set pump status in ctx"); + dict = ctx; + switch (op) { + case GD_OP_CREATE_VOLUME: + { + ++glusterfs_port; + ret = dict_set_int32 (dict, "port", + 
glusterfs_port); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set port in " + "dictionary"); + goto out; + } + dict_copy (dict, req_dict); + } + break; - } + case GD_OP_GSYNC_CREATE: + case GD_OP_GSYNC_SET: + { + ret = glusterd_op_gsync_args_get (dict, + &errstr, + &volname, + NULL, NULL); + if (ret == 0) { + ret = glusterd_dict_set_volid + (dict, volname, op_errstr); + if (ret) + goto out; + } + dict_copy (dict, req_dict); + } + break; - umount: - ret = rb_destroy_maintainence_client (volinfo, src_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to destroy maintainence " - "client"); - goto out; - } - } + case GD_OP_SET_VOLUME: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "volname is not present in " + "operation ctx"); + goto out; + } + if (strcmp (volname, "help") && + strcmp (volname, "help-xml") && + strcasecmp (volname, "all")) { + ret = glusterd_dict_set_volid + (dict, volname, op_errstr); + if (ret) + goto out; + } + dict_destroy (req_dict); + req_dict = dict_ref (dict); + } + break; - gf_log ("", GF_LOG_DEBUG, - "unmounted the replace brick client"); -out: - return ret; -} + case GD_OP_SYNC_VOLUME: + { + dict_copy (dict, req_dict); + break; + } -/* Set src-brick's port number to be used in the maintainance mount - * after all commit acks are received. - */ -static int -rb_update_srcbrick_port (glusterd_brickinfo_t *src_brickinfo, dict_t *rsp_dict, - dict_t *req_dict, int32_t replace_op) -{ - xlator_t *this = NULL; - dict_t *ctx = NULL; - int ret = 0; - int dict_ret = 0; - int src_port = 0; + case GD_OP_REMOVE_BRICK: + { + dict_t *dict = ctx; + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "volname is not present in " + "operation ctx"); + goto out; + } - this = THIS; + ret = glusterd_dict_set_volid (dict, volname, + op_errstr); + if (ret) + goto out; - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - dict_ret = dict_get_int32 (req_dict, "src-brick-port", &src_port); - if (src_port) - src_brickinfo->port = src_port; - } + dict_destroy (req_dict); + req_dict = dict_ref (dict); + } + break; - if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - gf_log ("", GF_LOG_NORMAL, - "adding src-brick port no"); + case GD_OP_STATUS_VOLUME: + { + ret = dict_get_uint32 (dict, "cmd", + &status_cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Status command not present " + "in op ctx"); + goto out; + } + if (GF_CLI_STATUS_ALL & status_cmd) { + dict_copy (dict, req_dict); + break; + } + } + /*fall-through*/ + case GD_OP_DELETE_VOLUME: + case GD_OP_START_VOLUME: + case GD_OP_STOP_VOLUME: + case GD_OP_ADD_BRICK: + case GD_OP_REPLACE_BRICK: + case GD_OP_RESET_VOLUME: + case GD_OP_LOG_ROTATE: + case GD_OP_QUOTA: + case GD_OP_PROFILE_VOLUME: + case GD_OP_REBALANCE: + case GD_OP_HEAL_VOLUME: + case GD_OP_STATEDUMP_VOLUME: + case GD_OP_CLEARLOCKS_VOLUME: + case GD_OP_DEFRAG_BRICK_VOLUME: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "volname is not present in " + "operation ctx"); + goto out; + } - src_brickinfo->port = pmap_registry_search (this, - src_brickinfo->path, GF_PMAP_PORT_BRICKSERVER); - if (!src_brickinfo->port && - replace_op != GF_REPLACE_OP_COMMIT_FORCE ) { - gf_log ("", GF_LOG_ERROR, - "Src brick port not available"); - ret = -1; - goto out; - } + if (strcasecmp (volname, "all")) { + ret = glusterd_dict_set_volid (dict, + volname, + op_errstr); + if 
(ret) + goto out; + } + dict_copy (dict, req_dict); + } + break; - if (rsp_dict) { - ret = dict_set_int32 (rsp_dict, "src-brick-port", src_brickinfo->port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set src-brick port no"); - goto out; + case GD_OP_COPY_FILE: + { + dict_copy (dict, req_dict); + break; } - } - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - ret = dict_set_int32 (ctx, "src-brick-port", src_brickinfo->port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set src-brick port no"); - goto out; + case GD_OP_SYS_EXEC: + { + dict_copy (dict, req_dict); + break; } - } + default: + break; } + *req = req_dict; + ret = 0; + out: return ret; - } -static int -rb_update_dstbrick_port (glusterd_brickinfo_t *dst_brickinfo, dict_t *rsp_dict, - dict_t *req_dict, int32_t replace_op) +gf_boolean_t +glusterd_is_get_op (xlator_t *this, glusterd_op_t op, dict_t *dict) { - dict_t *ctx = NULL; - int ret = 0; - int dict_ret = 0; - int dst_port = 0; + char *key = NULL; + char *volname = NULL; + int ret = 0; - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - dict_ret = dict_get_int32 (req_dict, "dst-brick-port", &dst_port); - if (dst_port) - dst_brickinfo->port = dst_port; + if (op == GD_OP_STATUS_VOLUME) + return _gf_true; + if ((op == GD_OP_SET_VOLUME)) { + //check for set volume help + ret = dict_get_str (dict, "volname", &volname); + if (volname && + ((strcmp (volname, "help") == 0) || + (strcmp (volname, "help-xml") == 0))) { + ret = dict_get_str (dict, "key1", &key); + if (ret < 0) + return _gf_true; + } } - if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_NORMAL, - "adding dst-brick port no"); + return _gf_false; +} - if (rsp_dict) { - ret = dict_set_int32 (rsp_dict, "dst-brick-port", - dst_brickinfo->port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set dst-brick port no in rsp dict"); - goto out; - } - } +gf_boolean_t +glusterd_is_op_quorum_validation_required (xlator_t *this, glusterd_op_t op, + dict_t *dict) +{ + gf_boolean_t required = _gf_true; + char *key = NULL; + char *key_fixed = NULL; + int ret = -1; - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - ret = dict_set_int32 (ctx, "dst-brick-port", - dst_brickinfo->port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set dst-brick port no"); - goto out; - } - } + if (glusterd_is_get_op (this, op, dict)) { + required = _gf_false; + goto out; } + if ((op != GD_OP_SET_VOLUME) && (op != GD_OP_RESET_VOLUME)) + goto out; + if (op == GD_OP_SET_VOLUME) + ret = dict_get_str (dict, "key1", &key); + else if (op == GD_OP_RESET_VOLUME) + ret = dict_get_str (dict, "key", &key); + if (ret) + goto out; + ret = glusterd_check_option_exists (key, &key_fixed); + if (ret <= 0) + goto out; + if (key_fixed) + key = key_fixed; + if (glusterd_is_quorum_option (key)) + required = _gf_false; out: - return ret; + GF_FREE (key_fixed); + return required; } static int -glusterd_op_replace_brick (gd1_mgmt_stage_op_req *req, dict_t *rsp_dict) +glusterd_op_validate_quorum (xlator_t *this, glusterd_op_t op, + dict_t *dict, char **op_errstr) { - int ret = 0; - dict_t *dict = NULL; - dict_t *ctx = NULL; - gf1_cli_replace_op replace_op; - glusterd_volinfo_t *volinfo = NULL; - char *volname = NULL; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char *src_brick = NULL; - char *dst_brick = NULL; - glusterd_brickinfo_t *src_brickinfo = NULL; - glusterd_brickinfo_t *dst_brickinfo = NULL; - - GF_ASSERT (req); - - this = THIS; - GF_ASSERT (this); + int ret = 
0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *errstr = NULL; - priv = this->private; - GF_ASSERT (priv); - dict = dict_new (); - if (!dict) + errstr = "Quorum not met. Volume operation not allowed."; + if (!glusterd_is_op_quorum_validation_required (this, op, dict)) goto out; - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); + ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); + ret = 0; goto out; } - ret = dict_get_str (dict, "src-brick", &src_brick); + ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); + ret = 0; goto out; } - gf_log (this->name, GF_LOG_DEBUG, - "src brick=%s", src_brick); - - ret = dict_get_str (dict, "dst-brick", &dst_brick); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dst brick"); + if (does_gd_meet_server_quorum (this)) { + ret = 0; goto out; } - gf_log (this->name, GF_LOG_DEBUG, - "dst brick=%s", dst_brick); - - ret = dict_get_str (dict, "volname", &volname); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + if (glusterd_is_volume_in_server_quorum (volinfo)) { + ret = -1; + *op_errstr = gf_strdup (errstr); goto out; } + ret = 0; +out: + return ret; +} - ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op); - if (ret) { - gf_log (this->name, GF_LOG_DEBUG, - "dict_get on operation failed"); - goto out; - } +static int +glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) +{ + int ret = 0; + rpc_clnt_procedure_t *proc = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + dict_t *dict = NULL; + char *op_errstr = NULL; + glusterd_op_t op = GD_OP_NONE; + uint32_t pending_count = 0; - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; - } + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); - ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, &src_brickinfo); + op = glusterd_op_get_op (); + + ret = glusterd_op_build_payload (&dict, &op_errstr, NULL); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo"); + gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD, + gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD); + opinfo.op_errstr = op_errstr; goto out; } - ret = glusterd_brickinfo_from_brick (dst_brick, &dst_brickinfo); + ret = glusterd_op_validate_quorum (this, op, dict, &op_errstr); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to get dst-brickinfo"); + gf_log (this->name, GF_LOG_ERROR, "%s", op_errstr); + opinfo.op_errstr = op_errstr; goto out; } - ret = glusterd_resolve_brick (dst_brickinfo); + /* rsp_dict NULL from source */ + ret = glusterd_op_stage_validate (op, dict, &op_errstr, NULL); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to resolve dst-brickinfo"); + gf_log (this->name, GF_LOG_ERROR, LOGSTR_STAGE_FAIL, + gd_op_list[op], "localhost", + (op_errstr) ? ":" : " ", (op_errstr) ? 
op_errstr : " "); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_STAGE_FAIL, + "localhost"); + opinfo.op_errstr = op_errstr; goto out; } - ret = rb_update_srcbrick_port (src_brickinfo, rsp_dict, - dict, replace_op); - if (ret) - goto out; - - if ((GF_REPLACE_OP_START != replace_op)) { - ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, - dict, replace_op); - if (ret) - goto out; - } - - switch (replace_op) { - case GF_REPLACE_OP_START: - { - if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_NORMAL, - "I AM THE DESTINATION HOST"); - if (!glusterd_is_rb_paused (volinfo)) { - ret = rb_spawn_destination_brick (volinfo, dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to spawn destination brick"); - goto out; - } - } else { - gf_log ("", GF_LOG_ERROR, "Replace brick is already " - "started=> no need to restart dst brick "); - } - } - - - if (!glusterd_is_local_addr (src_brickinfo->hostname)) { - ret = rb_src_brick_restart (volinfo, src_brickinfo, - 1); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not restart src-brick"); - goto out; - } - } - - if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_NORMAL, - "adding dst-brick port no"); - - ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, - dict, replace_op); - if (ret) - goto out; - } + list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + GF_ASSERT (peerinfo); - glusterd_set_rb_status (volinfo, GF_RB_STATUS_STARTED); - break; - } + if (!peerinfo->connected || !peerinfo->mgmt) + continue; + if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && + (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) + continue; - case GF_REPLACE_OP_COMMIT: - case GF_REPLACE_OP_COMMIT_FORCE: - { - ret = dict_set_int32 (volinfo->dict, "enable-pump", 0); - gf_log ("", GF_LOG_DEBUG, - "Received commit - will be adding dst brick and " - "removing src brick"); - - if (!glusterd_is_local_addr (dst_brickinfo->hostname) && - replace_op != GF_REPLACE_OP_COMMIT_FORCE) { - gf_log ("", GF_LOG_NORMAL, - "I AM THE DESTINATION HOST"); - ret = rb_kill_destination_brick (volinfo, dst_brickinfo); + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_STAGE_OP]; + GF_ASSERT (proc); + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to kill destination brick"); + gf_log (this->name, GF_LOG_ERROR, "failed to " + "set peerinfo"); goto out; } - } - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Unable to cleanup " - "dst brick"); - goto out; + ret = proc->fn (NULL, this, dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to " + "send stage request for operation " + "'Volume %s' to peer %s", + gd_op_list[op], peerinfo->hostname); + continue; + } + pending_count++; } + } + opinfo.pending_count = pending_count; +out: + if (dict) + dict_unref (dict); + if (ret) { + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, NULL); + opinfo.op_ret = ret; + } - ret = glusterd_op_perform_replace_brick (volinfo, src_brick, - dst_brick); - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Unable to add " - "dst-brick: %s to volume: %s", - dst_brick, volinfo->volname); - goto out; - } - - volinfo->version++; - volinfo->defrag_status = 0; + gf_log (this->name, GF_LOG_DEBUG, "Sent stage op request for " + "'Volume %s' to %d peers", gd_op_list[op], + opinfo.pending_count); - ret = glusterd_store_update_volume (volinfo); + if (!opinfo.pending_count) + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); - if (ret) 
- goto out; + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); - ret = glusterd_volume_compute_cksum (volinfo); - if (ret) - goto out; + return ret; - ret = glusterd_check_generate_start_nfs (volinfo); +} - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Failed to generate " - " nfs volume file"); - } +static int32_t +glusterd_op_start_rb_timer (dict_t *dict, uuid_t *txn_id) +{ + int32_t op = 0; + struct timespec timeout = {0, }; + glusterd_conf_t *priv = NULL; + int32_t ret = -1; + dict_t *rb_ctx = NULL; - ret = glusterd_fetchspec_notify (THIS); - glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); - volinfo->src_brick = volinfo->dst_brick = NULL; - } - break; + GF_ASSERT (dict); + priv = THIS->private; - case GF_REPLACE_OP_PAUSE: - { + ret = dict_get_int32 (dict, "operation", &op); + if (ret) { gf_log ("", GF_LOG_DEBUG, - "Recieved pause - doing nothing"); - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - ret = rb_do_operation_pause (volinfo, src_brickinfo, dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, - "Pause operation failed"); - goto out; - } - } - - glusterd_set_rb_status (volinfo, GF_RB_STATUS_PAUSED); + "dict_get on operation failed"); + goto out; } - break; - - case GF_REPLACE_OP_ABORT: - { - ret = dict_set_int32 (volinfo->dict, "enable-pump", 0); - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Unable to disable pump"); - } - - if (!glusterd_is_local_addr (dst_brickinfo->hostname)) { - gf_log ("", GF_LOG_NORMAL, - "I AM THE DESTINATION HOST"); - ret = rb_kill_destination_brick (volinfo, dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Failed to kill destination brick"); - goto out; - } - } - - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - ret = rb_do_operation_abort (volinfo, src_brickinfo, dst_brickinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, - "Abort operation failed"); - goto out; - } - } - glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); - volinfo->src_brick = volinfo->dst_brick = NULL; + if (op != GF_REPLACE_OP_START) { + ret = glusterd_op_sm_inject_all_acc (txn_id); + goto out; } - break; - case GF_REPLACE_OP_STATUS: - { - gf_log ("", GF_LOG_DEBUG, - "received status - doing nothing"); - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (ctx) { - ret = rb_do_operation_status (volinfo, src_brickinfo, - dst_brickinfo); - if (ret) - goto out; - } + timeout.tv_sec = 5; + timeout.tv_nsec = 0; - } - break; - default: + rb_ctx = dict_copy (dict, rb_ctx); + if (!rb_ctx) { + gf_log (THIS->name, GF_LOG_ERROR, "Couldn't copy " + "replace brick context. 
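/*
 * [Editor's note, not part of the patch] glusterd_op_start_rb_timer above
 * does not finish a replace-brick "start" inline: it copies the request
 * context and arms a one-shot five-second timer whose callback,
 * glusterd_do_replace_brick, resumes the operation later. The standalone
 * sketch below imitates that hand-off; nanosleep() is a blocking stand-in
 * for the event-loop timer gf_timer_call_after(), and the other names are
 * invented for the example.
 */
#include <stdio.h>
#include <time.h>

typedef void (*timer_cb) (void *data);

/* Blocking stand-in for a one-shot timer; the real one fires its callback
 * from the event loop instead of blocking the caller. */
static void
call_after (struct timespec delay, timer_cb cb, void *data)
{
    nanosleep (&delay, NULL);
    cb (data);
}

static void
do_replace_brick (void *data)
{
    printf ("replace-brick resumes for %s\n", (const char *) data);
}

int
main (void)
{
    struct timespec timeout;

    timeout.tv_sec  = 5;   /* same five-second delay as the patch */
    timeout.tv_nsec = 0;

    call_after (timeout, do_replace_brick, "vol0");
    return 0;
}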
Can't start replace brick"); ret = -1; goto out; } - if (ret) + ret = dict_set_bin (rb_ctx, "transaction_id", + txn_id, sizeof(*txn_id)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Failed to set transaction id."); goto out; + } else + gf_log ("", GF_LOG_DEBUG, + "transaction_id = %s", uuid_utoa (*txn_id)); + + priv->timer = gf_timer_call_after (THIS->ctx, timeout, + glusterd_do_replace_brick, + (void *) rb_ctx); + + ret = 0; out: - if (dict) - dict_unref (dict); return ret; } -void -_delete_reconfig_opt (dict_t *this, char *key, data_t *value, void *data) +/* This function takes a dict and converts the uuid values of key specified + * into hostnames + */ +static int +glusterd_op_volume_dict_uuid_to_hostname (dict_t *dict, const char *key_fmt, + int idx_min, int idx_max) { + int ret = -1; + int i = 0; + char key[1024]; + char *uuid_str = NULL; + uuid_t uuid = {0,}; + char *hostname = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + GF_ASSERT (dict); + GF_ASSERT (key_fmt); + + for (i = idx_min; i < idx_max; i++) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), key_fmt, i); + ret = dict_get_str (dict, key, &uuid_str); + if (ret) + continue; - int exists = 0; + gf_log (this->name, GF_LOG_DEBUG, "Got uuid %s", + uuid_str); - exists = glusterd_check_option_exists(key, NULL); + ret = uuid_parse (uuid_str, uuid); + /* if parsing fails don't error out + * let the original value be retained + */ + if (ret) + continue; - if (exists == 1) { - gf_log ("", GF_LOG_DEBUG, "deleting dict with key=%s,value=%s", - key, value->data); - dict_del (this, key); + hostname = glusterd_uuid_to_hostname (uuid); + if (hostname) { + gf_log (this->name, GF_LOG_DEBUG, "%s -> %s", + uuid_str, hostname); + ret = dict_set_dynstr (dict, key, hostname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting hostname %s to dict", + hostname); + GF_FREE (hostname); + goto out; + } + } } +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; } -int -glusterd_options_reset (glusterd_volinfo_t *volinfo) +static int +reassign_defrag_status (dict_t *dict, char *key, gf_defrag_status_t *status) { - int ret = 0; - - gf_log ("", GF_LOG_DEBUG, "Received volume set reset command"); - - GF_ASSERT (volinfo->dict); + int ret = 0; - dict_foreach (volinfo->dict, _delete_reconfig_opt, volinfo->dict); + if (!*status) + return ret; - ret = glusterd_create_volfiles (volinfo); + switch (*status) { + case GF_DEFRAG_STATUS_STARTED: + *status = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED; + break; - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to create volfile for" - " 'volume set'"); - ret = -1; - goto out; - } + case GF_DEFRAG_STATUS_STOPPED: + *status = GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED; + break; - ret = glusterd_store_update_volume (volinfo); - if (ret) - goto out; + case GF_DEFRAG_STATUS_COMPLETE: + *status = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE; + break; - ret = glusterd_volume_compute_cksum (volinfo); - if (ret) - goto out; + case GF_DEFRAG_STATUS_FAILED: + *status = GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED; + break; + default: + break; + } - if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (volinfo); + ret = dict_set_int32(dict, key, *status); if (ret) - goto out; + gf_log (THIS->name, GF_LOG_WARNING, + "failed to reset defrag %s in dict", key); - ret = 0; - -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } - +/* Check and reassign the defrag_status enum got from the rebalance process + * of all peers so that the 
rebalance-status CLI command can display if a + * full-rebalance or just a fix-layout was carried out. + */ static int -glusterd_op_reset_volume (gd1_mgmt_stage_op_req *req) +glusterd_op_check_peer_defrag_status (dict_t *dict, int count) { - glusterd_volinfo_t *volinfo = NULL; - int ret = -1; - char *volname = NULL; - dict_t *dict = NULL; - - dict = dict_new (); - if (!dict) - goto out; - + glusterd_volinfo_t *volinfo = NULL; + gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED; + char key[256] = {0,}; + char *volname = NULL; + int ret = -1; + int i = 1; - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); + ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); + gf_log (THIS->name, GF_LOG_WARNING, "Unable to get volume name"); goto out; } - ret = dict_get_str (dict, "volname", &volname); + ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name " ); + gf_log (THIS->name, GF_LOG_WARNING, FMTSTR_CHECK_VOL_EXISTS, + volname); goto out; } - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + if (volinfo->rebal.defrag_cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) { + /* Fix layout was not issued; we don't need to reassign + the status */ + ret = 0; goto out; } - - ret = glusterd_options_reset (volinfo); + do { + memset (key, 0, 256); + snprintf (key, 256, "status-%d", i); + ret = dict_get_int32 (dict, key, (int32_t *)&status); + if (ret) { + gf_log (THIS->name, GF_LOG_WARNING, + "failed to get defrag %s", key); + goto out; + } + ret = reassign_defrag_status (dict, key, &status); + if (ret) + goto out; + i++; + } while (i <= count); + + ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "'volume reset' returning %d", ret); return ret; - + } -static int -glusterd_op_set_volume (gd1_mgmt_stage_op_req *req) +/* This function is used to modify the op_ctx dict before sending it back + * to cli. This is useful in situations like changing the peer uuids to + * hostnames etc. 
+ */ +void +glusterd_op_modify_op_ctx (glusterd_op_t op, void *ctx) { - int ret = 0; - dict_t *dict = NULL; - glusterd_volinfo_t *volinfo = NULL; - char *volname = NULL; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - int count = 1; - char *key = NULL; - char *value = NULL; - char str[50] = {0, }; - GF_ASSERT (req); + int ret = -1; + dict_t *op_ctx = NULL; + int brick_index_max = -1; + int other_count = 0; + int count = 0; + uint32_t cmd = GF_CLI_STATUS_NONE; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); + if (ctx) + op_ctx = ctx; + else + op_ctx = glusterd_op_get_ctx(); - dict = dict_new (); - if (!dict) + if (!op_ctx) { + gf_log (this->name, GF_LOG_CRITICAL, + "Operation context is not present."); goto out; + } + switch (op) { + case GD_OP_STATUS_VOLUME: + ret = dict_get_uint32 (op_ctx, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to get status cmd"); + goto out; + } + if (!(cmd & GF_CLI_STATUS_NFS || cmd & GF_CLI_STATUS_SHD || + (cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE)) { + gf_log (this->name, GF_LOG_DEBUG, + "op_ctx modification not required for status " + "operation being performed"); + goto out; + } - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); - goto out; - } + ret = dict_get_int32 (op_ctx, "brick-index-max", + &brick_index_max); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to get brick-index-max"); + goto out; + } - ret = dict_get_str (dict, "volname", &volname); + ret = dict_get_int32 (op_ctx, "other-count", &other_count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to get other-count"); + goto out; + } - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); - goto out; - } + count = brick_index_max + other_count + 1; - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; - } + ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx, + "brick%d.path", + 0, count); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "Failed uuid to hostname conversion"); - for ( count = 1; ret != -1 ; count++ ) { + break; - sprintf (str, "key%d", count); - ret = dict_get_str (dict, str, &key); + case GD_OP_PROFILE_VOLUME: + ret = dict_get_str_boolean (op_ctx, "nfs", _gf_false); + if (!ret) + goto out; + ret = dict_get_int32 (op_ctx, "count", &count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to get brick count"); + goto out; + } - if (ret) - break; + ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx, + "%d-brick", + 1, (count + 1)); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "Failed uuid to hostname conversion"); - sprintf (str, "value%d", count); - ret = dict_get_str (dict, str, &value); + break; - if (ret) { - gf_log ("", GF_LOG_ERROR, "invalid key,value pair" - "in 'volume set'"); - ret = -1; - goto out; - } + /* For both rebalance and remove-brick status, the glusterd op is the + * same + */ + case GD_OP_DEFRAG_BRICK_VOLUME: + ret = dict_get_int32 (op_ctx, "count", &count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to get count"); + goto out; + } - value = gf_strdup (value); - if (value) - ret = dict_set_dynstr (volinfo->dict, key, value); - else - ret = -1; + /* add 'node-name-%d' into op_ctx with value uuid_str. 
+ this will be used to convert to hostname later */ + { + char key[1024]; + char *uuid_str = NULL; + int i; + + for (i = 1; i <= count; i++) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "node-uuid-%d", i); + ret = dict_get_str (op_ctx, key, &uuid_str); + if (!ret) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), + "node-name-%d", i); + ret = dict_set_str (op_ctx, key, + uuid_str); + } + } + } - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to set the options" - "in 'volume set'"); - ret = -1; - goto out; - } - } + ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx, + "node-name-%d", + 1, (count + 1)); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "Failed uuid to hostname conversion"); - if ( count == 1 ) { - gf_log ("", GF_LOG_ERROR, "No options received "); - ret = -1; - goto out; - } + ret = glusterd_op_check_peer_defrag_status (op_ctx, count); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to reset defrag status for fix-layout"); + break; - ret = glusterd_create_volfiles (volinfo); + default: + ret = 0; + gf_log (this->name, GF_LOG_DEBUG, + "op_ctx modification not required"); + break; - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to create volfile for" - " 'volume set'"); - ret = -1; - goto out; } - ret = glusterd_store_update_volume (volinfo); +out: if (ret) - goto out; + gf_log (this->name, GF_LOG_WARNING, + "op_ctx modification failed"); + return; +} - ret = glusterd_volume_compute_cksum (volinfo); - if (ret) - goto out; +static int +glusterd_op_commit_hook (glusterd_op_t op, dict_t *op_ctx, + glusterd_commit_hook_type_t type) +{ + glusterd_conf_t *priv = NULL; + char hookdir[PATH_MAX] = {0, }; + char scriptdir[PATH_MAX] = {0, }; + char type_subdir[256] = {0, }; + char *cmd_subdir = NULL; + int ret = -1; - if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = glusterd_check_generate_start_nfs (volinfo); + priv = THIS->private; + switch (type) { + case GD_COMMIT_HOOK_NONE: + case GD_COMMIT_HOOK_MAX: + /*Won't be called*/ + break; - ret = 0; + case GD_COMMIT_HOOK_PRE: + strcpy (type_subdir, "pre"); + break; + case GD_COMMIT_HOOK_POST: + strcpy (type_subdir, "post"); + break; + } + + cmd_subdir = glusterd_hooks_get_hooks_cmd_subdir (op); + if (strlen (cmd_subdir) == 0) + return -1; + + GLUSTERD_GET_HOOKS_DIR (hookdir, GLUSTERD_HOOK_VER, priv); + snprintf (scriptdir, sizeof (scriptdir), "%s/%s/%s", + hookdir, cmd_subdir, type_subdir); + + switch (type) { + case GD_COMMIT_HOOK_NONE: + case GD_COMMIT_HOOK_MAX: + /*Won't be called*/ + break; + + case GD_COMMIT_HOOK_PRE: + ret = glusterd_hooks_run_hooks (scriptdir, op, op_ctx, + type); + break; + case GD_COMMIT_HOOK_POST: + ret = glusterd_hooks_post_stub_enqueue (scriptdir, op, + op_ctx); + break; + } -out: - if (dict) - dict_unref (dict); - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); return ret; } static int -glusterd_op_remove_brick (gd1_mgmt_stage_op_req *req) +glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; - dict_t *dict = NULL; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - char *brick = NULL; - int32_t count = 0; - int32_t i = 1; - char key[256] = {0,}; - - GF_ASSERT (req); + int ret = 0; + rpc_clnt_procedure_t *proc = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + dict_t *dict = NULL; + dict_t *op_dict = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + char *op_errstr = NULL; + glusterd_op_t op = GD_OP_NONE; + uint32_t pending_count = 0; - dict = dict_new (); - if (!dict) - goto out; + this 
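/*
 * [Editor's note, not part of the patch] Rebalance and remove-brick status
 * replies identify peers by "node-uuid-%d" keys; the loop above mirrors
 * each value into a parallel "node-name-%d" key, which the subsequent
 * glusterd_op_volume_dict_uuid_to_hostname pass then overwrites with a
 * hostname for the CLI. A standalone sketch of that snprintf-built key
 * mirroring follows; the string tables and dict_get_/dict_set_ helpers are
 * a toy stand-in for dict_t, invented for the example.
 */
#include <stdio.h>
#include <string.h>

#define MAX_KEYS 8

static char keys[MAX_KEYS][32];
static char vals[MAX_KEYS][64];
static int  nkeys;

static const char *
dict_get_ (const char *k)
{
    int i = 0;
    for (i = 0; i < nkeys; i++)
        if (!strcmp (keys[i], k))
            return vals[i];
    return NULL;
}

static void
dict_set_ (const char *k, const char *v)
{
    snprintf (keys[nkeys], sizeof (keys[0]), "%s", k);
    snprintf (vals[nkeys], sizeof (vals[0]), "%s", v);
    nkeys++;
}

int
main (void)
{
    char src[32], dst[32];
    int  i = 0;

    dict_set_ ("node-uuid-1", "00000000-0000-0000-0000-000000000001");
    dict_set_ ("node-uuid-2", "00000000-0000-0000-0000-000000000002");

    for (i = 1; i <= 2; i++) {
        snprintf (src, sizeof (src), "node-uuid-%d", i);
        snprintf (dst, sizeof (dst), "node-name-%d", i);
        if (dict_get_ (src))
            dict_set_ (dst, dict_get_ (src)); /* a later pass swaps in a hostname */
    }
    printf ("node-name-1 = %s\n", dict_get_ ("node-name-1"));
    return 0;
}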
= THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); + op = glusterd_op_get_op (); + op_dict = glusterd_op_get_ctx (); + ret = glusterd_op_build_payload (&dict, &op_errstr, NULL); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); + gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD, + gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD); + opinfo.op_errstr = op_errstr; goto out; } - ret = dict_get_str (dict, "volname", &volname); - + ret = glusterd_op_commit_perform (op, dict, &op_errstr, NULL); //rsp_dict invalid for source if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, LOGSTR_COMMIT_FAIL, + gd_op_list[op], "localhost", (op_errstr) ? ":" : " ", + (op_errstr) ? op_errstr : " "); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_COMMIT_FAIL, + "localhost"); + opinfo.op_errstr = op_errstr; goto out; } - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); - goto out; + list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + GF_ASSERT (peerinfo); + + if (!peerinfo->connected || !peerinfo->mgmt) + continue; + if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && + (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) + continue; + + proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_COMMIT_OP]; + GF_ASSERT (proc); + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set peerinfo"); + goto out; + } + ret = proc->fn (NULL, this, dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to " + "send commit request for operation " + "'Volume %s' to peer %s", + gd_op_list[op], peerinfo->hostname); + continue; + } + pending_count++; + } } - ret = dict_get_int32 (dict, "count", &count); + opinfo.pending_count = pending_count; + gf_log (this->name, GF_LOG_DEBUG, "Sent commit op req for 'Volume %s' " + "to %d peers", gd_op_list[op], opinfo.pending_count); +out: + if (dict) + dict_unref (dict); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get count"); - goto out; + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, NULL); + opinfo.op_ret = ret; } + if (!opinfo.pending_count) { + if (op == GD_OP_REPLACE_BRICK) { + ret = glusterd_op_start_rb_timer (op_dict, + &event->txn_id); - while ( i <= count) { - snprintf (key, 256, "brick%d", i); - ret = dict_get_str (dict, key, &brick); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get %s", key); - goto out; + } else { + glusterd_op_modify_op_ctx (op, NULL); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); } - - ret = glusterd_op_perform_remove_brick (volinfo, brick); - if (ret) - goto out; - i++; + goto err; } - ret = glusterd_create_volfiles (volinfo); - if (ret) - goto out; +err: + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); - volinfo->version++; - volinfo->defrag_status = 0; + return ret; - ret = glusterd_store_update_volume (volinfo); +} - if (ret) - goto out; +static int +glusterd_op_ac_rcvd_stage_op_acc (glusterd_op_sm_event_t *event, void *ctx) +{ + int ret = 0; - ret = glusterd_volume_compute_cksum (volinfo); - if (ret) + GF_ASSERT (event); + + if (opinfo.pending_count > 0) + opinfo.pending_count--; + + if (opinfo.pending_count > 0) goto out; - if (GLUSTERD_STATUS_STARTED == volinfo->status) - ret = 
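/*
 * [Editor's note, not part of the patch] The commit leg above runs the
 * commit locally via glusterd_op_commit_perform before fanning out to
 * peers, and commit_perform (further below) brackets every commit with the
 * hook runner defined earlier: "pre" scripts run synchronously, "post"
 * scripts are queued on a stub. Scripts resolve to a per-operation tree,
 * <hooks-dir>/<version>/<command>/{pre,post}. The sketch shows only the
 * path construction; the concrete hooks directory here is an illustrative
 * assumption, not a path taken from this patch.
 */
#include <stdio.h>

int
main (void)
{
    const char *hookdir    = "/var/lib/glusterd/hooks/1"; /* example only  */
    const char *cmd_subdir = "start";                     /* per-op subdir */
    const char *phases[]   = { "pre", "post" };
    char        scriptdir[512];
    int         i = 0;

    for (i = 0; i < 2; i++) {
        snprintf (scriptdir, sizeof (scriptdir), "%s/%s/%s",
                  hookdir, cmd_subdir, phases[i]);
        printf ("%s hooks run from %s\n", phases[i], scriptdir);
    }
    return 0;
}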
glusterd_check_generate_start_nfs (volinfo); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC, + &event->txn_id, NULL); out: - if (dict) - dict_unref (dict); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; } - static int -glusterd_op_delete_volume (gd1_mgmt_stage_op_req *req) +glusterd_op_ac_stage_op_failed (glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - char volname[1024] = {0,}; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - xlator_t *this = NULL; - - GF_ASSERT (req); - - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - - strncpy (volname, req->buf.buf_val, req->buf.buf_len); - - ret = glusterd_volinfo_find (volname, &volinfo); + int ret = 0; - if (ret) - goto out; + GF_ASSERT (event); - ret = glusterd_store_delete_volume (volinfo); + if (opinfo.pending_count > 0) + opinfo.pending_count--; - if (ret) + if (opinfo.pending_count > 0) goto out; - ret = glusterd_volinfo_delete (volinfo); - - if (ret) - goto out; + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; } static int -glusterd_op_start_volume (gd1_mgmt_stage_op_req *req, char **op_errstr) +glusterd_op_ac_commit_op_failed (glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - char *volname = NULL; - int flags = 0; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - dict_t *dict = NULL; - - GF_ASSERT (req); - - dict = dict_new (); - if (!dict) - goto out; - - ret = glusterd_op_start_volume_args_get (req, dict, &volname, &flags); - if (ret) - goto out; - - ret = glusterd_volinfo_find (volname, &volinfo); - - if (ret) - goto out; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - ret = glusterd_brick_start (volinfo, brickinfo); - if (ret) - goto out; - } + int ret = 0; - glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STARTED); + GF_ASSERT (event); - ret = glusterd_store_update_volume (volinfo); - if (ret) - goto out; + if (opinfo.pending_count > 0) + opinfo.pending_count--; - ret = glusterd_volume_compute_cksum (volinfo); - if (ret) + if (opinfo.pending_count > 0) goto out; - ret = glusterd_check_generate_start_nfs (volinfo); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); out: - if (dict) - dict_unref (dict); - gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; } static int -glusterd_op_log_filename (gd1_mgmt_stage_op_req *req) +glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; - dict_t *dict = NULL; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - xlator_t *this = NULL; - char *volname = NULL; - char *brick = NULL; - char *path = NULL; - char logfile[PATH_MAX] = {0,}; - char exp_path[PATH_MAX] = {0,}; - struct stat stbuf = {0,}; - int valid_brick = 0; - glusterd_brickinfo_t *tmpbrkinfo = NULL; - - GF_ASSERT (req); + int ret = 0; + glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; + gf_boolean_t free_errstr = _gf_false; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - dict = dict_new (); - if (!dict) { - gf_log ("", GF_LOG_ERROR, "ENOMEM, !dict"); - goto out; - } + GF_ASSERT (event); + GF_ASSERT (ctx); + ev_ctx = ctx; - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); + ret = 
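/*
 * [Editor's note, not part of the patch] Each of the ack and failure
 * actions above follows the same shape: decrement opinfo.pending_count and
 * inject the aggregate state-machine event only when the last outstanding
 * peer reply has arrived. A minimal standalone sketch of that counting
 * pattern; rcvd_ack and the event string are invented for the example.
 */
#include <stdio.h>

static int pending_count = 3;

/* Called once per peer reply; the aggregate event fires only on the last. */
static void
rcvd_ack (const char *next_event)
{
    if (pending_count > 0)
        pending_count--;
    if (pending_count > 0)
        return;                      /* still waiting on other peers */
    printf ("inject %s\n", next_event);
}

int
main (void)
{
    rcvd_ack ("GD_OP_EVENT_STAGE_ACC");
    rcvd_ack ("GD_OP_EVENT_STAGE_ACC");
    rcvd_ack ("GD_OP_EVENT_STAGE_ACC"); /* third ack injects the event */
    return 0;
}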
glusterd_remove_pending_entry (&opinfo.pending_bricks, ev_ctx->pending_node->node); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); + gf_log (this->name, GF_LOG_ERROR, "unknown response received "); + ret = -1; + free_errstr = _gf_true; goto out; } + if (opinfo.brick_pending_count > 0) + opinfo.brick_pending_count--; + if (opinfo.op_ret == 0) + opinfo.op_ret = ev_ctx->op_ret; - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "volname not found"); - goto out; - } - ret = dict_get_str (dict, "path", &path); - if (ret) { - gf_log ("", GF_LOG_ERROR, "path not found"); - goto out; - } + if (opinfo.op_errstr == NULL) + opinfo.op_errstr = ev_ctx->op_errstr; + else + free_errstr = _gf_true; - ret = dict_get_str (dict, "brick", &brick); - if (ret) + if (opinfo.brick_pending_count > 0) goto out; - if (!strchr (brick, ':')) - brick = NULL; - else { - ret = glusterd_brickinfo_from_brick (brick, &tmpbrkinfo); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "cannot get brickinfo from brick"); - goto out; - } - } + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, ev_ctx->commit_ctx); - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) - goto out; +out: + if (ev_ctx->rsp_dict) + dict_unref (ev_ctx->rsp_dict); + if (free_errstr && ev_ctx->op_errstr) + GF_FREE (ev_ctx->op_errstr); + GF_FREE (ctx); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - ret = -1; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + return ret; +} - if (uuid_is_null (brickinfo->uuid)) { - ret = glusterd_resolve_brick (brickinfo); - } +static int +glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx) +{ + dict_t *op_ctx = NULL; + int ret = 0; + gf_boolean_t commit_ack_inject = _gf_true; + glusterd_op_t op = GD_OP_NONE; + xlator_t *this = NULL; - /* check if the brickinfo belongs to the 'this' machine */ - if (uuid_compare (brickinfo->uuid, priv->uuid)) - continue; + this = THIS; + GF_ASSERT (this); + op = glusterd_op_get_op (); + GF_ASSERT (event); - if (brick && - (strcmp (tmpbrkinfo->hostname, brickinfo->hostname) || - strcmp (tmpbrkinfo->path,brickinfo->path))) - continue; + if (opinfo.pending_count > 0) + opinfo.pending_count--; - valid_brick = 1; + if (opinfo.pending_count > 0) + goto out; - /* If there are more than one brick in 'this' server, its an - * extra check, but it doesn't harm functionality - */ - ret = stat (path, &stbuf); - if (ret || !S_ISDIR (stbuf.st_mode)) { + if (op == GD_OP_REPLACE_BRICK) { + op_ctx = glusterd_op_get_ctx (); + if (!op_ctx) { + gf_log (this->name, GF_LOG_CRITICAL, "Operation " + "context is not present."); ret = -1; - gf_log ("", GF_LOG_ERROR, "not a directory"); goto out; } - GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); - - snprintf (logfile, PATH_MAX, "%s/%s.log", path, exp_path); - - if (brickinfo->logfile) - GF_FREE (brickinfo->logfile); - brickinfo->logfile = gf_strdup (logfile); - ret = 0; + ret = glusterd_op_start_rb_timer (op_ctx, &event->txn_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Couldn't start " + "replace-brick operation."); + goto out; + } - /* If request was for brick, only one iteration is enough */ - if (brick) - break; + commit_ack_inject = _gf_false; + goto out; } - if (ret && !valid_brick) - ret = 0; -out: - if (dict) - dict_unref (dict); - if (tmpbrkinfo) - glusterd_brickinfo_delete (tmpbrkinfo); +out: + if (commit_ack_inject) { + if (ret) + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, 
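/*
 * [Editor's note, not part of the patch] The brick-op failure handler
 * above aggregates per-brick results with a first-error-wins rule: op_ret
 * keeps the first nonzero return code, op_errstr keeps the first message,
 * and error strings from later replies are freed. A standalone sketch of
 * that aggregation; record_brick_rsp is an invented name.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int   op_ret;
static char *op_errstr;

/* Fold one brick reply into the aggregate outcome. */
static void
record_brick_rsp (int rsp_ret, const char *rsp_errstr)
{
    if (op_ret == 0)
        op_ret = rsp_ret;
    if (rsp_errstr && op_errstr == NULL)
        op_errstr = strdup (rsp_errstr);
    /* later messages are dropped; the patch frees them */
}

int
main (void)
{
    record_brick_rsp (-1, "brick b1 failed");
    record_brick_rsp (-1, "brick b2 failed");  /* ignored: b1 came first */
    printf ("ret=%d errstr=%s\n", op_ret, op_errstr);
    free (op_errstr);
    return 0;
}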
+ &event->txn_id, NULL); + else if (!opinfo.pending_count) { + glusterd_op_modify_op_ctx (op, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, + &event->txn_id, NULL); + } + /*else do nothing*/ + } return ret; } static int -glusterd_op_log_rotate (gd1_mgmt_stage_op_req *req) +glusterd_op_ac_rcvd_unlock_acc (glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; - dict_t *dict = NULL; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - xlator_t *this = NULL; - char *volname = NULL; - char *brick = NULL; - char path[PATH_MAX] = {0,}; - char logfile[PATH_MAX] = {0,}; - char pidfile[PATH_MAX] = {0,}; - FILE *file = NULL; - pid_t pid = 0; - uint64_t key = 0; - int valid_brick = 0; - glusterd_brickinfo_t *tmpbrkinfo = NULL; + int ret = 0; - GF_ASSERT (req); + GF_ASSERT (event); - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); + if (opinfo.pending_count > 0) + opinfo.pending_count--; - dict = dict_new (); - if (!dict) { - gf_log ("", GF_LOG_ERROR, "ENOMEM, !dict"); + if (opinfo.pending_count > 0) goto out; - } - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); - goto out; - } + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, + &event->txn_id, NULL); - ret = dict_get_str (dict, "volname", &volname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "volname not found"); - goto out; - } + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); - ret = dict_get_uint64 (dict, "rotate-key", &key); - if (ret) { - gf_log ("", GF_LOG_ERROR, "rotate key not found"); - goto out; - } +out: + return ret; +} - ret = dict_get_str (dict, "brick", &brick); - if (ret) - goto out; +int32_t +glusterd_op_clear_errstr() { + opinfo.op_errstr = NULL; + return 0; +} - if (!strchr (brick, ':')) - brick = NULL; - else { - ret = glusterd_brickinfo_from_brick (brick, &tmpbrkinfo); - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "cannot get brickinfo from brick"); - goto out; - } - } +int32_t +glusterd_op_set_ctx (void *ctx) +{ - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) - goto out; + opinfo.op_ctx = ctx; - ret = -1; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - if (uuid_compare (brickinfo->uuid, priv->uuid)) - continue; + return 0; - if (brick && - (strcmp (tmpbrkinfo->hostname, brickinfo->hostname) || - strcmp (tmpbrkinfo->path,brickinfo->path))) - continue; +} - valid_brick = 1; +int32_t +glusterd_op_reset_ctx () +{ - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); + glusterd_op_set_ctx (NULL); - file = fopen (pidfile, "r+"); - if (!file) { - gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s", - pidfile); - ret = -1; - goto out; - } + return 0; +} - ret = fscanf (file, "%d", &pid); - if (ret <= 0) { - gf_log ("", GF_LOG_ERROR, "Unable to read pidfile: %s", - pidfile); - ret = -1; - goto out; - } - fclose (file); - file = NULL; +int32_t +glusterd_op_txn_complete (uuid_t *txn_id) +{ + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + int32_t op = -1; + int32_t op_ret = 0; + int32_t op_errno = 0; + rpcsvc_request_t *req = NULL; + void *ctx = NULL; + char *op_errstr = NULL; + char *volname = NULL; + xlator_t *this = NULL; - snprintf (logfile, PATH_MAX, "%s.%"PRIu64, - brickinfo->logfile, key); + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + op = 
glusterd_op_get_op (); + ctx = glusterd_op_get_ctx (); + op_ret = opinfo.op_ret; + op_errno = opinfo.op_errno; + req = opinfo.req; + if (opinfo.op_errstr) + op_errstr = opinfo.op_errstr; - ret = rename (brickinfo->logfile, logfile); + opinfo.op_ret = 0; + opinfo.op_errno = 0; + glusterd_op_clear_op (); + glusterd_op_reset_ctx (); + glusterd_op_clear_errstr (); + + /* Based on the op-version, we release the cluster or mgmt_v3 lock */ + if (priv->op_version < 3) { + ret = glusterd_unlock (MY_UUID); + /* unlock cant/shouldnt fail here!! */ + if (ret) + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to clear local lock, ret: %d", ret); + else + gf_log (this->name, GF_LOG_DEBUG, "Cleared local lock"); + } else { + ret = dict_get_str (ctx, "volname", &volname); if (ret) - gf_log ("", GF_LOG_WARNING, "rename failed"); + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire volname"); - ret = kill (pid, SIGHUP); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to SIGHUP to %d", pid); - goto out; + if (volname) { + ret = glusterd_mgmt_v3_unlock (volname, MY_UUID, + "vol"); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); } - ret = 0; - - /* If request was for brick, only one iteration is enough */ - if (brick) - break; } - if (ret && !valid_brick) + ret = glusterd_op_send_cli_response (op, op_ret, + op_errno, req, ctx, op_errstr); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Responding to cli failed, " + "ret: %d", ret); + //Ignore this error, else state machine blocks ret = 0; + } -out: - if (dict) - dict_unref (dict); + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); - if (tmpbrkinfo) - glusterd_brickinfo_delete (tmpbrkinfo); + if (priv->pending_quorum_action) + glusterd_do_quorum_action (); + + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo (txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear transaction's opinfo"); + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } static int -glusterd_op_stop_volume (gd1_mgmt_stage_op_req *req) +glusterd_op_ac_unlocked_all (glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - int flags = 0; - char *volname = NULL; - glusterd_volinfo_t *volinfo = NULL; - dict_t *dict = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - - dict = dict_new (); - if (!dict) - goto out; - - ret = glusterd_op_stop_volume_args_get (req, dict, &volname, &flags); - if (ret) - goto out; - - ret = glusterd_volinfo_find (volname, &volinfo); - - if (ret) - goto out; - - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - ret = glusterd_brick_stop (volinfo, brickinfo); - if (ret) - goto out; - } + int ret = 0; - glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STOPPED); + GF_ASSERT (event); - ret = glusterd_store_update_volume (volinfo); - if (ret) - goto out; + ret = glusterd_op_txn_complete (&event->txn_id); - ret = glusterd_volume_compute_cksum (volinfo); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); - if (glusterd_are_all_volumes_stopped ()) { - if (glusterd_is_nfs_started ()) { - ret = glusterd_nfs_server_stop (); - if (ret) - goto out; - } - } else { - ret = glusterd_check_generate_start_nfs (volinfo); - } -out: - if (dict) - dict_unref (dict); return ret; } static int -glusterd_op_sync_volume (gd1_mgmt_stage_op_req *req, char **op_errstr, - dict_t *rsp_dict) +glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; - dict_t *dict = NULL; - char *volname = NULL; - char *hostname = 
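/*
 * [Editor's note, not part of the patch] As the completion code above
 * shows, the lock released at the end of a transaction depends on the
 * cluster op-version: below 3 it is the legacy cluster-wide lock taken
 * with glusterd_lock/glusterd_unlock; from 3 on it is the finer,
 * per-volume mgmt_v3 lock keyed by volname. A standalone sketch of that
 * branch; the function names here are invented stand-ins.
 */
#include <stdio.h>

static void
cluster_unlock (void)
{
    printf ("cluster-wide lock released\n");
}

static void
mgmt_v3_unlock (const char *volname)
{
    printf ("mgmt_v3 lock on %s released\n", volname);
}

/* Pick the unlock primitive by cluster op-version. */
static void
release_lock (int op_version, const char *volname)
{
    if (op_version < 3)
        cluster_unlock ();          /* legacy: one lock per cluster   */
    else if (volname)
        mgmt_v3_unlock (volname);   /* newer: per-volume granularity  */
}

int
main (void)
{
    release_lock (2, "vol0");
    release_lock (3, "vol0");
    return 0;
}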
NULL; - char msg[2048] = {0,}; - int count = 1; - int vol_count = 0; - glusterd_conf_t *priv = NULL; - glusterd_volinfo_t *volinfo = NULL; - xlator_t *this = NULL; + int ret = -1; + glusterd_req_ctx_t *req_ctx = NULL; + int32_t status = 0; + dict_t *rsp_dict = NULL; + char *op_errstr = NULL; + dict_t *dict = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; - GF_ASSERT (req); this = THIS; GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); + GF_ASSERT (ctx); + req_ctx = ctx; - dict = dict_new (); - if (!dict) - goto out; + dict = req_ctx->dict; - ret = dict_unserialize (req->buf.buf_val, req->buf.buf_len, &dict); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to unserialize dict"); - goto out; + rsp_dict = dict_new (); + if (!rsp_dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get new dictionary"); + return -1; } - ret = dict_get_str (dict, "hostname", &hostname); - if (ret) { - snprintf (msg, sizeof (msg), "hostname couldn't be " - "retrieved from msg"); - *op_errstr = gf_strdup (msg); - goto out; - } + status = glusterd_op_stage_validate (req_ctx->op, dict, &op_errstr, + rsp_dict); - if (glusterd_is_local_addr (hostname)) { - ret = 0; - goto out; + if (status) { + gf_log (this->name, GF_LOG_ERROR, "Stage failed on operation" + " 'Volume %s', Status : %d", gd_op_list[req_ctx->op], + status); } - //volname is not present in case of sync all - ret = dict_get_str (dict, "volname", &volname); - if (!ret) { - ret = glusterd_volinfo_find (volname, &volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Volume with name: %s " - "not exists", volname); - goto out; - } - } + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); - if (!rsp_dict) { - //this should happen only on source - ret = 0; - goto out; - } + if (txn_id) + uuid_copy (*txn_id, event->txn_id); + else + gf_log (this->name, GF_LOG_ERROR, "Out of Memory"); - if (volname) { - ret = glusterd_add_volume_to_dict (volinfo, rsp_dict, - 1); - vol_count = 1; - } else { - list_for_each_entry (volinfo, &priv->volumes, vol_list) { - ret = glusterd_add_volume_to_dict (volinfo, - rsp_dict, count); - if (ret) - goto out; + ret = dict_set_bin (rsp_dict, "transaction_id", + txn_id, sizeof(*txn_id)); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); - vol_count = count++; - } - } - ret = dict_set_int32 (rsp_dict, "count", vol_count); -out: - if (dict) - dict_unref (dict); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + ret = glusterd_op_stage_send_resp (req_ctx->req, req_ctx->op, + status, op_errstr, rsp_dict); + + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); + + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); + + if (rsp_dict) + dict_unref (rsp_dict); return ret; } -static int -glusterd_op_ac_none (glusterd_op_sm_event_t *event, void *ctx) +static gf_boolean_t +glusterd_need_brick_op (glusterd_op_t op) { - int ret = 0; + gf_boolean_t ret = _gf_false; + + GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + switch (op) { + case GD_OP_PROFILE_VOLUME: + case GD_OP_STATUS_VOLUME: + case GD_OP_DEFRAG_BRICK_VOLUME: + case GD_OP_HEAL_VOLUME: + ret = _gf_true; + break; + default: + ret = _gf_false; + } return ret; } -static int -glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) +dict_t* +glusterd_op_init_commit_rsp_dict (glusterd_op_t op) { - int ret = 0; - rpc_clnt_procedure_t *proc = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; + dict_t *rsp_dict = NULL; + dict_t 
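/*
 * [Editor's note, not part of the patch] The stage handler above allocates
 * a fresh copy of the event's transaction id and stores it in the response
 * dict as 16 raw bytes under "transaction_id"; the copy is heap-allocated
 * with GF_CALLOC, consistent with dict_set_bin taking ownership of the
 * pointer. The sketch below just round-trips a uuid as an opaque byte
 * blob; uuid_bytes_t and the buffers are invented for the example.
 */
#include <stdio.h>
#include <string.h>

typedef unsigned char uuid_bytes_t[16];  /* libuuid's uuid_t is 16 bytes */

int
main (void)
{
    uuid_bytes_t  txn_id = { 0xde, 0xad, 0xbe, 0xef }; /* rest is zero */
    unsigned char blob[sizeof (uuid_bytes_t)];
    uuid_bytes_t  copy;

    /* sender side: store the id as opaque binary data */
    memcpy (blob, txn_id, sizeof (blob));

    /* receiver side: interpret the blob as a uuid again */
    memcpy (copy, blob, sizeof (copy));
    printf ("txn id starts 0x%02x%02x\n", copy[0], copy[1]);
    return 0;
}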
*op_ctx = NULL; - this = THIS; - priv = this->private; + GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX); - proc = &priv->mgmt->proctable[GD_MGMT_CLUSTER_LOCK]; - if (proc->fn) { - ret = proc->fn (NULL, this, NULL); - if (ret) - goto out; + if (glusterd_need_brick_op (op)) { + op_ctx = glusterd_op_get_ctx (); + GF_ASSERT (op_ctx); + rsp_dict = dict_ref (op_ctx); + } else { + rsp_dict = dict_new (); } - if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); - -out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - - return ret; + return rsp_dict; } static int -glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) +glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - rpc_clnt_procedure_t *proc = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; + int ret = 0; + glusterd_req_ctx_t *req_ctx = NULL; + int32_t status = 0; + char *op_errstr = NULL; + dict_t *dict = NULL; + dict_t *rsp_dict = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; - priv = this->private; - - /*ret = glusterd_unlock (priv->uuid); + GF_ASSERT (this); + GF_ASSERT (ctx); - if (ret) - goto out; - */ + req_ctx = ctx; - proc = &priv->mgmt->proctable[GD_MGMT_CLUSTER_UNLOCK]; - if (proc->fn) { - ret = proc->fn (NULL, this, NULL); - if (ret) - goto out; - } + dict = req_ctx->dict; - if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + rsp_dict = glusterd_op_init_commit_rsp_dict (req_ctx->op); + if (NULL == rsp_dict) + return -1; -out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + if (GD_OP_CLEARLOCKS_VOLUME == req_ctx->op) { + /*clear locks should be run only on + * originator glusterd*/ + status = 0; -} + } else { + status = glusterd_op_commit_perform (req_ctx->op, dict, + &op_errstr, rsp_dict); + } -static int -glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; - glusterd_op_lock_ctx_t *lock_ctx = NULL; - int32_t status = 0; + if (status) + gf_log (this->name, GF_LOG_ERROR, "Commit of operation " + "'Volume %s' failed: %d", gd_op_list[req_ctx->op], + status); + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); - GF_ASSERT (event); - GF_ASSERT (ctx); + if (txn_id) + uuid_copy (*txn_id, event->txn_id); + else + gf_log (this->name, GF_LOG_ERROR, "Out of Memory"); - lock_ctx = (glusterd_op_lock_ctx_t *)ctx; + ret = dict_set_bin (rsp_dict, "transaction_id", + txn_id, sizeof(*txn_id)); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); - status = glusterd_lock (lock_ctx->uuid); + ret = glusterd_op_commit_send_resp (req_ctx->req, req_ctx->op, + status, op_errstr, rsp_dict); - gf_log ("", GF_LOG_DEBUG, "Lock Returned %d", status); + if (op_errstr && (strcmp (op_errstr, ""))) + GF_FREE (op_errstr); - ret = glusterd_op_lock_send_resp (lock_ctx->req, status); + if (rsp_dict) + dict_unref (rsp_dict); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } static int -glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) +glusterd_op_ac_send_commit_failed (glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - glusterd_op_lock_ctx_t *lock_ctx = NULL; + int ret = 0; + glusterd_req_ctx_t *req_ctx = NULL; + dict_t *op_ctx = NULL; - GF_ASSERT (event); GF_ASSERT (ctx); - lock_ctx = (glusterd_op_lock_ctx_t *)ctx; - - ret = glusterd_unlock (lock_ctx->uuid); + req_ctx = ctx; - gf_log ("", GF_LOG_DEBUG, "Unlock Returned %d", ret); + op_ctx = 
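/*
 * [Editor's note, not part of the patch] glusterd_op_init_commit_rsp_dict
 * above returns dict_ref(op_ctx) for brick-type ops, so per-brick replies
 * accumulate directly in the operation context, and a fresh dict_new()
 * otherwise; reference counting then decides which unref actually frees
 * the dict. A toy refcounting sketch of that choice; dict_sim and the
 * *_sim helpers are invented stand-ins, not the real dict API.
 */
#include <stdio.h>
#include <stdlib.h>

typedef struct {
    int refcount;
} dict_sim;

static dict_sim *
dict_new_sim (void)
{
    dict_sim *d = calloc (1, sizeof (*d));
    d->refcount = 1;
    return d;
}

static dict_sim *
dict_ref_sim (dict_sim *d)
{
    d->refcount++;
    return d;
}

static void
dict_unref_sim (dict_sim *d)
{
    if (--d->refcount == 0) {
        free (d);
        printf ("dict freed\n");
    }
}

int
main (void)
{
    int       need_brick_op = 1;
    dict_sim *op_ctx = dict_new_sim ();
    /* brick ops alias the op ctx so per-brick replies accumulate in it */
    dict_sim *rsp = need_brick_op ? dict_ref_sim (op_ctx) : dict_new_sim ();

    dict_unref_sim (rsp);     /* the commit path drops its reference...  */
    dict_unref_sim (op_ctx);  /* ...and the ctx owner drops the last one */
    return 0;
}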
glusterd_op_get_ctx (); - ret = glusterd_op_unlock_send_resp (lock_ctx->req, ret); + ret = glusterd_op_commit_send_resp (req_ctx->req, req_ctx->op, + opinfo.op_ret, opinfo.op_errstr, + op_ctx); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (opinfo.op_errstr && (strcmp (opinfo.op_errstr, ""))) { + GF_FREE (opinfo.op_errstr); + opinfo.op_errstr = NULL; + } + gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } static int -glusterd_op_ac_rcvd_lock_acc (glusterd_op_sm_event_t *event, void *ctx) +glusterd_op_sm_transition_state (glusterd_op_info_t *opinfo, + glusterd_op_sm_t *state, + glusterd_op_sm_event_type_t event_type) { - int ret = 0; - - GF_ASSERT (event); + glusterd_conf_t *conf = NULL; - opinfo.pending_count--; + GF_ASSERT (state); + GF_ASSERT (opinfo); - if (opinfo.pending_count) - goto out; + conf = THIS->private; + GF_ASSERT (conf); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + (void) glusterd_sm_tr_log_transition_add (&conf->op_sm_log, + opinfo->state.state, + state[event_type].next_state, + event_type); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - -out: - return ret; + opinfo->state.state = state[event_type].next_state; + return 0; } -static int -glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx) +int32_t +glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr, + dict_t *rsp_dict) { - int ret = 0; - rpc_clnt_procedure_t *proc = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; + int ret = -1; + xlator_t *this = THIS; - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - GF_ASSERT (priv->mgmt); + switch (op) { + case GD_OP_CREATE_VOLUME: + ret = glusterd_op_stage_create_volume (dict, op_errstr); + break; - proc = &priv->mgmt->proctable[GD_MGMT_STAGE_OP]; - GF_ASSERT (proc); - if (proc->fn) { - ret = proc->fn (NULL, this, NULL); - if (ret) - goto out; - } + case GD_OP_START_VOLUME: + ret = glusterd_op_stage_start_volume (dict, op_errstr); + break; - if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + case GD_OP_STOP_VOLUME: + ret = glusterd_op_stage_stop_volume (dict, op_errstr); + break; -out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + case GD_OP_DELETE_VOLUME: + ret = glusterd_op_stage_delete_volume (dict, op_errstr); + break; - return ret; + case GD_OP_ADD_BRICK: + ret = glusterd_op_stage_add_brick (dict, op_errstr); + break; -} + case GD_OP_REPLACE_BRICK: + ret = glusterd_op_stage_replace_brick (dict, op_errstr, + rsp_dict); + break; -static int32_t -glusterd_op_start_rb_timer (dict_t *dict) -{ - int32_t op = 0; - struct timeval timeout = {0, }; - glusterd_conf_t *priv = NULL; - int32_t ret = -1; + case GD_OP_SET_VOLUME: + ret = glusterd_op_stage_set_volume (dict, op_errstr); + break; - GF_ASSERT (dict); - priv = THIS->private; + case GD_OP_RESET_VOLUME: + ret = glusterd_op_stage_reset_volume (dict, op_errstr); + break; - ret = dict_get_int32 (dict, "operation", &op); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "dict_get on operation failed"); - goto out; - } + case GD_OP_REMOVE_BRICK: + ret = glusterd_op_stage_remove_brick (dict, op_errstr); + break; - if (op == GF_REPLACE_OP_START || - op == GF_REPLACE_OP_ABORT) - timeout.tv_sec = 5; - else - timeout.tv_sec = 1; + case GD_OP_LOG_ROTATE: + ret = glusterd_op_stage_log_rotate (dict, op_errstr); + break; - timeout.tv_usec = 0; + case GD_OP_SYNC_VOLUME: + ret = glusterd_op_stage_sync_volume (dict, op_errstr); + break; + case GD_OP_GSYNC_CREATE: + ret = 
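/*
 * [Editor's note, not part of the patch] glusterd_op_sm_transition_state
 * above records every transition in the op-sm log and then moves opinfo to
 * the next state, which is looked up from the current state's table row
 * indexed by event type. A minimal standalone sketch of such a
 * next-state table; the states and events here are invented, much smaller
 * than the real glusterd tables.
 */
#include <stdio.h>

typedef enum { ST_DEFAULT, ST_LOCK_SENT, ST_STAGED, ST_MAX } state_t;
typedef enum { EV_START_LOCK, EV_ALL_ACC, EV_MAX } event_t;

/* next_state[current][event]: one row per state, one slot per event. */
static const state_t next_state[ST_MAX][EV_MAX] = {
    [ST_DEFAULT]   = { ST_LOCK_SENT, ST_DEFAULT },
    [ST_LOCK_SENT] = { ST_LOCK_SENT, ST_STAGED  },
    [ST_STAGED]    = { ST_STAGED,    ST_STAGED  },
};

static state_t current = ST_DEFAULT;

static void
transition (event_t ev)
{
    state_t next = next_state[current][ev];

    printf ("transition: %d -[%d]-> %d\n", current, ev, next); /* the log */
    current = next;
}

int
main (void)
{
    transition (EV_START_LOCK);
    transition (EV_ALL_ACC);
    return 0;
}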
glusterd_op_stage_gsync_create (dict, op_errstr); + break; - priv->timer = gf_timer_call_after (THIS->ctx, timeout, - glusterd_do_replace_brick, - (void *) dict); + case GD_OP_GSYNC_SET: + ret = glusterd_op_stage_gsync_set (dict, op_errstr); + break; - ret = 0; + case GD_OP_PROFILE_VOLUME: + ret = glusterd_op_stage_stats_volume (dict, op_errstr); + break; -out: + case GD_OP_QUOTA: + ret = glusterd_op_stage_quota (dict, op_errstr); + break; + + case GD_OP_STATUS_VOLUME: + ret = glusterd_op_stage_status_volume (dict, op_errstr); + break; + + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + ret = glusterd_op_stage_rebalance (dict, op_errstr); + break; + + case GD_OP_HEAL_VOLUME: + ret = glusterd_op_stage_heal_volume (dict, op_errstr); + break; + + case GD_OP_STATEDUMP_VOLUME: + ret = glusterd_op_stage_statedump_volume (dict, + op_errstr); + break; + case GD_OP_CLEARLOCKS_VOLUME: + ret = glusterd_op_stage_clearlocks_volume (dict, + op_errstr); + break; + + case GD_OP_COPY_FILE: + ret = glusterd_op_stage_copy_file (dict, op_errstr); + break; + + case GD_OP_SYS_EXEC: + ret = glusterd_op_stage_sys_exec (dict, op_errstr); + break; + + default: + gf_log (this->name, GF_LOG_ERROR, "Unknown op %s", + gd_op_list[op]); + } + + gf_log (this->name, GF_LOG_DEBUG, "OP = %d. Returning %d", op, ret); return ret; } -static int -glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx) + +int32_t +glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr, + dict_t *rsp_dict) { - int ret = 0; - rpc_clnt_procedure_t *proc = NULL; - glusterd_conf_t *priv = NULL; - xlator_t *this = NULL; - dict_t *dict = NULL; + int ret = -1; + xlator_t *this = THIS; - this = THIS; - GF_ASSERT (this); - priv = this->private; - GF_ASSERT (priv); - GF_ASSERT (priv->mgmt); + glusterd_op_commit_hook (op, dict, GD_COMMIT_HOOK_PRE); + switch (op) { + case GD_OP_CREATE_VOLUME: + ret = glusterd_op_create_volume (dict, op_errstr); + break; - proc = &priv->mgmt->proctable[GD_MGMT_COMMIT_OP]; - GF_ASSERT (proc); - if (proc->fn) { - ret = proc->fn (NULL, this, NULL); - if (ret) - goto out; - } + case GD_OP_START_VOLUME: + ret = glusterd_op_start_volume (dict, op_errstr); + break; - if (!opinfo.pending_count) { - dict = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (dict) { - dict = dict_ref (dict); - ret = glusterd_op_start_rb_timer (dict); - if (ret) - goto out; - } else { - ret = glusterd_op_sm_inject_all_acc (); - } - } + case GD_OP_STOP_VOLUME: + ret = glusterd_op_stop_volume (dict); + break; -out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + case GD_OP_DELETE_VOLUME: + ret = glusterd_op_delete_volume (dict); + break; - return ret; + case GD_OP_ADD_BRICK: + ret = glusterd_op_add_brick (dict, op_errstr); + break; -} + case GD_OP_REPLACE_BRICK: + ret = glusterd_op_replace_brick (dict, rsp_dict); + break; -static int -glusterd_op_ac_rcvd_stage_op_acc (glusterd_op_sm_event_t *event, void *ctx) -{ - int ret = 0; + case GD_OP_SET_VOLUME: + ret = glusterd_op_set_volume (dict); + break; - GF_ASSERT (event); + case GD_OP_RESET_VOLUME: + ret = glusterd_op_reset_volume (dict, op_errstr); + break; - opinfo.pending_count--; + case GD_OP_REMOVE_BRICK: + ret = glusterd_op_remove_brick (dict, op_errstr); + break; - if (opinfo.pending_count) - goto out; + case GD_OP_LOG_ROTATE: + ret = glusterd_op_log_rotate (dict); + break; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC, NULL); + case GD_OP_SYNC_VOLUME: + ret = glusterd_op_sync_volume (dict, op_errstr, rsp_dict); + break; -out: - gf_log 
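/*
 * [Editor's note, not part of the patch] The patch dispatches stage (and,
 * below, commit) handlers with a long switch over glusterd_op_t. Purely as
 * a design aside, the same mapping can be table-driven: an array of
 * function pointers indexed by the op enum gives O(1) dispatch and makes a
 * missing handler an explicit NULL slot. Everything in this sketch is
 * invented; it is not how the patch is written.
 */
#include <stdio.h>

typedef enum { OP_CREATE, OP_START, OP_STOP, OP_MAX } op_t;
typedef int (*stage_fn) (const char *dict);

static int stage_create (const char *d) { printf ("stage create %s\n", d); return 0; }
static int stage_start  (const char *d) { printf ("stage start %s\n", d);  return 0; }
static int stage_stop   (const char *d) { printf ("stage stop %s\n", d);   return 0; }

static stage_fn stage_table[OP_MAX] = {
    [OP_CREATE] = stage_create,
    [OP_START]  = stage_start,
    [OP_STOP]   = stage_stop,
};

static int
stage_validate (op_t op, const char *dict)
{
    if (op >= OP_MAX || !stage_table[op]) {
        fprintf (stderr, "unknown op %d\n", op);
        return -1;
    }
    return stage_table[op] (dict);
}

int
main (void)
{
    return stage_validate (OP_START, "volname=vol0");
}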
("", GF_LOG_DEBUG, "Returning %d", ret); + case GD_OP_GSYNC_CREATE: + ret = glusterd_op_gsync_create (dict, op_errstr, + rsp_dict); + break; - return ret; -} + case GD_OP_GSYNC_SET: + ret = glusterd_op_gsync_set (dict, op_errstr, rsp_dict); + break; -void -glusterd_do_replace_brick (void *data) -{ - glusterd_volinfo_t *volinfo = NULL; - int32_t op = 0; - int32_t src_port = 0; - int32_t dst_port = 0; - dict_t *dict = NULL; - char *src_brick = NULL; - char *dst_brick = NULL; - char *volname = NULL; - glusterd_brickinfo_t *src_brickinfo = NULL; - glusterd_brickinfo_t *dst_brickinfo = NULL; - glusterd_conf_t *priv = NULL; + case GD_OP_PROFILE_VOLUME: + ret = glusterd_op_stats_volume (dict, op_errstr, + rsp_dict); + break; - int ret = 0; + case GD_OP_QUOTA: + ret = glusterd_op_quota (dict, op_errstr, rsp_dict); + break; - dict = data; + case GD_OP_STATUS_VOLUME: + ret = glusterd_op_status_volume (dict, op_errstr, rsp_dict); + break; - GF_ASSERT (THIS); + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + ret = glusterd_op_rebalance (dict, op_errstr, rsp_dict); + break; - priv = THIS->private; + case GD_OP_HEAL_VOLUME: + ret = glusterd_op_heal_volume (dict, op_errstr); + break; - if (priv->timer) { - gf_timer_call_cancel (THIS->ctx, priv->timer); - priv->timer = NULL; - gf_log ("", GF_LOG_DEBUG, - "Cancelled timer thread"); - } + case GD_OP_STATEDUMP_VOLUME: + ret = glusterd_op_statedump_volume (dict, op_errstr); + break; - gf_log ("", GF_LOG_DEBUG, - "Replace brick operation detected"); + case GD_OP_CLEARLOCKS_VOLUME: + ret = glusterd_op_clearlocks_volume (dict, op_errstr, + rsp_dict); + break; - ret = dict_get_int32 (dict, "operation", &op); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "dict_get on operation failed"); - goto out; - } - ret = dict_get_str (dict, "src-brick", &src_brick); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); - goto out; + case GD_OP_COPY_FILE: + ret = glusterd_op_copy_file (dict, op_errstr); + break; + + case GD_OP_SYS_EXEC: + ret = glusterd_op_sys_exec (dict, op_errstr, rsp_dict); + break; + + default: + gf_log (this->name, GF_LOG_ERROR, "Unknown op %s", + gd_op_list[op]); + break; } - gf_log ("", GF_LOG_DEBUG, - "src brick=%s", src_brick); + if (ret == 0) + glusterd_op_commit_hook (op, dict, GD_COMMIT_HOOK_POST); - ret = dict_get_str (dict, "dst-brick", &dst_brick); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + + +static int +glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr, + struct list_head *selected) +{ + int ret = 0; + int flags = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_pending_node_t *pending_node = NULL; + + ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags); + if (ret) + goto out; + + ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dst brick"); + gf_log (THIS->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS, + volname); + gf_asprintf (op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname); goto out; } - gf_log ("", GF_LOG_DEBUG, - "dst brick=%s", dst_brick); + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (glusterd_is_brick_started (brickinfo)) { + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + list_add_tail (&pending_node->list, selected); + pending_node = NULL; + } + 
} + } + +out: + return ret; +} + +static int +glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr, + struct list_head *selected) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + char *brick = NULL; + int32_t count = 0; + int32_t i = 1; + char key[256] = {0,}; + glusterd_pending_node_t *pending_node = NULL; + int32_t force = 0; + + ret = dict_get_str (dict, "volname", &volname); @@ -4197,684 +4644,1017 @@ glusterd_do_replace_brick (void *data) } ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); goto out; } - ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, &src_brickinfo); + ret = dict_get_int32 (dict, "count", &count); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo"); + gf_log ("", GF_LOG_ERROR, "Unable to get count"); goto out; } - ret = glusterd_brickinfo_from_brick (dst_brick, &dst_brickinfo); + ret = dict_get_int32 (dict, "force", &force); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to get dst-brickinfo"); + gf_log (THIS->name, GF_LOG_INFO, "force flag is not set"); + ret = 0; goto out; } - ret = glusterd_resolve_brick (dst_brickinfo); + while ( i <= count) { + snprintf (key, 256, "brick%d", i); + ret = dict_get_str (dict, key, &brick); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Unable to get brick"); + goto out; + } + + ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, + &brickinfo); + if (ret) + goto out; + if (glusterd_is_brick_started (brickinfo)) { + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + list_add_tail (&pending_node->list, selected); + pending_node = NULL; + } + } + i++; + } + +out: + return ret; +} + +static int +glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr, + struct list_head *selected) +{ + int ret = -1; + char *volname = NULL; + char msg[2048] = {0,}; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + int32_t stats_op = GF_CLI_STATS_NONE; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_pending_node_t *pending_node = NULL; + char *brick = NULL; + + + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + + ret = dict_get_str (dict, "volname", &volname); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Unable to resolve dst-brickinfo"); + gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed"); goto out; } - ret = dict_get_int32 (dict, "src-brick-port", &src_port); + ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get src-brick port"); + snprintf (msg, sizeof (msg), "Volume %s does not exists", + volname); + + *op_errstr = gf_strdup (msg); + gf_log ("", GF_LOG_ERROR, "%s", msg); goto out; } - ret = dict_get_int32 (dict, "dst-brick-port", &dst_port); + ret = dict_get_int32 (dict, "op", &stats_op); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get dst-brick port"); + gf_log ("glusterd", GF_LOG_ERROR, "volume profile op get failed"); + goto out; } - dst_brickinfo->port = dst_port; - src_brickinfo->port = src_port; + switch (stats_op) { + case GF_CLI_STATS_START: + case GF_CLI_STATS_STOP: + goto out; + break; + case GF_CLI_STATS_INFO: + ret = dict_get_str_boolean (dict, "nfs", _gf_false); + if (ret) { + if (!glusterd_is_nodesvc_online ("nfs")) 
{ + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "NFS server" + " is not running"); + goto out; + } + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = priv->nfs; + pending_node->type = GD_NODE_NFS; + list_add_tail (&pending_node->list, selected); + pending_node = NULL; - switch (op) { - case GF_REPLACE_OP_START: - if (!dst_port) { - ret = -1; + ret = 0; goto out; + + } + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (glusterd_is_brick_started (brickinfo)) { + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + list_add_tail (&pending_node->list, + selected); + pending_node = NULL; + } + } } + break; - ret = rb_do_operation_start (volinfo, src_brickinfo, dst_brickinfo); + case GF_CLI_STATS_TOP: + ret = dict_get_str_boolean (dict, "nfs", _gf_false); if (ret) { - glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); + if (!glusterd_is_nodesvc_online ("nfs")) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "NFS server" + " is not running"); + goto out; + } + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = priv->nfs; + pending_node->type = GD_NODE_NFS; + list_add_tail (&pending_node->list, selected); + pending_node = NULL; + + ret = 0; goto out; + + } + ret = dict_get_str (dict, "brick", &brick); + if (!ret) { + ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, + &brickinfo); + if (ret) + goto out; + + if (!glusterd_is_brick_started (brickinfo)) + goto out; + + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + list_add_tail (&pending_node->list, + selected); + pending_node = NULL; + goto out; + } + } + ret = 0; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (glusterd_is_brick_started (brickinfo)) { + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + list_add_tail (&pending_node->list, + selected); + pending_node = NULL; + } + } } break; - case GF_REPLACE_OP_PAUSE: - case GF_REPLACE_OP_ABORT: - case GF_REPLACE_OP_COMMIT: - case GF_REPLACE_OP_COMMIT_FORCE: - case GF_REPLACE_OP_STATUS: - break; + default: + GF_ASSERT (0); + gf_log ("glusterd", GF_LOG_ERROR, "Invalid profile op: %d", + stats_op); ret = -1; goto out; + break; } + out: - if (ret) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); - else - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - glusterd_op_sm (); + return ret; } - - static int -glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx) +_add_rxlator_to_dict (dict_t *dict, char *volname, int index, int count) { - glusterd_conf_t *priv = NULL; - dict_t *dict = NULL; - int ret = 0; - gf_boolean_t commit_ack_inject = _gf_false; + int ret = -1; + char key[128] = {0,}; + char *xname = NULL; - priv = THIS->private; - GF_ASSERT (event); - - opinfo.pending_count--; - - if (opinfo.pending_count) + snprintf (key, sizeof (key), 
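/*
 * [Editor's note, not part of the patch] For "profile info" and "profile
 * top" the selector above consults the "nfs" boolean in the request dict:
 * when set it requires the NFS server to be online and selects that single
 * node, otherwise it enumerates every started brick. A standalone sketch
 * of that target choice; select_profile_targets and nfs_online are
 * invented names.
 */
#include <stdio.h>

static int nfs_online = 1;

static int
select_profile_targets (int want_nfs, int nbricks)
{
    int i = 0;

    if (want_nfs) {
        if (!nfs_online) {
            fprintf (stderr, "NFS server is not running\n");
            return -1;
        }
        printf ("selected: nfs server node\n");
        return 0;
    }
    for (i = 0; i < nbricks; i++)
        printf ("selected: brick %d\n", i);
    return 0;
}

int
main (void)
{
    select_profile_targets (1, 2);  /* profile of the NFS server  */
    select_profile_targets (0, 2);  /* profile of started bricks  */
    return 0;
}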
"xl-%d", count); + ret = gf_asprintf (&xname, "%s-replicate-%d", volname, index); + if (ret == -1) goto out; - dict = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (dict) { - ret = glusterd_op_start_rb_timer (dict); - if (ret) - goto out; - commit_ack_inject = _gf_false; - goto out; - } + ret = dict_set_dynstr (dict, key, xname); + if (ret) + goto out; - commit_ack_inject = _gf_true; + ret = dict_set_int32 (dict, xname, index); out: - if (commit_ack_inject) { - if (ret) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); - else - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); - } - return ret; } -static int -glusterd_op_ac_rcvd_unlock_acc (glusterd_op_sm_event_t *event, void *ctx) -{ +int +get_replica_index_for_per_replica_cmd (glusterd_volinfo_t *volinfo, + dict_t *dict) { int ret = 0; + char *hostname = NULL; + char *path = NULL; + int index = 0; + glusterd_brickinfo_t *brickinfo = NULL; + int cmd_replica_index = -1; + int replica_count = -1; - GF_ASSERT (event); - opinfo.pending_count--; + if (!dict) { + ret = -1; + goto out; + } - if (opinfo.pending_count) + ret = dict_get_str (dict, "per-replica-cmd-hostname", &hostname); + if (ret) + goto out; + ret = dict_get_str (dict, "per-replica-cmd-path", &path); + if (ret) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + replica_count = volinfo->replica_count; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_is_null (brickinfo->uuid)) + (void)glusterd_resolve_brick (brickinfo); + if (!strcmp (brickinfo->path, path) && + !strcmp (brickinfo->hostname, hostname)) { + cmd_replica_index = index/(replica_count); + goto out; + } + index++; + } - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); out: - return ret; -} + if (ret) + cmd_replica_index = -1; + return cmd_replica_index; +} -int32_t -glusterd_op_send_cli_response (int32_t op, int32_t op_ret, - int32_t op_errno, rpcsvc_request_t *req, - void *op_ctx, char *op_errstr) +int +_select_rxlators_with_local_bricks (xlator_t *this, glusterd_volinfo_t *volinfo, + dict_t *dict, cli_cmd_type type) { - int32_t ret = -1; - gd_serialize_t sfunc = NULL; - void *cli_rsp = NULL; - dict_t *ctx = NULL; - - switch (op) { - case GD_MGMT_CLI_CREATE_VOLUME: - { - gf1_cli_create_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_create_vol_rsp; - break; - } + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *priv = NULL; + int index = 0; + int rxlator_count = 0; + int replica_count = 0; + gf_boolean_t add = _gf_false; + int ret = 0; + int cmd_replica_index = -1; - case GD_MGMT_CLI_START_VOLUME: - { - gf1_cli_start_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_start_vol_rsp; - break; - } + priv = this->private; + replica_count = volinfo->replica_count; - case GD_MGMT_CLI_STOP_VOLUME: - { - gf1_cli_stop_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_stop_vol_rsp; - break; - } + if (type == PER_REPLICA) { - case GD_MGMT_CLI_DELETE_VOLUME: - { - gf1_cli_delete_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - cli_rsp = &rsp; - sfunc = 
gf_xdr_serialize_cli_delete_vol_rsp; - break; - } + cmd_replica_index = get_replica_index_for_per_replica_cmd + (volinfo, dict); + if (cmd_replica_index == -1) { + ret = -1; + goto err; + } + } - case GD_MGMT_CLI_DEFRAG_VOLUME: - { - gf1_cli_defrag_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - //rsp.volname = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_defrag_vol_rsp; - break; - } + index = 1; - case GD_MGMT_CLI_ADD_BRICK: - { - gf1_cli_add_brick_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_add_brick_rsp; - break; - } + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_is_null (brickinfo->uuid)) + (void)glusterd_resolve_brick (brickinfo); - case GD_MGMT_CLI_REMOVE_BRICK: - { - gf1_cli_remove_brick_rsp rsp = {0,}; - ctx = op_ctx; - if (ctx && - dict_get_str (ctx, "errstr", &rsp.op_errstr)) - rsp.op_errstr = ""; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_remove_brick_rsp; + switch (type) { + case ALL_REPLICA: + if (!uuid_compare (MY_UUID, brickinfo->uuid)) + add = _gf_true; break; - } + case PER_REPLICA: + if (!uuid_compare (MY_UUID, brickinfo->uuid) && + ((index-1)/replica_count == cmd_replica_index)) - case GD_MGMT_CLI_REPLACE_BRICK: - { - gf1_cli_replace_brick_rsp rsp = {0,}; - ctx = op_ctx; - if (ctx && - dict_get_str (ctx, "status-reply", &rsp.status)) - rsp.status = ""; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - rsp.volname = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_replace_brick_rsp; + add = _gf_true; break; - } + } - case GD_MGMT_CLI_SET_VOLUME: - { - gf1_cli_set_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.volname = ""; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_set_vol_rsp; - break; - } - - case GD_MGMT_CLI_RESET_VOLUME: - { - gf_log ("", GF_LOG_DEBUG, "Return value to CLI"); - gf1_cli_reset_vol_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = 1; - rsp.volname = ""; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = "Error while resetting options"; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_reset_vol_rsp; - break; + if (index % replica_count == 0) { + if (add) { + _add_rxlator_to_dict (dict, volinfo->volname, + (index-1)/replica_count, + rxlator_count); + rxlator_count++; } + add = _gf_false; + } - case GD_MGMT_CLI_LOG_FILENAME: - { - gf1_cli_log_filename_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_log_filename_rsp; - break; - } - case GD_MGMT_CLI_LOG_ROTATE: - { - gf1_cli_log_rotate_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - rsp.errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_serialize_cli_log_rotate_rsp; - break; - } - case GD_MGMT_CLI_SYNC_VOLUME: - { - gf1_cli_sync_volume_rsp rsp = {0,}; - rsp.op_ret = op_ret; - rsp.op_errno = op_errno; - if (op_errstr) - rsp.op_errstr = op_errstr; - else - rsp.op_errstr = ""; - cli_rsp = &rsp; - sfunc = gf_xdr_from_cli_sync_volume_rsp; - break; - } + index++; } - - ret = glusterd_submit_reply (req, cli_rsp, NULL, 0, NULL, - sfunc); - +err: if (ret) - goto out; - -out: - gf_log ("", GF_LOG_DEBUG, 
"Returning %d", ret); - return ret; -} + rxlator_count = -1; -int32_t -glusterd_op_clear_errstr() { - opinfo.op_errstr = NULL; - return 0; + return rxlator_count; } -int32_t -glusterd_op_txn_complete () +int +_select_rxlators_for_full_self_heal (xlator_t *this, + glusterd_volinfo_t *volinfo, + dict_t *dict) { - int32_t ret = -1; - glusterd_conf_t *priv = NULL; - int32_t op = -1; - int32_t op_ret = 0; - int32_t op_errno = 0; - int32_t cli_op = 0; - rpcsvc_request_t *req = NULL; - void *ctx = NULL; - gf_boolean_t ctx_free = _gf_false; - char *op_errstr = NULL; - + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *priv = NULL; + int index = 1; + int rxlator_count = 0; + int replica_count = 0; + uuid_t candidate = {0}; - priv = THIS->private; - GF_ASSERT (priv); + priv = this->private; + replica_count = volinfo->replica_count; - ret = glusterd_unlock (priv->uuid); + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_is_null (brickinfo->uuid)) + (void)glusterd_resolve_brick (brickinfo); + + if (uuid_compare (brickinfo->uuid, candidate) > 0) + uuid_copy (candidate, brickinfo->uuid); + + if (index % replica_count == 0) { + if (!uuid_compare (MY_UUID, candidate)) { + _add_rxlator_to_dict (dict, volinfo->volname, + (index-1)/replica_count, + rxlator_count); + rxlator_count++; + } + uuid_clear (candidate); + } - if (ret) { - gf_log ("glusterd", GF_LOG_CRITICAL, - "Unable to clear local lock, ret: %d", ret); - goto out; + index++; } - - gf_log ("glusterd", GF_LOG_NORMAL, "Cleared local lock"); - - op_ret = opinfo.op_ret; - op_errno = opinfo.op_errno; - cli_op = opinfo.cli_op; - req = opinfo.req; - if (opinfo.op_errstr) - op_errstr = opinfo.op_errstr; + return rxlator_count; +} - opinfo.op_ret = 0; - opinfo.op_errno = 0; +static int +glusterd_bricks_select_snap (dict_t *dict, char **op_errstr, + struct list_head *selected) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_pending_node_t *pending_node = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int brick_index = -1; - op = glusterd_op_get_op (); + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); - if (op != -1) { - glusterd_op_clear_pending_op (op); - glusterd_op_clear_commit_op (op); - glusterd_op_clear_op (op); - ctx = glusterd_op_get_ctx (op); - ctx_free = glusterd_op_get_ctx_free (op); - glusterd_op_set_ctx (op, NULL); - glusterd_op_clear_ctx_free (op); - glusterd_op_clear_errstr (); + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get" + " volname"); + goto out; } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; -out: - pthread_mutex_unlock (&opinfo.lock); - ret = glusterd_op_send_cli_response (cli_op, op_ret, - op_errno, req, ctx, op_errstr); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Responding to cli failed, ret: %d", - ret); - //Ignore this error, else state machine blocks - ret = 0; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + brick_index++; + if (uuid_compare (brickinfo->uuid, MY_UUID) || + !glusterd_is_brick_started (brickinfo)) { + continue; + } + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } + pending_node->node = brickinfo; + pending_node->type = GD_NODE_BRICK; + pending_node->index = brick_index; + list_add_tail (&pending_node->list, + selected); + pending_node = NULL; } - if 
(ctx_free && ctx && (op != -1))
- glusterd_op_free_ctx (op, ctx, ctx_free);
- if (op_errstr && (strcmp (op_errstr, "")))
- GF_FREE (op_errstr);
+ ret = 0;
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
+out:
+ gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret);
 return ret;
 }
 static int
-glusterd_op_ac_unlocked_all (glusterd_op_sm_event_t *event, void *ctx)
+fill_shd_status_for_local_bricks (dict_t *dict, glusterd_volinfo_t *volinfo,
+                                  cli_cmd_type type, dict_t *req_dict)
 {
+        glusterd_brickinfo_t    *brickinfo = NULL;
+        char                    msg[1024] = {0,};
+        char                    key[1024] = {0,};
+        char                    value[1024] = {0,};
+        int                     index = 0;
 int ret = 0;
+        xlator_t                *this = NULL;
+        int                     cmd_replica_index = -1;
- GF_ASSERT (event);
-
- ret = glusterd_op_txn_complete ();
-
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+        this = THIS;
+        snprintf (msg, sizeof (msg), "self-heal-daemon is not running on");
+
+        if (type == PER_REPLICA) {
+                cmd_replica_index = get_replica_index_for_per_replica_cmd
+                                    (volinfo, req_dict);
+                if (cmd_replica_index == -1) {
+                        gf_log (THIS->name, GF_LOG_ERROR, "Could not find the "
+                                "replica index for per replica type command");
+                        ret = -1;
+                        goto out;
+                }
+        }
- return ret;
-}
+        list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                if (uuid_is_null (brickinfo->uuid))
+                        (void)glusterd_resolve_brick (brickinfo);
+                if (uuid_compare (MY_UUID, brickinfo->uuid)) {
+                        index++;
+                        continue;
+                }
-static int
-glusterd_op_ac_commit_error (glusterd_op_sm_event_t *event, void *ctx)
-{
- int ret = 0;
+                if (type == PER_REPLICA) {
+                        if (cmd_replica_index != (index/volinfo->replica_count)) {
+                                index++;
+                                continue;
+                        }
- //Log here with who failed the commit
- //
+                }
+                snprintf (key, sizeof (key), "%d-status", index);
+                snprintf (value, sizeof (value), "%s %s", msg,
+                          uuid_utoa(MY_UUID));
+                ret = dict_set_dynstr (dict, key, gf_strdup(value));
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Unable to "
+                                "set the dictionary for shd status msg");
+                        goto out;
+                }
+                snprintf (key, sizeof (key), "%d-shd-status", index);
+                ret = dict_set_str (dict, key, "off");
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, "Unable to"
+                                " set dictionary for shd status msg");
+                        goto out;
+                }
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_START_UNLOCK, NULL);
+                index++;
+        }
+out: return ret;
+
+ }
+
 static int
-glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr,
+                                    struct list_head *selected,
+                                    dict_t *rsp_dict)
 {
- int ret = -1;
- gd1_mgmt_stage_op_req *req = NULL;
- glusterd_op_stage_ctx_t *stage_ctx = NULL;
- int32_t status = 0;
- dict_t *rsp_dict = NULL;
- char *op_errstr = NULL;
+        int                                     ret = -1;
+        char                                    *volname = NULL;
+        glusterd_conf_t                         *priv = NULL;
+        glusterd_volinfo_t                      *volinfo = NULL;
+        xlator_t                                *this = NULL;
+        char                                    msg[2048] = {0,};
+        glusterd_pending_node_t                 *pending_node = NULL;
+        gf_xl_afr_op_t                          heal_op = GF_AFR_OP_INVALID;
+        int                                     rxlator_count = 0;
- GF_ASSERT (ctx);
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
- stage_ctx = ctx;
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
+                goto out;
+        }
- req = &stage_ctx->stage_req;
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (msg, sizeof (msg), "Volume %s does not exist",
+                          volname);
+                *op_errstr = gf_strdup (msg);
+                gf_log ("", GF_LOG_ERROR, "%s", msg);
+                goto out;
+        }
- rsp_dict = dict_new ();
- if (!rsp_dict) {
- gf_log ("", GF_LOG_DEBUG,
- "Out of memory");
- return -1;
+        ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op);
+        if (ret || (heal_op == GF_AFR_OP_INVALID)) {
+                gf_log ("glusterd", GF_LOG_ERROR, "heal op invalid");
+                goto out;
 }
- status = glusterd_op_stage_validate (req, &op_errstr,
-                                      rsp_dict);
+        switch (heal_op) {
+        case GF_AFR_OP_INDEX_SUMMARY:
+        case GF_AFR_OP_STATISTICS_HEAL_COUNT:
+                if (!glusterd_is_nodesvc_online ("glustershd")) {
+                        if (!rsp_dict) {
+                                gf_log (this->name, GF_LOG_ERROR, "Received "
+                                        "empty ctx.");
+                                goto out;
+                        }
- if (status) {
- gf_log ("", GF_LOG_ERROR, "Validate failed: %d", status);
+                        ret = fill_shd_status_for_local_bricks (rsp_dict,
+                                                                volinfo,
+                                                                ALL_REPLICA,
+                                                                dict);
+                        if (ret)
+                                gf_log (this->name, GF_LOG_ERROR, "Unable to "
+                                        "fill the shd status for the local "
+                                        "bricks");
+                        goto out;
+
+                }
+                break;
+        case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+                if (!glusterd_is_nodesvc_online ("glustershd")) {
+                        if (!rsp_dict) {
+                                gf_log (this->name, GF_LOG_ERROR, "Received "
+                                        "empty ctx.");
+                                goto out;
+                        }
+                        ret = fill_shd_status_for_local_bricks (rsp_dict,
+                                                                volinfo,
+                                                                PER_REPLICA,
+                                                                dict);
+                        if (ret)
+                                gf_log (this->name, GF_LOG_ERROR, "Unable to "
+                                        "fill the shd status for the local"
+                                        " bricks.");
+                        goto out;
+
+                }
+                break;
+        default:
+                break;
 }
- ret = glusterd_op_stage_send_resp (stage_ctx->req, req->op,
-                                    status, op_errstr, rsp_dict);
- if (op_errstr && (strcmp (op_errstr, "")))
- GF_FREE (op_errstr);
+        switch (heal_op) {
+        case GF_AFR_OP_HEAL_FULL:
+                rxlator_count = _select_rxlators_for_full_self_heal (this,
+                                                                     volinfo,
+                                                                     dict);
+                break;
+        case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+                rxlator_count = _select_rxlators_with_local_bricks (this,
+                                                                    volinfo,
+                                                                    dict,
+                                                                    PER_REPLICA);
+                break;
+        default:
+                rxlator_count = _select_rxlators_with_local_bricks (this,
+                                                                    volinfo,
+                                                                    dict,
+                                                                    ALL_REPLICA);
+                break;
+        }
+        if (!rxlator_count)
+                goto out;
+        if (rxlator_count == -1) {
+                gf_log (this->name, GF_LOG_ERROR, "Could not determine the "
+                        "translator count");
+                ret = -1;
+                goto out;
+        }
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+        ret = dict_set_int32 (dict, "count", rxlator_count);
+        if (ret)
+                goto out;
- if (rsp_dict)
- dict_unref (rsp_dict);
+        pending_node = GF_CALLOC (1, sizeof (*pending_node),
+                                  gf_gld_mt_pending_node_t);
+        if (!pending_node) {
+                ret = -1;
+                goto out;
+        } else {
+                pending_node->node = priv->shd;
+                pending_node->type = GD_NODE_SHD;
+                list_add_tail (&pending_node->list, selected);
+                pending_node = NULL;
+        }
+out:
+        gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret);
 return ret;
+
 }
 static int
-glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_bricks_select_rebalance_volume (dict_t *dict, char **op_errstr,
+                                         struct list_head *selected)
 {
- int ret = 0;
- gd1_mgmt_stage_op_req *req = NULL;
- glusterd_op_commit_ctx_t *commit_ctx = NULL;
- int32_t status = 0;
- char *op_errstr = NULL;
- dict_t *rsp_dict = NULL;
-
- GF_ASSERT (ctx);
-
- commit_ctx = ctx;
+        int                                     ret = -1;
+        char                                    *volname = NULL;
+        glusterd_volinfo_t                      *volinfo = NULL;
+        xlator_t                                *this = NULL;
+        char                                    msg[2048] = {0,};
+        glusterd_pending_node_t                 *pending_node = NULL;
- req = &commit_ctx->stage_req;
+        this = THIS;
+        GF_ASSERT (this);
- rsp_dict = dict_new ();
- if (!rsp_dict) {
- gf_log ("", GF_LOG_DEBUG,
- "Out of memory");
- ret = -1;
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
 goto out;
 }
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (msg, sizeof
(msg), "Volume %s does not exist", + volname); - status = glusterd_op_commit_perform (req, &op_errstr, rsp_dict); - - if (status) { - gf_log ("", GF_LOG_ERROR, "Commit failed: %d", status); + *op_errstr = gf_strdup (msg); + gf_log ("", GF_LOG_ERROR, "%s", msg); + goto out; + } + pending_node = GF_CALLOC (1, sizeof (*pending_node), + gf_gld_mt_pending_node_t); + if (!pending_node) { + ret = -1; + goto out; + } else { + pending_node->node = volinfo; + pending_node->type = GD_NODE_REBALANCE; + list_add_tail (&pending_node->list, + &opinfo.pending_bricks); + pending_node = NULL; } - - ret = glusterd_op_commit_send_resp (commit_ctx->req, req->op, status, - op_errstr, rsp_dict); out: - if (rsp_dict) - dict_unref (rsp_dict); - if (op_errstr && (strcmp (op_errstr, ""))) - GF_FREE (op_errstr); - - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); - return ret; } + + static int -glusterd_op_sm_transition_state (glusterd_op_info_t *opinfo, - glusterd_op_sm_t *state, - glusterd_op_sm_event_type_t event_type) +glusterd_bricks_select_status_volume (dict_t *dict, char **op_errstr, + struct list_head *selected) { + int ret = -1; + int cmd = 0; + int brick_index = -1; + char *volname = NULL; + char *brickname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_pending_node_t *pending_node = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; - GF_ASSERT (state); - GF_ASSERT (opinfo); - - gf_log ("", GF_LOG_NORMAL, "Transitioning from '%s' to '%s' due to " - "event '%s'", - glusterd_op_sm_state_name_get (opinfo->state.state), - glusterd_op_sm_state_name_get (state[event_type].next_state), - glusterd_op_sm_event_name_get (event_type)); - opinfo->state.state = state[event_type].next_state; - return 0; -} + GF_ASSERT (dict); -int32_t -glusterd_op_stage_validate (gd1_mgmt_stage_op_req *req, char **op_errstr, - dict_t *rsp_dict) -{ - int ret = -1; + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); - GF_ASSERT (req); + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get status type"); + goto out; + } - switch (req->op) { - case GD_OP_CREATE_VOLUME: - ret = glusterd_op_stage_create_volume (req, op_errstr); - break; + if (cmd & GF_CLI_STATUS_ALL) + goto out; - case GD_OP_START_VOLUME: - ret = glusterd_op_stage_start_volume (req, op_errstr); - break; + switch (cmd & GF_CLI_STATUS_MASK) { + case GF_CLI_STATUS_MEM: + case GF_CLI_STATUS_CLIENTS: + case GF_CLI_STATUS_INODE: + case GF_CLI_STATUS_FD: + case GF_CLI_STATUS_CALLPOOL: + case GF_CLI_STATUS_NFS: + case GF_CLI_STATUS_SHD: + break; + default: + goto out; + } + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volname"); + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + goto out; + } - case GD_OP_STOP_VOLUME: - ret = glusterd_op_stage_stop_volume (req); - break; + if ( (cmd & GF_CLI_STATUS_BRICK) != 0) { + ret = dict_get_str (dict, "brick", &brickname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get brick"); + goto out; + } + ret = glusterd_volume_brickinfo_get_by_brick (brickname, + volinfo, + &brickinfo); + if (ret) + goto out; - case GD_OP_DELETE_VOLUME: - ret = glusterd_op_stage_delete_volume (req); - break; + if (uuid_compare (brickinfo->uuid, MY_UUID)|| + !glusterd_is_brick_started (brickinfo)) + goto out; - case GD_OP_ADD_BRICK: - ret = glusterd_op_stage_add_brick (req, op_errstr); - break; + pending_node 
= GF_CALLOC (1, sizeof (*pending_node),
+                              gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+         ret = -1;
+         goto out;
+ }
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ pending_node->index = 0;
+ list_add_tail (&pending_node->list, selected);
- case GD_OP_REPLACE_BRICK:
- ret = glusterd_op_stage_replace_brick (req, op_errstr,
-                                        rsp_dict);
- break;
+ ret = 0;
+ } else if ((cmd & GF_CLI_STATUS_NFS) != 0) {
+         if (!glusterd_is_nodesvc_online ("nfs")) {
+                 ret = -1;
+                 gf_log (this->name, GF_LOG_ERROR,
+                         "NFS server is not running");
+                 goto out;
+         }
+         pending_node = GF_CALLOC (1, sizeof (*pending_node),
+                                   gf_gld_mt_pending_node_t);
+         if (!pending_node) {
+                 ret = -1;
+                 goto out;
+         }
+         pending_node->node = priv->nfs;
+         pending_node->type = GD_NODE_NFS;
+         pending_node->index = 0;
+         list_add_tail (&pending_node->list, selected);
- case GD_OP_SET_VOLUME:
- ret = glusterd_op_stage_set_volume (req, op_errstr);
- break;
+         ret = 0;
+ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+         if (!glusterd_is_nodesvc_online ("glustershd")) {
+                 ret = -1;
+                 gf_log (this->name, GF_LOG_ERROR,
+                         "Self-heal daemon is not running");
+                 goto out;
+         }
+         pending_node = GF_CALLOC (1, sizeof (*pending_node),
+                                   gf_gld_mt_pending_node_t);
+         if (!pending_node) {
+                 ret = -1;
+                 goto out;
+         }
+         pending_node->node = priv->shd;
+         pending_node->type = GD_NODE_SHD;
+         pending_node->index = 0;
+         list_add_tail (&pending_node->list, selected);
- case GD_OP_RESET_VOLUME:
- ret = glusterd_op_stage_reset_volume (req);
- break;
+         ret = 0;
+ } else {
+         list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
+                 brick_index++;
+                 if (uuid_compare (brickinfo->uuid, MY_UUID) ||
+                     !glusterd_is_brick_started (brickinfo)) {
+                         continue;
+                 }
+                 pending_node = GF_CALLOC (1, sizeof (*pending_node),
+                                           gf_gld_mt_pending_node_t);
+                 if (!pending_node) {
+                         ret = -1;
+                         gf_log (THIS->name, GF_LOG_ERROR,
+                                 "Unable to allocate memory");
+                         goto out;
+                 }
+                 pending_node->node = brickinfo;
+                 pending_node->type = GD_NODE_BRICK;
+                 pending_node->index = brick_index;
+                 list_add_tail (&pending_node->list, selected);
+                 pending_node = NULL;
+         }
+ }
+out:
+        return ret;
+}
- case GD_OP_REMOVE_BRICK:
- ret = glusterd_op_stage_remove_brick (req);
- break;
+static int
+glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx)
+{
+        int                             ret = 0;
+        rpc_clnt_procedure_t            *proc = NULL;
+        glusterd_conf_t                 *priv = NULL;
+        xlator_t                        *this = NULL;
+        glusterd_op_t                   op = GD_OP_NONE;
+        glusterd_req_ctx_t              *req_ctx = NULL;
+        char                            *op_errstr = NULL;
- case GD_OP_LOG_FILENAME:
- ret = glusterd_op_stage_log_filename (req);
- break;
+        this = THIS;
+        priv = this->private;
- case GD_OP_LOG_ROTATE:
- ret = glusterd_op_stage_log_rotate (req);
- break;
+        if (ctx) {
+                req_ctx = ctx;
+        } else {
+                req_ctx = GF_CALLOC (1, sizeof (*req_ctx),
+                                     gf_gld_mt_op_allack_ctx_t);
+                if (!req_ctx) {
+                        ret = -1;
+                        goto out;
+                }
+                op = glusterd_op_get_op ();
+                req_ctx->op = op;
+                uuid_copy (req_ctx->uuid, MY_UUID);
+                ret = glusterd_op_build_payload (&req_ctx->dict, &op_errstr,
+                                                 NULL);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD,
+                                gd_op_list[op]);
+                        if (op_errstr == NULL)
+                                gf_asprintf (&op_errstr,
+                                             OPERRSTR_BUILD_PAYLOAD);
+                        opinfo.op_errstr = op_errstr;
+                        goto out;
+                }
+        }
- case GD_OP_SYNC_VOLUME:
- ret = glusterd_op_stage_sync_volume (req, op_errstr);
- break;
+        proc = &priv->gfs_mgmt->proctable[GLUSTERD_BRICK_OP];
+        if (proc->fn) {
+                ret = proc->fn (NULL, this, req_ctx);
+                if (ret)
+                        goto out;
+        }
- default:
- gf_log ("", GF_LOG_ERROR, "Unknown op %d",
-         req->op);
+        if (!opinfo.pending_count &&
!opinfo.brick_pending_count) { + glusterd_clear_pending_nodes (&opinfo.pending_bricks); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, req_ctx); } - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } -int32_t -glusterd_op_commit_perform (gd1_mgmt_stage_op_req *req, char **op_errstr, - dict_t *rsp_dict) +static int +glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx) { - int ret = -1; + int ret = 0; + glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; + char *op_errstr = NULL; + glusterd_op_t op = GD_OP_NONE; + gd_node_type type = GD_NODE_NONE; + dict_t *op_ctx = NULL; + glusterd_req_ctx_t *req_ctx = NULL; + void *pending_entry = NULL; + xlator_t *this = NULL; - GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + GF_ASSERT (event); + GF_ASSERT (ctx); + ev_ctx = ctx; - switch (req->op) { - case GD_OP_CREATE_VOLUME: - ret = glusterd_op_create_volume (req, op_errstr); - break; + req_ctx = ev_ctx->commit_ctx; + GF_ASSERT (req_ctx); - case GD_OP_START_VOLUME: - ret = glusterd_op_start_volume (req, op_errstr); - break; + op = req_ctx->op; + op_ctx = glusterd_op_get_ctx (); + pending_entry = ev_ctx->pending_node->node; + type = ev_ctx->pending_node->type; - case GD_OP_STOP_VOLUME: - ret = glusterd_op_stop_volume (req); - break; + ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, + pending_entry); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unknown response received "); + ret = -1; + goto out; + } - case GD_OP_DELETE_VOLUME: - ret = glusterd_op_delete_volume (req); - break; + if (opinfo.brick_pending_count > 0) + opinfo.brick_pending_count--; - case GD_OP_ADD_BRICK: - ret = glusterd_op_add_brick (req, op_errstr); - break; + glusterd_handle_node_rsp (req_ctx->dict, pending_entry, op, ev_ctx->rsp_dict, + op_ctx, &op_errstr, type); - case GD_OP_REPLACE_BRICK: - ret = glusterd_op_replace_brick (req, rsp_dict); - break; + if (opinfo.brick_pending_count > 0) + goto out; - case GD_OP_SET_VOLUME: - ret = glusterd_op_set_volume (req); - break; + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, &event->txn_id, + ev_ctx->commit_ctx); - case GD_OP_RESET_VOLUME: - ret = glusterd_op_reset_volume (req); - break; +out: + if (ev_ctx->rsp_dict) + dict_unref (ev_ctx->rsp_dict); + GF_FREE (ev_ctx); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - case GD_OP_REMOVE_BRICK: - ret = glusterd_op_remove_brick (req); - break; + return ret; +} - case GD_OP_LOG_FILENAME: - ret = glusterd_op_log_filename (req); - break; +int32_t +glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr, + struct list_head *selected, dict_t *rsp_dict) +{ + int ret = 0; - case GD_OP_LOG_ROTATE: - ret = glusterd_op_log_rotate (req); - break; + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (op > GD_OP_NONE); + GF_ASSERT (op < GD_OP_MAX); - case GD_OP_SYNC_VOLUME: - ret = glusterd_op_sync_volume (req, op_errstr, rsp_dict); - break; + switch (op) { + case GD_OP_STOP_VOLUME: + ret = glusterd_bricks_select_stop_volume (dict, op_errstr, + selected); + break; - default: - gf_log ("", GF_LOG_ERROR, "Unknown op %d", - req->op); - } + case GD_OP_REMOVE_BRICK: + ret = glusterd_bricks_select_remove_brick (dict, op_errstr, + selected); + break; - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + case GD_OP_PROFILE_VOLUME: + ret = glusterd_bricks_select_profile_volume (dict, op_errstr, + selected); + break; + + case GD_OP_HEAL_VOLUME: + ret = 
glusterd_bricks_select_heal_volume (dict, op_errstr, + selected, rsp_dict); + break; + + case GD_OP_STATUS_VOLUME: + ret = glusterd_bricks_select_status_volume (dict, op_errstr, + selected); + break; + + case GD_OP_DEFRAG_BRICK_VOLUME: + ret = glusterd_bricks_select_rebalance_volume (dict, op_errstr, + selected); + break; + case GD_OP_SNAP: + ret = glusterd_bricks_select_snap (dict, op_errstr, selected); + break; + default: + break; + } + + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -4892,29 +5672,33 @@ glusterd_op_sm_t glusterd_op_state_default [] = { {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_COMMIT_OP {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_ALL_ACK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_MAX }; glusterd_op_sm_t glusterd_op_state_lock_sent [] = { {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_NONE {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_LOCK + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_lock}, //EVENT_LOCK {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_rcvd_lock_acc}, //EVENT_RCVD_ACC {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_send_stage_op}, //EVENT_ALL_ACC {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_RCVD_RJT + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_send_unlock_drain}, //EVENT_RCVD_RJT {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK + {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_MAX }; glusterd_op_sm_t glusterd_op_state_locked [] = { {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_NONE {GD_OP_STATE_LOCKED, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_LOCK + {GD_OP_STATE_LOCKED, glusterd_op_ac_lock}, //EVENT_LOCK {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_RCVD_ACC {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_ALL_ACC {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_STAGE_ACC @@ -4924,61 +5708,177 @@ glusterd_op_sm_t glusterd_op_state_locked [] = { {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_COMMIT_OP {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_ALL_ACK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_MAX }; glusterd_op_sm_t glusterd_op_state_stage_op_sent [] = { {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCK + {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_lock}, //EVENT_LOCK {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_rcvd_stage_op_acc}, //EVENT_RCVD_ACC - 
{GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_send_stage_op}, //EVENT_ALL_ACC - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_send_commit_op}, //EVENT_STAGE_ACC + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_send_brick_op}, //EVENT_ALL_ACC + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_send_brick_op}, //EVENT_STAGE_ACC {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_RCVD_RJT + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_RJT {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK + {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX }; +glusterd_op_sm_t glusterd_op_state_stage_op_failed [] = { + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_ACC + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_RJT + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX +}; + glusterd_op_sm_t glusterd_op_state_staged [] = { {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_NONE {GD_OP_STATE_STAGED, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_LOCK + {GD_OP_STATE_STAGED, glusterd_op_ac_lock}, //EVENT_LOCK {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_RCVD_ACC {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_ALL_ACC {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_STAGE_ACC {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_RCVD_RJT {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_STAGE_OP - {GD_OP_STATE_COMMITED, glusterd_op_ac_commit_op}, //EVENT_COMMIT_OP + {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_send_brick_op}, //EVENT_COMMIT_OP {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_ALL_ACK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_MAX }; +glusterd_op_sm_t glusterd_op_state_brick_op_sent [] = { + {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE + 
{GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_rcvd_brick_op_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_send_commit_op}, //EVENT_ALL_ACK
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_brick_op_failed [] = {
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_brick_committed [] = {
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_lock}, //EVENT_LOCK
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_rcvd_brick_op_acc}, //EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_commit_op}, //EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_MAX
+};
+
+glusterd_op_sm_t glusterd_op_state_brick_commit_failed [] = {
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_NONE
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED,
glusterd_op_ac_lock}, //EVENT_LOCK + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_ACC + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_send_commit_failed}, //EVENT_ALL_ACK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_MAX +}; + +glusterd_op_sm_t glusterd_op_state_commit_op_failed [] = { + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_ACC + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_RJT + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX +}; + glusterd_op_sm_t glusterd_op_state_commit_op_sent [] = { {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCK + {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_lock}, //EVENT_LOCK {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_rcvd_commit_op_acc}, //EVENT_RCVD_ACC {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACC {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_COMMIT_ACC - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_commit_error}, //EVENT_RCVD_RJT + {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_RJT {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_START_UNLOCK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK + {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX }; 
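/* Editor's note, not part of the original patch: each glusterd_op_state_*
 * table above is indexed by glusterd_op_sm_event_type_t, and every entry
 * pairs the next state with the action callback to invoke for that event.
 * Dispatch in glusterd_op_sm () therefore reduces to roughly the sketch
 * below; the member names (next_state, handler) follow the glusterd_op_sm_t
 * definition in glusterd-op-sm.h, but treat this as an illustrative outline
 * rather than a verbatim excerpt:
 *
 *     glusterd_op_sm_t *state;
 *
 *     state = glusterd_op_state_table[opinfo.state.state];
 *     ret   = state[event_type].handler (event, event->ctx);
 *     opinfo.state.state = state[event_type].next_state;
 *
 * One consequence of this layout is that adding a new event means adding
 * one row, in enum order, to every state table in this file.
 */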
-glusterd_op_sm_t glusterd_op_state_commited [] = { +glusterd_op_sm_t glusterd_op_state_committed [] = { {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_NONE {GD_OP_STATE_COMMITED, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_LOCK + {GD_OP_STATE_COMMITED, glusterd_op_ac_lock}, //EVENT_LOCK {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_RCVD_ACC {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_ALL_ACC {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_STAGE_ACC @@ -4988,25 +5888,46 @@ glusterd_op_sm_t glusterd_op_state_commited [] = { {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_COMMIT_OP {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_ALL_ACK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_MAX }; glusterd_op_sm_t glusterd_op_state_unlock_sent [] = { {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_NONE {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none},//EVENT_START_LOCK - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_LOCK + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_lock}, //EVENT_LOCK {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_rcvd_unlock_acc}, //EVENT_RCVD_ACC {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlocked_all}, //EVENT_ALL_ACC {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_RCVD_RJT + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_rcvd_unlock_acc}, //EVENT_RCVD_RJT {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_unlock}, //EVENT_UNLOCK - {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_MAX }; +glusterd_op_sm_t glusterd_op_state_ack_drain [] = { + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_NONE + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none},//EVENT_START_LOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_lock}, //EVENT_LOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_send_unlock_drain}, //EVENT_RCVD_ACC + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_ALL_ACC + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_STAGE_ACC + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_COMMIT_ACC + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_send_unlock_drain}, //EVENT_RCVD_RJT + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_STAGE_OP + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_COMMIT_OP + {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK + {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP + {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_MAX +}; glusterd_op_sm_t *glusterd_op_state_table [] = { glusterd_op_state_default, @@ -5015,8 +5936,15 @@ glusterd_op_sm_t *glusterd_op_state_table 
[] = { glusterd_op_state_stage_op_sent, glusterd_op_state_staged, glusterd_op_state_commit_op_sent, - glusterd_op_state_commited, - glusterd_op_state_unlock_sent + glusterd_op_state_committed, + glusterd_op_state_unlock_sent, + glusterd_op_state_stage_op_failed, + glusterd_op_state_commit_op_failed, + glusterd_op_state_brick_op_sent, + glusterd_op_state_brick_op_failed, + glusterd_op_state_brick_committed, + glusterd_op_state_brick_commit_failed, + glusterd_op_state_ack_drain }; int @@ -5043,7 +5971,7 @@ glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type, int glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, - void *ctx) + uuid_t *txn_id, void *ctx) { int32_t ret = -1; glusterd_op_sm_event_t *event = NULL; @@ -5058,7 +5986,10 @@ glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, event->ctx = ctx; - gf_log ("glusterd", GF_LOG_NORMAL, "Enqueuing event: '%s'", + if (txn_id) + uuid_copy (event->txn_id, *txn_id); + + gf_log (THIS->name, GF_LOG_DEBUG, "Enqueue event: '%s'", glusterd_op_sm_event_name_get (event->event)); list_add_tail (&event->list, &gd_op_sm_queue); @@ -5067,6 +5998,24 @@ out: } void +glusterd_destroy_req_ctx (glusterd_req_ctx_t *ctx) +{ + if (!ctx) + return; + if (ctx->dict) + dict_unref (ctx->dict); + GF_FREE (ctx); +} + +void +glusterd_destroy_local_unlock_ctx (uuid_t *ctx) +{ + if (!ctx) + return; + GF_FREE (ctx); +} + +void glusterd_destroy_op_event_ctx (glusterd_op_sm_event_t *event) { if (!event) @@ -5078,10 +6027,11 @@ glusterd_destroy_op_event_ctx (glusterd_op_sm_event_t *event) glusterd_destroy_lock_ctx (event->ctx); break; case GD_OP_EVENT_STAGE_OP: - glusterd_destroy_stage_ctx (event->ctx); + case GD_OP_EVENT_ALL_ACK: + glusterd_destroy_req_ctx (event->ctx); break; - case GD_OP_EVENT_COMMIT_OP: - glusterd_destroy_commit_ctx (event->ctx); + case GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP: + glusterd_destroy_local_unlock_ctx (event->ctx); break; default: break; @@ -5094,11 +6044,21 @@ glusterd_op_sm () glusterd_op_sm_event_t *event = NULL; glusterd_op_sm_event_t *tmp = NULL; int ret = -1; + int lock_err = 0; glusterd_op_sm_ac_fn handler = NULL; glusterd_op_sm_t *state = NULL; glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + xlator_t *this = NULL; + glusterd_op_info_t txn_op_info; - (void ) pthread_mutex_lock (&gd_op_sm_lock); + this = THIS; + GF_ASSERT (this); + + if ((lock_err = pthread_mutex_trylock (&gd_op_sm_lock))) { + gf_log (this->name, GF_LOG_ERROR, "lock failed due to %s", + strerror (lock_err)); + goto lock_failed; + } while (!list_empty (&gd_op_sm_queue)) { @@ -5106,9 +6066,24 @@ glusterd_op_sm () list_del_init (&event->list); event_type = event->event; - gf_log ("", GF_LOG_NORMAL, "Dequeued event of type: '%s'", + gf_log (this->name, GF_LOG_DEBUG, "Dequeued event of " + "type: '%s'", glusterd_op_sm_event_name_get(event_type)); + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", + uuid_utoa (event->txn_id)); + + ret = glusterd_get_txn_opinfo (&event->txn_id, + &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get transaction's opinfo"); + glusterd_destroy_op_event_ctx (event); + GF_FREE (event); + continue; + } else + opinfo = txn_op_info; + state = glusterd_op_state_table[opinfo.state.state]; GF_ASSERT (state); @@ -5119,7 +6094,7 @@ glusterd_op_sm () ret = handler (event, event->ctx); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "handler returned: %d", ret); glusterd_destroy_op_event_ctx (event); GF_FREE (event); @@ -5130,7 +6105,7 @@ 
glusterd_op_sm ()
 event_type);
 if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
+ gf_log (this->name, GF_LOG_ERROR,
 "Unable to transition " "state from '%s' to '%s'",
 glusterd_op_sm_state_name_get(opinfo.state.state),
@@ -5139,8 +6114,27 @@ glusterd_op_sm ()
 return ret;
 }
+                        if ((state[event_type].next_state ==
+                             GD_OP_STATE_DEFAULT) &&
+                            (event_type == GD_OP_EVENT_UNLOCK)) {
+                                /* Clearing the transaction opinfo */
+                                ret = glusterd_clear_txn_opinfo(&event->txn_id);
+                                if (ret)
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "Unable to clear "
+                                                "transaction's opinfo");
+                        } else {
+                                ret = glusterd_set_txn_opinfo (&event->txn_id,
+                                                               &opinfo);
+                                if (ret)
+                                        gf_log (this->name, GF_LOG_ERROR,
+                                                "Unable to set "
+                                                "transaction's opinfo");
+                        }
+
 glusterd_destroy_op_event_ctx (event);
 GF_FREE (event);
+
 }
 }
@@ -5148,6 +6142,8 @@ glusterd_op_sm ()
 (void ) pthread_mutex_unlock (&gd_op_sm_lock);
 ret = 0;
+lock_failed:
+
 return ret;
 }
@@ -5158,9 +6154,7 @@ glusterd_op_set_op (glusterd_op_t op)
 GF_ASSERT (op < GD_OP_MAX);
 GF_ASSERT (op > GD_OP_NONE);
- opinfo.op[op] = 1;
- opinfo.pending_op[op] = 1;
- opinfo.commit_op[op] = 1;
+ opinfo.op = op;
 return 0;
@@ -5170,40 +6164,8 @@ int32_t
 glusterd_op_get_op ()
 {
- int i = 0;
- int32_t ret = 0;
-
- for ( i = 0; i < GD_OP_MAX; i++) {
- if (opinfo.op[i])
- break;
- }
-
- if ( i == GD_OP_MAX)
- ret = -1;
- else
- ret = i;
-
- return ret;
-
-}
-
-
-int32_t
-glusterd_op_set_cli_op (gf_mgmt_procnum op)
-{
-
- int32_t ret;
-
- ret = pthread_mutex_trylock (&opinfo.lock);
-
- if (ret)
- goto out;
-
- opinfo.cli_op = op;
+ return opinfo.op;
-out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
 }
 int32_t
@@ -5216,132 +6178,59 @@ glusterd_op_set_req (rpcsvc_request_t *req)
 }
 int32_t
-glusterd_op_clear_pending_op (glusterd_op_t op)
-{
-
- GF_ASSERT (op < GD_OP_MAX);
- GF_ASSERT (op > GD_OP_NONE);
-
- opinfo.pending_op[op] = 0;
-
- return 0;
-
-}
-
-int32_t
-glusterd_op_clear_commit_op (glusterd_op_t op)
-{
-
- GF_ASSERT (op < GD_OP_MAX);
- GF_ASSERT (op > GD_OP_NONE);
-
- opinfo.commit_op[op] = 0;
-
- return 0;
-
-}
-
-int32_t
 glusterd_op_clear_op (glusterd_op_t op)
 {
- GF_ASSERT (op < GD_OP_MAX);
- GF_ASSERT (op > GD_OP_NONE);
-
- opinfo.op[op] = 0;
-
- return 0;
-
-}
-
-int32_t
-glusterd_op_set_ctx (glusterd_op_t op, void *ctx)
-{
-
- GF_ASSERT (op < GD_OP_MAX);
- GF_ASSERT (op > GD_OP_NONE);
-
- opinfo.op_ctx[op] = ctx;
+ opinfo.op = GD_OP_NONE;
 return 0;
 }
 int32_t
-glusterd_op_free_ctx (glusterd_op_t op, void *ctx, gf_boolean_t ctx_free)
+glusterd_op_free_ctx (glusterd_op_t op, void *ctx)
 {
- GF_ASSERT (op < GD_OP_MAX);
- GF_ASSERT (op > GD_OP_NONE);
-
- if (ctx && ctx_free) {
+ if (ctx) {
 switch (op) {
 case GD_OP_CREATE_VOLUME:
+ case GD_OP_DELETE_VOLUME:
 case GD_OP_STOP_VOLUME:
 case GD_OP_ADD_BRICK:
 case GD_OP_REMOVE_BRICK:
 case GD_OP_REPLACE_BRICK:
- case GD_OP_LOG_FILENAME:
 case GD_OP_LOG_ROTATE:
 case GD_OP_SYNC_VOLUME:
 case GD_OP_SET_VOLUME:
 case GD_OP_START_VOLUME:
+ case GD_OP_RESET_VOLUME:
+ case GD_OP_GSYNC_SET:
+ case GD_OP_QUOTA:
+ case GD_OP_PROFILE_VOLUME:
+ case GD_OP_STATUS_VOLUME:
+ case GD_OP_REBALANCE:
+ case GD_OP_HEAL_VOLUME:
+ case GD_OP_STATEDUMP_VOLUME:
+ case GD_OP_CLEARLOCKS_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
 dict_unref (ctx);
 break;
- case GD_OP_DELETE_VOLUME:
- GF_FREE (ctx);
- break;
 default:
+ GF_ASSERT (0);
 break;
 }
 }
- return 0;
-
-}
-
-void *
-glusterd_op_get_ctx (glusterd_op_t op)
-{
- GF_ASSERT (op < GD_OP_MAX);
- GF_ASSERT (op > GD_OP_NONE);
-
- return opinfo.op_ctx[op];
-
-}
-
-int32_t
-glusterd_op_set_ctx_free
(glusterd_op_t op, gf_boolean_t ctx_free) -{ - - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - - opinfo.ctx_free[op] = ctx_free; - - return 0; - -} - -int32_t -glusterd_op_clear_ctx_free (glusterd_op_t op) -{ - - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - - opinfo.ctx_free[op] = _gf_false; + glusterd_op_reset_ctx (); return 0; } -gf_boolean_t -glusterd_op_get_ctx_free (glusterd_op_t op) +void * +glusterd_op_get_ctx () { - GF_ASSERT (op < GD_OP_MAX); - GF_ASSERT (op > GD_OP_NONE); - return opinfo.ctx_free[op]; + return opinfo.op_ctx; } @@ -5352,9 +6241,3 @@ glusterd_op_sm_init () pthread_mutex_init (&gd_op_sm_lock, NULL); return 0; } - -int32_t -glusterd_opinfo_unlock(){ - return (pthread_mutex_unlock(&opinfo.lock)); -} - diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index f65880512..cf57b78e0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _GLUSTERD_OP_SM_H_ #define _GLUSTERD_OP_SM_H_ @@ -25,6 +15,10 @@ #include "config.h" #endif +#ifndef GSYNC_CONF_TEMPLATE +#define GSYNC_CONF_TEMPLATE GEOREP"/gsyncd_template.conf" +#endif + #include <pthread.h> #include "uuid.h" @@ -38,6 +32,8 @@ #include "protocol-common.h" #define GD_VOLUME_NAME_MAX 256 +#define GD_OP_PROTECTED (0x02) +#define GD_OP_UNPROTECTED (0x04) typedef enum glusterd_op_sm_state_ { GD_OP_STATE_DEFAULT = 0, @@ -48,6 +44,13 @@ typedef enum glusterd_op_sm_state_ { GD_OP_STATE_COMMIT_OP_SENT, GD_OP_STATE_COMMITED, GD_OP_STATE_UNLOCK_SENT, + GD_OP_STATE_STAGE_OP_FAILED, + GD_OP_STATE_COMMIT_OP_FAILED, + GD_OP_STATE_BRICK_OP_SENT, + GD_OP_STATE_BRICK_OP_FAILED, + GD_OP_STATE_BRICK_COMMITTED, + GD_OP_STATE_BRICK_COMMIT_FAILED, + GD_OP_STATE_ACK_DRAIN, GD_OP_STATE_MAX, } glusterd_op_sm_state_t; @@ -64,6 +67,8 @@ typedef enum glusterd_op_sm_event_type_ { GD_OP_EVENT_COMMIT_OP, GD_OP_EVENT_UNLOCK, GD_OP_EVENT_START_UNLOCK, + GD_OP_EVENT_ALL_ACK, + GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP, GD_OP_EVENT_MAX } glusterd_op_sm_event_type_t; @@ -72,6 +77,7 @@ struct glusterd_op_sm_event_ { struct list_head list; void *ctx; glusterd_op_sm_event_type_t event; + uuid_t txn_id; }; typedef struct glusterd_op_sm_event_ glusterd_op_sm_event_t; @@ -91,29 +97,20 @@ typedef struct glusterd_op_sm_state_info_ { struct glusterd_op_info_ { glusterd_op_sm_state_info_t state; int32_t pending_count; + int32_t brick_pending_count; int32_t op_count; - glusterd_op_t op[GD_OP_MAX]; - glusterd_op_t pending_op[GD_OP_MAX]; - glusterd_op_t commit_op[GD_OP_MAX]; + glusterd_op_t op; struct list_head op_peers; - void *op_ctx[GD_OP_MAX]; + void *op_ctx; rpcsvc_request_t *req; int32_t op_ret; int32_t op_errno; - pthread_mutex_t lock; - int32_t cli_op; - gf_boolean_t ctx_free[GD_OP_MAX]; char *op_errstr; + struct list_head pending_bricks; }; typedef struct glusterd_op_info_ glusterd_op_info_t; -struct glusterd_op_delete_volume_ctx_ { - char volume_name[GD_VOLUME_NAME_MAX]; -}; - -typedef struct glusterd_op_delete_volume_ctx_ glusterd_op_delete_volume_ctx_t; - struct glusterd_op_log_filename_ctx_ { char volume_name[GD_VOLUME_NAME_MAX]; char brick[GD_VOLUME_NAME_MAX]; @@ -123,31 +120,64 @@ typedef struct glusterd_op_log_filename_ctx_ glusterd_op_log_filename_ctx_t; struct glusterd_op_lock_ctx_ { uuid_t uuid; + dict_t *dict; rpcsvc_request_t *req; }; typedef struct glusterd_op_lock_ctx_ glusterd_op_lock_ctx_t; -struct glusterd_op_stage_ctx_ { - rpcsvc_request_t *req; - gd1_mgmt_stage_op_req stage_req; +struct glusterd_req_ctx_ { + rpcsvc_request_t *req; + u_char uuid[16]; + int op; + dict_t *dict; }; -typedef struct glusterd_op_stage_ctx_ glusterd_op_stage_ctx_t; - -struct glusterd_op_commit_ctx_ { - rpcsvc_request_t *req; - gd1_mgmt_stage_op_req stage_req; -}; - -typedef struct glusterd_op_commit_ctx_ glusterd_op_commit_ctx_t; +typedef struct glusterd_req_ctx_ glusterd_req_ctx_t; + +typedef struct glusterd_op_brick_rsp_ctx_ { + int op_ret; + char *op_errstr; + dict_t *rsp_dict; + glusterd_req_ctx_t *commit_ctx; + glusterd_pending_node_t *pending_node; +} glusterd_op_brick_rsp_ctx_t; + +typedef struct glusterd_pr_brick_rsp_conv_t { + int count; + dict_t *dict; +} glusterd_pr_brick_rsp_conv_t; + +typedef struct glusterd_heal_rsp_conv_ { + dict_t *dict; + glusterd_volinfo_t *volinfo; + xlator_t *this; +} glusterd_heal_rsp_conv_t; + +typedef struct glusterd_status_rsp_conv_ { + int count; + int brick_index_max; + int other_count; + dict_t *dict; +} glusterd_status_rsp_conv_t; + +typedef struct 
glusterd_gsync_status_temp { + dict_t *rsp_dict; + glusterd_volinfo_t *volinfo; + char *node; +}glusterd_gsync_status_temp_t; + +typedef enum cli_cmd_type_ { + PER_REPLICA, + ALL_REPLICA, + } cli_cmd_type; int glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type, glusterd_op_sm_event_t **new_event); int glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, - void *ctx); + uuid_t *txn_id, void *ctx); int glusterd_op_sm_init (); @@ -156,76 +186,114 @@ int glusterd_op_sm (); int32_t -glusterd_op_set_ctx (glusterd_op_t op, void *ctx); +glusterd_op_set_ctx (void *ctx); int32_t glusterd_op_set_op (glusterd_op_t op); -int32_t -glusterd_op_clear_pending_op (glusterd_op_t op); - -int32_t -glusterd_op_clear_commit_op (glusterd_op_t op); - int -glusterd_op_build_payload (glusterd_op_t op, gd1_mgmt_stage_op_req **req); +glusterd_op_build_payload (dict_t **req, char **op_errstr, dict_t *op_ctx); int32_t -glusterd_op_stage_validate (gd1_mgmt_stage_op_req *req, char **op_errstr, +glusterd_op_stage_validate (glusterd_op_t op, dict_t *req, char **op_errstr, dict_t *rsp_dict); int32_t -glusterd_op_commit_perform (gd1_mgmt_stage_op_req *req, char **op_errstr, +glusterd_op_commit_perform (glusterd_op_t op, dict_t *req, char **op_errstr, dict_t* dict); -void * -glusterd_op_get_ctx (glusterd_op_t op); +int32_t +glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + char *err_str, size_t err_len); int32_t -glusterd_op_set_req (rpcsvc_request_t *req); +glusterd_op_txn_complete (); + +void * +glusterd_op_get_ctx (); int32_t -glusterd_op_set_cli_op (gf_mgmt_procnum op); +glusterd_op_set_req (rpcsvc_request_t *req); int32_t -glusterd_op_send_cli_response (int32_t op, int32_t op_ret, +glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, int32_t op_errno, rpcsvc_request_t *req, void *ctx, char *op_errstr); int32_t glusterd_op_get_op (); int32_t -glusterd_op_clear_pending_op (glusterd_op_t op); +glusterd_op_clear_op (); int32_t -glusterd_op_clear_commit_op (glusterd_op_t op); +glusterd_op_free_ctx (glusterd_op_t op, void *ctx); -int32_t -glusterd_op_clear_op (glusterd_op_t op); +int +glusterd_check_option_exists(char *optstring, char **completion); -int32_t -glusterd_op_free_ctx (glusterd_op_t op, void *ctx, gf_boolean_t ctx_free); +int +set_xlator_option (dict_t *dict, char *key, char *value); -int32_t -glusterd_opinfo_unlock(); +void +glusterd_do_replace_brick (void *data); -int32_t -glusterd_op_set_ctx_free (glusterd_op_t op, gf_boolean_t ctx_free); +char* +glusterd_op_sm_state_name_get (int state); +char* +glusterd_op_sm_event_name_get (int event); +int32_t +glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr, + struct list_head *selected, dict_t *rsp_dict); +int +glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo, + gd1_mgmt_brick_op_req **req, dict_t *dict); +int +glusterd_node_op_build_payload (glusterd_op_t op, gd1_mgmt_brick_op_req **req, + dict_t *dict); int32_t -glusterd_op_clear_ctx_free (glusterd_op_t op); +glusterd_handle_brick_rsp (void *pending_entry, glusterd_op_t op, + dict_t *rsp_dict, dict_t *ctx_dict, char **op_errstr, + gd_node_type type); -gf_boolean_t -glusterd_op_get_ctx_free (glusterd_op_t op); +dict_t* +glusterd_op_init_commit_rsp_dict (glusterd_op_t op); + +void +glusterd_op_modify_op_ctx (glusterd_op_t op, void *op_ctx); +int32_t +glusterd_volume_stats_read_perf (char *brick_path, int32_t blk_size, + int32_t blk_count, double *throughput, double *time); +int32_t 
+glusterd_volume_stats_write_perf (char *brick_path, int32_t blk_size, + int32_t blk_count, double *throughput, double *time); +gf_boolean_t +glusterd_is_volume_started (glusterd_volinfo_t *volinfo); int -glusterd_check_option_exists(char *optstring, char **completion); +glusterd_start_bricks (glusterd_volinfo_t *volinfo); +gf_boolean_t +glusterd_are_all_volumes_stopped (); +int +glusterd_stop_bricks (glusterd_volinfo_t *volinfo); +int +gsync_status (char *master, char *slave, char *conf_path, int *status); int -set_xlator_option (dict_t *dict, char *key, char *value); +glusterd_check_gsync_running (glusterd_volinfo_t *volinfo, gf_boolean_t *flag); -void -glusterd_do_replace_brick (void *data); int -glusterd_options_reset (glusterd_volinfo_t *volinfo); +glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, + dict_t *op_ctx); +int +glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg); + +int32_t +glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo); + +int32_t +glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo); + +int32_t +glusterd_clear_txn_opinfo (uuid_t *txn_id); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c index 67d10bfa6..a153ca1a9 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.c +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _CONFIG_H #define _CONFIG_H @@ -30,7 +20,8 @@ #include "glusterd.h" #include "glusterd-utils.h" -#include "portmap.h" +#include "portmap-xdr.h" +#include "xdr-generic.h" #include "protocol-common.h" #include "rpcsvc.h" @@ -61,8 +52,8 @@ pmap_port_isfree (int port) } -struct pmap_registry * -pmap_registry_new (void) +static struct pmap_registry * +pmap_registry_new (xlator_t *this) { struct pmap_registry *pmap = NULL; int i = 0; @@ -78,8 +69,8 @@ pmap_registry_new (void) pmap->ports[i].type = GF_PMAP_PORT_FOREIGN; } - pmap->base_port = GF_DEFAULT_BASE_PORT + 2; - pmap->last_alloc = GF_DEFAULT_BASE_PORT + 2; + pmap->base_port = pmap->last_alloc = + ((glusterd_conf_t *)(this->private))->base_port; return pmap; } @@ -95,7 +86,7 @@ pmap_registry_get (xlator_t *this) pmap = priv->pmap; if (!pmap) { - pmap = pmap_registry_new (); + pmap = pmap_registry_new (this); if (!pmap) return NULL; priv->pmap = pmap; @@ -227,8 +218,7 @@ pmap_registry_bind (xlator_t *this, int port, const char *brickname, p = port; pmap->ports[p].type = type; - if (pmap->ports[p].brickname) - free (pmap->ports[p].brickname); + free (pmap->ports[p].brickname); pmap->ports[p].brickname = strdup (brickname); pmap->ports[p].type = type; pmap->ports[p].xprt = xprt; @@ -280,8 +270,7 @@ remove: gf_log ("pmap", GF_LOG_INFO, "removing brick %s on port %d", pmap->ports[p].brickname, p); - if (pmap->ports[p].brickname) - free (pmap->ports[p].brickname); + free (pmap->ports[p].brickname); pmap->ports[p].brickname = NULL; pmap->ports[p].xprt = NULL; @@ -290,36 +279,18 @@ out: return 0; } - -typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *data); - - -static int -xdr_to_glusterfs_req (rpcsvc_request_t *req, void *arg, gfs_serialize_t sfunc) -{ - int ret = -1; - - if (!req) - return -1; - - ret = sfunc (req->msg[0], arg); - - if (ret > 0) - ret = 0; - - return ret; -} - - int -gluster_pmap_portbybrick (rpcsvc_request_t *req) +__gluster_pmap_portbybrick (rpcsvc_request_t *req) { pmap_port_by_brick_req args = {0,}; pmap_port_by_brick_rsp rsp = {0,}; char *brick = NULL; int port = 0; + int ret = -1; - if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_port_by_brick_req)) { + ret = xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_pmap_port_by_brick_req); + if (ret < 0) { req->rpc_err = GARBAGE_ARGS; goto fail; } @@ -335,21 +306,30 @@ gluster_pmap_portbybrick (rpcsvc_request_t *req) fail: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (gd_serialize_t)xdr_from_pmap_port_by_brick_rsp); - if (args.brick) - free (args.brick);//malloced by xdr + (xdrproc_t)xdr_pmap_port_by_brick_rsp); + free (args.brick);//malloced by xdr return 0; } int -gluster_pmap_brickbyport (rpcsvc_request_t *req) +gluster_pmap_portbybrick (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __gluster_pmap_portbybrick); +} + + +int +__gluster_pmap_brickbyport (rpcsvc_request_t *req) { pmap_brick_by_port_req args = {0,}; pmap_brick_by_port_rsp rsp = {0,}; + int ret = -1; - if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_brick_by_port_req)) { + ret = xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_pmap_brick_by_port_req); + if (ret < 0) { req->rpc_err = GARBAGE_ARGS; goto fail; } @@ -362,11 +342,19 @@ gluster_pmap_brickbyport (rpcsvc_request_t *req) fail: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (gd_serialize_t)xdr_from_pmap_brick_by_port_rsp); + (xdrproc_t)xdr_pmap_brick_by_port_rsp); return 0; } + +int +gluster_pmap_brickbyport (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, 
__gluster_pmap_brickbyport); +} + + static int glusterd_brick_update_signin (glusterd_brickinfo_t *brickinfo, gf_boolean_t value) @@ -377,13 +365,16 @@ glusterd_brick_update_signin (glusterd_brickinfo_t *brickinfo, } int -gluster_pmap_signup (rpcsvc_request_t *req) +__gluster_pmap_signup (rpcsvc_request_t *req) { pmap_signup_req args = {0,}; pmap_signup_rsp rsp = {0,}; + int ret = -1; - if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_signup_req)) { + ret = xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_pmap_signup_req); + if (ret < 0) { req->rpc_err = GARBAGE_ARGS; goto fail; } @@ -393,22 +384,29 @@ gluster_pmap_signup (rpcsvc_request_t *req) fail: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (gd_serialize_t)xdr_from_pmap_signup_rsp); - if (args.brick) - free (args.brick);//malloced by xdr + (xdrproc_t)xdr_pmap_signup_rsp); + free (args.brick);//malloced by xdr return 0; } int -gluster_pmap_signin (rpcsvc_request_t *req) +gluster_pmap_signup (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __gluster_pmap_signup); +} + +int +__gluster_pmap_signin (rpcsvc_request_t *req) { pmap_signin_req args = {0,}; pmap_signin_rsp rsp = {0,}; glusterd_brickinfo_t *brickinfo = NULL; int ret = -1; - if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_signin_req)) { + ret = xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_pmap_signin_req); + if (ret < 0) { req->rpc_err = GARBAGE_ARGS; goto fail; } @@ -418,14 +416,10 @@ gluster_pmap_signin (rpcsvc_request_t *req) ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true, &brickinfo); - if (!ret) - glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED); - fail: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (gd_serialize_t)xdr_from_pmap_signin_rsp); - if (args.brick) - free (args.brick);//malloced by xdr + (xdrproc_t)xdr_pmap_signin_rsp); + free (args.brick);//malloced by xdr if (!ret) glusterd_brick_update_signin (brickinfo, _gf_true); @@ -434,16 +428,24 @@ fail: } +int +gluster_pmap_signin (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __gluster_pmap_signin); +} + int -gluster_pmap_signout (rpcsvc_request_t *req) +__gluster_pmap_signout (rpcsvc_request_t *req) { pmap_signout_req args = {0,}; pmap_signout_rsp rsp = {0,}; int ret = -1; glusterd_brickinfo_t *brickinfo = NULL; - if (xdr_to_glusterfs_req (req, &args, xdr_to_pmap_signout_req)) { + ret = xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_pmap_signout_req); + if (ret < 0) { //failed to decode msg; req->rpc_err = GARBAGE_ARGS; goto fail; @@ -452,16 +454,12 @@ gluster_pmap_signout (rpcsvc_request_t *req) rsp.op_ret = pmap_registry_remove (THIS, args.port, args.brick, GF_PMAP_PORT_BRICKSERVER, req->trans); - ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true, + ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true, &brickinfo); - if (!ret) - glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); - fail: glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - (gd_serialize_t)xdr_from_pmap_signout_rsp); - if (args.brick) - free (args.brick);//malloced by xdr + (xdrproc_t)xdr_pmap_signout_rsp); + free (args.brick);//malloced by xdr if (!ret) glusterd_brick_update_signin (brickinfo, _gf_false); @@ -469,18 +467,19 @@ fail: return 0; } +int +gluster_pmap_signout (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __gluster_pmap_signout); +} + rpcsvc_actor_t gluster_pmap_actors[] = { - [GF_PMAP_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, NULL }, - [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", 
GF_PMAP_PORTBYBRICK, - gluster_pmap_portbybrick, NULL, NULL }, - [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", GF_PMAP_BRICKBYPORT, - gluster_pmap_brickbyport, NULL, NULL }, - [GF_PMAP_SIGNIN] = {"SIGNIN", GF_PMAP_SIGNIN, - gluster_pmap_signin, NULL, NULL }, - [GF_PMAP_SIGNOUT] = {"SIGNOUT", GF_PMAP_SIGNOUT, - gluster_pmap_signout, NULL, NULL }, - [GF_PMAP_SIGNUP] = {"SIGNUP", GF_PMAP_SIGNUP, - gluster_pmap_signup, NULL, NULL }, + [GF_PMAP_NULL] = {"NULL", GF_PMAP_NULL, NULL, NULL, 0, DRC_NA}, + [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", GF_PMAP_PORTBYBRICK, gluster_pmap_portbybrick, NULL, 0, DRC_NA}, + [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", GF_PMAP_BRICKBYPORT, gluster_pmap_brickbyport, NULL, 0, DRC_NA}, + [GF_PMAP_SIGNIN] = {"SIGNIN", GF_PMAP_SIGNIN, gluster_pmap_signin, NULL, 0, DRC_NA}, + [GF_PMAP_SIGNOUT] = {"SIGNOUT", GF_PMAP_SIGNOUT, gluster_pmap_signout, NULL, 0, DRC_NA}, + [GF_PMAP_SIGNUP] = {"SIGNUP", GF_PMAP_SIGNUP, gluster_pmap_signup, NULL, 0, DRC_NA}, }; diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h index a87efed5a..6336ee998 100644 --- a/xlators/mgmt/glusterd/src/glusterd-pmap.h +++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h @@ -1,22 +1,12 @@ /* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _GLUSTERD_PMAP_H_ #define _GLUSTERD_PMAP_H_ @@ -38,6 +28,8 @@ #include "rpcsvc.h" +#define GF_IANA_PRIV_PORTS_START 49152 /* RFC 6335 */ + struct pmap_port_status { gf_pmap_port_type_t type; char *brickname; diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c new file mode 100644 index 000000000..318267199 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-quota.c @@ -0,0 +1,839 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "common-utils.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "run.h" + +#include <sys/wait.h> + +int +__glusterd_handle_quota (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_QUOTA; + char operation[256] = {0, }; + char *volname = NULL; + int32_t type = 0; + char msg[2048] = {0,}; + xlator_t *this = NULL; + + GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (msg, sizeof (msg), "Unable to decode the " + "command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (msg, sizeof (msg), "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name, " + "while handling quota command"); + goto out; + } + + ret = dict_get_int32 (dict, "type", &type); + if (ret) { + snprintf (msg, sizeof (msg), "Unable to get type of command"); + gf_log (this->name, GF_LOG_ERROR, "Unable to get type of cmd, " + "while handling quota command"); + goto out; + } + + switch (type) { + case GF_QUOTA_OPTION_TYPE_ENABLE: + strncpy (operation, "enable", sizeof (operation)); + break; + + case GF_QUOTA_OPTION_TYPE_DISABLE: + strncpy (operation, "disable", sizeof (operation)); + break; + + case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE: + strncpy (operation, "limit-usage", sizeof (operation)); + break; + + case GF_QUOTA_OPTION_TYPE_REMOVE: + strncpy (operation, "remove", sizeof (operation)); + break; + } + ret = glusterd_op_begin_synctask (req, GD_OP_QUOTA, dict); + +out: + if (ret) { + if (msg[0] == '\0') + snprintf (msg, sizeof (msg), "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, msg); + } + + return ret; +} + +int +glusterd_handle_quota (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, __glusterd_handle_quota); +} + +int32_t +glusterd_check_if_quota_trans_enabled (glusterd_volinfo_t *volinfo) +{ + int32_t ret = 0; + int flag = _gf_false; + + flag = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA); + if (flag == -1) { + gf_log ("", GF_LOG_ERROR, "failed to get the quota status"); + ret = -1; + goto out; + } + + if (flag == _gf_false) { + gf_log ("", GF_LOG_ERROR, "first enable the quota translator"); + ret = -1; + goto out; + } + ret = 0; +out: + return ret; +} + +/* At the end of the function, the variable found will be set + * to true if the path to be removed was present in the limit-list, + * else will be false. 
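+ *
+ * The limit-list itself is a single comma-separated string of
+ * "path:limit" pairs. For illustration (values are hypothetical):
+ * given quota_limits = "/docs:10GB,/media:5GB" and path = "/docs",
+ * the function rewrites the list to "/media:5GB" and sets *found.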
+ */ +int32_t +_glusterd_quota_remove_limits (char **quota_limits, char *path, + gf_boolean_t *found) +{ + int ret = 0; + int i = 0; + int size = 0; + int len = 0; + int pathlen = 0; + int skiplen = 0; + int flag = 0; + char *limits = NULL; + char *qlimits = NULL; + + if (found != NULL) + *found = _gf_false; + + if (*quota_limits == NULL) + return -1; + + qlimits = *quota_limits; + + pathlen = strlen (path); + + len = strlen (qlimits); + + limits = GF_CALLOC (len + 1, sizeof (char), gf_gld_mt_char); + if (!limits) + return -1; + + while (i < len) { + if (!memcmp ((void *) &qlimits [i], (void *)path, pathlen)) + if (qlimits [i + pathlen] == ':') { + flag = 1; + if (found != NULL) + *found = _gf_true; + } + + while (qlimits [i + size] != ',' && + qlimits [i + size] != '\0') + size++; + + if (!flag) { + memcpy ((void *) &limits [i], (void *) &qlimits [i], size + 1); + } else { + skiplen = size + 1; + size = len - i - size; + memcpy ((void *) &limits [i], (void *) &qlimits [i + skiplen], size); + break; + } + + i += size + 1; + size = 0; + } + + if (!flag) { + ret = 1; + } else { + len = strlen (limits); + + if (len == 0) { + GF_FREE (qlimits); + + *quota_limits = NULL; + + goto out; + } + + if (limits[len - 1] == ',') { + limits[len - 1] = '\0'; + len --; + } + + GF_FREE (qlimits); + + qlimits = GF_CALLOC (len + 1, sizeof (char), gf_gld_mt_char); + + if (!qlimits) { + ret = -1; + goto out; + } + + memcpy ((void *) qlimits, (void *) limits, len + 1); + + *quota_limits = qlimits; + + ret = 0; + } + +out: + GF_FREE (limits); + + return ret; +} + +int32_t +glusterd_quota_initiate_fs_crawl (glusterd_conf_t *priv, char *volname) +{ + pid_t pid; + int32_t ret = 0; + int status = 0; + char mountdir[] = "/tmp/mntXXXXXX"; + runner_t runner = {0}; + + if (mkdtemp (mountdir) == NULL) { + gf_log ("glusterd", GF_LOG_DEBUG, + "failed to create a temporary mount directory"); + ret = -1; + goto out; + } + + runinit (&runner); + runner_add_args (&runner, SBIN_DIR"/glusterfs", + "-s", "localhost", + "--volfile-id", volname, + "-l", DEFAULT_LOG_FILE_DIRECTORY"/quota-crawl.log", + mountdir, NULL); + + synclock_unlock (&priv->big_lock); + ret = runner_run_reuse (&runner); + synclock_lock (&priv->big_lock); + if (ret == -1) { + runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed"); + runner_end (&runner); + goto out; + } + runner_end (&runner); + + if ((pid = fork ()) < 0) { + gf_log ("glusterd", GF_LOG_WARNING, "fork from parent failed"); + ret = -1; + goto out; + } else if (pid == 0) {//first child + /* fork one more to not hold back main process on + * blocking call below + */ + pid = fork (); + if (pid) + _exit (pid > 0 ? EXIT_SUCCESS : EXIT_FAILURE); + + ret = chdir (mountdir); + if (ret == -1) { + gf_log ("glusterd", GF_LOG_WARNING, "chdir %s failed, " + "reason: %s", mountdir, strerror (errno)); + exit (EXIT_FAILURE); + } + runinit (&runner); + runner_add_args (&runner, "/usr/bin/find", "find", ".", NULL); + if (runner_start (&runner) == -1) + _exit (EXIT_FAILURE); + +#ifndef GF_LINUX_HOST_OS + runner_end (&runner); /* blocks in waitpid */ + runcmd ("umount", mountdir, NULL); +#else + runcmd ("umount", "-l", mountdir, NULL); +#endif + rmdir (mountdir); + _exit (EXIT_SUCCESS); + } + ret = (waitpid (pid, &status, 0) == pid && + WIFEXITED (status) && WEXITSTATUS (status) == EXIT_SUCCESS) ? 
0 : -1; + +out: + return ret; +} + +char * +glusterd_quota_get_limit_value (char *quota_limits, char *path) +{ + int32_t i, j, k, l, len; + int32_t pat_len, diff; + char *ret_str = NULL; + + len = strlen (quota_limits); + pat_len = strlen (path); + i = 0; + j = 0; + + while (i < len) { + j = i; + k = 0; + while (path [k] == quota_limits [j]) { + j++; + k++; + } + + l = j; + + while (quota_limits [j] != ',' && + quota_limits [j] != '\0') + j++; + + if (quota_limits [l] == ':' && pat_len == (l - i)) { + diff = j - i; + ret_str = GF_CALLOC (diff + 1, sizeof (char), + gf_gld_mt_char); + + strncpy (ret_str, "a_limits [i], diff); + + break; + } + i = ++j; //skip ',' + } + + return ret_str; +} + +char* +_glusterd_quota_get_limit_usages (glusterd_volinfo_t *volinfo, + char *path, char **op_errstr) +{ + int32_t ret = 0; + char *quota_limits = NULL; + char *ret_str = NULL; + + if (volinfo == NULL) + return NULL; + + ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_LIMIT_USAGE, + "a_limits); + if (ret) + return NULL; + if (quota_limits == NULL) { + ret_str = NULL; + *op_errstr = gf_strdup ("Limit not set on any directory"); + } else if (path == NULL) + ret_str = gf_strdup (quota_limits); + else + ret_str = glusterd_quota_get_limit_value (quota_limits, path); + + return ret_str; +} + +int32_t +glusterd_quota_get_limit_usages (glusterd_conf_t *priv, + glusterd_volinfo_t *volinfo, + char *volname, + dict_t *dict, + char **op_errstr, + dict_t *rsp_dict) +{ + int32_t i = 0; + int32_t ret = 0; + int32_t count = 0; + char *path = NULL; + char cmd_str [1024] = {0, }; + char *ret_str = NULL; + + if (rsp_dict == NULL) + return 0; + + ret = dict_get_int32 (dict, "count", &count); + if (ret < 0) + goto out; + + if (count == 0) { + ret_str = _glusterd_quota_get_limit_usages (volinfo, NULL, + op_errstr); + } else { + i = 0; + while (count--) { + snprintf (cmd_str, 1024, "path%d", i++); + + ret = dict_get_str (dict, cmd_str, &path); + if (ret < 0) + goto out; + + ret_str = _glusterd_quota_get_limit_usages (volinfo, path, op_errstr); + } + } + + if (ret_str) { + ret = dict_set_dynstr (rsp_dict, "limit_list", ret_str); + } +out: + return ret; +} + +int32_t +glusterd_quota_enable (glusterd_volinfo_t *volinfo, char **op_errstr, + gf_boolean_t *crawl) +{ + int32_t ret = -1; + char *quota_status = NULL; + + GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out); + GF_VALIDATE_OR_GOTO ("glusterd", crawl, out); + GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out); + + if (glusterd_is_volume_started (volinfo) == 0) { + *op_errstr = gf_strdup ("Volume is stopped, start volume " + "to enable quota."); + goto out; + } + + ret = glusterd_check_if_quota_trans_enabled (volinfo); + if (ret == 0) { + *op_errstr = gf_strdup ("Quota is already enabled"); + goto out; + } + + quota_status = gf_strdup ("on"); + if (!quota_status) { + gf_log ("", GF_LOG_ERROR, "memory allocation failed"); + *op_errstr = gf_strdup ("Enabling quota has been unsuccessful"); + goto out; + } + + ret = dict_set_dynstr (volinfo->dict, VKEY_FEATURES_QUOTA, quota_status); + if (ret) { + gf_log ("", GF_LOG_ERROR, "dict set failed"); + *op_errstr = gf_strdup ("Enabling quota has been unsuccessful"); + goto out; + } + + *op_errstr = gf_strdup ("Enabling quota has been successful"); + + *crawl = _gf_true; + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_quota_disable (glusterd_volinfo_t *volinfo, char **op_errstr) +{ + int32_t ret = -1; + char *quota_status = NULL, *quota_limits = NULL; + + GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out); + GF_VALIDATE_OR_GOTO 
("glusterd", op_errstr, out); + + ret = glusterd_check_if_quota_trans_enabled (volinfo); + if (ret == -1) { + *op_errstr = gf_strdup ("Quota is already disabled"); + goto out; + } + + quota_status = gf_strdup ("off"); + if (!quota_status) { + gf_log ("", GF_LOG_ERROR, "memory allocation failed"); + *op_errstr = gf_strdup ("Disabling quota has been unsuccessful"); + goto out; + } + + ret = dict_set_dynstr (volinfo->dict, VKEY_FEATURES_QUOTA, quota_status); + if (ret) { + gf_log ("", GF_LOG_ERROR, "dict set failed"); + *op_errstr = gf_strdup ("Disabling quota has been unsuccessful"); + goto out; + } + + *op_errstr = gf_strdup ("Disabling quota has been successful"); + + ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_LIMIT_USAGE, + "a_limits); + if (ret) { + gf_log ("", GF_LOG_WARNING, "failed to get the quota limits"); + } else { + GF_FREE (quota_limits); + } + + dict_del (volinfo->dict, VKEY_FEATURES_LIMIT_USAGE); + +out: + return ret; +} + +int32_t +glusterd_quota_limit_usage (glusterd_volinfo_t *volinfo, dict_t *dict, char **op_errstr) +{ + int32_t ret = -1; + char *path = NULL; + char *limit = NULL; + char *value = NULL; + char msg [1024] = {0,}; + char *quota_limits = NULL; + + GF_VALIDATE_OR_GOTO ("glusterd", dict, out); + GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out); + GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out); + + ret = glusterd_check_if_quota_trans_enabled (volinfo); + if (ret == -1) { + *op_errstr = gf_strdup ("Quota is disabled, please enable " + "quota"); + goto out; + } + + ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_LIMIT_USAGE, + "a_limits); + if (ret) { + gf_log ("", GF_LOG_ERROR, "failed to get the quota limits"); + *op_errstr = gf_strdup ("failed to set limit"); + goto out; + } + + ret = dict_get_str (dict, "path", &path); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch quota limits" ); + *op_errstr = gf_strdup ("failed to set limit"); + goto out; + } + + ret = dict_get_str (dict, "limit", &limit); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch quota limits" ); + *op_errstr = gf_strdup ("failed to set limit"); + goto out; + } + + if (quota_limits) { + ret = _glusterd_quota_remove_limits ("a_limits, path, NULL); + if (ret == -1) { + gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + *op_errstr = gf_strdup ("failed to set limit"); + goto out; + } + } + + if (quota_limits == NULL) { + ret = gf_asprintf (&value, "%s:%s", path, limit); + if (ret == -1) { + gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + *op_errstr = gf_strdup ("failed to set limit"); + goto out; + } + } else { + ret = gf_asprintf (&value, "%s,%s:%s", + quota_limits, path, limit); + if (ret == -1) { + gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + *op_errstr = gf_strdup ("failed to set limit"); + goto out; + } + + GF_FREE (quota_limits); + } + + quota_limits = value; + + ret = dict_set_str (volinfo->dict, VKEY_FEATURES_LIMIT_USAGE, + quota_limits); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to set quota limits" ); + *op_errstr = gf_strdup ("failed to set limit"); + goto out; + } + snprintf (msg, 1024, "limit set on %s", path); + *op_errstr = gf_strdup (msg); + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_quota_remove_limits (glusterd_volinfo_t *volinfo, dict_t *dict, char **op_errstr) +{ + int32_t ret = -1; + char str [PATH_MAX + 1024] = {0,}; + char *quota_limits = NULL; + char *path = NULL; + gf_boolean_t flag = _gf_false; + + GF_VALIDATE_OR_GOTO ("glusterd", dict, out); + GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out); + 
GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out); + + ret = glusterd_check_if_quota_trans_enabled (volinfo); + if (ret == -1) { + *op_errstr = gf_strdup ("Quota is disabled, please enable quota"); + goto out; + } + + ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_LIMIT_USAGE, + "a_limits); + if (ret) { + gf_log ("", GF_LOG_ERROR, "failed to get the quota limits"); + goto out; + } + + ret = dict_get_str (dict, "path", &path); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch quota limits" ); + goto out; + } + + ret = _glusterd_quota_remove_limits ("a_limits, path, &flag); + if (ret == -1) { + if (flag == _gf_true) + snprintf (str, sizeof (str), "Removing limit on %s has " + "been unsuccessful", path); + else + snprintf (str, sizeof (str), "%s has no limit set", path); + *op_errstr = gf_strdup (str); + goto out; + } else { + if (flag == _gf_true) + snprintf (str, sizeof (str), "Removed quota limit on " + "%s", path); + else + snprintf (str, sizeof (str), "no limit set on %s", + path); + *op_errstr = gf_strdup (str); + } + + if (quota_limits) { + ret = dict_set_str (volinfo->dict, VKEY_FEATURES_LIMIT_USAGE, + quota_limits); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to set quota limits" ); + goto out; + } + } else { + dict_del (volinfo->dict, VKEY_FEATURES_LIMIT_USAGE); + } + + ret = 0; + +out: + return ret; +} + + +int +glusterd_op_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + glusterd_volinfo_t *volinfo = NULL; + int32_t ret = -1; + char *volname = NULL; + int type = -1; + gf_boolean_t start_crawl = _gf_false; + glusterd_conf_t *priv = NULL; + + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + priv = THIS->private; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name " ); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + goto out; + } + + ret = dict_get_int32 (dict, "type", &type); + + if (type == GF_QUOTA_OPTION_TYPE_ENABLE) { + ret = glusterd_quota_enable (volinfo, op_errstr, &start_crawl); + if (ret < 0) + goto out; + + goto create_vol; + } + + if (type == GF_QUOTA_OPTION_TYPE_DISABLE) { + ret = glusterd_quota_disable (volinfo, op_errstr); + if (ret < 0) + goto out; + + goto create_vol; + } + + if (type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) { + ret = glusterd_quota_limit_usage (volinfo, dict, op_errstr); + if (ret < 0) + goto out; + + goto create_vol; + } + + if (type == GF_QUOTA_OPTION_TYPE_REMOVE) { + ret = glusterd_quota_remove_limits (volinfo, dict, op_errstr); + if (ret < 0) + goto out; + + goto create_vol; + } + + if (type == GF_QUOTA_OPTION_TYPE_LIST) { + ret = glusterd_check_if_quota_trans_enabled (volinfo); + if (ret == -1) { + *op_errstr = gf_strdup ("cannot list the limits, " + "quota is disabled"); + goto out; + } + + ret = glusterd_quota_get_limit_usages (priv, volinfo, volname, + dict, op_errstr, rsp_dict); + + goto out; + } +create_vol: + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to re-create volfile for" + " 'quota'"); + ret = -1; + goto out; + } + + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) + ret = glusterd_check_generate_start_nfs (); + + ret = 0; + +out: + if (rsp_dict && start_crawl == _gf_true) + glusterd_quota_initiate_fs_crawl (priv, volname); + + if (rsp_dict && *op_errstr) { + ret = dict_set_dynstr 
(rsp_dict, "errstr", *op_errstr); + if (ret) { + GF_FREE (*op_errstr); + gf_log ("", GF_LOG_DEBUG, + "failed to set error message in ctx"); + } + *op_errstr = NULL; + } + + return ret; +} + +int +glusterd_op_stage_quota (dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + gf_boolean_t exists = _gf_false; + int type = 0; + dict_t *ctx = NULL; + + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + exists = glusterd_check_volume_exists (volname); + if (!exists) { + gf_log ("", GF_LOG_ERROR, "Volume with name: %s " + "does not exist", + volname); + *op_errstr = gf_strdup ("Invalid volume name"); + ret = -1; + goto out; + } + + ret = dict_get_int32 (dict, "type", &type); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get 'type' for quota op"); + *op_errstr = gf_strdup ("Volume quota failed, internal error " + ", unable to get type of operation"); + goto out; + } + + + ctx = glusterd_op_get_ctx(); + if (ctx && (type == GF_QUOTA_OPTION_TYPE_ENABLE + || type == GF_QUOTA_OPTION_TYPE_LIST)) { + /* Fuse mount req. only for enable & list-usage options*/ + if (!glusterd_is_fuse_available ()) { + gf_log ("glusterd", GF_LOG_ERROR, "Unable to open /dev/" + "fuse (%s), quota command failed", + strerror (errno)); + *op_errstr = gf_strdup ("Fuse unavailable"); + ret = -1; + goto out; + } + } + +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index b1eed6419..b7b974c68 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -1,28 +1,22 @@ /* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" #endif + #include <inttypes.h> +#include <sys/types.h> +#include <unistd.h> #include <sys/resource.h> +#include <sys/statvfs.h> #include "globals.h" #include "compat.h" @@ -36,461 +30,720 @@ #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" +#include "run.h" +#include "glusterd-volgen.h" #include "syscall.h" -#include "cli1.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +int32_t +glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe); int -gf_glusterd_rebalance_move_data (glusterd_volinfo_t *volinfo, const char *dir) +glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, glusterd_op_t op) { - int ret = -1; - int dst_fd = -1; - int src_fd = -1; - DIR *fd = NULL; - glusterd_defrag_info_t *defrag = NULL; - struct dirent *entry = NULL; - struct stat stbuf = {0,}; - struct stat new_stbuf = {0,}; - char full_path[1024] = {0,}; - char tmp_filename[1024] = {0,}; - char value[16] = {0,}; - - if (!volinfo->defrag) + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + /* Check only if operation is not remove-brick */ + if ((GD_OP_REMOVE_BRICK != op) && + !gd_is_remove_brick_committed (volinfo)) { + gf_log (this->name, GF_LOG_DEBUG, "A remove-brick task on " + "volume %s is not yet committed", volinfo->volname); + snprintf (op_errstr, len, "A remove-brick task on volume %s is" + " not yet committed. Either commit or stop the " + "remove-brick task.", volinfo->volname); goto out; + } - defrag = volinfo->defrag; + if (glusterd_is_defrag_on (volinfo)) { + gf_log (this->name, GF_LOG_DEBUG, + "rebalance on volume %s already started", + volinfo->volname); + snprintf (op_errstr, len, "Rebalance on %s is already started", + volinfo->volname); + goto out; + } - fd = opendir (dir); - if (!fd) + if (glusterd_is_rb_started (volinfo) || + glusterd_is_rb_paused (volinfo)) { + gf_log (this->name, GF_LOG_DEBUG, + "Rebalance failed as replace brick is in progress on volume %s", + volinfo->volname); + snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on " + "volume %s", volinfo->volname); goto out; - while ((entry = readdir (fd))) { - if (!entry) - break; + } + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} - if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) - continue; - snprintf (full_path, 1024, "%s/%s", dir, entry->d_name); +int32_t +__glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + glusterd_volinfo_t *volinfo = NULL; + glusterd_defrag_info_t *defrag = NULL; + int ret = 0; + char pidfile[PATH_MAX]; + glusterd_conf_t *priv = NULL; - ret = stat (full_path, &stbuf); - if (ret == -1) - continue; + priv = THIS->private; + if (!priv) + return 0; - if (S_ISREG (stbuf.st_mode)) - defrag->num_files_lookedup += 1; + volinfo = mydata; + if (!volinfo) + return 0; - if (!(S_ISREG (stbuf.st_mode) && - ((stbuf.st_mode & 01000) == 01000))) - continue; + defrag = volinfo->rebal.defrag; + if (!defrag) + return 0; - /* If the file is open, don't run rebalance on it */ - ret = sys_lgetxattr (full_path, GLUSTERFS_OPEN_FD_COUNT, - &value, 16); - if ((ret < 0) || !strncmp (value, "1", 1)) - continue; + if ((event == RPC_CLNT_DISCONNECT) && defrag->connected) + volinfo->rebal.defrag = NULL; - /* If its a regular file, and sticky bit is set, we need to - rebalance that */ - snprintf (tmp_filename, 1024, 
"%s/.%s.gfs%llu", dir, - entry->d_name, - (unsigned long long)stbuf.st_size); + GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); - dst_fd = creat (tmp_filename, (stbuf.st_mode & ~01000)); - if (dst_fd == -1) - continue; + switch (event) { + case RPC_CLNT_CONNECT: + { + if (defrag->connected) + return 0; - src_fd = open (full_path, O_RDONLY); - if (src_fd == -1) { - close (dst_fd); - continue; + LOCK (&defrag->lock); + { + defrag->connected = 1; } + UNLOCK (&defrag->lock); - while (1) { - ret = read (src_fd, defrag->databuf, 131072); - if (!ret || (ret < 0)) { - close (dst_fd); - close (src_fd); - break; - } - ret = write (dst_fd, defrag->databuf, ret); - if (ret < 0) { - close (dst_fd); - close (src_fd); - break; - } + gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_CONNECT", + rpc->conn.trans->name); + break; + } + + case RPC_CLNT_DISCONNECT: + { + if (!defrag->connected) + return 0; + + LOCK (&defrag->lock); + { + defrag->connected = 0; } + UNLOCK (&defrag->lock); - ret = stat (full_path, &new_stbuf); - if (ret < 0) - continue; - /* No need to rebalance, if there is some - activity on source file */ - if (new_stbuf.st_mtime != stbuf.st_mtime) - continue; - - ret = rename (tmp_filename, full_path); - if (ret != -1) { - LOCK (&defrag->lock); - { - defrag->total_files += 1; - defrag->total_data += stbuf.st_size; + if (!glusterd_is_service_running (pidfile, NULL)) { + if (volinfo->rebal.defrag_status == + GF_DEFRAG_STATUS_STARTED) { + volinfo->rebal.defrag_status = + GF_DEFRAG_STATUS_FAILED; } - UNLOCK (&defrag->lock); - } + } - if (volinfo->defrag_status == GF_DEFRAG_STATUS_STOPED) { - closedir (fd); - ret = -1; - goto out; + glusterd_store_perform_node_state_store (volinfo); + + if (defrag->rpc) { + rpc_clnt_unref (defrag->rpc); + defrag->rpc = NULL; } + if (defrag->cbk_fn) + defrag->cbk_fn (volinfo, + volinfo->rebal.defrag_status); + + GF_FREE (defrag); + gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT", + rpc->conn.trans->name); + break; } - closedir (fd); + default: + gf_log ("", GF_LOG_TRACE, + "got some other RPC event %d", event); + ret = 0; + break; + } + + return ret; +} - fd = opendir (dir); - if (!fd) +int32_t +glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data) +{ + return glusterd_big_locked_notify (rpc, mydata, event, + data, __glusterd_defrag_notify); +} + +int +glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op) +{ + int ret = -1; + glusterd_defrag_info_t *defrag = NULL; + runner_t runner = {0,}; + glusterd_conf_t *priv = NULL; + char defrag_path[PATH_MAX]; + char sockfile[PATH_MAX] = {0,}; + char pidfile[PATH_MAX] = {0,}; + char logfile[PATH_MAX] = {0,}; + dict_t *options = NULL; + char valgrind_logfile[PATH_MAX] = {0,}; + + priv = THIS->private; + + GF_ASSERT (volinfo); + GF_ASSERT (op_errstr); + + ret = glusterd_defrag_start_validate (volinfo, op_errstr, len, op); + if (ret) + goto out; + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = + GF_CALLOC (1, sizeof (*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); + if (!volinfo->rebal.defrag) goto out; - while ((entry = readdir (fd))) { - if (!entry) - break; - if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) - continue; + defrag = volinfo->rebal.defrag; - snprintf (full_path, 1024, "%s/%s", dir, entry->d_name); + defrag->cmd = cmd; - ret = stat (full_path, &stbuf); - if (ret == -1) - continue; + volinfo->rebal.defrag_cmd = cmd; + volinfo->rebal.op = op; - if 
(!S_ISDIR (stbuf.st_mode)) - continue; + LOCK_INIT (&defrag->lock); - ret = gf_glusterd_rebalance_move_data (volinfo, - full_path); - if (ret) - break; + volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED; + + glusterd_volinfo_reset_defrag_stats (volinfo); + glusterd_store_perform_node_state_store (volinfo); + + GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv); + ret = mkdir_p (defrag_path, 0777, _gf_true); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create " + "directory %s", defrag_path); + goto out; } - closedir (fd); - if (!entry) - ret = 0; + GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv); + GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, priv); + snprintf (logfile, PATH_MAX, "%s/%s-rebalance.log", + DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname); + runinit (&runner); + + if (priv->valgrind) { + snprintf (valgrind_logfile, PATH_MAX, + "%s/valgrind-%s-rebalance.log", + DEFAULT_LOG_FILE_DIRECTORY, + volinfo->volname); + + runner_add_args (&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); + } + + runner_add_args (&runner, SBIN_DIR"/glusterfs", + "-s", "localhost", "--volfile-id", volinfo->volname, + "--xlator-option", "*dht.use-readdirp=yes", + "--xlator-option", "*dht.lookup-unhashed=yes", + "--xlator-option", "*dht.assert-no-child-down=yes", + "--xlator-option", "*replicate*.data-self-heal=off", + "--xlator-option", + "*replicate*.metadata-self-heal=off", + "--xlator-option", "*replicate*.entry-self-heal=off", + "--xlator-option", "*replicate*.readdir-failover=off", + "--xlator-option", "*dht.readdir-optimize=on", + NULL); + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd); + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf (&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID)); + runner_add_arg (&runner, "--socket-file"); + runner_argprintf (&runner, "%s",sockfile); + runner_add_arg (&runner, "--pid-file"); + runner_argprintf (&runner, "%s",pidfile); + runner_add_arg (&runner, "-l"); + runner_argprintf (&runner, logfile); + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); + + ret = runner_run_nowait (&runner); + if (ret) { + gf_log ("glusterd", GF_LOG_DEBUG, "rebalance command failed"); + goto out; + } + + sleep (5); + + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. The + * default timeout of 30mins used for unreliable network connections is + * too long for unix domain socket connections. 
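+ *
+ * (The 600 passed below is assumed to become the frame-timeout
+ * transport option, so RPCs pending on a dead rebalance process
+ * fail within ten minutes instead of thirty.)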
+ */ + ret = rpc_transport_unix_options_build (&options, sockfile, 600); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); + goto out; + } + + synclock_unlock (&priv->big_lock); + ret = glusterd_rpc_create (&defrag->rpc, options, + glusterd_defrag_notify, volinfo); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); + goto out; + } + + if (cbk) + defrag->cbk_fn = cbk; + out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } + int -gf_glusterd_rebalance_fix_layout (glusterd_volinfo_t *volinfo, const char *dir) +glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, + glusterd_conf_t *priv, int cmd) { - int ret = -1; - char value[128] = {0,}; - char full_path[1024] = {0,}; - struct stat stbuf = {0,}; - DIR *fd = NULL; - struct dirent *entry = NULL; - - if (!volinfo->defrag) - goto out; + dict_t *options = NULL; + char sockfile[PATH_MAX] = {0,}; + int ret = -1; + glusterd_defrag_info_t *defrag = NULL; + + if (!volinfo->rebal.defrag) + volinfo->rebal.defrag = + GF_CALLOC (1, sizeof (*volinfo->rebal.defrag), + gf_gld_mt_defrag_info); - fd = opendir (dir); - if (!fd) + if (!volinfo->rebal.defrag) goto out; - while ((entry = readdir (fd))) { - if (!entry) - break; + defrag = volinfo->rebal.defrag; - if (!strcmp (entry->d_name, ".") || !strcmp (entry->d_name, "..")) - continue; + defrag->cmd = cmd; - snprintf (full_path, 1024, "%s/%s", dir, entry->d_name); + LOCK_INIT (&defrag->lock); - ret = stat (full_path, &stbuf); - if (ret == -1) - continue; + GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv); + + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. The + * default timeout of 30mins used for unreliable network connections is + * too long for unix domain socket connections. 
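+ *
+ * A rough sketch of the dict rpc_transport_unix_options_build is
+ * assumed to populate here (keys shown for illustration only):
+ *
+ *   transport-type                = "socket"
+ *   transport.address-family      = "unix"
+ *   transport.socket.connect-path = <sockfile>
+ *   frame-timeout                 = 600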
+ */ + ret = rpc_transport_unix_options_build (&options, sockfile, 600); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed"); + goto out; + } - if (S_ISDIR (stbuf.st_mode)) { - /* Fix the layout of the directory */ - sys_lgetxattr (full_path, "trusted.distribute.fix.layout", - &value, 128); + synclock_unlock (&priv->big_lock); + ret = glusterd_rpc_create (&defrag->rpc, options, + glusterd_defrag_notify, volinfo); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed"); + goto out; + } + ret = 0; +out: + return ret; +} - volinfo->defrag->total_files += 1; +int +glusterd_rebalance_cmd_validate (int cmd, char *volname, + glusterd_volinfo_t **volinfo, + char *op_errstr, size_t len) +{ + int ret = -1; - /* Traverse into subdirectory */ - ret = gf_glusterd_rebalance_fix_layout (volinfo, - full_path); - if (ret) - break; - } + if (glusterd_volinfo_find(volname, volinfo)) { + gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on invalid" + " volname %s", volname); + snprintf (op_errstr, len, "Volume %s does not exist", + volname); + goto out; + } + if ((*volinfo)->brick_count <= (*volinfo)->dist_leaf_count) { + gf_log ("glusterd", GF_LOG_ERROR, "Volume %s is not a " + "distribute type or contains only 1 brick", volname); + snprintf (op_errstr, len, "Volume %s is not a distribute " + "volume or contains only 1 brick.\n" + "Not performing rebalance", volname); + goto out; + } - if (volinfo->defrag_status == GF_DEFRAG_STATUS_STOPED) { - closedir (fd); - ret = -1; - goto out; - } + if ((*volinfo)->status != GLUSTERD_STATUS_STARTED) { + gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on stopped" + " volname %s", volname); + snprintf (op_errstr, len, "Volume %s needs to " + "be started to perform rebalance", volname); + goto out; } - closedir (fd); - if (!entry) - ret = 0; + ret = 0; out: + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); return ret; } -void * -glusterd_defrag_start (void *data) +int +__glusterd_handle_defrag_volume (rpcsvc_request_t *req) { - glusterd_volinfo_t *volinfo = data; - glusterd_defrag_info_t *defrag = NULL; - char cmd_str[1024] = {0,}; - int ret = -1; - struct stat stbuf = {0,}; - char value[128] = {0,}; - - defrag = volinfo->defrag; - if (!defrag) + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + char *volname = NULL; + gf_cli_defrag_type cmd = 0; + char msg[2048] = {0,}; + xlator_t *this = NULL; + + GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; goto out; + } - sleep (1); - ret = stat (defrag->mount, &stbuf); - if ((ret == -1) && (errno == ENOTCONN)) { - /* Wait for some more time before starting rebalance */ - sleep (2); - ret = stat (defrag->mount, &stbuf); - if (ret == -1) { - volinfo->defrag_status = GF_DEFRAG_STATUS_FAILED; - volinfo->rebalance_files = 0; - volinfo->rebalance_data = 0; - volinfo->lookedup_files = 0; + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (msg, sizeof (msg), "Unable to decode the " + "command"); goto out; } } - /* Fix the root ('/') first */ - sys_lgetxattr 
(defrag->mount, "trusted.distribute.fix.layout", - &value, 128); + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + goto out; + } - /* root's layout got fixed */ - defrag->total_files = 1; + ret = dict_get_int32 (dict, "rebalance-command", (int32_t*)&cmd); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to get command"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + goto out; + } - /* Step 1: Fix layout of all the directories */ - ret = gf_glusterd_rebalance_fix_layout (volinfo, defrag->mount); + ret = dict_set_static_bin (dict, "node-uuid", MY_UUID, 16); if (ret) goto out; - /* Completed first step */ - volinfo->defrag_status = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE; + if ((cmd == GF_DEFRAG_CMD_STATUS) || + (cmd == GF_DEFRAG_CMD_STOP)) { + ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME, + dict, msg, sizeof (msg)); + } else + ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict, + msg, sizeof (msg)); - /* It was used by number of layout fixes on directories */ - defrag->total_files = 0; +out: - /* Step 2: Iterate over directories to move data */ - ret = gf_glusterd_rebalance_move_data (volinfo, defrag->mount); + glusterd_friend_sm (); + glusterd_op_sm (); + + if (ret) { + if (msg[0] == '\0') + snprintf (msg, sizeof (msg), "Operation failed"); + ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0, + req, dict, msg); - /* Completed whole process */ - volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE; - volinfo->rebalance_files = defrag->total_files; - volinfo->rebalance_data = defrag->total_data; - volinfo->lookedup_files = defrag->num_files_lookedup; -out: - volinfo->defrag = NULL; - if (defrag) { - gf_log ("rebalance", GF_LOG_NORMAL, "rebalance on %s complete", - defrag->mount); - - snprintf (cmd_str, 1024, "umount -l %s", defrag->mount); - ret = system (cmd_str); - LOCK_DESTROY (&defrag->lock); - GF_FREE (defrag); } - return NULL; + free (cli_req.dict.dict_val);//malloced by xdr + + return 0; } int -glusterd_defrag_stop (glusterd_volinfo_t *volinfo, - gf1_cli_defrag_vol_rsp *rsp) +glusterd_handle_defrag_volume (rpcsvc_request_t *req) { - /* TODO: set a variaeble 'stop_defrag' here, it should be checked - in defrag loop */ - if (!volinfo || !volinfo->defrag) - goto out; - - LOCK (&volinfo->defrag->lock); - { - volinfo->defrag_status = GF_DEFRAG_STATUS_STOPED; - rsp->files = volinfo->defrag->total_files; - rsp->size = volinfo->defrag->total_data; - } - UNLOCK (&volinfo->defrag->lock); - - rsp->op_ret = 0; -out: - return 0; + return glusterd_big_locked_handler (req, __glusterd_handle_defrag_volume); } + int -glusterd_defrag_status_get (glusterd_volinfo_t *volinfo, - gf1_cli_defrag_vol_rsp *rsp) +glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) { - if (!volinfo) + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + char *task_id_str = NULL; + dict_t *op_ctx = NULL; + xlator_t *this = 0; + + this = THIS; + GF_ASSERT (this); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "volname not found"); goto out; + } - if (volinfo->defrag) { - LOCK (&volinfo->defrag->lock); - { - rsp->files = volinfo->defrag->total_files; - rsp->size = volinfo->defrag->total_data; - rsp->lookedup_files = volinfo->defrag->num_files_lookedup; + ret = dict_get_int32 (dict, "rebalance-command", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, 
"cmd not found"); + goto out; + } + + ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, + msg, sizeof (msg)); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "failed to validate"); + goto out; + } + switch (cmd) { + case GF_DEFRAG_CMD_START: + case GF_DEFRAG_CMD_START_LAYOUT_FIX: + case GF_DEFRAG_CMD_START_FORCE: + if (is_origin_glusterd (dict)) { + op_ctx = glusterd_op_get_ctx (); + if (!op_ctx) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + goto out; + } + + ret = glusterd_generate_and_set_task_id + (op_ctx, GF_REBALANCE_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + } else { + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, + &task_id_str); + if (ret) { + snprintf (msg, sizeof (msg), + "Missing rebalance-id"); + gf_log (this->name, GF_LOG_WARNING, "%s", msg); + ret = 0; + } + } + ret = glusterd_defrag_start_validate (volinfo, msg, + sizeof (msg), + GD_OP_REBALANCE); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "start validate failed"); + goto out; } - UNLOCK (&volinfo->defrag->lock); - } else { - rsp->files = volinfo->rebalance_files; - rsp->size = volinfo->rebalance_data; - rsp->lookedup_files = volinfo->lookedup_files; + break; + case GF_DEFRAG_CMD_STATUS: + case GF_DEFRAG_CMD_STOP: + break; + default: + break; } - rsp->op_errno = volinfo->defrag_status; - rsp->op_ret = 0; + ret = 0; out: - return 0; + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup (msg); + + return ret; } + int -glusterd_handle_defrag_volume (rpcsvc_request_t *req) +glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict) { - int32_t ret = -1; - gf1_cli_defrag_vol_req cli_req = {0,}; - glusterd_conf_t *priv = NULL; - char cmd_str[4096] = {0,}; - glusterd_volinfo_t *volinfo = NULL; - glusterd_defrag_info_t *defrag = NULL; - gf1_cli_defrag_vol_rsp rsp = {0,}; - - GF_ASSERT (req); - - priv = THIS->private; - if (!gf_xdr_to_cli_defrag_vol_req (req->msg[0], &cli_req)) { - //failed to decode msg; - req->rpc_err = GARBAGE_ARGS; + char *volname = NULL; + int ret = 0; + int32_t cmd = 0; + char msg[2048] = {0}; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + gf_boolean_t volfile_update = _gf_false; + char *task_id_str = NULL; + dict_t *ctx = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "volname not given"); goto out; } - switch (cli_req.cmd) { - case GF_DEFRAG_CMD_START: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: start, attempted", cli_req.volname); - break; - case GF_DEFRAG_CMD_STOP: - gf_cmd_log ("Volume rebalance"," on volname: %s " - "cmd: stop, attempted", cli_req.volname); - break; - default: - break; + ret = dict_get_int32 (dict, "rebalance-command", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "command not given"); + goto out; } - gf_log ("glusterd", GF_LOG_NORMAL, "Received rebalance volume on %s", - cli_req.volname); - rsp.volname = cli_req.volname; - rsp.op_ret = -1; - if (glusterd_volinfo_find(cli_req.volname, &volinfo)) { - gf_log ("glusterd", GF_LOG_NORMAL, "Received rebalance on invalid" - " volname %s", cli_req.volname); + + ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo, + msg, sizeof (msg)); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "cmd validate failed"); goto out; } - if 
(volinfo->status != GLUSTERD_STATUS_STARTED) { - gf_log ("glusterd", GF_LOG_NORMAL, "Received rebalance on stopped" - " volname %s", cli_req.volname); - goto out; + /* Set task-id, if available, in op_ctx dict for operations other than + * start + */ + if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) { + if (!uuid_is_null (volinfo->rebal.rebalance_id)) { + ctx = glusterd_op_get_ctx (); + if (!ctx) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + ret = -1; + goto out; + } + + if (GD_OP_REMOVE_BRICK == volinfo->rebal.op) + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, ctx, + GF_REMOVE_BRICK_TID_KEY); + else + ret = glusterd_copy_uuid_to_dict + (volinfo->rebal.rebalance_id, ctx, + GF_REBALANCE_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set task-id"); + goto out; + } + } } - switch (cli_req.cmd) { + switch (cmd) { case GF_DEFRAG_CMD_START: - { - if (volinfo->defrag) { - gf_log ("glusterd", GF_LOG_DEBUG, - "rebalance on volume %s already started", - cli_req.volname); - rsp.op_errno = EEXIST; - goto out; + case GF_DEFRAG_CMD_START_LAYOUT_FIX: + case GF_DEFRAG_CMD_START_FORCE: + ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Missing rebalance " + "id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rebal.rebalance_id) ; + volinfo->rebal.op = GD_OP_REBALANCE; } - - if (glusterd_is_rb_started (volinfo) || - glusterd_is_rb_paused (volinfo)) { - gf_log ("glusterd", GF_LOG_DEBUG, - "Replace brick is in progress on volume %s", - cli_req.volname); - goto out; + ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg), + cmd, NULL, GD_OP_REBALANCE); + break; + case GF_DEFRAG_CMD_STOP: + /* Clear task-id only on explicitly stopping rebalance. 
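* (A stale rebalance_id left behind here would otherwise be copied back
* into the op ctx by the status/stop path above and reported for a task
* that no longer exists.)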
+ * Also clear the stored operation, so it doesn't cause trouble + * with future rebalance/remove-brick starts + */ + uuid_clear (volinfo->rebal.rebalance_id); + volinfo->rebal.op = GD_OP_NONE; + + /* Fall back to the old volume file in case of decommission*/ + list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, + brick_list) { + if (!brickinfo->decommissioned) + continue; + brickinfo->decommissioned = 0; + volfile_update = _gf_true; } - volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t), - gf_gld_mt_defrag_info); - if (!volinfo->defrag) - goto out; - - defrag = volinfo->defrag; - LOCK_INIT (&defrag->lock); - snprintf (defrag->mount, 1024, "%s/mount/%s", - priv->workdir, cli_req.volname); - /* Create a directory, mount glusterfs over it, start glusterfs-defrag */ - snprintf (cmd_str, 4096, "mkdir -p %s", defrag->mount); - ret = system (cmd_str); + if (volfile_update == _gf_false) { + ret = 0; + break; + } + ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { - gf_log("glusterd", GF_LOG_DEBUG, "command: %s failed", cmd_str); + gf_log (this->name, GF_LOG_WARNING, + "failed to create volfiles"); goto out; } - snprintf (cmd_str, 4096, "%s/sbin/glusterfs -s localhost " - "--volfile-id %s --volume-name %s-quick-read " - "--xlator-option *dht.unhashed-sticky-bit=yes " - "--xlator-option *dht.use-readdirp=yes " - "--xlator-option *dht.lookup-unhashed=yes %s", - GFS_PREFIX, cli_req.volname, cli_req.volname, - defrag->mount); - ret = gf_system (cmd_str); + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); if (ret) { - gf_log("glusterd", GF_LOG_DEBUG, "command: %s failed", cmd_str); + gf_log (this->name, GF_LOG_WARNING, + "failed to store volinfo"); goto out; } - volinfo->defrag_status = GF_DEFRAG_STATUS_STARTED; - rsp.op_ret = 0; - - ret = pthread_create (&defrag->th, NULL, glusterd_defrag_start, - volinfo); - if (ret) { - snprintf (cmd_str, 1024, "umount -l %s", defrag->mount); - ret = system (cmd_str); - rsp.op_ret = -1; - } - break; - } - case GF_DEFRAG_CMD_STOP: - ret = glusterd_defrag_stop (volinfo, &rsp); + ret = 0; break; + case GF_DEFRAG_CMD_STATUS: - ret = glusterd_defrag_status_get (volinfo, &rsp); break; default: break; } - if (ret) - gf_log("glusterd", GF_LOG_DEBUG, "command: %s failed",cmd_str); - if (cli_req.cmd != GF_DEFRAG_CMD_STATUS) { - gf_cmd_log ("volume rebalance"," on volname: %s %d %s", - cli_req.volname, - cli_req.cmd, ((ret)?"FAILED":"SUCCESS")); +out: + if (ret && op_errstr && msg[0]) + *op_errstr = gf_strdup (msg); + + return ret; +} + +int32_t +glusterd_defrag_event_notify_handle (dict_t *dict) +{ + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + int32_t ret = -1; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Failed to get volname"); + return ret; } -out: + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Failed to get volinfo for %s" + , volname); + return ret; + } - ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, - gf_xdr_serialize_cli_defrag_vol_rsp); - if (cli_req.volname) - free (cli_req.volname);//malloced by xdr + ret = glusterd_defrag_volume_status_update (volinfo, dict); - return 0; + if (ret) + gf_log ("", GF_LOG_ERROR, "Failed to update status"); + return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c new file mode 100644 index 000000000..54b830870 --- /dev/null +++ 
b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -0,0 +1,2024 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "common-utils.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterfs.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "run.h" +#include "syscall.h" + +#include <signal.h> + +#define GLUSTERD_GET_RB_MNTPT(path, len, volinfo) \ + snprintf (path, len, \ + DEFAULT_VAR_RUN_DIRECTORY"/%s-"RB_CLIENT_MOUNTPOINT, \ + volinfo->volname); + +extern uuid_t global_txn_id; + +int +glusterd_get_replace_op_str (gf1_cli_replace_op op, char *op_str) +{ + int ret = -1; + + if (!op_str) + goto out; + + switch (op) { + case GF_REPLACE_OP_START: + strcpy (op_str, "start"); + break; + case GF_REPLACE_OP_COMMIT: + strcpy (op_str, "commit"); + break; + case GF_REPLACE_OP_PAUSE: + strcpy (op_str, "pause"); + break; + case GF_REPLACE_OP_ABORT: + strcpy (op_str, "abort"); + break; + case GF_REPLACE_OP_STATUS: + strcpy (op_str, "status"); + break; + case GF_REPLACE_OP_COMMIT_FORCE: + strcpy (op_str, "commit-force"); + break; + default: + strcpy (op_str, "unknown"); + break; + } + + ret = 0; +out: + return ret; +} + +int +__glusterd_handle_replace_brick (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + dict_t *dict = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + int32_t op = 0; + char operation[256]; + glusterd_op_t cli_op = GD_OP_REPLACE_BRICK; + char *volname = NULL; + char msg[2048] = {0,}; + xlator_t *this = NULL; + + GF_ASSERT (req); + this = THIS; + GF_ASSERT (this); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_INFO, "Received replace brick req"); + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf (msg, sizeof (msg), "Unable to decode the " + "command"); + goto out; + } + } + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (msg, sizeof (msg), "Could not get volume name"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + goto out; + } + + ret = dict_get_int32 (dict, "operation", &op); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "dict_get on operation failed"); + snprintf (msg, sizeof (msg), "Could not get operation"); + goto out; + } + + ret = dict_get_str (dict, "src-brick", &src_brick); + + if (ret) { + snprintf (msg, sizeof (msg), "Failed to get src brick"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + goto out; + } + gf_log (this->name, GF_LOG_DEBUG, + "src brick=%s", src_brick); + + ret = dict_get_str (dict, "dst-brick", &dst_brick); + + if (ret) { + snprintf (msg, sizeof (msg), "Failed to get dest brick"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + goto out; + } + + (void) glusterd_get_replace_op_str 
(op, operation); + gf_log (this->name, GF_LOG_DEBUG, "dst brick=%s", dst_brick); + gf_log (this->name, GF_LOG_INFO, "Received replace brick %s request", + operation); + + ret = glusterd_op_begin (req, GD_OP_REPLACE_BRICK, dict, + msg, sizeof (msg)); + +out: + free (cli_req.dict.dict_val);//malloced by xdr + + glusterd_friend_sm (); + glusterd_op_sm (); + + if (ret) { + if (msg[0] == '\0') + snprintf (msg, sizeof (msg), "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, msg); + } + + return ret; +} + +int +glusterd_handle_replace_brick (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_replace_brick); +} + +static int +glusterd_get_rb_dst_brickinfo (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t **brickinfo) +{ + int32_t ret = -1; + + if (!volinfo || !brickinfo) + goto out; + + *brickinfo = volinfo->rep_brick.dst_brick; + + ret = 0; + +out: + return ret; +} + +int +glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = 0; + int32_t port = 0; + char *src_brick = NULL; + char *dst_brick = NULL; + char *volname = NULL; + int replace_op = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *src_brickinfo = NULL; + char *host = NULL; + char *path = NULL; + char msg[2048] = {0}; + char *dup_dstbrick = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *dst_brickinfo = NULL; + gf_boolean_t is_run = _gf_false; + dict_t *ctx = NULL; + glusterd_conf_t *priv = NULL; + char *savetok = NULL; + char pidfile[PATH_MAX] = {0}; + char *task_id_str = NULL; + xlator_t *this = NULL; + gf_boolean_t is_force = _gf_false; + + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "src-brick", &src_brick); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get src brick"); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "src brick=%s", src_brick); + + ret = dict_get_str (dict, "dst-brick", &dst_brick); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get dest brick"); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "dst brick=%s", dst_brick); + + ret = dict_get_str (dict, "volname", &volname); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "dict get on replace-brick operation failed"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (msg, sizeof (msg), "volume: %s does not exist", + volname); + *op_errstr = gf_strdup (msg); + goto out; + } + + if (GLUSTERD_STATUS_STARTED != volinfo->status) { + ret = -1; + snprintf (msg, sizeof (msg), "volume: %s is not started", + volname); + *op_errstr = gf_strdup (msg); + goto out; + } + + if (!glusterd_store_is_valid_brickpath (volname, dst_brick) || + !glusterd_is_valid_volfpath (volname, dst_brick)) { + snprintf (msg, sizeof (msg), "brick path %s is too " + "long.", dst_brick); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + + ret = -1; + goto out; + } + + ret = glusterd_check_gsync_running (volinfo, &is_run); + if (ret && (is_run == _gf_false)) + gf_log (this->name, GF_LOG_WARNING, "Unable to get the status" + " of active "GEOREP" session"); + if (is_run) { + gf_log (this->name, GF_LOG_WARNING, GEOREP" sessions active " + "for the volume %s ", volname); + snprintf (msg, sizeof(msg), 
GEOREP" sessions are active " + "for the volume %s.\nStop "GEOREP " sessions " + "involved in this volume. Use 'volume "GEOREP + " status' command for more info.", + volname); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + if (glusterd_is_defrag_on(volinfo)) { + snprintf (msg, sizeof(msg), "Volume name %s rebalance is in " + "progress. Please retry after completion", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + ctx = glusterd_op_get_ctx(); + + switch (replace_op) { + case GF_REPLACE_OP_START: + if (glusterd_is_rb_started (volinfo)) { + snprintf (msg, sizeof (msg), "Replace brick is already " + "started for volume"); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + if (is_origin_glusterd (dict)) { + if (!ctx) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to get op_ctx"); + goto out; + } + + ret = glusterd_generate_and_set_task_id + (ctx, GF_REPLACE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate task-id"); + goto out; + } + + } else { + ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, + &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Missing replace-brick-id"); + ret = 0; + } + } + is_force = dict_get_str_boolean (dict, "force", _gf_false); + + break; + + case GF_REPLACE_OP_PAUSE: + if (glusterd_is_rb_paused (volinfo)) { + gf_log (this->name, GF_LOG_ERROR, "Replace brick is " + "already paused for volume "); + ret = -1; + goto out; + } else if (!glusterd_is_rb_started(volinfo)) { + gf_log (this->name, GF_LOG_ERROR, "Replace brick is not" + " started for volume "); + ret = -1; + goto out; + } + break; + + case GF_REPLACE_OP_ABORT: + if (!glusterd_is_rb_ongoing (volinfo)) { + gf_log (this->name, GF_LOG_ERROR, "Replace brick is not" + " started or paused for volume "); + ret = -1; + goto out; + } + break; + + case GF_REPLACE_OP_COMMIT: + if (!glusterd_is_rb_ongoing (volinfo)) { + gf_log (this->name, GF_LOG_ERROR, "Replace brick is not " + "started for volume "); + ret = -1; + goto out; + } + break; + + case GF_REPLACE_OP_COMMIT_FORCE: + is_force = _gf_true; + break; + + case GF_REPLACE_OP_STATUS: + + if (glusterd_is_rb_ongoing (volinfo) == _gf_false) { + ret = gf_asprintf (op_errstr, "replace-brick not" + " started on volume %s", + volinfo->volname); + if (ret < 0) { + *op_errstr = NULL; + goto out; + } + + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + break; + + default: + ret = -1; + goto out; + } + + ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, + &src_brickinfo); + if (ret) { + snprintf (msg, sizeof (msg), "brick: %s does not exist in " + "volume: %s", src_brick, volname); + *op_errstr = gf_strdup (msg); + goto out; + } + + if (ctx) { + if (!glusterd_is_fuse_available ()) { + gf_log (this->name, GF_LOG_ERROR, "Unable to open /dev/" + "fuse (%s), replace-brick command failed", + strerror (errno)); + snprintf (msg, sizeof(msg), "Fuse unavailable\n " + "Replace-brick failed"); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + } + + if (gf_is_local_addr (src_brickinfo->hostname)) { + gf_log (this->name, GF_LOG_DEBUG, + "I AM THE SOURCE HOST"); + if (src_brickinfo->port && rsp_dict) { + ret = dict_set_int32 (rsp_dict, "src-brick-port", + src_brickinfo->port); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not set src-brick-port=%d", + src_brickinfo->port); + } + } + + GLUSTERD_GET_BRICK_PIDFILE 
(pidfile, volinfo, src_brickinfo, + priv); + if ((replace_op != GF_REPLACE_OP_COMMIT_FORCE) && + !glusterd_is_service_running (pidfile, NULL)) { + snprintf(msg, sizeof(msg), "Source brick %s:%s " + "is not online.", src_brickinfo->hostname, + src_brickinfo->path); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + + } + + dup_dstbrick = gf_strdup (dst_brick); + if (!dup_dstbrick) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Memory allocation failed"); + goto out; + } + host = strtok_r (dup_dstbrick, ":", &savetok); + path = strtok_r (NULL, ":", &savetok); + + if (!host || !path) { + gf_log (this->name, GF_LOG_ERROR, + "dst brick %s is not of form <HOSTNAME>:<export-dir>", + dst_brick); + ret = -1; + goto out; + } + + ret = glusterd_brickinfo_new_from_brick (dst_brick, &dst_brickinfo); + if (ret) + goto out; + + ret = glusterd_new_brick_validate (dst_brick, dst_brickinfo, + msg, sizeof (msg)); + if (ret) { + *op_errstr = gf_strdup (msg); + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + goto out; + } + + if (!glusterd_is_rb_ongoing (volinfo) && + (replace_op == GF_REPLACE_OP_START || + replace_op == GF_REPLACE_OP_COMMIT_FORCE)) { + + volinfo->rep_brick.src_brick = src_brickinfo; + volinfo->rep_brick.dst_brick = dst_brickinfo; + } + + if (glusterd_rb_check_bricks (volinfo, src_brickinfo, dst_brickinfo)) { + + ret = -1; + *op_errstr = gf_strdup ("Incorrect source or " + "destination brick"); + if (*op_errstr) + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + goto out; + } + + if (!glusterd_is_rb_ongoing (volinfo) && + gf_is_local_addr (host)) { + ret = glusterd_validate_and_create_brickpath (dst_brickinfo, + volinfo->volume_id, + op_errstr, is_force); + if (ret) + goto out; + } + + if (!gf_is_local_addr (host)) { + ret = glusterd_friend_find (NULL, host, &peerinfo); + if (ret) { + snprintf (msg, sizeof (msg), "%s, is not a friend", + host); + *op_errstr = gf_strdup (msg); + goto out; + } + + if (!peerinfo->connected) { + snprintf (msg, sizeof (msg), "%s, is not connected at " + "the moment", host); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) { + snprintf (msg, sizeof (msg), "%s, is not befriended " + "at the moment", host); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + } + + if (replace_op == GF_REPLACE_OP_START && + gf_is_local_addr (volinfo->rep_brick.dst_brick->hostname)) { + port = pmap_registry_alloc (THIS); + if (!port) { + gf_log (THIS->name, GF_LOG_CRITICAL, + "No free ports available"); + ret = -1; + goto out; + } + + ctx = glusterd_op_get_ctx(); + ret = dict_set_int32 ((ctx)?ctx:rsp_dict, "dst-brick-port", + port); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to set dst " + "brick port"); + goto out; + } + volinfo->rep_brick.dst_brick->port = port; + } + + ret = 0; + +out: + GF_FREE (dup_dstbrick); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + +static int +rb_set_mntfd (int mntfd) +{ + int ret = -1; + dict_t *ctx = NULL; + + ctx = glusterd_op_get_ctx (); + if (!ctx) { + gf_log (THIS->name, GF_LOG_CRITICAL, "Failed to get op ctx"); + goto out; + } + ret = dict_set_int32 (ctx, "mntfd", mntfd); + if (ret) + gf_log (THIS->name, GF_LOG_DEBUG, "Failed to set mnt fd " + "in op ctx"); +out: + return ret; +} + +static int +rb_get_mntfd (int *mntfd) +{ + int ret = -1; + dict_t *ctx = NULL; + + ctx = glusterd_op_get_ctx (); + if (!ctx) { + gf_log (THIS->name, GF_LOG_CRITICAL, "Failed to get op ctx"); + goto out; + 
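/* rb_set_mntfd()/rb_get_mntfd() pair: the fd of the maintenance mount
 * is parked in the op ctx dict under "mntfd" so the later xattr helpers
 * can fsetxattr()/fgetxattr() on the mount without re-opening it.
 * Hypothetical usage sketch (names from this file):
 *
 *     int mntfd = -1;
 *     if (rb_get_mntfd (&mntfd) == 0)
 *             sys_fsetxattr (mntfd, RB_PUMP_CMD_PAUSE, RB_PUMP_DEF_ARG,
 *                            strlen (RB_PUMP_DEF_ARG) + 1, 0);
 */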
} + ret = dict_get_int32 (ctx, "mntfd", mntfd); + if (ret) + gf_log (THIS->name, GF_LOG_DEBUG, "Failed to get mnt fd " + "from op ctx"); +out: + return ret; +} + +static int +rb_regenerate_volfiles (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + int32_t pump_needed) +{ + dict_t *dict = NULL; + int ret = 0; + + dict = volinfo->dict; + + gf_log ("", GF_LOG_DEBUG, + "attempting to set pump value=%d", pump_needed); + + ret = dict_set_int32 (dict, "enable-pump", pump_needed); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "could not dict_set enable-pump"); + goto out; + } + + ret = glusterd_create_rb_volfiles (volinfo, brickinfo); + + dict_del (dict, "enable-pump"); + +out: + return ret; +} + +static int +rb_src_brick_restart (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *src_brickinfo, + int activate_pump) +{ + int ret = 0; + + gf_log ("", GF_LOG_DEBUG, + "Attempting to kill src"); + + ret = glusterd_nfs_server_stop (volinfo); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to stop nfs, ret: %d", + ret); + } + + ret = glusterd_volume_stop_glusterfs (volinfo, src_brickinfo, + _gf_false); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to stop " + "glusterfs, ret: %d", ret); + goto out; + } + + glusterd_delete_volfile (volinfo, src_brickinfo); + + if (activate_pump) { + ret = rb_regenerate_volfiles (volinfo, src_brickinfo, 1); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not regenerate volfiles with pump"); + goto out; + } + } else { + ret = rb_regenerate_volfiles (volinfo, src_brickinfo, 0); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not regenerate volfiles without pump"); + goto out; + } + + } + + sleep (2); + ret = glusterd_volume_start_glusterfs (volinfo, src_brickinfo, + _gf_false); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to start " + "glusterfs, ret: %d", ret); + goto out; + } + +out: + ret = glusterd_nfs_server_start (volinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to start nfs, ret: %d", + ret); + } + return ret; +} + +static int +rb_send_xattr_command (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *src_brickinfo, + glusterd_brickinfo_t *dst_brickinfo, + const char *xattr_key, const char *value) +{ + int ret = -1; + int mntfd = -1; + + ret = rb_get_mntfd (&mntfd); + if (ret) + goto out; + + ret = sys_fsetxattr (mntfd, xattr_key, value, strlen (value) + 1, 0); + if (ret) + gf_log (THIS->name, GF_LOG_DEBUG, "setxattr on key: " + "%s, reason: %s", xattr_key, strerror (errno)); + +out: + return ret; +} + +static int +rb_spawn_dst_brick (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + int ret = -1; + int32_t port = 0; + + priv = THIS->private; + + port = brickinfo->port; + GF_ASSERT (port); + + runinit (&runner); + runner_add_arg (&runner, SBIN_DIR"/glusterfs"); + runner_argprintf (&runner, "-f" "%s/vols/%s/"RB_DSTBRICKVOL_FILENAME, + priv->workdir, volinfo->volname); + runner_argprintf (&runner, "-p" "%s/vols/%s/"RB_DSTBRICK_PIDFILE, + priv->workdir, volinfo->volname); + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf (&runner, "src-server.listen-port=%d", port); + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); + + ret = runner_run_nowait (&runner); + if (ret) { + pmap_registry_remove (THIS, 0, brickinfo->path, + GF_PMAP_PORT_BRICKSERVER, NULL); + gf_log ("", GF_LOG_DEBUG, + "Could not start glusterfs"); + goto out; + } + + gf_log ("", GF_LOG_DEBUG, + "Successfully started glusterfs: brick=%s:%s", + 
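/* The destination brick is spawned with the runner API: runinit(),
 * then runner_add_arg()/runner_argprintf() to build the glusterfs
 * command line (volfile, pidfile, listen-port via --xlator-option),
 * and runner_run_nowait() since the brick daemonizes; on failure the
 * provisional pmap registration for the brick path is rolled back. */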
brickinfo->hostname, brickinfo->path); + + ret = 0; + +out: + return ret; +} + +static int +rb_spawn_glusterfs_client (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + struct stat buf = {0,}; + char mntpt[PATH_MAX] = {0,}; + int mntfd = -1; + int ret = -1; + + this = THIS; + priv = this->private; + + GLUSTERD_GET_RB_MNTPT (mntpt, sizeof (mntpt), volinfo); + runinit (&runner); + runner_add_arg (&runner, SBIN_DIR"/glusterfs"); + runner_argprintf (&runner, "-f" "%s/vols/%s/"RB_CLIENTVOL_FILENAME, + priv->workdir, volinfo->volname); + runner_add_arg (&runner, mntpt); + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); + + ret = runner_run_reuse (&runner); + if (ret) { + runner_log (&runner, this->name, GF_LOG_DEBUG, + "Could not start glusterfs"); + runner_end (&runner); + goto out; + } else { + runner_log (&runner, this->name, GF_LOG_DEBUG, + "Successfully started glusterfs"); + runner_end (&runner); + } + + ret = stat (mntpt, &buf); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "stat on mount point %s " + "failed", mntpt); + goto out; + } + + mntfd = open (mntpt, O_DIRECTORY); + if (mntfd == -1) + goto out; + + ret = rb_set_mntfd (mntfd); + if (ret) + goto out; + + runinit (&runner); + runner_add_args (&runner, "/bin/umount", "-l", mntpt, NULL); + ret = runner_run_reuse (&runner); + if (ret) { + runner_log (&runner, this->name, GF_LOG_DEBUG, + "Lazy unmount failed on maintenance client"); + runner_end (&runner); + goto out; + } else { + runner_log (&runner, this->name, GF_LOG_DEBUG, + "Successfully unmounted maintenance client"); + runner_end (&runner); + } + + +out: + + return ret; +} + +static const char *client_volfile_str = "volume mnt-client\n" + " type protocol/client\n" + " option remote-host %s\n" + " option remote-subvolume %s\n" + " option remote-port %d\n" + " option transport-type %s\n" + " option username %s\n" + " option password %s\n" + "end-volume\n" + "volume mnt-wb\n" + " type performance/write-behind\n" + " subvolumes mnt-client\n" + "end-volume\n"; + +static int +rb_generate_client_volfile (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *src_brickinfo) +{ + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + FILE *file = NULL; + char filename[PATH_MAX] = {0, }; + int ret = -1; + int fd = -1; + char *ttype = NULL; + + this = THIS; + priv = this->private; + + gf_log (this->name, GF_LOG_DEBUG, "Creating volfile"); + + snprintf (filename, PATH_MAX, "%s/vols/%s/%s", + priv->workdir, volinfo->volname, + RB_CLIENTVOL_FILENAME); + + fd = open (filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR); + if (fd < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s", strerror (errno)); + goto out; + } + close (fd); + + file = fopen (filename, "w+"); + if (!file) { + gf_log (this->name, GF_LOG_DEBUG, + "Open of volfile failed"); + ret = -1; + goto out; + } + + GF_ASSERT (src_brickinfo->port); + + ttype = glusterd_get_trans_type_rb (volinfo->transport_type); + if (NULL == ttype){ + ret = -1; + goto out; + } + + fprintf (file, client_volfile_str, src_brickinfo->hostname, + src_brickinfo->path, + src_brickinfo->port, ttype, + glusterd_auth_get_username (volinfo), + glusterd_auth_get_password (volinfo)); + + fclose (file); + GF_FREE (ttype); + + ret = 0; + +out: + return ret; +} + +static const char *dst_brick_volfile_str = "volume src-posix\n" + " type storage/posix\n" + " option directory %s\n" + " option volume-id %s\n" + "end-volume\n" + "volume %s\n" + 
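/* (Generated graph for the destination brick: storage/posix ->
 * features/locks -> protocol/server, with the brick path doubling as
 * the locks volume name and as the auth.login/auth.addr key.) */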
" type features/locks\n" + " subvolumes src-posix\n" + "end-volume\n" + "volume src-server\n" + " type protocol/server\n" + " option auth.login.%s.allow %s\n" + " option auth.login.%s.password %s\n" + " option auth.addr.%s.allow *\n" + " option transport-type %s\n" + " subvolumes %s\n" + "end-volume\n"; + +static int +rb_generate_dst_brick_volfile (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *dst_brickinfo) +{ + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + FILE *file = NULL; + char filename[PATH_MAX] = {0, }; + int ret = -1; + int fd = -1; + char *trans_type = NULL; + + this = THIS; + priv = this->private; + + gf_log (this->name, GF_LOG_DEBUG, + "Creating volfile"); + + snprintf (filename, PATH_MAX, "%s/vols/%s/%s", + priv->workdir, volinfo->volname, + RB_DSTBRICKVOL_FILENAME); + + fd = creat (filename, S_IRUSR | S_IWUSR); + if (fd < 0) { + gf_log (this->name, GF_LOG_ERROR, + "%s", strerror (errno)); + goto out; + } + close (fd); + + file = fopen (filename, "w+"); + if (!file) { + gf_log (this->name, GF_LOG_DEBUG, + "Open of volfile failed"); + ret = -1; + goto out; + } + + trans_type = glusterd_get_trans_type_rb (volinfo->transport_type); + if (NULL == trans_type){ + ret = -1; + goto out; + } + + fprintf (file, dst_brick_volfile_str, + dst_brickinfo->path, + uuid_utoa (volinfo->volume_id), + dst_brickinfo->path, + dst_brickinfo->path, + glusterd_auth_get_username (volinfo), + glusterd_auth_get_username (volinfo), + glusterd_auth_get_password (volinfo), + dst_brickinfo->path, + trans_type, + dst_brickinfo->path); + + GF_FREE (trans_type); + + fclose (file); + + ret = 0; + +out: + return ret; +} + + +static int +rb_mountpoint_mkdir (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *src_brickinfo) +{ + char mntpt[PATH_MAX] = {0,}; + int ret = -1; + + GLUSTERD_GET_RB_MNTPT (mntpt, sizeof (mntpt), volinfo); + ret = mkdir (mntpt, 0777); + if (ret && (errno != EEXIST)) { + gf_log ("", GF_LOG_DEBUG, "mkdir failed, due to %s", + strerror (errno)); + goto out; + } + + ret = 0; + +out: + return ret; +} + +static int +rb_mountpoint_rmdir (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *src_brickinfo) +{ + char mntpt[PATH_MAX] = {0,}; + int ret = -1; + + GLUSTERD_GET_RB_MNTPT (mntpt, sizeof (mntpt), volinfo); + ret = rmdir (mntpt); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "rmdir failed, reason: %s", + strerror (errno)); + goto out; + } + + ret = 0; + +out: + return ret; +} + +static int +rb_destroy_maintenance_client (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *src_brickinfo) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char volfile[PATH_MAX] = {0,}; + int ret = -1; + int mntfd = -1; + + this = THIS; + priv = this->private; + + ret = rb_get_mntfd (&mntfd); + if (ret) + goto out; + + ret = close (mntfd); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to close mount " + "point directory"); + goto out; + } + + ret = rb_mountpoint_rmdir (volinfo, src_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "rmdir of mountpoint " + "failed"); + goto out; + } + + snprintf (volfile, PATH_MAX, "%s/vols/%s/%s", priv->workdir, + volinfo->volname, RB_CLIENTVOL_FILENAME); + + ret = unlink (volfile); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "unlink of %s failed, reason: %s", + volfile, strerror (errno)); + goto out; + } + + ret = 0; + +out: + return ret; +} + +static int +rb_spawn_maintenance_client (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *src_brickinfo) +{ + int ret = -1; + + ret = rb_generate_client_volfile (volinfo, src_brickinfo); 
+ if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to generate client " + "volfile"); + goto out; + } + + ret = rb_mountpoint_mkdir (volinfo, src_brickinfo); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to mkdir " + "mountpoint"); + goto out; + } + + ret = rb_spawn_glusterfs_client (volinfo, src_brickinfo); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to start glusterfs"); + goto out; + } + + ret = 0; +out: + return ret; +} + +static int +rb_spawn_destination_brick (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *dst_brickinfo) + +{ + int ret = -1; + + ret = rb_generate_dst_brick_volfile (volinfo, dst_brickinfo); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to generate client " + "volfile"); + goto out; + } + + ret = rb_spawn_dst_brick (volinfo, dst_brickinfo); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to start glusterfs"); + goto out; + } + + ret = 0; +out: + return ret; +} + +static int +rb_kill_destination_brick (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *dst_brickinfo) +{ + glusterd_conf_t *priv = NULL; + char pidfile[PATH_MAX] = {0,}; + + priv = THIS->private; + + snprintf (pidfile, PATH_MAX, "%s/vols/%s/%s", + priv->workdir, volinfo->volname, + RB_DSTBRICK_PIDFILE); + + return glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_true); +} + +static int +rb_get_xattr_command (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *src_brickinfo, + glusterd_brickinfo_t *dst_brickinfo, + const char *xattr_key, + char *value) +{ + int ret = -1; + int mntfd = -1; + + ret = rb_get_mntfd (&mntfd); + if (ret) + goto out; + + ret = sys_fgetxattr (mntfd, xattr_key, value, 8192); + + if (ret < 0) { + gf_log (THIS->name, GF_LOG_DEBUG, "getxattr on key: %s " + "failed, reason: %s", xattr_key, strerror (errno)); + goto out; + } + + ret = 0; +out: + return ret; +} + +static int +rb_send_cmd (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *src, + glusterd_brickinfo_t *dst, + gf1_cli_replace_op op) +{ + char start_value[8192] = {0,}; + char status_str[8192] = {0,}; + char *status_reply = NULL; + char *tmp = NULL; + char *save_ptr = NULL; + char filename[PATH_MAX] = {0,}; + char *current_file = NULL; + uint64_t files = 0; + int status = 0; + dict_t *ctx = NULL; + int ret = 0; + + GF_ASSERT (volinfo); + GF_ASSERT (src); + GF_ASSERT (dst); + GF_ASSERT ((op > GF_REPLACE_OP_NONE) + && (op <= GF_REPLACE_OP_COMMIT_FORCE)); + + switch (op) { + case GF_REPLACE_OP_START: + { + snprintf (start_value, sizeof (start_value), + "%s:%s:%d", dst->hostname, dst->path, + dst->port); + ret = rb_send_xattr_command (volinfo, src, dst, + RB_PUMP_CMD_START, + start_value); + } + break; + case GF_REPLACE_OP_PAUSE: + { + ret = rb_send_xattr_command (volinfo, src, dst, + RB_PUMP_CMD_PAUSE, + RB_PUMP_DEF_ARG); + } + break; + case GF_REPLACE_OP_ABORT: + { + ret = rb_send_xattr_command (volinfo, src, dst, + RB_PUMP_CMD_ABORT, + RB_PUMP_DEF_ARG); + } + break; + case GF_REPLACE_OP_COMMIT: + { + ret = rb_send_xattr_command (volinfo, src, dst, + RB_PUMP_CMD_COMMIT, + RB_PUMP_DEF_ARG); + } + break; + case GF_REPLACE_OP_STATUS: + { + ret = rb_get_xattr_command (volinfo, src, dst, + RB_PUMP_CMD_STATUS, + status_str); + if (ret) + goto out; + + ctx = glusterd_op_get_ctx (); + GF_ASSERT (ctx); + if (!ctx) { + ret = -1; + gf_log (THIS->name, GF_LOG_CRITICAL, + "ctx is not present."); + goto out; + } + + /* Split status reply into different parts */ + tmp = strtok_r (status_str, ":", &save_ptr); + if (!tmp) { + ret = -1; + gf_log (THIS->name, GF_LOG_ERROR, + "Couldn't tokenize status string"); + goto out; 
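/* The pump status reply fetched via RB_PUMP_CMD_STATUS is a ":"
 * separated string of the form
 *     status=<n>:no_of_files=<count>[:current_file=<path>]
 * and is picked apart below with strtok_r()/sscanf() into the op ctx
 * keys "status", "files" and "current_file". */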
+ } + sscanf (tmp, "status=%d", &status); + ret = dict_set_int32 (ctx, "status", status); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Couldn't " + "set rb status in context"); + goto out; + } + + tmp = NULL; + tmp = strtok_r (NULL, ":", &save_ptr); + if (!tmp) { + ret = -1; + gf_log (THIS->name, GF_LOG_ERROR, + "Couldn't tokenize status string"); + goto out; + } + sscanf (tmp, "no_of_files=%"SCNu64, &files); + ret = dict_set_uint64 (ctx, "files", files); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Couldn't " + "set rb files in context"); + goto out; + } + + if (status == 0) { + tmp = NULL; + tmp = strtok_r (NULL, ":", &save_ptr); + if (!tmp) { + ret = -1; + gf_log (THIS->name, GF_LOG_ERROR, + "Couldn't tokenize status " + "string"); + goto out; + } + sscanf (tmp, "current_file=%s", filename); + current_file = gf_strdup (filename); + ret = dict_set_dynstr (ctx, "current_file", + current_file); + if (ret) { + GF_FREE (current_file); + gf_log (THIS->name, GF_LOG_ERROR, + "Couldn't set rb current file " + "in context"); + goto out; + } + } + if (status) { + ret = gf_asprintf (&status_reply, + "Number of files migrated = %" + PRIu64"\tMigration complete", + files); + } else { + ret = gf_asprintf (&status_reply, + "Number of files migrated = %" + PRIu64"\tCurrent file = %s", + files, filename); + } + if (ret == -1) { + gf_log (THIS->name, GF_LOG_ERROR, + "Failed to create status_reply string"); + goto out; + } + ret = dict_set_dynstr (ctx, "status-reply", + status_reply); + if (ret) { + GF_FREE (status_reply); + gf_log (THIS->name, GF_LOG_ERROR, "Couldn't " + "set rb status response in context."); + goto out; + } + } + break; + default: + { + GF_ASSERT (0); + ret = -1; + gf_log (THIS->name, GF_LOG_CRITICAL, "Invalid replace" + " brick subcommand."); + } + break; + } +out: + return ret; +} + +static int +rb_do_operation (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *src_brickinfo, + glusterd_brickinfo_t *dst_brickinfo, + gf1_cli_replace_op op) +{ + + int ret = -1; + char op_str[256] = {0, }; + xlator_t *this = NULL; + + this = THIS; + + ret = rb_spawn_maintenance_client (volinfo, src_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Could not spawn " + "maintenance client"); + goto umount; + } + + ret = rb_send_cmd (volinfo, src_brickinfo, dst_brickinfo, op); + if (ret) { + (void) glusterd_get_replace_op_str (op, op_str); + gf_log (this->name, GF_LOG_DEBUG, "Sending replace-brick " + "sub-command %s failed.", op_str); + } + +umount: + if (rb_destroy_maintenance_client (volinfo, src_brickinfo)) + gf_log (this->name, GF_LOG_DEBUG, "Failed to destroy " + "maintenance client"); + + return ret; +} + +/* Set src-brick's port number to be used in the maintenance mount + * after all commit acks are received. 
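* The port travels as "src-brick-port" in the request dict; on the
* source host it is re-resolved from the local portmap registry and
* echoed into both the rsp dict and the op ctx.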
+ */ +static int +rb_update_srcbrick_port (glusterd_brickinfo_t *src_brickinfo, dict_t *rsp_dict, + dict_t *req_dict, int32_t replace_op) +{ + xlator_t *this = NULL; + dict_t *ctx = NULL; + int ret = 0; + int dict_ret = 0; + int src_port = 0; + + this = THIS; + + dict_ret = dict_get_int32 (req_dict, "src-brick-port", &src_port); + if (src_port) + src_brickinfo->port = src_port; + + if (gf_is_local_addr (src_brickinfo->hostname)) { + gf_log ("", GF_LOG_INFO, + "adding src-brick port no"); + + src_brickinfo->port = pmap_registry_search (this, + src_brickinfo->path, GF_PMAP_PORT_BRICKSERVER); + if (!src_brickinfo->port && + replace_op != GF_REPLACE_OP_COMMIT_FORCE ) { + gf_log ("", GF_LOG_ERROR, + "Src brick port not available"); + ret = -1; + goto out; + } + + if (rsp_dict) { + ret = dict_set_int32 (rsp_dict, "src-brick-port", src_brickinfo->port); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not set src-brick port no"); + goto out; + } + } + + ctx = glusterd_op_get_ctx (); + if (ctx) { + ret = dict_set_int32 (ctx, "src-brick-port", src_brickinfo->port); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not set src-brick port no"); + goto out; + } + } + + } + +out: + return ret; + +} + +static int +rb_update_dstbrick_port (glusterd_brickinfo_t *dst_brickinfo, dict_t *rsp_dict, + dict_t *req_dict, int32_t replace_op) +{ + dict_t *ctx = NULL; + int ret = 0; + int dict_ret = 0; + int dst_port = 0; + + dict_ret = dict_get_int32 (req_dict, "dst-brick-port", &dst_port); + if (!dict_ret) + dst_brickinfo->port = dst_port; + + + if (gf_is_local_addr (dst_brickinfo->hostname)) { + gf_log ("", GF_LOG_INFO, + "adding dst-brick port no"); + + if (rsp_dict) { + ret = dict_set_int32 (rsp_dict, "dst-brick-port", + dst_brickinfo->port); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not set dst-brick port no in rsp dict"); + goto out; + } + } + + ctx = glusterd_op_get_ctx (); + if (ctx) { + ret = dict_set_int32 (ctx, "dst-brick-port", + dst_brickinfo->port); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not set dst-brick port no"); + goto out; + } + } + } +out: + return ret; +} + +static int +glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo, + char *old_brick, char *new_brick) +{ + glusterd_brickinfo_t *old_brickinfo = NULL; + glusterd_brickinfo_t *new_brickinfo = NULL; + int32_t ret = -1; + + GF_ASSERT (volinfo); + + ret = glusterd_brickinfo_new_from_brick (new_brick, + &new_brickinfo); + if (ret) + goto out; + + ret = glusterd_resolve_brick (new_brickinfo); + + if (ret) + goto out; + + ret = glusterd_volume_brickinfo_get_by_brick (old_brick, + volinfo, &old_brickinfo); + if (ret) + goto out; + + list_add_tail (&new_brickinfo->brick_list, + &old_brickinfo->brick_list); + + volinfo->brick_count++; + + ret = glusterd_op_perform_remove_brick (volinfo, old_brick, 1, NULL); + if (ret) + goto out; + + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) + goto out; + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + ret = glusterd_brick_start (volinfo, new_brickinfo, _gf_false); + if (ret) + goto out; + } + +out: + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) +{ + int ret = 0; + dict_t *ctx = NULL; + int replace_op = 0; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + glusterd_brickinfo_t *src_brickinfo = NULL; + glusterd_brickinfo_t *dst_brickinfo = 
NULL; + char *task_id_str = NULL; + + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "src-brick", &src_brick); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get src brick"); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "src brick=%s", src_brick); + + ret = dict_get_str (dict, "dst-brick", &dst_brick); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get dst brick"); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "dst brick=%s", dst_brick); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "dict_get on operation failed"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); + goto out; + } + + ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, + &src_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Unable to get src-brickinfo"); + goto out; + } + + + ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get " + "replace brick destination brickinfo"); + goto out; + } + + ret = glusterd_resolve_brick (dst_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Unable to resolve dst-brickinfo"); + goto out; + } + + ret = rb_update_srcbrick_port (src_brickinfo, rsp_dict, + dict, replace_op); + if (ret) + goto out; + + + if ((GF_REPLACE_OP_START != replace_op)) { + + /* Set task-id, if available, in op_ctx dict for operations + * other than start + */ + if (is_origin_glusterd (dict)) { + ctx = glusterd_op_get_ctx(); + if (!ctx) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get op_ctx"); + ret = -1; + goto out; + } + if (!uuid_is_null (volinfo->rep_brick.rb_id)) { + ret = glusterd_copy_uuid_to_dict + (volinfo->rep_brick.rb_id, ctx, + GF_REPLACE_BRICK_TID_KEY); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set " + "replace-brick-id"); + goto out; + } + } + } + } + ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, + dict, replace_op); + if (ret) + goto out; + + switch (replace_op) { + case GF_REPLACE_OP_START: + { + ret = dict_get_str (dict, GF_REPLACE_BRICK_TID_KEY, &task_id_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Missing replace-brick-id"); + ret = 0; + } else { + uuid_parse (task_id_str, volinfo->rep_brick.rb_id); + } + + if (gf_is_local_addr (dst_brickinfo->hostname)) { + gf_log (this->name, GF_LOG_INFO, + "I AM THE DESTINATION HOST"); + if (!glusterd_is_rb_paused (volinfo)) { + ret = rb_spawn_destination_brick + (volinfo, dst_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to spawn destination " + "brick"); + goto out; + } + } else { + gf_log (this->name, GF_LOG_ERROR, + "Replace brick is already started=> no " + "need to restart dst brick "); + } + } + + + if (gf_is_local_addr (src_brickinfo->hostname)) { + ret = rb_src_brick_restart (volinfo, src_brickinfo, + 1); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Could not restart src-brick"); + goto out; + } + } + + if (gf_is_local_addr (dst_brickinfo->hostname)) { + gf_log (this->name, GF_LOG_INFO, + "adding dst-brick port no"); + + ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict, + dict, replace_op); + if (ret) + goto out; + } + + 
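/* Status lifecycle driven by glusterd_set_rb_status() in this switch:
 * GF_RB_STATUS_NONE -> STARTED on a successful start, STARTED ->
 * PAUSED on pause, and back to NONE on abort or (forced) commit, at
 * which point the src/dst brickinfo pointers and rb_id are cleared. */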
glusterd_set_rb_status (volinfo, GF_RB_STATUS_STARTED); + break; + } + + case GF_REPLACE_OP_COMMIT: + { + ctx = glusterd_op_get_ctx (); + if (ctx) { + ret = rb_do_operation (volinfo, src_brickinfo, + dst_brickinfo, + GF_REPLACE_OP_COMMIT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Commit operation failed"); + goto out; + } + } + } + /* fall through */ + case GF_REPLACE_OP_COMMIT_FORCE: + { + if (gf_is_local_addr (dst_brickinfo->hostname)) { + gf_log (this->name, GF_LOG_DEBUG, + "I AM THE DESTINATION HOST"); + ret = rb_kill_destination_brick (volinfo, + dst_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to cleanup dst brick"); + goto out; + } + } + + ret = glusterd_nodesvcs_stop (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to stop nfs server, ret: %d", ret); + } + + ret = glusterd_op_perform_replace_brick (volinfo, src_brick, + dst_brick); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Unable to add " + "dst-brick: %s to volume: %s", dst_brick, + volinfo->volname); + (void) glusterd_nodesvcs_handle_graph_change (volinfo); + goto out; + } + + volinfo->rebal.defrag_status = 0; + + ret = glusterd_nodesvcs_handle_graph_change (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Failed to generate nfs volume file"); + } + + + ret = glusterd_fetchspec_notify (THIS); + glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); + glusterd_brickinfo_delete (volinfo->rep_brick.dst_brick); + volinfo->rep_brick.src_brick = NULL; + volinfo->rep_brick.dst_brick = NULL; + uuid_clear (volinfo->rep_brick.rb_id); + } + break; + + case GF_REPLACE_OP_PAUSE: + { + gf_log (this->name, GF_LOG_DEBUG, + "Received pause - doing nothing"); + ctx = glusterd_op_get_ctx (); + if (ctx) { + ret = rb_do_operation (volinfo, src_brickinfo, + dst_brickinfo, + GF_REPLACE_OP_PAUSE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Pause operation failed"); + goto out; + } + } + + glusterd_set_rb_status (volinfo, GF_RB_STATUS_PAUSED); + } + break; + + case GF_REPLACE_OP_ABORT: + { + + ctx = glusterd_op_get_ctx (); + if (ctx) { + ret = rb_do_operation (volinfo, src_brickinfo, + dst_brickinfo, + GF_REPLACE_OP_ABORT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Abort operation failed"); + goto out; + } + } + + if (gf_is_local_addr (src_brickinfo->hostname)) { + ret = rb_src_brick_restart (volinfo, src_brickinfo, + 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Couldn't restart src brick " + "with pump xlator disabled."); + goto out; + } + } + + if (gf_is_local_addr (dst_brickinfo->hostname)) { + gf_log (this->name, GF_LOG_INFO, + "I AM THE DESTINATION HOST"); + ret = rb_kill_destination_brick (volinfo, dst_brickinfo); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Failed to kill destination brick"); + goto out; + } + } + glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE); + glusterd_brickinfo_delete (volinfo->rep_brick.dst_brick); + volinfo->rep_brick.src_brick = NULL; + volinfo->rep_brick.dst_brick = NULL; + } + break; + + case GF_REPLACE_OP_STATUS: + { + gf_log (this->name, GF_LOG_DEBUG, + "received status - doing nothing"); + ctx = glusterd_op_get_ctx (); + if (ctx) { + if (glusterd_is_rb_paused (volinfo)) { + ret = dict_set_str (ctx, "status-reply", + "replace brick has been paused"); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "failed to set pump status" + " in ctx"); + goto out; + } + + ret = rb_do_operation (volinfo, src_brickinfo, + dst_brickinfo, + GF_REPLACE_OP_STATUS); + if (ret) + goto out; + } + + } + break; + + default: + 
ret = -1; + goto out; + } + if (!ret && replace_op != GF_REPLACE_OP_STATUS) + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Couldn't store" + " replace brick operation's state"); + +out: + return ret; +} + +void +glusterd_do_replace_brick (void *data) +{ + glusterd_volinfo_t *volinfo = NULL; + int32_t op = 0; + int32_t src_port = 0; + int32_t dst_port = 0; + dict_t *dict = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + char *volname = NULL; + glusterd_brickinfo_t *src_brickinfo = NULL; + glusterd_brickinfo_t *dst_brickinfo = NULL; + glusterd_conf_t *priv = NULL; + uuid_t *txn_id = &global_txn_id; + + int ret = 0; + + dict = data; + + GF_ASSERT (THIS); + + priv = THIS->private; + + if (priv->timer) { + gf_timer_call_cancel (THIS->ctx, priv->timer); + priv->timer = NULL; + gf_log ("", GF_LOG_DEBUG, + "Cancelling timer thread"); + } + + gf_log ("", GF_LOG_DEBUG, + "Replace brick operation detected"); + + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + ret = dict_get_int32 (dict, "operation", &op); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "dict_get on operation failed"); + goto out; + } + ret = dict_get_str (dict, "src-brick", &src_brick); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get src brick"); + goto out; + } + + gf_log ("", GF_LOG_DEBUG, + "src brick=%s", src_brick); + + ret = dict_get_str (dict, "dst-brick", &dst_brick); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get dst brick"); + goto out; + } + + gf_log ("", GF_LOG_DEBUG, + "dst brick=%s", dst_brick); + + ret = dict_get_str (dict, "volname", &volname); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to allocate memory"); + goto out; + } + + ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo, + &src_brickinfo); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo"); + goto out; + } + + ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo); + if (!dst_brickinfo) { + gf_log ("", GF_LOG_DEBUG, "Unable to get dst-brickinfo"); + goto out; + } + + ret = glusterd_resolve_brick (dst_brickinfo); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to resolve dst-brickinfo"); + goto out; + } + + ret = dict_get_int32 (dict, "src-brick-port", &src_port); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get src-brick port"); + goto out; + } + + ret = dict_get_int32 (dict, "dst-brick-port", &dst_port); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get dst-brick port"); + } + + dst_brickinfo->port = dst_port; + src_brickinfo->port = src_port; + + switch (op) { + case GF_REPLACE_OP_START: + if (!dst_port) { + ret = -1; + goto out; + } + + ret = rb_do_operation (volinfo, src_brickinfo, dst_brickinfo, + GF_REPLACE_OP_START); + if (ret) + goto out; + break; + case GF_REPLACE_OP_PAUSE: + case GF_REPLACE_OP_ABORT: + case GF_REPLACE_OP_COMMIT: + case GF_REPLACE_OP_COMMIT_FORCE: + case GF_REPLACE_OP_STATUS: + break; + default: + ret = -1; + goto out; + } + +out: + if (ret) + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + txn_id, NULL); + else + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, + txn_id, NULL); + + glusterd_op_sm (); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c new file 
mode 100644 index 000000000..d5200a4ae --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -0,0 +1,1974 @@ +/* + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "rpc-clnt.h" +#include "glusterd1-xdr.h" +#include "cli1-xdr.h" + +#include "xdr-generic.h" + +#include "compat-errno.h" +#include "glusterd-op-sm.h" +#include "glusterd-sm.h" +#include "glusterd.h" +#include "protocol-common.h" +#include "glusterd-utils.h" +#include "common-utils.h" +#include <sys/uio.h> + + +#define SERVER_PATH_MAX (16 * 1024) + + +extern glusterd_op_info_t opinfo; +extern uuid_t global_txn_id; + +int32_t +glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, + int32_t op_errno, rpcsvc_request_t *req, + void *op_ctx, char *op_errstr) +{ + int32_t ret = -1; + void *cli_rsp = NULL; + dict_t *ctx = NULL; + char *free_ptr = NULL; + glusterd_conf_t *conf = NULL; + xdrproc_t xdrproc = NULL; + char *errstr = NULL; + int32_t status = 0; + int32_t count = 0; + gf_cli_rsp rsp = {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + + ctx = op_ctx; + + switch (op) { + case GD_OP_REMOVE_BRICK: + { + if (ctx) + ret = dict_get_str (ctx, "errstr", &errstr); + break; + } + case GD_OP_RESET_VOLUME: + { + if (op_ret && !op_errstr) + errstr = "Error while resetting options"; + break; + } + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + { + if (ctx) { + ret = dict_get_int32 (ctx, "status", &status); + if (ret) { + gf_log (this->name, GF_LOG_TRACE, + "failed to get status"); + } + } + break; + } + case GD_OP_GSYNC_CREATE: + case GD_OP_GSYNC_SET: + { + if (ctx) { + ret = dict_get_str (ctx, "errstr", &errstr); + ret = dict_set_str (ctx, "glusterd_workdir", conf->workdir); + /* swallow error here, that will be re-triggered in cli */ + + } + break; + + } + case GD_OP_QUOTA: + { + if (ctx && !op_errstr) { + ret = dict_get_str (ctx, "errstr", &errstr); + } + break; + } + case GD_OP_PROFILE_VOLUME: + { + if (ctx && dict_get_int32 (ctx, "count", &count)) { + ret = dict_set_int32 (ctx, "count", 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set count in dictionary"); + } + } + break; + } + case GD_OP_START_BRICK: + case GD_OP_STOP_BRICK: + { + gf_log (this->name, GF_LOG_DEBUG, "op '%s' not supported", + gd_op_list[op]); + break; + } + case GD_OP_NONE: + case GD_OP_MAX: + { + gf_log (this->name, GF_LOG_ERROR, "invalid operation"); + break; + } + case GD_OP_CREATE_VOLUME: + case GD_OP_START_VOLUME: + case GD_OP_STOP_VOLUME: + case GD_OP_DELETE_VOLUME: + case GD_OP_DEFRAG_VOLUME: + case GD_OP_ADD_BRICK: + case GD_OP_LOG_ROTATE: + case GD_OP_SYNC_VOLUME: + case GD_OP_STATEDUMP_VOLUME: + case GD_OP_REPLACE_BRICK: + case GD_OP_STATUS_VOLUME: + case GD_OP_SET_VOLUME: + case GD_OP_LIST_VOLUME: + case GD_OP_CLEARLOCKS_VOLUME: + case GD_OP_HEAL_VOLUME: + case GD_OP_SNAP: + { + /*nothing specific to be done*/ + break; + } + case GD_OP_COPY_FILE: + { + if (ctx) + ret = dict_get_str (ctx, "errstr", &errstr); + break; + } + case GD_OP_SYS_EXEC: + { + if (ctx) { + ret = dict_get_str (ctx, "errstr", &errstr); + ret = dict_set_str (ctx, "glusterd_workdir", 
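/* After this per-op fix-up switch, the ctx dict is serialized with
 * dict_allocate_and_serialize() into rsp.dict and, for rebalance
 * status, rsp.op_errno is overloaded to carry the status code. */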
+ conf->workdir); + } + break; + } + } + + rsp.op_ret = op_ret; + rsp.op_errno = errno; + if (errstr) + rsp.op_errstr = errstr; + else if (op_errstr) + rsp.op_errstr = op_errstr; + + if (!rsp.op_errstr) + rsp.op_errstr = ""; + + if (ctx) { + ret = dict_allocate_and_serialize (ctx, &rsp.dict.dict_val, + &rsp.dict.dict_len); + if (ret < 0 ) + gf_log (this->name, GF_LOG_ERROR, "failed to " + "serialize buffer"); + else + free_ptr = rsp.dict.dict_val; + } + + /* needed by 'rebalance status' */ + if (status) + rsp.op_errno = status; + + cli_rsp = &rsp; + xdrproc = (xdrproc_t) xdr_gf_cli_rsp; + + glusterd_to_cli (req, cli_rsp, NULL, 0, NULL, + xdrproc, ctx); + ret = 0; + + GF_FREE (free_ptr); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_big_locked_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe, fop_cbk_fn_t fn) +{ + glusterd_conf_t *priv = THIS->private; + int ret = -1; + + synclock_lock (&priv->big_lock); + ret = fn (req, iov, count, myframe); + synclock_unlock (&priv->big_lock); + + return ret; +} + +int +__glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_probe_rsp rsp = {{0},}; + int ret = 0; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_friend_sm_event_t *event = NULL; + glusterd_probe_ctx_t *ctx = NULL; + + if (-1 == req->rpc_status) { + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_probe_rsp); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "error"); + //rsp.op_ret = -1; + //rsp.op_errno = EINVAL; + goto out; + } + + gf_log ("glusterd", GF_LOG_INFO, + "Received probe resp from uuid: %s, host: %s", + uuid_utoa (rsp.uuid), rsp.hostname); + if (rsp.op_ret != 0) { + ctx = ((call_frame_t *)myframe)->local; + ((call_frame_t *)myframe)->local = NULL; + + GF_ASSERT (ctx); + + if (ctx->req) { + glusterd_xfer_cli_probe_resp (ctx->req, rsp.op_ret, + rsp.op_errno, + rsp.op_errstr, + ctx->hostname, ctx->port, + ctx->dict); + } + + glusterd_destroy_probe_ctx (ctx); + (void) glusterd_friend_remove (rsp.uuid, rsp.hostname); + ret = rsp.op_ret; + goto out; + } + ret = glusterd_friend_find (rsp.uuid, rsp.hostname, &peerinfo); + if (ret) { + GF_ASSERT (0); + } + + if (strncasecmp (rsp.hostname, peerinfo->hostname, 1024)) { + gf_log (THIS->name, GF_LOG_INFO, "Host: %s with uuid: %s " + "already present in cluster with alias hostname: %s", + rsp.hostname, uuid_utoa (rsp.uuid), peerinfo->hostname); + + ctx = ((call_frame_t *)myframe)->local; + ((call_frame_t *)myframe)->local = NULL; + + GF_ASSERT (ctx); + + rsp.op_errno = GF_PROBE_FRIEND; + if (ctx->req) { + glusterd_xfer_cli_probe_resp (ctx->req, rsp.op_ret, + rsp.op_errno, + rsp.op_errstr, + ctx->hostname, ctx->port, + ctx->dict); + } + + glusterd_destroy_probe_ctx (ctx); + (void) glusterd_friend_remove (NULL, rsp.hostname); + ret = rsp.op_ret; + goto out; + } + + uuid_copy (peerinfo->uuid, rsp.uuid); + + ret = glusterd_friend_sm_new_event + (GD_FRIEND_EVENT_INIT_FRIEND_REQ, &event); + + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, + "Unable to get event"); + goto out; + } + + event->peerinfo = peerinfo; + event->ctx = ((call_frame_t *)myframe)->local; + ((call_frame_t *)myframe)->local = NULL; + ret = glusterd_friend_sm_inject_event (event); + + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + + gf_log ("glusterd", GF_LOG_INFO, "Received resp to probe req"); + +out: + free (rsp.hostname);//malloced by xdr + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int 
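/* Callback convention used throughout this file: the public *_cbk
 * symbol only takes priv->big_lock around a __*_cbk worker via
 * glusterd_big_locked_cbk() above, e.g. (hypothetical name):
 *
 *     int
 *     glusterd_foo_cbk (struct rpc_req *req, struct iovec *iov,
 *                       int count, void *myframe)
 *     {
 *             return glusterd_big_locked_cbk (req, iov, count, myframe,
 *                                             __glusterd_foo_cbk);
 *     }
 */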
+glusterd_probe_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_probe_cbk); +} + + +int +__glusterd_friend_add_cbk (struct rpc_req * req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_friend_rsp rsp = {{0},}; + int ret = -1; + glusterd_friend_sm_event_t *event = NULL; + glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + glusterd_probe_ctx_t *ctx = NULL; + glusterd_friend_update_ctx_t *ev_ctx = NULL; + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_rsp); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "error"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + + gf_log ("glusterd", GF_LOG_INFO, + "Received %s from uuid: %s, host: %s, port: %d", + (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname, rsp.port); + + ret = glusterd_friend_find (rsp.uuid, rsp.hostname, &peerinfo); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "received friend add response from" + " unknown peer uuid: %s", uuid_utoa (rsp.uuid)); + goto out; + } + + if (op_ret) + event_type = GD_FRIEND_EVENT_RCVD_RJT; + else + event_type = GD_FRIEND_EVENT_RCVD_ACC; + + ret = glusterd_friend_sm_new_event (event_type, &event); + + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, + "Unable to get event"); + goto out; + } + event->peerinfo = peerinfo; + ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), + gf_gld_mt_friend_update_ctx_t); + if (!ev_ctx) { + ret = -1; + goto out; + } + + uuid_copy (ev_ctx->uuid, rsp.uuid); + ev_ctx->hostname = gf_strdup (rsp.hostname); + + event->ctx = ev_ctx; + ret = glusterd_friend_sm_inject_event (event); + + if (ret) + goto out; + +out: + ctx = ((call_frame_t *)myframe)->local; + ((call_frame_t *)myframe)->local = NULL; + + GF_ASSERT (ctx); + + if (ctx->req)//reverse probe doesn't have req + ret = glusterd_xfer_cli_probe_resp (ctx->req, op_ret, op_errno, + NULL, ctx->hostname, + ctx->port, ctx->dict); + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + if (ctx) + glusterd_destroy_probe_ctx (ctx); + free (rsp.hostname);//malloced by xdr + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int +glusterd_friend_add_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_friend_add_cbk); +} + +int +__glusterd_friend_remove_cbk (struct rpc_req * req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_friend_rsp rsp = {{0},}; + glusterd_conf_t *conf = NULL; + int ret = -1; + glusterd_friend_sm_event_t *event = NULL; + glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + int32_t op_ret = -1; + int32_t op_errno = -1; + glusterd_probe_ctx_t *ctx = NULL; + gf_boolean_t move_sm_now = _gf_true; + + conf = THIS->private; + GF_ASSERT (conf); + + ctx = ((call_frame_t *)myframe)->local; + ((call_frame_t *)myframe)->local = NULL; + GF_ASSERT (ctx); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + move_sm_now = _gf_false; + goto inject; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_rsp); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "error"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto 
respond; + } + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + + gf_log ("glusterd", GF_LOG_INFO, + "Received %s from uuid: %s, host: %s, port: %d", + (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname, rsp.port); + +inject: + ret = glusterd_friend_find (rsp.uuid, ctx->hostname, &peerinfo); + + if (ret) { + //can happen as part of rpc clnt connection cleanup + //when the frame timeout happens after 30 minutes + goto respond; + } + + event_type = GD_FRIEND_EVENT_REMOVE_FRIEND; + + ret = glusterd_friend_sm_new_event (event_type, &event); + + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, + "Unable to get event"); + goto respond; + } + event->peerinfo = peerinfo; + + ret = glusterd_friend_sm_inject_event (event); + + if (ret) + goto respond; + + /*friend_sm would be moved on CLNT_DISCONNECT, consequently + cleaning up peerinfo. Else, we run the risk of triggering + a clnt_destroy within saved_frames_unwind. + */ + op_ret = 0; + + +respond: + ret = glusterd_xfer_cli_deprobe_resp (ctx->req, op_ret, op_errno, NULL, + ctx->hostname, ctx->dict); + if (!ret && move_sm_now) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + + if (ctx) { + glusterd_broadcast_friend_delete (ctx->hostname, NULL); + glusterd_destroy_probe_ctx (ctx); + } + + free (rsp.hostname);//malloced by xdr + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int +glusterd_friend_remove_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_friend_remove_cbk); +} + +int32_t +__glusterd_friend_update_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + gd1_mgmt_friend_update_rsp rsp = {{0}, }; + xlator_t *this = NULL; + + GF_ASSERT (req); + this = THIS; + + if (-1 == req->rpc_status) { + gf_log (this->name, GF_LOG_ERROR, "RPC Error"); + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_friend_update_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize friend" + " update repsonse"); + goto out; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_INFO, "Received %s from uuid: %s", + (ret)?"RJT":"ACC", uuid_utoa (rsp.uuid)); + + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int +glusterd_friend_update_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_friend_update_cbk); +} + +int32_t +__glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_cluster_lock_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + +out: + op_ret = rsp.op_ret; + + gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, + "Received lock %s from uuid: %s", (op_ret) ? 
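/* [editor's note] The lock/unlock/stage/commit callbacks in this file all
   converge on the same tail: rsp.op_ret is folded into either
   GD_OP_EVENT_RCVD_ACC or GD_OP_EVENT_RCVD_RJT, the event is injected
   into the op state machine with a transaction id, and both state
   machines are then cranked. Condensed:

       event_type = op_ret ? GD_OP_EVENT_RCVD_RJT : GD_OP_EVENT_RCVD_ACC;
       ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL);
       if (!ret) {
               glusterd_friend_sm ();
               glusterd_op_sm ();
       }
*/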
"RJT" : "ACC", + uuid_utoa (rsp.uuid)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Lock response received " + "from unknown peer: %s", uuid_utoa (rsp.uuid)); + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup ("Another transaction could be in " + "progress. Please try again after" + " sometime."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_cluster_lock_cbk); +} + +static int32_t +glusterd_mgmt_v3_lock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode mgmt_v3 lock " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + +out: + op_ret = rsp.op_ret; + + txn_id = &rsp.txn_id; + + gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, + "Received mgmt_v3 lock %s from uuid: %s", + (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "mgmt_v3 lock response received " + "from unknown peer: %s", uuid_utoa (rsp.uuid)); + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup ("Another transaction could be in " + "progress. 
Please try again after" + " sometime."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_mgmt_v3_lock_peers_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + glusterd_mgmt_v3_lock_peers_cbk_fn); +} + +static int32_t +glusterd_mgmt_v3_unlock_peers_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode mgmt_v3 unlock " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + +out: + op_ret = rsp.op_ret; + + txn_id = &rsp.txn_id; + + gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, + "Received mgmt_v3 unlock %s from uuid: %s", + (op_ret) ? "RJT" : "ACC", + uuid_utoa (rsp.uuid)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "mgmt_v3 unlock response received " + "from unknown peer: %s", uuid_utoa (rsp.uuid)); + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup ("Another transaction could be in " + "progress. Please try again after" + " sometime."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_mgmt_v3_unlock_peers_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + glusterd_mgmt_v3_unlock_peers_cbk_fn); +} + +int32_t +__glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_cluster_lock_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + +out: + op_ret = rsp.op_ret; + + gf_log (this->name, (op_ret) ? 
GF_LOG_ERROR : GF_LOG_DEBUG, + "Received unlock %s from uuid: %s", + (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Unlock response received " + "from unknown peer %s", uuid_utoa (rsp.uuid)); + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_cluster_unlock_cbk); +} + +int32_t +__glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_stage_op_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + dict_t *dict = NULL; + char err_str[2048] = {0}; + char *peer_str = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + /* use standard allocation because to keep uniformity + in freeing it */ + rsp.op_errstr = strdup ("error"); + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode stage " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + /* use standard allocation because to keep uniformity + in freeing it */ + rsp.op_errstr = strdup ("Failed to decode stage response " + "received from peer."); + goto out; + } + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize rsp-buffer to dictionary"); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } else { + dict->extra_stdfree = rsp.dict.dict_val; + } + } + +out: + op_ret = rsp.op_ret; + + gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, + "Received stage %s from uuid: %s", + (op_ret) ? 
"RJT" : "ACC", uuid_utoa (rsp.uuid)); + + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Stage response received " + "from unknown peer: %s", uuid_utoa (rsp.uuid)); + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + if (strcmp ("", rsp.op_errstr)) { + opinfo.op_errstr = gf_strdup (rsp.op_errstr); + } else { + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (rsp.uuid); + snprintf (err_str, sizeof (err_str), + OPERRSTR_STAGE_FAIL, peer_str); + opinfo.op_errstr = gf_strdup (err_str); + } + if (!opinfo.op_errstr) { + ret = -1; + goto out; + } + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + switch (rsp.op) { + case GD_OP_REPLACE_BRICK: + glusterd_rb_use_rsp_dict (NULL, dict); + break; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + + free (rsp.op_errstr); //malloced by xdr + if (dict) { + if (!dict->extra_stdfree && rsp.dict.dict_val) + free (rsp.dict.dict_val); //malloced by xdr + dict_unref (dict); + } else { + free (rsp.dict.dict_val); //malloced by xdr + } + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_stage_op_cbk); +} + +int32_t +__glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_commit_op_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + dict_t *dict = NULL; + char err_str[2048] = {0}; + char *peer_str = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + /* use standard allocation because to keep uniformity + in freeing it */ + rsp.op_errstr = strdup ("error"); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode commit " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + /* use standard allocation because to keep uniformity + in freeing it */ + rsp.op_errstr = strdup ("Failed to decode commit response " + "received from peer."); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize rsp-buffer to dictionary"); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } else { + dict->extra_stdfree = rsp.dict.dict_val; + } + } + + op_ret = rsp.op_ret; + + gf_log (this->name, (op_ret) ? 
GF_LOG_ERROR : GF_LOG_DEBUG, + "Received commit %s from uuid: %s", + (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid)); + + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Commit response for " + "'Volume %s' received from unknown peer: %s", + gd_op_list[opinfo.op], uuid_utoa (rsp.uuid)); + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + if (strcmp ("", rsp.op_errstr)) { + opinfo.op_errstr = gf_strdup(rsp.op_errstr); + } else { + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (rsp.uuid); + snprintf (err_str, sizeof (err_str), + OPERRSTR_COMMIT_FAIL, peer_str); + opinfo.op_errstr = gf_strdup (err_str); + } + if (!opinfo.op_errstr) { + ret = -1; + goto out; + } + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + switch (rsp.op) { + case GD_OP_REPLACE_BRICK: + ret = glusterd_rb_use_rsp_dict (NULL, dict); + if (ret) + goto out; + break; + + case GD_OP_SYNC_VOLUME: + ret = glusterd_sync_use_rsp_dict (NULL, dict); + if (ret) + goto out; + break; + + case GD_OP_PROFILE_VOLUME: + ret = glusterd_profile_volume_use_rsp_dict (NULL, dict); + if (ret) + goto out; + break; + + case GD_OP_GSYNC_SET: + ret = glusterd_gsync_use_rsp_dict (NULL, dict, rsp.op_errstr); + if (ret) + goto out; + break; + + case GD_OP_STATUS_VOLUME: + ret = glusterd_volume_status_copy_to_op_ctx_dict (NULL, dict); + if (ret) + goto out; + break; + + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + ret = glusterd_volume_rebalance_use_rsp_dict (NULL, dict); + if (ret) + goto out; + break; + + case GD_OP_HEAL_VOLUME: + ret = glusterd_volume_heal_use_rsp_dict (NULL, dict); + if (ret) + goto out; + + break; + + default: + break; + } + } + +out: + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + + if (dict) + dict_unref (dict); + free (rsp.op_errstr); //malloced by xdr + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_commit_op_cbk); +} + +int32_t +glusterd_rpc_probe (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_probe_req req = {{0},}; + int ret = 0; + int port = 0; + char *hostname = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + dict_t *dict = NULL; + + if (!frame || !this || !data) { + ret = -1; + goto out; + } + + dict = data; + priv = this->private; + + GF_ASSERT (priv); + ret = dict_get_str (dict, "hostname", &hostname); + if (ret) + goto out; + ret = dict_get_int32 (dict, "port", &port); + if (ret) + port = GF_DEFAULT_BASE_PORT; + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + uuid_copy (req.uuid, MY_UUID); + req.hostname = gf_strdup (hostname); + req.port = port; + + ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->peer, + GLUSTERD_PROBE_QUERY, + NULL, this, glusterd_probe_cbk, + (xdrproc_t)xdr_gd1_mgmt_probe_req); + +out: + GF_FREE (req.hostname); + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + + +int32_t +glusterd_rpc_friend_add (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_friend_req req = {{0},}; + int ret = 0; + 
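/* [editor's note] glusterd_rpc_probe above, and every sender that
   follows, share one skeleton: copy MY_UUID into the XDR request,
   optionally serialize a dict_t into the request's opaque buffer, and
   hand the result to glusterd_submit_request() along with the peer's
   proctable, the callback, and the matching xdrproc. A hedged sketch
   ("foo"/GLUSTERD_FOO are placeholders, not real procedures):

       gd1_mgmt_foo_req req = {{0},};

       uuid_copy (req.uuid, MY_UUID);
       ret = dict_allocate_and_serialize (dict, &req.buf.buf_val,
                                          &req.buf.buf_len);
       if (ret)
               goto out;
       ret = glusterd_submit_request (peerinfo->rpc, &req, frame,
                                      peerinfo->mgmt, GLUSTERD_FOO, NULL,
                                      this, glusterd_foo_cbk,
                                      (xdrproc_t)xdr_gd1_mgmt_foo_req);
*/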
glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_friend_sm_event_t *event = NULL; + dict_t *vols = NULL; + + + if (!frame || !this || !data) { + ret = -1; + goto out; + } + + event = data; + priv = this->private; + + GF_ASSERT (priv); + + peerinfo = event->peerinfo; + + ret = glusterd_build_volume_dict (&vols); + if (ret) + goto out; + + uuid_copy (req.uuid, MY_UUID); + req.hostname = peerinfo->hostname; + req.port = peerinfo->port; + + ret = dict_allocate_and_serialize (vols, &req.vols.vols_val, + &req.vols.vols_len); + if (ret) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->peer, + GLUSTERD_FRIEND_ADD, + NULL, this, glusterd_friend_add_cbk, + (xdrproc_t)xdr_gd1_mgmt_friend_req); + + +out: + GF_FREE (req.vols.vols_val); + + if (vols) + dict_unref (vols); + + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_rpc_friend_remove (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_friend_req req = {{0},}; + int ret = 0; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_friend_sm_event_t *event = NULL; + + if (!frame || !this || !data) { + ret = -1; + goto out; + } + + event = data; + priv = this->private; + + GF_ASSERT (priv); + + peerinfo = event->peerinfo; + + uuid_copy (req.uuid, MY_UUID); + req.hostname = peerinfo->hostname; + req.port = peerinfo->port; + ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->peer, + GLUSTERD_FRIEND_REMOVE, NULL, + this, glusterd_friend_remove_cbk, + (xdrproc_t)xdr_gd1_mgmt_friend_req); + +out: + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + + +int32_t +glusterd_rpc_friend_update (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_friend_update req = {{0},}; + int ret = 0; + glusterd_conf_t *priv = NULL; + dict_t *friends = NULL; + call_frame_t *dummy_frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + + priv = this->private; + GF_ASSERT (priv); + + friends = data; + if (!friends) + goto out; + + ret = dict_get_ptr (friends, "peerinfo", VOID(&peerinfo)); + if (ret) + goto out; + + ret = dict_allocate_and_serialize (friends, &req.friends.friends_val, + &req.friends.friends_len); + if (ret) + goto out; + + uuid_copy (req.uuid, MY_UUID); + + dummy_frame = create_frame (this, this->ctx->pool); + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->peer, + GLUSTERD_FRIEND_UPDATE, NULL, + this, glusterd_friend_update_cbk, + (xdrproc_t)xdr_gd1_mgmt_friend_update); + +out: + GF_FREE (req.friends.friends_val); + + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_cluster_lock (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_cluster_lock_req req = {{0},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + + if (!this) + goto out; + + peerinfo = data; + + priv = this->private; + GF_ASSERT (priv); + + glusterd_get_uuid (&req.uuid); + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt, GLUSTERD_MGMT_CLUSTER_LOCK, + NULL, + this, glusterd_cluster_lock_cbk, + (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_mgmt_v3_lock_peers (call_frame_t *frame, xlator_t *this, + void *data) +{ + 
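/* [editor's note] Unlike the older cluster-wide lock, the mgmt_v3 lock
   sent below is transaction-scoped: the caller stores a uuid under the
   "transaction_id" key of the op dict, the sender copies it into
   req.txn_id, and the peer echoes it back so the callback can inject
   ACC/RJT into the right transaction instead of falling back to
   global_txn_id. In essence:

       ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id);
       if (!ret)
               uuid_copy (req.txn_id, *txn_id);

   The "peerinfo" pointer is deleted from the dict before serialization
   because it is local state, not wire payload. */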
gd1_mgmt_v3_lock_req req = {{0},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + dict_t *dict = NULL; + uuid_t *txn_id = NULL; + + if (!this) + goto out; + + dict = data; + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + //peerinfo should not be in payload + dict_del (dict, "peerinfo"); + + glusterd_get_uuid (&req.uuid); + + ret = dict_allocate_and_serialize (dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict " + "to request buffer"); + goto out; + } + + /* Sending valid transaction ID to peers */ + ret = dict_get_bin (dict, "transaction_id", + (void **)&txn_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get transaction id."); + goto out; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "Transaction_id = %s", uuid_utoa (*txn_id)); + uuid_copy (req.txn_id, *txn_id); + } + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt_v3, + GLUSTERD_MGMT_V3_LOCK, NULL, + this, glusterd_mgmt_v3_lock_peers_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_req); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_mgmt_v3_unlock_peers (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_v3_unlock_req req = {{0},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + dict_t *dict = NULL; + uuid_t *txn_id = NULL; + + if (!this) + goto out; + + dict = data; + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + //peerinfo should not be in payload + dict_del (dict, "peerinfo"); + + glusterd_get_uuid (&req.uuid); + + ret = dict_allocate_and_serialize (dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict " + "to request buffer"); + goto out; + } + + /* Sending valid transaction ID to peers */ + ret = dict_get_bin (dict, "transaction_id", + (void **)&txn_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get transaction id."); + goto out; + } else { + gf_log (this->name, GF_LOG_DEBUG, + "Transaction_id = %s", uuid_utoa (*txn_id)); + uuid_copy (req.txn_id, *txn_id); + } + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt_v3, + GLUSTERD_MGMT_V3_UNLOCK, NULL, + this, glusterd_mgmt_v3_unlock_peers_cbk, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_cluster_unlock (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_cluster_lock_req req = {{0},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + + if (!this ) { + ret = -1; + goto out; + } + peerinfo = data; + priv = this->private; + GF_ASSERT (priv); + + glusterd_get_uuid (&req.uuid); + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt, GLUSTERD_MGMT_CLUSTER_UNLOCK, + NULL, + this, 
glusterd_cluster_unlock_cbk, + (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_req); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_stage_op (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_stage_op_req req = {{0,},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + dict_t *dict = NULL; + gf_boolean_t is_alloc = _gf_true; + + if (!this) { + goto out; + } + + dict = data; + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + //peerinfo should not be in payload + dict_del (dict, "peerinfo"); + + glusterd_get_uuid (&req.uuid); + req.op = glusterd_op_get_op (); + + ret = dict_allocate_and_serialize (dict, &req.buf.buf_val, + &req.buf.buf_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict " + "to request buffer"); + goto out; + } + + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt, GLUSTERD_MGMT_STAGE_OP, + NULL, + this, glusterd_stage_op_cbk, + (xdrproc_t)xdr_gd1_mgmt_stage_op_req); + +out: + if ((_gf_true == is_alloc) && req.buf.buf_val) + GF_FREE (req.buf.buf_val); + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_commit_op (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_commit_op_req req = {{0,},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + dict_t *dict = NULL; + gf_boolean_t is_alloc = _gf_true; + + if (!this) { + goto out; + } + + dict = data; + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + //peerinfo should not be in payload + dict_del (dict, "peerinfo"); + + glusterd_get_uuid (&req.uuid); + req.op = glusterd_op_get_op (); + + ret = dict_allocate_and_serialize (dict, &req.buf.buf_val, + &req.buf.buf_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict to " + "request buffer"); + goto out; + } + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt, GLUSTERD_MGMT_COMMIT_OP, + NULL, + this, glusterd_commit_op_cbk, + (xdrproc_t)xdr_gd1_mgmt_commit_op_req); + +out: + if ((_gf_true == is_alloc) && req.buf.buf_val) + GF_FREE (req.buf.buf_val); + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +__glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_brick_op_rsp rsp = {0}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + call_frame_t *frame = NULL; + glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL; + dict_t *dict = NULL; + int index = 0; + glusterd_req_ctx_t *req_ctx = NULL; + glusterd_pending_node_t *node = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; + + this = THIS; + GF_ASSERT (this); + + GF_ASSERT (req); + frame = myframe; + req_ctx = frame->local; + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + /* use standard allocation because to keep uniformity + in freeing it */ + rsp.op_errstr = strdup ("error"); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + ret = 
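/* [editor's note] Brick-op responses are handled differently from peer
   responses: each request went out on its own frame with frame->cookie
   pointing at a glusterd_pending_node_t, so this callback can attribute
   the reply to a specific brick or daemon. The decoded result reaches the
   op state machine wrapped in a brick-response context rather than as a
   bare ACC/RJT event:

       ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx),
                           gf_gld_mt_brick_rsp_ctx_t);
       ev_ctx->pending_node = frame->cookie;
       ev_ctx->rsp_dict     = dict;
       ret = glusterd_op_sm_inject_event (event_type, txn_id, ev_ctx);
*/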
xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode brick op " + "response received"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + rsp.op_errstr = strdup ("Unable to decode brick op response"); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + + if (rsp.output.output_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (rsp.output.output_val, + rsp.output.output_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "unserialize rsp-buffer to dictionary"); + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } else { + dict->extra_stdfree = rsp.output.output_val; + } + } + + op_ret = rsp.op_ret; + + /* Add index to rsp_dict for GD_OP_STATUS_VOLUME */ + if (GD_OP_STATUS_VOLUME == req_ctx->op) { + node = frame->cookie; + index = node->index; + ret = dict_set_int32 (dict, "index", index); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting index on brick status rsp dict"); + rsp.op_ret = -1; + event_type = GD_OP_EVENT_RCVD_RJT; + goto out; + } + } +out: + + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), gf_gld_mt_brick_rsp_ctx_t); + GF_ASSERT (ev_ctx); + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + ev_ctx->op_ret = op_ret; + ev_ctx->op_errstr = gf_strdup(rsp.op_errstr); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + ev_ctx->pending_node = frame->cookie; + ev_ctx->rsp_dict = dict; + ev_ctx->commit_ctx = frame->local; + ret = glusterd_op_sm_inject_event (event_type, txn_id, ev_ctx); + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + + if (ret && dict) + dict_unref (dict); + free (rsp.op_errstr); //malloced by xdr + GLUSTERD_STACK_DESTROY (frame); + return ret; +} + +int32_t +glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + __glusterd_brick_op_cbk); +} + +int32_t +glusterd_brick_op (call_frame_t *frame, xlator_t *this, + void *data) +{ + + gd1_mgmt_brick_op_req *req = NULL; + int ret = 0; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + char *op_errstr = NULL; + int pending_bricks = 0; + glusterd_pending_node_t *pending_node; + glusterd_req_ctx_t *req_ctx = NULL; + struct rpc_clnt *rpc = NULL; + dict_t *op_ctx = NULL; + uuid_t *txn_id = &global_txn_id; + + if (!this) { + ret = -1; + goto out; + } + priv = this->private; + GF_ASSERT (priv); + + req_ctx = data; + GF_ASSERT (req_ctx); + INIT_LIST_HEAD (&opinfo.pending_bricks); + ret = glusterd_op_bricks_select (req_ctx->op, req_ctx->dict, &op_errstr, + &opinfo.pending_bricks, NULL); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to select bricks " + "while performing brick op during 'Volume %s'", + gd_op_list[opinfo.op]); + opinfo.op_errstr = op_errstr; + goto out; + } + + + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + list_for_each_entry (pending_node, &opinfo.pending_bricks, list) { + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + continue; + + if ((pending_node->type == GD_NODE_NFS) || + ((pending_node->type == GD_NODE_SHD) && + (req_ctx->op == GD_OP_STATUS_VOLUME))) + ret = glusterd_node_op_build_payload + (req_ctx->op, + 
(gd1_mgmt_brick_op_req **)&req, + req_ctx->dict); + else { + ret = glusterd_brick_op_build_payload + (req_ctx->op, pending_node->node, + (gd1_mgmt_brick_op_req **)&req, + req_ctx->dict); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "build brick op payload during " + "'Volume %s'", gd_op_list[req_ctx->op]); + goto out; + } + } + + dummy_frame->local = data; + dummy_frame->cookie = pending_node; + + rpc = glusterd_pending_node_get_rpc (pending_node); + if (!rpc) { + if (pending_node->type == GD_NODE_REBALANCE) { + opinfo.brick_pending_count = 0; + ret = 0; + if (req) { + GF_FREE (req->input.input_val); + GF_FREE (req); + req = NULL; + } + GLUSTERD_STACK_DESTROY (dummy_frame); + + op_ctx = glusterd_op_get_ctx (); + if (!op_ctx) + goto out; + glusterd_defrag_volume_node_rsp (req_ctx->dict, + NULL, op_ctx); + + goto out; + } + + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Brick Op failed " + "due to rpc failure."); + goto out; + } + + ret = glusterd_submit_request (rpc, req, dummy_frame, + priv->gfs_mgmt, + req->op, NULL, + this, glusterd_brick_op_cbk, + (xdrproc_t)xdr_gd1_mgmt_brick_op_req); + if (req) { + GF_FREE (req->input.input_val); + GF_FREE (req); + req = NULL; + } + if (!ret) + pending_bricks++; + } + + gf_log (this->name, GF_LOG_DEBUG, "Sent brick op req for operation " + "'Volume %s' to %d bricks", gd_op_list[req_ctx->op], + pending_bricks); + opinfo.brick_pending_count = pending_bricks; + +out: + if (ret) { + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + txn_id, data); + opinfo.op_ret = ret; + } + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +struct rpc_clnt_procedure gd_brick_actors[GLUSTERD_BRICK_MAXVALUE] = { + [GLUSTERD_BRICK_NULL] = {"NULL", NULL }, + [GLUSTERD_BRICK_OP] = {"BRICK_OP", glusterd_brick_op}, +}; + +struct rpc_clnt_procedure gd_peer_actors[GLUSTERD_FRIEND_MAXVALUE] = { + [GLUSTERD_FRIEND_NULL] = {"NULL", NULL }, + [GLUSTERD_PROBE_QUERY] = {"PROBE_QUERY", glusterd_rpc_probe}, + [GLUSTERD_FRIEND_ADD] = {"FRIEND_ADD", glusterd_rpc_friend_add}, + [GLUSTERD_FRIEND_REMOVE] = {"FRIEND_REMOVE", glusterd_rpc_friend_remove}, + [GLUSTERD_FRIEND_UPDATE] = {"FRIEND_UPDATE", glusterd_rpc_friend_update}, +}; + +struct rpc_clnt_procedure gd_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = { + [GLUSTERD_MGMT_NULL] = {"NULL", NULL }, + [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK", glusterd_cluster_lock}, + [GLUSTERD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK", glusterd_cluster_unlock}, + [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd_stage_op}, + [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd_commit_op}, +}; + +struct rpc_clnt_procedure gd_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = { + [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL }, + [GLUSTERD_MGMT_V3_LOCK] = {"MGMT_V3_LOCK", glusterd_mgmt_v3_lock_peers}, + [GLUSTERD_MGMT_V3_UNLOCK] = {"MGMT_V3_UNLOCK", glusterd_mgmt_v3_unlock_peers}, +}; + +struct rpc_clnt_program gd_mgmt_prog = { + .progname = "glusterd mgmt", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_VERSION, + .proctable = gd_mgmt_actors, + .numproc = GLUSTERD_MGMT_MAXVALUE, +}; + +struct rpc_clnt_program gd_brick_prog = { + .progname = "brick operations", + .prognum = GD_BRICK_PROGRAM, + .progver = GD_BRICK_VERSION, + .proctable = gd_brick_actors, + .numproc = GLUSTERD_BRICK_MAXVALUE, +}; + +struct rpc_clnt_program gd_peer_prog = { + .progname = "Peer mgmt", + .prognum = GD_FRIEND_PROGRAM, + .progver = GD_FRIEND_VERSION, + .proctable = gd_peer_actors, + .numproc = GLUSTERD_FRIEND_MAXVALUE, +}; + +struct 
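/* [editor's note] The tables above are the client-side dispatch surface:
   each rpc_clnt_procedure row pairs a procedure name with its sender
   function, and each rpc_clnt_program binds a (prognum, progver) pair to
   one such table. Callers reach them through the per-peer handles, e.g.:

       proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_ADD];
       if (proc->fn)
               ret = proc->fn (frame, this, event);

   so a request is only attempted via a program the peer has set up. */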
rpc_clnt_program gd_mgmt_v3_prog = { + .progname = "glusterd mgmt v3", + .prognum = GD_MGMT_V3_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .proctable = gd_mgmt_v3_actors, + .numproc = GLUSTERD_MGMT_V3_MAXVALUE, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index d862a697d..c671edf68 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is GF_FREE software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _CONFIG_H #define _CONFIG_H @@ -41,6 +31,7 @@ #include "compat-errno.h" #include "statedump.h" #include "glusterd-sm.h" +#include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" @@ -55,6 +46,9 @@ static char *glusterd_friend_sm_state_names[] = { "Sent and Received peer request", "Peer Rejected", "Peer detach in progress", + "Probe Received from peer", + "Connected to Peer", + "Peer is connected and Accepted", "Invalid State" }; @@ -75,7 +69,7 @@ static char *glusterd_friend_sm_event_names[] = { }; char* -glusterd_friend_sm_state_name_get (glusterd_friend_sm_state_t state) +glusterd_friend_sm_state_name_get (int state) { if (state < 0 || state >= GD_FRIEND_STATE_MAX) return glusterd_friend_sm_state_names[GD_FRIEND_STATE_MAX]; @@ -83,7 +77,7 @@ glusterd_friend_sm_state_name_get (glusterd_friend_sm_state_t state) } char* -glusterd_friend_sm_event_name_get (glusterd_friend_sm_event_type_t event) +glusterd_friend_sm_event_name_get (int event) { if (event < 0 || event >= GD_FRIEND_EVENT_MAX) return glusterd_friend_sm_event_names[GD_FRIEND_EVENT_MAX]; @@ -96,8 +90,7 @@ glusterd_destroy_probe_ctx (glusterd_probe_ctx_t *ctx) if (!ctx) return; - if (ctx->hostname) - GF_FREE (ctx->hostname); + GF_FREE (ctx->hostname); GF_FREE (ctx); } @@ -109,8 +102,7 @@ glusterd_destroy_friend_req_ctx (glusterd_friend_req_ctx_t *ctx) if (ctx->vols) dict_unref (ctx->vols); - if (ctx->hostname) - GF_FREE (ctx->hostname); + GF_FREE (ctx->hostname); GF_FREE (ctx); } @@ -119,8 +111,7 @@ glusterd_destroy_friend_update_ctx (glusterd_friend_update_ctx_t *ctx) { if (!ctx) return; - if (ctx->hostname) - GF_FREE (ctx->hostname); + GF_FREE (ctx->hostname); GF_FREE (ctx); } @@ -129,25 +120,62 @@ glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid) { int ret = 0; rpc_clnt_procedure_t *proc = NULL; - glusterd_conf_t *conf = NULL; xlator_t *this = NULL; glusterd_friend_update_ctx_t ctx = {{0},}; + glusterd_peerinfo_t *peerinfo = NULL; + 
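/* [editor's note] This hunk rewrites glusterd_broadcast_friend_delete
   from a single conf->mgmt proctable call into an explicit fan-out: the
   op and hostname are packed into a dict once, and the function then
   walks priv->peers, invoking GLUSTERD_FRIEND_UPDATE for every connected
   peer. The essential loop, condensed:

       list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
               if (!peerinfo->connected || !peerinfo->peer)
                       continue;
               dict_set_static_ptr (friends, "peerinfo", peerinfo);
               proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
               if (proc->fn)
                       ret = proc->fn (NULL, this, friends);
       }
*/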
glusterd_conf_t *priv = NULL; + dict_t *friends = NULL; + char key[100] = {0,}; + int32_t count = 0; this = THIS; - conf = this->private; + priv = this->private; - GF_ASSERT (conf); - GF_ASSERT (conf->mgmt); + GF_ASSERT (priv); ctx.hostname = hostname; ctx.op = GD_FRIEND_UPDATE_DEL; - proc = &conf->mgmt->proctable[GD_MGMT_FRIEND_UPDATE]; - if (proc->fn) { - ret = proc->fn (NULL, this, &ctx); + + friends = dict_new (); + if (!friends) + goto out; + + snprintf (key, sizeof (key), "op"); + ret = dict_set_int32 (friends, key, ctx.op); + if (ret) + goto out; + + snprintf (key, sizeof (key), "hostname"); + ret = dict_set_str (friends, key, hostname); + if (ret) + goto out; + + ret = dict_set_int32 (friends, "count", count); + if (ret) + goto out; + + list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + if (!peerinfo->connected || !peerinfo->peer) + continue; + + ret = dict_set_static_ptr (friends, "peerinfo", peerinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "failed to set peerinfo"); + goto out; + } + + proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE]; + if (proc->fn) { + ret = proc->fn (NULL, this, friends); + } } gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); +out: + if (friends) + dict_unref (friends); + return ret; } @@ -216,12 +244,9 @@ glusterd_ac_reverse_probe_begin (glusterd_friend_sm_event_t *event, void *ctx) out: if (ret) { - if (new_event) - GF_FREE (new_event); - if (new_ev_ctx->hostname) - GF_FREE (new_ev_ctx->hostname); - if (new_ev_ctx) - GF_FREE (new_ev_ctx); + GF_FREE (new_event); + GF_FREE (new_ev_ctx->hostname); + GF_FREE (new_ev_ctx); } gf_log ("", GF_LOG_DEBUG, "returning with %d", ret); return ret; @@ -244,9 +269,10 @@ glusterd_ac_friend_add (glusterd_friend_sm_event_t *event, void *ctx) conf = this->private; GF_ASSERT (conf); - GF_ASSERT (conf->mgmt); - proc = &conf->mgmt->proctable[GD_MGMT_FRIEND_ADD]; + if (!peerinfo->peer) + goto out; + proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_ADD]; if (proc->fn) { frame = create_frame (this, this->ctx->pool); if (!frame) { @@ -271,6 +297,7 @@ glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx) glusterd_conf_t *conf = NULL; xlator_t *this = NULL; glusterd_probe_ctx_t *probe_ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; dict_t *dict = NULL; GF_ASSERT (ctx); @@ -284,11 +311,17 @@ glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx) conf = this->private; GF_ASSERT (conf); - if (!conf->mgmt) - goto out; + ret = glusterd_friend_find (NULL, probe_ctx->hostname, &peerinfo); + if (ret) { + //We should not reach this state ideally + GF_ASSERT (0); + goto out; + } - proc = &conf->mgmt->proctable[GD_MGMT_PROBE_QUERY]; + if (!peerinfo->peer) + goto out; + proc = &peerinfo->peer->proctable[GLUSTERD_PROBE_QUERY]; if (proc->fn) { frame = create_frame (this, this->ctx->pool); if (!frame) { @@ -305,6 +338,13 @@ glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx) ret = dict_set_int32 (dict, "port", probe_ctx->port); if (ret) goto out; + + ret = dict_set_static_ptr (dict, "peerinfo", peerinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "failed to set peerinfo"); + goto out; + } + ret = proc->fn (frame, this, dict); if (ret) goto out; @@ -321,7 +361,8 @@ out: } static int -glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event, void *ctx) +glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event, + void *data) { int ret = 0; glusterd_peerinfo_t *peerinfo = NULL; @@ -329,7 +370,9 @@ glusterd_ac_send_friend_remove_req 
(glusterd_friend_sm_event_t *event, void *ctx call_frame_t *frame = NULL; glusterd_conf_t *conf = NULL; xlator_t *this = NULL; - + glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; + glusterd_probe_ctx_t *ctx = NULL; + glusterd_friend_sm_event_t *new_event = NULL; GF_ASSERT (event); peerinfo = event->peerinfo; @@ -338,15 +381,46 @@ glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event, void *ctx conf = this->private; GF_ASSERT (conf); - GF_ASSERT (conf->mgmt); - proc = &conf->mgmt->proctable[GD_MGMT_FRIEND_REMOVE]; + ctx = event->ctx; + + if (!peerinfo->connected) { + event_type = GD_FRIEND_EVENT_REMOVE_FRIEND; + + ret = glusterd_friend_sm_new_event (event_type, &new_event); + + if (!ret) { + new_event->peerinfo = peerinfo; + ret = glusterd_friend_sm_inject_event (new_event); + } else { + gf_log ("glusterd", GF_LOG_ERROR, + "Unable to get event"); + } + + if (ctx) + ret = glusterd_xfer_cli_deprobe_resp (ctx->req, ret, 0, + NULL, + ctx->hostname, + ctx->dict); + glusterd_friend_sm (); + glusterd_op_sm (); + + if (ctx) { + glusterd_broadcast_friend_delete (ctx->hostname, NULL); + glusterd_destroy_probe_ctx (ctx); + } + goto out; + } + + if (!peerinfo->peer) + goto out; + proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_REMOVE]; if (proc->fn) { frame = create_frame (this, this->ctx->pool); if (!frame) { goto out; } - frame->local = ctx; + frame->local = data; ret = proc->fn (frame, this, event); } @@ -356,45 +430,131 @@ out: return ret; } +static gf_boolean_t +glusterd_should_update_peer (glusterd_peerinfo_t *peerinfo, + glusterd_peerinfo_t *cur_peerinfo) +{ + gf_boolean_t is_valid = _gf_false; + + if ((peerinfo == cur_peerinfo) || + (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) + is_valid = _gf_true; + + return is_valid; +} + static int glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx) { - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - rpc_clnt_procedure_t *proc = NULL; - call_frame_t *frame = NULL; - glusterd_conf_t *conf = NULL; - xlator_t *this = NULL; - glusterd_friend_update_ctx_t *ev_ctx = NULL; + int ret = 0; + glusterd_peerinfo_t *cur_peerinfo = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + rpc_clnt_procedure_t *proc = NULL; + xlator_t *this = NULL; + glusterd_friend_update_ctx_t ev_ctx = {{0}}; + glusterd_conf_t *priv = NULL; + dict_t *friends = NULL; + char key[100] = {0,}; + char *dup_buf = NULL; + int32_t count = 0; GF_ASSERT (event); - peerinfo = event->peerinfo; + cur_peerinfo = event->peerinfo; this = THIS; - conf = this->private; + priv = this->private; - GF_ASSERT (conf); - GF_ASSERT (conf->mgmt); - ev_ctx = ctx; + GF_ASSERT (priv); - ev_ctx->op = GD_FRIEND_UPDATE_ADD; + ev_ctx.op = GD_FRIEND_UPDATE_ADD; - proc = &conf->mgmt->proctable[GD_MGMT_FRIEND_UPDATE]; - if (proc->fn) { - frame = create_frame (this, this->ctx->pool); - if (!frame) { + friends = dict_new (); + if (!friends) + goto out; + + snprintf (key, sizeof (key), "op"); + ret = dict_set_int32 (friends, key, ev_ctx.op); + if (ret) + goto out; + + list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + if (!glusterd_should_update_peer (peerinfo, cur_peerinfo)) + continue; + + count++; + snprintf (key, sizeof (key), "friend%d.uuid", count); + dup_buf = gf_strdup (uuid_utoa (peerinfo->uuid)); + ret = dict_set_dynstr (friends, key, dup_buf); + if (ret) + goto out; + snprintf (key, sizeof (key), "friend%d.hostname", count); + ret = dict_set_str (friends, key, peerinfo->hostname); + if (ret) + goto out; + gf_log ("", GF_LOG_INFO, "Added 
uuid: %s, host: %s", + dup_buf, peerinfo->hostname); + } + + ret = dict_set_int32 (friends, "count", count); + if (ret) + goto out; + + list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + if (!peerinfo->connected || !peerinfo->peer) + continue; + + if (!glusterd_should_update_peer (peerinfo, cur_peerinfo)) + continue; + + ret = dict_set_static_ptr (friends, "peerinfo", peerinfo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "failed to set peerinfo"); goto out; } - frame->local = ctx; - ret = proc->fn (frame, this, ctx); + + proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE]; + if (proc->fn) { + ret = proc->fn (NULL, this, friends); + } } -out: gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); +out: + if (friends) + dict_unref (friends); + return ret; } +static int +glusterd_peer_detach_cleanup (glusterd_conf_t *priv) +{ + int ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp_volinfo = NULL; + + GF_ASSERT (priv); + + list_for_each_entry_safe (volinfo,tmp_volinfo, + &priv->volumes, vol_list) { + if (!glusterd_friend_contains_vol_bricks (volinfo, + MY_UUID)) { + gf_log (THIS->name, GF_LOG_INFO, + "Deleting stale volume %s", volinfo->volname); + ret = glusterd_delete_volume (volinfo); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Error deleting stale volume"); + goto out; + } + } + } + ret = 0; +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} static int glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event, @@ -430,9 +590,14 @@ glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event, if (ret) goto out; } - + ret = glusterd_peer_detach_cleanup (priv); + if (ret) { + gf_log (THIS->name, GF_LOG_WARNING, + "Peer detach cleanup was not successful"); + ret = 0; + } out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } @@ -442,10 +607,13 @@ glusterd_ac_friend_remove (glusterd_friend_sm_event_t *event, void *ctx) { int ret = -1; - ret = glusterd_friend_cleanup (event->peerinfo); + ret = glusterd_friend_remove_cleanup_vols (event->peerinfo->uuid); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, "Volumes cleanup failed"); + ret = glusterd_friend_cleanup (event->peerinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, "Cleanup returned: %d", ret); + gf_log (THIS->name, GF_LOG_ERROR, "Cleanup returned: %d", ret); } return 0; @@ -483,15 +651,15 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) uuid_copy (peerinfo->uuid, ev_ctx->uuid); //Build comparison logic here. 
- ret = glusterd_compare_friend_data (ev_ctx->vols, &status); + ret = glusterd_compare_friend_data (ev_ctx->vols, &status, + peerinfo->hostname); if (ret) goto out; if (GLUSTERD_VOL_COMP_RJT != status) { event_type = GD_FRIEND_EVENT_LOCAL_ACC; op_ret = 0; - } - else { + } else { event_type = GD_FRIEND_EVENT_LOCAL_RJT; op_errno = GF_PROBE_VOLUME_CONFLICT; op_ret = -1; @@ -521,7 +689,8 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) glusterd_friend_sm_inject_event (new_event); ret = glusterd_xfer_friend_add_resp (ev_ctx->req, ev_ctx->hostname, - ev_ctx->port, op_ret, op_errno); + peerinfo->hostname, ev_ctx->port, + op_ret, op_errno); out: gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); @@ -538,13 +707,10 @@ glusterd_friend_sm_transition_state (glusterd_peerinfo_t *peerinfo, GF_ASSERT (state); GF_ASSERT (peerinfo); - //peerinfo->state.state = state; - - gf_log ("", GF_LOG_NORMAL, "Transitioning from '%s' to '%s' due to " - "event '%s'", - glusterd_friend_sm_state_name_get (peerinfo->state.state), - glusterd_friend_sm_state_name_get (state[event_type].next_state), - glusterd_friend_sm_event_name_get (event_type)); + (void) glusterd_sm_tr_log_transition_add (&peerinfo->sm_log, + peerinfo->state.state, + state[event_type].next_state, + event_type); peerinfo->state.state = state[event_type].next_state; return 0; @@ -567,6 +733,53 @@ glusterd_sm_t glusterd_state_default [] = { {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_MAX }; +glusterd_sm_t glusterd_state_probe_rcvd [] = { + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EV_PROBE + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EV_INIT_FRIEND_REQ + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT + {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_CONNECTED + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_MAX +}; + +glusterd_sm_t glusterd_state_connected_rcvd [] = { + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EV_PROBE + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EV_INIT_FRIEND_REQ + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_reverse_probe_begin}, //EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT + {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_CONNECTED + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_MAX +}; + +glusterd_sm_t glusterd_state_connected_accepted [] 
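/* [editor's note] Each glusterd_sm_t table added here is indexed by
   glusterd_friend_sm_event_type_t: row N holds the next state plus the
   action to run when event N arrives in that state, so a transition is a
   single table lookup. Reading one row of glusterd_state_connected_rcvd
   as an example:

       // in CONNECTED_RCVD, on EVENT_RCVD_LOCAL_ACC: run
       // glusterd_ac_reverse_probe_begin, move to CONNECTED_ACCEPTED
       {GD_FRIEND_STATE_CONNECTED_ACCEPTED,
        glusterd_ac_reverse_probe_begin},
*/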
= { + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_friend_probe}, //EV_PROBE + {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_friend_add}, //EV_INIT_FRIEND_REQ + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_ACC + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_RJT + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND + {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_CONNECTED + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_MAX +}; glusterd_sm_t glusterd_state_req_sent [] = { {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_NONE, @@ -586,17 +799,17 @@ glusterd_sm_t glusterd_state_req_sent [] = { glusterd_sm_t glusterd_state_req_rcvd [] = { {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_NONE, - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_friend_probe}, //EVENT_PROBE, - {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_friend_add}, //EVENT_INIT_FRIEND_REQ, + {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_PROBE, + {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ, {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_reverse_probe_begin}, //EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND, {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none},//EVENT_CONNECTED + {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none},//EVENT_CONNECTED {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none},//EVENT_MAX }; @@ -605,7 +818,7 @@ glusterd_sm_t glusterd_state_befriended [] = { {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_PROBE, {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ, {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_ACC - {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC + {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_LOCAL_ACC {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ @@ -660,12 +873,12 @@ glusterd_sm_t glusterd_state_req_accepted [] = { {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND - {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none},//EVENT_CONNECTED + {GD_FRIEND_STATE_CONNECTED_ACCEPTED, 
glusterd_ac_reverse_probe_begin},//EVENT_CONNECTED {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_MAX }; glusterd_sm_t glusterd_state_unfriend_sent [] = { - {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_NONE, + {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_NONE, {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_PROBE, {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ, {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_ACC @@ -689,6 +902,9 @@ glusterd_sm_t *glusterd_friend_state_table [] = { glusterd_state_req_sent_rcvd, glusterd_state_rejected, glusterd_state_unfriend_sent, + glusterd_state_probe_rcvd, + glusterd_state_connected_rcvd, + glusterd_state_connected_accepted }; int @@ -717,7 +933,7 @@ int glusterd_friend_sm_inject_event (glusterd_friend_sm_event_t *event) { GF_ASSERT (event); - gf_log ("glusterd", GF_LOG_NORMAL, "Enqueuing event: '%s'", + gf_log ("glusterd", GF_LOG_DEBUG, "Enqueue event: '%s'", glusterd_friend_sm_event_name_get (event->event)); list_add_tail (&event->list, &gd_friend_sm_queue); @@ -746,6 +962,27 @@ glusterd_destroy_friend_event_context (glusterd_friend_sm_event_t *event) } } +gf_boolean_t +gd_does_peer_affect_quorum (glusterd_friend_sm_state_t old_state, + glusterd_friend_sm_event_type_t event_type, + glusterd_peerinfo_t *peerinfo) +{ + gf_boolean_t affects = _gf_false; + + //When glusterd comes up with friends in BEFRIENDED state in store, + //wait until compare-data happens. + if ((old_state == GD_FRIEND_STATE_BEFRIENDED) && + (event_type != GD_FRIEND_EVENT_RCVD_ACC) && + (event_type != GD_FRIEND_EVENT_LOCAL_ACC)) + goto out; + if ((peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED) + && peerinfo->connected) { + affects = _gf_true; + } +out: + return affects; +} + int glusterd_friend_sm () { @@ -757,6 +994,8 @@ glusterd_friend_sm () glusterd_peerinfo_t *peerinfo = NULL; glusterd_friend_sm_event_type_t event_type = 0; gf_boolean_t is_await_conn = _gf_false; + gf_boolean_t quorum_action = _gf_false; + glusterd_friend_sm_state_t old_state = GD_FRIEND_STATE_DEFAULT; while (!list_empty (&gd_friend_sm_queue)) { list_for_each_entry_safe (event, tmp, &gd_friend_sm_queue, list) { @@ -767,12 +1006,16 @@ glusterd_friend_sm () if (!peerinfo) { gf_log ("glusterd", GF_LOG_CRITICAL, "Received" " event %s with empty peer info", - glusterd_friend_sm_event_name_get(event_type)); + glusterd_friend_sm_event_name_get (event_type)); GF_FREE (event); continue; } + gf_log ("", GF_LOG_DEBUG, "Dequeued event of type: '%s'", + glusterd_friend_sm_event_name_get (event_type)); + + old_state = peerinfo->state.state; state = glusterd_friend_state_table[peerinfo->state.state]; GF_ASSERT (state); @@ -813,7 +1056,16 @@ glusterd_friend_sm () goto out; } - ret = glusterd_store_update_peerinfo (peerinfo); + if (gd_does_peer_affect_quorum (old_state, event_type, + peerinfo)) { + peerinfo->quorum_contrib = QUORUM_UP; + if (peerinfo->quorum_action) { + peerinfo->quorum_action = _gf_false; + quorum_action = _gf_true; + } + } + + ret = glusterd_store_peerinfo (peerinfo); glusterd_destroy_friend_event_context (event); GF_FREE (event); @@ -826,6 +1078,25 @@ glusterd_friend_sm () ret = 0; out: + if (quorum_action) { + /* When glusterd is restarted, it needs to wait until the 'friends' view + * of the volumes settle, before it starts any of the internal daemons. + * + * Every friend that was part of the cluster, would send its + * cluster-view, 'our' way. 
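+          * (That view arrives as a friend add/update request and is
+          * matched against our own volumes by
+          * glusterd_compare_friend_data (), as in
+          * glusterd_ac_handle_friend_add_req () above.)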
For every friend, who belongs to + * a partition which has a different cluster-view from our + * partition, we may update our cluster-view. For subsequent + * friends from that partition would agree with us, if the first + * friend wasn't rejected. For every first friend, whom we agreed with, + * we would need to start internal daemons/bricks belonging to the + * new volumes. + * glusterd_spawn_daemons calls functions that are idempotent. ie, + * the functions spawn process(es) only if they are not started yet. + * + * */ + glusterd_spawn_daemons (NULL); + glusterd_do_quorum_action (); + } return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h index ebe6cb379..b9bedbe69 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-sm.h @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _GLUSTERD_SM_H_ #define _GLUSTERD_SM_H_ @@ -37,14 +27,21 @@ #include "byte-order.h" //#include "glusterd.h" #include "rpcsvc.h" - -struct glusterd_store_handle_ { - char *path; - int fd; - FILE *read; -}; - -typedef struct glusterd_store_handle_ glusterd_store_handle_t; +#include "store.h" + +typedef enum gd_quorum_contribution_ { + QUORUM_NONE, + QUORUM_WAITING, + QUORUM_DOWN, + QUORUM_UP +} gd_quorum_contrib_t; + +typedef enum gd_quorum_status_ { + QUORUM_UNKNOWN, + QUORUM_NOT_APPLICABLE, + QUORUM_MEETS, + QUORUM_DOES_NOT_MEET +} gd_quorum_status_t; typedef enum glusterd_friend_sm_state_ { GD_FRIEND_STATE_DEFAULT = 0, @@ -55,6 +52,9 @@ typedef enum glusterd_friend_sm_state_ { GD_FRIEND_STATE_REQ_SENT_RCVD, GD_FRIEND_STATE_REJECTED, GD_FRIEND_STATE_UNFRIEND_SENT, + GD_FRIEND_STATE_PROBE_RCVD, + GD_FRIEND_STATE_CONNECTED_RCVD, + GD_FRIEND_STATE_CONNECTED_ACCEPTED, GD_FRIEND_STATE_MAX } glusterd_friend_sm_state_t; @@ -68,18 +68,42 @@ typedef struct glusterd_peer_hostname_ { struct list_head hostname_list; }glusterd_peer_hostname_t; +typedef struct glusterd_sm_transition_ { + int old_state; + int event; + int new_state; + time_t time; +} glusterd_sm_transition_t; + +typedef struct glusterd_sm_tr_log_ { + glusterd_sm_transition_t *transitions; + size_t current; + size_t size; + size_t count; + char* (*state_name_get) (int); + char* (*event_name_get) (int); +} glusterd_sm_tr_log_t; + struct glusterd_peerinfo_ { uuid_t uuid; - char uuid_str[50]; + char uuid_str[50]; /* Retrieve this using + * gd_peer_uuid_str () + */ glusterd_peer_state_info_t state; char *hostname; int port; struct list_head uuid_list; struct list_head op_peers_list; - struct list_head hostnames; struct rpc_clnt *rpc; + rpc_clnt_prog_t *mgmt; + rpc_clnt_prog_t *peer; + rpc_clnt_prog_t *mgmt_v3; int connected; - glusterd_store_handle_t *shandle; + gf_store_handle_t *shandle; + glusterd_sm_tr_log_t sm_log; + gf_boolean_t quorum_action; + gd_quorum_contrib_t quorum_contrib; + gf_boolean_t locked; }; typedef struct glusterd_peerinfo_ glusterd_peerinfo_t; @@ -93,11 +117,13 @@ typedef enum glusterd_ev_gen_mode_ { typedef struct glusterd_peer_ctx_args_ { rpcsvc_request_t *req; glusterd_ev_gen_mode_t mode; + dict_t *dict; } glusterd_peerctx_args_t; typedef struct glusterd_peer_ctx_ { glusterd_peerctx_args_t args; glusterd_peerinfo_t *peerinfo; + char *errstr; } glusterd_peerctx_t; typedef enum glusterd_friend_sm_event_type_ { @@ -158,6 +184,7 @@ typedef struct glusterd_probe_ctx_ { char *hostname; rpcsvc_request_t *req; int port; + dict_t *dict; } glusterd_probe_ctx_t; int glusterd_friend_sm_new_event (glusterd_friend_sm_event_type_t event_type, @@ -178,7 +205,10 @@ void glusterd_destroy_friend_req_ctx (glusterd_friend_req_ctx_t *ctx); char* -glusterd_friend_sm_state_name_get (glusterd_friend_sm_state_t state); +glusterd_friend_sm_state_name_get (int state); + +char* +glusterd_friend_sm_event_name_get (int event); int glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid); diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c new file mode 100644 index 000000000..9b811cd05 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c @@ -0,0 +1,5590 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <inttypes.h> +#include <sys/types.h> +#include <unistd.h> +#include <sys/resource.h> +#include <sys/statvfs.h> +#include <sys/mount.h> + +#include "globals.h" +#include "compat.h" +#include "protocol-common.h" +#include "xlator.h" +#include "logging.h" +#include "timer.h" +#include "glusterd-mem-types.h" +#include "glusterd.h" +#include "glusterd-sm.h" +#include "glusterd-op-sm.h" +#include "glusterd-utils.h" +#include "glusterd-store.h" +#include "run.h" +#include "glusterd-volgen.h" +#include "glusterd-mgmt.h" +#include "glusterd-syncop.h" + +#include "syscall.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" + +#ifdef GF_LINUX_HOST_OS +#include <mntent.h> +#endif + +char snap_mount_folder[PATH_MAX]; + +static int32_t +glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, + char *snap_uuid, struct list_head *peers, + int32_t op); + +/* This function will restore a snapshot volumes + * + * @param dict dictionary containing snapshot restore request + * @param op_errstr In case of any failure error message will be returned + * in this variable + * @return Negative value on Failure and 0 in success + */ +int +glusterd_snapshot_restore (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (NULL == snap) { + ret = gf_asprintf (op_errstr, "Snap (%s) not found", + snapname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + /* TODO : As of now there is only volume in snapshot. + * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = list_entry (snap->volumes.next, glusterd_volinfo_t, + vol_list); + + ret = glusterd_volinfo_find (snap_volinfo->parent_volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not get volinfo of " + "%s", snap_volinfo->parent_volname); + goto out; + } + + if (is_origin_glusterd (dict) == _gf_true) { + /* From origin glusterd check if * + * any peers with snap bricks is down */ + ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, + snap_volinfo->volname, + &priv->peers, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to find missed snap restores"); + goto out; + } + } + + ret = gd_restore_snap_volume (rsp_dict, volinfo, snap_volinfo); + if (ret) { + /* No need to update op_errstr because it is assumed + * that the called function will do that in case of + * failure. 
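+         * A sketch of the convention the callee is assumed to follow:
+         *
+         *     ret = gf_asprintf (op_errstr, "Failed to restore snap %s",
+         *                        snap->snapname);
+         *
+         * after which callers such as this one only log and propagate
+         * ret.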
+ */ + gf_log (this->name, GF_LOG_ERROR, "Failed to restore " + "snap for %s volume", volname); + goto out; + } + + ret = 0; + + /* TODO: Need to check if we need to delete the snap after the + * operation is successful or not. Also need to persist the state + * of restore operation in the store. + */ +out: + return ret; +} + +/* This function is called before actual restore is taken place. This function + * will validate whether the snapshot volumes are ready to be restored or not. + * + * @param dict dictionary containing snapshot restore request + * @param op_errstr In case of any failure error message will be returned + * in this variable + * @param rsp_dict response dictionary + * @return Negative value on Failure and 0 in success + */ +int +glusterd_snapshot_restore_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + int32_t i = 0; + int32_t volcount = 0; + gf_boolean_t snap_restored = _gf_false; + char key[PATH_MAX] = {0, }; + char *volname = NULL; + char *snapname = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (NULL == snap) { + ret = gf_asprintf (op_errstr, "Snap (%s) not found", + snapname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + snap_restored = snap->snap_restored; + + if (snap_restored) { + ret = gf_asprintf (op_errstr, "Snap (%s) is already " + "restored", snapname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + ret = dict_set_str (rsp_dict, "snapname", snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap name"); + goto out; + } + + ret = dict_get_int32 (dict, "volcount", &volcount); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume count"); + goto out; + } + + /* Snapshot restore will only work if all the volumes, + that are part of the snapshot, are stopped. */ + for (i = 1; i <= volcount; ++i) { + snprintf (key, sizeof (key), "volname%d", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + ret = gf_asprintf (op_errstr, "Volume (%s) not found", + volname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + + if (glusterd_is_volume_started (volinfo)) { + ret = gf_asprintf (op_errstr, "Volume (%s) has been " + "started. 
Volume needs to be stopped before restoring " + "a snapshot.", volname); + if (ret < 0) { + goto out; + } + gf_log (this->name, GF_LOG_ERROR, "%s", *op_errstr); + ret = -1; + goto out; + } + } + + ret = 0; +out: + return ret; +} + +int +snap_max_hard_limits_validate (dict_t *dict, char *volname, + uint64_t value, char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + if (volname) { + ret = glusterd_volinfo_find (volname, &volinfo); + if (!ret) { + if (volinfo->is_snap_volume) { + ret = -1; + snprintf (err_str, PATH_MAX, + "%s is a snap volume. Configuring " + "snap-max-hard-limit for a snap " + "volume is prohibited.", volname); + goto out; + } + } + } + + if (value) { + /* Max limit for the system is GLUSTERD_SNAPS_MAX_HARD_LIMIT + * but max limit for a volume is conf->snap_max_hard_limit. + */ + if (volname) { + max_limit = conf->snap_max_hard_limit; + } else { + max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + } + } + + if ((value < 0) || (value > max_limit)) { + ret = -1; + snprintf (err_str, PATH_MAX, "Invalid snap-max-hard-limit" + "%"PRIu64 ". Expected range 0 - %"PRIu64, + value, max_limit); + goto out; + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + } + return ret; +} + +int +glusterd_snapshot_config_prevalidate (dict_t *dict, char **op_errstr) +{ + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + int ret = -1; + int config_command = 0; + char err_str[PATH_MAX] = {0,}; + glusterd_conf_t *conf = NULL; + uint64_t value = 0; + uint64_t hard_limit = 0; + uint64_t soft_limit = 0; + gf_loglevel_t loglevel = GF_LOG_ERROR; + uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + ret = dict_get_int32 (dict, "config-command", &config_command); + if (ret) { + snprintf (err_str, sizeof (err_str), + "failed to get config-command type"); + goto out; + } + + ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit); + + ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit); + + ret = dict_get_str (dict, "volname", &volname); + + if (volname) { + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), + "Volume %s does not exist.", volname); + goto out; + } + } + + switch (config_command) { + case GF_SNAP_CONFIG_TYPE_SET: + if (hard_limit) { + /* Validations for snap-max-hard-limits */ + ret = snap_max_hard_limits_validate (dict, volname, + hard_limit, op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "snap-max-hard-limit validation " + "failed."); + goto out; + } + } + + if (soft_limit) { + max_limit = GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT; + if ((soft_limit < 0) || (soft_limit > max_limit)) { + ret = -1; + snprintf (err_str, PATH_MAX, "Invalid " + "snap-max-soft-limit ""%" + PRIu64 ". 
Expected range 0 - %"PRIu64, + value, max_limit); + goto out; + } + break; + } + default: + break; + } + + ret = 0; +out: + + if (ret && err_str[0] != '\0') { + gf_log (this->name, loglevel, "%s", err_str); + *op_errstr = gf_strdup (err_str); + } + + return ret; +} + +int +glusterd_snap_create_pre_val_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char *snap_brick_dir = NULL; + char *snap_device = NULL; + char *tmpstr = NULL; + char key[PATH_MAX] = ""; + char snapbrckcnt[PATH_MAX] = ""; + char snapbrckord[PATH_MAX] = ""; + int ret = -1; + int64_t i = -1; + int64_t j = -1; + int64_t volume_count = 0; + int64_t brick_count = 0; + int64_t brick_order = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dst); + GF_ASSERT (src); + + ret = dict_get_int64 (src, "volcount", &volume_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + + for (i = 0; i < volume_count; i++) { + memset (snapbrckcnt, '\0', sizeof(snapbrckcnt)); + ret = snprintf (snapbrckcnt, sizeof(snapbrckcnt) - 1, + "vol%ld_brickcount", i+1); + ret = dict_get_int64 (src, snapbrckcnt, &brick_count); + if (ret) { + gf_log (this->name, GF_LOG_TRACE, + "No bricks for this volume in this dict"); + continue; + } + + for (j = 0; j < brick_count; j++) { + /* Fetching data from source dict */ + snprintf (key, sizeof(key) - 1, + "vol%ld.brickdir%ld", i+1, j); + + ret = dict_get_ptr (src, key, + (void **)&snap_brick_dir); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to fetch %s", key); + continue; + } + + snprintf (key, sizeof(key) - 1, + "vol%ld.brick_snapdevice%ld", i+1, j); + + ret = dict_get_ptr (src, key, + (void **)&snap_device); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_device"); + goto out; + } + + snprintf (snapbrckord, sizeof(snapbrckord) - 1, + "vol%ld.brick%ld.order", i+1, j); + + ret = dict_get_int64 (src, snapbrckord, &brick_order); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get brick order"); + goto out; + } + + /* Adding the data in the dst dict */ + snprintf (key, sizeof(key) - 1, + "vol%ld.brickdir%ld", i+1, brick_order); + + tmpstr = gf_strdup (snap_brick_dir); + if (!tmpstr) { + gf_log (this->name, GF_LOG_ERROR, + "Out Of Memory"); + ret = -1; + goto out; + } + ret = dict_set_dynstr (dst, key, tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + GF_FREE (tmpstr); + goto out; + } + + snprintf (key, sizeof(key) - 1, + "vol%ld.brick_snapdevice%ld", + i+1, brick_order); + + tmpstr = gf_strdup (snap_device); + if (!tmpstr) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dst, key, tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + GF_FREE (tmpstr); + goto out; + } + + } + } + + ret = 0; +out: + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_snap_pre_validate_use_rsp_dict (dict_t *dst, dict_t *src) +{ + int ret = -1; + int32_t snap_command = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!dst || !src) { + gf_log (this->name, GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_snap_create_pre_val_use_rsp_dict (dst, src); + if (ret) { + gf_log (this->name, 
GF_LOG_ERROR, "Unable to use " + "rsp dict"); + goto out; + } + break; + default: + break; + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_snapshot_create_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + char *volname = NULL; + char *snapname = NULL; + char *device = NULL; + char *tmpstr = NULL; + char *brick_dir = NULL; + char snap_brick_dir[PATH_MAX] = ""; + char *mnt_pt = NULL; + char key[PATH_MAX] = ""; + char snap_mount[PATH_MAX] = ""; + char snap_volname[64] = ""; + char err_str[PATH_MAX] = ""; + int ret = -1; + int64_t i = 0; + int64_t volcount = 0; + int64_t brick_count = 0; + int64_t brick_order = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + uuid_t *snap_volid = NULL; + gf_loglevel_t loglevel = GF_LOG_ERROR; + glusterd_conf_t *conf = NULL; + int64_t effective_max_limit = 0; + + this = THIS; + GF_ASSERT (op_errstr); + conf = this->private; + GF_ASSERT (conf); + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to " + "get the volume count"); + goto out; + } + if (volcount <= 0) { + snprintf (err_str, sizeof (err_str), "Invalid volume count %ld " + "supplied", volcount); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to get snapname"); + goto out; + } + + if (glusterd_find_snap_by_name (snapname)) { + ret = -1; + snprintf (err_str, sizeof (err_str), "Snap %s already exists", + snapname); + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "volname%ld", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + snprintf (err_str, sizeof (err_str), + "failed to get volume name"); + goto out; + } + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, sizeof (err_str), + "Volume (%s) does not exist ", volname); + goto out; + } + + ret = -1; + if (!glusterd_is_volume_started (volinfo)) { + snprintf (err_str, sizeof (err_str), "volume %s is " + "not started", volinfo->volname); + loglevel = GF_LOG_WARNING; + goto out; + } + if (glusterd_is_defrag_on (volinfo)) { + snprintf (err_str, sizeof (err_str), + "rebalance process is running for the " + "volume %s", volname); + loglevel = GF_LOG_WARNING; + goto out; + } + /* TODO: Also check whether geo replication is running */ + + if (volinfo->is_snap_volume == _gf_true) { + snprintf (err_str, sizeof (err_str), + "Volume %s is a snap volume", volname); + loglevel = GF_LOG_WARNING; + goto out; + } + + if (volinfo->snap_max_hard_limit < conf->snap_max_hard_limit) + effective_max_limit = volinfo->snap_max_hard_limit; + else + effective_max_limit = conf->snap_max_hard_limit; + + if (volinfo->snap_count >= effective_max_limit) { + snprintf (err_str, sizeof (err_str), + "The number of existing snaps has reached " + "the effective maximum limit of %"PRIu64" ," + "for the volume %s", effective_max_limit, + volname); + loglevel = GF_LOG_WARNING; + goto out; + } + + snprintf (key, sizeof(key) - 1, "vol%ld_volid", i); + ret = dict_get_bin (dict, key, (void **)&snap_volid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volid"); + goto out; + } + + /* snap volume uuid is used as lvm snapshot name. 
+ This will avoid restrictions on snapshot names + provided by user */ + GLUSTERD_GET_UUID_NOHYPHEN (snap_volname, *snap_volid); + + brick_count = 0; + brick_order = 0; + /* Adding snap bricks mount paths to the dict */ + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) { + brick_order++; + continue; + } + + if (!glusterd_is_brick_started (brickinfo)) { + gf_log (this->name, GF_LOG_WARNING, + "brick %s:%s is not started", + brickinfo->hostname, + brickinfo->path); + brick_order++; + brick_count++; + continue; + } + + device = glusterd_get_brick_mount_details (brickinfo); + if (!device) { + snprintf (err_str, sizeof (err_str), + "getting device name for the brick " + "%s:%s failed", brickinfo->hostname, + brickinfo->path); + ret = -1; + goto out; + } + + device = glusterd_build_snap_device_path (device, + snap_volname); + if (!device) { + snprintf (err_str, sizeof (err_str), + "cannot copy the snapshot device " + "name (volname: %s, snapname: %s)", + volinfo->volname, snapname); + loglevel = GF_LOG_WARNING; + ret = -1; + goto out; + } + + snprintf (key, sizeof(key), + "vol%ld.brick_snapdevice%ld", i, + brick_count); + ret = dict_set_dynstr (rsp_dict, key, device); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + GF_FREE (device); + goto out; + } + + ret = glusterd_get_brick_root (brickinfo->path, + &mnt_pt); + if (ret) { + snprintf (err_str, sizeof (err_str), + "could not get the root of the brick path %s", + brickinfo->path); + loglevel = GF_LOG_WARNING; + goto out; + } + if (strncmp (brickinfo->path, mnt_pt, strlen(mnt_pt))) { + snprintf (err_str, sizeof (err_str), + "brick: %s brick mount: %s", + brickinfo->path, mnt_pt); + loglevel = GF_LOG_WARNING; + goto out; + } + + brick_dir = &brickinfo->path[strlen (mnt_pt)]; + brick_dir++; + + snprintf (snap_brick_dir, sizeof (snap_brick_dir), + "/%s", brick_dir); + + tmpstr = gf_strdup (snap_brick_dir); + if (!tmpstr) { + ret = -1; + goto out; + } + snprintf (key, sizeof(key), "vol%ld.brickdir%ld", i, + brick_count); + ret = dict_set_dynstr (rsp_dict, key, tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", snap_mount); + goto out; + } + tmpstr = NULL; + + snprintf (key, sizeof(key) - 1, "vol%ld.brick%ld.order", + i, brick_count); + ret = dict_set_int64 (rsp_dict, key, brick_order); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set %s", key); + goto out; + } + + brick_count++; + brick_order++; + } + snprintf (key, sizeof(key) - 1, "vol%ld_brickcount", i); + ret = dict_set_int64 (rsp_dict, key, brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s", + key); + goto out; + } + } + + ret = dict_set_int64 (rsp_dict, "volcount", volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volcount"); + goto out; + } + + ret = 0; +out: + if (ret) + GF_FREE (tmpstr); + + if (ret && err_str[0] != '\0') { + gf_log (this->name, loglevel, "%s", err_str); + *op_errstr = gf_strdup (err_str); + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +glusterd_snap_t* +glusterd_new_snap_object() +{ + glusterd_snap_t *snap = NULL; + + snap = GF_CALLOC (1, sizeof (*snap), gf_gld_mt_snap_t); + + if (snap) { + if (LOCK_INIT (&snap->lock)) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed initiating" + " snap lock"); + GF_FREE (snap); + return NULL; + } + + INIT_LIST_HEAD (&snap->snap_list); + INIT_LIST_HEAD (&snap->volumes); + snap->snapname[0] = 0; + snap->snap_status = 
GD_SNAP_STATUS_INIT; + } + + return snap; + +}; + +/* Function glusterd_list_add_snapvol adds the volinfo object (snapshot volume) + to the snapshot object list and to the parent volume list */ +int32_t +glusterd_list_add_snapvol (glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol) +{ + int ret = -1; + glusterd_snap_t *snap = NULL; + + GF_VALIDATE_OR_GOTO ("glusterd", origin_vol, out); + GF_VALIDATE_OR_GOTO ("glusterd", snap_vol, out); + + snap = snap_vol->snapshot; + GF_ASSERT (snap); + + list_add_tail (&snap_vol->vol_list, &snap->volumes); + LOCK (&origin_vol->lock); + { + list_add_order (&snap_vol->snapvol_list, + &origin_vol->snap_volumes, + glusterd_compare_snap_vol_time); + origin_vol->snap_count++; + } + UNLOCK (&origin_vol->lock); + + gf_log (THIS->name, GF_LOG_DEBUG, "Snap %s added to the list", + snap->snapname); + ret = 0; +out: + return ret; +} + +glusterd_snap_t* +glusterd_find_snap_by_name (char *snapname) +{ + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (snapname); + + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (!strcmp (snap->snapname, snapname)) { + gf_log (THIS->name, GF_LOG_DEBUG, "Found " + "snap %s (%s)", snap->snapname, + uuid_utoa (snap->snap_id)); + goto out; + } + } + snap = NULL; +out: + return snap; +} + +glusterd_snap_t* +glusterd_find_snap_by_id (uuid_t snap_id) +{ + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + + if (uuid_is_null(snap_id)) + goto out; + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (!uuid_compare (snap->snap_id, snap_id)) { + gf_log (THIS->name, GF_LOG_DEBUG, "Found " + "snap %s (%s)", snap->snapname, + uuid_utoa (snap->snap_id)); + goto out; + } + } + snap = NULL; +out: + return snap; +} + +int +glusterd_do_lvm_snapshot_remove (glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *brickinfo, + const char *mount_pt, const char *snap_device) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + char msg[1024] = {0, }; + char pidfile[PATH_MAX] = {0, }; + pid_t pid = -1; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + if (!brickinfo) { + gf_log (this->name, GF_LOG_ERROR, "brickinfo NULL"); + goto out; + } + + GF_ASSERT (snap_vol); + GF_ASSERT (mount_pt); + GF_ASSERT (snap_device); + + GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_vol, brickinfo, priv); + if (glusterd_is_service_running (pidfile, &pid)) { + ret = kill (pid, SIGKILL); + if (ret && errno != ESRCH) { + gf_log (this->name, GF_LOG_ERROR, "Unable to kill pid " + "%d reason : %s", pid, strerror(errno)); + goto out; + } + } + + runinit (&runner); + snprintf (msg, sizeof (msg), "umount the snapshot mounted path %s", + mount_pt); + runner_add_args (&runner, "umount", mount_pt, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + + /* We need not do synclock_unlock => runner_run => synclock_lock here. + Because it is needed if we are running a glusterfs process in + runner_run, so that when the glusterfs process started wants to + communicate to glusterd, glusterd wont be able to respond if it + has held the big lock. So we do unlock, run glusterfs process + (thus communicate to glusterd), lock. But since this is not a + glusterfs command that is being run, unlocking and then relocking + is not needed. 
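+
+       For reference, the unlock/run/lock pattern described above
+       (needed only when runner_run () starts a glusterfs process that
+       must talk back to glusterd) would look roughly like:
+
+           synclock_unlock (&priv->big_lock);
+           ret = runner_run (&runner);
+           synclock_lock (&priv->big_lock);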
+ */ + ret = runner_run (&runner); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "unmounting the " + "path %s (brick: %s) failed (%s)", mount_pt, + brickinfo->path, strerror (errno)); + goto out; + } + + runinit (&runner); + snprintf (msg, sizeof(msg), "remove snapshot of the brick %s:%s, " + "device: %s", brickinfo->hostname, brickinfo->path, + snap_device); + runner_add_args (&runner, "/sbin/lvremove", "-f", snap_device, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + + ret = runner_run (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "removing snapshot of the " + "brick (%s:%s) of device %s failed", + brickinfo->hostname, brickinfo->path, snap_device); + goto out; + } + +out: + return ret; +} + +int32_t +glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol) +{ + char *mnt_pt = NULL; + struct mntent *entry = NULL; + int32_t brick_count = -1; + int32_t ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + FILE *mtab = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); + + if (!snap_vol) { + gf_log (this->name, GF_LOG_ERROR, "snap volinfo is NULL"); + goto out; + } + + brick_count = -1; + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + brick_count++; + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "snapshot was pending. lvm not present " + "for brick %s:%s of the snap %s.", + brickinfo->hostname, brickinfo->path, + snap_vol->snapshot->snapname); + + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_vol->volname, + brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_DELETE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + brickinfo->hostname, + brickinfo->path); + goto out; + } + + continue; + } + + ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "getting the root " + "of the brick for volume %s (snap %s) failed ", + snap_vol->volname, snap_vol->snapshot->snapname); + goto out; + } + + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (!entry) { + gf_log (this->name, GF_LOG_WARNING, "getting the mount" + " entry for the brick %s:%s of the snap %s " + "(volume: %s) failed", brickinfo->hostname, + brickinfo->path, snap_vol->snapshot->snapname, + snap_vol->volname); + ret = -1; + goto out; + } + ret = glusterd_do_lvm_snapshot_remove (snap_vol, brickinfo, + mnt_pt, + entry->mnt_fsname); + if (mtab) + endmntent (mtab); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "remove the snapshot %s (%s)", + brickinfo->path, entry->mnt_fsname); + goto out; + } + + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_snap_volume_remove (dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, + gf_boolean_t remove_lvm, + gf_boolean_t force) +{ + int ret = -1; + int save_ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *origin_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); + + if (!snap_vol) { + gf_log(this->name, GF_LOG_WARNING, "snap_vol in NULL"); + ret = -1; + goto out; + } + + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + ret = glusterd_brick_stop (snap_vol, brickinfo, _gf_false); + if (ret) { + 
gf_log(this->name, GF_LOG_WARNING, "Failed to stop " + "brick for volume %s", snap_vol->volname); + save_ret = ret; + + /* Continue to cleaning up the snap in case of error + if force flag is enabled */ + if (!force) + goto out; + } + } + + /* Only remove the backend lvm when required */ + if (remove_lvm) { + ret = glusterd_lvm_snapshot_remove (rsp_dict, snap_vol); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove " + "lvm snapshot volume %s", snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + } + + ret = glusterd_store_delete_volume (snap_vol); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove volume %s " + "from store", snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + + if (!list_empty(&snap_vol->snapvol_list)) { + ret = glusterd_volinfo_find (snap_vol->parent_volname, + &origin_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "parent volinfo %s for volume %s", + snap_vol->parent_volname, snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + origin_vol->snap_count--; + } + + ret = glusterd_volinfo_delete (snap_vol); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove volinfo " + "%s ", snap_vol->volname); + save_ret = ret; + if (!force) + goto out; + } + + if (save_ret) + ret = save_ret; +out: + gf_log (this->name, GF_LOG_TRACE, "returning %d", ret); + return ret; +} + +int32_t +glusterd_snapobject_delete (glusterd_snap_t *snap) +{ + if (snap == NULL) { + gf_log(THIS->name, GF_LOG_WARNING, "snap is NULL"); + return -1; + } + + list_del_init (&snap->snap_list); + list_del_init (&snap->volumes); + if (LOCK_DESTROY(&snap->lock)) + gf_log (THIS->name, GF_LOG_WARNING, "Failed destroying lock" + "of snap %s", snap->snapname); + + GF_FREE (snap->description); + GF_FREE (snap); + + return 0; +} + +int32_t +glusterd_snap_remove (dict_t *rsp_dict, + glusterd_snap_t *snap, + gf_boolean_t remove_lvm, + gf_boolean_t force) +{ + int ret = -1; + int save_ret = 0; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap); + + if (!snap) { + gf_log(this->name, GF_LOG_WARNING, "snap is NULL"); + ret = -1; + goto out; + } + + list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) { + ret = glusterd_snap_volume_remove (rsp_dict, snap_vol, + remove_lvm, force); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove " + "volinfo %s for snap %s", snap_vol->volname, + snap->snapname); + save_ret = ret; + + /* Continue to cleaning up the snap in case of error + if force flag is enabled */ + if (!force) + goto out; + } + } + + ret = glusterd_store_delete_snap (snap); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, "Failed to remove snap %s " + "from store", snap->snapname); + save_ret = ret; + if (!force) + goto out; + } + + ret = glusterd_snapobject_delete (snap); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "Failed to delete " + "snap object %s", snap->snapname); + + if (save_ret) + ret = save_ret; +out: + gf_log (THIS->name, GF_LOG_TRACE, "returning %d", ret); + return ret; +} + +static int +glusterd_snapshot_get_snapvol_detail (dict_t *dict, + glusterd_volinfo_t *snap_vol, + char *keyprefix, int detail) +{ + int ret = -1; + int snap_limit = 0; + char key[PATH_MAX] = {0,}; + char *value = NULL; + glusterd_volinfo_t *origin_vol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + conf = this->private; 
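+        /* Keys set under keyprefix by this function (summarising the
+         * code below): <prefix>.volname, <prefix>.vol-id,
+         * <prefix>.vol-status, <prefix>.snaps-available,
+         * <prefix>.snapcount and, when detail is non-zero,
+         * <prefix>.origin-volname. */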
+ GF_ASSERT (conf); + + GF_ASSERT (dict); + GF_ASSERT (snap_vol); + GF_ASSERT (keyprefix); + + /* Volume Name */ + value = gf_strdup (snap_vol->volname); + if (!value) + goto out; + + snprintf (key, sizeof (key), "%s.volname", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "volume name in dictionary: %s", key); + goto out; + } + + /* Volume ID */ + value = gf_strdup (uuid_utoa (snap_vol->volume_id)); + if (NULL == value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.vol-id", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "volume id in dictionary: %s", key); + goto out; + } + value = NULL; + + /* volume status */ + snprintf (key, sizeof (key), "%s.vol-status", keyprefix); + switch (snap_vol->status) { + case GLUSTERD_STATUS_STARTED: + ret = dict_set_str (dict, key, "Started"); + break; + case GLUSTERD_STATUS_STOPPED: + ret = dict_set_str (dict, key, "Stopped"); + break; + case GD_SNAP_STATUS_NONE: + ret = dict_set_str (dict, key, "None"); + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Invalid volume status"); + ret = -1; + goto out; + } + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volume status" + " in dictionary: %s", key); + goto out; + } + + + ret = glusterd_volinfo_find (snap_vol->parent_volname, &origin_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to get the parent " + "volinfo for the volume %s", snap_vol->volname); + goto out; + } + + /* Snaps available */ + if (conf->snap_max_hard_limit < origin_vol->snap_max_hard_limit) { + snap_limit = conf->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "system snap-max-hard-limit is" + " lesser than volume snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } else { + snap_limit = origin_vol->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "volume snap-max-hard-limit is" + " lesser than system snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } + + snprintf (key, sizeof (key), "%s.snaps-available", keyprefix); + if (snap_limit > origin_vol->snap_count) + ret = dict_set_int32 (dict, key, + snap_limit - origin_vol->snap_count); + else + ret = dict_set_int32 (dict, key, 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set available snaps"); + goto out; + } + + snprintf (key, sizeof (key), "%s.snapcount", keyprefix); + ret = dict_set_int32 (dict, key, origin_vol->snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save snapcount"); + goto out; + } + + if (!detail) + goto out; + + /* Parent volume name */ + value = gf_strdup (snap_vol->parent_volname); + if (!value) + goto out; + + snprintf (key, sizeof (key), "%s.origin-volname", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set parent " + "volume name in dictionary: %s", key); + goto out; + } + value = NULL; + + ret = 0; +out: + if (value) + GF_FREE (value); + + return ret; +} + +static int +glusterd_snapshot_get_snap_detail (dict_t *dict, glusterd_snap_t *snap, + char *keyprefix, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int volcount = 0; + char key[PATH_MAX] = {0,}; + char *value = NULL; + char *timestr = NULL; + struct tm *tmptr = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (dict); + GF_ASSERT 
(snap); + GF_ASSERT (keyprefix); + + /* Snap Name */ + value = gf_strdup (snap->snapname); + if (!value) + goto out; + + snprintf (key, sizeof (key), "%s.snapname", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap name in dictionary"); + goto out; + } + + /* Snap ID */ + value = gf_strdup (uuid_utoa (snap->snap_id)); + if (NULL == value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.snap-id", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap id in dictionary"); + goto out; + } + value = NULL; + + tmptr = localtime (&(snap->time_stamp)); + if (NULL == tmptr) { + gf_log (this->name, GF_LOG_ERROR, "Failed to convert " + "time_t to *tm"); + ret = -1; + goto out; + } + + timestr = GF_CALLOC (1, PATH_MAX, gf_gld_mt_char); + if (NULL == timestr) { + ret = -1; + goto out; + } + + ret = strftime (timestr, PATH_MAX, "%Y-%m-%d %H:%M:%S", tmptr); + if (0 == ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to convert time_t " + "to string"); + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.snap-time", keyprefix); + ret = dict_set_dynstr (dict, key, timestr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap time stamp in dictionary"); + goto out; + } + timestr = NULL; + + /* If snap description is provided then add that into dictionary */ + if (NULL != snap->description) { + value = gf_strdup (snap->description); + if (NULL == value) { + ret = -1; + goto out; + } + + snprintf (key, sizeof (key), "%s.snap-desc", keyprefix); + ret = dict_set_dynstr (dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "snap description in dictionary"); + goto out; + } + value = NULL; + } + + snprintf (key, sizeof (key), "%s.snap-status", keyprefix); + switch (snap->snap_status) { + case GD_SNAP_STATUS_INIT: + ret = dict_set_str (dict, key, "Init"); + break; + case GD_SNAP_STATUS_IN_USE: + ret = dict_set_str (dict, key, "In-use"); + break; + case GD_SNAP_STATUS_DECOMMISSION: + ret = dict_set_str (dict, key, "Decommisioned"); + break; + case GD_SNAP_STATUS_RESTORED: + ret = dict_set_str (dict, key, "Restored"); + break; + case GD_SNAP_STATUS_NONE: + ret = dict_set_str (dict, key, "None"); + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Invalid snap status"); + ret = -1; + goto out; + } + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap status " + "in dictionary"); + goto out; + } + + if (volinfo) { + volcount = 1; + snprintf (key, sizeof (key), "%s.vol%d", keyprefix, volcount); + ret = glusterd_snapshot_get_snapvol_detail (dict, + volinfo, key, 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get volume detail %s for snap %s", + snap_vol->volname, snap->snapname); + goto out; + } + goto done; + } + + list_for_each_entry_safe (snap_vol, tmp_vol, &snap->volumes, vol_list) { + volcount++; + snprintf (key, sizeof (key), "%s.vol%d", keyprefix, volcount); + ret = glusterd_snapshot_get_snapvol_detail (dict, + snap_vol, key, 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get volume detail %s for snap %s", + snap_vol->volname, snap->snapname); + goto out; + } + } + +done: + snprintf (key, sizeof (key), "%s.vol-count", keyprefix); + ret = dict_set_int32 (dict, key, volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s", + key); + goto out; + } + + ret = 0; +out: + if (value) + GF_FREE 
(value); + + if (timestr) + GF_FREE(timestr); + + return ret; +} + +static int +glusterd_snapshot_get_all_snap_info (dict_t *dict) +{ + int ret = -1; + int snapcount = 0; + char key[PATH_MAX] = {0,}; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + + /* General parameter validation */ + GF_ASSERT (dict); + + list_for_each_entry_safe (snap, tmp_snap, &priv->snapshots, snap_list) { + snapcount++; + snprintf (key, sizeof (key), "snap%d", snapcount); + ret = glusterd_snapshot_get_snap_detail (dict, snap, key, NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snapdetail for snap %s", snap->snapname); + goto out; + } + } + + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_get_info_by_volume (dict_t *dict, char *volname, + char *err_str, size_t len) +{ + int ret = -1; + int snapcount = 0; + int snap_limit = 0; + char *value = NULL; + char key[PATH_MAX] = ""; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + conf = this->private; + GF_ASSERT (conf); + + GF_ASSERT (dict); + GF_ASSERT (volname); + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, len, "Volume (%s) does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + /* Snaps available */ + if (conf->snap_max_hard_limit < volinfo->snap_max_hard_limit) { + snap_limit = conf->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "system snap-max-hard-limit is" + " lesser than volume snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } else { + snap_limit = volinfo->snap_max_hard_limit; + gf_log(this->name, GF_LOG_DEBUG, "volume snap-max-hard-limit is" + " lesser than system snap-max-hard-limit, " + "snap-max-hard-limit value is set to %d", snap_limit); + } + + if (snap_limit > volinfo->snap_count) + ret = dict_set_int32 (dict, "snaps-available", + snap_limit - volinfo->snap_count); + else + ret = dict_set_int32 (dict, "snaps-available", 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set available snaps"); + goto out; + } + + /* Origin volume name */ + value = gf_strdup (volinfo->volname); + if (!value) + goto out; + + ret = dict_set_dynstr (dict, "origin-volname", value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set parent " + "volume name in dictionary: %s", key); + goto out; + } + value = NULL; + + list_for_each_entry_safe (snap_vol, tmp_vol, &volinfo->snap_volumes, + snapvol_list) { + snapcount++; + snprintf (key, sizeof (key), "snap%d", snapcount); + ret = glusterd_snapshot_get_snap_detail (dict, + snap_vol->snapshot, + key, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "snapdetail for snap %s", + snap_vol->snapshot->snapname); + goto out; + } + } + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + if (value) + GF_FREE (value); + + return ret; +} + +/* This function will be called from RPC handler routine. 
+ * This function is responsible for getting the requested + * snapshot info into the dictionary. + * + * @param req RPC request object. Required for sending a response back. + * @param op glusterd operation. Required for sending a response back. + * @param dict pointer to dictionary which will contain both + * request and response key-pair values. + * @return -1 on error and 0 on success + */ +int +glusterd_handle_snapshot_info (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + int8_t snap_driven = 1; + char *volname = NULL; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + int32_t cmd = GF_SNAP_INFO_TYPE_ALL; + + this = THIS; + GF_ASSERT (this); + + GF_VALIDATE_OR_GOTO (this->name, req, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + + + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get type " + "of snapshot info"); + goto out; + } + + switch (cmd) { + case GF_SNAP_INFO_TYPE_ALL: + { + ret = glusterd_snapshot_get_all_snap_info (dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get info of all snaps"); + goto out; + } + break; + } + + case GF_SNAP_INFO_TYPE_SNAP: + { + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snap name"); + goto out; + } + + ret = dict_set_int32 (dict, "snap-count", 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snapcount"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, + "Snap (%s) does not exist", snapname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + ret = -1; + goto out; + } + ret = glusterd_snapshot_get_snap_detail (dict, snap, + "snap1", NULL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snap detail of snap " + "%s", snap->snapname); + goto out; + } + break; + } + + case GF_SNAP_INFO_TYPE_VOL: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volname"); + goto out; + } + ret = glusterd_snapshot_get_info_by_volume (dict, + volname, err_str, len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volume info of volume " + "%s", volname); + goto out; + } + snap_driven = 0; + break; + } + } + + ret = dict_set_int8 (dict, "snap-driven", snap_driven); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap-driven"); + goto out; + } + + /* If everything is successful then send the response back to cli. 
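+ * (glusterd_op_send_cli_response () below serialises the dict
+ * assembled by the handlers above and replies on the same rpcsvc
+ * request.)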
+ * In case of failure the caller of this function will take care + of the response */ + ret = glusterd_op_send_cli_response (op, 0, 0, req, dict, err_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to send cli " + "response"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +/* This function sets all the snapshot names in the dictionary */ +int +glusterd_snapshot_get_all_snapnames (dict_t *dict) +{ + int ret = -1; + int snapcount = 0; + char *snapname = NULL; + char key[PATH_MAX] = {0,}; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (dict); + + list_for_each_entry_safe (snap, tmp_snap, &priv->snapshots, snap_list) { + snapcount++; + snapname = gf_strdup (snap->snapname); + if (!snapname) { + gf_log (this->name, GF_LOG_ERROR, "strdup failed"); + ret = -1; + goto out; + } + snprintf (key, sizeof (key), "snapname%d", snapcount); + ret = dict_set_dynstr (dict, key, snapname); + if (ret) { + GF_FREE (snapname); + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s", + key); + goto out; + } + } + + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + + return ret; +} + +/* This function sets all the snapshot names + under a given volume in the dictionary */ +int +glusterd_snapshot_get_vol_snapnames (dict_t *dict, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int snapcount = 0; + char *snapname = NULL; + char key[PATH_MAX] = {0,}; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp_vol = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (dict); + GF_ASSERT (volinfo); + + list_for_each_entry_safe (snap_vol, tmp_vol, + &volinfo->snap_volumes, snapvol_list) { + snapcount++; + snapname = gf_strdup (snap_vol->snapshot->snapname); + if (!snapname) { + gf_log (this->name, GF_LOG_ERROR, + "strdup failed"); + ret = -1; + goto out; + } + snprintf (key, sizeof (key), "snapname%d", snapcount); + ret = dict_set_dynstr (dict, key, snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "set %s", key); + GF_FREE (snapname); + goto out; + } + } + + ret = dict_set_int32 (dict, "snap-count", snapcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snapcount"); + goto out; + } + + ret = 0; +out: + + return ret; +} + +int +glusterd_handle_snapshot_list (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_VALIDATE_OR_GOTO (this->name, req, out); + GF_VALIDATE_OR_GOTO (this->name, dict, out); + + /* Ignore error for getting volname as it is optional */ + ret = dict_get_str (dict, "volname", &volname); + + if (NULL == volname) { + ret = glusterd_snapshot_get_all_snapnames (dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snapshot list"); + goto out; + } + } else { + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, len, + "Volume (%s) does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + goto out; + } + + ret = glusterd_snapshot_get_vol_snapnames (dict, volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get snapshot list for volume %s", + volname); + goto out; + } + } + + /* If everything is successful 
then send the response back to cli. + In case of failure the caller of this function will take of response.*/ + ret = glusterd_op_send_cli_response (op, 0, 0, req, dict, err_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to send cli " + "response"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +/* This is a snapshot create handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt_v3 framework to do the actual snap creation on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot restore request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_create (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + int64_t volcount = 0; + xlator_t *this = NULL; + char key[PATH_MAX] = ""; + char *username = NULL; + char *password = NULL; + uuid_t *uuid_ptr = NULL; + uuid_t tmp_uuid = {0}; + int i = 0; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + if (volcount <= 0) { + gf_log (this->name, GF_LOG_ERROR, "Invalid volume count %ld " + "supplied", volcount); + ret = -1; + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to get the snapname"); + goto out; + } + + if (strlen(snapname) >= GLUSTERD_MAX_SNAP_NAME) { + snprintf (err_str, len, "snapname cannot exceed 255 " + "characters"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + + uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!uuid_ptr) { + gf_log (this->name, GF_LOG_ERROR, "Out Of Memory"); + ret = -1; + goto out; + } + + uuid_generate (*uuid_ptr); + ret = dict_set_bin (dict, "snap-id", uuid_ptr, sizeof(uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to set snap-id"); + GF_FREE (uuid_ptr); + goto out; + } + uuid_ptr = NULL; + + ret = dict_set_int64 (dict, "snap-time", (int64_t)time(NULL)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to set snap-time"); + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "volname%d", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get volume name"); + goto out; + } + + /* generate internal username and password for the snap*/ + uuid_generate (tmp_uuid); + username = gf_strdup (uuid_utoa (tmp_uuid)); + snprintf (key, sizeof(key), "volume%d_username", i); + ret = dict_set_dynstr (dict, key, username); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap " + "username for volume %s", volname); + GF_FREE (username); + goto out; + } + + uuid_generate (tmp_uuid); + password = gf_strdup (uuid_utoa (tmp_uuid)); + snprintf (key, sizeof(key), "volume%d_password", i); + ret = dict_set_dynstr (dict, key, password); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set snap " + "password for volume %s", volname); + GF_FREE (password); + goto out; + } + + uuid_ptr = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if 
(!uuid_ptr) { + gf_log (this->name, GF_LOG_ERROR, "Out Of Memory"); + ret = -1; + goto out; + } + + snprintf (key, sizeof(key) - 1, "vol%d_volid", i); + uuid_generate (*uuid_ptr); + ret = dict_set_bin (dict, key, uuid_ptr, sizeof(uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set snap_volid"); + GF_FREE (uuid_ptr); + goto out; + } + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap " + "phases"); + } + +out: + return ret; +} + +/* This is a snapshot status handler function. This function will be + * executed in a originator node. This function is responsible for + * calling mgmt v3 framework to get the actual snapshot status from + * all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot status request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * return : 0 in case of success. + * -1 in case of failure. + * + */ +int +glusterd_handle_snapshot_status (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *volname = NULL; + char *snapname = NULL; + char *buf = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + int32_t cmd = -1; + int i = 0; + dict_t *voldict = NULL; + char key[PATH_MAX] = ""; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not get status type"); + goto out; + } + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: + { + /* IF we give "gluster snapshot status" + * then lock is held on all snaps. + * This is the place where necessary information + * (snapname and snapcount)is populated in dictionary + * for locking. + */ + ++i; + list_for_each_entry (snap, &conf->snapshots, snap_list) + { + snprintf (key, sizeof (key), "snapname%d", i); + buf = gf_strdup (snap->snapname); + if (!buf) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, key, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save snapname (%s) " + "in the dictionary", + snap->snapname); + GF_FREE (buf); + goto out; + } + + buf = NULL; + i++; + } + + ret = dict_set_int32 (dict, "snapcount", i - 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not " + "save snapcount in the dictionary"); + goto out; + } + break; + } + + case GF_SNAP_STATUS_TYPE_SNAP: + { + /* IF we give "gluster snapshot status <snapname>" + * then lock is held on single snap. + * This is the place where necessary information + * (snapname)is populated in dictionary + * for locking. 
+ */ + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, "Snap (%s)" + "does not exist", snapname); + gf_log(this->name, GF_LOG_ERROR, + "%s", err_str); + ret = -1; + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch volname"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, len, "Volume (%s) " + "does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + goto out; + } + + i = 1; + list_for_each_entry (snap_volinfo, + &volinfo->snap_volumes, snapvol_list) { + snprintf (key, sizeof (key), "snapname%d", i); + + buf = gf_strdup + (snap_volinfo->snapshot->snapname); + if (!buf) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, key, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save snapname"); + GF_FREE (buf); + goto out; + } + + buf = NULL; + i++; + } + + ret = dict_set_int32 (dict, "snapcount", i-1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save snapcount"); + goto out; + } + break; + default: + { + gf_log (this->name, GF_LOG_ERROR, "Unknown type"); + ret = -1; + goto out; + } + } + + /* Volume lock is not necessary for snapshot status, hence + * turning it off + */ + ret = dict_set_int8 (dict, "hold_vol_locks", 0); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Setting volume lock " + "flag failed"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate " + "snap phases"); + goto out; + } + + ret = 0; + +out: + if (voldict) { + dict_unref (voldict); + } + return ret; +} + + +/* This is a snapshot restore handler function. This function will be + * executed in the originator node. 
This function is responsible for + * calling mgmt_v3 framework to do the actual restore on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot restore request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_restore (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + char *snapname = NULL; + char *buf = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + int32_t i = 0; + char key[PATH_MAX] = ""; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, "Snap (%s) does not exist", snapname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + ret = -1; + goto out; + } + + list_for_each_entry (snap_volinfo, &snap->volumes, vol_list) { + i++; + snprintf (key, sizeof (key), "volname%d", i); + buf = gf_strdup (snap_volinfo->parent_volname); + if (!buf) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, key, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not set " + "parent volume name %s in the dict", + snap_volinfo->parent_volname); + GF_FREE (buf); + goto out; + } + buf = NULL; + } + + ret = dict_set_int32 (dict, "volcount", i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save volume count"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap " + "phases"); + goto out; + } + + ret = 0; + +out: + return ret; +} + +glusterd_snap_t* +glusterd_create_snap_object (dict_t *dict, dict_t *rsp_dict) +{ + char *snapname = NULL; + uuid_t *snap_id = NULL; + char *description = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + int64_t time_stamp = 0; + + this = THIS; + priv = this->private; + + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + /* Fetch snapname, description, id and time from dict */ + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snapname"); + goto out; + } + + /* Ignore ret value for description*/ + ret = dict_get_str (dict, "description", &description); + + ret = dict_get_bin (dict, "snap-id", (void **)&snap_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snap_id"); + goto out; + } + + ret = dict_get_int64 (dict, "snap-time", &time_stamp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snap-time"); + goto out; + } + if (time_stamp <= 0) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Invalid time-stamp: %ld", + time_stamp); + goto out; + } + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + if (!strcmp (snap->snapname, snapname) || + !uuid_compare (snap->snap_id, *snap_id)) { + gf_log (THIS->name, GF_LOG_ERROR, + "Found duplicate snap %s (%s)", + snap->snapname, uuid_utoa (snap->snap_id)); + ret = -1; + break; + } + } + if (ret) { + snap = NULL; + goto out; + } + + 
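+        /* A sketch of the ordering applied when the new snap is linked into
+         * priv->snapshots below (list_add_order with
+         * glusterd_compare_snap_time): the list is assumed to be kept sorted
+         * by creation time, roughly
+         *
+         *   int glusterd_compare_snap_time (struct list_head *l1,
+         *                                   struct list_head *l2)
+         *   {
+         *           glusterd_snap_t *s1 = list_entry (l1, glusterd_snap_t,
+         *                                             snap_list);
+         *           glusterd_snap_t *s2 = list_entry (l2, glusterd_snap_t,
+         *                                             snap_list);
+         *
+         *           return (int)difftime (s1->time_stamp, s2->time_stamp);
+         *   }
+         *
+         * Only the time-ordered insertion itself is implied by this file;
+         * the comparator body shown here is an assumption.
+         */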
snap = glusterd_new_snap_object (); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Could not create " + "the snap object for snap %s", snapname); + goto out; + } + + strcpy (snap->snapname, snapname); + uuid_copy (snap->snap_id, *snap_id); + snap->time_stamp = (time_t)time_stamp; + /* Set the status as GD_SNAP_STATUS_INIT and once the backend snapshot + is taken and snap is really ready to use, set the status to + GD_SNAP_STATUS_IN_USE. This helps in identifying the incomplete + snapshots and cleaning them up. + */ + snap->snap_status = GD_SNAP_STATUS_INIT; + if (description) { + snap->description = gf_strdup (description); + if (snap->description == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "Saving the Snap Description Failed"); + ret = -1; + goto out; + } + } + + ret = glusterd_store_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Could not store snap" + "object %s", snap->snapname); + goto out; + } + + list_add_order (&snap->snap_list, &priv->snapshots, + glusterd_compare_snap_time); + + gf_log (this->name, GF_LOG_TRACE, "Snap %s added to the list", + snap->snapname); + + ret = 0; + +out: + if (ret) { + if (snap) + glusterd_snap_remove (rsp_dict, snap, + _gf_true, _gf_true); + snap = NULL; + } + + return snap; +} + +/* This function is called to get the device path of the snap lvm. Usually + if /dev/mapper/<group-name>-<lvm-name> is the device for the lvm, + then the snap device will be /dev/<group-name>/<snapname>. + This function takes care of building the path for the snap device. +*/ +char * +glusterd_build_snap_device_path (char *device, char *snapname) +{ + char snap[PATH_MAX] = ""; + char msg[1024] = ""; + char volgroup[PATH_MAX] = ""; + char *snap_device = NULL; + xlator_t *this = NULL; + runner_t runner = {0,}; + char *ptr = NULL; + int ret = -1; + + this = THIS; + GF_ASSERT (this); + if (!device) { + gf_log (this->name, GF_LOG_ERROR, "device is NULL"); + goto out; + } + if (!snapname) { + gf_log (this->name, GF_LOG_ERROR, "snapname is NULL"); + goto out; + } + + runinit (&runner); + runner_add_args (&runner, "/sbin/lvs", "--noheadings", "-o", "vg_name", + device, NULL); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + snprintf (msg, sizeof (msg), "Get volume group for device %s", device); + runner_log (&runner, this->name, GF_LOG_DEBUG, msg); + ret = runner_start (&runner); + if (ret == -1) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume group " + "for device %s", device); + runner_end (&runner); + goto out; + } + ptr = fgets(volgroup, sizeof(volgroup), + runner_chio (&runner, STDOUT_FILENO)); + if (!ptr || !strlen(volgroup)) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume group " + "for snap %s", snapname); + runner_end (&runner); + ret = -1; + goto out; + } + runner_end (&runner); + + snprintf (snap, sizeof(snap), "/dev/%s/%s", gf_trim(volgroup), + snapname); + snap_device = gf_strdup (snap); + if (!snap_device) { + gf_log (this->name, GF_LOG_WARNING, "Cannot copy the " + "snapshot device name for snapname: %s)", snapname); + } + +out: + return snap_device; +} + +/* This function actually calls the command (or the API) for taking the + snapshot of the backend brick filesystem. 
If this is successful, + then call the glusterd_snap_create function to create the snap object + for glusterd +*/ +char * +glusterd_take_lvm_snapshot (glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *brickinfo) +{ + char msg[NAME_MAX] = ""; + char buf[PATH_MAX] = ""; + char *snap_device = NULL; + char *ptr = NULL; + char *device = NULL; + int ret = -1; + gf_boolean_t match = _gf_false; + runner_t runner = {0,}; + xlator_t *this = NULL; + + this = THIS; + + if (!brickinfo) { + gf_log (this->name, GF_LOG_ERROR, "brickinfo NULL"); + goto out; + } + + device = glusterd_get_brick_mount_details (brickinfo); + if (!device) { + gf_log (this->name, GF_LOG_ERROR, "getting device name for " + "the brick %s:%s failed", brickinfo->hostname, + brickinfo->path); + goto out; + } + + /* Figuring out if setactivationskip flag is supported or not */ + runinit (&runner); + snprintf (msg, sizeof (msg), "running lvcreate help"); + runner_add_args (&runner, "/sbin/lvcreate", "--help", NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + ret = runner_start (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to run lvcreate help"); + runner_end (&runner); + goto out; + } + + /* Looking for setactivationskip in lvcreate --help */ + do { + ptr = fgets(buf, sizeof(buf), + runner_chio (&runner, STDOUT_FILENO)); + if (ptr) { + if (strstr(buf, "setactivationskip")) { + match = _gf_true; + break; + } + } + } while (ptr != NULL); + runner_end (&runner); + + /* Takng the actual snapshot */ + runinit (&runner); + snprintf (msg, sizeof (msg), "taking snapshot of the brick %s:%s", + brickinfo->hostname, brickinfo->path); + if (match == _gf_true) + runner_add_args (&runner, "/sbin/lvcreate", "-s", device, + "--setactivationskip", "n", "--name", + snap_vol->volname, NULL); + else + runner_add_args (&runner, "/sbin/lvcreate", "-s", device, + "--name", snap_vol->volname, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + ret = runner_start (&runner); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "taking snapshot of the " + "brick (%s:%s) of device %s failed", + brickinfo->hostname, brickinfo->path, device); + runner_end (&runner); + goto out; + } + runner_end (&runner); + + snap_device = glusterd_build_snap_device_path (device, + snap_vol->volname); + if (!snap_device) { + gf_log (this->name, GF_LOG_WARNING, "Cannot copy the snapshot " + "device name for snap %s (volume id: %s)", + snap_vol->snapshot->snapname, snap_vol->volname); + ret = -1; + goto out; + } + +out: + return snap_device; +} + +int32_t +glusterd_snap_brick_create (char *device, glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *original_brickinfo, + int32_t brick_count, char *snap_brick_dir) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char snap_brick_mount_path[PATH_MAX] = ""; + char snap_brick_path[PATH_MAX] = ""; + char msg[1024] = ""; + struct stat statbuf = {0, }; + runner_t runner = {0, }; + + this = THIS; + priv = this->private; + + GF_ASSERT (device); + GF_ASSERT (snap_volinfo); + GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brick_dir); + + snprintf (snap_brick_mount_path, sizeof (snap_brick_mount_path), + "%s/%s/brick%d", snap_mount_folder, snap_volinfo->volname, + brick_count+1); + + snprintf (snap_brick_path, sizeof (snap_brick_path), "%s%s", + snap_brick_mount_path, snap_brick_dir); + + ret = mkdir_p (snap_brick_mount_path, 0777, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "creating the brick directory" + 
" %s for the snapshot %s(device: %s) failed", + snap_brick_mount_path, snap_volinfo->volname, device); + goto out; + } + /* mount the snap logical device on the directory inside + /run/gluster/snaps/<snapname>/@snap_brick_mount_path + Way to mount the snap brick via mount api is this. + ret = mount (device, snap_brick_mount_path, entry->mnt_type, + MS_MGC_VAL, "nouuid"); + But for now, mounting using runner apis. + */ + runinit (&runner); + snprintf (msg, sizeof (msg), "mounting snapshot of the brick %s:%s", + original_brickinfo->hostname, original_brickinfo->path); + runner_add_args (&runner, "mount", "-o", "nouuid", device, + snap_brick_mount_path, NULL); + runner_log (&runner, "", GF_LOG_DEBUG, msg); + + /* let glusterd get blocked till snapshot is over */ + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "mounting the snapshot " + "logical device %s failed (error: %s)", device, + strerror (errno)); + goto out; + } else + gf_log (this->name, GF_LOG_DEBUG, "mounting the snapshot " + "logical device %s successful", device); + + ret = stat (snap_brick_path, &statbuf); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "stat of the brick %s" + "(brick mount: %s) failed (%s)", snap_brick_path, + snap_brick_mount_path, strerror (errno)); + goto out; + } + ret = sys_lsetxattr (snap_brick_path, + GF_XATTR_VOL_ID_KEY, + snap_volinfo->volume_id, 16, + XATTR_REPLACE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "extended attribute %s on %s. Reason: " + "%s, snap: %s", GF_XATTR_VOL_ID_KEY, + snap_brick_path, strerror (errno), + snap_volinfo->volname); + goto out; + } + +out: + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "unmounting the snap brick" + " mount %s", snap_brick_mount_path); + umount (snap_brick_mount_path); + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Added missed_snap_entry to rsp_dict */ +int32_t +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, + glusterd_brickinfo_t *brickinfo, + int32_t brick_number, int32_t op) +{ + char *buf = NULL; + char missed_snap_entry[PATH_MAX] = ""; + char name_buf[PATH_MAX] = ""; + int32_t missed_snap_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_uuid); + GF_ASSERT (brickinfo); + + snprintf (missed_snap_entry, sizeof(missed_snap_entry), + "%s:%s=%d:%s:%d:%d", uuid_utoa(brickinfo->uuid), + snap_uuid, brick_number, brickinfo->path, op, + GD_MISSED_SNAP_PENDING); + + buf = gf_strdup (missed_snap_entry); + if (!buf) { + ret = -1; + goto out; + } + + /* Fetch the missed_snap_count from the dict */ + ret = dict_get_int32 (rsp_dict, "missed_snap_count", + &missed_snap_count); + if (ret) { + /* Initialize the missed_snap_count for the first time */ + missed_snap_count = 0; + } + + /* Setting the missed_snap_entry in the rsp_dict */ + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + missed_snap_count); + ret = dict_set_dynstr (rsp_dict, name_buf, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set missed_snap_entry (%s) " + "in the rsp_dict.", buf); + GF_FREE (buf); + goto out; + } + missed_snap_count++; + + /* Setting the new missed_snap_count in the dict */ + ret = dict_set_int32 (rsp_dict, "missed_snap_count", + missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set missed_snap_count for %s " + "in the rsp_dict.", 
missed_snap_entry); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int32_t +glusterd_add_bricks_to_snap_volume (dict_t *dict, dict_t *rsp_dict, + glusterd_volinfo_t *snap_vol, + glusterd_brickinfo_t *original_brickinfo, + glusterd_brickinfo_t *snap_brickinfo, + char **snap_brick_dir, int64_t volcount, + int32_t brick_count) +{ + char key[PATH_MAX] = ""; + char snap_brick_path[PATH_MAX] = ""; + char *snap_device = NULL; + gf_boolean_t add_missed_snap = _gf_false; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap_vol); + GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brickinfo); + GF_ASSERT (snap_brick_dir); + + snprintf (key, sizeof(key) - 1, "vol%ld.brickdir%d", volcount, + brick_count); + ret = dict_get_ptr (dict, key, (void **)snap_brick_dir); + if (ret) { + /* Using original brickinfo here because it will be a + * pending snapshot and storing the original brickinfo + * will help in mapping while recreating the missed snapshot + */ + gf_log (this->name, GF_LOG_WARNING, "Unable to fetch " + "snap mount path (%s). Using original brickinfo", key); + snap_brickinfo->snap_status = -1; + strcpy (snap_brick_path, original_brickinfo->path); + + /* In origiator node add snaps missed + * from different nodes to the dict + */ + if (is_origin_glusterd (dict) == _gf_true) + add_missed_snap = _gf_true; + } else { + /* Create brick-path in the format /var/run/gluster/snaps/ * + * <snap-uuid>/<original-brick#>/snap-brick-dir * + */ + snprintf (snap_brick_path, sizeof(snap_brick_path), + "%s/%s/brick%d%s", snap_mount_folder, + snap_vol->volname, brick_count+1, + *snap_brick_dir); + } + + if ((snap_brickinfo->snap_status != -1) && + (!uuid_compare (original_brickinfo->uuid, MY_UUID)) && + (!glusterd_is_brick_started (original_brickinfo))) { + /* In case if the brick goes down after prevalidate. */ + gf_log (this->name, GF_LOG_WARNING, "brick %s:%s is not" + " started (snap: %s)", + original_brickinfo->hostname, + original_brickinfo->path, + snap_vol->snapshot->snapname); + + snap_brickinfo->snap_status = -1; + strcpy (snap_brick_path, original_brickinfo->path); + add_missed_snap = _gf_true; + } + + if (add_missed_snap) { + ret = glusterd_add_missed_snaps_to_dict (rsp_dict, + snap_vol->volname, + original_brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_CREATE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to add missed" + " snapshot info for %s:%s in the rsp_dict", + original_brickinfo->hostname, + original_brickinfo->path); + goto out; + } + } + + snprintf (key, sizeof(key), "vol%ld.brick_snapdevice%d", + volcount, brick_count); + ret = dict_get_ptr (dict, key, (void **)&snap_device); + if (ret) { + /* If the device name is empty, so will be the brick path + * Hence the missed snap has already been added above + */ + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch " + "snap device (%s). 
Leaving empty", key); + } else + strcpy (snap_brickinfo->device_path, snap_device); + + ret = gf_canonicalize_path (snap_brick_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to canonicalize path"); + goto out; + } + + strcpy (snap_brickinfo->hostname, original_brickinfo->hostname); + strcpy (snap_brickinfo->path, snap_brick_path); + uuid_copy (snap_brickinfo->uuid, original_brickinfo->uuid); + list_add_tail (&snap_brickinfo->brick_list, &snap_vol->bricks); + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +static int32_t +glusterd_take_brick_snapshot (glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol, dict_t *rsp_dict, + glusterd_brickinfo_t *original_brickinfo, + glusterd_brickinfo_t *snap_brickinfo, + char *snap_brick_dir, int32_t brick_count) +{ + char *device = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (origin_vol); + GF_ASSERT (snap_vol); + GF_ASSERT (rsp_dict); + GF_ASSERT (original_brickinfo); + GF_ASSERT (snap_brickinfo); + GF_ASSERT (snap_brick_dir); + + device = glusterd_take_lvm_snapshot (snap_vol, original_brickinfo); + /* Fail the snapshot even though snapshot on one of + the bricks fails. At the end when we check whether + the snapshot volume meets quorum or not, then the + the snapshot can either be treated as success, or + in case of failure we can undo the changes and return + failure to cli. */ + if (!device) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to take snapshot of %s:%s", + original_brickinfo->hostname, + original_brickinfo->path); + goto out; + } + + /* create the complete brick here */ + ret = glusterd_snap_brick_create (device, snap_vol, + original_brickinfo, + brick_count, snap_brick_dir); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "not able to" + " create the brickinfo for the snap %s" + ", volume %s", snap_vol->snapshot->snapname, + origin_vol->volname); + goto out; + } + +out: + if (device) + GF_FREE (device); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Look for disconnected peers, for missed snap creates or deletes */ +static int32_t +glusterd_find_missed_snap (dict_t *rsp_dict, glusterd_volinfo_t *vol, + char *snap_uuid, struct list_head *peers, + int32_t op) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (peers); + GF_ASSERT (vol); + GF_ASSERT (snap_uuid); + + brick_count = 0; + list_for_each_entry (brickinfo, &vol->bricks, brick_list) { + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + /* If the brick belongs to the same node */ + brick_count++; + continue; + } + + list_for_each_entry (peerinfo, peers, uuid_list) { + if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) { + /* If the brick doesnt belong to this peer */ + continue; + } + + /* Found peer who owns the brick, * + * if peer is not connected or not * + * friend add it to missed snap list */ + if (!(peerinfo->connected) || + (peerinfo->state.state != + GD_FRIEND_STATE_BEFRIENDED)) { + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_uuid, + brickinfo, + brick_count + 1, + op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot " + "info for %s:%s in the " + "rsp_dict", brickinfo->hostname, + brickinfo->path); + goto out; + } + } + } + brick_count++; + } + + ret = 0; +out: + gf_log (this->name, 
GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +glusterd_volinfo_t * +glusterd_do_snap_vol (glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap, + dict_t *dict, dict_t *rsp_dict, int64_t volcount) +{ + char key[PATH_MAX] = ""; + char *snap_brick_dir = NULL; + char *username = NULL; + char *password = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_vol = NULL; + uuid_t *snap_volid = NULL; + int32_t ret = -1; + int32_t brick_count = 0; + glusterd_brickinfo_t *snap_brickinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (origin_vol); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + /* fetch username, password and vol_id from dict*/ + snprintf (key, sizeof(key), "volume%ld_username", volcount); + ret = dict_get_str (dict, key, &username); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get %s for " + "snap %s", key, snap->snapname); + goto out; + } + + snprintf (key, sizeof(key), "volume%ld_password", volcount); + ret = dict_get_str (dict, key, &password); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get %s for " + "snap %s", key, snap->snapname); + goto out; + } + snprintf (key, sizeof(key) - 1, "vol%ld_volid", volcount); + ret = dict_get_bin (dict, key, (void **)&snap_volid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volid"); + goto out; + } + + /* We are not setting the username and password here as + * we need to set the user name and password passed in + * the dictionary + */ + ret = glusterd_volinfo_dup (origin_vol, &snap_vol, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to duplicate volinfo " + "for the snapshot %s", snap->snapname); + goto out; + } + + /* uuid is used as lvm snapshot name. 
+ This will avoid restrictions on snapshot names provided by user */ + GLUSTERD_GET_UUID_NOHYPHEN (snap_vol->volname, *snap_volid); + uuid_copy (snap_vol->volume_id, *snap_volid); + snap_vol->is_snap_volume = _gf_true; + strcpy (snap_vol->parent_volname, origin_vol->volname); + snap_vol->snapshot = snap; + + glusterd_auth_set_username (snap_vol, username); + glusterd_auth_set_password (snap_vol, password); + + /* Adding snap brickinfos to the snap volinfo */ + brick_count = 0; + list_for_each_entry (brickinfo, &origin_vol->bricks, brick_list) { + snap_brickinfo = NULL; + + ret = glusterd_brickinfo_new (&snap_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "initializing the brick for the snap " + "volume failed (snapname: %s)", snap->snapname); + goto out; + } + + ret = glusterd_add_bricks_to_snap_volume (dict, rsp_dict, + snap_vol, + brickinfo, + snap_brickinfo, + &snap_brick_dir, + volcount, + brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add the snap brick for " + "%s:%s to the snap volume", + brickinfo->hostname, brickinfo->path); + GF_FREE (snap_brickinfo); + goto out; + } + + /* Take snapshot of the brick */ + if ((uuid_compare (brickinfo->uuid, MY_UUID)) || + (snap_brickinfo->snap_status == -1)) { + brick_count++; + continue; + } + + ret = glusterd_take_brick_snapshot (origin_vol, snap_vol, + rsp_dict, brickinfo, + snap_brickinfo, + snap_brick_dir, + brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to take snapshot for %s:%s", + brickinfo->hostname, brickinfo->path); + goto out; + } + + brick_count++; + } + + /*TODO: the quorum check of the snap volume here */ + + ret = glusterd_store_volinfo (snap_vol, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store snapshot " + "volinfo (%s) for snap %s", snap_vol->volname, + snap->snapname); + goto out; + } + + ret = generate_brick_volfiles (snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "generating the brick " + "volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto out; + } + + ret = generate_client_volfiles (snap_vol, GF_CLIENT_TRUSTED); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "generating the trusted " + "client volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto out; + } + ret = generate_client_volfiles (snap_vol, GF_CLIENT_OTHER); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "generating the client " + "volfiles for the snap %s (volume: %s) failed", + snap->snapname, origin_vol->volname); + goto out; + } + + ret = glusterd_list_add_snapvol (origin_vol, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "could not add the snap " + "volume %s to the list", snap_vol->volname); + goto out; + } + + list_for_each_entry (brickinfo, &snap_vol->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "not starting snap brick %s:%s for " + "for the snap %s (volume: %s)", + brickinfo->hostname, brickinfo->path, + snap->snapname, origin_vol->volname); + continue; + } + + ret = glusterd_brick_start (snap_vol, brickinfo, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "starting the " + "brick %s:%s for the snap %s (volume: %s) " + "failed", brickinfo->hostname, brickinfo->path, + snap->snapname, origin_vol->volname); + goto out; + } + } + + snap_vol->status = GLUSTERD_STATUS_STARTED; + ret = 
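+        /* Example (illustrative): GLUSTERD_GET_UUID_NOHYPHEN above is what
+         * turns a snap volume id such as
+         *
+         *   9f3c5a2e-1b4d-4c6a-8e2f-0a1b2c3d4e5f
+         *
+         * into the lvm-safe snap volume name
+         *
+         *   9f3c5a2e1b4d4c6a8e2f0a1b2c3d4e5f
+         *
+         * i.e. the uuid string with the hyphens stripped, which is why user
+         * provided snap names never reach lvm. The sample uuid is made up.
+         */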
glusterd_store_volinfo (snap_vol, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store snap volinfo"); + goto out; + } + +out: + if (ret) { + if (snap_vol) + glusterd_snap_volume_remove (rsp_dict, snap_vol, + _gf_true, _gf_true); + snap_vol = NULL; + } + + return snap_vol; +} + +/* This is a snapshot remove handler function. This function will be + * executed in the originator node. This function is responsible for + * calling mgmt v3 framework to do the actual remove on all the bricks + * + * @param req RPC request object + * @param op gluster operation + * @param dict dictionary containing snapshot remove request + * @param err_str In case of an err this string should be populated + * @param len length of err_str buffer + * + * @return Negative value on Failure and 0 in success + */ +int +glusterd_handle_snapshot_remove (rpcsvc_request_t *req, glusterd_op_t op, + dict_t *dict, char *err_str, size_t len) +{ + int ret = -1; + int64_t volcount = 0; + char *snapname = NULL; + char *volname = NULL; + char key[PATH_MAX] = ""; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_volinfo_t *tmp = NULL; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (req); + GF_ASSERT (dict); + GF_ASSERT (err_str); + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get snapname"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + snprintf (err_str, len, "Snap (%s) does not exist", snapname); + gf_log (this->name, GF_LOG_ERROR, + "%s", err_str); + ret = -1; + goto out; + } + + /* Set volnames in the dict to get mgmt_v3 lock */ + list_for_each_entry_safe (snap_vol, tmp, &snap->volumes, vol_list) { + volcount++; + volname = gf_strdup (snap_vol->parent_volname); + if (!volname) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "strdup failed"); + goto out; + } + + snprintf (key, sizeof (key), "volname%ld", volcount); + ret = dict_set_dynstr (dict, key, volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "volume name in dictionary"); + GF_FREE (volname); + goto out; + } + volname = NULL; + } + ret = dict_set_int64 (dict, "volcount", volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volcount"); + goto out; + } + + ret = glusterd_mgmt_v3_initiate_snap_phases (req, op, dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to initiate snap " + "phases"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_remove_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + char *snapname = NULL; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Getting the snap name " + "failed"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Snap %s does not exist", + snapname); + ret = -1; + goto out; + } + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_status_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + char *snapname = NULL; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + int32_t cmd = -1; + glusterd_volinfo_t *volinfo = NULL; + char *volname = NULL; + + this = THIS; + 
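+        /* Example (illustrative): for "snapshot delete snap1" where snap1
+         * was taken from volumes vol1 and vol2, the remove handler above
+         * would have packed
+         *
+         *   volname1 = "vol1"
+         *   volname2 = "vol2"
+         *   volcount = 2
+         *
+         * so that the mgmt_v3 framework can lock the parent volumes. The
+         * volume names are made up.
+         */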
GF_ASSERT (this); + conf = this->private; + + GF_ASSERT (conf); + GF_ASSERT (op_errstr); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "Input dict is NULL"); + goto out; + } + + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not fetch status cmd"); + goto out; + } + + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: + { + break; + } + case GF_SNAP_STATUS_TYPE_SNAP: + { + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not fetch snapname"); + goto out; + } + + if (!glusterd_find_snap_by_name (snapname)) { + ret = gf_asprintf (op_errstr, "Snap (%s) " + "not found", snapname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Snap (%s) " + "not found", snapname); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not fetch volname"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + ret = gf_asprintf (op_errstr, "Volume (%s)" + "not found", volname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Volume " + "%s not present", volname); + goto out; + } + break; + + } + default: + { + gf_log (this->name, GF_LOG_ERROR, "Invalid command"); + break; + } + } + ret = 0; + +out: + return ret; +} + +int32_t +glusterd_snapshot_remove_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + char *snapname = NULL; + char *dup_snapname = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *snap_volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (op_errstr); + + priv = this->private; + GF_ASSERT (priv); + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Getting the snap name " + "failed"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Snap %s does not exist", + snapname); + ret = -1; + goto out; + } + + if (is_origin_glusterd (dict) == _gf_true) { + /* TODO : As of now there is only volume in snapshot. 
+ * Change this when multiple volume snapshot is introduced + */ + snap_volinfo = list_entry (snap->volumes.next, + glusterd_volinfo_t, + vol_list); + if (!snap_volinfo) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch snap_volinfo"); + ret = -1; + goto out; + } + + /* From origin glusterd check if * + * any peers with snap bricks is down */ + ret = glusterd_find_missed_snap (rsp_dict, snap_volinfo, + snap_volinfo->volname, + &priv->peers, + GF_SNAP_OPTION_TYPE_DELETE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to find missed snap deletes"); + goto out; + } + } + + ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to remove snap %s", + snapname); + goto out; + } + + dup_snapname = gf_strdup (snapname); + if (!dup_snapname) { + gf_log (this->name, GF_LOG_ERROR, "Strdup failed"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, "snapname", dup_snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set the snapname"); + GF_FREE (dup_snapname); + goto out; + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_do_snap_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + char *name = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_snap_t *snap = NULL; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + if (!dict || !op_errstr) { + gf_log (this->name, GF_LOG_ERROR, "input parameters NULL"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "getting the snap " + "name failed (volume: %s)", volinfo->volname); + goto out; + } + + /* + If the snapname is not found that means the failure happened at + staging, or in commit, before the snap object is created, in which + case there is nothing to cleanup. So set ret to 0. 
+ */ + snap = glusterd_find_snap_by_name (name); + if (!snap) { + gf_log (this->name, GF_LOG_INFO, "snap %s is not found", name); + ret = 0; + goto out; + } + + ret = glusterd_snap_remove (rsp_dict, snap, _gf_true, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "removing the snap %s failed", + name); + goto out; + } + + name = NULL; + + ret = 0; + +out: + + return ret; +} + +/* In case of a successful, delete or create operation, during post_validate * + * look for missed snap operations and update the missed snap lists */ +int32_t +glusterd_snapshot_update_snaps_post_validate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t ret = -1; + int32_t missed_snap_count = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + GF_ASSERT (op_errstr); + + ret = dict_get_int32 (dict, "missed_snap_count", + &missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_store_update_missed_snaps (dict, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to update missed_snaps_list"); + goto out; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snapshot_create_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int ret = -1; + int64_t i = 0; + int64_t volcount = 0; + char *snapname = NULL; + char *volname = NULL; + char *tmp_name = NULL; + char key[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_snap_t *snap = NULL; + glusterd_volinfo_t *origin_vol = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + GF_ASSERT(op_errstr); + GF_ASSERT(rsp_dict); + priv = this->private; + GF_ASSERT(priv); + + ret = dict_get_int64 (dict, "volcount", &volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "get the volume count"); + goto out; + } + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to fetch snapname"); + goto out; + } + tmp_name = gf_strdup (snapname); + if (!tmp_name) { + gf_log (this->name, GF_LOG_ERROR, "Out of memory"); + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, "snapname", tmp_name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set snapname in rsp_dict"); + GF_FREE (tmp_name); + goto out; + } + tmp_name = NULL; + + snap = glusterd_create_snap_object (dict, rsp_dict); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "creating the" + "snap object %s failed", snapname); + ret = -1; + goto out; + } + + for (i = 1; i <= volcount; i++) { + snprintf (key, sizeof (key), "volname%ld", i); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &origin_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to get the volinfo for " + "the volume %s", volname); + goto out; + } + + /* TODO: Create a stub where the bricks are + added parallely by worker threads so that + the snap creating happens parallely. 
*/ + snap_vol = glusterd_do_snap_vol (origin_vol, snap, dict, + rsp_dict, i); + if (!snap_vol) { + ret = -1; + gf_log (this->name, GF_LOG_WARNING, "taking the " + "snapshot of the volume %s failed", volname); + goto out; + } + } + + snap->snap_status = GD_SNAP_STATUS_IN_USE; + ret = glusterd_store_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Could not store snap" + "object %s", snap->snapname); + goto out; + } + + ret = 0; + +out: + if (ret) { + if (snap) + glusterd_snap_remove (rsp_dict, snap, + _gf_true, _gf_true); + snap = NULL; + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +snap_max_hard_limit_set_commit (dict_t *dict, uint64_t value, + char *volname, char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (volname); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + /* TODO: Initiate auto deletion when there is a limit change */ + if (!volname) { + /* For system limit */ + conf->snap_max_hard_limit = value; + + ret = glusterd_store_global_info (this); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to store " + "snap-max-hard-limit for system"); + goto out; + } + } else { + /* For one volume */ + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to get the" + " volinfo for volume %s", volname); + goto out; + } + + volinfo->snap_max_hard_limit = value; + + ret = glusterd_store_volinfo (volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to store " + "snap-max-hard-limit for volume %s", volname); + goto out; + } + } + + ret = 0; +out: + if (ret) { + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + } + return ret; +} + +int +snap_max_limits_display_commit (dict_t *rsp_dict, char *volname, + char **op_errstr) +{ + char err_str[PATH_MAX] = ""; + char buf[PATH_MAX] = ""; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = -1; + uint64_t active_hard_limit = 0; + uint64_t snap_max_limit = 0; + uint64_t soft_limit_value = -1; + uint64_t count = 0; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + GF_ASSERT (volname); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + + if (!volname) { + /* For system limit */ + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + if (volinfo->is_snap_volume == _gf_true) + continue; + snap_max_limit = volinfo->snap_max_hard_limit; + if (snap_max_limit > conf->snap_max_hard_limit) + active_hard_limit = conf->snap_max_hard_limit; + else + active_hard_limit = snap_max_limit; + soft_limit_value = (active_hard_limit * + conf->snap_max_soft_limit) / 100; + + snprintf (buf, sizeof(buf), "volume%ld-volname", count); + ret = dict_set_str (rsp_dict, buf, volinfo->volname); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, snap_max_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-active-hard-limit", count); + ret = dict_set_uint64 (rsp_dict, buf, + active_hard_limit); + if (ret) { + snprintf (err_str, PATH_MAX, + "Failed to set %s", 
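+                        /* Worked example (illustrative) of the limit
+                         * arithmetic used throughout this function: with a
+                         * system snap-max-hard-limit of 256, a volume
+                         * snap-max-hard-limit of 100 and a snap-max-soft-limit
+                         * of 90 (percent),
+                         *
+                         *   active_hard_limit = MIN (100, 256)   = 100
+                         *   soft_limit_value  = (100 * 90) / 100 = 90
+                         *
+                         * i.e. the effective hard limit is the smaller of the
+                         * volume and system limits, and the soft limit is a
+                         * percentage of that. The numbers are made up.
+                         */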
buf);
+                                goto out;
+                        }
+
+                        snprintf (buf, sizeof(buf),
+                                  "volume%ld-snap-max-soft-limit", count);
+                        ret = dict_set_uint64 (rsp_dict, buf, soft_limit_value);
+                        if (ret) {
+                                snprintf (err_str, PATH_MAX,
+                                          "Failed to set %s", buf);
+                                goto out;
+                        }
+                        count++;
+                }
+
+                ret = dict_set_uint64 (rsp_dict, "voldisplaycount", count);
+                if (ret) {
+                        snprintf (err_str, PATH_MAX,
+                                  "Failed to set voldisplaycount");
+                        goto out;
+                }
+        } else {
+                /* For one volume */
+                ret = glusterd_volinfo_find (volname, &volinfo);
+                if (ret) {
+                        snprintf (err_str, PATH_MAX, "Failed to get the"
+                                  " volinfo for volume %s", volname);
+                        goto out;
+                }
+
+                snap_max_limit = volinfo->snap_max_hard_limit;
+                if (snap_max_limit > conf->snap_max_hard_limit)
+                        active_hard_limit = conf->snap_max_hard_limit;
+                else
+                        active_hard_limit = snap_max_limit;
+
+                soft_limit_value = (active_hard_limit *
+                                    conf->snap_max_soft_limit) / 100;
+
+                snprintf (buf, sizeof(buf), "volume%ld-volname", count);
+                ret = dict_set_str (rsp_dict, buf, volinfo->volname);
+                if (ret) {
+                        snprintf (err_str, PATH_MAX,
+                                  "Failed to set %s", buf);
+                        goto out;
+                }
+
+                snprintf (buf, sizeof(buf),
+                          "volume%ld-snap-max-hard-limit", count);
+                ret = dict_set_uint64 (rsp_dict, buf, snap_max_limit);
+                if (ret) {
+                        snprintf (err_str, PATH_MAX,
+                                  "Failed to set %s", buf);
+                        goto out;
+                }
+
+                snprintf (buf, sizeof(buf),
+                          "volume%ld-active-hard-limit", count);
+                ret = dict_set_uint64 (rsp_dict, buf, active_hard_limit);
+                if (ret) {
+                        snprintf (err_str, PATH_MAX,
+                                  "Failed to set %s", buf);
+                        goto out;
+                }
+
+                snprintf (buf, sizeof(buf),
+                          "volume%ld-snap-max-soft-limit", count);
+                ret = dict_set_uint64 (rsp_dict, buf, soft_limit_value);
+                if (ret) {
+                        snprintf (err_str, PATH_MAX,
+                                  "Failed to set %s", buf);
+                        goto out;
+                }
+
+                count++;
+
+                ret = dict_set_uint64 (rsp_dict, "voldisplaycount", count);
+                if (ret) {
+                        snprintf (err_str, PATH_MAX,
+                                  "Failed to set voldisplaycount");
+                        goto out;
+                }
+
+        }
+
+        ret = dict_set_uint64 (rsp_dict, "snap-max-hard-limit",
+                               conf->snap_max_hard_limit);
+        if (ret) {
+                snprintf (err_str, PATH_MAX,
+                          "Failed to set sys-snap-max-hard-limit");
+                goto out;
+        }
+
+        ret = dict_set_uint64 (rsp_dict, "snap-max-soft-limit",
+                               conf->snap_max_soft_limit);
+        if (ret) {
+                snprintf (err_str, PATH_MAX,
+                          "Failed to set sys-snap-max-soft-limit");
+                goto out;
+        }
+
+        ret = 0;
+out:
+        if (ret) {
+                *op_errstr = gf_strdup (err_str);
+                gf_log (this->name, GF_LOG_ERROR, "%s", err_str);
+        }
+        return ret;
+}
+
+int
+glusterd_snapshot_config_commit (dict_t *dict, char **op_errstr,
+                                 dict_t *rsp_dict)
+{
+        char            *volname           = NULL;
+        xlator_t        *this              = NULL;
+        int              ret               = -1;
+        char             err_str[PATH_MAX] = {0,};
+        glusterd_conf_t *conf              = NULL;
+        int              config_command    = 0;
+        uint64_t         hard_limit        = 0;
+        uint64_t         soft_limit        = 0;
+
+        this = THIS;
+
+        GF_ASSERT (this);
+        GF_ASSERT (dict);
+        GF_ASSERT (op_errstr);
+
+        conf = this->private;
+
+        GF_ASSERT (conf);
+
+        ret = dict_get_int32 (dict, "config-command", &config_command);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to get config-command type");
+                goto out;
+        }
+
+        /* Ignore the return values of the following dict_gets,
+         * as these keys are optional
+         */
+        ret = dict_get_str (dict, "volname", &volname);
+
+        ret = dict_get_uint64 (dict, "snap-max-hard-limit", &hard_limit);
+
+        ret = dict_get_uint64 (dict, "snap-max-soft-limit", &soft_limit);
+
+        switch (config_command) {
+        case GF_SNAP_CONFIG_TYPE_SET:
+                if (hard_limit) {
+                        /* Commit ops for snap-max-hard-limit */
+                        ret = snap_max_hard_limit_set_commit (dict, hard_limit,
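+                        /* (Illustrative) The two config paths handled here,
+                         * assuming the usual CLI spellings:
+                         *   "snapshot config [<vol>] snap-max-hard-limit <N>"
+                         *        -> GF_SNAP_CONFIG_TYPE_SET, committed on
+                         *           every node (system-wide or per volume);
+                         *   "snapshot config [<vol>]" with no new value
+                         *        -> GF_SNAP_CONFIG_DISPLAY, answered only by
+                         *           the originator glusterd (see the
+                         *           is_origin_glusterd() check below).
+                         */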
volname, + op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "snap-max-hard-limit set " + "commit failed."); + goto out; + } + } + + if (soft_limit) { + /* For system limit */ + conf->snap_max_soft_limit = soft_limit; + + ret = glusterd_store_global_info (this); + if (ret) { + snprintf (err_str, PATH_MAX, "Failed to store " + "snap-max-soft-limit for system"); + *op_errstr = gf_strdup (err_str); + gf_log (this->name, GF_LOG_ERROR, "%s", + err_str); + goto out; + } + } + break; + + case GF_SNAP_CONFIG_DISPLAY: + /* Reading data from local node only */ + if (!is_origin_glusterd (dict)) { + ret = 0; + break; + } + + ret = snap_max_limits_display_commit (rsp_dict, volname, + op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "snap-max-limit " + "display commit failed."); + goto out; + } + break; + default: + break; + } + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_get_brick_lvm_details (dict_t *rsp_dict, + glusterd_brickinfo_t *brickinfo, char *volname, + char *device, char *key_prefix) +{ + + int ret = -1; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + xlator_t *this = NULL; + char msg[PATH_MAX] = ""; + char buf[PATH_MAX] = ""; + char *ptr = NULL; + char *token = NULL; + char key[PATH_MAX] = ""; + char *value = NULL; + + GF_ASSERT (rsp_dict); + GF_ASSERT (brickinfo); + GF_ASSERT (volname); + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + device = glusterd_get_brick_mount_details (brickinfo); + if (!device) { + gf_log (this->name, GF_LOG_ERROR, "Getting device name for " + "the brick %s:%s failed", brickinfo->hostname, + brickinfo->path); + goto out; + } + runinit (&runner); + snprintf (msg, sizeof (msg), "running lvs command, " + "for getting snap status"); + /* Using lvs command fetch the Volume Group name, + * Percentage of data filled and Logical Volume size + * + * "-o" argument is used to get the desired information, + * example : "lvs /dev/VolGroup/thin_vol -o vgname,lv_size", + * will get us Volume Group name and Logical Volume size. 
+ *
+ * Here the separator used is ":";
+ * for the example command above, the output would then be
+ * "vgname:lvsize".
+ */
+        runner_add_args (&runner, "lvs", device, "--noheadings", "-o",
+                         "vg_name,data_percent,lv_size",
+                         "--separator", ":", NULL);
+        runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
+        runner_log (&runner, "", GF_LOG_DEBUG, msg);
+        ret = runner_start (&runner);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Could not perform lvs action");
+                goto end;
+        }
+        do {
+                ptr = fgets (buf, sizeof (buf),
+                             runner_chio (&runner, STDOUT_FILENO));
+
+                if (ptr == NULL)
+                        break;
+                token = strtok (buf, ":");
+                if (token != NULL) {
+                        while (token && token[0] == ' ')
+                                token++;
+                        if (!token || !token[0]) {
+                                ret = -1;
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Invalid vg entry");
+                                goto end;
+                        }
+                        value = gf_strdup (token);
+                        if (!value) {
+                                ret = -1;
+                                goto end;
+                        }
+                        ret = snprintf (key, sizeof (key), "%s.vgname",
+                                        key_prefix);
+                        if (ret < 0) {
+                                goto end;
+                        }
+
+                        ret = dict_set_dynstr (rsp_dict, key, value);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Could not save vgname");
+                                goto end;
+                        }
+                }
+
+                token = strtok (NULL, ":");
+                if (token != NULL) {
+                        value = gf_strdup (token);
+                        if (!value) {
+                                ret = -1;
+                                goto end;
+                        }
+                        ret = snprintf (key, sizeof (key), "%s.data",
+                                        key_prefix);
+                        if (ret < 0) {
+                                goto end;
+                        }
+
+                        ret = dict_set_dynstr (rsp_dict, key, value);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Could not save data percent");
+                                goto end;
+                        }
+                }
+                token = strtok (NULL, ":");
+                if (token != NULL) {
+                        value = gf_strdup (token);
+                        if (!value) {
+                                ret = -1;
+                                goto end;
+                        }
+                        ret = snprintf (key, sizeof (key), "%s.lvsize",
+                                        key_prefix);
+                        if (ret < 0) {
+                                goto end;
+                        }
+
+                        ret = dict_set_dynstr (rsp_dict, key, value);
+                        if (ret) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "Could not save lv size");
+                                goto end;
+                        }
+                }
+
+        } while (ptr != NULL);
+
+        ret = 0;
+
+end:
+        runner_end (&runner);
+
+out:
+        if (ret && value) {
+                GF_FREE (value);
+        }
+
+        return ret;
+}
+
+int
+glusterd_get_single_brick_status (char **op_errstr, dict_t *rsp_dict,
+                                  char *keyprefix, int index,
+                                  glusterd_volinfo_t *snap_volinfo,
+                                  glusterd_brickinfo_t *brickinfo)
+{
+        int              ret                  = -1;
+        xlator_t        *this                 = NULL;
+        glusterd_conf_t *priv                 = NULL;
+        char             key[PATH_MAX]        = "";
+        char            *device               = NULL;
+        char            *value                = NULL;
+        char             brick_path[PATH_MAX] = "";
+        char             pidfile[PATH_MAX]    = "";
+        pid_t            pid                  = -1;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        GF_ASSERT (op_errstr);
+        GF_ASSERT (rsp_dict);
+        GF_ASSERT (keyprefix);
+        GF_ASSERT (snap_volinfo);
+        GF_ASSERT (brickinfo);
+
+        ret = snprintf (key, sizeof (key), "%s.brick%d.path", keyprefix,
+                        index);
+        if (ret < 0) {
+                goto out;
+        }
+
+        ret = snprintf (brick_path, sizeof (brick_path),
+                        "%s:%s", brickinfo->hostname, brickinfo->path);
+        if (ret < 0) {
+                goto out;
+        }
+
+        value = gf_strdup (brick_path);
+        if (!value) {
+                ret = -1;
+                goto out;
+        }
+
+        ret = dict_set_dynstr (rsp_dict, key, value);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Unable to store "
+                        "brick_path %s", brickinfo->path);
+                goto out;
+        }
+
+        if (brickinfo->snap_status == -1) {
+                /* Setting vgname as "Pending Snapshot" */
+                value = gf_strdup ("Pending Snapshot");
+                if (!value) {
+                        ret = -1;
+                        goto out;
+                }
+
+                snprintf (key, sizeof (key), "%s.brick%d.vgname",
+                          keyprefix, index);
+                ret = dict_set_dynstr (rsp_dict, key, value);
+                if (ret) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "Could not save vgname");
+                        goto out;
+                }
+
+                ret = 0;
+                goto
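+                /* Example (illustrative) of one line of lvs output parsed by
+                 * glusterd_get_brick_lvm_details() above, with separator ":"
+                 * and output columns vg_name,data_percent,lv_size:
+                 *
+                 *   RHS_vg1:0.05:2.00g
+                 *
+                 * strtok() then yields vgname "RHS_vg1", data "0.05" and
+                 * lvsize "2.00g", stored under <prefix>.vgname, <prefix>.data
+                 * and <prefix>.lvsize. The sample values are made up.
+                 */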
out; + } + value = NULL; + + ret = snprintf (key, sizeof (key), "%s.brick%d.status", + keyprefix, index); + if (ret < 0) { + goto out; + } + + if (brickinfo->status == GF_BRICK_STOPPED) { + value = gf_strdup ("No"); + if (!value) { + ret = -1; + goto out; + } + ret = dict_set_str (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save brick status"); + goto out; + } + value = NULL; + } else { + value = gf_strdup ("Yes"); + if (!value) { + ret = -1; + goto out; + } + ret = dict_set_str (rsp_dict, key, value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save brick status"); + goto out; + } + value = NULL; + + GLUSTERD_GET_BRICK_PIDFILE (pidfile, snap_volinfo, + brickinfo, priv); + ret = glusterd_is_service_running (pidfile, &pid); + + ret = snprintf (key, sizeof (key), "%s.brick%d.pid", + keyprefix, index); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, key, pid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save pid %d", pid); + goto out; + } + } + + ret = snprintf (key, sizeof (key), "%s.brick%d", + keyprefix, index); + if (ret < 0) { + goto out; + } + + ret = glusterd_get_brick_lvm_details (rsp_dict, brickinfo, + snap_volinfo->volname, + device, key); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "brick LVM details"); + goto out; + } +out: + if (ret && value) { + GF_FREE (value); + } + + return ret; +} + +int +glusterd_get_single_snap_status (char **op_errstr, dict_t *rsp_dict, + char *keyprefix, glusterd_snap_t *snap) +{ + int ret = -1; + xlator_t *this = NULL; + char key[PATH_MAX] = ""; + char brickkey[PATH_MAX] = ""; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *tmp_volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + int volcount = 0; + int brickcount = 0; + + this = THIS; + GF_ASSERT (this); + + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (keyprefix); + GF_ASSERT (snap); + + list_for_each_entry_safe (snap_volinfo, tmp_volinfo, &snap->volumes, + vol_list) { + ret = snprintf (key, sizeof (key), "%s.vol%d", keyprefix, + volcount); + if (ret < 0) { + goto out; + } + list_for_each_entry (brickinfo, &snap_volinfo->bricks, + brick_list) { + if (!glusterd_is_local_brick (this, snap_volinfo, + brickinfo)) { + brickcount++; + continue; + } + + ret = glusterd_get_single_brick_status (op_errstr, + rsp_dict, key, brickcount, + snap_volinfo, brickinfo); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Getting " + "single snap status failed"); + goto out; + } + brickcount++; + } + ret = snprintf (brickkey, sizeof (brickkey), "%s.brickcount", + key); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, brickkey, brickcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save brick count"); + goto out; + } + volcount++; + } + + ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, key, volcount); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save volcount"); + goto out; + } + +out: + + return ret; +} + +int +glusterd_get_each_snap_object_status (char **op_errstr, dict_t *rsp_dict, + glusterd_snap_t *snap, char *keyprefix) +{ + int ret = -1; + char key[PATH_MAX] = ""; + char *temp = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (snap); + GF_ASSERT (keyprefix); + + /* TODO : Get all the snap volume info present in snap object, + * as of now, 
There will be only one snapvolinfo per snap object + */ + ret = snprintf (key, sizeof (key), "%s.snapname", keyprefix); + if (ret < 0) { + goto out; + } + + temp = gf_strdup (snap->snapname); + if (temp == NULL) { + ret = -1; + goto out; + } + ret = dict_set_dynstr (rsp_dict, key, temp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save " + "snap name"); + goto out; + } + + temp = NULL; + + ret = snprintf (key, sizeof (key), "%s.uuid", keyprefix); + if (ret < 0) { + goto out; + } + + temp = gf_strdup (uuid_utoa (snap->snap_id)); + if (temp == NULL) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (rsp_dict, key, temp); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save " + "snap UUID"); + goto out; + } + + temp = NULL; + + ret = glusterd_get_single_snap_status (op_errstr, rsp_dict, keyprefix, + snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not get single snap status"); + goto out; + } + + ret = snprintf (key, sizeof (key), "%s.volcount", keyprefix); + if (ret < 0) { + goto out; + } + + ret = dict_set_int32 (rsp_dict, key, 1); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save volcount"); + goto out; + } +out: + if (ret && temp) + GF_FREE (temp); + + return ret; +} + +int +glusterd_get_snap_status_of_volume (char **op_errstr, dict_t *rsp_dict, + char *volname, char *keyprefix) { + int ret = -1; + glusterd_volinfo_t *snap_volinfo = NULL; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + char key[PATH_MAX] = ""; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int i = 0; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (op_errstr); + GF_ASSERT (rsp_dict); + GF_ASSERT (volname); + GF_ASSERT (keyprefix); + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volinfo of " + "volume %s", volname); + goto out; + } + + list_for_each_entry_safe (snap_volinfo, temp_volinfo, + &volinfo->snap_volumes, snapvol_list) { + ret = snprintf (key, sizeof (key), "status.snap%d", i); + if (ret < 0) { + goto out; + } + + ret = glusterd_get_each_snap_object_status (op_errstr, + rsp_dict, snap_volinfo->snapshot, key); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Function : " + "glusterd_get_single_snap_status failed"); + goto out; + } + i++; + } + + ret = dict_set_int32 (rsp_dict, "status.snapcount", i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to save snapcount"); + ret = -1; + goto out; + } +out: + return ret; +} + +int +glusterd_get_all_snapshot_status (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int32_t i = 0; + int ret = -1; + char key[PATH_MAX] = ""; + glusterd_conf_t *priv = NULL; + glusterd_snap_t *snap = NULL; + glusterd_snap_t *tmp_snap = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + list_for_each_entry_safe (snap, tmp_snap, + &priv->snapshots, snap_list) { + ret = snprintf (key, sizeof (key), "status.snap%d", i); + if (ret < 0) { + goto out; + } + + ret = glusterd_get_each_snap_object_status (op_errstr, + rsp_dict, snap, key); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not get " + "the details of a snap object: %s", + snap->snapname); + goto out; + } + i++; + } + + ret = dict_set_int32 (rsp_dict, "status.snapcount", i); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Could not save snapcount"); + goto out; + } + + ret = 
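
[Note: the status functions above flatten everything into rsp_dict under a predictable key namespace: "status.snapcount", then "status.snap<i>.snapname", "status.snap<i>.vol<j>.brickcount", and so on. A hedged consumer-side sketch (dict_get_int32 and dict_get_str are the stock libglusterfs accessors; the surrounding variables are assumed):

    int32_t snapcount = 0;
    int32_t i         = 0;
    char    key[64]   = "";
    char   *snapname  = NULL;

    if (!dict_get_int32 (rsp_dict, "status.snapcount", &snapcount)) {
            for (i = 0; i < snapcount; i++) {
                    snprintf (key, sizeof (key),
                              "status.snap%d.snapname", i);
                    if (!dict_get_str (rsp_dict, key, &snapname))
                            gf_log (THIS->name, GF_LOG_DEBUG,
                                    "snap %d is %s", i, snapname);
            }
    }
]
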
0; +out: + return ret; +} + + +int +glusterd_snapshot_status_commit (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + xlator_t *this = NULL; + int ret = -1; + glusterd_conf_t *conf = NULL; + char *get_buffer = NULL; + int32_t cmd = -1; + char *snapname = NULL; + glusterd_snap_t *snap = NULL; + char *volname = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (op_errstr); + + conf = this->private; + + GF_ASSERT (conf); + ret = dict_get_int32 (dict, "cmd", &cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to get status cmd type"); + goto out; + } + + ret = dict_set_int32 (rsp_dict, "cmd", cmd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not save status cmd in rsp dictionary"); + goto out; + } + switch (cmd) { + case GF_SNAP_STATUS_TYPE_ALL: + { + ret = glusterd_get_all_snapshot_status (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get snapshot status"); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_SNAP: + { + + ret = dict_get_str (dict, "snapname", &snapname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get snap name"); + goto out; + } + + snap = glusterd_find_snap_by_name (snapname); + if (!snap) { + ret = gf_asprintf (op_errstr, "Snap (%s) " + "not found", snapname); + if (ret < 0) { + goto out; + } + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get snap volinfo"); + goto out; + } + ret = glusterd_get_each_snap_object_status (op_errstr, + rsp_dict, snap, "status.snap0"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "get status of snap %s", get_buffer); + goto out; + } + break; + } + case GF_SNAP_STATUS_TYPE_VOL: + { + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to" + " get volume name"); + goto out; + } + + ret = glusterd_get_snap_status_of_volume (op_errstr, + rsp_dict, volname, "status.vol0"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Function :" + " glusterd_get_snap_status_of_volume " + "failed"); + goto out; + } + } + } + ret = 0; +out: + return ret; +} + +int32_t +glusterd_snapshot_create_postvalidate (dict_t *dict, int32_t op_ret, + char **op_errstr, dict_t *rsp_dict) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + if (op_ret) { + ret = glusterd_do_snap_cleanup (dict, op_errstr, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "cleanup operation " + "failed"); + goto out; + } + } else { + ret = glusterd_snapshot_update_snaps_post_validate (dict, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "create snapshot"); + goto out; + } + } + + ret = 0; +out: + return ret; +} + +int32_t +glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int32_t snap_command = 0; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case (GF_SNAP_OPTION_TYPE_CREATE): + ret = glusterd_snapshot_create_commit (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, 
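
[Note: glusterd_snapshot_status_commit above dispatches on the integer stored under "cmd". A sketch of the dictionary a caller would prepare for the single-snap case ("snap1" is an illustrative name; error handling elided):

    dict_t *dict = dict_new ();
    int     ret  = -1;

    ret = dict_set_int32 (dict, "cmd", GF_SNAP_STATUS_TYPE_SNAP);
    if (!ret)
            ret = dict_set_str (dict, "snapname", "snap1");
    if (!ret)
            ret = glusterd_snapshot_status_commit (dict, &op_errstr,
                                                   rsp_dict);
]
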
GF_LOG_ERROR, "Failed to " + "create snapshot"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_CONFIG: + ret = glusterd_snapshot_config_commit (dict, op_errstr, + rsp_dict); + break; + + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snapshot_remove_commit (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "delete snapshot"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_restore (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to " + "restore snapshot"); + goto out; + } + + break; + + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_snapshot_status_commit (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "show snapshot status"); + goto out; + } + break; + + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + break; + } + + ret = 0; + +out: + return ret; +} + +int +glusterd_snapshot_brickop (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int ret = -1; + int64_t vol_count = 0; + int64_t count = 1; + char key[1024] = {0,}; + char *volname = NULL; + int32_t snap_command = 0; + xlator_t *this = NULL; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = dict_get_int64 (dict, "volcount", &vol_count); + if (ret) + goto out; + while (count <= vol_count) { + snprintf (key, 1024, "volname%"PRId64, count); + ret = dict_get_str (dict, key, &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get volname"); + goto out; + } + ret = dict_set_str (dict, "volname", volname); + if (ret) + goto out; + + ret = gd_brick_op_phase (GD_OP_SNAP, NULL, dict, + op_errstr); + if (ret) + goto out; + volname = NULL; + count++; + } + + dict_del (dict, "volname"); + ret = 0; + break; + case GF_SNAP_OPTION_TYPE_DELETE: + break; + default: + break; + } + +out: + return ret; +} + +int +glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict) +{ + int snap_command = 0; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case (GF_SNAP_OPTION_TYPE_CREATE): + ret = glusterd_snapshot_create_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "pre-validation failed"); + goto out; + } + break; + + case (GF_SNAP_OPTION_TYPE_CONFIG): + ret = glusterd_snapshot_config_prevalidate (dict, op_errstr); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot config " + "pre-validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_restore_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot restore " + "validation failed"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snapshot_remove_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot remove " + "validation failed"); + goto 
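
[Note: glusterd_snapshot_brickop above walks a 1-based set of "volname<n>" keys bounded by "volcount". The matching packing side, as a hedged sketch (nvols and volnames are assumed inputs, not part of this patch):

    char    key[64] = "";
    int64_t i       = 0;
    int     ret     = 0;

    ret = dict_set_int64 (dict, "volcount", nvols);
    for (i = 1; !ret && i <= nvols; i++) {
            snprintf (key, sizeof (key), "volname%"PRId64, i);
            ret = dict_set_str (dict, key, volnames[i - 1]);
    }
]
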
out; + } + break; + + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_snapshot_status_prevalidate (dict, op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot status " + "validation failed"); + goto out; + } + break; + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr, + dict_t *rsp_dict) +{ + int snap_command = 0; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (dict); + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (dict, "type", &snap_command); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_snapshot_create_postvalidate (dict, op_ret, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "post-validation failed"); + goto out; + } + break; + + case GF_SNAP_OPTION_TYPE_DELETE: + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_snapshot_update_snaps_post_validate (dict, + op_errstr, + rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "update missed snaps list"); + goto out; + } + break; + + default: + gf_log (this->name, GF_LOG_WARNING, "invalid snap command"); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_handle_snapshot_fn (rpcsvc_request_t *req) +{ + int32_t ret = 0; + dict_t *dict = NULL; + gf_cli_req cli_req = {{0},}; + glusterd_op_t cli_op = GD_OP_SNAP; + int type = 0; + glusterd_conf_t *conf = NULL; + char *host_uuid = NULL; + char err_str[2048] = {0,}; + xlator_t *this = NULL; + char *volname = NULL; + + GF_ASSERT (req); + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + ret = xdr_to_generic (req->msg[0], &cli_req, + (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len > 0) { + dict = dict_new (); + if (!dict) + goto out; + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } + + dict->extra_stdfree = cli_req.dict.dict_val; + + host_uuid = gf_strdup (uuid_utoa(MY_UUID)); + if (host_uuid == NULL) { + snprintf (err_str, sizeof (err_str), "Failed to get " + "the uuid of local glusterd"); + ret = -1; + goto out; + } + ret = dict_set_dynstr (dict, "host-uuid", host_uuid); + if (ret) { + GF_FREE (host_uuid); + goto out; + } + + + } else { + gf_log (this->name, GF_LOG_ERROR, "request dict length is %d", + cli_req.dict.dict_len); + goto out; + } + + ret = dict_get_int32 (dict, "type", &type); + if (ret < 0) { + snprintf (err_str, sizeof (err_str), "Command type not found"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + switch (type) { + case GF_SNAP_OPTION_TYPE_CREATE: + ret = glusterd_handle_snapshot_create (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot create " + "failed: %s", err_str); + } + break; + case GF_SNAP_OPTION_TYPE_RESTORE: + ret = glusterd_handle_snapshot_restore (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot restore 
" + "failed: %s", err_str); + } + + break; + case GF_SNAP_OPTION_TYPE_INFO: + ret = glusterd_handle_snapshot_info (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot info " + "failed"); + } + break; + case GF_SNAP_OPTION_TYPE_LIST: + ret = glusterd_handle_snapshot_list (req, cli_op, dict, + err_str, sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot list " + "failed"); + } + break; + case GF_SNAP_OPTION_TYPE_CONFIG: + /* TODO : Type of lock to be taken when we are setting + * limits system wide + */ + ret = dict_get_str (dict, "volname", &volname); + if (!volname) { + ret = dict_set_int32 (dict, "hold_vol_locks", + _gf_false); + if (ret) { + gf_log ("cli", GF_LOG_ERROR, + "Unable to set hold_vol_locks value " + "as _gf_false"); + goto out; + } + + } + ret = glusterd_mgmt_v3_initiate_all_phases (req, cli_op, dict); + break; + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_handle_snapshot_remove (req, cli_op, dict, + err_str, + sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot delete " + "failed: %s", err_str); + } + break; + case GF_SNAP_OPTION_TYPE_START: + case GF_SNAP_OPTION_TYPE_STOP: + case GF_SNAP_OPTION_TYPE_STATUS: + ret = glusterd_handle_snapshot_status (req, cli_op, dict, + err_str, + sizeof (err_str)); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Snapshot status " + "failed: %s", err_str); + } + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Unkown snapshot request " + "type (%d)", type); + ret = -1; /* Failure */ + } + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + + return ret; +} + +int +glusterd_handle_snapshot (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, glusterd_handle_snapshot_fn); +} + +static inline void +glusterd_free_snap_op (glusterd_snap_op_t *snap_op) +{ + if (snap_op) { + if (snap_op->brick_path) + GF_FREE (snap_op->brick_path); + + GF_FREE (snap_op); + } +} + +/* Look for duplicates and accordingly update the list */ +int32_t +glusterd_update_missed_snap_entry (glusterd_missed_snap_info *missed_snapinfo, + glusterd_snap_op_t *missed_snap_op) +{ + int32_t ret = -1; + glusterd_snap_op_t *snap_opinfo = NULL; + gf_boolean_t match = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_snapinfo); + GF_ASSERT(missed_snap_op); + + list_for_each_entry (snap_opinfo, &missed_snapinfo->snap_ops, + snap_ops_list) { + if ((!strcmp (snap_opinfo->brick_path, + missed_snap_op->brick_path)) && + (snap_opinfo->op == missed_snap_op->op)) { + /* If two entries have conflicting status + * GD_MISSED_SNAP_DONE takes precedence + */ + if ((snap_opinfo->status == GD_MISSED_SNAP_PENDING) && + (missed_snap_op->status == GD_MISSED_SNAP_DONE)) { + snap_opinfo->status = GD_MISSED_SNAP_DONE; + gf_log (this->name, GF_LOG_INFO, + "Updating missed snap status " + "for %s:%d:%s:%d as DONE", + missed_snapinfo->node_snap_info, + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op); + ret = 0; + glusterd_free_snap_op (missed_snap_op); + goto out; + } + match = _gf_true; + break; + } else if ((snap_opinfo->brick_num == + missed_snap_op->brick_num) && + (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) && + (missed_snap_op->op == + GF_SNAP_OPTION_TYPE_DELETE)) { + /* Optimizing create and delete entries for the same + * brick and same node + */ + gf_log 
(this->name, GF_LOG_INFO, + "Updating missed snap status " + "for %s:%d:%s:%d as DONE", + missed_snapinfo->node_snap_info, + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op); + snap_opinfo->status = GD_MISSED_SNAP_DONE; + ret = 0; + glusterd_free_snap_op (missed_snap_op); + goto out; + } + } + + if (match == _gf_true) { + gf_log (this->name, GF_LOG_INFO, + "Duplicate entry. Not updating"); + glusterd_free_snap_op (missed_snap_op); + } else { + list_add_tail (&missed_snap_op->snap_ops_list, + &missed_snapinfo->snap_ops); + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Add new missed snap entry to the missed_snaps list. */ +int32_t +glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, + char *brick_path, int32_t snap_op, + int32_t snap_status) +{ + int32_t ret = -1; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *missed_snap_op = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t match = _gf_false; + gf_boolean_t free_missed_snap_info = _gf_false; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(missed_info); + GF_ASSERT(brick_path); + + priv = this->private; + GF_ASSERT (priv); + + /* Create the snap_op object consisting of the * + * snap id and the op */ + ret = glusterd_missed_snap_op_new (&missed_snap_op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create new missed snap object."); + ret = -1; + goto out; + } + + missed_snap_op->brick_path = gf_strdup(brick_path); + if (!missed_snap_op->brick_path) { + ret = -1; + goto out; + } + missed_snap_op->brick_num = brick_num; + missed_snap_op->op = snap_op; + missed_snap_op->status = snap_status; + + /* Look for other entries for the same node and same snap */ + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + if (!strcmp (missed_snapinfo->node_snap_info, + missed_info)) { + /* Found missed snapshot info for * + * the same node and same snap */ + match = _gf_true; + break; + } + } + + if (match == _gf_false) { + /* First snap op missed for the brick */ + ret = glusterd_missed_snapinfo_new (&missed_snapinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create missed snapinfo"); + goto out; + } + free_missed_snap_info = _gf_true; + missed_snapinfo->node_snap_info = gf_strdup(missed_info); + if (!missed_snapinfo->node_snap_info) { + ret = -1; + goto out; + } + + list_add_tail (&missed_snap_op->snap_ops_list, + &missed_snapinfo->snap_ops); + list_add_tail (&missed_snapinfo->missed_snaps, + &priv->missed_snaps_list); + + ret = 0; + goto out; + } else { + ret = glusterd_update_missed_snap_entry (missed_snapinfo, + missed_snap_op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to update existing missed snap entry."); + goto out; + } + } + +out: + if (ret) { + glusterd_free_snap_op (missed_snap_op); + + if (missed_snapinfo && + (free_missed_snap_info == _gf_true)) { + if (missed_snapinfo->node_snap_info) + GF_FREE (missed_snapinfo->node_snap_info); + + GF_FREE (missed_snapinfo); + } + } + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Add missing snap entries to the in-memory conf->missed_snap_list */ +int32_t +glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count) +{ + char *buf = NULL; + char *tmp = NULL; + char *save_ptr = NULL; + char *nodeid = NULL; + char *snap_uuid = NULL; + char *brick_path = NULL; + char missed_info[PATH_MAX] = ""; + char name_buf[PATH_MAX] = 
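
[Note: glusterd_add_missed_snaps_to_list below expects each "missed_snaps_<i>" value in the form <node-uuid>:<snap-uuid>=<brick-num>:<brick-path>:<op>:<status>. A standalone parse of one record (UUIDs and path are placeholders):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int
    main (void)
    {
            char  entry[] = "00000000-0000-0000-0000-0000000000a1:"
                            "00000000-0000-0000-0000-0000000000b2="
                            "1:/bricks/b1:1:1";
            char *sp         = NULL;
            char *nodeid     = NULL;
            char *snap_uuid  = NULL;
            char *brick_path = NULL;
            int   brick_num  = -1;
            int   op         = -1;
            int   status     = -1;

            nodeid     = strtok_r (entry, ":", &sp);
            snap_uuid  = strtok_r (NULL, "=", &sp);
            brick_num  = atoi (strtok_r (NULL, ":", &sp));
            brick_path = strtok_r (NULL, ":", &sp);
            op         = atoi (strtok_r (NULL, ":", &sp));
            status     = atoi (strtok_r (NULL, ":", &sp));

            printf ("%s %s %d %s %d %d\n", nodeid, snap_uuid,
                    brick_num, brick_path, op, status);
            return 0;
    }
]
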
""; + int32_t i = -1; + int32_t ret = -1; + int32_t brick_num = -1; + int32_t snap_op = -1; + int32_t snap_status = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + priv = this->private; + GF_ASSERT (priv); + + /* We can update the missed_snaps_list without acquiring * + * any additional locks as big lock will be held. */ + for (i = 0; i < missed_snap_count; i++) { + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + i); + ret = dict_get_str (dict, name_buf, &buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %s", name_buf); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "missed_snap_entry = %s", + buf); + + /* Need to make a duplicate string coz the same dictionary * + * is resent to the non-originator nodes */ + tmp = gf_strdup (buf); + if (!tmp) { + ret = -1; + goto out; + } + + /* Fetch the node-id, snap-id, brick_num, + * brick_path, snap_op and snap status + */ + nodeid = strtok_r (tmp, ":", &save_ptr); + snap_uuid = strtok_r (NULL, "=", &save_ptr); + brick_num = atoi(strtok_r (NULL, ":", &save_ptr)); + brick_path = strtok_r (NULL, ":", &save_ptr); + snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); + snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); + + if (!nodeid || !snap_uuid || !brick_path || + brick_num < 1 || snap_op < 1 || + snap_status < 1) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid missed_snap_entry"); + ret = -1; + goto out; + } + + snprintf (missed_info, sizeof(missed_info), "%s:%s", + nodeid, snap_uuid); + + ret = glusterd_store_missed_snaps_list (missed_info, + brick_num, + brick_path, + snap_op, + snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store missed snaps_list"); + goto out; + } + + GF_FREE (tmp); + tmp = NULL; + } + +out: + if (tmp) + GF_FREE (tmp); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 4b9895219..1c2ec58e8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -1,21 +1,13 @@ /* - Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + Copyright (c) 2007-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ + #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" @@ -42,158 +34,311 @@ #include "glusterd-sm.h" #include "glusterd-op-sm.h" #include "glusterd-utils.h" +#include "glusterd-hooks.h" +#include "store.h" #include "glusterd-store.h" -#include "glusterd1.h" -#include "cli1.h" #include "rpc-clnt.h" +#include "common-utils.h" #include <sys/resource.h> #include <inttypes.h> #include <dirent.h> -static int32_t -glusterd_store_create_volume_dir (char *volname) +void +glusterd_replace_slash_with_hyphen (char *str) +{ + char *ptr = NULL; + + ptr = strchr (str, '/'); + + while (ptr) { + *ptr = '-'; + ptr = strchr (str, '/'); + } +} + +int32_t +glusterd_store_create_brick_dir (glusterd_volinfo_t *volinfo) { int32_t ret = -1; - char path[PATH_MAX] = {0,}; + char brickdirpath[PATH_MAX] = {0,}; glusterd_conf_t *priv = NULL; - GF_ASSERT (volname); + GF_ASSERT (volinfo); + priv = THIS->private; + GF_ASSERT (priv); + + GLUSTERD_GET_BRICK_DIR (brickdirpath, volinfo, priv); + ret = gf_store_mkdir (brickdirpath); + + return ret; +} +static void +glusterd_store_key_vol_brick_set (glusterd_brickinfo_t *brickinfo, + char *key_vol_brick, size_t len) +{ + GF_ASSERT (brickinfo); + GF_ASSERT (key_vol_brick); + GF_ASSERT (len >= PATH_MAX); + + snprintf (key_vol_brick, len, "%s", brickinfo->path); + glusterd_replace_slash_with_hyphen (key_vol_brick); +} + +static void +glusterd_store_brickinfofname_set (glusterd_brickinfo_t *brickinfo, + char *brickfname, size_t len) +{ + char key_vol_brick[PATH_MAX] = {0}; + + GF_ASSERT (brickfname); + GF_ASSERT (brickinfo); + GF_ASSERT (len >= PATH_MAX); + + glusterd_store_key_vol_brick_set (brickinfo, key_vol_brick, + sizeof (key_vol_brick)); + snprintf (brickfname, len, "%s:%s", brickinfo->hostname, key_vol_brick); +} + +static void +glusterd_store_brickinfopath_set (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + char *brickpath, size_t len) +{ + char brickfname[PATH_MAX] = {0}; + char brickdirpath[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + + GF_ASSERT (brickpath); + GF_ASSERT (brickinfo); + GF_ASSERT (len >= PATH_MAX); + + priv = THIS->private; GF_ASSERT (priv); - snprintf (path, 1024, "%s/vols/%s", priv->workdir, - volname); + GLUSTERD_GET_BRICK_DIR (brickdirpath, volinfo, priv); + glusterd_store_brickinfofname_set (brickinfo, brickfname, + sizeof (brickfname)); + snprintf (brickpath, len, "%s/%s", brickdirpath, brickfname); +} + +gf_boolean_t +glusterd_store_is_valid_brickpath (char *volname, char *brick) +{ + char brickpath[PATH_MAX] = {0}; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + int32_t ret = 0; + size_t volname_len = strlen (volname); + xlator_t *this = NULL; - ret = mkdir (path, 0777); + this = THIS; + GF_ASSERT (this); - if (-1 == ret) { - gf_log ("", GF_LOG_ERROR, "mkdir() failed on path %s," - "errno: %d", path, errno); + ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to create brick " + "info for brick %s", brick); + ret = 0; + goto out; + } + ret = glusterd_volinfo_new (&volinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to create volinfo"); + ret = 0; + goto out; + } + if (volname_len >= sizeof (volinfo->volname)) { + gf_log (this->name, GF_LOG_WARNING, "volume name too long"); + ret = 0; goto out; } + memcpy (volinfo->volname, volname, volname_len+1); + glusterd_store_brickinfopath_set (volinfo, brickinfo, brickpath, + sizeof (brickpath)); + + ret = (strlen (brickpath) < _POSIX_PATH_MAX); out: - 
gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (brickinfo) + glusterd_brickinfo_delete (brickinfo); + if (volinfo) + glusterd_volinfo_delete (volinfo); + return ret; } int32_t -glusterd_store_create_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo, int32_t brick_count) +glusterd_store_volinfo_brick_fname_write (int vol_fd, + glusterd_brickinfo_t *brickinfo, + int32_t brick_count) { + char key[PATH_MAX] = {0,}; + char brickfname[PATH_MAX] = {0,}; int32_t ret = -1; - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; + + snprintf (key, sizeof (key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK, + brick_count); + glusterd_store_brickinfofname_set (brickinfo, brickfname, + sizeof (brickfname)); + ret = gf_store_save_value (vol_fd, key, brickfname); + if (ret) + goto out; + +out: + return ret; +} + +int32_t +glusterd_store_create_brick_shandle_on_absence (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ char brickpath[PATH_MAX] = {0,}; - struct stat stbuf = {0,}; - char buf[4096] = {0,}; - char *tmppath = NULL; - char *ptr = NULL; - glusterd_store_handle_t *shandle = NULL; - char tmpbuf[4096] = {0,}; + int32_t ret = 0; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); - priv = THIS->private; + glusterd_store_brickinfopath_set (volinfo, brickinfo, brickpath, + sizeof (brickpath)); + ret = gf_store_handle_create_on_absence (&brickinfo->shandle, + brickpath); + return ret; +} - GF_ASSERT (priv); +int32_t +glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo) +{ + char value[256] = {0,}; + int32_t ret = 0; - GLUSTERD_GET_BRICK_DIR (path, volinfo, priv); + GF_ASSERT (brickinfo); + GF_ASSERT (fd > 0); - ret = stat (path, &stbuf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_HOSTNAME, + brickinfo->hostname); + if (ret) + goto out; - if (ret == -1 && ENOENT == errno) { - ret = mkdir (path, 0777); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PATH, + brickinfo->path); + if (ret) + goto out; + + snprintf (value, sizeof(value), "%d", brickinfo->port); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PORT, value); + + snprintf (value, sizeof(value), "%d", brickinfo->rdma_port); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT, + value); + + snprintf (value, sizeof(value), "%d", brickinfo->decommissioned); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, + value); + if (ret) + goto out; + + if (strlen(brickinfo->device_path) > 0) { + snprintf (value, sizeof(value), "%s", brickinfo->device_path); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH, + value); if (ret) goto out; } - tmppath = gf_strdup (brickinfo->path); + snprintf (value, sizeof(value), "%d", brickinfo->snap_status); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, + value); + if (ret) + goto out; - ptr = strchr (tmppath, '/'); + if (!brickinfo->vg[0]) + goto out; - while (ptr) { - *ptr = '-'; - ptr = strchr (tmppath, '/'); - } + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_VGNAME, + brickinfo->vg); +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} - snprintf (brickpath, sizeof (brickpath), "%s/%s:%s", - path, brickinfo->hostname, tmppath); +int32_t +glusterd_store_perform_brick_store (glusterd_brickinfo_t *brickinfo) +{ + int fd = -1; + int32_t ret = -1; + GF_ASSERT (brickinfo); - ret = glusterd_store_handle_new (brickpath, &brickinfo->shandle); + fd = gf_store_mkstemp (brickinfo->shandle); + if (fd <= 0) { + ret = -1; + 
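
[Note: the brickinfo store file name is derived above by flattening the brick path: every '/' becomes '-', and the result is prefixed with the hostname. A standalone illustration (hypothetical host and path):

    #include <stdio.h>
    #include <string.h>

    int
    main (void)
    {
            char  path[] = "/data/brick1";
            char *p      = NULL;

            for (p = strchr (path, '/'); p; p = strchr (p, '/'))
                    *p = '-';

            /* yields "host1:-data-brick1" under vols/<vol>/bricks/ */
            printf ("host1:%s\n", path);
            return 0;
    }
]
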
goto out; + } + ret = glusterd_store_brickinfo_write (fd, brickinfo); if (ret) goto out; - shandle = brickinfo->shandle; - shandle->fd = open (brickpath, O_RDWR | O_CREAT | O_APPEND, 0666); +out: + if (ret && (fd > 0)) + gf_store_unlink_tmppath (brickinfo->shandle); + if (fd > 0) + close (fd); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} - if (shandle->fd < 0) { - gf_log ("", GF_LOG_ERROR, "Open failed on %s", - brickpath); - ret = -1; - goto out; - } +int32_t +glusterd_store_brickinfo (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, int32_t brick_count, + int vol_fd) +{ + int32_t ret = -1; + GF_ASSERT (volinfo); + GF_ASSERT (brickinfo); - snprintf (buf, sizeof(buf), "%s=%s\n", - GLUSTERD_STORE_KEY_BRICK_HOSTNAME, brickinfo->hostname); - ret = write (shandle->fd, buf, strlen(buf)); + ret = glusterd_store_volinfo_brick_fname_write (vol_fd, brickinfo, + brick_count); if (ret) - gf_log ("", GF_LOG_TRACE, "failed to write brick->hostname"); - snprintf (buf, sizeof(buf), "%s=%s\n", - GLUSTERD_STORE_KEY_BRICK_PATH, brickinfo->path); - ret = write (shandle->fd, buf, strlen(buf)); - if (ret) - gf_log ("", GF_LOG_TRACE, "failed to write brick->path"); - snprintf (buf, sizeof(buf), "%s=%d\n", - GLUSTERD_STORE_KEY_BRICK_PORT, brickinfo->port); - ret = write (shandle->fd, buf, strlen(buf)); - if (ret) - gf_log ("", GF_LOG_TRACE, "failed to write brick->port"); + goto out; - ret = 0; + ret = glusterd_store_create_brick_dir (volinfo); + if (ret) + goto out; - snprintf (buf, sizeof (buf), "%s-%d",GLUSTERD_STORE_KEY_VOL_BRICK, - brick_count); - snprintf (tmpbuf, sizeof (tmpbuf), "%s:%s", brickinfo->hostname, - tmppath); - ret = glusterd_store_save_value (volinfo->shandle, buf, tmpbuf); + ret = glusterd_store_create_brick_shandle_on_absence (volinfo, + brickinfo); + if (ret) + goto out; - GF_FREE (tmppath); + ret = glusterd_store_perform_brick_store (brickinfo); out: - if (shandle->fd > 0) { - close (shandle->fd); - } - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } int32_t -glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo, char *delete_path) { int32_t ret = -1; glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; char brickpath[PATH_MAX] = {0,}; char *ptr = NULL; char *tmppath = NULL; + xlator_t *this = NULL; - GF_ASSERT (volinfo); + this = THIS; + GF_ASSERT (this); GF_ASSERT (brickinfo); - priv = THIS->private; - + priv = this->private; GF_ASSERT (priv); - GLUSTERD_GET_BRICK_DIR (path, volinfo, priv); - tmppath = gf_strdup (brickinfo->path); ptr = strchr (tmppath, '/'); @@ -203,16 +348,17 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, ptr = strchr (tmppath, '/'); } - snprintf (brickpath, sizeof (brickpath), "%s/%s:%s", - path, brickinfo->hostname, tmppath); + snprintf (brickpath, sizeof (brickpath), + "%s/"GLUSTERD_BRICK_INFO_DIR"/%s:%s", delete_path, + brickinfo->hostname, tmppath); GF_FREE (tmppath); ret = unlink (brickpath); if ((ret < 0) && (errno != ENOENT)) { - gf_log ("", GF_LOG_ERROR, "Unlink failed on %s, reason: %s", - brickpath, strerror(errno)); + gf_log (this->name, GF_LOG_DEBUG, "Unlink failed on %s, " + "reason: %s", brickpath, strerror(errno)); ret = -1; goto out; } else { @@ -220,14 +366,16 @@ glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, } out: - if (brickinfo->shandle) - glusterd_store_handle_destroy (brickinfo->shandle); 
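
[Note: glusterd_store_brickinfo_write above emits one key=value line per field through gf_store_save_value. For a hypothetical brick the stored file would read roughly as below; the exact key spellings are the GLUSTERD_STORE_KEY_BRICK_* macro values from glusterd-store.h, so treat these as illustrative:

    hostname=host1
    path=/data/brick1
    listen-port=49152
    rdma.listen-port=0
    decommissioned=0
    snap-status=0
]
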
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (brickinfo->shandle) { + gf_store_handle_destroy (brickinfo->shandle); + brickinfo->shandle = NULL; + } + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } int32_t -glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) +glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo, char *delete_path) { int32_t ret = 0; glusterd_brickinfo_t *tmp = NULL; @@ -236,19 +384,24 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) DIR *dir = NULL; struct dirent *entry = NULL; char path[PATH_MAX] = {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); GF_ASSERT (volinfo); list_for_each_entry (tmp, &volinfo->bricks, brick_list) { - ret = glusterd_store_delete_brick (volinfo, tmp); + ret = glusterd_store_delete_brick (tmp, delete_path); if (ret) goto out; } - priv = THIS->private; + priv = this->private; GF_ASSERT (priv); - GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv); + snprintf (brickdir, sizeof (brickdir), "%s/%s", delete_path, + GLUSTERD_BRICK_INFO_DIR); dir = opendir (brickdir); @@ -259,7 +412,7 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) brickdir, entry->d_name); ret = unlink (path); if (ret && errno != ENOENT) { - gf_log ("", GF_LOG_ERROR, "Unable to unlink %s, " + gf_log (this->name, GF_LOG_DEBUG, "Unable to unlink %s, " "reason: %s", path, strerror(errno)); } glusterd_for_each_entry (entry, dir); @@ -270,641 +423,1471 @@ glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo) ret = rmdir (brickdir); out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); return ret; } -void _setopts (dict_t *this, char *key, data_t *value, void *data) +static int +_storeslaves (dict_t *this, char *key, data_t *value, void *data) { - int ret = 0; - glusterd_store_handle_t *shandle = NULL; - int exists = 0; + int32_t ret = 0; + gf_store_handle_t *shandle = NULL; + xlator_t *xl = NULL; + xl = THIS; + GF_ASSERT (xl); - shandle = (glusterd_store_handle_t *) data; + shandle = (gf_store_handle_t*)data; GF_ASSERT (shandle); + GF_ASSERT (shandle->fd > 0); + GF_ASSERT (shandle->path); + GF_ASSERT (key); + GF_ASSERT (value && value->data); + + if ((!shandle) || (shandle->fd <= 0) || (!shandle->path)) + return -1; + if (!key) - return; + return -1; if (!value || !value->data) - return; + return -1; - exists = glusterd_check_option_exists (key, NULL); - if (1 == exists) { - gf_log ("", GF_LOG_DEBUG, "Storing in volinfo:key= %s, val=%s", - key, value->data); - } else { - gf_log ("", GF_LOG_DEBUG, "Discarding:key= %s, val=%s", - key, value->data); - return; - } + gf_log (xl->name, GF_LOG_DEBUG, "Storing in volinfo:key= %s, val=%s", + key, value->data); - ret = glusterd_store_save_value (shandle, key, value->data); + ret = gf_store_save_value (shandle->fd, key, (char*)value->data); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to write into store" + gf_log (xl->name, GF_LOG_ERROR, "Unable to write into store" " handle for path: %s", shandle->path); - return; + return -1; } + return 0; } -int32_t -glusterd_store_create_volume (glusterd_volinfo_t *volinfo) + +int _storeopts (dict_t *this, char *key, data_t *value, void *data) { - int32_t ret = -1; - char filepath[PATH_MAX] = {0,}; - char buf[4096] = {0,}; - glusterd_conf_t *priv = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - int32_t brick_count = 0; + int32_t ret = 0; + int32_t exists = 0; + gf_store_handle_t *shandle = NULL; + xlator_t *xl = NULL; - GF_ASSERT (volinfo); - 
priv = THIS->private; + xl = THIS; + GF_ASSERT (xl); - GF_ASSERT (priv); + shandle = (gf_store_handle_t*)data; - ret = glusterd_store_create_volume_dir (volinfo->volname); + GF_ASSERT (shandle); + GF_ASSERT (shandle->fd > 0); + GF_ASSERT (shandle->path); + GF_ASSERT (key); + GF_ASSERT (value && value->data); - if (ret) - goto out; + if ((!shandle) || (shandle->fd <= 0) || (!shandle->path)) + return -1; - snprintf (filepath, 1024, "%s/%s/%s/%s", priv->workdir, - GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname, - GLUSTERD_VOLUME_INFO_FILE); + if (!key) + return -1; + if (!value || !value->data) + return -1; - ret = glusterd_store_handle_new (filepath, &volinfo->shandle); + if (is_key_glusterd_hooks_friendly (key)) { + exists = 1; + + } else { + exists = glusterd_check_option_exists (key, NULL); + } + + if (1 == exists) { + gf_log (xl->name, GF_LOG_DEBUG, "Storing in volinfo:key= %s, " + "val=%s", key, value->data); + + } else { + gf_log (xl->name, GF_LOG_DEBUG, "Discarding:key= %s, val=%s", + key, value->data); + return 0; + } + ret = gf_store_save_value (shandle->fd, key, (char*)value->data); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to create store" - " handle for path: %s", filepath); - goto out; + gf_log (xl->name, GF_LOG_ERROR, "Unable to write into store" + " handle for path: %s", shandle->path); + return -1; } + return 0; +} + +int32_t +glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo) +{ + char *str = NULL; + char buf[PATH_MAX] = {0,}; + int32_t ret = -1; + + GF_ASSERT (fd > 0); + GF_ASSERT (volinfo); snprintf (buf, sizeof (buf), "%d", volinfo->type); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_TYPE, buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TYPE, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", volinfo->brick_count); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_COUNT, buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_COUNT, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", volinfo->status); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_STATUS, buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STATUS, buf); if (ret) goto out; -/* snprintf (buf, sizeof (buf), "%d", volinfo->port); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_PORT, buf); + snprintf (buf, sizeof (buf), "%d", volinfo->sub_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_SUB_COUNT, buf); if (ret) goto out; -*/ - snprintf (buf, sizeof (buf), "%d", volinfo->sub_count); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_SUB_COUNT, buf); + + snprintf (buf, sizeof (buf), "%d", volinfo->stripe_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT, buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", volinfo->replica_count); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT, + buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", volinfo->version); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_VERSION, buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_VERSION, buf); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", volinfo->transport_type); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_TRANSPORT, buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TRANSPORT, buf); if (ret) goto out; - uuid_unparse (volinfo->volume_id, 
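
[Note: glusterd_volume_exclude_options_write in this hunk produces the header of the familiar vols/<volname>/info file, before _storeopts appends the reconfigured volume options. An illustrative rendering for a 1x2 replicate volume (all values hypothetical):

    type=2
    count=2
    status=1
    sub_count=2
    stripe_count=1
    replica_count=2
    version=3
    transport-type=0
    volume-id=00000000-0000-0000-0000-0000000000c3
    op-version=2
    client-op-version=2
]
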
buf); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_ID, buf); + snprintf (buf, sizeof (buf), "%s", volinfo->parent_volname); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PARENT_VOLNAME, buf); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to store " + GLUSTERD_STORE_KEY_PARENT_VOLNAME); + goto out; + } + + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_ID, + uuid_utoa (volinfo->volume_id)); if (ret) goto out; - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - ret = glusterd_store_create_brick (volinfo, brickinfo, - brick_count); + str = glusterd_auth_get_username (volinfo); + if (str) { + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_USERNAME, + str); + if (ret) + goto out; + } + + str = glusterd_auth_get_password (volinfo); + if (str) { + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PASSWORD, + str); if (ret) goto out; - brick_count++; } - dict_foreach (volinfo->dict, _setopts, volinfo->shandle); + snprintf (buf, sizeof (buf), "%d", volinfo->op_version); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_OP_VERSION, buf); + if (ret) + goto out; - ret = 0; + snprintf (buf, sizeof (buf), "%d", volinfo->client_op_version); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION, + buf); + if (ret) + goto out; + if (volinfo->caps) { + snprintf (buf, sizeof (buf), "%d", volinfo->caps); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_CAPS, + buf); + if (ret) + goto out; + } -out: + snprintf (buf, sizeof (buf), "%d", volinfo->is_volume_restored); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_IS_RESTORED, buf); if (ret) { - glusterd_store_delete_volume (volinfo); + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to write is_volume_restored"); + goto out; } - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + snprintf (buf, sizeof (buf), "%"PRIu64, volinfo->snap_max_hard_limit); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + buf); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to write snap-max-hard-limit"); + goto out; + } +out: + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "Unable to write volume " + "values for %s", volinfo->volname); return ret; } - -int32_t -glusterd_store_delete_volume (glusterd_volinfo_t *volinfo) +static void +glusterd_store_voldirpath_set (glusterd_volinfo_t *volinfo, char *voldirpath, + size_t len) { - char pathname[PATH_MAX] = {0,}; - int32_t ret = 0; - glusterd_conf_t *priv = NULL; - DIR *dir = NULL; - struct dirent *entry = NULL; - char path[PATH_MAX] = {0,}; - struct stat st = {0, }; + glusterd_conf_t *priv = NULL; GF_ASSERT (volinfo); priv = THIS->private; + GF_ASSERT (priv); + + GLUSTERD_GET_VOLUME_DIR (voldirpath, volinfo, priv); +} + +static int32_t +glusterd_store_create_volume_dir (glusterd_volinfo_t *volinfo) +{ + int32_t ret = -1; + char voldirpath[PATH_MAX] = {0,}; + + GF_ASSERT (volinfo); + glusterd_store_voldirpath_set (volinfo, voldirpath, + sizeof (voldirpath)); + ret = gf_store_mkdir (voldirpath); + + gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +static int32_t +glusterd_store_create_snap_dir (glusterd_snap_t *snap) +{ + int32_t ret = -1; + char snapdirpath[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; GF_ASSERT (priv); - snprintf (pathname, 1024, "%s/vols/%s", priv->workdir, - volinfo->volname); + GF_ASSERT (snap); - dir = opendir (pathname); - if (!dir) - goto out; - ret = glusterd_store_remove_bricks (volinfo); + 
GLUSTERD_GET_SNAP_DIR (snapdirpath, snap, priv); + ret = mkdir_p (snapdirpath, 0755, _gf_true); if (ret) { - gf_log ("", GF_LOG_ERROR, "Remove bricks failed"); + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snaps dir " + "%s", snapdirpath); } + return ret; +} - glusterd_for_each_entry (entry, dir); - while (entry) { +int32_t +glusterd_store_volinfo_write (int fd, glusterd_volinfo_t *volinfo) +{ + int32_t ret = -1; + gf_store_handle_t *shandle = NULL; + GF_ASSERT (fd > 0); + GF_ASSERT (volinfo); + GF_ASSERT (volinfo->shandle); - snprintf (path, PATH_MAX, "%s/%s", pathname, entry->d_name); - ret = stat (path, &st); - if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "Failed to stat entry: %s:%s", - path, strerror (errno)); - goto stat_failed; - } + shandle = volinfo->shandle; + ret = glusterd_volume_exclude_options_write (fd, volinfo); + if (ret) + goto out; - if (S_ISDIR (st.st_mode)) - ret = rmdir (path); - else - ret = unlink (path); + shandle->fd = fd; + dict_foreach (volinfo->dict, _storeopts, shandle); - gf_log ("", GF_LOG_NORMAL, "%s %s", - ret?"Failed to remove":"Removed", - entry->d_name); - if (ret) - gf_log ("", GF_LOG_NORMAL, "errno:%d", errno); -stat_failed: - memset (path, 0, sizeof(path)); - glusterd_for_each_entry (entry, dir); - } + dict_foreach (volinfo->gsync_slaves, _storeslaves, shandle); + shandle->fd = 0; +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} - ret = closedir (dir); - if (ret) { - gf_log ("", GF_LOG_NORMAL, "Failed to close dir, errno:%d", - errno); - } +int32_t +glusterd_store_snapinfo_write (glusterd_snap_t *snap) +{ + int32_t ret = -1; + int fd = 0; + char buf[PATH_MAX] = ""; - ret = rmdir (pathname); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Failed to rmdir: %s, errno: %d", - pathname, errno); + GF_ASSERT (snap); + + fd = gf_store_mkstemp (snap->shandle); + if (fd <= 0) + goto out; + + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_ID, + uuid_utoa (snap->snap_id)); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", snap->snap_status); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_STATUS, buf); + if (ret) + goto out; + + snprintf (buf, sizeof (buf), "%d", snap->snap_restored); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_RESTORED, buf); + if (ret) + goto out; + + if (snap->description) { + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_DESC, + snap->description); + if (ret) + goto out; } + snprintf (buf, sizeof (buf), "%ld", snap->time_stamp); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, buf); out: - if (volinfo->shandle) - glusterd_store_handle_destroy (volinfo->shandle); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static void +glusterd_store_rbstatepath_set (glusterd_volinfo_t *volinfo, char *rbstatepath, + size_t len) +{ + char voldirpath[PATH_MAX] = {0,}; + GF_ASSERT (volinfo); + GF_ASSERT (rbstatepath); + GF_ASSERT (len <= PATH_MAX); + + glusterd_store_voldirpath_set (volinfo, voldirpath, + sizeof (voldirpath)); + snprintf (rbstatepath, len, "%s/%s", voldirpath, + GLUSTERD_VOLUME_RBSTATE_FILE); +} + +static void +glusterd_store_volfpath_set (glusterd_volinfo_t *volinfo, char *volfpath, + size_t len) +{ + char voldirpath[PATH_MAX] = {0,}; + GF_ASSERT (volinfo); + GF_ASSERT (volfpath); + GF_ASSERT (len <= PATH_MAX); + glusterd_store_voldirpath_set (volinfo, voldirpath, + sizeof (voldirpath)); + snprintf (volfpath, len, "%s/%s", voldirpath, 
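
[Note: taken together, the *path_set helpers above and below pin down the on-disk layout under the glusterd working directory (by default /var/lib/glusterd; file names are the macro values used here, shown as commonly defined):

    <workdir>/vols/<volname>/info
    <workdir>/vols/<volname>/rbstate
    <workdir>/vols/<volname>/node_state.info
    <workdir>/vols/<volname>/bricks/<host>:<flattened-brick-path>
    <workdir>/snaps/<snapname>/info
    <workdir>/snaps/missed_snaps_list
]
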
GLUSTERD_VOLUME_INFO_FILE); +} + +static void +glusterd_store_node_state_path_set (glusterd_volinfo_t *volinfo, + char *node_statepath, size_t len) +{ + char voldirpath[PATH_MAX] = {0,}; + GF_ASSERT (volinfo); + GF_ASSERT (node_statepath); + GF_ASSERT (len <= PATH_MAX); + + glusterd_store_voldirpath_set (volinfo, voldirpath, + sizeof (voldirpath)); + snprintf (node_statepath, len, "%s/%s", voldirpath, + GLUSTERD_NODE_STATE_FILE); +} + +static void +glusterd_store_missed_snaps_list_path_set (char *missed_snaps_list, + size_t len) +{ + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (missed_snaps_list); + GF_ASSERT (len <= PATH_MAX); + + snprintf (missed_snaps_list, len, "%s/snaps/" + GLUSTERD_MISSED_SNAPS_LIST_FILE, priv->workdir); +} + +static void +glusterd_store_snapfpath_set (glusterd_snap_t *snap, char *snap_fpath, + size_t len) +{ + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (snap_fpath); + GF_ASSERT (len <= PATH_MAX); + + snprintf (snap_fpath, len, "%s/snaps/%s/%s", priv->workdir, + snap->snapname, GLUSTERD_SNAP_INFO_FILE); +} + +int32_t +glusterd_store_create_rbstate_shandle_on_absence (glusterd_volinfo_t *volinfo) +{ + char rbstatepath[PATH_MAX] = {0}; + int32_t ret = 0; + + GF_ASSERT (volinfo); + + glusterd_store_rbstatepath_set (volinfo, rbstatepath, sizeof (rbstatepath)); + ret = gf_store_handle_create_on_absence (&volinfo->rb_shandle, + rbstatepath); return ret; } +int32_t +glusterd_store_create_vol_shandle_on_absence (glusterd_volinfo_t *volinfo) +{ + char volfpath[PATH_MAX] = {0}; + int32_t ret = 0; + GF_ASSERT (volinfo); + + glusterd_store_volfpath_set (volinfo, volfpath, sizeof (volfpath)); + ret = gf_store_handle_create_on_absence (&volinfo->shandle, volfpath); + return ret; +} int32_t -glusterd_store_retrieve_value (glusterd_store_handle_t *handle, - char *key, char **value) +glusterd_store_create_nodestate_sh_on_absence (glusterd_volinfo_t *volinfo) { - int32_t ret = -1; - char scan_str[4096] = {0,}; - char *iter_key = NULL; - char *iter_val = NULL; - char *str = NULL; - char *free_str = NULL; + char node_state_path[PATH_MAX] = {0}; + int32_t ret = 0; + + GF_ASSERT (volinfo); - GF_ASSERT (handle); + glusterd_store_node_state_path_set (volinfo, node_state_path, + sizeof (node_state_path)); + ret = + gf_store_handle_create_on_absence (&volinfo->node_state_shandle, + node_state_path); - handle->fd = open (handle->path, O_RDWR); + return ret; +} - if (!handle->read) - handle->read = fdopen (handle->fd, "r"); +static int32_t +glusterd_store_create_missed_snaps_list_shandle_on_absence () +{ + char missed_snaps_list[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; - if (!handle->read) { - gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %d", - handle->path, errno); - goto out; + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + glusterd_store_missed_snaps_list_path_set (missed_snaps_list, + sizeof(missed_snaps_list)); + + ret = gf_store_handle_create_on_absence + (&priv->missed_snaps_list_shandle, + missed_snaps_list); + return ret; +} + +int32_t +glusterd_store_create_snap_shandle_on_absence (glusterd_snap_t *snap) +{ + char snapfpath[PATH_MAX] = {0}; + int32_t ret = 0; + + GF_ASSERT (snap); + + glusterd_store_snapfpath_set (snap, snapfpath, sizeof (snapfpath)); + ret = gf_store_handle_create_on_absence (&snap->shandle, snapfpath); + return ret; +} + +int32_t +glusterd_store_brickinfos 
(glusterd_volinfo_t *volinfo, int vol_fd) +{ + int32_t ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + int32_t brick_count = 0; + + GF_ASSERT (volinfo); + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = glusterd_store_brickinfo (volinfo, brickinfo, + brick_count, vol_fd); + if (ret) + goto out; + brick_count++; } +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} - ret = fscanf (handle->read, "%s", scan_str); +int32_t +glusterd_store_rbstate_write (int fd, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int port = 0; + char buf[PATH_MAX] = {0, }; - while (ret != EOF) { - if (free_str) { - GF_FREE (free_str); - free_str = NULL; - } - str = gf_strdup (scan_str); - if (!str) + GF_ASSERT (fd > 0); + GF_ASSERT (volinfo); + + snprintf (buf, sizeof (buf), "%d", volinfo->rep_brick.rb_status); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_RB_STATUS, buf); + if (ret) + goto out; + + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { + + snprintf (buf, sizeof (buf), "%s:%s", + volinfo->rep_brick.src_brick->hostname, + volinfo->rep_brick.src_brick->path); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_RB_SRC_BRICK, + buf); + if (ret) goto out; - else - free_str = str; - iter_key = strtok (str, "="); - gf_log ("", GF_LOG_DEBUG, "key %s read", iter_key); - if (!strcmp (key, iter_key)) { - gf_log ("", GF_LOG_DEBUG, "key %s found", key); - iter_val = strtok (NULL, "="); - ret = 0; - if (iter_val) - *value = gf_strdup (iter_val); + snprintf (buf, sizeof (buf), "%s:%s", + volinfo->rep_brick.dst_brick->hostname, + volinfo->rep_brick.dst_brick->path); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_RB_DST_BRICK, + buf); + if (ret) goto out; + + switch (volinfo->transport_type) { + case GF_TRANSPORT_RDMA: + port = volinfo->rep_brick.dst_brick->rdma_port; + break; + + case GF_TRANSPORT_TCP: + case GF_TRANSPORT_BOTH_TCP_RDMA: + port = volinfo->rep_brick.dst_brick->port; + break; } - ret = fscanf (handle->read, "%s", scan_str); + snprintf (buf, sizeof (buf), "%d", port); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_RB_DST_PORT, + buf); + if (ret) + goto out; + uuid_unparse (volinfo->rep_brick.rb_id, buf); + ret = gf_store_save_value (fd, GF_REPLACE_BRICK_TID_KEY, buf); } - if (EOF == ret) - ret = -1; + ret = 0; out: - if (handle->fd > 0) { - close (handle->fd); - handle->read = NULL; + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_store_perform_rbstate_store (glusterd_volinfo_t *volinfo) +{ + int fd = -1; + int32_t ret = -1; + GF_ASSERT (volinfo); + + fd = gf_store_mkstemp (volinfo->rb_shandle); + if (fd <= 0) { + ret = -1; + goto out; } - if (free_str) - GF_FREE (free_str); + ret = glusterd_store_rbstate_write (fd, volinfo); + if (ret) + goto out; + + ret = gf_store_rename_tmppath (volinfo->rb_shandle); + if (ret) + goto out; +out: + if (ret && (fd > 0)) + gf_store_unlink_tmppath (volinfo->rb_shandle); + if (fd > 0) + close (fd); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } int32_t -glusterd_store_save_value (glusterd_store_handle_t *handle, - char *key, char *value) +glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo) { - int32_t ret = -1; - char buf[4096] = {0,}; + int ret = -1; + char buf[PATH_MAX] = {0, }; - GF_ASSERT (handle); - GF_ASSERT (key); - GF_ASSERT (value); + GF_ASSERT (fd > 0); + GF_ASSERT (volinfo); - handle->fd = open (handle->path, O_RDWR | O_APPEND); + if (volinfo->rebal.defrag_cmd == GF_DEFRAG_CMD_STATUS) { + ret 
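
[Note: every *_perform_*_store function in this file follows the same crash-safe sequence: write the full contents to a temp file created by gf_store_mkstemp, then rename it over the live file with gf_store_rename_tmppath (for volinfo the rename is deferred to the *_atomic_update step so bricks and volinfo commit together). Schematically, with a hypothetical writer:

    fd = gf_store_mkstemp (shandle);          /* temp file next to <path> */
    if (fd <= 0)
            goto err;
    ret = write_all_keys (fd, obj);           /* hypothetical */
    if (ret)
            goto err;
    ret = gf_store_rename_tmppath (shandle);  /* rename(2) over <path> */
]
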
= 0; + goto out; + } + + snprintf (buf, sizeof (buf), "%d", volinfo->rebal.defrag_cmd); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG, buf); + if (ret) + goto out; - if (handle->fd < 0) { - gf_log ("", GF_LOG_ERROR, "Unable to open %s, errno: %d", - handle->path, errno); + snprintf (buf, sizeof (buf), "%d", volinfo->rebal.op); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_DEFRAG_OP, buf); + if (ret) goto out; + + if (volinfo->rebal.defrag_cmd) { + uuid_unparse (volinfo->rebal.rebalance_id, buf); + ret = gf_store_save_value (fd, GF_REBALANCE_TID_KEY, buf); } +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} - snprintf (buf, sizeof (buf), "%s=%s\n", key, value); - ret = write (handle->fd, buf, strlen (buf)); +int32_t +glusterd_store_perform_node_state_store (glusterd_volinfo_t *volinfo) +{ + int fd = -1; + int32_t ret = -1; + GF_ASSERT (volinfo); - if (ret < 0) { - gf_log ("", GF_LOG_CRITICAL, "Unable to store key: %s," - "value: %s, error: %s", key, value, - strerror (errno)); + fd = gf_store_mkstemp (volinfo->node_state_shandle); + if (fd <= 0) { ret = -1; goto out; } - ret = 0; + ret = glusterd_store_node_state_write (fd, volinfo); + if (ret) + goto out; + + ret = gf_store_rename_tmppath (volinfo->node_state_shandle); + if (ret) + goto out; out: + if (ret && (fd > 0)) + gf_store_unlink_tmppath (volinfo->node_state_shandle); + if (fd > 0) + close (fd); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} - if (handle->fd > 0) { - close (handle->fd); - handle->fd = -1; +int32_t +glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo) +{ + int fd = -1; + int32_t ret = -1; + GF_ASSERT (volinfo); + + fd = gf_store_mkstemp (volinfo->shandle); + if (fd <= 0) { + ret = -1; + goto out; } - gf_log ("", GF_LOG_DEBUG, "returning: %d", ret); + ret = glusterd_store_volinfo_write (fd, volinfo); + if (ret) + goto out; + + ret = glusterd_store_brickinfos (volinfo, fd); + if (ret) + goto out; + +out: + if (ret && (fd > 0)) + gf_store_unlink_tmppath (volinfo->shandle); + if (fd > 0) + close (fd); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } +void +glusterd_perform_volinfo_version_action (glusterd_volinfo_t *volinfo, + glusterd_volinfo_ver_ac_t ac) +{ + GF_ASSERT (volinfo); + + switch (ac) { + case GLUSTERD_VOLINFO_VER_AC_NONE: + break; + case GLUSTERD_VOLINFO_VER_AC_INCREMENT: + volinfo->version++; + break; + case GLUSTERD_VOLINFO_VER_AC_DECREMENT: + volinfo->version--; + break; + } +} + +void +glusterd_store_bricks_cleanup_tmp (glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT (volinfo); + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + gf_store_unlink_tmppath (brickinfo->shandle); + } +} + +void +glusterd_store_volume_cleanup_tmp (glusterd_volinfo_t *volinfo) +{ + GF_ASSERT (volinfo); + + glusterd_store_bricks_cleanup_tmp (volinfo); + + gf_store_unlink_tmppath (volinfo->shandle); + + gf_store_unlink_tmppath (volinfo->rb_shandle); + + gf_store_unlink_tmppath (volinfo->node_state_shandle); +} + +void +glusterd_store_snap_cleanup_tmp (glusterd_snap_t *snap) +{ + GF_ASSERT (snap); + + gf_store_unlink_tmppath (snap->shandle); +} + int32_t -glusterd_store_handle_new (char *path, glusterd_store_handle_t **handle) +glusterd_store_brickinfos_atomic_update (glusterd_volinfo_t *volinfo) { - int32_t ret = -1; - glusterd_store_handle_t *shandle = NULL; + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; - shandle = GF_CALLOC (1, sizeof 
(*shandle), gf_gld_mt_store_handle_t); - if (!shandle) - goto out; + GF_ASSERT (volinfo); - shandle->path = gf_strdup (path); + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = gf_store_rename_tmppath (brickinfo->shandle); + if (ret) + goto out; + } +out: + return ret; +} - if (!shandle->path) - goto out; +int32_t +glusterd_store_volinfo_atomic_update (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + GF_ASSERT (volinfo); - shandle->fd = open (path, O_RDWR | O_CREAT | O_APPEND, 0644); - if (!shandle->fd) + ret = gf_store_rename_tmppath (volinfo->shandle); + if (ret) goto out; - *handle = shandle; +out: + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "Couldn't rename " + "temporary file(s): Reason %s", strerror (errno)); + return ret; +} - ret = 0; +int32_t +glusterd_store_volume_atomic_update (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + GF_ASSERT (volinfo); + + ret = glusterd_store_brickinfos_atomic_update (volinfo); + if (ret) + goto out; + + ret = glusterd_store_volinfo_atomic_update (volinfo); out: - if (ret == -1) { - if (shandle) { - if (shandle->path) - GF_FREE (shandle->path); - if (shandle->fd > 0) - close (shandle->fd); - GF_FREE (shandle); - } - } else { - close (shandle->fd); - } + return ret; +} - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); +int32_t +glusterd_store_snap_atomic_update (glusterd_snap_t *snap) +{ + int ret = -1; + GF_ASSERT (snap); + + ret = gf_store_rename_tmppath (snap->shandle); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "Couldn't rename " + "temporary file(s): Reason %s", strerror (errno)); return ret; } int32_t -glusterd_store_handle_destroy (glusterd_store_handle_t *handle) +glusterd_store_snap (glusterd_snap_t *snap) { int32_t ret = -1; - if (!handle) { - ret = 0; + GF_ASSERT (snap); + + ret = glusterd_store_create_snap_dir (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snap dir"); goto out; } - GF_FREE (handle->path); + ret = glusterd_store_create_snap_shandle_on_absence (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to create snap info " + "file"); + goto out; + } - GF_FREE (handle); + ret = glusterd_store_snapinfo_write (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to write snap info"); + goto out; + } - ret = 0; + ret = glusterd_store_snap_atomic_update (snap); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to do atomic update"); + goto out; + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + if (ret) + glusterd_store_snap_cleanup_tmp (snap); + gf_log (THIS->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } - int32_t -glusterd_store_handle_truncate (glusterd_store_handle_t *handle) +glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac) { - int32_t ret = -1; + int32_t ret = -1; - GF_ASSERT (handle); - GF_ASSERT (handle->path); + GF_ASSERT (volinfo); - ret = truncate (handle->path, 0); + glusterd_perform_volinfo_version_action (volinfo, ac); + ret = glusterd_store_create_volume_dir (volinfo); + if (ret) + goto out; - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + ret = glusterd_store_create_vol_shandle_on_absence (volinfo); + if (ret) + goto out; + + ret = glusterd_store_create_rbstate_shandle_on_absence (volinfo); + if (ret) + goto out; + + ret = glusterd_store_create_nodestate_sh_on_absence (volinfo); + if (ret) + goto out; + + ret = glusterd_store_perform_volume_store (volinfo); + if (ret) + goto out; + + ret = glusterd_store_volume_atomic_update (volinfo); + if (ret) { + 
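/* Ordering note: the version bump requested through ac happened
 * before anything was written, so a failed commit must undo it here.
 * In effect:
 *
 *   version++  ->  write tmp file  ->  rename (success keeps the bump)
 *                                  \-> failure: version-- (rollback)
 *
 * Otherwise peers comparing volinfo versions would see an increment
 * that was never persisted. */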
glusterd_perform_volinfo_version_action (volinfo, + GLUSTERD_VOLINFO_VER_AC_DECREMENT); + goto out; + } + + ret = glusterd_store_perform_rbstate_store (volinfo); + if (ret) + goto out; + + ret = glusterd_store_perform_node_state_store (volinfo); + if (ret) + goto out; + + //checksum should be computed at the end + ret = glusterd_volume_compute_cksum (volinfo); + if (ret) + goto out; + +out: + if (ret) + glusterd_store_volume_cleanup_tmp (volinfo); + + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } int32_t -glusterd_store_uuid () +glusterd_store_delete_volume (glusterd_volinfo_t *volinfo) { - char str[GLUSTERD_UUID_LEN] = {0,}; - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; - int32_t ret = -1; - glusterd_store_handle_t *handle = NULL; + char pathname[PATH_MAX] = {0,}; + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + char path[PATH_MAX] = {0,}; + char delete_path[PATH_MAX] = {0,}; + char trashdir[PATH_MAX] = {0,}; + struct stat st = {0, }; + xlator_t *this = NULL; + gf_boolean_t rename_fail = _gf_false; - priv = THIS->private; + this = THIS; + GF_ASSERT (this); - uuid_unparse (priv->uuid, str); + GF_ASSERT (volinfo); + priv = this->private; - snprintf (path, PATH_MAX, "%s/%s", priv->workdir, - GLUSTERD_INFO_FILE); + GF_ASSERT (priv); - if (!priv->handle) { - ret = glusterd_store_handle_new (path, &handle); + GLUSTERD_GET_VOLUME_DIR (pathname, volinfo, priv); + + snprintf (delete_path, sizeof (delete_path), + "%s/"GLUSTERD_TRASH"/%s.deleted", priv->workdir, + uuid_utoa (volinfo->volume_id)); + + snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH, + priv->workdir); + + ret = mkdir (trashdir, 0777); + if (ret && errno != EEXIST) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create trash " + "directory, reason : %s", strerror (errno)); + ret = -1; + goto out; + } + + ret = rename (pathname, delete_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to rename volume " + "directory for volume %s", volinfo->volname); + rename_fail = _gf_true; + goto out; + } + + dir = opendir (delete_path); + if (!dir) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s." + " Reason : %s", delete_path, strerror (errno)); + ret = 0; + goto out; + } + ret = glusterd_store_remove_bricks (volinfo, delete_path); + + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Remove bricks failed for %s", + volinfo->volname); + } + + glusterd_for_each_entry (entry, dir); + while (entry) { + + snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name); + ret = stat (path, &st); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to stat " + "entry %s : %s", path, strerror (errno)); + goto stat_failed; + } + + if (S_ISDIR (st.st_mode)) + ret = rmdir (path); + else + ret = unlink (path); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get store" - " handle!"); - goto out; + gf_log (this->name, GF_LOG_DEBUG, " Failed to remove " + "%s. Reason : %s", path, strerror (errno)); } - priv->handle = handle; + gf_log (this->name, GF_LOG_DEBUG, "%s %s", + ret ? "Failed to remove":"Removed", + entry->d_name); +stat_failed: + memset (path, 0, sizeof(path)); + glusterd_for_each_entry (entry, dir); } - ret = glusterd_store_save_value (priv->handle, GLUSTERD_STORE_UUID_KEY, - str); - + ret = closedir (dir); if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Storing uuid failed" - "ret = %d", ret); - goto out; + gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. 
" + "Reason : %s",delete_path, strerror (errno)); } + ret = rmdir (delete_path); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s", + delete_path, strerror (errno)); + } + ret = rmdir (trashdir); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s, Reason:" + " %s", trashdir, strerror (errno)); + } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + if (volinfo->shandle) { + gf_store_handle_destroy (volinfo->shandle); + volinfo->shandle = NULL; + } + ret = (rename_fail == _gf_true) ? -1: 0; + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } - +/*TODO: cleanup the duplicate code and implement a generic function for + * deleting snap/volume depending on the parameter flag */ int32_t -glusterd_retrieve_uuid () +glusterd_store_delete_snap (glusterd_snap_t *snap) { - char *uuid_str = NULL; - int32_t ret = -1; - glusterd_store_handle_t *handle = NULL; - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; + char pathname[PATH_MAX] = {0,}; + int32_t ret = 0; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + char path[PATH_MAX] = {0,}; + char delete_path[PATH_MAX] = {0,}; + char trashdir[PATH_MAX] = {0,}; + struct stat st = {0, }; + xlator_t *this = NULL; + gf_boolean_t rename_fail = _gf_false; - priv = THIS->private; + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GLUSTERD_GET_SNAP_DIR (pathname, snap, priv); - if (!priv->handle) { - snprintf (path, PATH_MAX, "%s/%s", priv->workdir, - GLUSTERD_INFO_FILE); - ret = glusterd_store_handle_new (path, &handle); + snprintf (delete_path, sizeof (delete_path), + "%s/"GLUSTERD_TRASH"/snap-%s.deleted", priv->workdir, + uuid_utoa (snap->snap_id)); + + snprintf (trashdir, sizeof (trashdir), "%s/"GLUSTERD_TRASH, + priv->workdir); + + ret = mkdir (trashdir, 0777); + if (ret && errno != EEXIST) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create trash " + "directory, reason : %s", strerror (errno)); + ret = -1; + goto out; + } + + ret = rename (pathname, delete_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to rename snap " + "directory %s to %s", snap->snapname, delete_path); + rename_fail = _gf_true; + goto out; + } + + dir = opendir (delete_path); + if (!dir) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to open directory %s." + " Reason : %s", delete_path, strerror (errno)); + ret = 0; + goto out; + } + + glusterd_for_each_entry (entry, dir); + while (entry) { + snprintf (path, PATH_MAX, "%s/%s", delete_path, entry->d_name); + ret = stat (path, &st); + if (ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to stat " + "entry %s : %s", path, strerror (errno)); + goto stat_failed; + } + + if (S_ISDIR (st.st_mode)) + ret = rmdir (path); + else + ret = unlink (path); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get store " - "handle!"); - goto out; + gf_log (this->name, GF_LOG_DEBUG, " Failed to remove " + "%s. Reason : %s", path, strerror (errno)); } - priv->handle = handle; + gf_log (this->name, GF_LOG_DEBUG, "%s %s", + ret ? "Failed to remove":"Removed", + entry->d_name); +stat_failed: + memset (path, 0, sizeof(path)); + glusterd_for_each_entry (entry, dir); } - ret = glusterd_store_retrieve_value (priv->handle, - GLUSTERD_STORE_UUID_KEY, - &uuid_str); - + ret = closedir (dir); if (ret) { - gf_log ("", GF_LOG_NORMAL, "No previous uuid is present"); - goto out; + gf_log (this->name, GF_LOG_DEBUG, "Failed to close dir %s. 
" + "Reason : %s",delete_path, strerror (errno)); } - uuid_parse (uuid_str, priv->uuid); + ret = rmdir (delete_path); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s,err: %s", + delete_path, strerror (errno)); + } + ret = rmdir (trashdir); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to rmdir: %s, Reason:" + " %s", trashdir, strerror (errno)); + } out: - if (uuid_str) - GF_FREE (uuid_str); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + if (snap->shandle) { + gf_store_handle_destroy (snap->shandle); + snap->shandle = NULL; + } + ret = (rename_fail == _gf_true) ? -1: 0; + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } -int32_t -glusterd_store_iter_new (glusterd_store_handle_t *shandle, - glusterd_store_iter_t **iter) +int +glusterd_store_global_info (xlator_t *this) { - int32_t ret = -1; - glusterd_store_iter_t *tmp_iter = NULL; - int fd = -1; + int ret = -1; + glusterd_conf_t *conf = NULL; + char op_version_str[15] = {0,}; + char path[PATH_MAX] = {0,}; + gf_store_handle_t *handle = NULL; + char *uuid_str = NULL; + char buf[256] = {0, }; + + conf = this->private; + + uuid_str = gf_strdup (uuid_utoa (MY_UUID)); + if (!uuid_str) + goto out; - GF_ASSERT (shandle); - GF_ASSERT (iter); + if (!conf->handle) { + snprintf (path, PATH_MAX, "%s/%s", conf->workdir, + GLUSTERD_INFO_FILE); + ret = gf_store_handle_new (path, &handle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get store handle"); + goto out; + } - tmp_iter = GF_CALLOC (1, sizeof (*tmp_iter), - gf_gld_mt_store_iter_t); + conf->handle = handle; + } else + handle = conf->handle; - if (!tmp_iter) { - gf_log ("", GF_LOG_ERROR, "Out of Memory"); + /* These options need to be available for all users */ + ret = chmod (handle->path, 0644); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "chmod error for %s: %s", + GLUSTERD_INFO_FILE, strerror (errno)); goto out; } - fd = open (shandle->path, O_RDWR); - - if (fd < 0) { - gf_log ("", GF_LOG_ERROR, "Unable to open %s, errno: %d", - shandle->path, errno); + handle->fd = gf_store_mkstemp (handle); + if (handle->fd <= 0) { + ret = -1; goto out; } - tmp_iter->fd = fd; + ret = gf_store_save_value (handle->fd, GLUSTERD_STORE_UUID_KEY, + uuid_str); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Storing uuid failed ret = %d", ret); + goto out; + } - tmp_iter->file = fdopen (shandle->fd, "r"); + snprintf (op_version_str, 15, "%d", conf->op_version); + ret = gf_store_save_value (handle->fd, GD_OP_VERSION_KEY, + op_version_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Storing op-version failed ret = %d", ret); + goto out; + } - if (!tmp_iter->file) { - gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %d", - shandle->path, errno); + snprintf (buf, sizeof (buf), "%"PRIu64, conf->snap_max_hard_limit); + ret = gf_store_save_value (handle->fd, + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Storing snap-max-hard-limit failed ret = %d", ret); goto out; } - *iter = tmp_iter; - ret = 0; + snprintf (buf, sizeof (buf), "%"PRIu64, conf->snap_max_soft_limit); + ret = gf_store_save_value (handle->fd, + GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT, buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Storing snap-max-soft-limit failed ret = %d", ret); + goto out; + } + ret = gf_store_rename_tmppath (handle); out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (ret && (handle->fd > 0)) + gf_store_unlink_tmppath (handle); + + if (handle->fd > 0) { + 
close (handle->fd); + handle->fd = 0; + } + + if (uuid_str) + GF_FREE (uuid_str); + + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to store glusterd global-info"); + return ret; } -int32_t -glusterd_store_iter_get_next (glusterd_store_iter_t *iter, - char **key, char **value) +int +glusterd_retrieve_op_version (xlator_t *this, int *op_version) { - int32_t ret = -1; - char scan_str[4096] = {0,}; - char *str = NULL; - char *free_str = NULL; - char *iter_key = NULL; - char *iter_val = NULL; + char *op_version_str = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + int tmp_version = 0; + char *tmp = NULL; + char path[PATH_MAX] = {0,}; + gf_store_handle_t *handle = NULL; - GF_ASSERT (iter); - GF_ASSERT (iter->file); + priv = this->private; - ret = fscanf (iter->file, "%s", scan_str); + if (!priv->handle) { + snprintf (path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_INFO_FILE); + ret = gf_store_handle_retrieve (path, &handle); - if (ret <= 0) { - ret = -1; + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to get store " + "handle!"); + goto out; + } + + priv->handle = handle; + } + + ret = gf_store_retrieve_value (priv->handle, GD_OP_VERSION_KEY, + &op_version_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "No previous op_version present"); goto out; } - str = gf_strdup (scan_str); - if (!str) + tmp_version = strtol (op_version_str, &tmp, 10); + if ((tmp_version <= 0) || (tmp && strlen (tmp) > 1)) { + gf_log (this->name, GF_LOG_WARNING, "invalid version number"); goto out; - else - free_str = str; + } + + *op_version = tmp_version; - iter_key = strtok (str, "="); - gf_log ("", GF_LOG_DEBUG, "key %s read", iter_key); + ret = 0; +out: + if (op_version_str) + GF_FREE (op_version_str); + return ret; +} - iter_val = strtok (NULL, "="); - gf_log ("", GF_LOG_DEBUG, "value %s read", iter_val); +int +glusterd_retrieve_sys_snap_max_limit (xlator_t *this, uint64_t *limit, + char *key) +{ + char *limit_str = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + uint64_t tmp_limit = 0; + char *tmp = NULL; + char path[PATH_MAX] = {0,}; + gf_store_handle_t *handle = NULL; - if (iter_val) - *value = gf_strdup (iter_val); - *key = gf_strdup (iter_key); + GF_ASSERT (this); + priv = this->private; - ret = 0; + GF_ASSERT (priv); + GF_ASSERT (limit); + GF_ASSERT (key); + + if (!priv->handle) { + snprintf (path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_INFO_FILE); + ret = gf_store_handle_retrieve (path, &handle); + + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to get store " + "handle!"); + goto out; + } + + priv->handle = handle; + } + ret = gf_store_retrieve_value (priv->handle, + key, + &limit_str); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "No previous %s present", key); + goto out; + } + + tmp_limit = strtoul (limit_str, &tmp, 10); + if ((tmp_limit <= 0) || (tmp && strlen (tmp) > 1)) { + gf_log (this->name, GF_LOG_WARNING, "invalid limit"); + goto out; + } + + *limit = tmp_limit; + + ret = 0; out: - if (free_str) - GF_FREE (free_str); + if (limit_str) + GF_FREE (limit_str); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; } - -int32_t -glusterd_store_iter_get_matching (glusterd_store_iter_t *iter, - char *key, char **value) +static int +glusterd_restore_op_version (xlator_t *this) { - int32_t ret = -1; - char *tmp_key = NULL; - char *tmp_value = NULL; + glusterd_conf_t *conf = NULL; + int ret = 0; + int op_version = 0; - ret = glusterd_store_iter_get_next (iter, &tmp_key, &tmp_value); - while (!ret) { - if (!strncmp (key, tmp_key, strlen (key))){ - *value = tmp_value; - GF_FREE (tmp_key); + conf = this->private; + + ret = glusterd_retrieve_sys_snap_max_limit (this, + &conf->snap_max_hard_limit, + GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to retrieve system snap-max-hard-limit, " + "setting it to default value(%d)", + GLUSTERD_SNAPS_MAX_HARD_LIMIT); + conf->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + } + + ret = glusterd_retrieve_sys_snap_max_limit (this, + &conf->snap_max_soft_limit, + GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to retrieve system snap-max-soft-limit, " + "setting it to default value(%d)", + GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT); + conf->snap_max_soft_limit = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT; + } + + ret = glusterd_retrieve_op_version (this, &op_version); + if (!ret) { + if ((op_version < GD_OP_VERSION_MIN) || + (op_version > GD_OP_VERSION_MAX)) { + gf_log (this->name, GF_LOG_ERROR, + "wrong op-version (%d) retrieved", op_version); + ret = -1; goto out; } - GF_FREE (tmp_key); - GF_FREE (tmp_value); - ret = glusterd_store_iter_get_next (iter, &tmp_key, - &tmp_value); + conf->op_version = op_version; + gf_log ("glusterd", GF_LOG_INFO, + "retrieved op-version: %d", conf->op_version); + goto out; } + + /* op-version can be missing from the store file in 2 cases, + * 1. This is a new install of glusterfs + * 2. This is an upgrade of glusterfs from a version without op-version + * to a version with op-version (eg. 3.3 -> 3.4) + * + * Detection of a new install or an upgrade from an older install can be + * done by checking for the presence of its peer-id in the store + * file. If the peer-id is present, the installation is an upgrade; else, it + * is a new install. + * + * For case 1, set op-version to GD_OP_VERSION_MAX. + * For case 2, set op-version to GD_OP_VERSION_MIN. + */ + ret = glusterd_retrieve_uuid(); + if (ret) { + gf_log (this->name, GF_LOG_INFO, "Detected new install. Setting" + " op-version to maximum : %d", GD_OP_VERSION_MAX); + conf->op_version = GD_OP_VERSION_MAX; + } else { + gf_log (this->name, GF_LOG_INFO, "Upgrade detected. 
Setting" + " op-version to minimum : %d", GD_OP_VERSION_MIN); + conf->op_version = GD_OP_VERSION_MIN; + } + ret = 0; out: return ret; } int32_t -glusterd_store_iter_destroy (glusterd_store_iter_t *iter) +glusterd_retrieve_uuid () { + char *uuid_str = NULL; int32_t ret = -1; + gf_store_handle_t *handle = NULL; + glusterd_conf_t *priv = NULL; + char path[PATH_MAX] = {0,}; + + priv = THIS->private; + + if (!priv->handle) { + snprintf (path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_INFO_FILE); + ret = gf_store_handle_retrieve (path, &handle); - GF_ASSERT (iter); - GF_ASSERT (iter->fd > 0); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to get store" + "handle!"); + goto out; + } - ret = fclose (iter->file); + priv->handle = handle; + } + + ret = gf_store_retrieve_value (priv->handle, GLUSTERD_STORE_UUID_KEY, + &uuid_str); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to close fd: %d, ret: %d, " - "errno: %d" ,iter->fd, ret, errno); + gf_log ("", GF_LOG_DEBUG, "No previous uuid is present"); + goto out; } - GF_FREE (iter); + uuid_parse (uuid_str, priv->uuid); +out: + GF_FREE (uuid_str); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } + int32_t glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) { - int32_t ret = 0; glusterd_brickinfo_t *brickinfo = NULL; - glusterd_store_iter_t *iter = NULL; + gf_store_iter_t *iter = NULL; char *key = NULL; char *value = NULL; char brickdir[PATH_MAX] = {0,}; @@ -912,19 +1895,19 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) glusterd_conf_t *priv = NULL; int32_t brick_count = 0; char tmpkey[4096] = {0,}; - glusterd_store_iter_t *tmpiter = NULL; + gf_store_iter_t *tmpiter = NULL; char *tmpvalue = NULL; - struct pmap_registry *pmap = NULL; + struct pmap_registry *pmap = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; GF_ASSERT (volinfo); GF_ASSERT (volinfo->volname); priv = THIS->private; - GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv) - - ret = glusterd_store_iter_new (volinfo->shandle, &tmpiter); + GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv); + ret = gf_store_iter_new (volinfo->shandle, &tmpiter); if (ret) goto out; @@ -935,26 +1918,30 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) goto out; snprintf (tmpkey, sizeof (tmpkey), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK,brick_count); - ret = glusterd_store_iter_get_matching (tmpiter, tmpkey, - &tmpvalue); + ret = gf_store_iter_get_matching (tmpiter, tmpkey, &tmpvalue); snprintf (path, sizeof (path), "%s/%s", brickdir, tmpvalue); GF_FREE (tmpvalue); tmpvalue = NULL; - ret = glusterd_store_handle_new (path, &brickinfo->shandle); + ret = gf_store_handle_retrieve (path, &brickinfo->shandle); if (ret) goto out; - ret = glusterd_store_iter_new (brickinfo->shandle, &iter); + ret = gf_store_iter_new (brickinfo->shandle, &iter); if (ret) goto out; - ret = glusterd_store_iter_get_next (iter, &key, &value); - + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Unable to iterate " + "the store for brick: %s, reason: %s", path, + gf_store_strerror (op_errno)); + goto out; + } while (!ret) { if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_HOSTNAME, strlen (GLUSTERD_STORE_KEY_BRICK_HOSTNAME))) { @@ -966,11 +1953,52 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_PORT, strlen (GLUSTERD_STORE_KEY_BRICK_PORT))) { gf_string2int (value, &brickinfo->port); - /* This is required to have proper ports - assigned to bricks after restart */ 
- pmap = pmap_registry_get (THIS); - if (pmap->last_alloc <= brickinfo->port) - pmap->last_alloc = brickinfo->port + 1; + + if (brickinfo->port < priv->base_port) { + /* This is required to adhere to the + IANA standards */ + brickinfo->port = 0; + } else { + /* This is required to have proper ports + assigned to bricks after restart */ + pmap = pmap_registry_get (THIS); + if (pmap->last_alloc <= brickinfo->port) + pmap->last_alloc = + brickinfo->port + 1; + } + } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT, + strlen (GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) { + gf_string2int (value, &brickinfo->rdma_port); + + if (brickinfo->rdma_port < priv->base_port) { + /* This is required to adhere to the + IANA standards */ + brickinfo->rdma_port = 0; + } else { + /* This is required to have proper ports + assigned to bricks after restart */ + pmap = pmap_registry_get (THIS); + if (pmap->last_alloc <= + brickinfo->rdma_port) + pmap->last_alloc = + brickinfo->rdma_port +1; + } + + } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED, + strlen (GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) { + gf_string2int (value, &brickinfo->decommissioned); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH, + strlen (GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH))) { + strncpy (brickinfo->device_path, value, + sizeof (brickinfo->device_path)); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS, + strlen (GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) { + gf_string2int (value, &brickinfo->snap_status); + } else if (!strncmp (key, + GLUSTERD_STORE_KEY_BRICK_VGNAME, + strlen (GLUSTERD_STORE_KEY_BRICK_VGNAME))) { + strncpy (brickinfo->vg, value, + sizeof (brickinfo->vg)); } else { gf_log ("", GF_LOG_ERROR, "Unknown key: %s", key); @@ -981,10 +2009,13 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) key = NULL; value = NULL; - ret = glusterd_store_iter_get_next (iter, &key, &value); + ret = gf_store_iter_get_next (iter, &key, &value, + &op_errno); } - ret = glusterd_store_iter_destroy (iter); + if (op_errno != GD_STORE_EOF) + goto out; + ret = gf_store_iter_destroy (iter); if (ret) goto out; @@ -993,7 +2024,7 @@ glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo) brick_count++; } - ret = glusterd_store_iter_destroy (tmpiter); + ret = gf_store_iter_destroy (tmpiter); if (ret) goto out; out: @@ -1004,92 +2035,334 @@ out: int32_t -glusterd_store_retrieve_volume (char *volname) +glusterd_store_retrieve_rbstate (glusterd_volinfo_t *volinfo) { - int32_t ret = -1; - glusterd_volinfo_t *volinfo = NULL; - glusterd_store_iter_t *iter = NULL; - char *key = NULL; - char *value = NULL; - char volpath[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; - int exists = 0; + int32_t ret = -1; + gf_store_iter_t *iter = NULL; + char *key = NULL; + char *value = NULL; + char volpath[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + char path[PATH_MAX] = {0,}; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + xlator_t *this = NULL; - ret = glusterd_volinfo_new (&volinfo); + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volinfo); + + GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); + snprintf (path, sizeof (path), "%s/%s", volpath, + GLUSTERD_VOLUME_RBSTATE_FILE); + + ret = gf_store_handle_retrieve (path, &volinfo->rb_shandle); if (ret) goto out; - strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME); + ret = gf_store_iter_new (volinfo->rb_shandle, &iter); - priv = THIS->private; + if (ret) + goto 
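/* Every retrieve routine in this file walks its handle with the same
 * loop; a condensed sketch (match_and_assign is a hypothetical
 * stand-in for the strncmp ladder):
 *
 *   ret = gf_store_iter_get_next (iter, &key, &value, &op_errno);
 *   while (!ret) {
 *           match_and_assign (key, value);
 *           GF_FREE (key); GF_FREE (value);
 *           ret = gf_store_iter_get_next (iter, &key, &value,
 *                                         &op_errno);
 *   }
 *   if (op_errno != GD_STORE_EOF)   // must stop at EOF, not an error
 *           goto out;
 */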
out; + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + if (ret) + goto out; + + while (!ret) { + if (!strncmp (key, GLUSTERD_STORE_KEY_RB_STATUS, + strlen (GLUSTERD_STORE_KEY_RB_STATUS))) { + volinfo->rep_brick.rb_status = atoi (value); + } + + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { + if (!strncmp (key, GLUSTERD_STORE_KEY_RB_SRC_BRICK, + strlen (GLUSTERD_STORE_KEY_RB_SRC_BRICK))) { + ret = glusterd_brickinfo_new_from_brick (value, + &volinfo->rep_brick.src_brick); + if (ret) + goto out; + } else if (!strncmp (key, GLUSTERD_STORE_KEY_RB_DST_BRICK, + strlen (GLUSTERD_STORE_KEY_RB_DST_BRICK))) { + ret = glusterd_brickinfo_new_from_brick (value, + &volinfo->rep_brick.dst_brick); + if (ret) + goto out; + } else if (!strncmp (key, GLUSTERD_STORE_KEY_RB_DST_PORT, + strlen (GLUSTERD_STORE_KEY_RB_DST_PORT))) { + switch (volinfo->transport_type) { + case GF_TRANSPORT_RDMA: + volinfo->rep_brick.dst_brick->rdma_port = + atoi (value); + break; + + case GF_TRANSPORT_TCP: + case GF_TRANSPORT_BOTH_TCP_RDMA: + volinfo->rep_brick.dst_brick->port = + atoi (value); + break; + } + } else if (!strncmp (key, GF_REPLACE_BRICK_TID_KEY, + strlen (GF_REPLACE_BRICK_TID_KEY))) { + uuid_parse (value, + volinfo->rep_brick.rb_id); + } + } + + GF_FREE (key); + GF_FREE (value); + key = NULL; + value = NULL; + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + } + + if (op_errno != GD_STORE_EOF) + goto out; + + ret = gf_store_iter_destroy (iter); + + if (ret) + goto out; + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + + return ret; +} + +int32_t +glusterd_store_retrieve_node_state (glusterd_volinfo_t *volinfo) +{ + int32_t ret = -1; + gf_store_iter_t *iter = NULL; + char *key = NULL; + char *value = NULL; + char volpath[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + char path[PATH_MAX] = {0,}; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volinfo); GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv); snprintf (path, sizeof (path), "%s/%s", volpath, - GLUSTERD_VOLUME_INFO_FILE); + GLUSTERD_NODE_STATE_FILE); - ret = glusterd_store_handle_new (path, &volinfo->shandle); + ret = gf_store_handle_retrieve (path, &volinfo->node_state_shandle); + if (ret) + goto out; + + ret = gf_store_iter_new (volinfo->node_state_shandle, &iter); + + if (ret) + goto out; + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); if (ret) goto out; - ret = glusterd_store_iter_new (volinfo->shandle, &iter); + while (ret == 0) { + if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG, + strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) { + volinfo->rebal.defrag_cmd = atoi (value); + } + + if (volinfo->rebal.defrag_cmd) { + if (!strncmp (key, GF_REBALANCE_TID_KEY, + strlen (GF_REBALANCE_TID_KEY))) + uuid_parse (value, volinfo->rebal.rebalance_id); + + if (!strncmp (key, GLUSTERD_STORE_KEY_DEFRAG_OP, + strlen (GLUSTERD_STORE_KEY_DEFRAG_OP))) + volinfo->rebal.op = atoi (value); + } + + GF_FREE (key); + GF_FREE (value); + key = NULL; + value = NULL; + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + } + + if (op_errno != GD_STORE_EOF) + goto out; + + ret = gf_store_iter_destroy (iter); if (ret) goto out; - ret = glusterd_store_iter_get_next (iter, &key, &value); +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + + return ret; +} + + +int +glusterd_store_update_volinfo (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + int exists = 
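/* glusterd_store_update_volinfo() below rebuilds the in-memory
 * volinfo from the volume's flat info file.  Assuming the key macros
 * keep their historical spellings, a plain replica-2 volume loads
 * from a file shaped like:
 *
 *   type=2
 *   count=2
 *   status=1
 *   sub_count=2
 *   version=3
 *   transport-type=0
 *   volume-id=<uuid>
 *   performance.readdir-ahead=on     (an example volume-set option) */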
0; + char *key = NULL; + char *value = NULL; + char volpath[PATH_MAX] = {0,}; + char path[PATH_MAX] = {0,}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_store_iter_t *iter = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + + this = THIS; + GF_ASSERT (this); + conf = THIS->private; + GF_ASSERT (volinfo); + + GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, conf); + + snprintf (path, sizeof (path), "%s/%s", volpath, + GLUSTERD_VOLUME_INFO_FILE); + + ret = gf_store_handle_retrieve (path, &volinfo->shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "volinfo handle is NULL"); + goto out; + } + + ret = gf_store_iter_new (volinfo->shandle, &iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get new store " + "iter"); + goto out; + } + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get next store " + "iter"); + goto out; + } while (!ret) { + gf_log ("", GF_LOG_DEBUG, "key = %s value = %s", key, value); if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TYPE, strlen (GLUSTERD_STORE_KEY_VOL_TYPE))) { volinfo->type = atoi (value); } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_COUNT, - strlen (GLUSTERD_STORE_KEY_VOL_COUNT))) { + strlen (GLUSTERD_STORE_KEY_VOL_COUNT))) { volinfo->brick_count = atoi (value); } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_STATUS, - strlen (GLUSTERD_STORE_KEY_VOL_STATUS))) { + strlen (GLUSTERD_STORE_KEY_VOL_STATUS))) { volinfo->status = atoi (value); } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_VERSION, - strlen (GLUSTERD_STORE_KEY_VOL_VERSION))) { + strlen (GLUSTERD_STORE_KEY_VOL_VERSION))) { volinfo->version = atoi (value); } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_PORT, - strlen (GLUSTERD_STORE_KEY_VOL_PORT))) { + strlen (GLUSTERD_STORE_KEY_VOL_PORT))) { volinfo->port = atoi (value); } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_SUB_COUNT, - strlen (GLUSTERD_STORE_KEY_VOL_SUB_COUNT))) { + strlen (GLUSTERD_STORE_KEY_VOL_SUB_COUNT))) { volinfo->sub_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT, + strlen (GLUSTERD_STORE_KEY_VOL_STRIPE_CNT))) { + volinfo->stripe_count = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT, + strlen (GLUSTERD_STORE_KEY_VOL_REPLICA_CNT))) { + volinfo->replica_count = atoi (value); } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TRANSPORT, - strlen (GLUSTERD_STORE_KEY_VOL_TRANSPORT))) { + strlen (GLUSTERD_STORE_KEY_VOL_TRANSPORT))) { volinfo->transport_type = atoi (value); + volinfo->nfs_transport_type = volinfo->transport_type; + if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) { + volinfo->nfs_transport_type = GF_DEFAULT_NFS_TRANSPORT; + } } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_ID, - strlen (GLUSTERD_STORE_KEY_VOL_ID))) { + strlen (GLUSTERD_STORE_KEY_VOL_ID))) { ret = uuid_parse (value, volinfo->volume_id); if (ret) gf_log ("", GF_LOG_WARNING, "failed to parse uuid"); + + } else if (!strncmp (key, GLUSTERD_STORE_KEY_USERNAME, + strlen (GLUSTERD_STORE_KEY_USERNAME))) { + + glusterd_auth_set_username (volinfo, value); + + } else if (!strncmp (key, GLUSTERD_STORE_KEY_PASSWORD, + strlen (GLUSTERD_STORE_KEY_PASSWORD))) { + + glusterd_auth_set_password (volinfo, value); + + } else if (strstr (key, "slave")) { + ret = dict_set_dynstr (volinfo->gsync_slaves, key, + gf_strdup (value)); + if (ret) { + gf_log ("",GF_LOG_ERROR, "Error in " + "dict_set_str"); + goto out; + } + gf_log ("", GF_LOG_DEBUG, "Parsed as "GEOREP" " + " 
slave:key=%s,value:%s", key, value); + + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_OP_VERSION, + strlen (GLUSTERD_STORE_KEY_VOL_OP_VERSION))) { + volinfo->op_version = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION, + strlen (GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION))) { + volinfo->client_op_version = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_CAPS, + strlen (GLUSTERD_STORE_KEY_VOL_CAPS))) { + volinfo->caps = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, + strlen (GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT))) { + volinfo->snap_max_hard_limit = (uint64_t) atoll (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_IS_RESTORED, + strlen (GLUSTERD_STORE_KEY_VOL_IS_RESTORED))) { + volinfo->is_volume_restored = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_PARENT_VOLNAME, + strlen (GLUSTERD_STORE_KEY_PARENT_VOLNAME))) { + strncpy (volinfo->parent_volname, value, sizeof(volinfo->parent_volname) - 1); } else { - exists = glusterd_check_option_exists (key, NULL); - if (exists == -1) { + + if (is_key_glusterd_hooks_friendly (key)) { + exists = 1; + + } else { + exists = glusterd_check_option_exists (key, + NULL); + } + + switch (exists) { + case -1: ret = -1; goto out; - } - if (exists) { + + case 0: + gf_log ("", GF_LOG_ERROR, "Unknown key: %s", + key); + break; + + case 1: ret = dict_set_str(volinfo->dict, key, - gf_strdup (value)); + gf_strdup (value)); if (ret) { gf_log ("",GF_LOG_ERROR, "Error in " - "dict_set_str"); + "dict_set_str"); goto out; } gf_log ("", GF_LOG_DEBUG, "Parsed as Volume-" - "set:key=%s,value:%s", - key, value); + "set:key=%s,value:%s", key, value); + break; } - else - gf_log ("", GF_LOG_ERROR, "Unknown key: %s", - key); } GF_FREE (key); @@ -1097,14 +2370,98 @@ glusterd_store_retrieve_volume (char *volname) key = NULL; value = NULL; - ret = glusterd_store_iter_get_next (iter, &key, &value); + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); } - ret = glusterd_store_iter_destroy (iter); + /* backward compatibility */ + { + + switch (volinfo->type) { + + case GF_CLUSTER_TYPE_NONE: + volinfo->stripe_count = 1; + volinfo->replica_count = 1; + break; + + case GF_CLUSTER_TYPE_STRIPE: + volinfo->stripe_count = volinfo->sub_count; + volinfo->replica_count = 1; + break; + + case GF_CLUSTER_TYPE_REPLICATE: + volinfo->stripe_count = 1; + volinfo->replica_count = volinfo->sub_count; + break; + + case GF_CLUSTER_TYPE_STRIPE_REPLICATE: + /* Introduced in 3.3 */ + GF_ASSERT (volinfo->stripe_count > 0); + GF_ASSERT (volinfo->replica_count > 0); + break; + default: + GF_ASSERT (0); + break; + } + + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo); + + volinfo->subvol_count = (volinfo->brick_count / + volinfo->dist_leaf_count); + + /* Only calculate volume op-versions if they are not found */ + if (!volinfo->op_version && !volinfo->client_op_version) + gd_update_volume_op_versions (volinfo); + } + + if (op_errno != GD_STORE_EOF) + goto out; + + ret = gf_store_iter_destroy (iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to destroy store " + "iter"); + goto out; + } + + ret = 0; +out: + return ret; +} + +glusterd_volinfo_t* +glusterd_store_retrieve_volume (char *volname, glusterd_snap_t *snap) +{ + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *origin_volinfo = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT 
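/* A worked example for the backward-compatibility block above: an old
 * store that recorded only sub_count=2 for a replicate volume with
 * brick_count=6 is reconstructed as
 *
 *   replica_count   = sub_count             = 2
 *   stripe_count    = 1
 *   dist_leaf_count = stripe * replica      = 2
 *   subvol_count    = brick_count / leaves  = 3
 *
 * i.e. a 3-way distribute over 2-way replica sets (dist_leaf_count is
 * assumed to be stripe_count * replica_count, per
 * glusterd_get_dist_leaf_count()). */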
(priv); + GF_ASSERT (volname); + + ret = glusterd_volinfo_new (&volinfo); if (ret) goto out; + priv = THIS->private; + + strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME); + volinfo->snapshot = snap; + if (snap) + volinfo->is_snap_volume = _gf_true; + + ret = glusterd_store_update_volinfo (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to update volinfo " + "for %s volume", volname); + goto out; + } + ret = glusterd_store_retrieve_bricks (volinfo); if (ret) goto out; @@ -1113,52 +2470,193 @@ glusterd_store_retrieve_volume (char *volname) if (ret) goto out; - list_add_tail (&volinfo->vol_list, &priv->volumes); - + if (!snap) { + list_add_tail (&volinfo->vol_list, &priv->volumes); + } else { + ret = glusterd_volinfo_find (volinfo->parent_volname, + &origin_volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Parent volinfo " + "not found for %s volume", volname); + goto out; + } + glusterd_list_add_snapvol (origin_volinfo, volinfo); + } out: - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + if (ret) { + if (volinfo) + glusterd_volinfo_delete (volinfo); + volinfo = NULL; + } + + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + + return volinfo; +} + +inline void +glusterd_store_set_options_path (glusterd_conf_t *conf, char *path, size_t len) +{ + snprintf (path, len, "%s/options", conf->workdir); +} +int +_store_global_opts (dict_t *this, char *key, data_t *value, void *data) +{ + gf_store_handle_t *shandle = data; + + gf_store_save_value (shandle->fd, key, (char*)value->data); + return 0; +} + +int32_t +glusterd_store_options (xlator_t *this, dict_t *opts) +{ + gf_store_handle_t *shandle = NULL; + glusterd_conf_t *conf = NULL; + char path[PATH_MAX] = {0}; + int fd = -1; + int32_t ret = -1; + + conf = this->private; + glusterd_store_set_options_path (conf, path, sizeof (path)); + + ret = gf_store_handle_new (path, &shandle); + if (ret) + goto out; + + fd = gf_store_mkstemp (shandle); + if (fd <= 0) { + ret = -1; + goto out; + } + + shandle->fd = fd; + dict_foreach (opts, _store_global_opts, shandle); + shandle->fd = 0; + ret = gf_store_rename_tmppath (shandle); + if (ret) + goto out; +out: + gf_store_handle_destroy (shandle); + if (fd >=0 ) + close (fd); return ret; } +int32_t +glusterd_store_retrieve_options (xlator_t *this) +{ + char path[PATH_MAX] = {0}; + glusterd_conf_t *conf = NULL; + gf_store_handle_t *shandle = NULL; + gf_store_iter_t *iter = NULL; + char *key = NULL; + char *value = NULL; + gf_store_op_errno_t op_errno = 0; + int ret = -1; + + conf = this->private; + glusterd_store_set_options_path (conf, path, sizeof (path)); + + ret = gf_store_handle_retrieve (path, &shandle); + if (ret) + goto out; + + ret = gf_store_iter_new (shandle, &iter); + if (ret) + goto out; + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + while (!ret) { + ret = dict_set_dynstr (conf->opts, key, value); + if (ret) { + GF_FREE (key); + GF_FREE (value); + goto out; + } + GF_FREE (key); + key = NULL; + value = NULL; + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + } + if (op_errno != GD_STORE_EOF) + goto out; + ret = 0; +out: + gf_store_iter_destroy (iter); + gf_store_handle_destroy (shandle); + return ret; +} int32_t -glusterd_store_retrieve_volumes (xlator_t *this) +glusterd_store_retrieve_volumes (xlator_t *this, glusterd_snap_t *snap) { - int32_t ret = 0; - char path[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; - DIR *dir = NULL; - struct dirent *entry = NULL; + int32_t ret = -1; + char path[PATH_MAX] = 
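/* glusterd_store_options() above persists the cluster-wide option
 * dict with the same tmp+rename scheme, letting dict_foreach() emit
 * one key=value line per option:
 *
 *   fd = gf_store_mkstemp (shandle);
 *   shandle->fd = fd;
 *   dict_foreach (opts, _store_global_opts, shandle);
 *   gf_store_rename_tmppath (shandle);
 *
 * Note that _store_global_opts() discards the gf_store_save_value()
 * return value, so a short write would only surface at read time. */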
{0,}; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + glusterd_volinfo_t *volinfo = NULL; GF_ASSERT (this); priv = this->private; GF_ASSERT (priv); - snprintf (path, PATH_MAX, "%s/%s", priv->workdir, - GLUSTERD_VOLUME_DIR_PREFIX); + if (snap) + snprintf (path, PATH_MAX, "%s/snaps/%s", priv->workdir, + snap->snapname); + else + snprintf (path, PATH_MAX, "%s/%s", priv->workdir, + GLUSTERD_VOLUME_DIR_PREFIX); dir = opendir (path); if (!dir) { gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path); - ret = -1; goto out; } glusterd_for_each_entry (entry, dir); while (entry) { - ret = glusterd_store_retrieve_volume (entry->d_name); - if (ret) { + if ( entry->d_type != DT_DIR ) + goto next; + + volinfo = glusterd_store_retrieve_volume (entry->d_name, snap); + if (!volinfo) { gf_log ("", GF_LOG_ERROR, "Unable to restore " "volume: %s", entry->d_name); + ret = -1; goto out; } + + ret = glusterd_store_retrieve_rbstate (volinfo); + if (ret) { + /* Backward compatibility */ + gf_log ("", GF_LOG_INFO, "Creating a new rbstate " + "for volume: %s.", entry->d_name); + ret = glusterd_store_create_rbstate_shandle_on_absence (volinfo); + ret = glusterd_store_perform_rbstate_store (volinfo); + } + + ret = glusterd_store_retrieve_node_state (volinfo); + if (ret) { + /* Backward compatibility */ + gf_log ("", GF_LOG_INFO, "Creating a new node_state " + "for volume: %s.", entry->d_name); + glusterd_store_create_nodestate_sh_on_absence (volinfo); + ret = glusterd_store_perform_node_state_store (volinfo); + + } + +next: glusterd_for_each_entry (entry, dir); } + ret = 0; out: if (dir) closedir (dir); @@ -1168,84 +2666,481 @@ out: } int32_t -glusterd_store_update_volume (glusterd_volinfo_t *volinfo) +glusterd_resolve_snap_bricks (xlator_t *this, glusterd_snap_t *snap) { - int32_t ret = -1; - char buf[1024] = {0,}; + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; - int32_t brick_count = 0; + GF_ASSERT (this); + GF_VALIDATE_OR_GOTO (this->name, snap, out); - list_for_each_entry (tmp, &volinfo->bricks, brick_list) { - ret = glusterd_store_delete_brick (volinfo, tmp); - //if (ret) - // goto out; + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "resolve brick failed in restore"); + goto out; + } + } } - ret = glusterd_store_handle_truncate (volinfo->shandle); + ret = 0; - snprintf (buf, sizeof (buf), "%d", volinfo->type); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_TYPE, buf); - if (ret) +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + + return ret; +} + +int +glusterd_store_update_snap (glusterd_snap_t *snap) +{ + int ret = -1; + char *key = NULL; + char *value = NULL; + char snappath[PATH_MAX] = {0,}; + char path[PATH_MAX] = {0,}; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + gf_store_iter_t *iter = NULL; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + + this = THIS; + conf = this->private; + GF_ASSERT (snap); + + GLUSTERD_GET_SNAP_DIR (snappath, snap, conf); + + snprintf (path, sizeof (path), "%s/%s", snappath, + GLUSTERD_SNAP_INFO_FILE); + + ret = gf_store_handle_retrieve (path, &snap->shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "snap handle is NULL"); goto out; + } - snprintf (buf, sizeof (buf), "%d", volinfo->brick_count); - ret = 
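/* The restore loop above deliberately tolerates volumes that predate
 * the per-volume rbstate and node_state files: when retrieval fails,
 * the store is upgraded in place by creating the handle and writing a
 * fresh file, e.g.
 *
 *   ret = glusterd_store_retrieve_rbstate (volinfo);
 *   if (ret) {  // assumed to mean an older store with no rbstate yet
 *           glusterd_store_create_rbstate_shandle_on_absence (volinfo);
 *           glusterd_store_perform_rbstate_store (volinfo);
 *   }
 */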
glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_COUNT, buf); - if (ret) + ret = gf_store_iter_new (snap->shandle, &iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get new store " + "iter"); goto out; + } - snprintf (buf, sizeof (buf), "%d", volinfo->status); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_STATUS, buf); - if (ret) + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get next store " + "iter"); goto out; + } - snprintf (buf, sizeof (buf), "%d", volinfo->sub_count); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_SUB_COUNT, buf); - if (ret) + while (!ret) { + gf_log (this->name, GF_LOG_DEBUG, "key = %s value = %s", + key, value); + + if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_ID, + strlen (GLUSTERD_STORE_KEY_SNAP_ID))) { + ret = uuid_parse (value, snap->snap_id); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "Failed to parse uuid"); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_RESTORED, + strlen (GLUSTERD_STORE_KEY_SNAP_RESTORED))) { + snap->snap_restored = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_STATUS, + strlen (GLUSTERD_STORE_KEY_SNAP_STATUS))) { + snap->snap_status = atoi (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_DESC, + strlen (GLUSTERD_STORE_KEY_SNAP_DESC))) { + snap->description = gf_strdup (value); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, + strlen (GLUSTERD_STORE_KEY_SNAP_TIMESTAMP))) { + snap->time_stamp = atoi (value); + } + + GF_FREE (key); + GF_FREE (value); + key = NULL; + value = NULL; + + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + } + + if (op_errno != GD_STORE_EOF) goto out; - snprintf (buf, sizeof (buf), "%d", volinfo->version); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_VERSION, buf); - if (ret) + ret = gf_store_iter_destroy (iter); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to destroy store " + "iter"); + } + +out: + return ret; +} + +int32_t +glusterd_store_retrieve_snap (char *snapname) +{ + int32_t ret = -1; + dict_t *dict = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snapname); + + dict = dict_new(); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create dict"); + ret = -1; goto out; + } - snprintf (buf, sizeof (buf), "%d", volinfo->transport_type); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_TRANSPORT, buf); - if (ret) + snap = glusterd_new_snap_object (); + if (!snap) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create " + "snap object"); goto out; + } - uuid_unparse (volinfo->volume_id, buf); - ret = glusterd_store_save_value (volinfo->shandle, - GLUSTERD_STORE_KEY_VOL_ID, buf); - if (ret) + strncpy (snap->snapname, snapname, sizeof (snap->snapname) - 1); + ret = glusterd_store_update_snap (snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to update snapshot " + "for %s snap", snapname); goto out; + } - list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - ret = glusterd_store_create_brick (volinfo, brickinfo, - brick_count); + ret = glusterd_store_retrieve_volumes (this, snap); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to retrieve " + "snap volumes for snap %s", snapname); + goto out; + } + + /* Unlike bricks of normal volumes 
which are resolved at the end + the glusterd restore, the bricks belonging to the snap volumes of + each snap should be resolved as part of snapshot restore itself. + Because if the snapshot has to be removed, resolving the bricks + helps glusterd identify which bricks carry its own uuid, + and kill those bricks. + */ + ret = glusterd_resolve_snap_bricks (this, snap); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "resolving the snap bricks" + " failed (snap: %s)", snap?snap->snapname:""); + + /* When the snapshot command from cli is received, the on disk and + in memory structures for the snapshot are created, with the status + marked as GD_SNAP_STATUS_INIT. Once the backend snapshot is + taken, the status is changed to GD_SNAP_STATUS_IN_USE. If glusterd + dies after taking the backend snapshot, but before updating the + status, then when glusterd comes up, it should treat that snapshot + as a failed snapshot and clean it up. + */ + if (snap->snap_status != GD_SNAP_STATUS_IN_USE) { + ret = glusterd_snap_remove (dict, snap, _gf_true, _gf_true); + if (ret) + gf_log (this->name, GF_LOG_WARNING, "failed to remove" + " the snapshot %s", snap->snapname); + goto out; + } + + /* TODO: list_add_order can do 'N-square' comparisons and + is not efficient. Find a better solution to store the snap + in order */ + list_add_order (&snap->snap_list, &priv->snapshots, + glusterd_compare_snap_time); + +out: + if (dict) + dict_unref (dict); + + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +/* Read the missed_snap_list and update the in-memory structs */ +int32_t +glusterd_store_retrieve_missed_snaps_list (xlator_t *this) +{ + char buf[PATH_MAX] = ""; + char path[PATH_MAX] = ""; + char *missed_node_info = NULL; + char *brick_path = NULL; + char *value = NULL; + char *save_ptr = NULL; + FILE *fp = NULL; + int32_t brick_num = -1; + int32_t snap_op = -1; + int32_t snap_status = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + gf_store_op_errno_t store_errno = GD_STORE_SUCCESS; + + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + /* Get the path of the missed_snap_list */ + glusterd_store_missed_snaps_list_path_set (path, sizeof(path)); + + fp = fopen (path, "r"); + if (!fp) { + /* If errno is ENOENT then there are no missed snaps yet */ + if (errno != ENOENT) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to open %s. " + "Error: %s", path, strerror(errno)); + } else { + gf_log (this->name, GF_LOG_INFO, + "No missed snaps list."); + ret = 0; + } + goto out; + } + + do { + ret = gf_store_read_and_tokenize (fp, buf, + &missed_node_info, &value, + &store_errno); + if (ret) { + if (store_errno == GD_STORE_EOF) { + gf_log (this->name, + GF_LOG_DEBUG, + "EOF for missed_snap_list"); + ret = 0; + break; + } + gf_log (this->name, GF_LOG_ERROR, + "Failed to fetch data from " + "missed_snaps_list. 
Error: %s", + gf_store_strerror (store_errno)); + goto out; + } + + /* Fetch the brick_num, brick_path, snap_op and snap status */ + brick_num = atoi(strtok_r (value, ":", &save_ptr)); + brick_path = strtok_r (NULL, ":", &save_ptr); + snap_op = atoi(strtok_r (NULL, ":", &save_ptr)); + snap_status = atoi(strtok_r (NULL, ":", &save_ptr)); + + if (!missed_node_info || !brick_path || + brick_num < 1 || snap_op < 1 || + snap_status < 1) { + gf_log (this->name, GF_LOG_ERROR, + "Invalid missed_snap_entry"); + ret = -1; + goto out; + } + + ret = glusterd_store_missed_snaps_list (missed_node_info, + brick_num, + brick_path, + snap_op, + snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to store missed snaps_list"); + goto out; + } + + } while (store_errno == GD_STORE_SUCCESS); ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_store_retrieve_snaps (xlator_t *this) +{ + int32_t ret = 0; + char path[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + DIR *dir = NULL; + struct dirent *entry = NULL; + + GF_ASSERT (this); + priv = this->private; + + GF_ASSERT (priv); + + snprintf (path, PATH_MAX, "%s/snaps", priv->workdir); + dir = opendir (path); + + if (!dir) { + /* If snaps dir doesn't exists ignore the error for + backward compatibility */ + if (errno != ENOENT) { + ret = -1; + gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path); + } + goto out; + } + + glusterd_for_each_entry (entry, dir); + + while (entry) { + if (entry->d_type == DT_DIR) { + ret = glusterd_store_retrieve_snap (entry->d_name); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to restore snapshot: %s", + entry->d_name); + goto out; + } + } + + glusterd_for_each_entry (entry, dir); + } + + /* Retrieve missed_snaps_list */ + ret = glusterd_store_retrieve_missed_snaps_list (this); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to retrieve missed_snaps_list"); + goto out; + } out: + if (dir) + closedir (dir); gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; } +/* Writes all the contents of conf->missed_snap_list */ +int32_t +glusterd_store_write_missed_snapinfo (int32_t fd) +{ + char value[PATH_MAX] = ""; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_missed_snap_info *missed_snapinfo = NULL; + glusterd_snap_op_t *snap_opinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + + priv = this->private; + GF_ASSERT (priv); + + /* Write the missed_snap_entry */ + list_for_each_entry (missed_snapinfo, &priv->missed_snaps_list, + missed_snaps) { + list_for_each_entry (snap_opinfo, + &missed_snapinfo->snap_ops, + snap_ops_list) { + snprintf (value, sizeof(value), "%d:%s:%d:%d", + snap_opinfo->brick_num, + snap_opinfo->brick_path, + snap_opinfo->op, snap_opinfo->status); + ret = gf_store_save_value + (fd, + missed_snapinfo->node_snap_info, + value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to write missed snapinfo"); + goto out; + } + } + } + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +/* Adds the missed snap entries to the in-memory conf->missed_snap_list * + * and writes them to disk */ +int32_t +glusterd_store_update_missed_snaps (dict_t *dict, int32_t missed_snap_count) +{ + int32_t fd = -1; + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT(this); + GF_ASSERT(dict); + + priv = this->private; + GF_ASSERT (priv); + + if (missed_snap_count < 1) { + gf_log 
(this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = glusterd_store_create_missed_snaps_list_shandle_on_absence (); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to obtain " + "missed_snaps_list store handle."); + goto out; + } + + fd = gf_store_mkstemp (priv->missed_snaps_list_shandle); + if (fd <= 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to create tmp file"); + ret = -1; + goto out; + } + + ret = glusterd_add_missed_snaps_to_list (dict, missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snaps to list"); + goto out; + } + + ret = glusterd_store_write_missed_snapinfo (fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to write missed snaps to disk"); + goto out; + } + + ret = gf_store_rename_tmppath (priv->missed_snaps_list_shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to rename the tmp file"); + goto out; + } +out: + if (ret && (fd > 0)) { + ret = gf_store_unlink_tmppath (priv->missed_snaps_list_shandle); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to unlink the tmp file"); + } + ret = -1; + } + + if (fd > 0) + close (fd); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + int32_t glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo) { @@ -1253,7 +3148,6 @@ glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo) glusterd_conf_t *priv = NULL; char peerdir[PATH_MAX] = {0,}; char filepath[PATH_MAX] = {0,}; - char str[512] = {0,}; char hostname_path[PATH_MAX] = {0,}; @@ -1277,9 +3171,9 @@ glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo) goto out; } } else { - uuid_unparse (peerinfo->uuid, str); - snprintf (filepath, PATH_MAX, "%s/%s", peerdir, str); + snprintf (filepath, PATH_MAX, "%s/%s", peerdir, + uuid_utoa (peerinfo->uuid)); snprintf (hostname_path, PATH_MAX, "%s/%s", peerdir, peerinfo->hostname); @@ -1294,105 +3188,200 @@ glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo) ret = 0; out: - if (peerinfo->shandle) - glusterd_store_handle_destroy(peerinfo->shandle); + if (peerinfo->shandle) { + gf_store_handle_destroy (peerinfo->shandle); + peerinfo->shandle = NULL; + } gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; } +void +glusterd_store_peerinfo_dirpath_set (char *path, size_t len) +{ + glusterd_conf_t *priv = NULL; + GF_ASSERT (path); + GF_ASSERT (len >= PATH_MAX); + + priv = THIS->private; + snprintf (path, len, "%s/peers", priv->workdir); +} int32_t -glusterd_store_update_peerinfo (glusterd_peerinfo_t *peerinfo) +glusterd_store_create_peer_dir () { - int32_t ret = -1; - struct stat stbuf = {0,}; - glusterd_conf_t *priv = NULL; - char peerdir[PATH_MAX] = {0,}; - char filepath[PATH_MAX] = {0,}; - char str[512] = {0,}; - char buf[4096] = {0,}; - glusterd_peer_hostname_t *hname = NULL; - int i = 0; - char hostname_path[PATH_MAX] = {0,}; + int32_t ret = 0; + char path[PATH_MAX]; + glusterd_store_peerinfo_dirpath_set (path, sizeof (path)); + ret = gf_store_mkdir (path); - if (!peerinfo) { - ret = 0; - goto out; - } + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} - priv = THIS->private; +static void +glusterd_store_uuid_peerpath_set (glusterd_peerinfo_t *peerinfo, char *peerfpath, + size_t len) +{ + char peerdir[PATH_MAX]; + char str[50] = {0}; - snprintf (peerdir, PATH_MAX, "%s/peers", priv->workdir); + GF_ASSERT (peerinfo); + GF_ASSERT (peerfpath); + GF_ASSERT (len >= PATH_MAX); - ret = stat (peerdir, &stbuf); + 
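/* Peers are stored under $workdir/peers/, one flat file per peer.
 * While a peer's uuid is still unknown the file is named after the
 * hostname; once the uuid is learnt, glusterd_store_create_peer_shandle()
 * below drops the hostname-named file and re-creates it as
 * peers/<uuid>, containing roughly (key spellings assumed):
 *
 *   uuid=<peer uuid>
 *   state=3
 *   hostname1=server2.example.com */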
glusterd_store_peerinfo_dirpath_set (peerdir, sizeof (peerdir)); + uuid_unparse (peerinfo->uuid, str); + snprintf (peerfpath, len, "%s/%s", peerdir, str); +} - if (-1 == ret) { - ret = mkdir (peerdir, 0777); - if (ret) - goto out; - } +static void +glusterd_store_hostname_peerpath_set (glusterd_peerinfo_t *peerinfo, + char *peerfpath, size_t len) +{ + char peerdir[PATH_MAX]; - if (uuid_is_null (peerinfo->uuid)) { + GF_ASSERT (peerinfo); + GF_ASSERT (peerfpath); + GF_ASSERT (len >= PATH_MAX); - if (peerinfo->hostname) { - snprintf (filepath, PATH_MAX, "%s/%s", peerdir, - peerinfo->hostname); - } else { - ret = 0; - goto out; - } - } else { - uuid_unparse (peerinfo->uuid, str); + glusterd_store_peerinfo_dirpath_set (peerdir, sizeof (peerdir)); + snprintf (peerfpath, len, "%s/%s", peerdir, peerinfo->hostname); +} - snprintf (filepath, PATH_MAX, "%s/%s", peerdir, str); - snprintf (hostname_path, PATH_MAX, "%s/%s", - peerdir, peerinfo->hostname); +int32_t +glusterd_store_peerinfo_hostname_shandle_create (glusterd_peerinfo_t *peerinfo) +{ + char peerfpath[PATH_MAX]; + int32_t ret = -1; - ret = stat (hostname_path, &stbuf); + glusterd_store_hostname_peerpath_set (peerinfo, peerfpath, + sizeof (peerfpath)); + ret = gf_store_handle_create_on_absence (&peerinfo->shandle, + peerfpath); + return ret; +} - if (!ret) { - gf_log ("", GF_LOG_DEBUG, "Destroying store handle"); - glusterd_store_handle_destroy (peerinfo->shandle); - peerinfo->shandle = NULL; - ret = remove (hostname_path); - } +int32_t +glusterd_store_peerinfo_uuid_shandle_create (glusterd_peerinfo_t *peerinfo) +{ + char peerfpath[PATH_MAX]; + int32_t ret = -1; + + glusterd_store_uuid_peerpath_set (peerinfo, peerfpath, + sizeof (peerfpath)); + ret = gf_store_handle_create_on_absence (&peerinfo->shandle, + peerfpath); + return ret; +} + +int32_t +glusterd_peerinfo_hostname_shandle_check_destroy (glusterd_peerinfo_t *peerinfo) +{ + char peerfpath[PATH_MAX]; + int32_t ret = -1; + struct stat stbuf = {0,}; + + glusterd_store_hostname_peerpath_set (peerinfo, peerfpath, + sizeof (peerfpath)); + ret = stat (peerfpath, &stbuf); + if (!ret) { + if (peerinfo->shandle) + gf_store_handle_destroy (peerinfo->shandle); + peerinfo->shandle = NULL; + ret = unlink (peerfpath); } + return ret; +} +int32_t +glusterd_store_create_peer_shandle (glusterd_peerinfo_t *peerinfo) +{ + int32_t ret = 0; - if (!peerinfo->shandle) { - ret = glusterd_store_handle_new (filepath, &peerinfo->shandle); - if (ret) - goto out; - ret = glusterd_store_handle_truncate (peerinfo->shandle); + GF_ASSERT (peerinfo); + + if (glusterd_peerinfo_is_uuid_unknown (peerinfo)) { + ret = glusterd_store_peerinfo_hostname_shandle_create (peerinfo); } else { - ret = glusterd_store_handle_truncate (peerinfo->shandle); - if (ret) - goto out; + ret = glusterd_peerinfo_hostname_shandle_check_destroy (peerinfo); + ret = glusterd_store_peerinfo_uuid_shandle_create (peerinfo); } + return ret; +} + +int32_t +glusterd_store_peer_write (int fd, glusterd_peerinfo_t *peerinfo) +{ + char buf[50] = {0}; + int32_t ret = 0; - ret = glusterd_store_save_value (peerinfo->shandle, - GLUSTERD_STORE_KEY_PEER_UUID, str); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_UUID, + uuid_utoa (peerinfo->uuid)); if (ret) goto out; snprintf (buf, sizeof (buf), "%d", peerinfo->state.state); - ret = glusterd_store_save_value (peerinfo->shandle, - GLUSTERD_STORE_KEY_PEER_STATE, buf); + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_STATE, buf); if (ret) goto out; - list_for_each_entry (hname, &peerinfo->hostnames, 
hostname_list) { - i++; - snprintf (buf, sizeof (buf), "%s%d", - GLUSTERD_STORE_KEY_PEER_HOSTNAME, i); - ret = glusterd_store_save_value (peerinfo->shandle, - buf, hname->hostname); - if (ret) - goto out; + ret = gf_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_HOSTNAME "1", + peerinfo->hostname); + if (ret) + goto out; + +out: + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_store_perform_peer_store (glusterd_peerinfo_t *peerinfo) +{ + int fd = -1; + int32_t ret = -1; + + GF_ASSERT (peerinfo); + + fd = gf_store_mkstemp (peerinfo->shandle); + if (fd <= 0) { + ret = -1; + goto out; } + ret = glusterd_store_peer_write (fd, peerinfo); + if (ret) + goto out; + + ret = gf_store_rename_tmppath (peerinfo->shandle); +out: + if (ret && (fd > 0)) + gf_store_unlink_tmppath (peerinfo->shandle); + if (fd > 0) + close (fd); + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_store_peerinfo (glusterd_peerinfo_t *peerinfo) +{ + int32_t ret = -1; + + GF_ASSERT (peerinfo); + + ret = glusterd_store_create_peer_dir (); + if (ret) + goto out; + + ret = glusterd_store_create_peer_shandle (peerinfo); + if (ret) + goto out; + + ret = glusterd_store_perform_peer_store (peerinfo); out: gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; @@ -1410,12 +3399,13 @@ glusterd_store_retrieve_peers (xlator_t *this) uuid_t uuid = {0,}; char *hostname = NULL; int32_t state = 0; - glusterd_store_handle_t *shandle = NULL; + gf_store_handle_t *shandle = NULL; char filepath[PATH_MAX] = {0,}; - glusterd_store_iter_t *iter = NULL; + gf_store_iter_t *iter = NULL; char *key = NULL; char *value = NULL; glusterd_peerctx_args_t args = {0}; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; GF_ASSERT (this); priv = this->private; @@ -1437,15 +3427,17 @@ glusterd_store_retrieve_peers (xlator_t *this) while (entry) { snprintf (filepath, PATH_MAX, "%s/%s", path, entry->d_name); - ret = glusterd_store_handle_new (filepath, &shandle); + ret = gf_store_handle_retrieve (filepath, &shandle); if (ret) goto out; - ret = glusterd_store_iter_new (shandle, &iter); + ret = gf_store_iter_new (shandle, &iter); if (ret) goto out; - ret = glusterd_store_iter_get_next (iter, &key, &value); + ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); + if (ret) + goto out; while (!ret) { @@ -1471,15 +3463,18 @@ glusterd_store_retrieve_peers (xlator_t *this) key = NULL; value = NULL; - ret = glusterd_store_iter_get_next (iter, &key, &value); + ret = gf_store_iter_get_next (iter, &key, &value, + &op_errno); } + if (op_errno != GD_STORE_EOF) + goto out; - (void) glusterd_store_iter_destroy (iter); + (void) gf_store_iter_destroy (iter); - args.mode = GD_MODE_SWITCH_ON; ret = glusterd_friend_add (hostname, 0, state, &uuid, - NULL, &peerinfo, 1, &args); + &peerinfo, 1, NULL); + GF_FREE (hostname); if (ret) goto out; @@ -1487,6 +3482,13 @@ glusterd_store_retrieve_peers (xlator_t *this) glusterd_for_each_entry (entry, dir); } + args.mode = GD_MODE_ON; + list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + ret = glusterd_friend_rpc_create (this, peerinfo, &args); + if (ret) + goto out; + } + out: if (dir) closedir (dir); @@ -1528,13 +3530,23 @@ out: int32_t glusterd_restore () { - int ret = -1; + int32_t ret = -1; xlator_t *this = NULL; this = THIS; - ret = glusterd_store_retrieve_volumes (this); + ret = glusterd_restore_op_version (this); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to restore op_version"); + goto out; + } + + ret = 
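/* restore order matters: op-version (above), then volumes, then the
   snapshots read back from the store */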
glusterd_store_retrieve_volumes (this, NULL); + if (ret) + goto out; + ret = glusterd_store_retrieve_snaps (this); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h index 650211440..1b5cebc0c 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.h +++ b/xlators/mgmt/glusterd/src/glusterd-store.h @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _GLUSTERD_HA_H_ #define _GLUSTERD_HA_H_ @@ -30,6 +20,7 @@ #include "glusterfs.h" #include "xlator.h" +#include "run.h" #include "logging.h" #include "call-stub.h" #include "fd.h" @@ -37,26 +28,62 @@ #include "glusterd.h" #include "rpcsvc.h" - -#define GLUSTERD_STORE_UUID_KEY "UUID" - -#define GLUSTERD_STORE_KEY_VOL_TYPE "type" -#define GLUSTERD_STORE_KEY_VOL_COUNT "count" -#define GLUSTERD_STORE_KEY_VOL_STATUS "status" -#define GLUSTERD_STORE_KEY_VOL_PORT "port" -#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count" -#define GLUSTERD_STORE_KEY_VOL_BRICK "brick" -#define GLUSTERD_STORE_KEY_VOL_VERSION "version" -#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type" -#define GLUSTERD_STORE_KEY_VOL_ID "volume-id" - -#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname" -#define GLUSTERD_STORE_KEY_BRICK_PATH "path" -#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port" - -#define GLUSTERD_STORE_KEY_PEER_UUID "uuid" -#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname" -#define GLUSTERD_STORE_KEY_PEER_STATE "state" +typedef enum glusterd_store_ver_ac_{ + GLUSTERD_VOLINFO_VER_AC_NONE = 0, + GLUSTERD_VOLINFO_VER_AC_INCREMENT = 1, + GLUSTERD_VOLINFO_VER_AC_DECREMENT = 2, +} glusterd_volinfo_ver_ac_t; + + +#define GLUSTERD_STORE_UUID_KEY "UUID" + +#define GLUSTERD_STORE_KEY_VOL_TYPE "type" +#define GLUSTERD_STORE_KEY_VOL_COUNT "count" +#define GLUSTERD_STORE_KEY_VOL_STATUS "status" +#define GLUSTERD_STORE_KEY_VOL_PORT "port" +#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count" +#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count" +#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count" +#define GLUSTERD_STORE_KEY_VOL_BRICK "brick" +#define GLUSTERD_STORE_KEY_VOL_VERSION "version" +#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type" +#define GLUSTERD_STORE_KEY_VOL_ID "volume-id" +#define GLUSTERD_STORE_KEY_VOL_IS_RESTORED "is-volume-restored" +#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status" +#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src" +#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst" +#define 
GLUSTERD_STORE_KEY_RB_DST_PORT "rb_port" +#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status" +#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op" +#define GLUSTERD_STORE_KEY_USERNAME "username" +#define GLUSTERD_STORE_KEY_PASSWORD "password" +#define GLUSTERD_STORE_KEY_PARENT_VOLNAME "parent_volname" +#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version" +#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version" + +#define GLUSTERD_STORE_KEY_SNAP_NAME "name" +#define GLUSTERD_STORE_KEY_SNAP_ID "snap-id" +#define GLUSTERD_STORE_KEY_SNAP_DESC "desc" +#define GLUSTERD_STORE_KEY_SNAP_TIMESTAMP "time-stamp" +#define GLUSTERD_STORE_KEY_SNAP_STATUS "status" +#define GLUSTERD_STORE_KEY_SNAP_RESTORED "snap-restored" +#define GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT "snap-max-hard-limit" +#define GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT "snap-max-soft-limit" + +#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname" +#define GLUSTERD_STORE_KEY_BRICK_PATH "path" +#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port" +#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port" +#define GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED "decommissioned" +#define GLUSTERD_STORE_KEY_BRICK_VGNAME "vg" +#define GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH "device_path" +#define GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS "snap-status" + +#define GLUSTERD_STORE_KEY_PEER_UUID "uuid" +#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname" +#define GLUSTERD_STORE_KEY_PEER_STATE "state" + +#define GLUSTERD_STORE_KEY_VOL_CAPS "caps" #define glusterd_for_each_entry(entry, dir) \ do {\ @@ -64,52 +91,74 @@ if (dir) {\ entry = readdir (dir);\ while (entry && (!strcmp (entry->d_name, ".") ||\ + !fnmatch ("*.tmp", entry->d_name, 0) ||\ !strcmp (entry->d_name, ".."))) {\ entry = readdir (dir);\ }\ }\ } while (0); \ + int32_t -glusterd_store_create_volume (glusterd_volinfo_t *volinfo); +glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac); int32_t glusterd_store_delete_volume (glusterd_volinfo_t *volinfo); int32_t -glusterd_store_uuid (); +glusterd_store_delete_snap (glusterd_snap_t *snap); int32_t -glusterd_store_handle_new (char *path, glusterd_store_handle_t **handle); +glusterd_retrieve_uuid (); int32_t -glusterd_store_save_value (glusterd_store_handle_t *handle, - char *key, char *value); +glusterd_store_peerinfo (glusterd_peerinfo_t *peerinfo); int32_t -glusterd_store_retrieve_value (glusterd_store_handle_t *handle, - char *key, char **value); +glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo); int32_t -glusterd_store_update_volume (glusterd_volinfo_t *volinfo); +glusterd_store_delete_brick (glusterd_brickinfo_t *brickinfo, + char *delete_path); int32_t -glusterd_retrieve_uuid (); +glusterd_restore (); + +void +glusterd_perform_volinfo_version_action (glusterd_volinfo_t *volinfo, + glusterd_volinfo_ver_ac_t ac); +gf_boolean_t +glusterd_store_is_valid_brickpath (char *volname, char *brick); int32_t -glusterd_store_update_peerinfo (glusterd_peerinfo_t *peerinfo); +glusterd_store_perform_node_state_store (glusterd_volinfo_t *volinfo); + +int +glusterd_retrieve_op_version (xlator_t *this, int *op_version); + +int +glusterd_store_global_info (xlator_t *this); int32_t -glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo); +glusterd_store_retrieve_options (xlator_t *this); + +int32_t +glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo); int32_t -glusterd_store_delete_brick (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); +glusterd_store_options (xlator_t *this, 
dict_t *opts); + +void +glusterd_replace_slash_with_hyphen (char *str); int32_t -glusterd_store_handle_destroy (glusterd_store_handle_t *handle); +glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo); int32_t -glusterd_restore (); +glusterd_store_snap (glusterd_snap_t *snap); + +int32_t +glusterd_store_update_missed_snaps (dict_t *dict, + int32_t missed_snap_count); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c new file mode 100644 index 000000000..438df8266 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -0,0 +1,1639 @@ +/* + Copyright (c) 2012-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +/* rpc related syncops */ +#include "rpc-clnt.h" +#include "protocol-common.h" +#include "xdr-generic.h" +#include "glusterd1-xdr.h" +#include "glusterd-syncop.h" + +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-utils.h" +#include "glusterd-locks.h" + +extern glusterd_op_info_t opinfo; + +void +gd_synctask_barrier_wait (struct syncargs *args, int count) +{ + glusterd_conf_t *conf = THIS->private; + + synclock_unlock (&conf->big_lock); + synctask_barrier_wait (args, count); + synclock_lock (&conf->big_lock); + + syncbarrier_destroy (&args->barrier); +} + +static void +gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, + glusterd_peerinfo_t *peerinfo, u_char *uuid) +{ + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + int len = -1; + char *peer_str = NULL; + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (uuid); + + if (op_errstr && strcmp (op_errstr, "")) { + len = snprintf (err_str, sizeof(err_str) - 1, + "Error: %s", op_errstr); + err_str[len] = '\0'; + } + + switch (op_code){ + case GLUSTERD_MGMT_V3_LOCK: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Locking failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_UNLOCK: + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Unlocking failed " + "on %s. 
%s", peer_str, err_str); + break; + } + } + op_err[len] = '\0'; + + if (args->errstr) { + len = snprintf (err_str, sizeof(err_str) - 1, + "%s\n%s", args->errstr, + op_err); + GF_FREE (args->errstr); + args->errstr = NULL; + } else + len = snprintf (err_str, sizeof(err_str) - 1, + "%s", op_err); + err_str[len] = '\0'; + + gf_log ("", GF_LOG_ERROR, "%s", op_err); + args->errstr = gf_strdup (err_str); + } + + return; +} + +static void +gd_collate_errors (struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, + glusterd_peerinfo_t *peerinfo, u_char *uuid) +{ + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + int len = -1; + char *peer_str = NULL; + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (uuid); + + if (op_errstr && strcmp (op_errstr, "")) { + len = snprintf (err_str, sizeof(err_str) - 1, + "Error: %s", op_errstr); + err_str[len] = '\0'; + } + + switch (op_code){ + case GLUSTERD_MGMT_CLUSTER_UNLOCK : + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Unlocking failed on %s. %s", + peer_str, err_str); + break; + } + case GLUSTERD_MGMT_STAGE_OP : + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Staging failed on %s. %s", + peer_str, err_str); + break; + } + case GLUSTERD_MGMT_COMMIT_OP : + { + len = snprintf (op_err, sizeof(op_err) - 1, + "Commit failed on %s. %s", + peer_str, err_str); + break; + } + } + op_err[len] = '\0'; + + if (args->errstr) { + len = snprintf (err_str, sizeof(err_str) - 1, + "%s\n%s", args->errstr, + op_err); + GF_FREE (args->errstr); + args->errstr = NULL; + } else + len = snprintf (err_str, sizeof(err_str) - 1, + "%s", op_err); + err_str[len] = '\0'; + + gf_log ("", GF_LOG_ERROR, "%s", op_err); + args->errstr = gf_strdup (err_str); + } + + return; +} + +void +gd_syncargs_init (struct syncargs *args, dict_t *op_ctx) +{ + args->dict = op_ctx; + pthread_mutex_init (&args->lock_dict, NULL); +} + +static void +gd_stage_op_req_free (gd1_mgmt_stage_op_req *req) +{ + if (!req) + return; + + GF_FREE (req->buf.buf_val); + GF_FREE (req); +} + +static void +gd_commit_op_req_free (gd1_mgmt_commit_op_req *req) +{ + if (!req) + return; + + GF_FREE (req->buf.buf_val); + GF_FREE (req); +} + +static void +gd_brick_op_req_free (gd1_mgmt_brick_op_req *req) +{ + if (!req) + return; + + if (strcmp (req->name, "") != 0) + GF_FREE (req->name); + GF_FREE (req->input.input_val); + GF_FREE (req); +} + +int +gd_syncop_submit_request (struct rpc_clnt *rpc, void *req, void *local, + void *cookie, rpc_clnt_prog_t *prog, int procnum, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc) +{ + int ret = -1; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + int count = 0; + struct iovec iov = {0, }; + ssize_t req_size = 0; + call_frame_t *frame = NULL; + + GF_ASSERT (rpc); + if (!req) + goto out; + + req_size = xdr_sizeof (xdrproc, req); + iobuf = iobuf_get2 (rpc->ctx->iobuf_pool, req_size); + if (!iobuf) + goto out; + + iobref = iobref_new (); + if (!iobref) + goto out; + + frame = create_frame (THIS, THIS->ctx->pool); + if (!frame) + goto out; + + iobref_add (iobref, iobuf); + + iov.iov_base = iobuf->ptr; + iov.iov_len = iobuf_pagesize (iobuf); + + /* Create the xdr payload */ + ret = xdr_serialize_generic (iov, req, xdrproc); + if (ret == -1) + goto out; + + iov.iov_len = ret; + count = 1; + + frame->local = local; + frame->cookie = cookie; + + /* Send the msg */ + ret = rpc_clnt_submit (rpc, prog, procnum, cbkfn, + 
&iov, count, NULL, 0, iobref, + frame, NULL, 0, NULL, 0, NULL); + + /* TODO: do we need to start ping also? */ + +out: + iobref_unref (iobref); + iobuf_unref (iobuf); + + return ret; +} + +/* Defined in glusterd-rpc-ops.c */ +extern struct rpc_clnt_program gd_mgmt_prog; +extern struct rpc_clnt_program gd_brick_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; + +int +glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) +{ + int ret = 0; + + switch (op) { + case GD_OP_REPLACE_BRICK: + ret = glusterd_rb_use_rsp_dict (aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_SYNC_VOLUME: + ret = glusterd_sync_use_rsp_dict (aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_PROFILE_VOLUME: + ret = glusterd_profile_volume_use_rsp_dict (aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_GSYNC_CREATE: + break; + + case GD_OP_GSYNC_SET: + ret = glusterd_gsync_use_rsp_dict (aggr, rsp, NULL); + if (ret) + goto out; + break; + + case GD_OP_STATUS_VOLUME: + ret = glusterd_volume_status_copy_to_op_ctx_dict (aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_REBALANCE: + case GD_OP_DEFRAG_BRICK_VOLUME: + ret = glusterd_volume_rebalance_use_rsp_dict (aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_HEAL_VOLUME: + ret = glusterd_volume_heal_use_rsp_dict (aggr, rsp); + if (ret) + goto out; + + break; + + case GD_OP_QUOTA: + case GD_OP_CLEARLOCKS_VOLUME: + ret = glusterd_use_rsp_dict (aggr, rsp); + if (ret) + goto out; + + break; + + case GD_OP_SYS_EXEC: + ret = glusterd_sys_exec_output_rsp_dict (aggr, rsp); + if (ret) + goto out; + break; + + case GD_OP_SNAP: + ret = glusterd_snap_use_rsp_dict (aggr, rsp); + if (ret) + goto out; + break; + + default: + break; + } +out: + return ret; +} + +int32_t +_gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_cluster_lock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + /* Set peer as locked, so we unlock only the locked peers */ + if (rsp.op_ret == 0) + peerinfo->locked = _gf_true; + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_CLUSTER_LOCK, peerinfo, rsp.uuid); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_mgmt_lock_cbk); +} + +int +gd_syncop_mgmt_lock (glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid) +{ + int ret = -1; + gd1_mgmt_cluster_lock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + + uuid_copy (req.uuid, my_uuid); + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_prog, + GLUSTERD_MGMT_CLUSTER_LOCK, + gd_syncop_mgmt_lock_cbk, + (xdrproc_t) xdr_gd1_mgmt_cluster_lock_req); + synclock_lock (&conf->big_lock); + return ret; +} + +int32_t +gd_syncop_mgmt_v3_lock_cbk_fn (struct 
rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_lock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + + GF_ASSERT(req); + GF_ASSERT(iov); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_LOCK, + peerinfo, rsp.uuid); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_v3_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_syncop_mgmt_v3_lock_cbk_fn); +} + +int +gd_syncop_mgmt_v3_lock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) +{ + int ret = -1; + gd1_mgmt_v3_lock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + uuid_copy (req.txn_id, txn_id); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_LOCK, + gd_syncop_mgmt_v3_lock_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_lock_req); + synclock_lock (&conf->big_lock); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +gd_syncop_mgmt_v3_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_v3_unlock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + + GF_ASSERT(req); + GF_ASSERT(iov); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + /* On a successful unlock the peer no longer holds the lock; + * clear the in-memory flag */ + if (rsp.op_ret == 0) + peerinfo->locked = _gf_false; + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_UNLOCK, + peerinfo, rsp.uuid); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_v3_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + gd_syncop_mgmt_v3_unlock_cbk_fn); +} + +int +gd_syncop_mgmt_v3_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) +{ + int ret = -1; + gd1_mgmt_v3_unlock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo);
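+        /* Callers run this from a synctask, one request per transaction
+         * peer, then wait for all the unlock callbacks on the syncop
+         * barrier -- a minimal sketch, mirroring gd_unlock_op_phase()
+         * below:
+         *
+         *   synctask_barrier_init ((&args));
+         *   list_for_each_entry (peerinfo, peers, op_peers_list) {
+         *           gd_syncop_mgmt_v3_unlock (op_ctx, peerinfo, &args,
+         *                                     MY_UUID, tmp_uuid, txn_id);
+         *           peer_cnt++;
+         *   }
+         *   gd_synctask_barrier_wait ((&args), peer_cnt);
+         */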
+ GF_ASSERT(args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + uuid_copy (req.txn_id, txn_id); + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_UNLOCK, + gd_syncop_mgmt_v3_unlock_cbk, + (xdrproc_t) xdr_gd1_mgmt_v3_unlock_req); + synclock_lock (&conf->big_lock); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +_gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_cluster_unlock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + peerinfo->locked = _gf_false; + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_CLUSTER_UNLOCK, peerinfo, rsp.uuid); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_mgmt_unlock_cbk); +} + + +int +gd_syncop_mgmt_unlock (glusterd_peerinfo_t *peerinfo, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid) +{ + int ret = -1; + gd1_mgmt_cluster_unlock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + + uuid_copy (req.uuid, my_uuid); + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_prog, + GLUSTERD_MGMT_CLUSTER_UNLOCK, + gd_syncop_mgmt_unlock_cbk, + (xdrproc_t) xdr_gd1_mgmt_cluster_lock_req); + synclock_lock (&conf->big_lock); + return ret; +} + +int32_t +_gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + gd1_mgmt_stage_op_rsp rsp = {{0},}; + struct syncargs *args = NULL; + xlator_t *this = NULL; + dict_t *rsp_dict = NULL; + call_frame_t *frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + int op_ret = -1; + int op_errno = -1; + + this = THIS; + frame = myframe; + args = frame->local; + frame->local = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp); + if (ret < 0) + goto out; + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &rsp_dict); + if (ret < 0) { + GF_FREE (rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Staging response " + "for 'Volume %s' received from unknown " + "peer: %s", gd_op_list[rsp.op], + uuid_utoa (rsp.uuid)); + goto out; + } + + uuid_copy (args->uuid, rsp.uuid); + if (rsp.op == GD_OP_REPLACE_BRICK) { + pthread_mutex_lock (&args->lock_dict); + { + ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict, + rsp_dict); + if 
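/* an aggregation failure is only logged; the callback still records
   the peer's op_ret/op_errno */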
(ret) + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + } + pthread_mutex_unlock (&args->lock_dict); + } + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_collate_errors (args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_STAGE_OP, peerinfo, rsp.uuid); + + if (rsp_dict) + dict_unref (rsp_dict); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_stage_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_stage_op_cbk); +} + + +int +gd_syncop_mgmt_stage_op (struct rpc_clnt *rpc, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid, int op, + dict_t *dict_out, dict_t *op_ctx) +{ + gd1_mgmt_stage_op_req *req = NULL; + glusterd_conf_t *conf = THIS->private; + int ret = -1; + + req = GF_CALLOC (1, sizeof (*req), gf_gld_mt_mop_stage_req_t); + if (!req) + goto out; + + uuid_copy (req->uuid, my_uuid); + req->op = op; + + ret = dict_allocate_and_serialize (dict_out, + &req->buf.buf_val, &req->buf.buf_len); + if (ret) + goto out; + + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (rpc, req, args, NULL, &gd_mgmt_prog, + GLUSTERD_MGMT_STAGE_OP, + gd_syncop_stage_op_cbk, + (xdrproc_t) xdr_gd1_mgmt_stage_op_req); + synclock_lock (&conf->big_lock); +out: + gd_stage_op_req_free (req); + return ret; + +} + +int32_t +_gd_syncop_brick_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + struct syncargs *args = NULL; + gd1_mgmt_brick_op_rsp rsp = {0,}; + int ret = -1; + call_frame_t *frame = NULL; + + frame = myframe; + args = frame->local; + frame->local = NULL; + + /* initialize */ + args->op_ret = -1; + args->op_errno = EINVAL; + + if (-1 == req->rpc_status) { + args->op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp); + if (ret < 0) + goto out; + + if (rsp.output.output_len) { + args->dict = dict_new (); + if (!args->dict) { + ret = -1; + args->op_errno = ENOMEM; + goto out; + } + + ret = dict_unserialize (rsp.output.output_val, + rsp.output.output_len, + &args->dict); + if (ret < 0) + goto out; + } + + args->op_ret = rsp.op_ret; + args->op_errno = rsp.op_errno; + args->errstr = gf_strdup (rsp.op_errstr); + +out: + if ((rsp.op_errstr) && (strcmp (rsp.op_errstr, "") != 0)) + free (rsp.op_errstr); + free (rsp.output.output_val); + + STACK_DESTROY (frame->root); + __wake (args); + + return 0; +} + +int32_t +gd_syncop_brick_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_brick_op_cbk); +} + +int +gd_syncop_mgmt_brick_op (struct rpc_clnt *rpc, glusterd_pending_node_t *pnode, + int op, dict_t *dict_out, dict_t *op_ctx, + char **errstr) +{ + struct syncargs args = {0, }; + gd1_mgmt_brick_op_req *req = NULL; + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + args.op_ret = -1; + args.op_errno = ENOTCONN; + + if ((pnode->type == GD_NODE_NFS) || + ((pnode->type == GD_NODE_SHD) && + (op == GD_OP_STATUS_VOLUME))) { + ret = glusterd_node_op_build_payload + (op, &req, dict_out); + + } else { + ret = glusterd_brick_op_build_payload + (op, pnode->node, &req, dict_out); + + } + + if (ret) + goto out; + + GD_SYNCOP (rpc, (&args), NULL, gd_syncop_brick_op_cbk, req, + &gd_brick_prog, req->op, xdr_gd1_mgmt_brick_op_req); + + if (args.errstr) { + if ((strlen(args.errstr) > 0) && errstr) + 
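/* hand a non-empty error string to the caller, who takes ownership;
   an empty one is freed right below */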
*errstr = args.errstr; + else + GF_FREE (args.errstr); + } + + if (GD_OP_STATUS_VOLUME == op) { + ret = dict_set_int32 (args.dict, "index", pnode->index); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting index on brick status" + " rsp dict"); + args.op_ret = -1; + goto out; + } + } + if (args.op_ret == 0) + glusterd_handle_node_rsp (dict_out, pnode->node, op, + args.dict, op_ctx, errstr, + pnode->type); + +out: + errno = args.op_errno; + if (args.dict) + dict_unref (args.dict); + gd_brick_op_req_free (req); + return args.op_ret; + +} + +int32_t +_gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + gd1_mgmt_commit_op_rsp rsp = {{0},}; + struct syncargs *args = NULL; + xlator_t *this = NULL; + dict_t *rsp_dict = NULL; + call_frame_t *frame = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + int op_ret = -1; + int op_errno = -1; + + this = THIS; + frame = myframe; + args = frame->local; + frame->local = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp); + if (ret < 0) { + goto out; + } + + if (rsp.dict.dict_len) { + /* Unserialize the dictionary */ + rsp_dict = dict_new (); + + ret = dict_unserialize (rsp.dict.dict_val, + rsp.dict.dict_len, + &rsp_dict); + if (ret < 0) { + GF_FREE (rsp.dict.dict_val); + goto out; + } else { + rsp_dict->extra_stdfree = rsp.dict.dict_val; + } + } + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Commit response " + "for 'Volume %s' received from unknown " + "peer: %s", gd_op_list[rsp.op], + uuid_utoa (rsp.uuid)); + goto out; + } + + uuid_copy (args->uuid, rsp.uuid); + pthread_mutex_lock (&args->lock_dict); + { + ret = glusterd_syncop_aggr_rsp_dict (rsp.op, args->dict, + rsp_dict); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from " + " node/brick"); + } + pthread_mutex_unlock (&args->lock_dict); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; + +out: + gd_collate_errors (args, op_ret, op_errno, rsp.op_errstr, + GLUSTERD_MGMT_COMMIT_OP, peerinfo, rsp.uuid); + if (rsp_dict) + dict_unref (rsp_dict); + + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + + return 0; +} + +int32_t +gd_syncop_commit_op_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_commit_op_cbk); +} + + +int +gd_syncop_mgmt_commit_op (struct rpc_clnt *rpc, struct syncargs *args, + uuid_t my_uuid, uuid_t recv_uuid, + int op, dict_t *dict_out, dict_t *op_ctx) +{ + glusterd_conf_t *conf = THIS->private; + gd1_mgmt_commit_op_req *req = NULL; + int ret = -1; + + req = GF_CALLOC (1, sizeof (*req), gf_gld_mt_mop_commit_req_t); + if (!req) + goto out; + + uuid_copy (req->uuid, my_uuid); + req->op = op; + + ret = dict_allocate_and_serialize (dict_out, + &req->buf.buf_val, &req->buf.buf_len); + if (ret) + goto out; + + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (rpc, req, args, NULL, &gd_mgmt_prog, + GLUSTERD_MGMT_COMMIT_OP , + gd_syncop_commit_op_cbk, + (xdrproc_t) xdr_gd1_mgmt_commit_op_req); + synclock_lock (&conf->big_lock); +out: + gd_commit_op_req_free (req); + return ret; +} + + +int +gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers, + glusterd_op_t op) +{ + glusterd_peerinfo_t *peerinfo = NULL; + int npeers = 0; + + list_for_each_entry (peerinfo, peers, 
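/* transaction peers: connected, and (except for volume-sync)
   fully befriended */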
uuid_list) { + if (!peerinfo->connected) + continue; + if (op != GD_OP_SYNC_VOLUME && + peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) + continue; + + list_add_tail (&peerinfo->op_peers_list, xact_peers); + npeers++; + } + return npeers; +} + +int +gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, + char **op_errstr, int npeers, uuid_t txn_id) +{ + int ret = -1; + int peer_cnt = 0; + uuid_t peer_uuid = {0}; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + + peers = &conf->xaction_peers; + + if (!npeers) { + ret = 0; + goto out; + } + + this = THIS; + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + if (conf->op_version < 3) { + /* Reset lock status */ + peerinfo->locked = _gf_false; + gd_syncop_mgmt_lock (peerinfo, &args, + MY_UUID, peer_uuid); + } else + gd_syncop_mgmt_v3_lock (op, op_ctx, peerinfo, &args, + MY_UUID, peer_uuid, txn_id); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.op_ret) { + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + else { + ret = gf_asprintf (op_errstr, "Another transaction could be " + "in progress. Please try again after " + "sometime."); + if (ret == -1) + *op_errstr = NULL; + + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire lock"); + + } + } + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent lock op req for 'Volume %s' " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); +out: + return ret; +} + +int +gd_stage_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, + dict_t *req_dict, char **op_errstr, int npeers) +{ + int ret = -1; + int peer_cnt = 0; + dict_t *rsp_dict = NULL; + char *hostname = NULL; + xlator_t *this = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + uuid_t tmp_uuid = {0}; + char *errstr = NULL; + struct syncargs args = {0}; + + this = THIS; + rsp_dict = dict_new (); + if (!rsp_dict) + goto out; + + ret = glusterd_op_stage_validate (op, req_dict, op_errstr, rsp_dict); + if (ret) { + hostname = "localhost"; + goto stage_done; + } + + if ((op == GD_OP_REPLACE_BRICK)) { + ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response from node/brick"); + goto out; + } + } + dict_unref (rsp_dict); + rsp_dict = NULL; + +stage_done: + if (ret) { + gf_log (this->name, GF_LOG_ERROR, LOGSTR_STAGE_FAIL, + gd_op_list[op], hostname, (*op_errstr) ? ":" : " ", + (*op_errstr) ? 
*op_errstr : " "); + if (*op_errstr == NULL) + gf_asprintf (op_errstr, OPERRSTR_STAGE_FAIL, hostname); + goto out; + } + + if (!npeers) { + ret = 0; + goto out; + } + + gd_syncargs_init (&args, op_ctx); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + ret = gd_syncop_mgmt_stage_op (peerinfo->rpc, &args, + MY_UUID, tmp_uuid, + op, req_dict, op_ctx); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + else if (dict_get_str (op_ctx, "errstr", &errstr) == 0) + *op_errstr = gf_strdup (errstr); + + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent stage op req for 'Volume %s' " + "to %d peers", gd_op_list[op], peer_cnt); +out: + if (rsp_dict) + dict_unref (rsp_dict); + return ret; +} + +int +gd_commit_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, + dict_t *req_dict, char **op_errstr, int npeers) +{ + dict_t *rsp_dict = NULL; + int peer_cnt = -1; + int ret = -1; + char *hostname = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t tmp_uuid = {0}; + char *errstr = NULL; + struct syncargs args = {0}; + + this = THIS; + rsp_dict = dict_new (); + if (!rsp_dict) { + ret = -1; + goto out; + } + + ret = glusterd_op_commit_perform (op, req_dict, op_errstr, rsp_dict); + if (ret) { + hostname = "localhost"; + goto commit_done; + } + if (op != GD_OP_SYNC_VOLUME) { + ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + "Failed to aggregate response " + "from node/brick"); + goto out; + } + } + dict_unref (rsp_dict); + rsp_dict = NULL; + +commit_done: + if (ret) { + gf_log (this->name, GF_LOG_ERROR, LOGSTR_COMMIT_FAIL, + gd_op_list[op], hostname, (*op_errstr) ? ":" : " ", + (*op_errstr) ? 
*op_errstr : " "); + if (*op_errstr == NULL) + gf_asprintf (op_errstr, OPERRSTR_COMMIT_FAIL, + hostname); + goto out; + } + + if (!npeers) { + ret = 0; + goto out; + } + + gd_syncargs_init (&args, op_ctx); + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry (peerinfo, peers, op_peers_list) { + ret = gd_syncop_mgmt_commit_op (peerinfo->rpc, &args, + MY_UUID, tmp_uuid, + op, req_dict, op_ctx); + peer_cnt++; + } + gd_synctask_barrier_wait((&args), peer_cnt); + ret = args.op_ret; + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + else if (dict_get_str (op_ctx, "errstr", &errstr) == 0) + *op_errstr = gf_strdup (errstr); + + gf_log (this->name, GF_LOG_DEBUG, "Sent commit op req for 'Volume %s' " + "to %d peers", gd_op_list[op], peer_cnt); +out: + if (!ret) + glusterd_op_modify_op_ctx (op, op_ctx); + + if (rsp_dict) + dict_unref (rsp_dict); + + GF_FREE (args.errstr); + args.errstr = NULL; + + return ret; +} + +int +gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int op_ret, + rpcsvc_request_t *req, dict_t *op_ctx, char *op_errstr, + int npeers, char *volname, gf_boolean_t is_acquired, + uuid_t txn_id) +{ + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_peerinfo_t *tmp = NULL; + uuid_t tmp_uuid = {0}; + int peer_cnt = 0; + int ret = -1; + xlator_t *this = NULL; + struct syncargs args = {0}; + struct list_head *peers = NULL; + + peers = &conf->xaction_peers; + + if (!npeers) { + ret = 0; + goto out; + } + + /* If the lock has not been held during this + * transaction, do not send unlock requests */ + if (!is_acquired) + goto out; + + this = THIS; + synctask_barrier_init((&args)); + peer_cnt = 0; + list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) { + if (conf->op_version < 3) { + /* Only unlock peers that were locked */ + if (peerinfo->locked) + gd_syncop_mgmt_unlock (peerinfo, &args, + MY_UUID, tmp_uuid); + } else + gd_syncop_mgmt_v3_unlock (op_ctx, peerinfo, + &args, MY_UUID, + tmp_uuid, txn_id); + peer_cnt++; + list_del_init (&peerinfo->op_peers_list); + } + gd_synctask_barrier_wait((&args), peer_cnt); + ret = args.op_ret; + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to unlock " + "on some peer(s)"); + } + +out: + glusterd_op_send_cli_response (op, op_ret, 0, req, op_ctx, op_errstr); + glusterd_op_clear_op (op); + if (is_acquired) { + /* Based on the op-version, we release * + * the cluster or mgmt_v3 lock */ + if (conf->op_version < 3) + glusterd_unlock (MY_UUID); + else { + ret = glusterd_mgmt_v3_unlock (volname, MY_UUID, + "vol"); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + } + } + + return 0; +} + +int +gd_get_brick_count (struct list_head *bricks) +{ + glusterd_pending_node_t *pending_node = NULL; + int npeers = 0; + list_for_each_entry (pending_node, bricks, list) { + npeers++; + } + return npeers; +} + +int +gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr) +{ + glusterd_pending_node_t *pending_node = NULL; + struct list_head selected = {0,}; + xlator_t *this = NULL; + int brick_count = 0; + int ret = -1; + rpc_clnt_t *rpc = NULL; + dict_t *rsp_dict = NULL; + glusterd_conf_t *conf = NULL; + + this = THIS; + conf = this->private; + rsp_dict = dict_new (); + if (!rsp_dict) { + ret = -1; + goto out; + } + + INIT_LIST_HEAD (&selected); + ret = glusterd_op_bricks_select (op, req_dict, op_errstr, &selected, rsp_dict); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + (*op_errstr)? *op_errstr: "Brick op failed. 
Check " + "glusterd log file for more details."); + goto out; + } + + if (op == GD_OP_HEAL_VOLUME) { + ret = glusterd_syncop_aggr_rsp_dict (op, op_ctx, rsp_dict); + if (ret) + goto out; + } + dict_unref (rsp_dict); + rsp_dict = NULL; + + brick_count = 0; + list_for_each_entry (pending_node, &selected, list) { + rpc = glusterd_pending_node_get_rpc (pending_node); + if (!rpc) { + if (pending_node->type == GD_NODE_REBALANCE) { + ret = 0; + glusterd_defrag_volume_node_rsp (req_dict, + NULL, op_ctx); + goto out; + } + + ret = -1; + gf_log (this->name, GF_LOG_ERROR, "Brick Op failed " + "due to rpc failure."); + goto out; + } + ret = gd_syncop_mgmt_brick_op (rpc, pending_node, op, req_dict, + op_ctx, op_errstr); + if (ret) + goto out; + + brick_count++; + } + + ret = 0; +out: + if (rsp_dict) + dict_unref (rsp_dict); + gf_log (this->name, GF_LOG_DEBUG, "Sent op req to %d bricks", + brick_count); + return ret; +} + +void +gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) +{ + int ret = -1; + int npeers = 0; + dict_t *req_dict = NULL; + glusterd_conf_t *conf = NULL; + glusterd_op_t op = 0; + int32_t tmp_op = 0; + char *op_errstr = NULL; + char *tmp = NULL; + char *volname = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *txn_id = NULL; + uuid_t *originator_uuid = NULL; + glusterd_op_info_t txn_opinfo; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + /* Generate a transaction-id for this operation and + * save it in the dict */ + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!txn_id) { + ret = -1; + goto out; + } + + uuid_generate (*txn_id); + + ret = dict_set_bin (op_ctx, "transaction_id", + txn_id, sizeof(*txn_id)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } else + gf_log (this->name, GF_LOG_DEBUG, + "Transaction_id = %s", uuid_utoa (*txn_id)); + + /* Save the MY_UUID as the originator_uuid */ + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (op_ctx, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + + ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, &tmp_op); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get volume " + "operation"); + goto out; + } + + op = tmp_op; + + /* Based on the op_version, acquire a cluster or mgmt_v3 lock */ + if (conf->op_version < 3) { + ret = glusterd_lock (MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock"); + gf_asprintf (&op_errstr, + "Another transaction is in progress. " + "Please try again after sometime."); + goto out; + } + } else { + + /* If no volname is given as a part of the command, locks will + * not be held */ + ret = dict_get_str (op_ctx, "volname", &tmp); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Failed to get volume " + "name"); + goto local_locking_done; + } else { + /* Use a copy of volname, as cli response will be + * sent before the unlock, and the volname in the + * dict, might be removed */ + volname = gf_strdup (tmp); + if (!volname) + goto out; + } + + ret = glusterd_mgmt_v3_lock (volname, MY_UUID, "vol"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", volname); + gf_asprintf (&op_errstr, + "Another transaction is in progress " + "for %s. 
Please try again after sometime.", + volname); + goto out; + } + } + + is_acquired = _gf_true; + +local_locking_done: + + /* Save opinfo for this transaction with the transaction id */ + glusterd_txn_opinfo_init (&txn_opinfo, NULL, &op, NULL, NULL); + ret = glusterd_set_txn_opinfo (txn_id, &txn_opinfo); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + + opinfo = txn_opinfo; + + INIT_LIST_HEAD (&conf->xaction_peers); + + /* Make 'volume status tasks' command a local operation. + * This is accomplished by setting npeers to 0. + */ + if (!glusterd_is_status_tasks_op (op, op_ctx)) + npeers = gd_build_peers_list (&conf->peers, + &conf->xaction_peers, op); + + /* If no volname is given as a part of the command, locks will + * not be held */ + if (volname) { + ret = gd_lock_op_phase (conf, op, op_ctx, &op_errstr, npeers, *txn_id); + if (ret) + goto out; + } + + ret = glusterd_op_build_payload (&req_dict, &op_errstr, op_ctx); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, LOGSTR_BUILD_PAYLOAD, + gd_op_list[op]); + if (op_errstr == NULL) + gf_asprintf (&op_errstr, OPERRSTR_BUILD_PAYLOAD); + goto out; + } + + ret = gd_stage_op_phase (&conf->xaction_peers, op, op_ctx, req_dict, + &op_errstr, npeers); + if (ret) + goto out; + + ret = gd_brick_op_phase (op, op_ctx, req_dict, &op_errstr); + if (ret) + goto out; + + ret = gd_commit_op_phase (&conf->xaction_peers, op, op_ctx, req_dict, + &op_errstr, npeers); + if (ret) + goto out; + + ret = 0; +out: + (void) gd_unlock_op_phase (conf, op, ret, req, op_ctx, op_errstr, + npeers, volname, is_acquired, *txn_id); + + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo (txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear transaction's opinfo"); + + if (volname) + GF_FREE (volname); + + if (req_dict) + dict_unref (req_dict); + + if (op_errstr) { + GF_FREE (op_errstr); + op_errstr = NULL; + } + + return; +} + +int32_t +glusterd_op_begin_synctask (rpcsvc_request_t *req, glusterd_op_t op, + void *dict) +{ + int ret = 0; + + ret = dict_set_int32 (dict, GD_SYNC_OPCODE_KEY, op); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "dict set failed for setting operations"); + goto out; + } + + gd_sync_task_begin (dict, req); + ret = 0; +out: + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.h b/xlators/mgmt/glusterd/src/glusterd-syncop.h new file mode 100644 index 000000000..e83ea2f4c --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.h @@ -0,0 +1,71 @@ +/* + Copyright (c) 2012-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#ifndef __RPC_SYNCOP_H +#define __RPC_SYNCOP_H + +#include "syncop.h" +#include "glusterd-sm.h" +#include "glusterd.h" + +#define GD_SYNC_OPCODE_KEY "sync-mgmt-operation" + +/* gd_syncop_* */ +#define GD_SYNCOP(rpc, stb, cookie, cbk, req, prog, procnum, xdrproc) do { \ + int ret = 0; \ + struct synctask *task = NULL; \ + glusterd_conf_t *conf= THIS->private; \ + \ + task = synctask_get (); \ + stb->task = task; \ + \ + /*This is to ensure that the brick_op_cbk is able to \ + * take the big lock*/ \ + synclock_unlock (&conf->big_lock); \ + ret = gd_syncop_submit_request (rpc, req, stb, cookie, \ + prog, procnum, cbk, \ + (xdrproc_t)xdrproc); \ + if (!ret) \ + synctask_yield (stb->task); \ + synclock_lock (&conf->big_lock); \ + } while (0) + + +int gd_syncop_submit_request (struct rpc_clnt *rpc, void *req, void *local, + void *cookie, rpc_clnt_prog_t *prog, int procnum, + fop_cbk_fn_t cbkfn, xdrproc_t xdrproc); + + +int gd_syncop_mgmt_lock (glusterd_peerinfo_t *peerinfo, struct syncargs *arg, + uuid_t my_uuid, uuid_t recv_uuid); +int gd_syncop_mgmt_unlock (glusterd_peerinfo_t *peerinfo, struct syncargs *arg, + uuid_t my_uuid, uuid_t recv_uuid); +int gd_syncop_mgmt_stage_op (struct rpc_clnt *rpc, struct syncargs *arg, + uuid_t my_uuid, uuid_t recv_uuid, int op, + dict_t *dict_out, dict_t *op_ctx); +int gd_syncop_mgmt_commit_op (struct rpc_clnt *rpc, struct syncargs *arg, + uuid_t my_uuid, uuid_t recv_uuid, int op, + dict_t *dict_out, dict_t *op_ctx); + +void +gd_synctask_barrier_wait (struct syncargs *args, int count); + +int +gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers, + glusterd_op_t op); +int +gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr); + +int +glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp); + +void +gd_syncargs_init (struct syncargs *args, dict_t *op_ctx); +#endif /* __RPC_SYNCOP_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 7746ec49c..e8ae05851 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1,29 +1,18 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" #endif #include <inttypes.h> - #include "globals.h" #include "glusterfs.h" #include "compat.h" @@ -33,8 +22,11 @@ #include "timer.h" #include "defaults.h" #include "compat.h" +#include "syncop.h" +#include "run.h" #include "compat-errno.h" #include "statedump.h" +#include "syscall.h" #include "glusterd-mem-types.h" #include "glusterd.h" #include "glusterd-op-sm.h" @@ -43,7 +35,10 @@ #include "glusterd-store.h" #include "glusterd-volgen.h" #include "glusterd-pmap.h" +#include "glusterfs-acl.h" +#include "glusterd-locks.h" +#include "xdr-generic.h" #include <sys/resource.h> #include <inttypes.h> #include <signal.h> @@ -51,14 +46,53 @@ #include <net/if.h> #include <sys/ioctl.h> #include <sys/socket.h> +#include <rpc/pmap_clnt.h> +#include <unistd.h> +#include <fnmatch.h> +#include <sys/statvfs.h> +#include <ifaddrs.h> +#ifdef HAVE_BD_XLATOR +#include <lvm2app.h> +#endif + + +#ifdef GF_LINUX_HOST_OS +#include <mntent.h> +#endif #ifdef GF_SOLARIS_HOST_OS #include <sys/sockio.h> #endif +#define NFS_PROGRAM 100003 +#define NFSV3_VERSION 3 + +#define MOUNT_PROGRAM 100005 +#define MOUNTV3_VERSION 3 +#define MOUNTV1_VERSION 1 + +#define NLM_PROGRAM 100021 +#define NLMV4_VERSION 4 +#define NLMV1_VERSION 1 + +#define CEILING_POS(X) (((X)-(int)(X)) > 0 ? (int)((X)+1) : (int)(X)) + static glusterd_lock_t lock; -static int32_t +char* +gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo) +{ + if ((peerinfo == NULL) || uuid_is_null (peerinfo->uuid)) + return NULL; + + if (peerinfo->uuid_str[0] == '\0') + uuid_utoa_r (peerinfo->uuid, peerinfo->uuid_str); + + return peerinfo->uuid_str; +} + + +int32_t glusterd_get_lock_owner (uuid_t *uuid) { uuid_copy (*uuid, lock.owner) ; @@ -81,117 +115,18 @@ glusterd_unset_lock_owner (uuid_t owner) return 0; } -int32_t -glusterd_is_local_addr (char *hostname) +gf_boolean_t +glusterd_is_fuse_available () { - int32_t ret = -1; - struct addrinfo *result = NULL; - struct addrinfo *res = NULL; - int32_t found = 0; - struct ifconf buf = {0,}; - char nodename[256] = {0,}; - - if ((!strcmp (hostname, "localhost")) || - (!strcmp (hostname, "127.0.0.1"))) { - found = 1; - goto out; - } - - ret = gethostname (nodename, 256); - if (ret) - goto out; - - if ((!strcmp (nodename, hostname))) { - found = 1; - goto out; - } - - ret = getaddrinfo (hostname, NULL, NULL, &result); - - if (ret != 0) { - gf_log ("", GF_LOG_ERROR, "error in getaddrinfo: %s\n", - gai_strerror(ret)); - goto out; - } - - for (res = result; res != NULL; res = res->ai_next) { - char hname[1024] = ""; - - ret = getnameinfo (res->ai_addr, res->ai_addrlen, hname, - NI_MAXHOST, NULL, 0, NI_NUMERICHOST); - if (ret) - goto out; - - if (!strncasecmp (hname, "127", 3)) { - ret = 0; - gf_log ("", GF_LOG_NORMAL, "local addr found"); - found = 1; - break; - } - } - - if (!found) { - int sd = -1; - struct ifreq *ifr = NULL; - int32_t size = 0; - int32_t num_req = 0; - struct sockaddr_in sa = {0,}; - - sd = socket (PF_UNIX, SOCK_DGRAM, 0); - if (sd == -1) - goto out; - - buf.ifc_len = sizeof (struct ifreq); - buf.ifc_req = GF_CALLOC (1, sizeof (struct ifreq), - gf_gld_mt_ifreq); - size = buf.ifc_len; - - ret = ioctl (sd, SIOCGIFCONF, &buf); - if (ret) { - close (sd); - goto out; - } - - while (size <= buf.ifc_len) { - size += sizeof (struct ifreq); - buf.ifc_len = size; - buf.ifc_req = GF_REALLOC (buf.ifc_req, size); - ret = ioctl (sd, SIOCGIFCONF, &buf); - if (ret) { - close (sd); - goto out; - } - } - - ifr = buf.ifc_req; - num_req = size / sizeof (struct ifreq) - 1; - - 
while (num_req--) { - char *addr = inet_ntoa ( *(struct in_addr *) - &ifr->ifr_addr.sa_data[sizeof(sa.sin_port)]); - if (!strcmp (addr, hostname)) { - gf_log ("", GF_LOG_DEBUG, "%s found as local", - addr); - found = 1; - } - ifr++; - } - - if (sd > 0) - close (sd); - } + int fd = 0; + fd = open ("/dev/fuse", O_RDWR); - -out: - if (result) - freeaddrinfo (result); - - if (buf.ifc_req) - GF_FREE (buf.ifc_req); - - return !found; + if (fd > -1 && !close (fd)) + return _gf_true; + else + return _gf_false; } int32_t @@ -202,25 +137,28 @@ glusterd_lock (uuid_t uuid) char new_owner_str[50]; char owner_str[50]; int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); GF_ASSERT (uuid); - uuid_unparse (uuid, new_owner_str); glusterd_get_lock_owner (&owner); if (!uuid_is_null (owner)) { - uuid_unparse (owner, owner_str); - gf_log ("glusterd", GF_LOG_ERROR, "Unable to get lock" - " for uuid: %s, lock held by: %s", new_owner_str, - owner_str); + gf_log (this->name, GF_LOG_ERROR, "Unable to get lock" + " for uuid: %s, lock held by: %s", + uuid_utoa_r (uuid, new_owner_str), + uuid_utoa_r (owner, owner_str)); goto out; } ret = glusterd_set_lock_owner (uuid); if (!ret) { - gf_log ("glusterd", GF_LOG_NORMAL, "Cluster lock held by" - " %s", new_owner_str); + gf_log (this->name, GF_LOG_DEBUG, "Cluster lock held by" + " %s", uuid_utoa (uuid)); } out: @@ -235,30 +173,33 @@ glusterd_unlock (uuid_t uuid) char new_owner_str[50]; char owner_str[50]; int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); GF_ASSERT (uuid); - uuid_unparse (uuid, new_owner_str); glusterd_get_lock_owner (&owner); - if (NULL == owner) { - gf_log ("glusterd", GF_LOG_ERROR, "Cluster lock not held!"); + if (uuid_is_null (owner)) { + gf_log (this->name, GF_LOG_ERROR, "Cluster lock not held!"); goto out; } ret = uuid_compare (uuid, owner); if (ret) { - uuid_unparse (owner, owner_str); - gf_log ("glusterd", GF_LOG_ERROR, "Cluster lock held by %s" - " ,unlock req from %s!", owner_str, new_owner_str); + gf_log (this->name, GF_LOG_ERROR, "Cluster lock held by %s ," + "unlock req from %s!", uuid_utoa_r (owner ,owner_str) + , uuid_utoa_r (uuid, new_owner_str)); goto out; } ret = glusterd_unset_lock_owner (uuid); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, "Unable to clear cluster " + gf_log (this->name, GF_LOG_ERROR, "Unable to clear cluster " "lock"); goto out; } @@ -279,68 +220,70 @@ glusterd_get_uuid (uuid_t *uuid) GF_ASSERT (priv); - uuid_copy (*uuid, priv->uuid); + uuid_copy (*uuid, MY_UUID); return 0; } int -glusterd_submit_request (glusterd_peerinfo_t *peerinfo, void *req, +glusterd_submit_request (struct rpc_clnt *rpc, void *req, call_frame_t *frame, rpc_clnt_prog_t *prog, int procnum, struct iobref *iobref, - gd_serialize_t sfunc, xlator_t *this, - fop_cbk_fn_t cbkfn) + xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc) { int ret = -1; struct iobuf *iobuf = NULL; int count = 0; char new_iobref = 0, start_ping = 0; struct iovec iov = {0, }; + ssize_t req_size = 0; - GF_ASSERT (peerinfo); + GF_ASSERT (rpc); GF_ASSERT (this); - iobuf = iobuf_get (this->ctx->iobuf_pool); - if (!iobuf) { - goto out; - }; + if (req) { + req_size = xdr_sizeof (xdrproc, req); + iobuf = iobuf_get2 (this->ctx->iobuf_pool, req_size); + if (!iobuf) { + goto out; + }; - if (!iobref) { - iobref = iobref_new (); if (!iobref) { - goto out; - } + iobref = iobref_new (); + if (!iobref) { + goto out; + } - new_iobref = 1; - } + new_iobref = 1; + } - iobref_add (iobref, iobuf); + iobref_add (iobref, iobuf); - iov.iov_base = 
iobuf->ptr; - iov.iov_len = 128 * GF_UNIT_KB; + iov.iov_base = iobuf->ptr; + iov.iov_len = iobuf_pagesize (iobuf); - /* Create the xdr payload */ - if (req && sfunc) { - ret = sfunc (iov, req); + /* Create the xdr payload */ + ret = xdr_serialize_generic (iov, req, xdrproc); if (ret == -1) { goto out; } iov.iov_len = ret; count = 1; } + /* Send the msg */ - ret = rpc_clnt_submit (peerinfo->rpc, prog, procnum, cbkfn, + ret = rpc_clnt_submit (rpc, prog, procnum, cbkfn, &iov, count, NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL); if (ret == 0) { - pthread_mutex_lock (&peerinfo->rpc->conn.lock); + pthread_mutex_lock (&rpc->conn.lock); { - if (!peerinfo->rpc->conn.ping_started) { + if (!rpc->conn.ping_started) { start_ping = 1; } } - pthread_mutex_unlock (&peerinfo->rpc->conn.lock); + pthread_mutex_unlock (&rpc->conn.lock); } if (start_ping) @@ -357,18 +300,19 @@ out: return ret; } - struct iobuf * glusterd_serialize_reply (rpcsvc_request_t *req, void *arg, - gd_serialize_t sfunc, struct iovec *outmsg) + struct iovec *outmsg, xdrproc_t xdrproc) { struct iobuf *iob = NULL; ssize_t retlen = -1; + ssize_t rsp_size = 0; /* First, get the io buffer into which the reply in arg will * be serialized. */ - iob = iobuf_get (req->svc->ctx->iobuf_pool); + rsp_size = xdr_sizeof (xdrproc, arg); + iob = iobuf_get2 (req->svc->ctx->iobuf_pool, rsp_size); if (!iob) { gf_log ("", GF_LOG_ERROR, "Failed to get iobuf"); goto ret; @@ -381,7 +325,7 @@ glusterd_serialize_reply (rpcsvc_request_t *req, void *arg, /* retlen is used to received the error since size_t is unsigned and we * need -1 for error notification during encoding. */ - retlen = sfunc (*outmsg, arg); + retlen = xdr_serialize_generic (*outmsg, arg, xdrproc); if (retlen == -1) { gf_log ("", GF_LOG_ERROR, "Failed to encode message"); goto ret; @@ -400,7 +344,7 @@ ret: int glusterd_submit_reply (rpcsvc_request_t *req, void *arg, struct iovec *payload, int payloadcount, - struct iobref *iobref, gd_serialize_t sfunc) + struct iobref *iobref, xdrproc_t xdrproc) { struct iobuf *iob = NULL; int ret = -1; @@ -412,7 +356,6 @@ glusterd_submit_reply (rpcsvc_request_t *req, void *arg, goto out; } - if (!iobref) { iobref = iobref_new (); if (!iobref) { @@ -423,14 +366,13 @@ glusterd_submit_reply (rpcsvc_request_t *req, void *arg, new_iobref = 1; } - iob = glusterd_serialize_reply (req, arg, sfunc, &rsp); + iob = glusterd_serialize_reply (req, arg, &rsp, xdrproc); if (!iob) { gf_log ("", GF_LOG_ERROR, "Failed to serialize reply"); - goto out; + } else { + iobref_add (iobref, iob); } - iobref_add (iobref, iob); - ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount, iobref); @@ -438,7 +380,6 @@ glusterd_submit_reply (rpcsvc_request_t *req, void *arg, * we can safely unref the iob in the hope that RPC layer must have * ref'ed the iob on receiving into the txlist. */ - iobuf_unref (iob); if (ret == -1) { gf_log ("", GF_LOG_ERROR, "Reply submission failed"); goto out; @@ -451,6 +392,8 @@ out: iobref_unref (iobref); } + if (iob) + iobuf_unref (iob); return ret; } @@ -470,7 +413,7 @@ glusterd_check_volume_exists (char *volname) ret = stat (pathname, &stbuf); if (ret) { - gf_log ("", GF_LOG_DEBUG, "Volume %s does not exist." + gf_log (THIS->name, GF_LOG_DEBUG, "Volume %s does not exist." 
"stat failed with errno : %d on path: %s", volname, errno, pathname); return _gf_false; @@ -493,26 +436,300 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo) if (!new_volinfo) goto out; + LOCK_INIT (&new_volinfo->lock); INIT_LIST_HEAD (&new_volinfo->vol_list); + INIT_LIST_HEAD (&new_volinfo->snapvol_list); INIT_LIST_HEAD (&new_volinfo->bricks); + INIT_LIST_HEAD (&new_volinfo->snap_volumes); new_volinfo->dict = dict_new (); if (!new_volinfo->dict) { - if (new_volinfo) - GF_FREE (new_volinfo); + GF_FREE (new_volinfo); + + goto out; + } + + new_volinfo->gsync_slaves = dict_new (); + if (!new_volinfo->gsync_slaves) { + GF_FREE (new_volinfo); goto out; } + snprintf (new_volinfo->parent_volname, GLUSTERD_MAX_VOLUME_NAME, "N/A"); + + new_volinfo->snap_max_hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT; + + new_volinfo->xl = THIS; + *volinfo = new_volinfo; ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* This function will create a new volinfo and then + * dup the entries from volinfo to the new_volinfo. + * + * @param volinfo volinfo which will be duplicated + * @param dup_volinfo new volinfo which will be created + * @param set_userauth if this true then auth info is also set + * + * @return 0 on success else -1 + */ +int32_t +glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t **dup_volinfo, + gf_boolean_t set_userauth) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *new_volinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, dup_volinfo, out); + + ret = glusterd_volinfo_new (&new_volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "not able to create the " + "duplicate volinfo for the volume %s", + volinfo->volname); + goto out; + } + + new_volinfo->type = volinfo->type; + new_volinfo->replica_count = volinfo->replica_count; + new_volinfo->stripe_count = volinfo->stripe_count; + new_volinfo->dist_leaf_count = volinfo->dist_leaf_count; + new_volinfo->sub_count = volinfo->sub_count; + new_volinfo->transport_type = volinfo->transport_type; + new_volinfo->nfs_transport_type = volinfo->nfs_transport_type; + new_volinfo->brick_count = volinfo->brick_count; + + dict_copy (volinfo->dict, new_volinfo->dict); + gd_update_volume_op_versions (new_volinfo); + + if (set_userauth) { + glusterd_auth_set_username (new_volinfo, + volinfo->auth.username); + glusterd_auth_set_password (new_volinfo, + volinfo->auth.password); + } + + *dup_volinfo = new_volinfo; + ret = 0; +out: + if (ret && (NULL != new_volinfo)) { + (void) glusterd_volinfo_delete (new_volinfo); + } + return ret; +} + +/* This function will duplicate brickinfo + * + * @param brickinfo Source brickinfo + * @param dup_brickinfo Destination brickinfo + * + * @return 0 on success else -1 + */ +int32_t +glusterd_brickinfo_dup (glusterd_brickinfo_t *brickinfo, + glusterd_brickinfo_t *dup_brickinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + GF_VALIDATE_OR_GOTO (this->name, brickinfo, out); + GF_VALIDATE_OR_GOTO (this->name, dup_brickinfo, out); + + strcpy (dup_brickinfo->hostname, brickinfo->hostname); + strcpy (dup_brickinfo->path, brickinfo->path); + strcpy (dup_brickinfo->device_path, brickinfo->device_path); + ret = gf_canonicalize_path (dup_brickinfo->path); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to canonicalize " + "brick path"); + goto out; + } + uuid_copy 
(dup_brickinfo->uuid, brickinfo->uuid); + + dup_brickinfo->port = brickinfo->port; + dup_brickinfo->rdma_port = brickinfo->rdma_port; + if (NULL != brickinfo->logfile) { + dup_brickinfo->logfile = gf_strdup (brickinfo->logfile); + if (NULL == dup_brickinfo->logfile) { + ret = -1; + goto out; + } + } + dup_brickinfo->status = brickinfo->status; + dup_brickinfo->snap_status = brickinfo->snap_status; +out: + return ret; +} + +/* This function will copy snap volinfo to the new + * passed volinfo and regenerate backend store files + * for the restored snap. + * + * @param new_volinfo new volinfo + * @param snap_volinfo volinfo of snap volume + * + * @return 0 on success and -1 on failure + * + * TODO: Duplicate all members of volinfo, e.g. geo-rep sync slaves + */ +int32_t +glusterd_snap_volinfo_restore (dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *snap_volinfo) +{ + int32_t brick_count = -1; + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *new_brickinfo = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + + GF_VALIDATE_OR_GOTO (this->name, new_volinfo, out); + GF_VALIDATE_OR_GOTO (this->name, snap_volinfo, out); + + brick_count = 0; + list_for_each_entry (brickinfo, &snap_volinfo->bricks, brick_list) { + ret = glusterd_brickinfo_new (&new_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create " + "new brickinfo"); + goto out; + } + + /* Duplicate brickinfo */ + ret = glusterd_brickinfo_dup (brickinfo, new_brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to dup " + "brickinfo"); + goto out; + } + + /* If the brick is not of this peer, or snapshot is missed * + * for the brick do not replace the xattr for it */ + if ((!uuid_compare (brickinfo->uuid, MY_UUID)) && + (brickinfo->snap_status != -1)) { + /* We need to replace the volume id of all the bricks + * to the volume id of the origin volume. new_volinfo + * has the origin volume's volume id*/ + ret = sys_lsetxattr (new_brickinfo->path, + GF_XATTR_VOL_ID_KEY, + new_volinfo->volume_id, + sizeof (new_volinfo->volume_id), + XATTR_REPLACE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to " + "set extended attribute %s on %s. 
" + "Reason: %s, snap: %s", + GF_XATTR_VOL_ID_KEY, + new_brickinfo->path, strerror (errno), + new_volinfo->volname); + goto out; + } + } + + /* If a snapshot is pending for this brick then + * restore should also be pending + */ + if (brickinfo->snap_status == -1) { + /* Adding missed delete to the dict */ + ret = glusterd_add_missed_snaps_to_dict + (rsp_dict, + snap_volinfo->volname, + brickinfo, + brick_count + 1, + GF_SNAP_OPTION_TYPE_RESTORE); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add missed snapshot info " + "for %s:%s in the rsp_dict", + brickinfo->hostname, + brickinfo->path); + goto out; + } + } + + list_add_tail (&new_brickinfo->brick_list, + &new_volinfo->bricks); + /* ownership of new_brickinfo is passed to new_volinfo */ + new_brickinfo = NULL; + brick_count++; + } + + /* Regenerate all volfiles */ + ret = glusterd_create_volfiles_and_notify_services (new_volinfo); + +out: + if (ret && (NULL != new_brickinfo)) { + (void) glusterd_brickinfo_delete (new_brickinfo); + } + return ret; } +void +glusterd_auth_cleanup (glusterd_volinfo_t *volinfo) { + + GF_ASSERT (volinfo); + + GF_FREE (volinfo->auth.username); + + GF_FREE (volinfo->auth.password); +} + +char * +glusterd_auth_get_username (glusterd_volinfo_t *volinfo) { + + GF_ASSERT (volinfo); + + return volinfo->auth.username; +} + +char * +glusterd_auth_get_password (glusterd_volinfo_t *volinfo) { + + GF_ASSERT (volinfo); + + return volinfo->auth.password; +} + +int32_t +glusterd_auth_set_username (glusterd_volinfo_t *volinfo, char *username) { + + GF_ASSERT (volinfo); + GF_ASSERT (username); + + volinfo->auth.username = gf_strdup (username); + return 0; +} + +int32_t +glusterd_auth_set_password (glusterd_volinfo_t *volinfo, char *password) { + + GF_ASSERT (volinfo); + GF_ASSERT (password); + + volinfo->auth.password = gf_strdup (password); + return 0; +} + int32_t glusterd_brickinfo_delete (glusterd_brickinfo_t *brickinfo) { @@ -522,8 +739,7 @@ glusterd_brickinfo_delete (glusterd_brickinfo_t *brickinfo) list_del_init (&brickinfo->brick_list); - if (brickinfo->logfile) - GF_FREE (brickinfo->logfile); + GF_FREE (brickinfo->logfile); GF_FREE (brickinfo); ret = 0; @@ -532,11 +748,11 @@ glusterd_brickinfo_delete (glusterd_brickinfo_t *brickinfo) } int32_t -glusterd_volume_bricks_delete (glusterd_volinfo_t *volinfo) +glusterd_volume_brickinfos_delete (glusterd_volinfo_t *volinfo) { glusterd_brickinfo_t *brickinfo = NULL; glusterd_brickinfo_t *tmp = NULL; - int32_t ret = -1; + int32_t ret = 0; GF_ASSERT (volinfo); @@ -548,7 +764,7 @@ glusterd_volume_bricks_delete (glusterd_volinfo_t *volinfo) } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -560,21 +776,27 @@ glusterd_volinfo_delete (glusterd_volinfo_t *volinfo) GF_ASSERT (volinfo); list_del_init (&volinfo->vol_list); + list_del_init (&volinfo->snapvol_list); - ret = glusterd_volume_bricks_delete (volinfo); + ret = glusterd_volume_brickinfos_delete (volinfo); if (ret) goto out; - dict_unref (volinfo->dict); + if (volinfo->dict) + dict_unref (volinfo->dict); + if (volinfo->gsync_slaves) + dict_unref (volinfo->gsync_slaves); + GF_FREE (volinfo->logdir); + + glusterd_auth_cleanup (volinfo); GF_FREE (volinfo); ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } - int32_t glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo) { @@ -596,7 +818,7 @@ glusterd_brickinfo_new (glusterd_brickinfo_t 
**brickinfo) ret = 0; out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -604,43 +826,59 @@ int32_t glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo) { int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); GF_ASSERT (brickinfo); ret = glusterd_hostname_to_uuid (brickinfo->hostname, brickinfo->uuid); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } int32_t -glusterd_brickinfo_from_brick (char *brick, - glusterd_brickinfo_t **brickinfo) +glusterd_brickinfo_new_from_brick (char *brick, + glusterd_brickinfo_t **brickinfo) { int32_t ret = -1; glusterd_brickinfo_t *new_brickinfo = NULL; char *hostname = NULL; char *path = NULL; - char *tmp = NULL; - char *tmpstr = NULL; + char *tmp_host = NULL; + char *tmp_path = NULL; + char *vg = NULL; GF_ASSERT (brick); GF_ASSERT (brickinfo); - tmp = gf_strdup (brick); - if (!tmp) { - gf_log ("glusterd", GF_LOG_ERROR, - "Out of memory"); + tmp_host = gf_strdup (brick); + if (tmp_host && !get_host_name (tmp_host, &hostname)) + goto out; + tmp_path = gf_strdup (brick); + if (tmp_path && !get_path_name (tmp_path, &path)) goto out; - } - - hostname = strtok_r (tmp, ":", &tmpstr); - path = strtok_r (NULL, ":", &tmpstr); GF_ASSERT (hostname); GF_ASSERT (path); ret = glusterd_brickinfo_new (&new_brickinfo); + if (ret) + goto out; +#ifdef HAVE_BD_XLATOR + vg = strchr (path, '?'); + /* ? is used as a delimiter for vg */ + if (vg) { + strncpy (new_brickinfo->vg, vg + 1, PATH_MAX - 1); + *vg = '\0'; + } + new_brickinfo->caps = CAPS_BD; +#else + vg = NULL; /* Avoid compiler warnings when BD not enabled */ +#endif + ret = gf_canonicalize_path (path); if (ret) goto out; @@ -651,9 +889,258 @@ glusterd_brickinfo_from_brick (char *brick, ret = 0; out: - if (tmp) - GF_FREE (tmp); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + GF_FREE (tmp_host); + if (tmp_host) + GF_FREE (tmp_path); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static gf_boolean_t +_is_prefix (char *str1, char *str2) +{ + GF_ASSERT (str1); + GF_ASSERT (str2); + + int i = 0; + int len1 = 0; + int len2 = 0; + int small_len = 0; + char *bigger = NULL; + gf_boolean_t prefix = _gf_true; + + len1 = strlen (str1); + len2 = strlen (str2); + small_len = min (len1, len2); + for (i = 0; i < small_len; i++) { + if (str1[i] != str2[i]) { + prefix = _gf_false; + break; + } + } + + if (len1 < len2) + bigger = str2; + + else if (len1 > len2) + bigger = str1; + + else + return prefix; + + if (bigger[small_len] != '/') + prefix = _gf_false; + + return prefix; +} + +/* Checks if @path is available in the peer identified by @uuid + * 'availability' is determined by querying current state of volumes + * in the cluster. */ +gf_boolean_t +glusterd_is_brickpath_available (uuid_t uuid, char *path) +{ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_conf_t *priv = NULL; + gf_boolean_t available = _gf_false; + char tmp_path[PATH_MAX+1] = {0}; + char tmp_brickpath[PATH_MAX+1] = {0}; + + priv = THIS->private; + + strncpy (tmp_path, path, PATH_MAX); + /* path may not yet exist */ + if (!realpath (path, tmp_path)) { + if (errno != ENOENT) { + goto out; + } + /* When realpath(3) fails, tmp_path is undefined. 
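Fall back to the raw user-supplied path: ENOENT only means the brick
           directory has not been created yet, which is legal at this point,
           and the _is_prefix() comparison below still needs a defined string
           to check against the paths of existing bricks on this peer.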
*/ + strncpy(tmp_path,path,PATH_MAX); + } + + list_for_each_entry (volinfo, &priv->volumes, vol_list) { + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (uuid, brickinfo->uuid)) + continue; + + if (!realpath (brickinfo->path, tmp_brickpath)) { + if (errno == ENOENT) + strncpy (tmp_brickpath, brickinfo->path, + PATH_MAX); + else + goto out; + } + + if (_is_prefix (tmp_brickpath, tmp_path)) + goto out; + } + } + available = _gf_true; +out: + return available; +} + +#ifdef HAVE_BD_XLATOR +/* + * Sets the tag of the format "trusted.glusterfs.volume-id:<uuid>" in + * the brick VG. It is used to avoid using same VG for another brick. + * @volume-id - gfid, @brick - brick info, @msg - Error message returned + * to the caller + */ +int +glusterd_bd_set_vg_tag (unsigned char *volume_id, glusterd_brickinfo_t *brick, + char *msg, int msg_size) +{ + lvm_t handle = NULL; + vg_t vg = NULL; + char *uuid = NULL; + int ret = -1; + + gf_asprintf (&uuid, "%s:%s", GF_XATTR_VOL_ID_KEY, + uuid_utoa (volume_id)); + if (!uuid) { + snprintf (msg, sizeof(*msg), "Could not allocate memory " + "for tag"); + return -1; + } + + handle = lvm_init (NULL); + if (!handle) { + snprintf (msg, sizeof(*msg), "lvm_init failed"); + goto out; + } + + vg = lvm_vg_open (handle, brick->vg, "w", 0); + if (!vg) { + snprintf (msg, sizeof(*msg), "Could not open VG %s", + brick->vg); + goto out; + } + + if (lvm_vg_add_tag (vg, uuid) < 0) { + snprintf (msg, sizeof(*msg), "Could not set tag %s for " + "VG %s", uuid, brick->vg); + goto out; + } + lvm_vg_write (vg); + ret = 0; +out: + GF_FREE (uuid); + + if (vg) + lvm_vg_close (vg); + if (handle) + lvm_quit (handle); + + return ret; +} +#endif + +int +glusterd_validate_and_create_brickpath (glusterd_brickinfo_t *brickinfo, + uuid_t volume_id, char **op_errstr, + gf_boolean_t is_force) +{ + int ret = -1; + char parentdir[PATH_MAX] = {0,}; + struct stat parent_st = {0,}; + struct stat brick_st = {0,}; + struct stat root_st = {0,}; + char msg[2048] = {0,}; + gf_boolean_t is_created = _gf_false; + + ret = mkdir (brickinfo->path, 0777); + if (ret) { + if (errno != EEXIST) { + snprintf (msg, sizeof (msg), "Failed to create brick " + "directory for brick %s:%s. Reason : %s ", + brickinfo->hostname, brickinfo->path, + strerror (errno)); + goto out; + } + } else { + is_created = _gf_true; + } + + ret = lstat (brickinfo->path, &brick_st); + if (ret) { + snprintf (msg, sizeof (msg), "lstat failed on %s. Reason : %s", + brickinfo->path, strerror (errno)); + goto out; + } + + if ((!is_created) && (!S_ISDIR (brick_st.st_mode))) { + snprintf (msg, sizeof (msg), "The provided path %s which is " + "already present, is not a directory", + brickinfo->path); + ret = -1; + goto out; + } + + snprintf (parentdir, sizeof (parentdir), "%s/..", brickinfo->path); + + ret = lstat ("/", &root_st); + if (ret) { + snprintf (msg, sizeof (msg), "lstat failed on /. Reason : %s", + strerror (errno)); + goto out; + } + + ret = lstat (parentdir, &parent_st); + if (ret) { + snprintf (msg, sizeof (msg), "lstat failed on %s. Reason : %s", + parentdir, strerror (errno)); + goto out; + } + + if (!is_force) { + if (brick_st.st_dev != parent_st.st_dev) { + snprintf (msg, sizeof (msg), "The brick %s:%s is a " + "mount point. Please create a sub-directory " + "under the mount point and use that as the " + "brick directory. 
Or use 'force' at the end " + "of the command if you want to override this " + "behavior.", brickinfo->hostname, + brickinfo->path); + ret = -1; + goto out; + } + else if (parent_st.st_dev == root_st.st_dev) { + snprintf (msg, sizeof (msg), "The brick %s:%s is " + "is being created in the root partition. It " + "is recommended that you don't use the " + "system's root partition for storage backend." + " Or use 'force' at the end of the command if" + " you want to override this behavior.", + brickinfo->hostname, brickinfo->path); + ret = -1; + goto out; + } + } + +#ifdef HAVE_BD_XLATOR + if (brickinfo->vg[0]) { + ret = glusterd_bd_set_vg_tag (volume_id, brickinfo, msg, + sizeof(msg)); + if (ret) + goto out; + } +#endif + ret = glusterd_check_and_set_brick_xattr (brickinfo->hostname, + brickinfo->path, volume_id, + op_errstr, is_force); + if (ret) + goto out; + + ret = 0; + +out: + if (ret && is_created) + rmdir (brickinfo->path); + if (ret && !*op_errstr && msg[0] != '\0') + *op_errstr = gf_strdup (msg); + return ret; } @@ -665,6 +1152,9 @@ glusterd_volume_brickinfo_get (uuid_t uuid, char *hostname, char *path, glusterd_brickinfo_t *brickiter = NULL; uuid_t peer_uuid = {0}; int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; if (uuid) { uuid_copy (peer_uuid, uuid); @@ -676,25 +1166,25 @@ glusterd_volume_brickinfo_get (uuid_t uuid, char *hostname, char *path, ret = -1; list_for_each_entry (brickiter, &volinfo->bricks, brick_list) { - if (uuid_is_null (brickiter->uuid)) { - ret = glusterd_resolve_brick (brickiter); - if (ret) - goto out; - } - if ((!uuid_compare (peer_uuid, brickiter->uuid)) && - !strcmp (brickiter->path, path)) { - gf_log ("", GF_LOG_NORMAL, "Found brick"); + if ((uuid_is_null (brickiter->uuid)) && + (glusterd_resolve_brick (brickiter) != 0)) + goto out; + if (uuid_compare (peer_uuid, brickiter->uuid)) + continue; + + if (strcmp (brickiter->path, path) == 0) { + gf_log (this->name, GF_LOG_DEBUG, LOGSTR_FOUND_BRICK, + brickiter->hostname, brickiter->path, + volinfo->volname); ret = 0; if (brickinfo) *brickinfo = brickiter; break; - } else { - ret = -1; } } out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -704,65 +1194,135 @@ glusterd_volume_brickinfo_get_by_brick (char *brick, glusterd_brickinfo_t **brickinfo) { int32_t ret = -1; - char *hostname = NULL; - char *path = NULL; - char *dup_brick = NULL; - char *free_ptr = NULL; + glusterd_brickinfo_t *tmp_brickinfo = NULL; GF_ASSERT (brick); GF_ASSERT (volinfo); - gf_log ("", GF_LOG_NORMAL, "brick: %s", brick); - - dup_brick = gf_strdup (brick); - if (!dup_brick) { - gf_log ("", GF_LOG_ERROR, - "Out of memory"); - ret = -1; - goto out; - } else { - free_ptr = dup_brick; - } - - hostname = strtok (dup_brick, ":"); - path = strtok (NULL, ":"); - - if (!hostname || !path) { - gf_log ("", GF_LOG_ERROR, - "brick %s is not of form <HOSTNAME>:<export-dir>", - brick); - ret = -1; + ret = glusterd_brickinfo_new_from_brick (brick, &tmp_brickinfo); + if (ret) goto out; - } - ret = glusterd_volume_brickinfo_get (NULL, hostname, path, volinfo, + ret = glusterd_volume_brickinfo_get (NULL, tmp_brickinfo->hostname, + tmp_brickinfo->path, volinfo, brickinfo); + (void) glusterd_brickinfo_delete (tmp_brickinfo); out: - if (free_ptr) - GF_FREE (free_ptr); - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } +gf_boolean_t +glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname, + char *path) +{ + gf_boolean_t decommissioned = 
_gf_false; + glusterd_brickinfo_t *brickinfo = NULL; + int ret = -1; + + ret = glusterd_volume_brickinfo_get (NULL, hostname, path, volinfo, + &brickinfo); + if (ret) + goto out; + decommissioned = brickinfo->decommissioned; +out: + return decommissioned; +} + int32_t glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo) { GF_ASSERT (peerinfo); - glusterd_peerctx_t *peerctx = NULL; + glusterd_peerctx_t *peerctx = NULL; + gf_boolean_t quorum_action = _gf_false; + glusterd_conf_t *priv = THIS->private; + if (peerinfo->quorum_contrib != QUORUM_NONE) + quorum_action = _gf_true; if (peerinfo->rpc) { + /* cleanup the saved-frames before last unref */ + synclock_unlock (&priv->big_lock); + rpc_clnt_connection_cleanup (&peerinfo->rpc->conn); + synclock_lock (&priv->big_lock); + peerctx = peerinfo->rpc->mydata; peerinfo->rpc->mydata = NULL; peerinfo->rpc = rpc_clnt_unref (peerinfo->rpc); peerinfo->rpc = NULL; - GF_FREE (peerctx); + if (peerctx) { + GF_FREE (peerctx->errstr); + GF_FREE (peerctx); + } } glusterd_peer_destroy (peerinfo); + if (quorum_action) + glusterd_do_quorum_action (); return 0; } +int +glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + glusterd_conf_t *priv = NULL; + + if (!volume_id) + return -1; + + this = THIS; + priv = this->private; + + list_for_each_entry (voliter, &priv->volumes, vol_list) { + if (uuid_compare (volume_id, voliter->volume_id)) + continue; + *volinfo = voliter; + ret = 0; + gf_log (this->name, GF_LOG_DEBUG, "Volume %s found", + voliter->volname); + break; + } + return ret; +} + +int +glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + glusterd_snap_t *snap = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (volinfo); + + if (uuid_is_null(volume_id)) { + gf_log (this->name, GF_LOG_WARNING, "Volume UUID is NULL"); + goto out; + } + + list_for_each_entry (snap, &priv->snapshots, snap_list) { + list_for_each_entry (voliter, &snap->volumes, vol_list) { + if (uuid_compare (volume_id, voliter->volume_id)) + continue; + *volinfo = voliter; + ret = 0; + goto out; + } + } + + gf_log (this->name, GF_LOG_WARNING, "Snap volume not found"); +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + int32_t glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) { @@ -772,131 +1332,259 @@ glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo) glusterd_conf_t *priv = NULL; GF_ASSERT (volname); - this = THIS; GF_ASSERT (this); priv = this->private; + GF_ASSERT (priv); list_for_each_entry (tmp_volinfo, &priv->volumes, vol_list) { if (!strcmp (tmp_volinfo->volname, volname)) { - gf_log ("", GF_LOG_DEBUG, "Volume %s found", volname); + gf_log (this->name, GF_LOG_DEBUG, "Volume %s found", + volname); ret = 0; *volinfo = tmp_volinfo; break; } } - - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } - int32_t -glusterd_service_stop (const char *service, char *pidfile, int sig, - gf_boolean_t force_kill) +glusterd_snap_volinfo_find (char *snap_volname, glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo) { - int32_t ret = -1; - pid_t pid = -1; - FILE *file = NULL; - gf_boolean_t is_locked = _gf_false; + int32_t ret = -1; + xlator_t *this = NULL; + 
glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; - file = fopen (pidfile, "r+"); + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (snap_volname); - if (!file) { - gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s", - pidfile); - if (errno == ENOENT) { - gf_log ("",GF_LOG_TRACE, "%s may not be running", - service); + list_for_each_entry (snap_vol, &snap->volumes, vol_list) { + if (!strcmp (snap_vol->volname, snap_volname)) { ret = 0; + *volinfo = snap_vol; goto out; } - ret = -1; - goto out; } - ret = lockf (fileno (file), F_TLOCK, 0); - if (!ret) { - is_locked = _gf_true; - ret = unlink (pidfile); - if (ret && (ENOENT != errno)) { - gf_log ("", GF_LOG_ERROR, "Unable to " - "unlink stale pidfile: %s", pidfile); + + gf_log (this->name, GF_LOG_WARNING, "Snap volume %s not found", + snap_volname); +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname, + glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_volinfo_t *snap_vol = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (snap); + GF_ASSERT (origin_volname); + + list_for_each_entry (snap_vol, &snap->volumes, vol_list) { + if (!strcmp (snap_vol->parent_volname, origin_volname)) { + ret = 0; + *volinfo = snap_vol; + goto out; } - goto out; } + gf_log (this->name, GF_LOG_DEBUG, "Snap volume not found(snap: %s, " + "origin-volume: %s", snap->snapname, origin_volname); - ret = fscanf (file, "%d", &pid); - if (ret <= 0) { - gf_log ("", GF_LOG_ERROR, "Unable to read pidfile: %s", - pidfile); - ret = -1; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_service_stop (const char *service, char *pidfile, int sig, + gf_boolean_t force_kill) +{ + int32_t ret = -1; + pid_t pid = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + if (!glusterd_is_service_running (pidfile, &pid)) { + ret = 0; + gf_log (this->name, GF_LOG_INFO, "%s already stopped", service); goto out; } - fclose (file); - file = NULL; - - gf_log ("", GF_LOG_NORMAL, "Stopping gluster %s running in pid: %d", - service, pid); + gf_log (this->name, GF_LOG_DEBUG, "Stopping gluster %s running in pid: " + "%d", service, pid); ret = kill (pid, sig); + if (!force_kill) + goto out; - if (force_kill) { - sleep (1); - file = fopen (pidfile, "r+"); - if (!file) { - ret = 0; - goto out; - } - ret = lockf (fileno (file), F_TLOCK, 0); - if (ret && ((EAGAIN == errno) || (EACCES == errno))) { - ret = kill (pid, SIGKILL); - if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to " - "kill pid %d reason: %s", pid, - strerror(errno)); - goto out; - } - - } else if (0 == ret){ - is_locked = _gf_true; - } - ret = unlink (pidfile); - if (ret && (ENOENT != errno)) { - gf_log ("", GF_LOG_ERROR, "Unable to " - "unlink pidfile: %s", pidfile); + sleep (1); + if (glusterd_is_service_running (pidfile, NULL)) { + ret = kill (pid, SIGKILL); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to " + "kill pid %d reason: %s", pid, + strerror(errno)); goto out; } } ret = 0; out: - if (is_locked && file) - lockf (fileno (file), F_ULOCK, 0); - if (file) - fclose (file); + return ret; +} + +void +glusterd_set_socket_filepath (char *sock_filepath, char *sockpath, size_t len) +{ + char md5_sum[MD5_DIGEST_LENGTH*2+1] = {0,}; + + md5_wrapper ((unsigned char *) 
sock_filepath, strlen(sock_filepath), md5_sum); + snprintf (sockpath, len, "%s/%s.socket", GLUSTERD_SOCK_DIR, md5_sum); +} + +void +glusterd_set_brick_socket_filepath (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + char *sockpath, size_t len) +{ + char export_path[PATH_MAX] = {0,}; + char sock_filepath[PATH_MAX] = {0,}; + char volume_dir[PATH_MAX] = {0,}; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int expected_file_len = 0; + + expected_file_len = strlen (GLUSTERD_SOCK_DIR) + strlen ("/") + + MD5_DIGEST_LENGTH*2 + strlen (".socket") + 1; + GF_ASSERT (len >= expected_file_len); + this = THIS; + GF_ASSERT (this); + + priv = this->private; + + GLUSTERD_GET_VOLUME_DIR (volume_dir, volinfo, priv); + GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, export_path); + snprintf (sock_filepath, PATH_MAX, "%s/run/%s-%s", + volume_dir, brickinfo->hostname, export_path); + + glusterd_set_socket_filepath (sock_filepath, sockpath, len); +} + +/* connection happens only if it is not aleady connected, + * reconnections are taken care by rpc-layer + */ +int32_t +glusterd_brick_connect (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, char *socketpath) +{ + int ret = 0; + char volume_id_str[64]; + char *brickid = NULL; + dict_t *options = NULL; + struct rpc_clnt *rpc = NULL; + glusterd_conf_t *priv = THIS->private; + + GF_ASSERT (volinfo); + GF_ASSERT (brickinfo); + GF_ASSERT (socketpath); + + if (brickinfo->rpc == NULL) { + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. + * The default timeout of 30mins used for unreliable network + * connections is too long for unix domain socket connections. + */ + ret = rpc_transport_unix_options_build (&options, socketpath, + 600); + if (ret) + goto out; + + uuid_utoa_r (volinfo->volume_id, volume_id_str); + ret = gf_asprintf (&brickid, "%s:%s:%s", volume_id_str, + brickinfo->hostname, brickinfo->path); + if (ret < 0) + goto out; + + synclock_unlock (&priv->big_lock); + ret = glusterd_rpc_create (&rpc, options, + glusterd_brick_rpc_notify, + brickid); + synclock_lock (&priv->big_lock); + if (ret) + goto out; + brickinfo->rpc = rpc; + } +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Caller should ensure that brick process is not running*/ +static void +_reap_brick_process (char *pidfile, char *brickpath) +{ + unlink (pidfile); + /* Brick process is not running and pmap may have an entry for it.*/ + pmap_registry_remove (THIS, 0, brickpath, + GF_PMAP_PORT_BRICKSERVER, NULL); +} + +static int +_mk_rundir_p (glusterd_volinfo_t *volinfo) +{ + char voldir[PATH_MAX] = {0,}; + char rundir[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + int ret = -1; + + this = THIS; + priv = this->private; + GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, priv); + snprintf (rundir, sizeof (rundir)-1, "%s/run", voldir); + ret = mkdir_p (rundir, 0777, _gf_true); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to create rundir"); return ret; } int32_t glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) + glusterd_brickinfo_t *brickinfo, + gf_boolean_t wait) { int32_t ret = -1; xlator_t *this = NULL; glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; + char pidfile[PATH_MAX+1] = {0,}; char volfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; - char cmd_str[8192] = {0,}; - char rundir[PATH_MAX] = {0,}; + runner_t runner = {0,}; char exp_path[PATH_MAX] = {0,}; char 
logfile[PATH_MAX] = {0,}; int port = 0; - FILE *file = NULL; - gf_boolean_t is_locked = _gf_false; + int rdma_port = 0; + char socketpath[PATH_MAX] = {0}; + char glusterd_uuid[1024] = {0,}; + char valgrind_logfile[PATH_MAX] = {0}; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -905,102 +1593,149 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, GF_ASSERT (this); priv = this->private; + GF_ASSERT (priv); - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - snprintf (rundir, PATH_MAX, "%s/run", path); - ret = mkdir (rundir, 0777); - - if ((ret == -1) && (EEXIST != errno)) { - gf_log ("", GF_LOG_ERROR, "Unable to create rundir %s", - rundir); + if (brickinfo->snap_status == -1) { + gf_log (this->name, GF_LOG_INFO, + "Snapshot is pending on %s:%s. " + "Hence not starting the brick", + brickinfo->hostname, + brickinfo->path); + ret = 0; goto out; } - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); + ret = _mk_rundir_p (volinfo); + if (ret) + goto out; - file = fopen (pidfile, "r+"); - if (file) { - ret = lockf (fileno (file), F_TLOCK, 0); - if (ret && ((EAGAIN == errno) || (EACCES == errno))) { - ret = 0; - gf_log ("", GF_LOG_NORMAL, "brick %s:%s " - "already started", brickinfo->hostname, - brickinfo->path); - goto out; - } - } + glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath, + sizeof (socketpath)); - ret = pmap_registry_search (this, brickinfo->path, - GF_PMAP_PORT_BRICKSERVER); - if (ret) { - ret = 0; - file = fopen (pidfile, "r+"); - if (file) { - ret = lockf (fileno (file), F_TLOCK, 0); - if (ret && ((EAGAIN == errno) || (EACCES == errno))) { - ret = 0; - gf_log ("", GF_LOG_NORMAL, "brick %s:%s " - "already started", brickinfo->hostname, - brickinfo->path); - goto out; - } else if (0 == ret) { - is_locked = _gf_true; - } + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); + if (glusterd_is_service_running (pidfile, NULL)) + goto connect; + + _reap_brick_process (pidfile, brickinfo->path); + + port = brickinfo->port; + if (!port) + port = pmap_registry_alloc (THIS); + + /* Build the exp_path, before starting the glusterfsd even in + valgrind mode. Otherwise all the glusterfsd processes start + writing the valgrind log to the same file. 
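exp_path is the brick path with its leading '/' dropped and the
       remaining slashes flattened to '-', so together with the volume name
       each brick gets its own valgrind-<volname>-<exp_path>.log rather than
       all bricks appending to one file.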
+ */ + GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); + runinit (&runner); + + if (priv->valgrind) { + /* Run bricks with valgrind */ + if (volinfo->logdir) { + snprintf (valgrind_logfile, PATH_MAX, + "%s/valgrind-%s-%s.log", + volinfo->logdir, + volinfo->volname, exp_path); + } else { + snprintf (valgrind_logfile, PATH_MAX, + "%s/bricks/valgrind-%s-%s.log", + DEFAULT_LOG_FILE_DIRECTORY, + volinfo->volname, exp_path); } - /* This means, pmap has the entry, remove it */ - ret = pmap_registry_remove (this, 0, brickinfo->path, - GF_PMAP_PORT_BRICKSERVER, NULL); + + runner_add_args (&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); } - unlink (pidfile); - gf_log ("", GF_LOG_NORMAL, "About to start glusterfs" - " for brick %s:%s", brickinfo->hostname, - brickinfo->path); - GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); - snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, - brickinfo->hostname, exp_path); + if (volinfo->is_snap_volume) { + snprintf (volfile, PATH_MAX,"/%s/%s/%s.%s.%s", + GLUSTERD_VOL_SNAP_DIR_PREFIX, + volinfo->snapshot->snapname, volinfo->volname, + brickinfo->hostname, exp_path); + } else { + snprintf (volfile, PATH_MAX, "%s.%s.%s", volinfo->volname, + brickinfo->hostname, exp_path); + } - if (!brickinfo->logfile) { - snprintf (logfile, PATH_MAX, "%s/logs/bricks/%s.log", - priv->workdir, exp_path); + if (volinfo->logdir) { + snprintf (logfile, PATH_MAX, "%s/%s.log", + volinfo->logdir, exp_path); + } else { + snprintf (logfile, PATH_MAX, "%s/bricks/%s.log", + DEFAULT_LOG_FILE_DIRECTORY, exp_path); + } + if (!brickinfo->logfile) brickinfo->logfile = gf_strdup (logfile); + + (void) snprintf (glusterd_uuid, 1024, "*-posix.glusterd-uuid=%s", + uuid_utoa (MY_UUID)); + runner_add_args (&runner, SBIN_DIR"/glusterfsd", + "-s", brickinfo->hostname, "--volfile-id", volfile, + "-p", pidfile, "-S", socketpath, + "--brick-name", brickinfo->path, + "-l", brickinfo->logfile, + "--xlator-option", glusterd_uuid, + NULL); + + runner_add_arg (&runner, "--brick-port"); + if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) { + runner_argprintf (&runner, "%d", port); + } else { + rdma_port = brickinfo->rdma_port; + if (!rdma_port) + rdma_port = pmap_registry_alloc (THIS); + runner_argprintf (&runner, "%d,%d", port, rdma_port); + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf (&runner, "%s-server.transport.rdma.listen-port=%d", + volinfo->volname, rdma_port); } - port = brickinfo->port; - if (!port) - port = pmap_registry_alloc (THIS); + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf (&runner, "%s-server.listen-port=%d", + volinfo->volname, port); - snprintf (cmd_str, 8192, - "%s/sbin/glusterfsd --xlator-option %s-server.listen-port=%d " - "-s localhost --volfile-id %s -p %s --brick-name %s " - "--brick-port %d -l %s", GFS_PREFIX, volinfo->volname, - port, volfile, pidfile, brickinfo->path, port, - brickinfo->logfile); + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); - gf_log ("",GF_LOG_DEBUG,"Starting GlusterFS Command Executed: \n %s \n", cmd_str); - ret = gf_system (cmd_str); + runner_log (&runner, "", GF_LOG_DEBUG, "Starting GlusterFS"); + if (wait) { + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); - if (ret == 0) { - //pmap_registry_bind (THIS, port, brickinfo->path); - brickinfo->port = port; + } else { + ret = 
runner_run_nowait (&runner); + } + + if (ret) + goto out; + + brickinfo->port = port; + brickinfo->rdma_port = rdma_port; + +connect: + ret = glusterd_brick_connect (volinfo, brickinfo, socketpath); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to connect to brick %s:%s on %s", + brickinfo->hostname, brickinfo->path, socketpath); + goto out; } out: - if (is_locked && file) - lockf (fileno (file), F_ULOCK, 0); - if (file) - fclose (file); return ret; } int32_t -glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) +glusterd_brick_unlink_socket_file (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) { + char path[PATH_MAX] = {0,}; + char socketpath[PATH_MAX] = {0}; xlator_t *this = NULL; glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; + int ret = 0; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -1009,12 +1744,79 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, GF_ASSERT (this); priv = this->private; - GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); - GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname, - brickinfo->path); + glusterd_set_brick_socket_filepath (volinfo, brickinfo, socketpath, + sizeof (socketpath)); + ret = unlink (socketpath); + if (ret && (ENOENT == errno)) { + ret = 0; + } else { + gf_log (this->name, GF_LOG_ERROR, "Failed to remove %s" + " error: %s", socketpath, strerror (errno)); + } + + return ret; +} + +int32_t +glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo) +{ + rpc_clnt_t *rpc = NULL; + glusterd_conf_t *priv = THIS->private; + + GF_ASSERT (brickinfo); + + if (!brickinfo) { + gf_log_callingfn ("glusterd", GF_LOG_WARNING, "!brickinfo"); + return -1; + } + + rpc = brickinfo->rpc; + brickinfo->rpc = NULL; + + if (rpc) { + synclock_unlock (&priv->big_lock); + rpc_clnt_unref (rpc); + synclock_lock (&priv->big_lock); + } + + return 0; +} + +int32_t +glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + gf_boolean_t del_brick) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char pidfile[PATH_MAX] = {0,}; + int ret = 0; + + GF_ASSERT (volinfo); + GF_ASSERT (brickinfo); + + this = THIS; + GF_ASSERT (this); - return glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_false); + priv = this->private; + if (del_brick) + list_del_init (&brickinfo->brick_list); + + if (GLUSTERD_STATUS_STARTED == volinfo->status) { + (void) glusterd_brick_disconnect (brickinfo); + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); + ret = glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_false); + if (ret == 0) { + glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); + (void) glusterd_brick_unlink_socket_file (volinfo, brickinfo); + } + } + + if (del_brick) + glusterd_delete_brick (volinfo, brickinfo); + + return ret; } int32_t @@ -1043,66 +1845,113 @@ out: return ret; } -int32_t -glusterd_peer_destroy (glusterd_peerinfo_t *peerinfo) +/* Free LINE[0..N-1] and then the LINE buffer. 
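N is the count of populated entries; the vector itself is released by
   the final GF_FREE.  An illustrative pairing with glusterd_readin_file()
   below (hypothetical path, error handling elided):

       int count = 0;
       char **lines = glusterd_readin_file ("/tmp/info", &count);
       if (lines) {
               /* sort in place, then free entries and the vector */
               qsort (lines, count, sizeof (*lines),
                      glusterd_compare_lines);
               free_lines (lines, count);
       }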
*/ +static void +free_lines (char **line, size_t n) { - int32_t ret = -1; - glusterd_peer_hostname_t *name = NULL; - glusterd_peer_hostname_t *tmp = NULL; + size_t i; + for (i = 0; i < n; i++) + GF_FREE (line[i]); + GF_FREE (line); +} - if (!peerinfo) +char ** +glusterd_readin_file (const char *filepath, int *line_count) +{ + int ret = -1; + int n = 8; + int counter = 0; + char buffer[PATH_MAX + 256] = {0}; + char **lines = NULL; + FILE *fp = NULL; + void *p; + + fp = fopen (filepath, "r"); + if (!fp) goto out; - ret = glusterd_store_delete_peerinfo (peerinfo); + lines = GF_CALLOC (1, n * sizeof (*lines), gf_gld_mt_charptr); + if (!lines) + goto out; - if (ret) { - gf_log ("", GF_LOG_ERROR, "Deleting peer info failed"); - } + for (counter = 0; fgets (buffer, sizeof (buffer), fp); counter++) { - list_del_init (&peerinfo->uuid_list); - list_for_each_entry_safe (name, tmp, &peerinfo->hostnames, - hostname_list) { - list_del_init (&name->hostname_list); - GF_FREE (name->hostname); - GF_FREE (name); + if (counter == n-1) { + n *= 2; + p = GF_REALLOC (lines, n * sizeof (char *)); + if (!p) { + free_lines (lines, n/2); + lines = NULL; + goto out; + } + lines = p; + } + + lines[counter] = gf_strdup (buffer); } - list_del_init (&peerinfo->hostnames); - if (peerinfo->hostname) - GF_FREE (peerinfo->hostname); - GF_FREE (peerinfo); - peerinfo = NULL; + lines[counter] = NULL; + /* Reduce allocation to minimal size. */ + p = GF_REALLOC (lines, (counter + 1) * sizeof (char *)); + if (!p) { + free_lines (lines, counter); + lines = NULL; + goto out; + } + lines = p; + *line_count = counter; ret = 0; -out: - return ret; + out: + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "%s", strerror (errno)); + if (fp) + fclose (fp); + + return lines; } +int +glusterd_compare_lines (const void *a, const void *b) { -gf_boolean_t -glusterd_is_cli_op_req (int32_t op) + return strcmp(* (char * const *) a, * (char * const *) b); +} + +int +glusterd_sort_and_redirect (const char *src_filepath, int dest_fd) { - switch (op) { - case GD_MGMT_CLI_CREATE_VOLUME: - case GD_MGMT_CLI_START_VOLUME: - case GD_MGMT_CLI_STOP_VOLUME: - case GD_MGMT_CLI_DELETE_VOLUME: - case GD_MGMT_CLI_DEFRAG_VOLUME: - case GD_MGMT_CLI_ADD_BRICK: - case GD_MGMT_CLI_REMOVE_BRICK: - case GD_MGMT_CLI_REPLACE_BRICK: - case GD_MGMT_CLI_LOG_FILENAME: - case GD_MGMT_CLI_LOG_LOCATE: - case GD_MGMT_CLI_LOG_ROTATE: - case GD_MGMT_CLI_SYNC_VOLUME: - return _gf_true; - break; + int ret = -1; + int line_count = 0; + int counter = 0; + char **lines = NULL; + + + if (!src_filepath || dest_fd < 0) + goto out; + + lines = glusterd_readin_file (src_filepath, &line_count); + if (!lines) + goto out; + + qsort (lines, line_count, sizeof (*lines), glusterd_compare_lines); + + for (counter = 0; lines[counter]; counter++) { + + ret = write (dest_fd, lines[counter], + strlen (lines[counter])); + if (ret < 0) + goto out; + + GF_FREE (lines[counter]); } - return _gf_false; -} + ret = 0; + out: + GF_FREE (lines); + return ret; +} int glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) @@ -1117,11 +1966,11 @@ glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) char buf[4096] = {0,}; char sort_filepath[PATH_MAX] = {0}; gf_boolean_t unlink_sortfile = _gf_false; - char sort_cmd[2*PATH_MAX + 32]; int sort_fd = 0; + xlator_t *this = NULL; GF_ASSERT (volinfo); - + this = THIS; priv = THIS->private; GF_ASSERT (priv); @@ -1130,10 +1979,10 @@ glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) snprintf (cksum_path, sizeof (cksum_path), "%s/%s", path, 
GLUSTERD_CKSUM_FILE); - fd = open (cksum_path, O_RDWR | O_APPEND | O_CREAT| O_TRUNC, 0644); + fd = open (cksum_path, O_RDWR | O_APPEND | O_CREAT| O_TRUNC, 0600); if (-1 == fd) { - gf_log ("", GF_LOG_ERROR, "Unable to open %s, errno: %d", + gf_log (this->name, GF_LOG_ERROR, "Unable to open %s, errno: %d", cksum_path, errno); ret = -1; goto out; @@ -1141,31 +1990,36 @@ glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) snprintf (filepath, sizeof (filepath), "%s/%s", path, GLUSTERD_VOLUME_INFO_FILE); - snprintf (sort_filepath, sizeof (sort_filepath), "/tmp/%s.XXXXXX", - volinfo->volname); - sort_fd = mkstemp(sort_filepath); + snprintf (sort_filepath, sizeof (sort_filepath), + "/tmp/%s.XXXXXX", volinfo->volname); + + sort_fd = mkstemp (sort_filepath); if (sort_fd < 0) { - gf_log ("", GF_LOG_ERROR, "Could not generate temp file, " - "reason: %s for volume: %s", strerror (errno), + gf_log (this->name, GF_LOG_ERROR, "Could not generate temp " + "file, reason: %s for %s: %s", strerror (errno), + (volinfo->is_snap_volume)?"snap":"volume", volinfo->volname); goto out; } else { unlink_sortfile = _gf_true; - close (sort_fd); } - snprintf (sort_cmd, sizeof (sort_cmd), "sort %s -o %s", - filepath, sort_filepath); - ret = system (sort_cmd); + /* sort the info file, result in sort_filepath */ + + ret = glusterd_sort_and_redirect (filepath, sort_fd); if (ret) { - gf_log ("", GF_LOG_ERROR, "failed to sort file %s to %s", - filepath, sort_filepath); + gf_log (this->name, GF_LOG_ERROR, "sorting info file failed"); goto out; } + + ret = close (sort_fd); + if (ret) + goto out; + ret = get_checksum_for_path (sort_filepath, &cksum); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get checksum" + gf_log (this->name, GF_LOG_ERROR, "Unable to get checksum" " for path: %s", sort_filepath); goto out; } @@ -1183,61 +2037,89 @@ glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo) if (ret) goto out; - volinfo->cksum = cksum; - + volinfo->cksum = cksum; out: - if (fd > 0) + if (fd > 0) close (fd); - - if (unlink_sortfile) + if (unlink_sortfile) unlink (sort_filepath); - gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); - return ret; + return ret; } -void -_add_volume_option_to_dict (dict_t *this, char *key, data_t *value, void *data) +int +_add_dict_to_prdict (dict_t *this, char *key, data_t *value, void *data) { - int exists = 0; - glusterd_volopt_ctx_t *ctx = NULL; + glusterd_dict_ctx_t *ctx = NULL; char optkey[512] = {0,}; int ret = -1; - exists = glusterd_check_option_exists (key, NULL); - if (0 == exists) - return; - ctx = data; - snprintf (optkey, sizeof (optkey), "volume%d.key%d", ctx->count, - ctx->opt_count); + snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix, + ctx->key_name, ctx->opt_count); ret = dict_set_str (ctx->dict, optkey, key); if (ret) - gf_log ("", GF_LOG_ERROR, "option add for key%d %s", - ctx->count, key); - snprintf (optkey, sizeof (optkey), "volume%d.value%d", ctx->count, - ctx->opt_count); + gf_log ("", GF_LOG_ERROR, "option add for %s%d %s", + ctx->key_name, ctx->opt_count, key); + snprintf (optkey, sizeof (optkey), "%s.%s%d", ctx->prefix, + ctx->val_name, ctx->opt_count); ret = dict_set_str (ctx->dict, optkey, value->data); if (ret) - gf_log ("", GF_LOG_ERROR, "option add for value%d %s", - ctx->count, value->data); + gf_log ("", GF_LOG_ERROR, "option add for %s%d %s", + ctx->val_name, ctx->opt_count, value->data); ctx->opt_count++; - return; + return ret; +} + +int32_t 
+glusterd_add_bricks_hname_path_to_dict (dict_t *dict, + glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + int ret = 0; + char key[256] = {0}; + int index = 0; + + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + snprintf (key, sizeof (key), "%d-hostname", index); + ret = dict_set_str (dict, key, brickinfo->hostname); + if (ret) + goto out; + + snprintf (key, sizeof (key), "%d-path", index); + ret = dict_set_str (dict, key, brickinfo->path); + if (ret) + goto out; + + index++; + } +out: + return ret; } int32_t glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count) { - int32_t ret = -1; - char key[512] = {0,}; - glusterd_brickinfo_t *brickinfo = NULL; - int32_t i = 1; - char uuid_str[50] = {0,}; - char *volume_id_str = NULL; - glusterd_volopt_ctx_t ctx = {0}; + int32_t ret = -1; + char prefix[512] = {0,}; + char key[512] = {0,}; + glusterd_brickinfo_t *brickinfo = NULL; + int32_t i = 1; + char *volume_id_str = NULL; + char *src_brick = NULL; + char *dst_brick = NULL; + char *str = NULL; + glusterd_dict_ctx_t ctx = {0}; + char *rebalance_id_str = NULL; + char *rb_id_str = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); GF_ASSERT (dict); GF_ASSERT (volinfo); @@ -1252,6 +2134,15 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.is_volume_restored", count); + ret = dict_set_int32 (dict, key, volinfo->is_volume_restored); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to set " + "is_volume_restored option for %s volume", + volinfo->volname); + goto out; + } + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.brick_count", count); ret = dict_set_int32 (dict, key, volinfo->brick_count); @@ -1277,28 +2168,157 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, goto out; memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.stripe_count", count); + ret = dict_set_int32 (dict, key, volinfo->stripe_count); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.replica_count", count); + ret = dict_set_int32 (dict, key, volinfo->replica_count); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.dist_count", count); + ret = dict_set_int32 (dict, key, volinfo->dist_leaf_count); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.ckusm", count); ret = dict_set_int64 (dict, key, volinfo->cksum); if (ret) goto out; - uuid_unparse (volinfo->volume_id, uuid_str); - volume_id_str = gf_strdup (uuid_str); - if (!volume_id_str) + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.transport_type", count); + ret = dict_set_uint32 (dict, key, volinfo->transport_type); + if (ret) + goto out; + + snprintf (key, sizeof (key), "volume%d.is_snap_volume", count); + ret = dict_set_uint32 (dict, key, volinfo->is_snap_volume); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); + ret = dict_set_uint64 (dict, key, volinfo->snap_max_hard_limit); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Unable to set %s", key); goto out; + } + volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id)); + if (!volume_id_str) { + ret = -1; + goto out; + } memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d.volume_id", count); + snprintf (key, 
sizeof (key), "volume%d.volume_id", count); ret = dict_set_dynstr (dict, key, volume_id_str); if (ret) goto out; + volume_id_str = NULL; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.username", count); + str = glusterd_auth_get_username (volinfo); + if (str) { + ret = dict_set_dynstr (dict, key, gf_strdup (str)); + if (ret) + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.password", count); + str = glusterd_auth_get_password (volinfo); + if (str) { + ret = dict_set_dynstr (dict, key, gf_strdup (str)); + if (ret) + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d.rebalance", count); + ret = dict_set_int32 (dict, key, volinfo->rebal.defrag_cmd); + if (ret) + goto out; + if (volinfo->rebal.defrag_cmd) { + rebalance_id_str = gf_strdup (uuid_utoa + (volinfo->rebal.rebalance_id)); + if (!rebalance_id_str) { + ret = -1; + goto out; + } + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d.rebalance-id", count); + ret = dict_set_dynstr (dict, key, rebalance_id_str); + if (ret) + goto out; + rebalance_id_str = NULL; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance-op", count); + ret = dict_set_uint32 (dict, key, volinfo->rebal.op); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); + ret = dict_set_int32 (dict, key, volinfo->rep_brick.rb_status); + if (ret) + goto out; + + if (volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { + + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, + count); + gf_asprintf (&src_brick, "%s:%s", + volinfo->rep_brick.src_brick->hostname, + volinfo->rep_brick.src_brick->path); + ret = dict_set_dynstr (dict, key, src_brick); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_DST_BRICK, + count); + gf_asprintf (&dst_brick, "%s:%s", + volinfo->rep_brick.dst_brick->hostname, + volinfo->rep_brick.dst_brick->path); + ret = dict_set_dynstr (dict, key, dst_brick); + if (ret) + goto out; + + rb_id_str = gf_strdup (uuid_utoa (volinfo->rep_brick.rb_id)); + if (!rb_id_str) { + ret = -1; + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rb_id", count); + ret = dict_set_dynstr (dict, key, rb_id_str); + if (ret) + goto out; + rb_id_str = NULL; + } + + snprintf (prefix, sizeof (prefix), "volume%d", count); ctx.dict = dict; - ctx.count = count; + ctx.prefix = prefix; ctx.opt_count = 1; + ctx.key_name = "key"; + ctx.val_name = "value"; GF_ASSERT (volinfo->dict); - dict_foreach (volinfo->dict, _add_volume_option_to_dict, &ctx); + dict_foreach (volinfo->dict, _add_dict_to_prdict, &ctx); ctx.opt_count--; memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.opt-count", count); @@ -1306,6 +2326,22 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + ctx.dict = dict; + ctx.prefix = prefix; + ctx.opt_count = 1; + ctx.key_name = "slave-num"; + ctx.val_name = "slave-val"; + GF_ASSERT (volinfo->gsync_slaves); + + dict_foreach (volinfo->gsync_slaves, _add_dict_to_prdict, &ctx); + ctx.opt_count--; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.gsync-count", count); + ret = dict_set_int32 (dict, key, ctx.opt_count); + if (ret) + goto out; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { memset (key, 0, sizeof (key)); snprintf (key, 
sizeof (key), "volume%d.brick%d.hostname", @@ -1321,11 +2357,48 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", + count, i); + ret = dict_set_int32 (dict, key, brickinfo->snap_status); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snap_status for %s:%s", + brickinfo->hostname, + brickinfo->path); + goto out; + } + + snprintf (key, sizeof (key), "volume%d.brick%d.device_path", + count, i); + ret = dict_set_str (dict, key, brickinfo->device_path); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set snap_device for %s:%s", + brickinfo->hostname, + brickinfo->path); + goto out; + } + i++; } + /* Add volume op-versions to dict. This prevents volume inconsistencies + * in the cluster + */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.op-version", count); + ret = dict_set_int32 (dict, key, volinfo->op_version); + if (ret) + goto out; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.client-op-version", count); + ret = dict_set_int32 (dict, key, volinfo->client_op_version); out: + GF_FREE (volume_id_str); + GF_FREE (rebalance_id_str); + GF_FREE (rb_id_str); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; @@ -1339,6 +2412,7 @@ glusterd_build_volume_dict (dict_t **vols) glusterd_conf_t *priv = NULL; glusterd_volinfo_t *volinfo = NULL; int32_t count = 0; + glusterd_dict_ctx_t ctx = {0}; priv = THIS->private; @@ -1359,6 +2433,17 @@ glusterd_build_volume_dict (dict_t **vols) if (ret) goto out; + ctx.dict = dict; + ctx.prefix = "global"; + ctx.opt_count = 1; + ctx.key_name = "key"; + ctx.val_name = "val"; + dict_foreach (priv->opts, _add_dict_to_prdict, &ctx); + ctx.opt_count--; + ret = dict_set_int32 (dict, "global-opt-count", ctx.opt_count); + if (ret) + goto out; + *vols = dict; out: gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); @@ -1369,7 +2454,8 @@ out: } int32_t -glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status) +glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status, + char *hostname) { int32_t ret = -1; @@ -1405,8 +2491,8 @@ glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status) //Mismatch detected ret = 0; gf_log ("", GF_LOG_ERROR, "Version of volume %s differ." - "local version = %d, remote version = %d", - volinfo->volname, volinfo->version, version); + "local version = %d, remote version = %d on peer %s", + volinfo->volname, volinfo->version, version, hostname); *status = GLUSTERD_VOL_COMP_UPDATE_REQ; goto out; } else if (version < volinfo->version) { @@ -1425,8 +2511,8 @@ glusterd_compare_friend_volume (dict_t *vols, int32_t count, int32_t *status) if (cksum != volinfo->cksum) { ret = 0; gf_log ("", GF_LOG_ERROR, "Cksums of volume %s differ." 
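For reference, the hunks above settle on one convention for the peer handshake payload: everything about a volume is flattened into a single dict under a "volume%d." prefix, with numbered key/value pairs and a trailing opt-count per sub-dictionary (and a matching global-opt-count for cluster-wide options). A minimal standalone sketch of that layout, with made-up option names and none of the glusterd dict APIs, is:

        #include <stdio.h>

        int
        main (void)
        {
                /* Two options of "volume0", flattened the way _add_dict_to_prdict
                 * emits them; import_prdict_dict below simply reverses this walk. */
                const char *opts[][2] = { { "performance.cache-size", "256MB" },
                                          { "auth.allow", "*" } };
                int i = 0;

                for (i = 0; i < 2; i++) {
                        printf ("volume0.key%d=%s\n", i + 1, opts[i][0]);
                        printf ("volume0.value%d=%s\n", i + 1, opts[i][1]);
                }
                printf ("volume0.opt-count=%d\n", i);
                return 0;
        }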
- " local cksum = %d, remote cksum = %d", - volinfo->volname, volinfo->cksum, cksum); + " local cksum = %u, remote cksum = %u on peer %s", + volinfo->volname, volinfo->cksum, cksum, hostname); *status = GLUSTERD_VOL_COMP_RJT; goto out; } @@ -1439,200 +2525,962 @@ out: return ret; } -int32_t -glusterd_import_friend_volume_opts (dict_t *vols, int count, - glusterd_volinfo_t *volinfo, - int new_volinfo) +static int32_t +import_prdict_dict (dict_t *vols, dict_t *dst_dict, char *key_prefix, + char *value_prefix, int opt_count, char *prefix) { char key[512] = {0,}; - int32_t ret = -1; + int32_t ret = 0; int i = 1; - int opt_count = 0; char *opt_key = NULL; char *opt_val = NULL; char *dup_opt_val = NULL; + char msg[2048] = {0}; - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.opt-count", count); - ret = dict_get_int32 (vols, key, &opt_count); - if (ret) - goto out; - if (!new_volinfo) { - ret = glusterd_options_reset (volinfo); - if (ret) { - gf_log ("", GF_LOG_ERROR, "options reset failed"); - goto out; - } - } while (i <= opt_count) { memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.key%d", - count, i); + snprintf (key, sizeof (key), "%s.%s%d", + prefix, key_prefix, i); ret = dict_get_str (vols, key, &opt_key); - if (ret) + if (ret) { + snprintf (msg, sizeof (msg), "Volume dict key not " + "specified"); goto out; + } memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.value%d", - count, i); + snprintf (key, sizeof (key), "%s.%s%d", + prefix, value_prefix, i); ret = dict_get_str (vols, key, &opt_val); - if (ret) + if (ret) { + snprintf (msg, sizeof (msg), "Volume dict value not " + "specified"); goto out; + } dup_opt_val = gf_strdup (opt_val); if (!dup_opt_val) { ret = -1; goto out; } - ret = dict_set_dynstr (volinfo->dict, opt_key, dup_opt_val); - if (ret) + ret = dict_set_dynstr (dst_dict, opt_key, dup_opt_val); + if (ret) { + snprintf (msg, sizeof (msg), "Volume set %s %s " + "unsuccessful", opt_key, dup_opt_val); goto out; + } i++; } + out: + if (msg[0]) + gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; + } -int32_t -glusterd_import_friend_volume (dict_t *vols, int count) +gf_boolean_t +glusterd_is_quorum_option (char *option) +{ + gf_boolean_t res = _gf_false; + int i = 0; + char *keys[] = {GLUSTERD_QUORUM_TYPE_KEY, + GLUSTERD_QUORUM_RATIO_KEY, NULL}; + + for (i = 0; keys[i]; i++) { + if (strcmp (option, keys[i]) == 0) { + res = _gf_true; + break; + } + } + return res; +} + +gf_boolean_t +glusterd_is_quorum_changed (dict_t *options, char *option, char *value) { + int ret = 0; + gf_boolean_t reconfigured = _gf_false; + gf_boolean_t all = _gf_false; + char *oldquorum = NULL; + char *newquorum = NULL; + char *oldratio = NULL; + char *newratio = NULL; + + if ((strcmp ("all", option) != 0) && + !glusterd_is_quorum_option (option)) + goto out; + + if (strcmp ("all", option) == 0) + all = _gf_true; + + if (all || (strcmp (GLUSTERD_QUORUM_TYPE_KEY, option) == 0)) { + newquorum = value; + ret = dict_get_str (options, GLUSTERD_QUORUM_TYPE_KEY, + &oldquorum); + } + + if (all || (strcmp (GLUSTERD_QUORUM_RATIO_KEY, option) == 0)) { + newratio = value; + ret = dict_get_str (options, GLUSTERD_QUORUM_RATIO_KEY, + &oldratio); + } + + reconfigured = _gf_true; + + if (oldquorum && newquorum && (strcmp (oldquorum, newquorum) == 0)) + reconfigured = _gf_false; + if (oldratio && newratio && (strcmp (oldratio, newratio) == 0)) + reconfigured = _gf_false; + + if ((oldratio == NULL) && 
(newratio == NULL) && (oldquorum == NULL) && + (newquorum == NULL)) + reconfigured = _gf_false; +out: + return reconfigured; +} + +static inline gf_boolean_t +_is_contributing_to_quorum (gd_quorum_contrib_t contrib) +{ + if ((contrib == QUORUM_UP) || (contrib == QUORUM_DOWN)) + return _gf_true; + return _gf_false; +} + +static inline gf_boolean_t +_does_quorum_meet (int active_count, int quorum_count) +{ + return (active_count >= quorum_count); +} + +int +glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count, + int *quorum_count) +{ + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + int inquorum_count = 0; + char *val = NULL; + double quorum_percentage = 0.0; + gf_boolean_t ratio = _gf_false; + int count = 0; + + conf = this->private; + //Start with counting self + inquorum_count = 1; + if (active_count) + *active_count = 1; + list_for_each_entry (peerinfo, &conf->peers, uuid_list) { + if (peerinfo->quorum_contrib == QUORUM_WAITING) + goto out; + + if (_is_contributing_to_quorum (peerinfo->quorum_contrib)) + inquorum_count = inquorum_count + 1; + + if (active_count && (peerinfo->quorum_contrib == QUORUM_UP)) + *active_count = *active_count + 1; + } + + ret = dict_get_str (conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val); + if (ret == 0) { + ratio = _gf_true; + ret = gf_string2percent (val, &quorum_percentage); + if (!ret) + ratio = _gf_true; + } + if (ratio) + count = CEILING_POS (inquorum_count * + quorum_percentage / 100.0); + else + count = (inquorum_count * 50 / 100) + 1; + *quorum_count = count; + ret = 0; +out: + return ret; +} + +gf_boolean_t +glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo) +{ + gf_boolean_t res = _gf_false; + char *quorum_type = NULL; + int ret = 0; + + ret = dict_get_str (volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, + &quorum_type); + if (ret) + goto out; + + if (strcmp (quorum_type, GLUSTERD_SERVER_QUORUM) == 0) + res = _gf_true; +out: + return res; +} + +gf_boolean_t +glusterd_is_any_volume_in_server_quorum (xlator_t *this) +{ + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + + conf = this->private; + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + if (glusterd_is_volume_in_server_quorum (volinfo)) { + return _gf_true; + } + } + return _gf_false; +} + +gf_boolean_t +does_gd_meet_server_quorum (xlator_t *this) +{ + int quorum_count = 0; + int active_count = 0; + gf_boolean_t in = _gf_false; + glusterd_conf_t *conf = NULL; + int ret = -1; + + conf = this->private; + ret = glusterd_get_quorum_cluster_counts (this, &active_count, + &quorum_count); + if (ret) + goto out; + + if (!_does_quorum_meet (active_count, quorum_count)) { + goto out; + } + + in = _gf_true; +out: + return in; +} + +int +glusterd_spawn_daemons (void *opaque) +{ + glusterd_conf_t *conf = THIS->private; + gf_boolean_t start_bricks = !conf->restart_done; + + if (start_bricks) { + glusterd_restart_bricks (conf); + conf->restart_done = _gf_true; + } + glusterd_restart_gsyncds (conf); + glusterd_restart_rebalance (conf); + return 0; +} + +void +glusterd_do_volume_quorum_action (xlator_t *this, glusterd_volinfo_t *volinfo, + gf_boolean_t meets_quorum) +{ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *conf = NULL; + + conf = this->private; + if (volinfo->status != GLUSTERD_STATUS_STARTED) + goto out; + + if (!glusterd_is_volume_in_server_quorum (volinfo)) + meets_quorum = _gf_true; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (!glusterd_is_local_brick (this, 
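The arithmetic in glusterd_get_quorum_cluster_counts above reduces to: count this node plus every peer whose quorum_contrib is UP or DOWN, then require either the configured ratio of that number (rounded up) or a strict majority. A self-contained sketch of just that calculation (the CEILING_POS behavior is assumed from its use here, not taken from the glusterd headers):

        #include <stdio.h>

        /* assumed ceil() for positive doubles, mirroring CEILING_POS above */
        #define CEILING_POS(x) (((x) - (int)(x)) > 0 ? (int)(x) + 1 : (int)(x))

        static int
        quorum_count (int inquorum_count, int have_ratio, double ratio_percent)
        {
                if (have_ratio)
                        return CEILING_POS (inquorum_count * ratio_percent / 100.0);
                /* default: strict majority of the in-quorum nodes */
                return (inquorum_count * 50 / 100) + 1;
        }

        int
        main (void)
        {
                /* 5-node cluster: majority needs 3; a 60% ratio also needs 3 */
                printf ("%d %d\n", quorum_count (5, 0, 0.0),
                        quorum_count (5, 1, 60.0));
                return 0;
        }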
volinfo, brickinfo)) + continue; + if (meets_quorum) + glusterd_brick_start (volinfo, brickinfo, _gf_false); + else + glusterd_brick_stop (volinfo, brickinfo, _gf_false); + } +out: + return; +} + +int +glusterd_do_quorum_action () +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + int active_count = 0; + int quorum_count = 0; + gf_boolean_t meets = _gf_false; + + this = THIS; + conf = this->private; + + conf->pending_quorum_action = _gf_true; + ret = glusterd_lock (conf->uuid); + if (ret) + goto out; + + { + ret = glusterd_get_quorum_cluster_counts (this, &active_count, + &quorum_count); + if (ret) + goto unlock; + + if (_does_quorum_meet (active_count, quorum_count)) + meets = _gf_true; + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + glusterd_do_volume_quorum_action (this, volinfo, meets); + } + } +unlock: + (void)glusterd_unlock (conf->uuid); + conf->pending_quorum_action = _gf_false; +out: + return ret; +} + +int32_t +glusterd_import_friend_volume_opts (dict_t *vols, int count, + glusterd_volinfo_t *volinfo) +{ + char key[512] = {0,}; int32_t ret = -1; - glusterd_conf_t *priv = NULL; + int opt_count = 0; + char msg[2048] = {0}; + char volume_prefix[1024] = {0}; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.opt-count", count); + ret = dict_get_int32 (vols, key, &opt_count); + if (ret) { + snprintf (msg, sizeof (msg), "Volume option count not " + "specified for %s", volinfo->volname); + goto out; + } + + snprintf (volume_prefix, sizeof (volume_prefix), "volume%d", count); + ret = import_prdict_dict (vols, volinfo->dict, "key", "value", + opt_count, volume_prefix); + if (ret) { + snprintf (msg, sizeof (msg), "Unable to import options dict " + "specified for %s", volinfo->volname); + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.gsync-count", count); + ret = dict_get_int32 (vols, key, &opt_count); + if (ret) { + snprintf (msg, sizeof (msg), "Gsync count not " + "specified for %s", volinfo->volname); + goto out; + } + + ret = import_prdict_dict (vols, volinfo->gsync_slaves, "slave-num", + "slave-val", opt_count, volume_prefix); + if (ret) { + snprintf (msg, sizeof (msg), "Unable to import gsync sessions " + "specified for %s", volinfo->volname); + goto out; + } + +out: + if (msg[0]) + gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_import_new_brick (dict_t *vols, int32_t vol_count, + int32_t brick_count, + glusterd_brickinfo_t **brickinfo) +{ char key[512] = {0,}; - glusterd_volinfo_t *volinfo = NULL; - char *volname = NULL; + int ret = -1; + int32_t snap_status = 0; + char *snap_device = NULL; char *hostname = NULL; char *path = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - glusterd_brickinfo_t *tmp = NULL; - int new_volinfo = 0; - int i = 1; - char *volume_id_str = NULL; + glusterd_brickinfo_t *new_brickinfo = NULL; + char msg[2048] = {0}; GF_ASSERT (vols); + GF_ASSERT (vol_count >= 0); + GF_ASSERT (brickinfo); - snprintf (key, sizeof (key), "volume%d.name", count); - ret = dict_get_str (vols, key, &volname); - if (ret) + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.brick%d.hostname", + vol_count, brick_count); + ret = dict_get_str (vols, key, &hostname); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); goto out; + } - priv = THIS->private; + memset (key, 0, sizeof (key)); + snprintf (key, 
sizeof (key), "volume%d.brick%d.path", + vol_count, brick_count); + ret = dict_get_str (vols, key, &path); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } - ret = glusterd_volinfo_find (volname, &volinfo); + snprintf (key, sizeof (key), "volume%d.brick%d.snap_status", + vol_count, brick_count); + ret = dict_get_int32 (vols, key, &snap_status); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + snprintf (key, sizeof (key), "volume%d.brick%d.device_path", + vol_count, brick_count); + ret = dict_get_str (vols, key, &snap_device); if (ret) { - ret = glusterd_volinfo_new (&volinfo); + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + + ret = glusterd_brickinfo_new (&new_brickinfo); + if (ret) + goto out; + + strcpy (new_brickinfo->path, path); + strcpy (new_brickinfo->hostname, hostname); + strcpy (new_brickinfo->device_path, snap_device); + new_brickinfo->snap_status = snap_status; + + //peerinfo might not be added yet + (void) glusterd_resolve_brick (new_brickinfo); + ret = 0; + *brickinfo = new_brickinfo; +out: + if (msg[0]) + gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_import_bricks (dict_t *vols, int32_t vol_count, + glusterd_volinfo_t *new_volinfo) +{ + int ret = -1; + int brick_count = 1; + glusterd_brickinfo_t *new_brickinfo = NULL; + + GF_ASSERT (vols); + GF_ASSERT (vol_count >= 0); + GF_ASSERT (new_volinfo); + while (brick_count <= new_volinfo->brick_count) { + + ret = glusterd_import_new_brick (vols, vol_count, brick_count, + &new_brickinfo); if (ret) goto out; - strncpy (volinfo->volname, volname, sizeof (volinfo->volname)); - new_volinfo = 1; + list_add_tail (&new_brickinfo->brick_list, &new_volinfo->bricks); + brick_count++; + } + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_import_volinfo (dict_t *vols, int count, + glusterd_volinfo_t **volinfo) +{ + int ret = -1; + char key[256] = {0}; + char *volname = NULL; + glusterd_volinfo_t *new_volinfo = NULL; + char *volume_id_str = NULL; + char msg[2048] = {0}; + char *src_brick = NULL; + char *dst_brick = NULL; + char *str = NULL; + int rb_status = 0; + char *rebalance_id_str = NULL; + char *rb_id_str = NULL; + int op_version = 0; + int client_op_version = 0; + uint32_t is_snap_volume = 0; + + GF_ASSERT (vols); + GF_ASSERT (volinfo); + + snprintf (key, sizeof (key), "volume%d.name", count); + ret = dict_get_str (vols, key, &volname); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload", key); + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.is_snap_volume", count); + ret = dict_get_uint32 (vols, key, &is_snap_volume); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + + if (is_snap_volume == _gf_true) { + gf_log (THIS->name, GF_LOG_DEBUG, + "Not syncing snap volume %s", volname); + ret = 0; + goto out; } + ret = glusterd_volinfo_new (&new_volinfo); + if (ret) + goto out; + strncpy (new_volinfo->volname, volname, sizeof (new_volinfo->volname)); + memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.type", count); - ret = dict_get_int32 (vols, key, &volinfo->type); - if (ret) + ret = dict_get_int32 (vols, key, &new_volinfo->type); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); goto out; + } 
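Bricks ride in the same flat payload; glusterd_import_new_brick above expects exactly four keys per brick, indexed by volume and brick position. Purely for illustration (hypothetical indices, plain printf instead of dict lookups):

        #include <stdio.h>

        int
        main (void)
        {
                int vol = 0, brick = 1;

                /* the per-brick keys read back by glusterd_import_new_brick */
                printf ("volume%d.brick%d.hostname\n", vol, brick);
                printf ("volume%d.brick%d.path\n", vol, brick);
                printf ("volume%d.brick%d.snap_status\n", vol, brick);
                printf ("volume%d.brick%d.device_path\n", vol, brick);
                return 0;
        }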
memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.brick_count", count); - ret = dict_get_int32 (vols, key, &volinfo->brick_count); - if (ret) + ret = dict_get_int32 (vols, key, &new_volinfo->brick_count); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); goto out; + } memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.version", count); - ret = dict_get_int32 (vols, key, &volinfo->version); - if (ret) + ret = dict_get_int32 (vols, key, &new_volinfo->version); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); goto out; + } memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.status", count); - ret = dict_get_int32 (vols, key, (int32_t *)&volinfo->status); - if (ret) + ret = dict_get_int32 (vols, key, (int32_t *)&new_volinfo->status); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); goto out; + } memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.sub_count", count); - ret = dict_get_int32 (vols, key, &volinfo->sub_count); - if (ret) + ret = dict_get_int32 (vols, key, &new_volinfo->sub_count); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); goto out; + } + /* not having a 'stripe_count' key is not an error (as peer may be of old version) */ memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.ckusm", count); - ret = dict_get_uint32 (vols, key, &volinfo->cksum); + snprintf (key, sizeof (key), "volume%d.stripe_count", count); + ret = dict_get_int32 (vols, key, &new_volinfo->stripe_count); if (ret) + gf_log (THIS->name, GF_LOG_INFO, + "peer is possibly old version"); + + /* not having a 'replica_count' key is not an error + (as peer may be of old version) */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.replica_count", count); + ret = dict_get_int32 (vols, key, &new_volinfo->replica_count); + if (ret) + gf_log (THIS->name, GF_LOG_INFO, + "peer is possibly old version"); + + /* not having a 'dist_count' key is not an error + (as peer may be of old version) */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.dist_count", count); + ret = dict_get_int32 (vols, key, &new_volinfo->dist_leaf_count); + if (ret) + gf_log (THIS->name, GF_LOG_INFO, + "peer is possibly old version"); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.ckusm", count); + ret = dict_get_uint32 (vols, key, &new_volinfo->cksum); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); goto out; + } memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.volume_id", count); ret = dict_get_str (vols, key, &volume_id_str); - if (ret) + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); goto out; - uuid_parse (volume_id_str, volinfo->volume_id); + } - list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, - brick_list) { - glusterd_delete_volfile (volinfo, brickinfo); - glusterd_store_delete_brick (volinfo, brickinfo); - ret = glusterd_brickinfo_delete (brickinfo); + uuid_parse (volume_id_str, new_volinfo->volume_id); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.username", count); + ret = dict_get_str (vols, key, &str); + if (!ret) { + ret = glusterd_auth_set_username (new_volinfo, str); if (ret) goto out; } - while (i <= volinfo->brick_count) { + memset (key, 0, sizeof
(key)); + snprintf (key, sizeof (key), "volume%d.password", count); + ret = dict_get_str (vols, key, &str); + if (!ret) { + ret = glusterd_auth_set_password (new_volinfo, str); + if (ret) + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.transport_type", count); + ret = dict_get_uint32 (vols, key, &new_volinfo->transport_type); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + + new_volinfo->is_snap_volume = is_snap_volume; + + snprintf (key, sizeof (key), "volume%d.is_volume_restored", count); + ret = dict_get_uint32 (vols, key, &new_volinfo->is_volume_restored); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get " + "is_volume_restored option for %s", + volname); + goto out; + } + snprintf (key, sizeof (key), "volume%d.snap-max-hard-limit", count); + ret = dict_get_uint64 (vols, key, &new_volinfo->snap_max_hard_limit); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance", count); + ret = dict_get_uint32 (vols, key, &new_volinfo->rebal.defrag_cmd); + if (ret) { + snprintf (msg, sizeof (msg), "%s missing in payload for %s", + key, volname); + goto out; + } + + if (new_volinfo->rebal.defrag_cmd) { memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.hostname", - count, i); - ret = dict_get_str (vols, key, &hostname); + snprintf (key, sizeof (key), "volume%d.rebalance-id", count); + ret = dict_get_str (vols, key, &rebalance_id_str); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } else { + uuid_parse (rebalance_id_str, + new_volinfo->rebal.rebalance_id); + } + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance-op", count); + ret = dict_get_uint32 (vols, key,(uint32_t *) &new_volinfo->rebal.op); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } + + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); + ret = dict_get_int32 (vols, key, &rb_status); + if (ret) + goto out; + new_volinfo->rep_brick.rb_status = rb_status; + + if (new_volinfo->rep_brick.rb_status > GF_RB_STATUS_NONE) { + + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_SRC_BRICK, + count); + ret = dict_get_str (vols, key, &src_brick); if (ret) goto out; + ret = glusterd_brickinfo_new_from_brick (src_brick, + &new_volinfo->rep_brick.src_brick); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to create" + " src brickinfo"); + goto out; + } + memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.brick%d.path", - count, i); - ret = dict_get_str (vols, key, &path); + snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_DST_BRICK, + count); + ret = dict_get_str (vols, key, &dst_brick); if (ret) goto out; - ret = glusterd_brickinfo_new (&brickinfo); - if (ret) + ret = glusterd_brickinfo_new_from_brick (dst_brick, + &new_volinfo->rep_brick.dst_brick); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to create" + " dst brickinfo"); goto out; + } - strcpy (brickinfo->path, path); - strcpy (brickinfo->hostname, hostname); - glusterd_resolve_brick (brickinfo); + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rb_id", count); + ret = dict_get_str (vols, key, &rb_id_str); + if 
(ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } else { + uuid_parse (rb_id_str, new_volinfo->rep_brick.rb_id); + } + } - list_add_tail (&brickinfo->brick_list, &volinfo->bricks); - i++; - } + ret = glusterd_import_friend_volume_opts (vols, count, new_volinfo); + if (ret) + goto out; - ret = glusterd_import_friend_volume_opts (vols, count, volinfo, - new_volinfo); + /* Import the volume's op-versions if available else set it to 1. + * Not having op-versions implies this information was obtained from an + * op-version 1 friend (gluster-3.3), ergo the cluster is at op-version + * 1 and all volumes are at op-version 1. + * + * Either both the volume op-versions should be absent or both should be + * present. Only one being present is a failure + */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.op-version", count); + ret = dict_get_int32 (vols, key, &op_version); if (ret) + ret = 0; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.client-op-version", count); + ret = dict_get_int32 (vols, key, &client_op_version); + if (ret) + ret = 0; + + if (op_version && client_op_version) { + new_volinfo->op_version = op_version; + new_volinfo->client_op_version = client_op_version; + } else if (((op_version == 0) && (client_op_version != 0)) || + ((op_version != 0) && (client_op_version == 0))) { + ret = -1; + gf_log ("glusterd", GF_LOG_ERROR, + "Only one volume op-version found"); goto out; - if (new_volinfo) { - list_add_tail (&volinfo->vol_list, &priv->volumes); - ret = glusterd_store_create_volume (volinfo); } else { - ret = glusterd_store_update_volume (volinfo); + new_volinfo->op_version = 1; + new_volinfo->client_op_version = 1; } - ret = glusterd_create_volfiles (volinfo); + ret = glusterd_import_bricks (vols, count, new_volinfo); if (ret) goto out; + *volinfo = new_volinfo; +out: + if (msg[0]) + gf_log ("glusterd", GF_LOG_ERROR, "%s", msg); + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_volume_disconnect_all_bricks (glusterd_volinfo_t *volinfo) +{ + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + GF_ASSERT (volinfo); + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (glusterd_is_brick_started (brickinfo)) { + ret = glusterd_brick_disconnect (brickinfo); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Failed to " + "disconnect %s:%s", brickinfo->hostname, + brickinfo->path); + break; + } + } + } + + return ret; +} + +int32_t +glusterd_volinfo_copy_brick_portinfo (glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *old_volinfo) +{ + glusterd_brickinfo_t *new_brickinfo = NULL; + glusterd_brickinfo_t *old_brickinfo = NULL; + + int ret = 0; + GF_ASSERT (new_volinfo); + GF_ASSERT (old_volinfo); + if (_gf_false == glusterd_is_volume_started (new_volinfo)) + goto out; + list_for_each_entry (new_brickinfo, &new_volinfo->bricks, brick_list) { + ret = glusterd_volume_brickinfo_get (new_brickinfo->uuid, + new_brickinfo->hostname, + new_brickinfo->path, + old_volinfo, &old_brickinfo); + if ((0 == ret) && glusterd_is_brick_started (old_brickinfo)) { + new_brickinfo->port = old_brickinfo->port; + } + } +out: + ret = 0; + return ret; +} - //volinfo->version++; +int32_t +glusterd_volinfo_stop_stale_bricks (glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *old_volinfo) +{ + glusterd_brickinfo_t *new_brickinfo = NULL; + glusterd_brickinfo_t *old_brickinfo = NULL; - int ret
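The op-version import just above follows a simple all-or-nothing rule: both keys present means take them, both absent means an op-version 1 (3.3) peer, and exactly one present is rejected. A compact sketch of the same decision in plain C (hypothetical helper, not the glusterd types):

        #include <stdio.h>

        /* returns 0 on success, -1 when exactly one key was present */
        static int
        pick_op_versions (int op_version, int client_op_version,
                          int *op_out, int *client_out)
        {
                if (op_version && client_op_version) {
                        *op_out = op_version;
                        *client_out = client_op_version;
                } else if (op_version || client_op_version) {
                        return -1;      /* only one found: inconsistent payload */
                } else {
                        *op_out = *client_out = 1;   /* old (3.3) peer */
                }
                return 0;
        }

        int
        main (void)
        {
                int op = 0, cl = 0;
                printf ("%d\n", pick_op_versions (2, 2, &op, &cl)); /* 0 */
                printf ("%d\n", pick_op_versions (2, 0, &op, &cl)); /* -1 */
                printf ("%d\n", pick_op_versions (0, 0, &op, &cl)); /* 0, both 1 */
                return 0;
        }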
= 0; + GF_ASSERT (new_volinfo); + GF_ASSERT (old_volinfo); + if (_gf_false == glusterd_is_volume_started (old_volinfo)) + goto out; + list_for_each_entry (old_brickinfo, &old_volinfo->bricks, brick_list) { + ret = glusterd_volume_brickinfo_get (old_brickinfo->uuid, + old_brickinfo->hostname, + old_brickinfo->path, + new_volinfo, &new_brickinfo); + if (ret) { + /*TODO: may need to switch to 'atomic' flavour of + * brick_stop, once we make peer rpc program also + * synctask enabled*/ + ret = glusterd_brick_stop (old_volinfo, old_brickinfo, + _gf_false); + if (ret) + gf_log ("glusterd", GF_LOG_ERROR, "Failed to " + "stop brick %s:%s", old_brickinfo->hostname, + old_brickinfo->path); + } + } + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +int32_t +glusterd_delete_stale_volume (glusterd_volinfo_t *stale_volinfo, + glusterd_volinfo_t *valid_volinfo) +{ + GF_ASSERT (stale_volinfo); + GF_ASSERT (valid_volinfo); + + /* If stale volume is in started state, copy the port numbers of the + * local bricks if they exist in the valid volume information. + * stop stale bricks. Stale volume information is going to be deleted. + * Which deletes the valid brick information inside stale volinfo. + * We dont want brick_rpc_notify to access already deleted brickinfo. + * Disconnect all bricks from stale_volinfo (unconditionally), since + * they are being deleted subsequently. + */ + if (glusterd_is_volume_started (stale_volinfo)) { + if (glusterd_is_volume_started (valid_volinfo)) { + (void) glusterd_volinfo_stop_stale_bricks (valid_volinfo, + stale_volinfo); + //Only valid bricks will be running now. + (void) glusterd_volinfo_copy_brick_portinfo (valid_volinfo, + stale_volinfo); + + } else { + (void) glusterd_stop_bricks (stale_volinfo); + } + + (void) glusterd_volume_disconnect_all_bricks (stale_volinfo); + } + /* Delete all the bricks and stores and vol files. They will be created + * again by the valid_volinfo. 
Volume store delete should not be + * performed because some of the bricks could still be running, + * keeping pid files under run directory + */ + (void) glusterd_delete_all_bricks (stale_volinfo); + if (stale_volinfo->shandle) { + unlink (stale_volinfo->shandle->path); + (void) gf_store_handle_destroy (stale_volinfo->shandle); + stale_volinfo->shandle = NULL; + } + (void) glusterd_volinfo_delete (stale_volinfo); + return 0; +} + +int32_t +glusterd_import_friend_volume (dict_t *vols, size_t count) +{ + + int32_t ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *old_volinfo = NULL; + glusterd_volinfo_t *new_volinfo = NULL; + + GF_ASSERT (vols); + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + ret = glusterd_import_volinfo (vols, count, &new_volinfo); if (ret) goto out; + if (!new_volinfo) { + gf_log (this->name, GF_LOG_DEBUG, + "Not importing snap volume"); + goto out; + } + + ret = glusterd_volinfo_find (new_volinfo->volname, &old_volinfo); + if (0 == ret) { + (void) glusterd_delete_stale_volume (old_volinfo, new_volinfo); + } + + if (glusterd_is_volume_started (new_volinfo)) { + (void) glusterd_start_bricks (new_volinfo); + } + + ret = glusterd_store_volinfo (new_volinfo, GLUSTERD_VOLINFO_VER_AC_NONE); + ret = glusterd_create_volfiles_and_notify_services (new_volinfo); + if (ret) + goto out; + list_add_tail (&new_volinfo->vol_list, &priv->volumes); out: gf_log ("", GF_LOG_DEBUG, "Returning with ret: %d", ret); return ret; @@ -1663,13 +3511,104 @@ out: return ret; } +int +glusterd_get_global_opt_version (dict_t *opts, uint32_t *version) +{ + int ret = -1; + char *version_str = NULL; + + ret = dict_get_str (opts, GLUSTERD_GLOBAL_OPT_VERSION, &version_str); + if (ret) + goto out; + + ret = gf_string2uint (version_str, version); + if (ret) + goto out; + ret = 0; +out: + return ret; +} + +int +glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str) +{ + int ret = -1; + char version_string[64] = {0}; + uint32_t version = 0; + + ret = glusterd_get_global_opt_version (opts, &version); + if (ret) + goto out; + version++; + snprintf (version_string, sizeof (version_string), "%"PRIu32, version); + *version_str = gf_strdup (version_string); + if (*version_str) + ret = 0; +out: + return ret; +} + int32_t -glusterd_compare_friend_data (dict_t *vols, int32_t *status) +glusterd_import_global_opts (dict_t *friend_data) +{ + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + int ret = -1; + dict_t *import_options = NULL; + int count = 0; + uint32_t local_version = 0; + uint32_t remote_version = 0; + + this = THIS; + conf = this->private; + + ret = dict_get_int32 (friend_data, "global-opt-count", &count); + if (ret) { + //old version peer + ret = 0; + goto out; + } + + import_options = dict_new (); + if (!import_options) + goto out; + ret = import_prdict_dict (friend_data, import_options, "key", "val", + count, "global"); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to import" + " global options"); + goto out; + } + + ret = glusterd_get_global_opt_version (conf->opts, &local_version); + if (ret) + goto out; + ret = glusterd_get_global_opt_version (import_options, &remote_version); + if (ret) + goto out; + if (remote_version > local_version) { + ret = glusterd_store_options (this, import_options); + if (ret) + goto out; + dict_unref (conf->opts); + conf->opts = dict_ref (import_options); + } + ret = 0; +out: + if (import_options) + dict_unref (import_options); + return ret; +} + +int32_t 
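Cluster-wide options are versioned with a plain unsigned counter stored as a string under GLUSTERD_GLOBAL_OPT_VERSION; glusterd_get_next_global_opt_version_str above parses, increments, and re-serializes it, and glusterd_import_global_opts later adopts a peer's dict only when the remote counter is strictly greater. A sketch of the bump, assuming only that the stored value is a decimal string:

        #include <stdio.h>

        /* parse, increment, re-format - mirroring the get/next pair above */
        static int
        next_opt_version (const char *cur, char *out, size_t len)
        {
                unsigned int v = 0;

                if (sscanf (cur, "%u", &v) != 1)
                        return -1;
                snprintf (out, len, "%u", v + 1);
                return 0;
        }

        int
        main (void)
        {
                char next[64] = {0};

                if (next_opt_version ("7", next, sizeof (next)) == 0)
                        printf ("%s\n", next);   /* prints 8 */
                return 0;
        }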
+glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname) { int32_t ret = -1; int32_t count = 0; int i = 1; gf_boolean_t update = _gf_false; + gf_boolean_t stale_nfs = _gf_false; + gf_boolean_t stale_shd = _gf_false; GF_ASSERT (vols); GF_ASSERT (status); @@ -1679,7 +3618,8 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status) goto out; while (i <= count) { - ret = glusterd_compare_friend_volume (vols, i, status); + ret = glusterd_compare_friend_volume (vols, i, status, + hostname); if (ret) goto out; @@ -1694,9 +3634,24 @@ glusterd_compare_friend_data (dict_t *vols, int32_t *status) } if (update) { + if (glusterd_is_nodesvc_running ("nfs")) + stale_nfs = _gf_true; + if (glusterd_is_nodesvc_running ("glustershd")) + stale_shd = _gf_true; + ret = glusterd_import_global_opts (vols); + if (ret) + goto out; ret = glusterd_import_friend_volumes (vols); if (ret) goto out; + if (_gf_false == glusterd_are_all_volumes_stopped ()) { + ret = glusterd_nodesvcs_handle_graph_change (NULL); + } else { + if (stale_nfs) + glusterd_nfs_server_stop (); + if (stale_shd) + glusterd_shd_stop (); + } } out: @@ -1706,48 +3661,279 @@ out: return ret; } +/* Valid only in if service is 'local' to glusterd. + * pid can be -1, if reading pidfile failed */ gf_boolean_t -glusterd_is_nfs_started () +glusterd_is_service_running (char *pidfile, int *pid) { - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; + FILE *file = NULL; + gf_boolean_t running = _gf_false; + int ret = 0; + int fno = 0; - this = THIS; - GF_ASSERT(this); + file = fopen (pidfile, "r+"); + if (!file) + goto out; + fno = fileno (file); + ret = lockf (fno, F_TEST, 0); + if (ret == -1) + running = _gf_true; + if (!pid) + goto out; + + ret = fscanf (file, "%d", pid); + if (ret <= 0) { + gf_log ("", GF_LOG_ERROR, "Unable to read pidfile: %s, %s", + pidfile, strerror (errno)); + *pid = -1; + } + +out: + if (file) + fclose (file); + return running; +} + +void +glusterd_get_nodesvc_dir (char *server, char *workdir, + char *path, size_t len) +{ + GF_ASSERT (len == PATH_MAX); + snprintf (path, len, "%s/%s", workdir, server); +} + +void +glusterd_get_nodesvc_rundir (char *server, char *workdir, + char *path, size_t len) +{ + char dir[PATH_MAX] = {0}; + GF_ASSERT (len == PATH_MAX); + + glusterd_get_nodesvc_dir (server, workdir, dir, sizeof (dir)); + snprintf (path, len, "%s/run", dir); +} + +void +glusterd_get_nodesvc_pidfile (char *server, char *workdir, + char *path, size_t len) +{ + char dir[PATH_MAX] = {0}; + GF_ASSERT (len == PATH_MAX); + + glusterd_get_nodesvc_rundir (server, workdir, dir, sizeof (dir)); + snprintf (path, len, "%s/%s.pid", dir, server); +} + +void +glusterd_get_nodesvc_volfile (char *server, char *workdir, + char *volfile, size_t len) +{ + char dir[PATH_MAX] = {0,}; + GF_ASSERT (len == PATH_MAX); + + glusterd_get_nodesvc_dir (server, workdir, dir, sizeof (dir)); + snprintf (volfile, len, "%s/%s-server.vol", dir, server); +} + +void +glusterd_nodesvc_set_online_status (char *server, gf_boolean_t status) +{ + glusterd_conf_t *priv = NULL; + + GF_ASSERT (server); + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (priv->shd); + GF_ASSERT (priv->nfs); + + if (!strcmp("glustershd", server)) + priv->shd->online = status; + else if (!strcmp ("nfs", server)) + priv->nfs->online = status; +} + +gf_boolean_t +glusterd_is_nodesvc_online (char *server) +{ + glusterd_conf_t *conf = NULL; + gf_boolean_t online = _gf_false; + + GF_ASSERT (server); + conf = 
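glusterd_is_service_running above leans on the daemon holding a POSIX lock on its own pidfile: if lockf(F_TEST) fails, another process owns the lock, so the service is alive, and the pid is read out as a side effect. The same probe in isolation (a sketch assuming the daemon took the lock at startup):

        #include <stdio.h>
        #include <unistd.h>

        /* Returns 1 if some process holds a lock on pidfile (service alive). */
        static int
        service_running (const char *pidfile, int *pid)
        {
                FILE *fp = fopen (pidfile, "r+");
                int alive = 0;

                if (!fp)
                        return 0;               /* no pidfile: not running */
                if (lockf (fileno (fp), F_TEST, 0) == -1)
                        alive = 1;              /* lock held by the daemon */
                if (pid && fscanf (fp, "%d", pid) != 1)
                        *pid = -1;              /* pidfile unreadable */
                fclose (fp);
                return alive;
        }
        /* e.g. service_running ("/var/run/example.pid", &pid) -- path illustrative */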
THIS->private; + GF_ASSERT (conf); + GF_ASSERT (conf->shd); + GF_ASSERT (conf->nfs); + + if (!strcmp (server, "glustershd")) + online = conf->shd->online; + else if (!strcmp (server, "nfs")) + online = conf->nfs->online; + + return online; +} + +int32_t +glusterd_nodesvc_set_socket_filepath (char *rundir, uuid_t uuid, + char *socketpath, int len) +{ + char sockfilepath[PATH_MAX] = {0,}; + + snprintf (sockfilepath, sizeof (sockfilepath), "%s/run-%s", + rundir, uuid_utoa (uuid)); + + glusterd_set_socket_filepath (sockfilepath, socketpath, len); + return 0; +} + +struct rpc_clnt* +glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node) +{ + struct rpc_clnt *rpc = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + nodesrv_t *shd = NULL; + glusterd_volinfo_t *volinfo = NULL; + nodesrv_t *nfs = NULL; + + GF_VALIDATE_OR_GOTO (THIS->name, pending_node, out); + GF_VALIDATE_OR_GOTO (THIS->name, pending_node->node, out); + + if (pending_node->type == GD_NODE_BRICK) { + brickinfo = pending_node->node; + rpc = brickinfo->rpc; + + } else if (pending_node->type == GD_NODE_SHD) { + shd = pending_node->node; + rpc = shd->rpc; + + } else if (pending_node->type == GD_NODE_REBALANCE) { + volinfo = pending_node->node; + if (volinfo->rebal.defrag) + rpc = volinfo->rebal.defrag->rpc; + + } else if (pending_node->type == GD_NODE_NFS) { + nfs = pending_node->node; + rpc = nfs->rpc; + + } else { + GF_ASSERT (0); + } + +out: + return rpc; +} + +struct rpc_clnt* +glusterd_nodesvc_get_rpc (char *server) +{ + glusterd_conf_t *priv = NULL; + struct rpc_clnt *rpc = NULL; + + GF_ASSERT (server); + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (priv->shd); + GF_ASSERT (priv->nfs); + + if (!strcmp (server, "glustershd")) + rpc = priv->shd->rpc; + else if (!strcmp (server, "nfs")) + rpc = priv->nfs->rpc; + + return rpc; +} + +int32_t +glusterd_nodesvc_set_rpc (char *server, struct rpc_clnt *rpc) +{ + int ret = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + GF_ASSERT (this); priv = this->private; + GF_ASSERT (priv); + GF_ASSERT (priv->shd); + GF_ASSERT (priv->nfs); - GLUSTERD_GET_NFS_PIDFILE(pidfile); - ret = access (pidfile, F_OK); + if (!strcmp ("glustershd", server)) + priv->shd->rpc = rpc; + else if (!strcmp ("nfs", server)) + priv->nfs->rpc = rpc; - if (ret == 0) - return _gf_true; - else - return _gf_false; + return ret; } int32_t -glusterd_nfs_server_start () +glusterd_nodesvc_connect (char *server, char *socketpath) { + int ret = 0; + dict_t *options = NULL; + struct rpc_clnt *rpc = NULL; + glusterd_conf_t *priv = THIS->private; + + rpc = glusterd_nodesvc_get_rpc (server); + + if (rpc == NULL) { + /* Setting frame-timeout to 10mins (600seconds). + * Unix domain sockets ensures that the connection is reliable. + * The default timeout of 30mins used for unreliable network + * connections is too long for unix domain socket connections. 
+ */ + ret = rpc_transport_unix_options_build (&options, socketpath, + 600); + if (ret) + goto out; + synclock_unlock (&priv->big_lock); + ret = glusterd_rpc_create (&rpc, options, + glusterd_nodesvc_rpc_notify, + server); + synclock_lock (&priv->big_lock); + if (ret) + goto out; + (void) glusterd_nodesvc_set_rpc (server, rpc); + } +out: + return ret; +} + +int32_t +glusterd_nodesvc_disconnect (char *server) { - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - char pidfile[PATH_MAX] = {0,}; - char logfile[PATH_MAX] = {0,}; - char volfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; - char cmd_str[8192] = {0,}; - char rundir[PATH_MAX] = {0,}; + struct rpc_clnt *rpc = NULL; + + rpc = glusterd_nodesvc_get_rpc (server); + (void)glusterd_nodesvc_set_rpc (server, NULL); + + if (rpc) + rpc_clnt_unref (rpc); + + return 0; +} + +int32_t +glusterd_nodesvc_start (char *server) +{ + int32_t ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + char pidfile[PATH_MAX] = {0,}; + char logfile[PATH_MAX] = {0,}; + char volfile[PATH_MAX] = {0,}; + char rundir[PATH_MAX] = {0,}; + char sockfpath[PATH_MAX] = {0,}; + char volfileid[256] = {0}; + char glusterd_uuid_option[1024] = {0}; + char valgrind_logfile[PATH_MAX] = {0}; this = THIS; GF_ASSERT(this); priv = this->private; - GLUSTERD_GET_NFS_DIR(path, priv); - snprintf (rundir, PATH_MAX, "%s/run", path); + glusterd_get_nodesvc_rundir (server, priv->workdir, + rundir, sizeof (rundir)); ret = mkdir (rundir, 0777); if ((ret == -1) && (EEXIST != errno)) { @@ -1756,44 +3942,266 @@ glusterd_nfs_server_start () goto out; } - GLUSTERD_GET_NFS_PIDFILE(pidfile); - glusterd_get_nfs_filepath (volfile); - + glusterd_get_nodesvc_pidfile (server, priv->workdir, + pidfile, sizeof (pidfile)); + glusterd_get_nodesvc_volfile (server, priv->workdir, + volfile, sizeof (volfile)); ret = access (volfile, F_OK); if (ret) { - gf_log ("", GF_LOG_ERROR, "Nfs Volfile %s is not present", - volfile); + gf_log ("", GF_LOG_ERROR, "%s Volfile %s is not present", + server, volfile); goto out; } - snprintf (logfile, PATH_MAX, "%s/logs/nfs.log", priv->workdir); + snprintf (logfile, PATH_MAX, "%s/%s.log", DEFAULT_LOG_FILE_DIRECTORY, + server); + snprintf (volfileid, sizeof (volfileid), "gluster/%s", server); + + glusterd_nodesvc_set_socket_filepath (rundir, MY_UUID, + sockfpath, sizeof (sockfpath)); + + runinit (&runner); - snprintf (cmd_str, 8192, - "%s/sbin/glusterfs -f %s -p %s -l %s", - GFS_PREFIX, volfile, pidfile, logfile); - ret = gf_system (cmd_str); + if (priv->valgrind) { + snprintf (valgrind_logfile, PATH_MAX, + "%s/valgrind-%s.log", + DEFAULT_LOG_FILE_DIRECTORY, + server); + runner_add_args (&runner, "valgrind", "--leak-check=full", + "--trace-children=yes", "--track-origins=yes", + NULL); + runner_argprintf (&runner, "--log-file=%s", valgrind_logfile); + } + + runner_add_args (&runner, SBIN_DIR"/glusterfs", + "-s", "localhost", + "--volfile-id", volfileid, + "-p", pidfile, + "-l", logfile, + "-S", sockfpath, NULL); + + if (!strcmp (server, "glustershd")) { + snprintf (glusterd_uuid_option, sizeof (glusterd_uuid_option), + "*replicate*.node-uuid=%s", uuid_utoa (MY_UUID)); + runner_add_args (&runner, "--xlator-option", + glusterd_uuid_option, NULL); + } + runner_log (&runner, "", GF_LOG_DEBUG, + "Starting the nfs/glustershd services"); + + ret = runner_run_nowait (&runner); + if (ret == 0) { + glusterd_nodesvc_connect (server, sockfpath); + } out: return ret; } +int +glusterd_nfs_server_start () +{ + return 
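For reference, the runner assembled above execs one glusterfs client process per node service. For "glustershd" the argv works out to roughly the following; the paths are illustrative defaults, and the real socket name additionally passes through glusterd_set_socket_filepath, so the literal -S value below is only a stand-in:

        #include <stdio.h>

        int
        main (void)
        {
                /* illustrative reconstruction of the glustershd command line */
                const char *argv[] = {
                        "/usr/sbin/glusterfs", "-s", "localhost",
                        "--volfile-id", "gluster/glustershd",
                        "-p", "/var/lib/glusterd/glustershd/run/glustershd.pid",
                        "-l", "/var/log/glusterfs/glustershd.log",
                        "-S", "/var/lib/glusterd/glustershd/run/run-<uuid>.socket",
                        "--xlator-option", "*replicate*.node-uuid=<uuid>", NULL };
                int i;

                for (i = 0; argv[i]; i++)
                        printf ("%s ", argv[i]);
                printf ("\n");
                return 0;
        }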
glusterd_nodesvc_start ("nfs"); +} + +int +glusterd_shd_start () +{ + return glusterd_nodesvc_start ("glustershd"); +} + +gf_boolean_t +glusterd_is_nodesvc_running (char *server) +{ + char pidfile[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; + + glusterd_get_nodesvc_pidfile (server, priv->workdir, + pidfile, sizeof (pidfile)); + return glusterd_is_service_running (pidfile, NULL); +} + +int32_t +glusterd_nodesvc_unlink_socket_file (char *server) +{ + int ret = 0; + char sockfpath[PATH_MAX] = {0,}; + char rundir[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; + + glusterd_get_nodesvc_rundir (server, priv->workdir, + rundir, sizeof (rundir)); + + glusterd_nodesvc_set_socket_filepath (rundir, MY_UUID, + sockfpath, sizeof (sockfpath)); + + ret = unlink (sockfpath); + if (ret && (ENOENT == errno)) { + ret = 0; + } else { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to remove %s" + " error: %s", sockfpath, strerror (errno)); + } + + return ret; +} + int32_t +glusterd_nodesvc_stop (char *server, int sig) +{ + char pidfile[PATH_MAX] = {0,}; + glusterd_conf_t *priv = THIS->private; + int ret = 0; + + if (!glusterd_is_nodesvc_running (server)) + goto out; + + (void)glusterd_nodesvc_disconnect (server); + + glusterd_get_nodesvc_pidfile (server, priv->workdir, + pidfile, sizeof (pidfile)); + ret = glusterd_service_stop (server, pidfile, sig, _gf_true); + + if (ret == 0) { + glusterd_nodesvc_set_online_status (server, _gf_false); + (void)glusterd_nodesvc_unlink_socket_file (server); + } +out: + return ret; +} + +void +glusterd_nfs_pmap_deregister () +{ + if (pmap_unset (MOUNT_PROGRAM, MOUNTV3_VERSION)) + gf_log ("", GF_LOG_INFO, "De-registered MOUNTV3 successfully"); + else + gf_log ("", GF_LOG_ERROR, "De-register MOUNTV3 is unsuccessful"); + + if (pmap_unset (MOUNT_PROGRAM, MOUNTV1_VERSION)) + gf_log ("", GF_LOG_INFO, "De-registered MOUNTV1 successfully"); + else + gf_log ("", GF_LOG_ERROR, "De-register MOUNTV1 is unsuccessful"); + + if (pmap_unset (NFS_PROGRAM, NFSV3_VERSION)) + gf_log ("", GF_LOG_INFO, "De-registered NFSV3 successfully"); + else + gf_log ("", GF_LOG_ERROR, "De-register NFSV3 is unsuccessful"); + + if (pmap_unset (NLM_PROGRAM, NLMV4_VERSION)) + gf_log ("", GF_LOG_INFO, "De-registered NLM v4 successfully"); + else + gf_log ("", GF_LOG_ERROR, "De-registration of NLM v4 failed"); + + if (pmap_unset (NLM_PROGRAM, NLMV1_VERSION)) + gf_log ("", GF_LOG_INFO, "De-registered NLM v1 successfully"); + else + gf_log ("", GF_LOG_ERROR, "De-registration of NLM v1 failed"); + + if (pmap_unset (ACL_PROGRAM, ACLV3_VERSION)) + gf_log ("", GF_LOG_INFO, "De-registered ACL v3 successfully"); + else + gf_log ("", GF_LOG_ERROR, "De-registration of ACL v3 failed"); +} + +int glusterd_nfs_server_stop () { - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + int ret = 0; + gf_boolean_t deregister = _gf_false; + + if (glusterd_is_nodesvc_running ("nfs")) + deregister = _gf_true; + ret = glusterd_nodesvc_stop ("nfs", SIGKILL); + if (ret) + goto out; + if (deregister) + glusterd_nfs_pmap_deregister (); +out: + return ret; +} + +int +glusterd_shd_stop () +{ + return glusterd_nodesvc_stop ("glustershd", SIGTERM); +} + +int +glusterd_add_node_to_dict (char *server, dict_t *dict, int count, + dict_t *vol_opts) +{ + int ret = -1; + glusterd_conf_t *priv = THIS->private; char pidfile[PATH_MAX] = {0,}; - char path[PATH_MAX] = {0,}; + gf_boolean_t running = _gf_false; + int pid = -1; + int port = 0; + char key[1024] = {0,}; + + glusterd_get_nodesvc_pidfile (server, priv->workdir, pidfile, + 
sizeof (pidfile)); + //Consider service to be running only when glusterd sees it Online + if (glusterd_is_nodesvc_online (server)) + running = glusterd_is_service_running (pidfile, &pid); + + /* For nfs-servers/self-heal-daemon setting + * brick<n>.hostname = "NFS Server" / "Self-heal Daemon" + * brick<n>.path = uuid + * brick<n>.port = 0 + * + * This might be confusing, but cli displays the name of + * the brick as hostname+path, so this will make more sense + * when output. + */ + snprintf (key, sizeof (key), "brick%d.hostname", count); + if (!strcmp (server, "nfs")) + ret = dict_set_str (dict, key, "NFS Server"); + else if (!strcmp (server, "glustershd")) + ret = dict_set_str (dict, key, "Self-heal Daemon"); + if (ret) + goto out; - this = THIS; - GF_ASSERT(this); + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.path", count); + ret = dict_set_dynstr (dict, key, gf_strdup (uuid_utoa (MY_UUID))); + if (ret) + goto out; - priv = this->private; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.port", count); + /* Port is available only for the NFS server. + * Self-heal daemon doesn't provide any port for access + * by entities other than gluster. + */ + if (!strcmp (server, "nfs")) { + if (dict_get (vol_opts, "nfs.port")) { + ret = dict_get_int32 (vol_opts, "nfs.port", &port); + if (ret) + goto out; + } else + port = GF_NFS3_PORT; + } + ret = dict_set_int32 (dict, key, port); + if (ret) + goto out; - GLUSTERD_GET_NFS_DIR(path, priv); - GLUSTERD_GET_NFS_PIDFILE(pidfile); + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.pid", count); + ret = dict_set_int32 (dict, key, pid); + if (ret) + goto out; - return glusterd_service_stop ("nfsd", pidfile, SIGKILL, _gf_true); + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.status", count); + ret = dict_set_int32 (dict, key, running); + if (ret) + goto out; + + +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; } int @@ -1804,49 +4212,251 @@ glusterd_remote_hostname_get (rpcsvc_request_t *req, char *remote_host, int len) GF_ASSERT (req->trans); char *name = NULL; - char *delimiter = NULL; + char *hostname = NULL; + char *tmp_host = NULL; + int ret = 0; name = req->trans->peerinfo.identifier; - strncpy (remote_host, name, len); - delimiter = strchr (remote_host, ':'); + tmp_host = gf_strdup (name); + if (tmp_host) + get_host_name (tmp_host, &hostname); - GF_ASSERT (delimiter); - if (!delimiter) { + GF_ASSERT (hostname); + if (!hostname) { memset (remote_host, 0, len); - return -1; + ret = -1; + goto out; } - *delimiter = '\0'; + strncpy (remote_host, hostname, strlen (hostname)); - return 0; + +out: + GF_FREE (tmp_host); + return ret; } int -glusterd_check_generate_start_nfs (glusterd_volinfo_t *volinfo) +glusterd_check_generate_start_service (int (*create_volfile) (), + int (*stop) (), int (*start) ()) { int ret = -1; - if (!volinfo) { - gf_log ("", GF_LOG_ERROR, "Invalid Arguments"); + ret = create_volfile (); + if (ret) + goto out; + + ret = stop (); + if (ret) + goto out; + + ret = start (); +out: + return ret; +} + +int +glusterd_reconfigure_nodesvc (int (*create_volfile) ()) +{ + int ret = -1; + + ret = create_volfile (); + if (ret) + goto out; + + ret = glusterd_fetchspec_notify (THIS); +out: + return ret; +} + +int +glusterd_reconfigure_shd () +{ + int (*create_volfile) () = glusterd_create_shd_volfile; + return glusterd_reconfigure_nodesvc (create_volfile); +} + +int +glusterd_reconfigure_nfs () +{ + int ret = -1; + 
gf_boolean_t identical = _gf_false; + + /* + * Check both OLD and NEW volfiles, if they are SAME by size + * and cksum i.e. "character-by-character". If YES, then + * NOTHING has been changed, just return. + */ + ret = glusterd_check_nfs_volfile_identical (&identical); + if (ret) + goto out; + + if (identical) { + ret = 0; goto out; } - ret = glusterd_create_nfs_volfile (); + /* + * They are not identical. Find out if the topology is changed + * OR just the volume options. If just the options which got + * changed, then inform the xlator to reconfigure the options. + */ + identical = _gf_false; /* RESET the FLAG */ + ret = glusterd_check_nfs_topology_identical (&identical); if (ret) goto out; - if (glusterd_is_nfs_started ()) { - ret = glusterd_nfs_server_stop (); - if (ret) - goto out; + /* Topology is not changed, but just the options. But write the + * options to NFS volfile, so that NFS will be reconfigured. + */ + if (identical) { + ret = glusterd_create_nfs_volfile(); + if (ret == 0) {/* Only if above PASSES */ + ret = glusterd_fetchspec_notify (THIS); + } + goto out; } - ret = glusterd_nfs_server_start (); + /* + * NFS volfile's topology has been changed. NFS server needs + * to be RESTARTED to ACT on the changed volfile. + */ + ret = glusterd_check_generate_start_nfs (); + +out: + return ret; +} + + +int +glusterd_check_generate_start_nfs () +{ + int ret = 0; + + ret = glusterd_check_generate_start_service (glusterd_create_nfs_volfile, + glusterd_nfs_server_stop, + glusterd_nfs_server_start); + return ret; +} + +int +glusterd_check_generate_start_shd () +{ + int ret = 0; + + ret = glusterd_check_generate_start_service (glusterd_create_shd_volfile, + glusterd_shd_stop, + glusterd_shd_start); + if (ret == -EINVAL) + ret = 0; + return ret; +} + +int +glusterd_nodesvcs_batch_op (glusterd_volinfo_t *volinfo, + int (*nfs_op) (), int (*shd_op) ()) +{ + int ret = 0; + + ret = nfs_op (); + if (ret) + goto out; + + if (volinfo && !glusterd_is_volume_replicate (volinfo)) + goto out; + + ret = shd_op (); + if (ret) + goto out; out: return ret; } int +glusterd_nodesvcs_start (glusterd_volinfo_t *volinfo) +{ + return glusterd_nodesvcs_batch_op (volinfo, + glusterd_nfs_server_start, + glusterd_shd_start); +} + +int +glusterd_nodesvcs_stop (glusterd_volinfo_t *volinfo) +{ + return glusterd_nodesvcs_batch_op (volinfo, + glusterd_nfs_server_stop, + glusterd_shd_stop); +} + +gf_boolean_t +glusterd_are_all_volumes_stopped () +{ + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + list_for_each_entry (voliter, &priv->volumes, vol_list) { + if (voliter->status == GLUSTERD_STATUS_STARTED) + return _gf_false; + } + + return _gf_true; + +} + +gf_boolean_t +glusterd_all_replicate_volumes_stopped () +{ + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + glusterd_volinfo_t *voliter = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + list_for_each_entry (voliter, &priv->volumes, vol_list) { + if (!glusterd_is_volume_replicate (voliter)) + continue; + if (voliter->status == GLUSTERD_STATUS_STARTED) + return _gf_false; + } + + return _gf_true; +} + +int +glusterd_nodesvcs_handle_graph_change (glusterd_volinfo_t *volinfo) +{ + int (*shd_op) () = NULL; + int (*nfs_op) () = NULL; + + shd_op = glusterd_check_generate_start_shd; + nfs_op = glusterd_check_generate_start_nfs; + if (glusterd_are_all_volumes_stopped ()) { + shd_op = 
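glusterd_reconfigure_nfs above is a three-way decision: an identical volfile means do nothing, the same topology with different options means regenerate the volfile and fetchspec-notify the running server, and a changed topology forces a full restart. Distilled into a sketch (the enum is hypothetical, not a glusterd type):

        /* Decision table for glusterd_reconfigure_nfs above. */
        enum nfs_action { NFS_NOOP, NFS_FETCHSPEC, NFS_RESTART };

        static enum nfs_action
        nfs_reconfigure_action (int volfile_identical, int topology_identical)
        {
                if (volfile_identical)
                        return NFS_NOOP;        /* nothing changed at all */
                if (topology_identical)
                        return NFS_FETCHSPEC;   /* options only: reconfigure live */
                return NFS_RESTART;             /* graph changed: restart server */
        }
        /* nfs_reconfigure_action (0, 1) == NFS_FETCHSPEC */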
glusterd_shd_stop; + nfs_op = glusterd_nfs_server_stop; + } else if (glusterd_all_replicate_volumes_stopped()) { + shd_op = glusterd_shd_stop; + } + return glusterd_nodesvcs_batch_op (volinfo, nfs_op, shd_op); +} + +int +glusterd_nodesvcs_handle_reconfigure (glusterd_volinfo_t *volinfo) +{ + return glusterd_nodesvcs_batch_op (volinfo, + glusterd_reconfigure_nfs, + glusterd_reconfigure_shd); +} + +int glusterd_volume_count_get (void) { glusterd_volinfo_t *tmp_volinfo = NULL; @@ -1888,9 +4498,9 @@ glusterd_brickinfo_get (uuid_t uuid, char *hostname, char *path, list_for_each_entry (volinfo, &priv->volumes, vol_list) { ret = glusterd_volume_brickinfo_get (uuid, hostname, path, - volinfo, - brickinfo); - if (!ret) + volinfo, brickinfo); + if (ret == 0) + /*Found*/ goto out; } out: @@ -1899,7 +4509,8 @@ out: int glusterd_brick_start (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) + glusterd_brickinfo_t *brickinfo, + gf_boolean_t wait) { int ret = -1; xlator_t *this = NULL; @@ -1916,54 +4527,146 @@ glusterd_brick_start (glusterd_volinfo_t *volinfo, if (uuid_is_null (brickinfo->uuid)) { ret = glusterd_resolve_brick (brickinfo); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "cannot resolve brick: %s:%s", + gf_log (this->name, GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK, brickinfo->hostname, brickinfo->path); goto out; } } - if (uuid_compare (brickinfo->uuid, conf->uuid)) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) { ret = 0; goto out; } - ret = glusterd_volume_start_glusterfs (volinfo, brickinfo); + ret = glusterd_volume_start_glusterfs (volinfo, brickinfo, wait); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to start " - "glusterfs, ret: %d", ret); + gf_log (this->name, GF_LOG_ERROR, "Unable to start brick %s:%s", + brickinfo->hostname, brickinfo->path); goto out; } - out: - gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); + gf_log (this->name, GF_LOG_DEBUG, "returning %d ", ret); return ret; } int glusterd_restart_bricks (glusterd_conf_t *conf) { - glusterd_volinfo_t *volinfo = NULL; - glusterd_brickinfo_t *brickinfo = NULL; - int ret = -1; + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_snap_t *snap = NULL; + gf_boolean_t start_nodesvcs = _gf_false; + xlator_t *this = NULL; - GF_ASSERT (conf); + this = THIS; + GF_ASSERT (this); list_for_each_entry (volinfo, &conf->volumes, vol_list) { - //If volume status is not started, do not proceed - if (volinfo->status == GLUSTERD_STATUS_STARTED) { + if (volinfo->status != GLUSTERD_STATUS_STARTED) + continue; + start_nodesvcs = _gf_true; + gf_log (this->name, GF_LOG_DEBUG, "starting the volume %s", + volinfo->volname); + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + glusterd_brick_start (volinfo, brickinfo, _gf_false); + } + } + + list_for_each_entry (snap, &conf->snapshots, snap_list) { + list_for_each_entry (volinfo, &snap->volumes, vol_list) { + if (volinfo->status != GLUSTERD_STATUS_STARTED) + continue; + start_nodesvcs = _gf_true; + gf_log (this->name, GF_LOG_DEBUG, "starting the snap " + "volume %s", volinfo->volname); list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { - glusterd_brick_start (volinfo, brickinfo); + glusterd_brick_start (volinfo, brickinfo, + _gf_false); } - glusterd_check_generate_start_nfs (volinfo); } } + + if (start_nodesvcs) + glusterd_nodesvcs_handle_graph_change (NULL); + + return ret; +} + +int +_local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) +{ + char *path_list = NULL; + char *slave = NULL; + 
int uuid_len = 0; + int ret = 0; + char uuid_str[64] = {0}; + glusterd_volinfo_t *volinfo = NULL; + char *conf_path = NULL; + + volinfo = data; + GF_ASSERT (volinfo); + slave = strchr(value->data, ':'); + if (slave) + slave ++; + else + return 0; + uuid_len = (slave - value->data - 1); + + strncpy (uuid_str, (char*)value->data, uuid_len); + + ret = glusterd_get_local_brickpaths (volinfo, &path_list); + + ret = dict_get_str (this, "conf_path", &conf_path); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to fetch conf file path."); + goto out; + } + + glusterd_start_gsync (volinfo, slave, path_list, conf_path, + uuid_str, NULL); + + GF_FREE (path_list); + path_list = NULL; + +out: return ret; } int -glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port, +glusterd_volume_restart_gsyncds (glusterd_volinfo_t *volinfo) +{ + GF_ASSERT (volinfo); + + dict_foreach (volinfo->gsync_slaves, _local_gsyncd_start, volinfo); + return 0; +} + +int +glusterd_restart_gsyncds (glusterd_conf_t *conf) +{ + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + glusterd_volume_restart_gsyncds (volinfo); + } + return ret; +} + +inline int +glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo) +{ + int rcount = volinfo->replica_count; + int scount = volinfo->stripe_count; + + return (rcount ? rcount : 1) * (scount ? scount : 1); +} + +int +glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port, gf_boolean_t localhost, glusterd_brickinfo_t **brickinfo) { glusterd_conf_t *priv = NULL; @@ -1978,7 +4681,7 @@ glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port, list_for_each_entry (volinfo, &priv->volumes, vol_list) { list_for_each_entry (tmpbrkinfo, &volinfo->bricks, brick_list) { - if (localhost && glusterd_is_local_addr (tmpbrkinfo->hostname)) + if (localhost && !gf_is_local_addr (tmpbrkinfo->hostname)) continue; if (!strcmp(tmpbrkinfo->path, brickname) && (tmpbrkinfo->port == port)) { @@ -1990,19 +4693,40 @@ glusterd_get_brickinfo (xlator_t *this, const char *brickname, int port, return ret; } +glusterd_brickinfo_t* +glusterd_get_brickinfo_by_position (glusterd_volinfo_t *volinfo, uint32_t pos) +{ + glusterd_brickinfo_t *tmpbrkinfo = NULL; + + list_for_each_entry (tmpbrkinfo, &volinfo->bricks, + brick_list) { + if (pos == 0) + return tmpbrkinfo; + pos--; + } + return NULL; +} + void glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo, - gf_brick_status_t status) + gf_brick_status_t status) { GF_ASSERT (brickinfo); brickinfo->status = status; + if (GF_BRICK_STARTED == status) { + gf_log ("glusterd", GF_LOG_DEBUG, "Setting brick %s:%s status " + "to started", brickinfo->hostname, brickinfo->path); + } else { + gf_log ("glusterd", GF_LOG_DEBUG, "Setting brick %s:%s status " + "to stopped", brickinfo->hostname, brickinfo->path); + } } -int +gf_boolean_t glusterd_is_brick_started (glusterd_brickinfo_t *brickinfo) { GF_ASSERT (brickinfo); - return (!(brickinfo->status == GF_BRICK_STARTED)); + return (brickinfo->status == GF_BRICK_STARTED); } int @@ -2028,6 +4752,481 @@ out: return -1; } +#ifdef GF_LINUX_HOST_OS +int +glusterd_get_brick_root (char *path, char **mount_point) +{ + char *ptr = NULL; + char *mnt_pt = NULL; + struct stat brickstat = {0}; + struct stat buf = {0}; + + if (!path) + goto err; + mnt_pt = gf_strdup (path); + if (!mnt_pt) + goto err; + if (stat (mnt_pt, &brickstat)) + goto err; + + while ((ptr = strrchr (mnt_pt, '/')) && + ptr != mnt_pt) { + + *ptr = '\0'; + if (stat 
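/* Reviewer annotation (illustrative, not part of the patch): the loop
 * here locates a brick's mount point by truncating one path component
 * at a time and stat()ing the parent: the first parent on a different
 * st_dev means the previous prefix was the mount root, so the '/' is
 * restored and the walk stops. The idea reduced to a standalone form:
 *
 *     #include <string.h>
 *     #include <sys/stat.h>
 *
 *     static int
 *     mount_root (char *buf, const struct stat *leaf)
 *     {
 *             struct stat st;
 *             char *p;
 *             while ((p = strrchr (buf, '/')) && p != buf) {
 *                     *p = '\0';
 *                     if (stat (buf, &st))
 *                             return -1;
 *                     if (leaf->st_dev != st.st_dev) {
 *                             *p = '/';   (crossed a mount; undo truncation)
 *                             return 0;
 *                     }
 *             }
 *             return 0;    (reached "/"; caller re-checks the root device)
 *     }
 */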
(mnt_pt, &buf)) { + gf_log (THIS->name, GF_LOG_ERROR, "error in " + "stat: %s", strerror (errno)); + goto err; + } + + if (brickstat.st_dev != buf.st_dev) { + *ptr = '/'; + break; + } + } + + if (ptr == mnt_pt) { + if (stat ("/", &buf)) { + gf_log (THIS->name, GF_LOG_ERROR, "error in " + "stat: %s", strerror (errno)); + goto err; + } + if (brickstat.st_dev == buf.st_dev) + strcpy (mnt_pt, "/"); + } + + *mount_point = mnt_pt; + return 0; + + err: + GF_FREE (mnt_pt); + return -1; +} + +static char* +glusterd_parse_inode_size (char *stream, char *pattern) +{ + char *needle = NULL; + char *trail = NULL; + + needle = strstr (stream, pattern); + if (!needle) + goto out; + + needle = nwstrtail (needle, pattern); + + trail = needle; + while (trail && isdigit (*trail)) trail++; + if (trail) + *trail = '\0'; + +out: + return needle; +} + +static int +glusterd_add_inode_size_to_dict (dict_t *dict, int count) +{ + int ret = -1; + char key[1024] = {0}; + char buffer[4096] = {0}; + char *inode_size = NULL; + char *device = NULL; + char *fs_name = NULL; + char *cur_word = NULL; + char *pattern = NULL; + char *trail = NULL; + runner_t runner = {0, }; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.device", count); + ret = dict_get_str (dict, key, &device); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.fs_name", count); + ret = dict_get_str (dict, key, &fs_name); + if (ret) + goto out; + + runinit (&runner); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + /* get inode size for xfs or ext2/3/4 */ + if (!strcmp (fs_name, "xfs")) { + + runner_add_args (&runner, "xfs_info", device, NULL); + pattern = "isize="; + + } else if (IS_EXT_FS(fs_name)) { + + runner_add_args (&runner, "tune2fs", "-l", device, NULL); + pattern = "Inode size:"; + + } else { + ret = 0; + gf_log (THIS->name, GF_LOG_INFO, "Skipped fetching " + "inode size for %s: FS type not recommended", + fs_name); + goto out; + } + + ret = runner_start (&runner); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "could not get inode " + "size for %s : %s package missing", fs_name, + ((strcmp (fs_name, "xfs")) ? + "e2fsprogs" : "xfsprogs")); + goto out; + } + + for (;;) { + if (fgets (buffer, sizeof (buffer), + runner_chio (&runner, STDOUT_FILENO)) == NULL) + break; + trail = strrchr (buffer, '\n'); + if (trail) + *trail = '\0'; + + cur_word = glusterd_parse_inode_size (buffer, pattern); + if (cur_word) + break; + } + + ret = runner_end (&runner); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "%s exited with non-zero " + "exit status", ((!strcmp (fs_name, "xfs")) ? + "xfs_info" : "tune2fs")); + goto out; + } + if (!cur_word) { + ret = -1; + gf_log (THIS->name, GF_LOG_ERROR, "Unable to retrieve inode " + "size using %s", + (!strcmp (fs_name, "xfs")? 
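/* Reviewer annotation (illustrative, not part of the patch): the inode
 * size above is scraped from tool output -- "isize=" in xfs_info,
 * "Inode size:" in tune2fs -- keeping the digits that follow the
 * pattern; nwstrtail() appears to skip the pattern while tolerating
 * interleaved whitespace. A plain-strstr equivalent of the parse:
 *
 *     #include <ctype.h>
 *     #include <string.h>
 *
 *     static char *
 *     parse_size (char *line, const char *pat)
 *     {
 *             char *p = strstr (line, pat);
 *             if (!p)
 *                     return NULL;
 *             p += strlen (pat);
 *             while (*p && !isdigit ((unsigned char)*p))
 *                     p++;
 *             char *q = p;
 *             while (isdigit ((unsigned char)*q))
 *                     q++;
 *             *q = '\0';
 *             return (*p) ? p : NULL;
 *     }
 *
 * e.g. "meta-data=/dev/sdb1 isize=512 agcount=4" yields "512".
 */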
"xfs_info": "tune2fs")); + goto out; + } + + inode_size = gf_strdup (cur_word); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "brick%d.inode_size", count); + + ret = dict_set_dynstr (dict, key, inode_size); + + out: + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "failed to get inode size"); + return ret; +} + +struct mntent * +glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab) +{ + struct mntent *entry = NULL; + + mtab = setmntent (_PATH_MOUNTED, "r"); + if (!mtab) + goto out; + + entry = getmntent (mtab); + + while (1) { + if (!entry) + goto out; + + if (!strcmp (entry->mnt_dir, mnt_pt) && + strcmp (entry->mnt_type, "rootfs")) + break; + entry = getmntent (mtab); + } + +out: + return entry; +} + +static int +glusterd_add_brick_mount_details (glusterd_brickinfo_t *brickinfo, + dict_t *dict, int count) +{ + int ret = -1; + char key[1024] = {0}; + char base_key[1024] = {0}; + char *mnt_pt = NULL; + char *fs_name = NULL; + char *mnt_options = NULL; + char *device = NULL; + struct mntent *entry = NULL; + FILE *mtab = NULL; + + snprintf (base_key, sizeof (base_key), "brick%d", count); + + ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt); + if (ret) + goto out; + + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (!entry) { + ret = -1; + goto out; + } + + /* get device file */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.device", base_key); + + device = gf_strdup (entry->mnt_fsname); + ret = dict_set_dynstr (dict, key, device); + if (ret) + goto out; + + /* fs type */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.fs_name", base_key); + + fs_name = gf_strdup (entry->mnt_type); + ret = dict_set_dynstr (dict, key, fs_name); + if (ret) + goto out; + + /* mount options */ + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.mnt_options", base_key); + + mnt_options = gf_strdup (entry->mnt_opts); + ret = dict_set_dynstr (dict, key, mnt_options); + + out: + GF_FREE (mnt_pt); + if (mtab) + endmntent (mtab); + + return ret; +} + +char* +glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo) +{ + int ret = -1; + char *mnt_pt = NULL; + char *device = NULL; + FILE *mtab = NULL; + struct mntent *entry = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (brickinfo); + + ret = glusterd_get_brick_root (brickinfo->path, &mnt_pt); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get mount point " + "for %s brick", brickinfo->path); + goto out; + } + + entry = glusterd_get_mnt_entry_info (mnt_pt, mtab); + if (NULL == entry) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get mnt entry " + "for %s mount path", mnt_pt); + goto out; + } + + /* get the fs_name/device */ + device = gf_strdup (entry->mnt_fsname); + +out: + if (NULL != mtab) { + endmntent (mtab); + } + + return device; +} +#endif + +int +glusterd_add_brick_detail_to_dict (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + dict_t *dict, int count) +{ + int ret = -1; + uint64_t memtotal = 0; + uint64_t memfree = 0; + uint64_t inodes_total = 0; + uint64_t inodes_free = 0; + uint64_t block_size = 0; + char key[1024] = {0}; + char base_key[1024] = {0}; + struct statvfs brickstat = {0}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (volinfo); + GF_ASSERT (brickinfo); + GF_ASSERT (dict); + + snprintf (base_key, sizeof (base_key), "brick%d", count); + + ret = statvfs (brickinfo->path, &brickstat); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "statfs error: %s ", + strerror (errno)); + 
goto out; + } + + /* file system block size */ + block_size = brickstat.f_bsize; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.block_size", base_key); + ret = dict_set_uint64 (dict, key, block_size); + if (ret) + goto out; + + /* free space in brick */ + memfree = brickstat.f_bfree * brickstat.f_bsize; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.free", base_key); + ret = dict_set_uint64 (dict, key, memfree); + if (ret) + goto out; + + /* total space of brick */ + memtotal = brickstat.f_blocks * brickstat.f_bsize; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.total", base_key); + ret = dict_set_uint64 (dict, key, memtotal); + if (ret) + goto out; + + /* inodes: total and free counts only for ext2/3/4 and xfs */ + inodes_total = brickstat.f_files; + if (inodes_total) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.total_inodes", base_key); + ret = dict_set_uint64 (dict, key, inodes_total); + if (ret) + goto out; + } + + inodes_free = brickstat.f_ffree; + if (inodes_free) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.free_inodes", base_key); + ret = dict_set_uint64 (dict, key, inodes_free); + if (ret) + goto out; + } +#ifdef GF_LINUX_HOST_OS + ret = glusterd_add_brick_mount_details (brickinfo, dict, count); + if (ret) + goto out; + + ret = glusterd_add_inode_size_to_dict (dict, count); +#endif + out: + if (ret) + gf_log (this->name, GF_LOG_DEBUG, "Error adding brick" + " detail to dict: %s", strerror (errno)); + return ret; +} + +int32_t +glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + dict_t *dict, int32_t count) +{ + + int ret = -1; + int32_t pid = -1; + int32_t brick_online = -1; + char key[1024] = {0}; + char base_key[1024] = {0}; + char pidfile[PATH_MAX] = {0}; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + + GF_ASSERT (volinfo); + GF_ASSERT (brickinfo); + GF_ASSERT (dict); + + this = THIS; + GF_ASSERT (this); + + priv = this->private; + + snprintf (base_key, sizeof (base_key), "brick%d", count); + snprintf (key, sizeof (key), "%s.hostname", base_key); + + ret = dict_set_str (dict, key, brickinfo->hostname); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.path", base_key); + ret = dict_set_str (dict, key, brickinfo->path); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.port", base_key); + ret = dict_set_int32 (dict, key, brickinfo->port); + if (ret) + goto out; + + GLUSTERD_GET_BRICK_PIDFILE (pidfile, volinfo, brickinfo, priv); + + brick_online = glusterd_is_service_running (pidfile, &pid); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.pid", base_key); + ret = dict_set_int32 (dict, key, pid); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "%s.status", base_key); + ret = dict_set_int32 (dict, key, brick_online); + +out: + if (ret) + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + +int32_t +glusterd_get_all_volnames (dict_t *dict) +{ + int ret = -1; + int32_t vol_count = 0; + char key[256] = {0}; + glusterd_volinfo_t *entry = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + + list_for_each_entry (entry, &priv->volumes, vol_list) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "vol%d", vol_count); + ret = dict_set_str (dict, key, entry->volname); + if (ret) + goto out; + + 
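/* Reviewer annotation (not part of the patch), two notes on the brick
 * detail code above. First, glusterd_get_mnt_entry_info() takes
 * FILE *mtab by value, so the stream it opens via setmntent() lands
 * only in the callee's copy; both callers initialise mtab to NULL and
 * their "if (mtab) endmntent (mtab)" cleanup never fires, leaking the
 * stream on every call. A FILE ** out-parameter would fix it:
 *
 *     #include <mntent.h>
 *     #include <paths.h>
 *     #include <stdio.h>
 *     #include <string.h>
 *
 *     static struct mntent *
 *     get_mnt_entry (const char *mnt_pt, FILE **mtab)
 *     {
 *             struct mntent *e;
 *             *mtab = setmntent (_PATH_MOUNTED, "r");
 *             if (!*mtab)
 *                     return NULL;
 *             while ((e = getmntent (*mtab)) != NULL)
 *                     if (!strcmp (e->mnt_dir, mnt_pt) &&
 *                         strcmp (e->mnt_type, "rootfs"))
 *                             return e;
 *             return NULL;       (caller endmntent()s *mtab either way)
 *     }
 *
 * Second, the capacity figures multiply f_blocks/f_bfree by f_bsize,
 * while POSIX defines those counts in f_frsize units; the two coincide
 * on the common Linux filesystems glusterd targets, which is presumably
 * why the arithmetic holds in practice.
 */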
vol_count++; + } + + ret = dict_set_int32 (dict, "vol_count", vol_count); + + out: + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "failed to get all " + "volume names for status"); + return ret; +} + int glusterd_all_volume_cond_check (glusterd_condition_func func, int status, void *ctx) @@ -2064,11 +5263,14 @@ glusterd_friend_find_by_uuid (uuid_t uuid, int ret = -1; glusterd_conf_t *priv = NULL; glusterd_peerinfo_t *entry = NULL; + xlator_t *this = NULL; + this = THIS; + GF_ASSERT (this); GF_ASSERT (peerinfo); *peerinfo = NULL; - priv = THIS->private; + priv = this->private; GF_ASSERT (priv); @@ -2078,14 +5280,16 @@ glusterd_friend_find_by_uuid (uuid_t uuid, list_for_each_entry (entry, &priv->peers, uuid_list) { if (!uuid_compare (entry->uuid, uuid)) { - gf_log ("glusterd", GF_LOG_NORMAL, - "Friend found.. state: %s", + gf_log (this->name, GF_LOG_DEBUG, + "Friend found... state: %s", glusterd_friend_sm_state_name_get (entry->state.state)); *peerinfo = entry; return 0; } } + gf_log (this->name, GF_LOG_DEBUG, "Friend with uuid: %s, not found", + uuid_utoa (uuid)); return ret; } @@ -2097,7 +5301,6 @@ glusterd_friend_find_by_hostname (const char *hoststr, int ret = -1; glusterd_conf_t *priv = NULL; glusterd_peerinfo_t *entry = NULL; - glusterd_peer_hostname_t *name = NULL; struct addrinfo *addr = NULL; struct addrinfo *p = NULL; char *host = NULL; @@ -2105,32 +5308,34 @@ glusterd_friend_find_by_hostname (const char *hoststr, struct sockaddr_in *s4 = NULL; struct in_addr *in_addr = NULL; char hname[1024] = {0,}; + xlator_t *this = NULL; + + this = THIS; GF_ASSERT (hoststr); GF_ASSERT (peerinfo); *peerinfo = NULL; - priv = THIS->private; + priv = this->private; GF_ASSERT (priv); list_for_each_entry (entry, &priv->peers, uuid_list) { - list_for_each_entry (name, &entry->hostnames, hostname_list) { - if (!strncmp (name->hostname, hoststr, - 1024)) { + if (!strncasecmp (entry->hostname, hoststr, + 1024)) { - gf_log ("glusterd", GF_LOG_NORMAL, + gf_log (this->name, GF_LOG_DEBUG, "Friend %s found.. state: %d", hoststr, entry->state.state); *peerinfo = entry; return 0; - } } } - ret = getaddrinfo(hoststr, NULL, NULL, &addr); + ret = getaddrinfo (hoststr, NULL, NULL, &addr); if (ret != 0) { - gf_log ("", GF_LOG_ERROR, "error in getaddrinfo: %s\n", + gf_log (this->name, GF_LOG_ERROR, + "error in getaddrinfo: %s\n", gai_strerror(ret)); goto out; } @@ -2156,23 +5361,21 @@ glusterd_friend_find_by_hostname (const char *hoststr, goto out; list_for_each_entry (entry, &priv->peers, uuid_list) { - list_for_each_entry (name, &entry->hostnames, - hostname_list) { - if (!strncmp (name->hostname, host, - 1024) || !strncmp (name->hostname,hname, - 1024)) { - gf_log ("glusterd", GF_LOG_NORMAL, - "Friend %s found.. state: %d", - hoststr, entry->state.state); - *peerinfo = entry; - freeaddrinfo (addr); - return 0; - } + if (!strncasecmp (entry->hostname, host, + 1024) || !strncasecmp (entry->hostname,hname, + 1024)) { + gf_log (this->name, GF_LOG_DEBUG, + "Friend %s found.. 
state: %d", + hoststr, entry->state.state); + *peerinfo = entry; + freeaddrinfo (addr); + return 0; } } } out: + gf_log (this->name, GF_LOG_DEBUG, "Unable to find friend: %s", hoststr); if (addr) freeaddrinfo (addr); return -1; @@ -2196,23 +5399,26 @@ glusterd_hostname_to_uuid (char *hostname, uuid_t uuid) ret = glusterd_friend_find_by_hostname (hostname, &peerinfo); if (ret) { - ret = glusterd_is_local_addr (hostname); - if (ret) + if (gf_is_local_addr (hostname)) { + uuid_copy (uuid, MY_UUID); + ret = 0; + } else { + ret = 0; goto out; - else - uuid_copy (uuid, priv->uuid); + } } else { uuid_copy (uuid, peerinfo->uuid); } out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); return ret; } int glusterd_brick_stop (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo) + glusterd_brickinfo_t *brickinfo, + gf_boolean_t del_brick) { int ret = -1; xlator_t *this = NULL; @@ -2229,39 +5435,53 @@ glusterd_brick_stop (glusterd_volinfo_t *volinfo, if (uuid_is_null (brickinfo->uuid)) { ret = glusterd_resolve_brick (brickinfo); if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "cannot resolve brick: %s:%s", + gf_log (this->name, GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK, brickinfo->hostname, brickinfo->path); goto out; } } - if (uuid_compare (brickinfo->uuid, conf->uuid)) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) { ret = 0; + if (del_brick) + glusterd_delete_brick (volinfo, brickinfo); goto out; } - gf_log ("", GF_LOG_NORMAL, "About to stop glusterfs" + gf_log (this->name, GF_LOG_DEBUG, "About to stop glusterfs" " for brick %s:%s", brickinfo->hostname, brickinfo->path); - ret = glusterd_volume_stop_glusterfs (volinfo, brickinfo); + ret = glusterd_volume_stop_glusterfs (volinfo, brickinfo, del_brick); if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Unable to remove" + gf_log (this->name, GF_LOG_CRITICAL, "Unable to stop" " brick: %s:%s", brickinfo->hostname, brickinfo->path); goto out; } out: - gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); + gf_log (this->name, GF_LOG_DEBUG, "returning %d ", ret); return ret; } int glusterd_is_defrag_on (glusterd_volinfo_t *volinfo) { - return ((volinfo->defrag_status == GF_DEFRAG_STATUS_STARTED) || - (volinfo->defrag_status == GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE)); + return (volinfo->rebal.defrag != NULL); +} + +gf_boolean_t +glusterd_is_rb_ongoing (glusterd_volinfo_t *volinfo) +{ + gf_boolean_t ret = _gf_false; + + GF_ASSERT (volinfo); + + if (glusterd_is_rb_started (volinfo) || + glusterd_is_rb_paused (volinfo)) + ret = _gf_true; + + return ret; } int @@ -2269,7 +5489,6 @@ glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo, char *op_errstr, size_t len) { glusterd_brickinfo_t *newbrickinfo = NULL; - glusterd_brickinfo_t *tmpbrkinfo = NULL; int ret = -1; gf_boolean_t is_allocated = _gf_false; glusterd_peerinfo_t *peerinfo = NULL; @@ -2286,7 +5505,7 @@ glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo, GF_ASSERT (op_errstr); if (!brickinfo) { - ret = glusterd_brickinfo_from_brick (brick, &newbrickinfo); + ret = glusterd_brickinfo_new_from_brick (brick, &newbrickinfo); if (ret) goto out; is_allocated = _gf_true; @@ -2296,61 +5515,73 @@ glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo, ret = glusterd_resolve_brick (newbrickinfo); if (ret) { - snprintf (op_errstr, len, "Host %s not a friend", - newbrickinfo->hostname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", op_errstr); + snprintf(op_errstr, len, "Host %s is not in \'Peer " + "in 
Cluster\' state", newbrickinfo->hostname); goto out; } - if (!uuid_compare (priv->uuid, newbrickinfo->uuid)) - goto brick_validation; - ret = glusterd_friend_find_by_uuid (newbrickinfo->uuid, &peerinfo); - if (ret) - goto out; - if ((!peerinfo->connected) || - (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)) { - snprintf(op_errstr, len, "Host %s not connected", - newbrickinfo->hostname); - gf_log ("glusterd", GF_LOG_ERROR, "%s", op_errstr); - ret = -1; - goto out; - } -brick_validation: - ret = glusterd_brickinfo_get (newbrickinfo->uuid, - newbrickinfo->hostname, - newbrickinfo->path, &tmpbrkinfo); - if (!ret) { - snprintf(op_errstr, len, "Brick: %s already in use", - brick); - gf_log ("", GF_LOG_ERROR, op_errstr); - ret = -1; - goto out; + if (!uuid_compare (MY_UUID, newbrickinfo->uuid)) { + /* brick is local */ + if (!glusterd_is_brickpath_available (newbrickinfo->uuid, + newbrickinfo->path)) { + snprintf(op_errstr, len, "Brick: %s not available." + " Brick may be containing or be contained " + "by an existing brick", brick); + ret = -1; + goto out; + } + } else { - ret = 0; + ret = glusterd_friend_find_by_uuid (newbrickinfo->uuid, + &peerinfo); + if (ret) { + snprintf (op_errstr, len, "Failed to find host %s", + newbrickinfo->hostname); + goto out; + } + + if ((!peerinfo->connected)) { + snprintf(op_errstr, len, "Host %s not connected", + newbrickinfo->hostname); + ret = -1; + goto out; + } + + if (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) { + snprintf(op_errstr, len, "Host %s is not in \'Peer " + "in Cluster\' state", + newbrickinfo->hostname); + ret = -1; + goto out; + } } + + ret = 0; out: - if (is_allocated && newbrickinfo) + if (is_allocated) glusterd_brickinfo_delete (newbrickinfo); - gf_log ("", GF_LOG_DEBUG, "returning %d ", ret); + if (op_errstr[0] != '\0') + gf_log (this->name, GF_LOG_ERROR, "%s", op_errstr); + gf_log (this->name, GF_LOG_DEBUG, "returning %d ", ret); return ret; } -inline int +int glusterd_is_rb_started(glusterd_volinfo_t *volinfo) { gf_log ("", GF_LOG_DEBUG, - "is_rb_started:status=%d", volinfo->rb_status); - return (volinfo->rb_status == GF_RB_STATUS_STARTED); + "is_rb_started:status=%d", volinfo->rep_brick.rb_status); + return (volinfo->rep_brick.rb_status == GF_RB_STATUS_STARTED); } -inline int +int glusterd_is_rb_paused ( glusterd_volinfo_t *volinfo) { gf_log ("", GF_LOG_DEBUG, - "is_rb_paused:status=%d", volinfo->rb_status); + "is_rb_paused:status=%d", volinfo->rep_brick.rb_status); - return (volinfo->rb_status == GF_RB_STATUS_PAUSED); + return (volinfo->rep_brick.rb_status == GF_RB_STATUS_PAUSED); } inline int @@ -2358,10 +5589,10 @@ glusterd_set_rb_status (glusterd_volinfo_t *volinfo, gf_rb_status_t status) { gf_log ("", GF_LOG_DEBUG, "setting status from %d to %d", - volinfo->rb_status, + volinfo->rep_brick.rb_status, status); - volinfo->rb_status = status; + volinfo->rep_brick.rb_status = status; return 0; } @@ -2369,54 +5600,3295 @@ inline int glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *src, glusterd_brickinfo_t *dst) { - if (!volinfo->src_brick || !volinfo->dst_brick) + glusterd_replace_brick_t *rb = NULL; + + GF_ASSERT (volinfo); + + rb = &volinfo->rep_brick; + + if (!rb->src_brick || !rb->dst_brick) return -1; - if (strcmp (volinfo->src_brick->hostname, src->hostname) || - strcmp (volinfo->src_brick->path, src->path)) { + if (strcmp (rb->src_brick->hostname, src->hostname) || + strcmp (rb->src_brick->path, src->path)) { gf_log("", GF_LOG_ERROR, "Replace brick src bricks differ"); return -1; } - if 
(strcmp (volinfo->dst_brick->hostname, dst->hostname) || - strcmp (volinfo->dst_brick->path, dst->path)) { + + if (strcmp (rb->dst_brick->hostname, dst->hostname) || + strcmp (rb->dst_brick->path, dst->path)) { gf_log ("", GF_LOG_ERROR, "Replace brick dst bricks differ"); return -1; } + return 0; } +/*path needs to be absolute; works only on gfid, volume-id*/ +static int +glusterd_is_uuid_present (char *path, char *xattr, gf_boolean_t *present) +{ + GF_ASSERT (path); + GF_ASSERT (xattr); + GF_ASSERT (present); + + int ret = -1; + uuid_t uid = {0,}; + + if (!path || !xattr || !present) + goto out; + + ret = sys_lgetxattr (path, xattr, &uid, 16); + + if (ret >= 0) { + *present = _gf_true; + ret = 0; + goto out; + } + + switch (errno) { +#if defined(ENODATA) + case ENODATA: /* FALLTHROUGH */ +#endif +#if defined(ENOATTR) && (ENOATTR != ENODATA) + case ENOATTR: /* FALLTHROUGH */ +#endif + case ENOTSUP: + *present = _gf_false; + ret = 0; + break; + default: + break; + } +out: + return ret; +} + +/*path needs to be absolute*/ +static int +glusterd_is_path_in_use (char *path, gf_boolean_t *in_use, char **op_errstr) +{ + int i = 0; + int ret = -1; + gf_boolean_t used = _gf_false; + char dir[PATH_MAX] = {0,}; + char *curdir = NULL; + char msg[2048] = {0}; + char *keys[3] = {GFID_XATTR_KEY, + GF_XATTR_VOL_ID_KEY, + NULL}; + + GF_ASSERT (path); + if (!path) + goto out; + + strcpy (dir, path); + curdir = dir; + do { + for (i = 0; !used && keys[i]; i++) { + ret = glusterd_is_uuid_present (curdir, keys[i], &used); + if (ret) + goto out; + } + + if (used) + break; + + curdir = dirname (curdir); + if (!strcmp (curdir, ".")) + goto out; + + + } while (strcmp (curdir, "/")); + + if (!strcmp (curdir, "/")) { + for (i = 0; !used && keys[i]; i++) { + ret = glusterd_is_uuid_present (curdir, keys[i], &used); + if (ret) + goto out; + } + } + + ret = 0; + *in_use = used; +out: + if (ret) { + snprintf (msg, sizeof (msg), "Failed to get extended " + "attribute %s, reason: %s", keys[i], + strerror (errno)); + } + + if (*in_use) { + if (!strcmp (path, curdir)) { + snprintf (msg, sizeof (msg), "%s is already part of a " + "volume", path); + } else { + snprintf (msg, sizeof (msg), "parent directory %s is " + "already part of a volume", curdir); + } + } + + if (strlen (msg)) { + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + } + + return ret; +} + int -glusterd_brick_create_path (char *host, char *path, mode_t mode, - char **op_errstr) +glusterd_check_and_set_brick_xattr (char *host, char *path, uuid_t uuid, + char **op_errstr, gf_boolean_t is_force) { - int ret = -1; - char msg[2048] = {0}; - struct stat st_buf = {0}; - - ret = stat (path, &st_buf); - if ((!ret) && (!S_ISDIR (st_buf.st_mode))) { - snprintf (msg, sizeof (msg), "brick %s:%s, " - "path %s is not a directory", host, path, path); - gf_log ("", GF_LOG_ERROR, "%s", msg); + int ret = -1; + char msg[2048] = {0,}; + gf_boolean_t in_use = _gf_false; + int flags = 0; + + /* Check for xattr support in backend fs */ + ret = sys_lsetxattr (path, "trusted.glusterfs.test", + "working", 8, 0); + if (ret) { + snprintf (msg, sizeof (msg), "Glusterfs is not" + " supported on brick: %s:%s.\nSetting" + " extended attributes failed, reason:" + " %s.", host, path, strerror(errno)); + goto out; + + } else { + sys_lremovexattr (path, "trusted.glusterfs.test"); + } + + ret = glusterd_is_path_in_use (path, &in_use, op_errstr); + if (ret) + goto out; + + if (in_use && !is_force) { + ret = -1; + goto out; + } + + + if (!is_force) + flags = 
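/* Reviewer annotation (not part of the patch): the XATTR_CREATE flag
 * chosen here makes sys_lsetxattr() fail with EEXIST when
 * GF_XATTR_VOL_ID_KEY is already set, so a non-forced create can never
 * silently re-stamp a brick that belongs to another volume; "force"
 * clears the flag and overwrites. Together with
 * glusterd_is_path_in_use() above, which dirname()-walks up to "/"
 * probing the gfid and volume-id keys, nested or reused brick paths
 * are rejected. The probe in isolation:
 *
 *     #include <sys/xattr.h>
 *
 *     unsigned char id[16];
 *     ssize_t n = lgetxattr (path, "trusted.glusterfs.volume-id", id, 16);
 *     int in_use = (n >= 0);     (ENODATA/ENOATTR/ENOTSUP mean "free")
 */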
XATTR_CREATE; + + ret = sys_lsetxattr (path, GF_XATTR_VOL_ID_KEY, uuid, 16, + flags); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to set extended " + "attributes %s, reason: %s", + GF_XATTR_VOL_ID_KEY, strerror (errno)); + goto out; + } + + ret = 0; +out: + if (strlen (msg)) *op_errstr = gf_strdup (msg); + + return ret; +} + +int +glusterd_sm_tr_log_transition_add_to_dict (dict_t *dict, + glusterd_sm_tr_log_t *log, int i, + int count) +{ + int ret = -1; + char key[512] = {0}; + char timestr[64] = {0,}; + char *str = NULL; + + GF_ASSERT (dict); + GF_ASSERT (log); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "log%d-old-state", count); + str = log->state_name_get (log->transitions[i].old_state); + ret = dict_set_str (dict, key, str); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "log%d-event", count); + str = log->event_name_get (log->transitions[i].event); + ret = dict_set_str (dict, key, str); + if (ret) + goto out; + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "log%d-new-state", count); + str = log->state_name_get (log->transitions[i].new_state); + ret = dict_set_str (dict, key, str); + if (ret) + goto out; + + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "log%d-time", count); + gf_time_fmt (timestr, sizeof timestr, log->transitions[i].time, + gf_timefmt_FT); + str = gf_strdup (timestr); + ret = dict_set_dynstr (dict, key, str); + if (ret) + goto out; + +out: + gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +int +glusterd_sm_tr_log_add_to_dict (dict_t *dict, + glusterd_sm_tr_log_t *circular_log) +{ + int ret = -1; + int i = 0; + int start = 0; + int end = 0; + int index = 0; + char key[256] = {0}; + glusterd_sm_tr_log_t *log = NULL; + int count = 0; + + GF_ASSERT (dict); + GF_ASSERT (circular_log); + + log = circular_log; + if (!log->count) + return 0; + + if (log->count == log->size) + start = log->current + 1; + + end = start + log->count; + for (i = start; i < end; i++, count++) { + index = i % log->count; + ret = glusterd_sm_tr_log_transition_add_to_dict (dict, log, index, + count); + if (ret) + goto out; + } + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "count"); + ret = dict_set_int32 (dict, key, log->count); + +out: + gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +int +glusterd_sm_tr_log_init (glusterd_sm_tr_log_t *log, + char * (*state_name_get) (int), + char * (*event_name_get) (int), + size_t size) +{ + glusterd_sm_transition_t *transitions = NULL; + int ret = -1; + + GF_ASSERT (size > 0); + GF_ASSERT (log && state_name_get && event_name_get); + + if (!log || !state_name_get || !event_name_get || (size <= 0)) + goto out; + + transitions = GF_CALLOC (size, sizeof (*transitions), + gf_gld_mt_sm_tr_log_t); + if (!transitions) + goto out; + + log->transitions = transitions; + log->size = size; + log->state_name_get = state_name_get; + log->event_name_get = event_name_get; + ret = 0; + +out: + gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +void +glusterd_sm_tr_log_delete (glusterd_sm_tr_log_t *log) +{ + if (!log) + return; + GF_FREE (log->transitions); + return; +} + +int +glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log, + int old_state, int new_state, + int event) +{ + glusterd_sm_transition_t *transitions = NULL; + int ret = -1; + int next = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + GF_ASSERT (log); + if (!log) + goto out; + + transitions = 
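/* Reviewer annotation (not part of the patch): the state-machine
 * transition log is a fixed-size ring. The append below follows the
 * classic pattern, and glusterd_sm_tr_log_add_to_dict() above replays
 * entries oldest-first by starting at current + 1 once the ring has
 * wrapped (count == size). Generic form of the append:
 *
 *     static void
 *     ring_add (int *slot, int size, int *current, int *count, int v)
 *     {
 *             int next = *count ? (*current + 1) % size : 0;
 *             slot[next] = v;
 *             *current = next;
 *             if (*count < size)
 *                     (*count)++;
 *     }
 */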
log->transitions; + if (!transitions) + goto out; + + if (log->count) + next = (log->current + 1) % log->size; + else + next = 0; + + transitions[next].old_state = old_state; + transitions[next].new_state = new_state; + transitions[next].event = event; + time (&transitions[next].time); + log->current = next; + if (log->count < log->size) + log->count++; + ret = 0; + gf_log (this->name, GF_LOG_DEBUG, "Transitioning from '%s' to '%s' " + "due to event '%s'", log->state_name_get (old_state), + log->state_name_get (new_state), log->event_name_get (event)); +out: + gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +int +glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, + glusterd_friend_sm_state_t state, uuid_t *uuid, + const char *hostname, int port) +{ + glusterd_peerinfo_t *new_peer = NULL; + int ret = -1; + + GF_ASSERT (peerinfo); + if (!peerinfo) + goto out; + + new_peer = GF_CALLOC (1, sizeof (*new_peer), gf_gld_mt_peerinfo_t); + if (!new_peer) + goto out; + + new_peer->state.state = state; + if (hostname) + new_peer->hostname = gf_strdup (hostname); + + INIT_LIST_HEAD (&new_peer->uuid_list); + + if (uuid) { + uuid_copy (new_peer->uuid, *uuid); + } + + ret = glusterd_sm_tr_log_init (&new_peer->sm_log, + glusterd_friend_sm_state_name_get, + glusterd_friend_sm_event_name_get, + GLUSTERD_TR_LOG_SIZE); + if (ret) + goto out; + + if (new_peer->state.state == GD_FRIEND_STATE_BEFRIENDED) + new_peer->quorum_contrib = QUORUM_WAITING; + new_peer->port = port; + *peerinfo = new_peer; +out: + if (ret && new_peer) + glusterd_friend_cleanup (new_peer); + gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +int32_t +glusterd_peer_destroy (glusterd_peerinfo_t *peerinfo) +{ + int32_t ret = -1; + + if (!peerinfo) + goto out; + + ret = glusterd_store_delete_peerinfo (peerinfo); + + if (ret) { + gf_log ("", GF_LOG_ERROR, "Deleting peer info failed"); + } + + list_del_init (&peerinfo->uuid_list); + GF_FREE (peerinfo->hostname); + glusterd_sm_tr_log_delete (&peerinfo->sm_log); + GF_FREE (peerinfo); + peerinfo = NULL; + + ret = 0; + +out: + return ret; +} + +int +glusterd_remove_pending_entry (struct list_head *list, void *elem) +{ + glusterd_pending_node_t *pending_node = NULL; + glusterd_pending_node_t *tmp = NULL; + int ret = 0; + + list_for_each_entry_safe (pending_node, tmp, list, list) { + if (elem == pending_node->node) { + list_del_init (&pending_node->list); + GF_FREE (pending_node); + ret = 0; + goto out; + } + } +out: + gf_log (THIS->name, GF_LOG_DEBUG, "returning %d", ret); + return ret; + +} + +int +glusterd_clear_pending_nodes (struct list_head *list) +{ + glusterd_pending_node_t *pending_node = NULL; + glusterd_pending_node_t *tmp = NULL; + + list_for_each_entry_safe (pending_node, tmp, list, list) { + list_del_init (&pending_node->list); + GF_FREE (pending_node); + } + + return 0; +} + +gf_boolean_t +glusterd_peerinfo_is_uuid_unknown (glusterd_peerinfo_t *peerinfo) +{ + GF_ASSERT (peerinfo); + + if (uuid_is_null (peerinfo->uuid)) + return _gf_true; + return _gf_false; +} + +int32_t +glusterd_delete_volume (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + GF_ASSERT (volinfo); + + ret = glusterd_store_delete_volume (volinfo); + + if (ret) + goto out; + + ret = glusterd_volinfo_delete (volinfo); +out: + gf_log (THIS->name, GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +int32_t +glusterd_delete_brick (glusterd_volinfo_t* volinfo, + glusterd_brickinfo_t *brickinfo) +{ + int ret = 0; + char voldir[PATH_MAX] = {0,}; + glusterd_conf_t *priv = 
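/* Reviewer annotation (not part of the patch): the teardown helpers in
 * this area (glusterd_clear_pending_nodes() above,
 * glusterd_delete_all_bricks() below) iterate with
 * list_for_each_entry_safe because the loop body frees the node it is
 * standing on; the plain iterator would step through freed memory when
 * advancing. The idiom:
 *
 *     struct item *pos = NULL, *tmp = NULL;
 *     list_for_each_entry_safe (pos, tmp, &head, list) {
 *             list_del_init (&pos->list);    (unlink first)
 *             GF_FREE (pos);                 (tmp already holds the next node)
 *     }
 */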
THIS->private; + GF_ASSERT (volinfo); + GF_ASSERT (brickinfo); + + GLUSTERD_GET_VOLUME_DIR(voldir, volinfo, priv); + + glusterd_delete_volfile (volinfo, brickinfo); + glusterd_store_delete_brick (brickinfo, voldir); + glusterd_brickinfo_delete (brickinfo); + volinfo->brick_count--; + return ret; +} + +int32_t +glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo) +{ + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_brickinfo_t *tmp = NULL; + + GF_ASSERT (volinfo); + + list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) { + ret = glusterd_delete_brick (volinfo, brickinfo); + } + return ret; +} + +int +glusterd_get_local_brickpaths (glusterd_volinfo_t *volinfo, char **pathlist) +{ + char **path_tokens = NULL; + char *tmp_path_list = NULL; + char path[PATH_MAX] = ""; + int32_t count = 0; + int32_t pathlen = 0; + int32_t total_len = 0; + int32_t ret = 0; + int i = 0; + glusterd_brickinfo_t *brickinfo = NULL; + + if ((!volinfo) || (!pathlist)) + goto out; + + path_tokens = GF_CALLOC (sizeof(char*), volinfo->brick_count, + gf_gld_mt_charptr); + if (!path_tokens) { + gf_log ("", GF_LOG_DEBUG, "Could not allocate memory."); ret = -1; goto out; - } else if (!ret) { + } + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + pathlen = snprintf (path, sizeof(path), + "--path=%s ", brickinfo->path); + if (pathlen < sizeof(path)) + path[pathlen] = '\0'; + else + path[sizeof(path)-1] = '\0'; + path_tokens[count] = gf_strdup (path); + if (!path_tokens[count]) { + gf_log ("", GF_LOG_DEBUG, + "Could not allocate memory."); + ret = -1; + goto out; + } + count++; + total_len += pathlen; + } + + tmp_path_list = GF_CALLOC (sizeof(char), total_len + 1, + gf_gld_mt_char); + if (!tmp_path_list) { + gf_log ("", GF_LOG_DEBUG, "Could not allocate memory."); + ret = -1; goto out; } - ret = mkdir (path, mode); - if ((ret == -1) && (EEXIST != errno)) { - snprintf (msg, sizeof (msg), "brick: %s:%s, path " - "creation failed, reason: %s", - host, path, strerror(errno)); - gf_log ("glusterd",GF_LOG_ERROR, "%s", msg); + for (i = 0; i < count; i++) + strcat (tmp_path_list, path_tokens[i]); + + if (count) + *pathlist = tmp_path_list; + + ret = count; +out: + for (i = 0; i < count; i++) { + GF_FREE (path_tokens[i]); + path_tokens[i] = NULL; + } + + GF_FREE (path_tokens); + path_tokens = NULL; + + if (ret == 0) { + gf_log ("", GF_LOG_DEBUG, "No Local Bricks Present."); + GF_FREE (tmp_path_list); + tmp_path_list = NULL; + } + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, + char *path_list, char *conf_path, + char *glusterd_uuid_str, + char **op_errstr) +{ + int32_t ret = 0; + int32_t status = 0; + char uuid_str [64] = {0}; + runner_t runner = {0,}; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int errcode = 0; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + uuid_utoa_r (MY_UUID, uuid_str); + + if (!path_list) { + ret = 0; + gf_log ("", GF_LOG_DEBUG, "No Bricks in this node." 
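/* Reviewer annotation (not part of the patch):
 * glusterd_get_local_brickpaths() above builds the gsyncd argument
 * string in two passes -- collect a "--path=<brick> " token per local
 * brick while summing lengths, then allocate once and concatenate. It
 * returns the token count, so 0 doubles as "no local bricks", and
 * glusterd_start_gsync() here skips spawning gsyncd when path_list is
 * NULL. The join in miniature:
 *
 *     #include <stdlib.h>
 *     #include <string.h>
 *
 *     size_t total = 0;
 *     for (int i = 0; i < n; i++)
 *             total += strlen (tok[i]);
 *     char *out = calloc (1, total + 1);
 *     for (int i = 0; out && i < n; i++)
 *             strcat (out, tok[i]);
 */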
+ " Not starting gsyncd."); + goto out; + } + + ret = gsync_status (master_vol->volname, slave, conf_path, &status); + if (status == 0) + goto out; + + uuid_utoa_r (master_vol->volume_id, uuid_str); + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", + path_list, "-c", NULL); + runner_argprintf (&runner, "%s", conf_path); + runner_argprintf (&runner, ":%s", master_vol->volname); + runner_add_args (&runner, slave, "--config-set", "session-owner", + uuid_str, NULL); + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret == -1) { + errcode = -1; + goto out; + } + + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", + path_list, "--monitor", "-c", NULL); + runner_argprintf (&runner, "%s", conf_path); + runner_argprintf (&runner, ":%s", master_vol->volname); + runner_argprintf (&runner, "--glusterd-uuid=%s", + uuid_utoa (priv->uuid)); + runner_add_arg (&runner, slave); + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret == -1) { + gf_asprintf (op_errstr, GEOREP" start failed for %s %s", + master_vol->volname, slave); + goto out; + } + + ret = 0; + +out: + if ((ret != 0) && errcode == -1) { + if (op_errstr) + *op_errstr = gf_strdup ("internal error, cannot start " + "the " GEOREP " session"); + } + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_recreate_volfiles (glusterd_conf_t *conf) +{ + + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + int op_ret = 0; + + GF_ASSERT (conf); + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + ret = generate_brick_volfiles (volinfo); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Failed to " + "regenerate brick volfiles for %s", + volinfo->volname); + op_ret = ret; + } + ret = generate_client_volfiles (volinfo, GF_CLIENT_TRUSTED); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Failed to " + "regenerate trusted client volfiles for %s", + volinfo->volname); + op_ret = ret; + } + ret = generate_client_volfiles (volinfo, GF_CLIENT_OTHER); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "Failed to " + "regenerate client volfiles for %s", + volinfo->volname); + op_ret = ret; + } + } + return op_ret; +} + +int32_t +glusterd_handle_upgrade_downgrade (dict_t *options, glusterd_conf_t *conf) +{ + int ret = 0; + char *type = NULL; + gf_boolean_t upgrade = _gf_false; + gf_boolean_t downgrade = _gf_false; + gf_boolean_t regenerate_volfiles = _gf_false; + gf_boolean_t terminate = _gf_false; + + ret = dict_get_str (options, "upgrade", &type); + if (!ret) { + ret = gf_string2boolean (type, &upgrade); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "upgrade option " + "%s is not a valid boolean type", type); + ret = -1; + goto out; + } + if (_gf_true == upgrade) + regenerate_volfiles = _gf_true; + } + + ret = dict_get_str (options, "downgrade", &type); + if (!ret) { + ret = gf_string2boolean (type, &downgrade); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "downgrade option " + "%s is not a valid boolean type", type); + ret = -1; + goto out; + } + } + + if (upgrade && downgrade) { + gf_log ("glusterd", GF_LOG_ERROR, "Both upgrade and downgrade" + " options are set. 
Only one should be on"); + ret = -1; + goto out; + } + + if (!upgrade && !downgrade) + ret = 0; + else + terminate = _gf_true; + if (regenerate_volfiles) { + ret = glusterd_recreate_volfiles (conf); + } +out: + if (terminate && (ret == 0)) + kill (getpid(), SIGTERM); + return ret; +} + +gf_boolean_t +glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo) +{ + gf_boolean_t replicates = _gf_false; + if (volinfo && ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) || + (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE))) + replicates = _gf_true; + return replicates; +} + +int +glusterd_set_dump_options (char *dumpoptions_path, char *options, + int option_cnt) +{ + int ret = 0; + char *dup_options = NULL; + char *option = NULL; + char *tmpptr = NULL; + FILE *fp = NULL; + int nfs_cnt = 0; + + if (0 == option_cnt || + (option_cnt == 1 && (!strcmp (options, "nfs ")))) { + ret = 0; + goto out; + } + + fp = fopen (dumpoptions_path, "w"); + if (!fp) { + ret = -1; + goto out; + } + dup_options = gf_strdup (options); + gf_log ("", GF_LOG_INFO, "Received following statedump options: %s", + dup_options); + option = strtok_r (dup_options, " ", &tmpptr); + while (option) { + if (!strcmp (option, "nfs")) { + if (nfs_cnt > 0) { + unlink (dumpoptions_path); + ret = 0; + goto out; + } + nfs_cnt++; + option = strtok_r (NULL, " ", &tmpptr); + continue; + } + fprintf (fp, "%s=yes\n", option); + option = strtok_r (NULL, " ", &tmpptr); + } + +out: + if (fp) + fclose (fp); + GF_FREE (dup_options); + return ret; +} + +int +glusterd_brick_statedump (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + char *options, int option_cnt, char **op_errstr) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char pidfile_path[PATH_MAX] = {0,}; + char dumpoptions_path[PATH_MAX] = {0,}; + FILE *pidfile = NULL; + pid_t pid = -1; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + if (uuid_is_null (brickinfo->uuid)) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, + "Cannot resolve brick %s:%s", + brickinfo->hostname, brickinfo->path); + goto out; + } + } + + if (uuid_compare (brickinfo->uuid, MY_UUID)) { + ret = 0; + goto out; + } + + GLUSTERD_GET_BRICK_PIDFILE (pidfile_path, volinfo, brickinfo, conf); + + pidfile = fopen (pidfile_path, "r"); + if (!pidfile) { + gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s", + pidfile_path); + ret = -1; + goto out; + } + + ret = fscanf (pidfile, "%d", &pid); + if (ret <= 0) { + gf_log ("", GF_LOG_ERROR, "Unable to get pid of brick process"); + ret = -1; + goto out; + } + + snprintf (dumpoptions_path, sizeof (dumpoptions_path), + DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", pid); + ret = glusterd_set_dump_options (dumpoptions_path, options, option_cnt); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "error while parsing the statedump " + "options"); + ret = -1; + goto out; + } + + gf_log ("", GF_LOG_INFO, "Performing statedump on brick with pid %d", + pid); + + kill (pid, SIGUSR1); + + sleep (1); + ret = 0; +out: + unlink (dumpoptions_path); + if (pidfile) + fclose (pidfile); + return ret; +} + +int +glusterd_nfs_statedump (char *options, int option_cnt, char **op_errstr) +{ + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char pidfile_path[PATH_MAX] = {0,}; + char path[PATH_MAX] = {0,}; + FILE *pidfile = NULL; + pid_t pid = -1; + char dumpoptions_path[PATH_MAX] = {0,}; + char *option = NULL; + char *tmpptr = NULL; + char *dup_options = 
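/* Reviewer annotation (not part of the patch): both statedump paths --
 * glusterd_brick_statedump() above and the NFS variant here -- share
 * one mechanism: read the daemon's pid from its pidfile, write the
 * requested "<option>=yes" lines to a glusterdump.<pid>.options file
 * under DEFAULT_VAR_RUN_DIRECTORY, then send SIGUSR1, which a glusterfs
 * process handles by dumping its state. Condensed, with
 * write_options_file() standing in as a hypothetical helper:
 *
 *     #include <signal.h>
 *     #include <stdio.h>
 *     #include <sys/types.h>
 *
 *     FILE *pf = fopen (pidfile_path, "r");
 *     pid_t pid = -1;
 *     if (pf && fscanf (pf, "%d", &pid) == 1 && pid > 0) {
 *             write_options_file (pid);
 *             kill (pid, SIGUSR1);
 *     }
 *     if (pf)
 *             fclose (pf);
 *
 * The sleep(1) after the kill is a heuristic pause for the dump to be
 * written out, not a synchronisation point.
 */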
NULL; + char msg[256] = {0,}; + + this = THIS; + GF_ASSERT (this); + conf = this->private; + GF_ASSERT (conf); + + dup_options = gf_strdup (options); + option = strtok_r (dup_options, " ", &tmpptr); + if (strcmp (option, "nfs")) { + snprintf (msg, sizeof (msg), "for nfs statedump, options should" + " be after the key nfs"); *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + + GLUSTERD_GET_NFS_DIR (path, conf); + GLUSTERD_GET_NFS_PIDFILE (pidfile_path, path); + + pidfile = fopen (pidfile_path, "r"); + if (!pidfile) { + gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s", + pidfile_path); + ret = -1; + goto out; + } + + ret = fscanf (pidfile, "%d", &pid); + if (ret <= 0) { + gf_log ("", GF_LOG_ERROR, "Unable to get pid of brick process"); + ret = -1; + goto out; + } + + snprintf (dumpoptions_path, sizeof (dumpoptions_path), + DEFAULT_VAR_RUN_DIRECTORY"/glusterdump.%d.options", pid); + ret = glusterd_set_dump_options (dumpoptions_path, options, option_cnt); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "error while parsing the statedump " + "options"); + ret = -1; + goto out; + } + + gf_log ("", GF_LOG_INFO, "Performing statedump on nfs server with " + "pid %d", pid); + + kill (pid, SIGUSR1); + + sleep (1); + + ret = 0; +out: + if (pidfile) + fclose (pidfile); + unlink (dumpoptions_path); + GF_FREE (dup_options); + return ret; +} + +/* Checks if the given peer contains all the bricks belonging to the + * given volume. Returns true if it does else returns false + */ +gf_boolean_t +glusterd_friend_contains_vol_bricks (glusterd_volinfo_t *volinfo, + uuid_t friend_uuid) +{ + gf_boolean_t ret = _gf_true; + glusterd_brickinfo_t *brickinfo = NULL; + + GF_ASSERT (volinfo); + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (friend_uuid, brickinfo->uuid)) { + ret = _gf_false; + break; + } + } + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Remove all volumes which completely belong to given friend + */ +int +glusterd_friend_remove_cleanup_vols (uuid_t uuid) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + glusterd_volinfo_t *tmp_volinfo = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + + list_for_each_entry_safe (volinfo, tmp_volinfo, + &priv->volumes, vol_list) { + if (glusterd_friend_contains_vol_bricks (volinfo, uuid)) { + gf_log (THIS->name, GF_LOG_INFO, + "Deleting stale volume %s", volinfo->volname); + ret = glusterd_delete_volume (volinfo); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Error deleting stale volume"); + goto out; + } + } + } + ret = 0; +out: + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Check if the all peers are connected and befriended, except the peer + * specified (the peer being detached) + */ +gf_boolean_t +glusterd_chk_peers_connected_befriended (uuid_t skip_uuid) +{ + gf_boolean_t ret = _gf_true; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + + priv= THIS->private; + GF_ASSERT (priv); + + list_for_each_entry (peerinfo, &priv->peers, uuid_list) { + + if (!uuid_is_null (skip_uuid) && !uuid_compare (skip_uuid, + peerinfo->uuid)) + continue; + + if ((GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) + || !(peerinfo->connected)) { + ret = _gf_false; + break; + } + } + gf_log (THIS->name, GF_LOG_DEBUG, "Returning %s", + (ret?"TRUE":"FALSE")); + return ret; +} + +void +glusterd_get_client_filepath (char *filepath, glusterd_volinfo_t *volinfo, + gf_transport_type type) +{ + char 
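/* Reviewer annotation (not part of the patch): peer-detach cleanup
 * above removes a volume only when every brick's uuid matches the
 * departing peer (glusterd_friend_contains_vol_bricks), so any volume
 * with even one brick hosted elsewhere survives the detach. The
 * predicate in isolation:
 *
 *     gf_boolean_t all_owned = _gf_true;
 *     list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
 *             if (uuid_compare (friend_uuid, brickinfo->uuid)) {
 *                     all_owned = _gf_false;  (one foreign brick is enough)
 *                     break;
 *             }
 *     }
 */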
path[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); + + if ((volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) && + (type == GF_TRANSPORT_RDMA)) + snprintf (filepath, PATH_MAX, "%s/%s.rdma-fuse.vol", + path, volinfo->volname); + else + snprintf (filepath, PATH_MAX, "%s/%s-fuse.vol", + path, volinfo->volname); +} + +void +glusterd_get_trusted_client_filepath (char *filepath, + glusterd_volinfo_t *volinfo, + gf_transport_type type) +{ + char path[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); + + if ((volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) && + (type == GF_TRANSPORT_RDMA)) + snprintf (filepath, PATH_MAX, + "%s/trusted-%s.rdma-fuse.vol", + path, volinfo->volname); + else + snprintf (filepath, PATH_MAX, + "%s/trusted-%s-fuse.vol", + path, volinfo->volname); +} + +int +glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, int cmd, defrag_cbk_fn_t cbk) +{ + glusterd_conf_t *priv = NULL; + char pidfile[PATH_MAX]; + int ret = -1; + pid_t pid; + + priv = THIS->private; + if (!priv) + return ret; + + GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); + + if (!glusterd_is_service_running (pidfile, &pid)) { + glusterd_handle_defrag_start (volinfo, op_errstr, len, cmd, + cbk, volinfo->rebal.op); } else { + glusterd_rebalance_rpc_create (volinfo, priv, cmd); + } + + return ret; +} + +int +glusterd_restart_rebalance (glusterd_conf_t *conf) +{ + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + char op_errstr[256]; + + list_for_each_entry (volinfo, &conf->volumes, vol_list) { + if (!volinfo->rebal.defrag_cmd) + continue; + glusterd_volume_defrag_restart (volinfo, op_errstr, 256, + volinfo->rebal.defrag_cmd, NULL); + } + return ret; +} + +void +glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo) +{ + glusterd_rebalance_t *rebal = NULL; + GF_ASSERT (volinfo); + + rebal = &volinfo->rebal; + rebal->rebalance_files = 0; + rebal->rebalance_data = 0; + rebal->lookedup_files = 0; + rebal->rebalance_failures = 0; + rebal->rebalance_time = 0; + rebal->skipped_files = 0; + +} + +/* Return hostname for given uuid if it exists + * else return NULL + */ +char * +glusterd_uuid_to_hostname (uuid_t uuid) +{ + char *hostname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_peerinfo_t *entry = NULL; + + priv = THIS->private; + GF_ASSERT (priv); + + if (!uuid_compare (MY_UUID, uuid)) { + hostname = gf_strdup ("localhost"); + } + if (!list_empty (&priv->peers)) { + list_for_each_entry (entry, &priv->peers, uuid_list) { + if (!uuid_compare (entry->uuid, uuid)) { + hostname = gf_strdup (entry->hostname); + break; + } + } + } + + return hostname; +} + +gf_boolean_t +glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo) +{ + gf_boolean_t local = _gf_false; + int ret = 0; + glusterd_conf_t *conf = NULL; + + if (uuid_is_null (brickinfo->uuid)) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) + goto out; + } + conf = this->private; + local = !uuid_compare (brickinfo->uuid, MY_UUID); +out: + return local; +} +int +glusterd_validate_volume_id (dict_t *op_dict, glusterd_volinfo_t *volinfo) +{ + int ret = -1; + char *volid_str = NULL; + uuid_t vol_uid = {0, }; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + ret = dict_get_str (op_dict, "vol-id", &volid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get 
volume id for " + "volume %s", volinfo->volname); + goto out; + } + ret = uuid_parse (volid_str, vol_uid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to parse volume id " + "for volume %s", volinfo->volname); + goto out; + } + + if (uuid_compare (vol_uid, volinfo->volume_id)) { + gf_log (this->name, GF_LOG_ERROR, "Volume ids of volume %s - %s" + " and %s - are different. Possibly a split brain among " + "peers.", volinfo->volname, volid_str, + uuid_utoa (volinfo->volume_id)); + ret = -1; + goto out; + } + +out: + return ret; +} + +int +glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, + dict_t *rsp_dict) +{ + int ret = 0; + uint64_t files = 0; + uint64_t size = 0; + uint64_t lookup = 0; + gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED; + uint64_t failures = 0; + uint64_t skipped = 0; + xlator_t *this = NULL; + double run_time = 0; + + this = THIS; + + ret = dict_get_uint64 (rsp_dict, "files", &files); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get file count"); + + ret = dict_get_uint64 (rsp_dict, "size", &size); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get size of xfer"); + + ret = dict_get_uint64 (rsp_dict, "lookups", &lookup); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get lookedup file count"); + + ret = dict_get_int32 (rsp_dict, "status", (int32_t *)&status); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get status"); + + ret = dict_get_uint64 (rsp_dict, "failures", &failures); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get failure count"); + + ret = dict_get_uint64 (rsp_dict, "skipped", &skipped); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get skipped count"); + + ret = dict_get_double (rsp_dict, "run-time", &run_time); + if (ret) + gf_log (this->name, GF_LOG_TRACE, + "failed to get run-time"); + + if (files) + volinfo->rebal.rebalance_files = files; + if (size) + volinfo->rebal.rebalance_data = size; + if (lookup) + volinfo->rebal.lookedup_files = lookup; + if (status) + volinfo->rebal.defrag_status = status; + if (failures) + volinfo->rebal.rebalance_failures = failures; + if (skipped) + volinfo->rebal.skipped_files = skipped; + if (run_time) + volinfo->rebal.rebalance_time = run_time; + + return ret; +} + +int +glusterd_check_topology_identical (const char *filename1, + const char *filename2, + gf_boolean_t *identical) +{ + int ret = -1; /* FAILURE */ + xlator_t *this = NULL; + FILE *fp1 = NULL; + FILE *fp2 = NULL; + glusterfs_graph_t *grph1 = NULL; + glusterfs_graph_t *grph2 = NULL; + + if ((!filename1) || (!filename2) || (!identical)) + goto out; + + this = THIS; + + errno = 0; /* RESET the errno */ + + /* fopen() the volfile1 to create the graph */ + fp1 = fopen (filename1, "r"); + if (fp1 == NULL) { + gf_log (this->name, GF_LOG_ERROR, "fopen() on file: %s failed " + "(%s)", filename1, strerror (errno)); + goto out; + } + + /* fopen() the volfile2 to create the graph */ + fp2 = fopen (filename2, "r"); + if (fp2 == NULL) { + gf_log (this->name, GF_LOG_ERROR, "fopen() on file: %s failed " + "(%s)", filename2, strerror (errno)); + goto out; + } + + /* create the graph for filename1 */ + grph1 = glusterfs_graph_construct(fp1); + if (grph1 == NULL) + goto out; + + /* create the graph for filename2 */ + grph2 = glusterfs_graph_construct(fp2); + if (grph2 == NULL) + goto out; + + /* compare the graph topology */ + *identical = is_graph_topology_equal(grph1, grph2); + ret = 0; /* SUCCESS */ +out: + if (fp1) + fclose(fp1); + if (fp2) + 
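/* Reviewer annotation (not part of the patch): this function parses
 * both volfiles into graphs (glusterfs_graph_construct) and compares
 * shape only (is_graph_topology_equal), which is what lets option-only
 * edits reconfigure a running NFS server while topology changes force
 * a restart. One hazard: the early NULL-argument check jumps to "out"
 * before this = THIS is assigned, so the trailing
 * gf_log (this->name, ...) would dereference a NULL this. Hoisting the
 * assignment avoids it:
 *
 *     xlator_t *this = THIS;     (assign before any goto out)
 *     if (!filename1 || !filename2 || !identical)
 *             goto out;
 */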
fclose(fp2); + if (grph1) + glusterfs_graph_destroy(grph1); + if (grph2) + glusterfs_graph_destroy(grph2); + + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +int +glusterd_check_files_identical (char *filename1, char *filename2, + gf_boolean_t *identical) +{ + int ret = -1; + struct stat buf1 = {0,}; + struct stat buf2 = {0,}; + uint32_t cksum1 = 0; + uint32_t cksum2 = 0; + xlator_t *this = NULL; + + GF_ASSERT (filename1); + GF_ASSERT (filename2); + GF_ASSERT (identical); + + this = THIS; + + ret = stat (filename1, &buf1); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "stat on file: %s failed " + "(%s)", filename1, strerror (errno)); + goto out; + } + + ret = stat (filename2, &buf2); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "stat on file: %s failed " + "(%s)", filename2, strerror (errno)); + goto out; + } + + if (buf1.st_size != buf2.st_size) { + *identical = _gf_false; + goto out; + } + + ret = get_checksum_for_path (filename1, &cksum1); + if (ret) + goto out; + + + ret = get_checksum_for_path (filename2, &cksum2); + if (ret) + goto out; + + if (cksum1 != cksum2) + *identical = _gf_false; + else + *identical = _gf_true; + +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + +int +glusterd_volset_help (dict_t *dict, char **op_errstr) +{ + int ret = -1; + gf_boolean_t xml_out = _gf_false; + xlator_t *this = NULL; + + this = THIS; + + if (!dict) { + if (!(dict = glusterd_op_get_ctx ())) { + ret = 0; + goto out; + } + } + + if (dict_get (dict, "help" )) { + xml_out = _gf_false; + + } else if (dict_get (dict, "help-xml" )) { + xml_out = _gf_true; +#if (HAVE_LIB_XML) + ret = 0; +#else + gf_log (this->name, GF_LOG_ERROR, + "libxml not present in the system"); + if (op_errstr) + *op_errstr = gf_strdup ("Error: xml libraries not " + "present to produce " + "xml-output"); + goto out; +#endif + + } else { + goto out; + } + + ret = glusterd_get_volopt_content (dict, xml_out); + if (ret && op_errstr) + *op_errstr = gf_strdup ("Failed to get volume options help"); + out: + + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_to_cli (rpcsvc_request_t *req, gf_cli_rsp *arg, struct iovec *payload, + int payloadcount, struct iobref *iobref, xdrproc_t xdrproc, + dict_t *dict) +{ + int ret = -1; + char *cmd = NULL; + int op_ret = 0; + char *op_errstr = NULL; + int op_errno = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + op_ret = arg->op_ret; + op_errstr = arg->op_errstr; + op_errno = arg->op_errno; + + ret = dict_get_str (dict, "cmd-str", &cmd); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to get command " + "string"); + + if (cmd) { + if (op_ret) + gf_cmd_log ("", "%s : FAILED %s %s", cmd, + (op_errstr)? ":" : " ", + (op_errstr)? 
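/* Reviewer annotation (not part of the patch):
 * glusterd_check_files_identical() above is deliberately cheap-first:
 * it compares stat() sizes and only computes checksums
 * (get_checksum_for_path) when the sizes already match, keeping the
 * common "volfile grew" case inexpensive.
 *
 *     struct stat a, b;
 *     if (stat (f1, &a) || stat (f2, &b))
 *             return -1;
 *     if (a.st_size != b.st_size)
 *             return 0;          (*identical stays false; no checksum)
 *     (fall through to checksum comparison only on equal sizes)
 */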
op_errstr : " "); + else + gf_cmd_log ("", "%s : SUCCESS", cmd); + } + + glusterd_submit_reply (req, arg, payload, payloadcount, iobref, + (xdrproc_t) xdrproc); + if (dict) + dict_unref (dict); + + return ret; +} + +static int32_t +glusterd_append_gsync_status (dict_t *dst, dict_t *src) +{ + int ret = 0; + char *stop_msg = NULL; + + ret = dict_get_str (src, "gsync-status", &stop_msg); + if (ret) { + ret = 0; + goto out; + } + + ret = dict_set_dynstr (dst, "gsync-status", gf_strdup (stop_msg)); + if (ret) { + gf_log ("glusterd", GF_LOG_WARNING, "Unable to set the stop" + "message in the ctx dictionary"); + goto out; + } + + ret = 0; + out: + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; + +} + +static int32_t +glusterd_append_status_dicts (dict_t *dst, dict_t *src) +{ + int dst_count = 0; + int src_count = 0; + int i = 0; + int ret = 0; + char mst[PATH_MAX] = {0,}; + char slv[PATH_MAX] = {0, }; + char sts[PATH_MAX] = {0, }; + char nds[PATH_MAX] = {0, }; + char *mst_val = NULL; + char *slv_val = NULL; + char *sts_val = NULL; + char *nds_val = NULL; + + GF_ASSERT (dst); + + if (src == NULL) + goto out; + + ret = dict_get_int32 (dst, "gsync-count", &dst_count); + if (ret) + dst_count = 0; + + ret = dict_get_int32 (src, "gsync-count", &src_count); + if (ret || !src_count) { + gf_log ("", GF_LOG_DEBUG, "Source brick empty"); ret = 0; + goto out; + } + + for (i = 1; i <= src_count; i++) { + snprintf (nds, sizeof(nds), "node%d", i); + snprintf (mst, sizeof(mst), "master%d", i); + snprintf (slv, sizeof(slv), "slave%d", i); + snprintf (sts, sizeof(sts), "status%d", i); + + ret = dict_get_str (src, nds, &nds_val); + if (ret) + goto out; + + ret = dict_get_str (src, mst, &mst_val); + if (ret) + goto out; + + ret = dict_get_str (src, slv, &slv_val); + if (ret) + goto out; + + ret = dict_get_str (src, sts, &sts_val); + if (ret) + goto out; + + snprintf (nds, sizeof(nds), "node%d", i+dst_count); + snprintf (mst, sizeof(mst), "master%d", i+dst_count); + snprintf (slv, sizeof(slv), "slave%d", i+dst_count); + snprintf (sts, sizeof(sts), "status%d", i+dst_count); + + ret = dict_set_dynstr (dst, nds, gf_strdup (nds_val)); + if (ret) + goto out; + + ret = dict_set_dynstr (dst, mst, gf_strdup (mst_val)); + if (ret) + goto out; + + ret = dict_set_dynstr (dst, slv, gf_strdup (slv_val)); + if (ret) + goto out; + + ret = dict_set_dynstr (dst, sts, gf_strdup (sts_val)); + if (ret) + goto out; + + } + + ret = dict_set_int32 (dst, "gsync-count", dst_count+src_count); + + out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; + +} + +int32_t +glusterd_gsync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict, char *op_errstr) +{ + dict_t *ctx = NULL; + int ret = 0; + char *conf_path = NULL; + + if (aggr) { + ctx = aggr; + + } else { + ctx = glusterd_op_get_ctx (); + if (!ctx) { + gf_log ("", GF_LOG_ERROR, + "Operation Context is not present"); + GF_ASSERT (0); + } + } + + if (rsp_dict) { + ret = glusterd_append_status_dicts (ctx, rsp_dict); + if (ret) + goto out; + + ret = glusterd_append_gsync_status (ctx, rsp_dict); + if (ret) + goto out; + + ret = dict_get_str (rsp_dict, "conf_path", &conf_path); + if (!ret && conf_path) { + ret = dict_set_dynstr (ctx, "conf_path", + gf_strdup(conf_path)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to store conf path."); + goto out; + } + } + } + if ((op_errstr) && (strcmp ("", op_errstr))) { + ret = dict_set_dynstr (ctx, "errstr", gf_strdup(op_errstr)); + if (ret) + goto out; + } + + ret = 0; + out: + gf_log ("", GF_LOG_DEBUG, "Returning %d ", 
ret); + return ret; +} + +int32_t +glusterd_rb_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + int32_t src_port = 0; + int32_t dst_port = 0; + int ret = 0; + dict_t *ctx = NULL; + + + if (aggr) { + ctx = aggr; + + } else { + ctx = glusterd_op_get_ctx (); + if (!ctx) { + gf_log ("", GF_LOG_ERROR, + "Operation Context is not present"); + GF_ASSERT (0); + } + } + + if (rsp_dict) { + ret = dict_get_int32 (rsp_dict, "src-brick-port", &src_port); + if (ret == 0) { + gf_log ("", GF_LOG_DEBUG, + "src-brick-port=%d found", src_port); + } + + ret = dict_get_int32 (rsp_dict, "dst-brick-port", &dst_port); + if (ret == 0) { + gf_log ("", GF_LOG_DEBUG, + "dst-brick-port=%d found", dst_port); + } + + } + + if (src_port) { + ret = dict_set_int32 (ctx, "src-brick-port", + src_port); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not set src-brick"); + goto out; + } + } + + if (dst_port) { + ret = dict_set_int32 (ctx, "dst-brick-port", + dst_port); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not set dst-brick"); + goto out; + } + } out: - gf_log ("", GF_LOG_DEBUG, "returning %d", ret); + return ret; + +} + +int32_t +glusterd_sync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + + GF_ASSERT (rsp_dict); + + if (!rsp_dict) { + goto out; + } + + ret = glusterd_import_friend_volumes (rsp_dict); +out: + return ret; + +} + +static int +_profile_volume_add_friend_rsp (dict_t *this, char *key, data_t *value, + void *data) +{ + char new_key[256] = {0}; + glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL; + data_t *new_value = NULL; + int brick_count = 0; + char brick_key[256]; + + if (strcmp (key, "count") == 0) + return 0; + sscanf (key, "%d%s", &brick_count, brick_key); + rsp_ctx = data; + new_value = data_copy (value); + GF_ASSERT (new_value); + snprintf (new_key, sizeof (new_key), "%d%s", + rsp_ctx->count + brick_count, brick_key); + dict_set (rsp_ctx->dict, new_key, new_value); + return 0; +} + +int +glusterd_profile_volume_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + glusterd_pr_brick_rsp_conv_t rsp_ctx = {0}; + int32_t brick_count = 0; + int32_t count = 0; + dict_t *ctx_dict = NULL; + glusterd_op_t op = GD_OP_NONE; + + GF_ASSERT (rsp_dict); + + ret = dict_get_int32 (rsp_dict, "count", &brick_count); + if (ret) { + ret = 0; //no bricks in the rsp + goto out; + } + + op = glusterd_op_get_op (); + GF_ASSERT (GD_OP_PROFILE_VOLUME == op); + if (aggr) { + ctx_dict = aggr; + + } else { + ctx_dict = glusterd_op_get_ctx (); + } + + ret = dict_get_int32 (ctx_dict, "count", &count); + rsp_ctx.count = count; + rsp_ctx.dict = ctx_dict; + dict_foreach (rsp_dict, _profile_volume_add_friend_rsp, &rsp_ctx); + dict_del (ctx_dict, "count"); + ret = dict_set_int32 (ctx_dict, "count", count + brick_count); +out: + return ret; +} + +static int +glusterd_volume_status_add_peer_rsp (dict_t *this, char *key, data_t *value, + void *data) +{ + glusterd_status_rsp_conv_t *rsp_ctx = NULL; + data_t *new_value = NULL; + char brick_key[1024] = {0,}; + char new_key[1024] = {0,}; + int32_t index = 0; + int32_t ret = 0; + + /* Skip the following keys, they are already present in the ctx_dict */ + if (!strcmp (key, "count") || !strcmp (key, "cmd") || + !strcmp (key, "brick-index-max") || !strcmp (key, "other-count")) + return 0; + + rsp_ctx = data; + new_value = data_copy (value); + GF_ASSERT (new_value); + + sscanf (key, "brick%d.%s", &index, brick_key); + + if (index > rsp_ctx->brick_index_max) { + snprintf (new_key, sizeof (new_key), "brick%d.%s", + index + rsp_ctx->other_count, brick_key); + } 
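
_profile_volume_add_friend_rsp above relies on sscanf ("%d%s", ...) to peel the leading brick number off keys of the form "<count><rest>" and re-attach <rest> under a shifted count. The parse-and-shift step is verifiable in isolation; the offset value below merely plays the role of rsp_ctx->count:

    #include <stdio.h>

    int main (void)
    {
            char rest[64] = {0,};
            char out[80] = {0,};
            int  idx = 0;
            int  offset = 3;        /* stands in for rsp_ctx->count */

            sscanf ("2-cumulative.latency", "%d%63s", &idx, rest);
            snprintf (out, sizeof (out), "%d%s", idx + offset, rest);
            printf ("%s\n", out);   /* prints: 5-cumulative.latency */
            return 0;
    }
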
else { + strncpy (new_key, key, sizeof (new_key)); + new_key[sizeof (new_key) - 1] = 0; + } + + ret = dict_set (rsp_ctx->dict, new_key, new_value); + if (ret) + gf_log ("", GF_LOG_ERROR, "Unable to set key: %s in dict", + key); + + return 0; +} + +int +glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + glusterd_status_rsp_conv_t rsp_ctx = {0}; + int32_t cmd = GF_CLI_STATUS_NONE; + int32_t node_count = 0; + int32_t other_count = 0; + int32_t brick_index_max = -1; + int32_t rsp_node_count = 0; + int32_t rsp_other_count = 0; + int vol_count = -1; + int i = 0; + dict_t *ctx_dict = NULL; + char key[PATH_MAX] = {0,}; + char *volname = NULL; + + GF_ASSERT (rsp_dict); + + if (aggr) { + ctx_dict = aggr; + + } else { + ctx_dict = glusterd_op_get_ctx (GD_OP_STATUS_VOLUME); + + } + + ret = dict_get_int32 (ctx_dict, "cmd", &cmd); + if (ret) + goto out; + + if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd (ctx_dict)) { + ret = dict_get_int32 (rsp_dict, "vol_count", &vol_count); + if (ret == 0) { + ret = dict_set_int32 (ctx_dict, "vol_count", + vol_count); + if (ret) + goto out; + + for (i = 0; i < vol_count; i++) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "vol%d", i); + ret = dict_get_str (rsp_dict, key, &volname); + if (ret) + goto out; + + ret = dict_set_str (ctx_dict, key, volname); + if (ret) + goto out; + } + } + } + + if ((cmd & GF_CLI_STATUS_TASKS) != 0) { + dict_copy (rsp_dict, aggr); + ret = 0; + goto out; + } + + ret = dict_get_int32 (rsp_dict, "count", &rsp_node_count); + if (ret) { + ret = 0; //no bricks in the rsp + goto out; + } + + ret = dict_get_int32 (rsp_dict, "other-count", &rsp_other_count); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Failed to get other count from rsp_dict"); + goto out; + } + + ret = dict_get_int32 (ctx_dict, "count", &node_count); + ret = dict_get_int32 (ctx_dict, "other-count", &other_count); + if (!dict_get (ctx_dict, "brick-index-max")) { + ret = dict_get_int32 (rsp_dict, "brick-index-max", &brick_index_max); + if (ret) + goto out; + ret = dict_set_int32 (ctx_dict, "brick-index-max", brick_index_max); + if (ret) + goto out; + + } else { + ret = dict_get_int32 (ctx_dict, "brick-index-max", &brick_index_max); + } + + rsp_ctx.count = node_count; + rsp_ctx.brick_index_max = brick_index_max; + rsp_ctx.other_count = other_count; + rsp_ctx.dict = ctx_dict; + + dict_foreach (rsp_dict, glusterd_volume_status_add_peer_rsp, &rsp_ctx); + + ret = dict_set_int32 (ctx_dict, "count", node_count + rsp_node_count); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, + "Failed to update node count"); + goto out; + } + + ret = dict_set_int32 (ctx_dict, "other-count", + (other_count + rsp_other_count)); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "Failed to update other-count"); +out: + return ret; +} + +int +glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + char key[256] = {0,}; + char *node_uuid = NULL; + char *node_uuid_str = NULL; + char *volname = NULL; + dict_t *ctx_dict = NULL; + double elapsed_time = 0; + glusterd_conf_t *conf = NULL; + glusterd_op_t op = GD_OP_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + int32_t index = 0; + int32_t count = 0; + int32_t current_index = 2; + int32_t value32 = 0; + uint64_t value = 0; + char *peer_uuid_str = NULL; + + GF_ASSERT (rsp_dict); + conf = THIS->private; + + op = glusterd_op_get_op (); + GF_ASSERT ((GD_OP_REBALANCE == op) || + (GD_OP_DEFRAG_BRICK_VOLUME == op)); + + if 
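
The aggregation in glusterd_volume_status_add_peer_rsp keeps real brick rows at their own index and pushes any row beyond brick-index-max (nfs, self-heal daemon and the like) out by other-count, so per-node entries from different peers cannot collide in the op ctx. The shift in isolation, a sketch assuming only libc:

    #include <stdio.h>

    static void shifted_key (const char *rest, int index,
                             int brick_index_max, int other_count)
    {
            char new_key[64] = {0,};

            if (index > brick_index_max)
                    snprintf (new_key, sizeof (new_key), "brick%d.%s",
                              index + other_count, rest);
            else
                    snprintf (new_key, sizeof (new_key), "brick%d.%s",
                              index, rest);
            printf ("%s\n", new_key);
    }
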
(aggr) { + ctx_dict = aggr; + + } else { + ctx_dict = glusterd_op_get_ctx (op); + + } + + if (!ctx_dict) + goto out; + + ret = dict_get_str (ctx_dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + + if (ret) + goto out; + + ret = dict_get_int32 (rsp_dict, "count", &index); + if (ret) + gf_log ("", GF_LOG_ERROR, "failed to get index"); + + memset (key, 0, 256); + snprintf (key, 256, "node-uuid-%d", index); + ret = dict_get_str (rsp_dict, key, &node_uuid); + if (!ret) { + node_uuid_str = gf_strdup (node_uuid); + + /* Finding the index of the node-uuid in the peer-list */ + list_for_each_entry (peerinfo, &conf->peers, uuid_list) { + peer_uuid_str = gd_peer_uuid_str (peerinfo); + if (strcmp (peer_uuid_str, node_uuid_str) == 0) + break; + + current_index++; + } + + /* Setting the largest index value as the total count. */ + ret = dict_get_int32 (ctx_dict, "count", &count); + if (count < current_index) { + ret = dict_set_int32 (ctx_dict, "count", current_index); + if (ret) + gf_log ("", GF_LOG_ERROR, "Failed to set count"); + } + + /* Setting the same index for the node, as is in the peerlist.*/ + memset (key, 0, 256); + snprintf (key, 256, "node-uuid-%d", current_index); + ret = dict_set_dynstr (ctx_dict, key, node_uuid_str); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set node-uuid"); + } + } + + snprintf (key, 256, "files-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "files-%d", current_index); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set the file count"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "size-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "size-%d", current_index); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set the size of migration"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "lookups-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "lookups-%d", current_index); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set lookedup file count"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "status-%d", index); + ret = dict_get_int32 (rsp_dict, key, &value32); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "status-%d", current_index); + ret = dict_set_int32 (ctx_dict, key, value32); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set status"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "failures-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "failures-%d", current_index); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set failure count"); + } + } + + memset (key, 0, 256); + snprintf (key, 256, "skipped-%d", index); + ret = dict_get_uint64 (rsp_dict, key, &value); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "skipped-%d", current_index); + ret = dict_set_uint64 (ctx_dict, key, value); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set skipped count"); + } + } + memset (key, 0, 256); + snprintf (key,
256, "run-time-%d", index); + ret = dict_get_double (rsp_dict, key, &elapsed_time); + if (!ret) { + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", current_index); + ret = dict_set_double (ctx_dict, key, elapsed_time); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "failed to set run-time"); + } + } + + ret = 0; + +out: + return ret; +} + +int +glusterd_snap_config_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char buf[PATH_MAX] = ""; + char *volname = NULL; + int ret = -1; + int config_command = 0; + uint64_t i = 0; + uint64_t value = 0; + uint64_t voldisplaycount = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "config-command", &config_command); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "failed to get config-command type"); + goto out; + } + + switch (config_command) { + case GF_SNAP_CONFIG_DISPLAY: + ret = dict_get_uint64 (src, "snap-max-hard-limit", &value); + if (!ret) { + ret = dict_set_uint64 (dst, "snap-max-hard-limit", value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set snap_max_hard_limit"); + goto out; + } + } else { + /* Received dummy response from other nodes */ + ret = 0; + goto out; + } + + ret = dict_get_uint64 (src, "snap-max-soft-limit", &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get snap_max_soft_limit"); + goto out; + } + + ret = dict_set_uint64 (dst, "snap-max-soft-limit", value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set snap_max_soft_limit"); + goto out; + } + + ret = dict_get_uint64 (src, "voldisplaycount", + &voldisplaycount); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get voldisplaycount"); + goto out; + } + + ret = dict_set_uint64 (dst, "voldisplaycount", + voldisplaycount); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set voldisplaycount"); + goto out; + } + + for (i = 0; i < voldisplaycount; i++) { + snprintf (buf, sizeof(buf), "volume%ld-volname", i); + ret = dict_get_str (src, buf, &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_str (dst, buf, volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-hard-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-active-hard-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + + snprintf (buf, sizeof(buf), + "volume%ld-snap-max-soft-limit", i); + ret = dict_get_uint64 (src, buf, &value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get %s", buf); + goto out; + } + ret = dict_set_uint64 (dst, buf, value); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set %s", buf); + goto out; + } + } + + break; + default: + break; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Aggregate missed_snap_counts from different nodes and save it * + * in the req_dict of the originator node */ +int +glusterd_snap_create_use_rsp_dict (dict_t *dst, dict_t *src) +{ + char 
*buf = NULL; + char *tmp_str = NULL; + char name_buf[PATH_MAX] = ""; + int32_t i = -1; + int32_t ret = -1; + int32_t src_missed_snap_count = -1; + int32_t dst_missed_snap_count = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!dst || !src) { + gf_log (this->name, GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (src, "missed_snap_count", + &src_missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, "No missed snaps"); + ret = 0; + goto out; + } + + ret = dict_get_int32 (dst, "missed_snap_count", + &dst_missed_snap_count); + if (ret) { + /* Initialize dst_missed_count for the first time */ + dst_missed_snap_count = 0; + } + + for (i = 0; i < src_missed_snap_count; i++) { + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + i); + ret = dict_get_str (src, name_buf, &buf); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to fetch %s", name_buf); + goto out; + } + + snprintf (name_buf, sizeof(name_buf), "missed_snaps_%d", + dst_missed_snap_count); + + tmp_str = gf_strdup (buf); + if (!tmp_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dst, name_buf, tmp_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set %s", name_buf); + goto out; + } + + tmp_str = NULL; + dst_missed_snap_count++; + } + + ret = dict_set_int32 (dst, "missed_snap_count", dst_missed_snap_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set dst_missed_snap_count"); + goto out; + } + +out: + if (ret && tmp_str) + GF_FREE(tmp_str); + + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int +glusterd_snap_use_rsp_dict (dict_t *dst, dict_t *src) +{ + int ret = -1; + int32_t snap_command = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "type", &snap_command); + if (ret) { + gf_log ("", GF_LOG_ERROR, "unable to get the type of " + "the snapshot command"); + goto out; + } + + switch (snap_command) { + case GF_SNAP_OPTION_TYPE_CREATE: + case GF_SNAP_OPTION_TYPE_DELETE: + ret = glusterd_snap_create_use_rsp_dict (dst, src); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict"); + goto out; + } + break; + case GF_SNAP_OPTION_TYPE_CONFIG: + ret = glusterd_snap_config_use_rsp_dict (dst, src); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to use rsp dict"); + goto out; + } + break; + default: + // copy the response dictionary's contents to the dict to be + // sent back to the cli + dict_copy (src, dst); + break; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_sys_exec_output_rsp_dict (dict_t *dst, dict_t *src) +{ + char output_name[PATH_MAX] = ""; + char *output = NULL; + int ret = 0; + int i = 0; + int len = 0; + int src_output_count = 0; + int dst_output_count = 0; + + if (!dst || !src) { + gf_log ("", GF_LOG_ERROR, "Source or Destination " + "dict is empty."); + goto out; + } + + ret = dict_get_int32 (dst, "output_count", &dst_output_count); + + ret = dict_get_int32 (src, "output_count", &src_output_count); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "No output from source"); + ret = 0; + goto out; + } + + for (i = 1; i <= src_output_count; i++) { + len = snprintf (output_name, sizeof(output_name) - 1, + "output_%d", i); + output_name[len] = '\0'; + ret = dict_get_str (src, output_name, &output); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to fetch %s", + output_name); + goto
out; + } + + len = snprintf (output_name, sizeof(output_name) - 1, + "output_%d", i+dst_output_count); + output_name[len] = '\0'; + ret = dict_set_dynstr (dst, output_name, gf_strdup (output)); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to set %s", + output_name); + goto out; + } + } + + ret = dict_set_int32 (dst, "output_count", + dst_output_count+src_output_count); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + glusterd_op_t op = GD_OP_NONE; + + op = glusterd_op_get_op (); + GF_ASSERT (aggr); + GF_ASSERT (rsp_dict); + + if (!aggr) + goto out; + dict_copy (rsp_dict, aggr); +out: + return ret; +} + +int +glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict) +{ + int ret = 0; + dict_t *ctx_dict = NULL; + glusterd_op_t op = GD_OP_NONE; + + GF_ASSERT (rsp_dict); + + op = glusterd_op_get_op (); + GF_ASSERT (GD_OP_HEAL_VOLUME == op); + + if (aggr) { + ctx_dict = aggr; + + } else { + ctx_dict = glusterd_op_get_ctx (op); + } + + if (!ctx_dict) + goto out; + dict_copy (rsp_dict, ctx_dict); +out: + return ret; +} + +int +_profile_volume_add_brick_rsp (dict_t *this, char *key, data_t *value, + void *data) +{ + char new_key[256] = {0}; + glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL; + data_t *new_value = NULL; + + rsp_ctx = data; + new_value = data_copy (value); + GF_ASSERT (new_value); + snprintf (new_key, sizeof (new_key), "%d-%s", rsp_ctx->count, key); + dict_set (rsp_ctx->dict, new_key, new_value); + return 0; +} + +int +glusterd_profile_volume_brick_rsp (void *pending_entry, + dict_t *rsp_dict, dict_t *op_ctx, + char **op_errstr, gd_node_type type) +{ + int ret = 0; + glusterd_pr_brick_rsp_conv_t rsp_ctx = {0}; + int32_t count = 0; + char brick[PATH_MAX+1024] = {0}; + char key[256] = {0}; + char *full_brick = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT (rsp_dict); + GF_ASSERT (op_ctx); + GF_ASSERT (op_errstr); + GF_ASSERT (pending_entry); + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_int32 (op_ctx, "count", &count); + if (ret) { + count = 1; + } else { + count++; + } + snprintf (key, sizeof (key), "%d-brick", count); + if (type == GD_NODE_BRICK) { + brickinfo = pending_entry; + snprintf (brick, sizeof (brick), "%s:%s", brickinfo->hostname, + brickinfo->path); + } else if (type == GD_NODE_NFS) { + snprintf (brick, sizeof (brick), "%s", uuid_utoa (MY_UUID)); + } + full_brick = gf_strdup (brick); + GF_ASSERT (full_brick); + ret = dict_set_dynstr (op_ctx, key, full_brick); + + rsp_ctx.count = count; + rsp_ctx.dict = op_ctx; + dict_foreach (rsp_dict, _profile_volume_add_brick_rsp, &rsp_ctx); + dict_del (op_ctx, "count"); + ret = dict_set_int32 (op_ctx, "count", count); + return ret; +} + +//input-key: <replica-id>:<child-id>-* +//output-key: <brick-id>-* +int +_heal_volume_add_shd_rsp (dict_t *this, char *key, data_t *value, void *data) +{ + char new_key[256] = {0,}; + char int_str[16] = {0}; + data_t *new_value = NULL; + char *rxl_end = NULL; + char *rxl_child_end = NULL; + glusterd_volinfo_t *volinfo = NULL; + int rxl_id = 0; + int rxl_child_id = 0; + int brick_id = 0; + int int_len = 0; + int ret = 0; + glusterd_heal_rsp_conv_t *rsp_ctx = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + rsp_ctx = data; + rxl_end = strchr (key, '-'); + if (!rxl_end) + goto out; + + int_len = strlen (key) - strlen (rxl_end); + strncpy (int_str, key, int_len); + 
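
_heal_volume_add_shd_rsp, in progress here, decodes keys of the form "<replica-id>-<child-id>-<suffix>" and flattens them to a brick position via rxl_id * volinfo->replica_count + rxl_child_id. The same arithmetic as a standalone check; strtol substitutes for the strchr/strncpy slicing used in the callback:

    #include <stdio.h>
    #include <stdlib.h>

    int main (void)
    {
            const char *key = "1-2-status";
            char       *end = NULL;
            int         replica_count = 3;
            int         rxl_id = 0;
            int         rxl_child_id = 0;

            rxl_id = (int) strtol (key, &end, 10);          /* "1" */
            rxl_child_id = (int) strtol (end + 1, &end, 10);/* "2" */
            printf ("%d%s\n", rxl_id * replica_count + rxl_child_id, end);
            /* prints: 5-status */
            return 0;
    }
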
int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_id); + if (ret) + goto out; + + rxl_child_end = strchr (rxl_end + 1, '-'); + if (!rxl_child_end) + goto out; + + int_len = strlen (rxl_end) - strlen (rxl_child_end) - 1; + strncpy (int_str, rxl_end + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_child_id); + if (ret) + goto out; + + volinfo = rsp_ctx->volinfo; + brick_id = rxl_id * volinfo->replica_count + rxl_child_id; + + if (!strcmp (rxl_child_end, "-status")) { + brickinfo = glusterd_get_brickinfo_by_position (volinfo, + brick_id); + if (!brickinfo) + goto out; + if (!glusterd_is_local_brick (rsp_ctx->this, volinfo, + brickinfo)) + goto out; + } + new_value = data_copy (value); + snprintf (new_key, sizeof (new_key), "%d%s", brick_id, rxl_child_end); + dict_set (rsp_ctx->dict, new_key, new_value); + +out: + return 0; +} + +int +_heal_volume_add_shd_rsp_of_statistics (dict_t *this, char *key, data_t + *value, void *data) +{ + char new_key[256] = {0,}; + char int_str[16] = {0,}; + char key_begin_string[128] = {0,}; + data_t *new_value = NULL; + char *rxl_end = NULL; + char *rxl_child_end = NULL; + glusterd_volinfo_t *volinfo = NULL; + char *key_begin_str = NULL; + int rxl_id = 0; + int rxl_child_id = 0; + int brick_id = 0; + int int_len = 0; + int ret = 0; + glusterd_heal_rsp_conv_t *rsp_ctx = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + rsp_ctx = data; + key_begin_str = strchr (key, '-'); + if (!key_begin_str) + goto out; + + int_len = strlen (key) - strlen (key_begin_str); + strncpy (key_begin_string, key, int_len); + key_begin_string[int_len] = '\0'; + + rxl_end = strchr (key_begin_str + 1, '-'); + if (!rxl_end) + goto out; + + int_len = strlen (key_begin_str) - strlen (rxl_end) - 1; + strncpy (int_str, key_begin_str + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_id); + if (ret) + goto out; + + + rxl_child_end = strchr (rxl_end + 1, '-'); + if (!rxl_child_end) + goto out; + + int_len = strlen (rxl_end) - strlen (rxl_child_end) - 1; + strncpy (int_str, rxl_end + 1, int_len); + int_str[int_len] = '\0'; + ret = gf_string2int (int_str, &rxl_child_id); + if (ret) + goto out; + + volinfo = rsp_ctx->volinfo; + brick_id = rxl_id * volinfo->replica_count + rxl_child_id; + + brickinfo = glusterd_get_brickinfo_by_position (volinfo, brick_id); + if (!brickinfo) + goto out; + if (!glusterd_is_local_brick (rsp_ctx->this, volinfo, brickinfo)) + goto out; + + new_value = data_copy (value); + snprintf (new_key, sizeof (new_key), "%s-%d%s", key_begin_string, + brick_id, rxl_child_end); + dict_set (rsp_ctx->dict, new_key, new_value); + +out: + return 0; + +} + +int +glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict, + dict_t *op_ctx, char **op_errstr) +{ + int ret = 0; + glusterd_heal_rsp_conv_t rsp_ctx = {0}; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + int heal_op = -1; + + GF_ASSERT (rsp_dict); + GF_ASSERT (op_ctx); + GF_ASSERT (op_errstr); + + ret = dict_get_str (req_dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = dict_get_int32 (req_dict, "heal-op", &heal_op); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get heal_op"); + goto out; + } + + + ret = glusterd_volinfo_find (volname, &volinfo); + + if (ret) + goto out; + + rsp_ctx.dict = op_ctx; + rsp_ctx.volinfo = volinfo; + rsp_ctx.this = THIS; + if (heal_op == GF_AFR_OP_STATISTICS) + dict_foreach (rsp_dict, _heal_volume_add_shd_rsp_of_statistics, + 
&rsp_ctx); + else + dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx); + + +out: + return ret; +} + +int +_status_volume_add_brick_rsp (dict_t *this, char *key, data_t *value, + void *data) +{ + char new_key[256] = {0,}; + data_t *new_value = 0; + glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL; + + rsp_ctx = data; + new_value = data_copy (value); + snprintf (new_key, sizeof (new_key), "brick%d.%s", rsp_ctx->count, key); + dict_set (rsp_ctx->dict, new_key, new_value); + + return 0; +} + +int +glusterd_status_volume_brick_rsp (dict_t *rsp_dict, dict_t *op_ctx, + char **op_errstr) +{ + int ret = 0; + glusterd_pr_brick_rsp_conv_t rsp_ctx = {0}; + int32_t count = 0; + int index = 0; + + GF_ASSERT (rsp_dict); + GF_ASSERT (op_ctx); + GF_ASSERT (op_errstr); + + ret = dict_get_int32 (op_ctx, "count", &count); + if (ret) { + count = 0; + } else { + count++; + } + ret = dict_get_int32 (rsp_dict, "index", &index); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get node index"); + goto out; + } + dict_del (rsp_dict, "index"); + + rsp_ctx.count = index; + rsp_ctx.dict = op_ctx; + dict_foreach (rsp_dict, _status_volume_add_brick_rsp, &rsp_ctx); + ret = dict_set_int32 (op_ctx, "count", count); + +out: + return ret; +} + +int +glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict, + dict_t *op_ctx) +{ + int ret = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + char key[256] = {0,}; + int32_t i = 0; + char buf[1024] = {0,}; + char *node_str = NULL; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + GF_ASSERT (req_dict); + + ret = dict_get_str (req_dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + + if (ret) + goto out; + + if (rsp_dict) { + ret = glusterd_defrag_volume_status_update (volinfo, + rsp_dict); + } + + if (!op_ctx) { + dict_copy (rsp_dict, op_ctx); + goto out; + } + + ret = dict_get_int32 (op_ctx, "count", &i); + i++; + + ret = dict_set_int32 (op_ctx, "count", i); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, "Failed to set count"); + + snprintf (buf, 1024, "%s", uuid_utoa (MY_UUID)); + node_str = gf_strdup (buf); + + snprintf (key, 256, "node-uuid-%d",i); + ret = dict_set_dynstr (op_ctx, key, node_str); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set node-uuid"); + + memset (key, 0 , 256); + snprintf (key, 256, "files-%d", i); + ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_files); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set file count"); + + memset (key, 0 , 256); + snprintf (key, 256, "size-%d", i); + ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_data); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set size of xfer"); + + memset (key, 0 , 256); + snprintf (key, 256, "lookups-%d", i); + ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.lookedup_files); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set lookedup file count"); + + memset (key, 0 , 256); + snprintf (key, 256, "status-%d", i); + ret = dict_set_int32 (op_ctx, key, volinfo->rebal.defrag_status); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set status"); + + memset (key, 0 , 256); + snprintf (key, 256, "failures-%d", i); + ret = dict_set_uint64 (op_ctx, key, volinfo->rebal.rebalance_failures); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set failure count"); + + memset (key, 0 , 256); + snprintf (key, 256, "skipped-%d", i); + ret = 
dict_set_uint64 (op_ctx, key, volinfo->rebal.skipped_files); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set skipped count"); + + memset (key, 0, 256); + snprintf (key, 256, "run-time-%d", i); + ret = dict_set_double (op_ctx, key, volinfo->rebal.rebalance_time); + if (ret) + gf_log (THIS->name, GF_LOG_ERROR, + "failed to set run-time"); + +out: + return ret; +} +int32_t +glusterd_handle_node_rsp (dict_t *req_dict, void *pending_entry, + glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx, + char **op_errstr, gd_node_type type) +{ + int ret = 0; + + GF_ASSERT (op_errstr); + + switch (op) { + case GD_OP_PROFILE_VOLUME: + ret = glusterd_profile_volume_brick_rsp (pending_entry, + rsp_dict, op_ctx, + op_errstr, type); + break; + case GD_OP_STATUS_VOLUME: + ret = glusterd_status_volume_brick_rsp (rsp_dict, op_ctx, + op_errstr); + break; + + case GD_OP_DEFRAG_BRICK_VOLUME: + glusterd_defrag_volume_node_rsp (req_dict, + rsp_dict, op_ctx); + break; + + case GD_OP_HEAL_VOLUME: + ret = glusterd_heal_volume_brick_rsp (req_dict, rsp_dict, + op_ctx, op_errstr); + break; + default: + break; + } + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +/* Should be used only when an operation is in progress, as that is the only + * time a lock_owner is set + */ +gf_boolean_t +is_origin_glusterd (dict_t *dict) +{ + gf_boolean_t ret = _gf_false; + uuid_t lock_owner = {0,}; + uuid_t *originator_uuid = NULL; + + GF_ASSERT (dict); + + ret = dict_get_bin (dict, "originator_uuid", + (void **) &originator_uuid); + if (ret) { + /* If no originator_uuid has been set, then the command + * has originated from a glusterd running on an older version. + * Hence fetch the lock owner */ + ret = glusterd_get_lock_owner (&lock_owner); + if (ret) { + ret = _gf_false; + goto out; + } + ret = !uuid_compare (MY_UUID, lock_owner); + } else + ret = !uuid_compare (MY_UUID, *originator_uuid); + +out: + return ret; +} + +int +glusterd_generate_and_set_task_id (dict_t *dict, char *key) +{ + int ret = -1; + uuid_t task_id = {0,}; + char *uuid_str = NULL; + xlator_t *this = NULL; + + GF_ASSERT (dict); + + this = THIS; + GF_ASSERT (this); + + uuid_generate (task_id); + uuid_str = gf_strdup (uuid_utoa (task_id)); + if (!uuid_str) { + ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, key, uuid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set %s in dict", + key); + goto out; + } + gf_log (this->name, GF_LOG_INFO, "Generated task-id %s for key %s", + uuid_str, key); + +out: + if (ret) + GF_FREE (uuid_str); + return ret; +} + +int +glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key) +{ + int ret = -1; + char tmp_str[40] = {0,}; + char *task_id_str = NULL; + + GF_ASSERT (dict); + GF_ASSERT (key); + + uuid_unparse (uuid, tmp_str); + task_id_str = gf_strdup (tmp_str); + if (!task_id_str) + return -1; + + ret = dict_set_dynstr (dict, key, task_id_str); + if (ret) { + GF_FREE (task_id_str); + gf_log (THIS->name, GF_LOG_ERROR, + "Error setting uuid in dict with key %s", key); + } + + return ret; +} + +int +_update_volume_op_versions (dict_t *this, char *key, data_t *value, void *data) +{ + int op_version = 0; + glusterd_volinfo_t *ctx = NULL; + gf_boolean_t enabled = _gf_true; + int ret = -1; + + GF_ASSERT (data); + ctx = data; + + op_version = glusterd_get_op_version_for_key (key); + + if (gd_is_xlator_option (key) || gd_is_boolean_option (key)) { + ret = gf_string2boolean (value->data, &enabled); + if (ret) + return 0; + + if (!enabled) + return 0; + } + + if (op_version
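
glusterd_generate_and_set_task_id above is ordinary libuuid usage: generate a random uuid, render it as a string, store the string. A minimal equivalent, assuming libuuid is available (link with -luuid); uuid_utoa() is gluster's own static-buffer convenience wrapper around the same unparse step:

    #include <stdio.h>
    #include <uuid/uuid.h>

    int main (void)
    {
            uuid_t task_id;
            char   str[37] = {0,};  /* 36 chars + NUL */

            uuid_generate (task_id);
            uuid_unparse (task_id, str);
            printf ("Generated task-id %s\n", str);
            return 0;
    }
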
> ctx->op_version) + ctx->op_version = op_version; + + if (gd_is_client_option (key) && + (op_version > ctx->client_op_version)) + ctx->client_op_version = op_version; + + return 0; +} + +void +gd_update_volume_op_versions (glusterd_volinfo_t *volinfo) +{ + glusterd_conf_t *conf = NULL; + gf_boolean_t ob_enabled = _gf_false; + + GF_ASSERT (volinfo); + + conf = THIS->private; + GF_ASSERT (conf); + + /* Reset op-versions to minimum */ + volinfo->op_version = 1; + volinfo->client_op_version = 1; + + dict_foreach (volinfo->dict, _update_volume_op_versions, volinfo); + + /* Special case for open-behind + * If cluster op-version >= 2 and open-behind hasn't been explicitly + * disabled, volume op-versions must be updated to account for it + */ + + /* TODO: Remove once we have a general way to update automatically + * enabled features + */ + if (conf->op_version >= 2) { + ob_enabled = dict_get_str_boolean (volinfo->dict, + "performance.open-behind", + _gf_true); + if (ob_enabled) { + + if (volinfo->op_version < 2) + volinfo->op_version = 2; + if (volinfo->client_op_version < 2) + volinfo->client_op_version = 2; + } + } + + return; +} + +/* A task is committed/completed once the task-id for it is cleared */ +gf_boolean_t +gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo) +{ + GF_ASSERT (volinfo); + + if ((GD_OP_REMOVE_BRICK == volinfo->rebal.op) && + !uuid_is_null (volinfo->rebal.rebalance_id)) + return _gf_false; + + return _gf_true; +} + +gf_boolean_t +glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, + struct list_head *peers, + char **down_peerstr) +{ + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + gf_boolean_t ret = _gf_false; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (!uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + list_for_each_entry (peerinfo, peers, uuid_list) { + if (uuid_compare (peerinfo->uuid, brickinfo->uuid)) + continue; + + /*Found peer who owns the brick, return false + * if peer is not connected or not friend */ + if (!(peerinfo->connected) || + (peerinfo->state.state != + GD_FRIEND_STATE_BEFRIENDED)) { + *down_peerstr = gf_strdup (peerinfo->hostname); + gf_log ("", GF_LOG_DEBUG, "Peer %s is down. 
", + peerinfo->hostname); + goto out; + } + } + } + + ret = _gf_true; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +gf_boolean_t +glusterd_is_status_tasks_op (glusterd_op_t op, dict_t *dict) +{ + int ret = -1; + uint32_t cmd = GF_CLI_STATUS_NONE; + gf_boolean_t is_status_tasks = _gf_false; + + if (op != GD_OP_STATUS_VOLUME) + goto out; + + ret = dict_get_uint32 (dict, "cmd", &cmd); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get opcode"); + goto out; + } + + if (cmd & GF_CLI_STATUS_TASKS) + is_status_tasks = _gf_true; + +out: + return is_status_tasks; +} + +int +glusterd_compare_snap_time(struct list_head *list1, struct list_head *list2) +{ + glusterd_snap_t *snap1 = NULL; + glusterd_snap_t *snap2 = NULL; + double diff_time = 0; + + GF_ASSERT (list1); + GF_ASSERT (list2); + + snap1 = list_entry(list1, glusterd_snap_t, snap_list); + snap2 = list_entry(list2, glusterd_snap_t, snap_list); + diff_time = difftime(snap1->time_stamp, snap2->time_stamp); + + return ((int)diff_time); +} + +int +glusterd_compare_snap_vol_time(struct list_head *list1, struct list_head *list2) +{ + glusterd_volinfo_t *snapvol1 = NULL; + glusterd_volinfo_t *snapvol2 = NULL; + double diff_time = 0; + + GF_ASSERT (list1); + GF_ASSERT (list2); + + snapvol1 = list_entry(list1, glusterd_volinfo_t, snapvol_list); + snapvol2 = list_entry(list2, glusterd_volinfo_t, snapvol_list); + diff_time = difftime(snapvol1->snapshot->time_stamp, + snapvol2->snapshot->time_stamp); + + return ((int)diff_time); +} + +int32_t +glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo) +{ + glusterd_missed_snap_info *new_missed_snapinfo = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (missed_snapinfo); + + new_missed_snapinfo = GF_CALLOC (1, sizeof(*new_missed_snapinfo), + gf_gld_mt_missed_snapinfo_t); + + if (!new_missed_snapinfo) + goto out; + + new_missed_snapinfo->node_snap_info = NULL; + INIT_LIST_HEAD (&new_missed_snapinfo->missed_snaps); + INIT_LIST_HEAD (&new_missed_snapinfo->snap_ops); + + *missed_snapinfo = new_missed_snapinfo; + + ret = 0; + +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op) +{ + glusterd_snap_op_t *new_snap_op = NULL; + int32_t ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (snap_op); + + new_snap_op = GF_CALLOC (1, sizeof(*new_snap_op), + gf_gld_mt_missed_snapinfo_t); + + if (!new_snap_op) + goto out; + + new_snap_op->brick_path = NULL; + new_snap_op->brick_num = -1; + new_snap_op->op = -1; + new_snap_op->status = -1; + INIT_LIST_HEAD (&new_snap_op->snap_ops_list); + + *snap_op = new_snap_op; + + ret = 0; +out: + gf_log (this->name, GF_LOG_TRACE, "Returning %d", ret); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 5797c35cf..56bb799bf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. 
- - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _GLUSTERD_UTILS_H #define _GLUSTERD_UTILS_H_ @@ -38,24 +28,33 @@ #include "rpc-clnt.h" #include "protocol-common.h" +#define GLUSTERD_SOCK_DIR "/var/run" + struct glusterd_lock_ { uuid_t owner; time_t timestamp; }; -typedef struct glusterd_volopt_ctx_ { +typedef struct glusterd_dict_ctx_ { dict_t *dict; - int count; int opt_count; -} glusterd_volopt_ctx_t; + char *key_name; + char *val_name; + char *prefix; +} glusterd_dict_ctx_t; + +int +glusterd_compare_lines (const void *a, const void *b); typedef int (*glusterd_condition_func) (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo, void *ctx); - typedef struct glusterd_lock_ glusterd_lock_t; int32_t +glusterd_get_lock_owner (uuid_t *cur_owner); + +int32_t glusterd_lock (uuid_t new_owner); int32_t @@ -67,18 +66,41 @@ glusterd_get_uuid (uuid_t *uuid); int glusterd_submit_reply (rpcsvc_request_t *req, void *arg, struct iovec *payload, int payloadcount, - struct iobref *iobref, gd_serialize_t sfunc); + struct iobref *iobref, xdrproc_t xdrproc); int -glusterd_submit_request (glusterd_peerinfo_t *peerinfo, void *req, - call_frame_t *frame, struct rpc_clnt_program *prog, - int procnum, struct iobref *iobref, - gd_serialize_t sfunc, xlator_t *this, - fop_cbk_fn_t cbkfn); +glusterd_to_cli (rpcsvc_request_t *req, gf_cli_rsp *arg, struct iovec *payload, + int payloadcount, struct iobref *iobref, xdrproc_t xdrproc, + dict_t *dict); +int +glusterd_submit_request (struct rpc_clnt *rpc, void *req, + call_frame_t *frame, rpc_clnt_prog_t *prog, + int procnum, struct iobref *iobref, + xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc); int32_t glusterd_volinfo_new (glusterd_volinfo_t **volinfo); +int32_t +glusterd_volinfo_dup (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t **dup_volinfo, + gf_boolean_t set_userauth); + +char * +glusterd_auth_get_username (glusterd_volinfo_t *volinfo); + +char * +glusterd_auth_get_password (glusterd_volinfo_t *volinfo); + +int32_t +glusterd_auth_set_username (glusterd_volinfo_t *volinfo, char *username); + +int32_t +glusterd_auth_set_password (glusterd_volinfo_t *volinfo, char *password); + +void +glusterd_auth_cleanup (glusterd_volinfo_t *volinfo); + gf_boolean_t glusterd_check_volume_exists (char *volname); @@ -86,7 +108,7 @@ int32_t glusterd_brickinfo_new (glusterd_brickinfo_t **brickinfo); int32_t -glusterd_brickinfo_from_brick (char *brick, glusterd_brickinfo_t **brickinfo); +glusterd_brickinfo_new_from_brick (char *brick, glusterd_brickinfo_t **brickinfo); int32_t glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo); @@ -98,8 +120,23 @@ int32_t glusterd_peer_hostname_new (char *hostname, glusterd_peer_hostname_t **name); int32_t +glusterd_snap_volinfo_find (char *volname, glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo); +int32_t 
+glusterd_snap_volinfo_find_from_parent_volname (char *origin_volname, + glusterd_snap_t *snap, + glusterd_volinfo_t **volinfo); + +int32_t glusterd_volinfo_find (char *volname, glusterd_volinfo_t **volinfo); +int +glusterd_volinfo_find_by_volume_id (uuid_t volume_id, glusterd_volinfo_t **volinfo); + +int +glusterd_snap_volinfo_find_by_volume_id (uuid_t volume_id, + glusterd_volinfo_t **volinfo); + int32_t glusterd_service_stop(const char *service, char *pidfile, int sig, gf_boolean_t force_kill); @@ -109,11 +146,13 @@ glusterd_resolve_brick (glusterd_brickinfo_t *brickinfo); int32_t glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); + glusterd_brickinfo_t *brickinfo, + gf_boolean_t wait); int32_t glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); + glusterd_brickinfo_t *brickinfo, + gf_boolean_t del_brick); int32_t glusterd_volinfo_delete (glusterd_volinfo_t *volinfo); @@ -128,27 +167,69 @@ int32_t glusterd_volume_brickinfo_get_by_brick (char *brick, glusterd_volinfo_t *volinfo, glusterd_brickinfo_t **brickinfo); -int32_t -glusterd_is_local_addr (char *hostname); int32_t glusterd_build_volume_dict (dict_t **vols); int32_t -glusterd_compare_friend_data (dict_t *vols, int32_t *status); +glusterd_compare_friend_data (dict_t *vols, int32_t *status, char *hostname); int glusterd_volume_compute_cksum (glusterd_volinfo_t *volinfo); +void +glusterd_get_nodesvc_volfile (char *server, char *workdir, + char *volfile, size_t len); + gf_boolean_t -glusterd_is_nfs_started (); +glusterd_is_service_running (char *pidfile, int *pid); +gf_boolean_t +glusterd_is_nodesvc_running (); + +gf_boolean_t +glusterd_is_nodesvc_running (); + +void +glusterd_get_nodesvc_dir (char *server, char *workdir, + char *path, size_t len); int32_t glusterd_nfs_server_start (); int32_t glusterd_nfs_server_stop (); +int32_t +glusterd_shd_start (); + +int32_t +glusterd_shd_stop (); + +void +glusterd_set_socket_filepath (char *sock_filepath, char *sockpath, size_t len); + +int32_t +glusterd_nodesvc_set_socket_filepath (char *rundir, uuid_t uuid, + char *socketpath, int len); + +struct rpc_clnt* +glusterd_pending_node_get_rpc (glusterd_pending_node_t *pending_node); + +struct rpc_clnt* +glusterd_nodesvc_get_rpc (char *server); + +int32_t +glusterd_nodesvc_set_rpc (char *server, struct rpc_clnt *rpc); + +int32_t +glusterd_nodesvc_connect (char *server, char *socketpath); + +void +glusterd_nodesvc_set_online_status (char *server, gf_boolean_t status); + +gf_boolean_t +glusterd_is_nodesvc_online (char *server); + int glusterd_remote_hostname_get (rpcsvc_request_t *req, char *remote_host, int len); @@ -158,22 +239,38 @@ void glusterd_set_volume_status (glusterd_volinfo_t *volinfo, glusterd_volume_status status); int -glusterd_check_generate_start_nfs (glusterd_volinfo_t *volinfo); +glusterd_check_generate_start_nfs (void); + +int +glusterd_check_generate_start_shd (void); + +int +glusterd_nodesvcs_handle_graph_change (glusterd_volinfo_t *volinfo); + +int +glusterd_nodesvcs_handle_reconfigure (glusterd_volinfo_t *volinfo); + +int +glusterd_nodesvcs_start (glusterd_volinfo_t *volinfo); + +int +glusterd_nodesvcs_stop (glusterd_volinfo_t *volinfo); + int32_t glusterd_volume_count_get (void); int32_t glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, dict_t *dict, int32_t count); int -glusterd_get_brickinfo (xlator_t *this, const char *brickname, - int port, gf_boolean_t localhost, +glusterd_get_brickinfo (xlator_t *this, const char 
*brickname, + int port, gf_boolean_t localhost, glusterd_brickinfo_t **brickinfo); void glusterd_set_brick_status (glusterd_brickinfo_t *brickinfo, gf_brick_status_t status); -int +gf_boolean_t glusterd_is_brick_started (glusterd_brickinfo_t *brickinfo); int @@ -190,16 +287,19 @@ glusterd_all_volume_cond_check (glusterd_condition_func func, int status, void *ctx); int glusterd_brick_start (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); + glusterd_brickinfo_t *brickinfo, + gf_boolean_t wait); int glusterd_brick_stop (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *brickinfo); + glusterd_brickinfo_t *brickinfo, + gf_boolean_t del_brick); int glusterd_is_defrag_on (glusterd_volinfo_t *volinfo); int32_t glusterd_volinfo_bricks_delete (glusterd_volinfo_t *volinfo); + int glusterd_friend_find_by_uuid (uuid_t uuid, glusterd_peerinfo_t **peerinfo); @@ -207,11 +307,13 @@ int glusterd_new_brick_validate (char *brick, glusterd_brickinfo_t *brickinfo, char *op_errstr, size_t len); int32_t -glusterd_volume_bricks_delete (glusterd_volinfo_t *volinfo); +glusterd_volume_brickinfos_delete (glusterd_volinfo_t *volinfo); + int32_t glusterd_volume_brickinfo_get (uuid_t uuid, char *hostname, char *path, glusterd_volinfo_t *volinfo, glusterd_brickinfo_t **brickinfo); + int glusterd_brickinfo_get (uuid_t uuid, char *hostname, char *path, glusterd_brickinfo_t **brickinfo); @@ -224,10 +326,309 @@ glusterd_is_rb_paused (glusterd_volinfo_t *volinfo); int glusterd_set_rb_status (glusterd_volinfo_t *volinfo, gf_rb_status_t status); +gf_boolean_t +glusterd_is_rb_ongoing (glusterd_volinfo_t *volinfo); + int glusterd_rb_check_bricks (glusterd_volinfo_t *volinfo, - glusterd_brickinfo_t *src_brick, glusterd_brickinfo_t *dst_brick); + glusterd_brickinfo_t *src_brick, + glusterd_brickinfo_t *dst_brick); + +int +glusterd_check_and_set_brick_xattr (char *host, char *path, uuid_t uuid, + char **op_errstr, gf_boolean_t is_force); + +int +glusterd_validate_and_create_brickpath (glusterd_brickinfo_t *brickinfo, + uuid_t volume_id, char **op_errstr, + gf_boolean_t is_force); +int +glusterd_sm_tr_log_transition_add (glusterd_sm_tr_log_t *log, + int old_state, int new_state, + int event); +int +glusterd_peerinfo_new (glusterd_peerinfo_t **peerinfo, + glusterd_friend_sm_state_t state, uuid_t *uuid, + const char *hostname, int port); +int +glusterd_sm_tr_log_init (glusterd_sm_tr_log_t *log, + char * (*state_name_get) (int), + char * (*event_name_get) (int), + size_t size); +void +glusterd_sm_tr_log_delete (glusterd_sm_tr_log_t *log); + +int +glusterd_sm_tr_log_add_to_dict (dict_t *dict, + glusterd_sm_tr_log_t *circular_log); +int +glusterd_remove_pending_entry (struct list_head *list, void *elem); +int +glusterd_clear_pending_nodes (struct list_head *list); +gf_boolean_t +glusterd_peerinfo_is_uuid_unknown (glusterd_peerinfo_t *peerinfo); +int32_t +glusterd_brick_connect (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, char *socketpath); +int32_t +glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo); +int32_t +glusterd_delete_volume (glusterd_volinfo_t *volinfo); +int32_t +glusterd_delete_brick (glusterd_volinfo_t* volinfo, + glusterd_brickinfo_t *brickinfo); + +int32_t +glusterd_delete_all_bricks (glusterd_volinfo_t* volinfo); + +int +glusterd_spawn_daemons (void *opaque); + +int +glusterd_restart_gsyncds (glusterd_conf_t *conf); + +int +glusterd_start_gsync (glusterd_volinfo_t *master_vol, char *slave, + char *path_list, char *conf_path, + char *glusterd_uuid_str, + char **op_errstr); +int 
+glusterd_get_local_brickpaths (glusterd_volinfo_t *volinfo, + char **pathlist); + +int32_t +glusterd_recreate_bricks (glusterd_conf_t *conf); +int32_t +glusterd_handle_upgrade_downgrade (dict_t *options, glusterd_conf_t *conf); + +int +glusterd_add_brick_detail_to_dict (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + dict_t *dict, int32_t count); + +int32_t +glusterd_add_brick_to_dict (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + dict_t *dict, int32_t count); + +int32_t +glusterd_get_all_volnames (dict_t *dict); + +gf_boolean_t +glusterd_is_fuse_available (); + +int +glusterd_brick_statedump (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, + char *options, int option_cnt, char **op_errstr); +int +glusterd_nfs_statedump (char *options, int option_cnt, char **op_errstr); +gf_boolean_t +glusterd_is_volume_replicate (glusterd_volinfo_t *volinfo); +gf_boolean_t +glusterd_is_brick_decommissioned (glusterd_volinfo_t *volinfo, char *hostname, + char *path); +gf_boolean_t +glusterd_friend_contains_vol_bricks (glusterd_volinfo_t *volinfo, + uuid_t friend_uuid); +int +glusterd_friend_remove_cleanup_vols (uuid_t uuid); + +gf_boolean_t +glusterd_chk_peers_connected_befriended (uuid_t skip_uuid); + +void +glusterd_get_client_filepath (char *filepath, + glusterd_volinfo_t *volinfo, + gf_transport_type type); +void +glusterd_get_trusted_client_filepath (char *filepath, + glusterd_volinfo_t *volinfo, + gf_transport_type type); +int +glusterd_restart_rebalance (glusterd_conf_t *conf); + +int32_t +glusterd_add_bricks_hname_path_to_dict (dict_t *dict, + glusterd_volinfo_t *volinfo); + +int +glusterd_add_node_to_dict (char *server, dict_t *dict, int count, + dict_t *vol_opts); + +char * +glusterd_uuid_to_hostname (uuid_t uuid); + +int +glusterd_get_dist_leaf_count (glusterd_volinfo_t *volinfo); + +glusterd_brickinfo_t* +glusterd_get_brickinfo_by_position (glusterd_volinfo_t *volinfo, uint32_t pos); + +gf_boolean_t +glusterd_is_local_brick (xlator_t *this, glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo); int -glusterd_brick_create_path (char *host, char *path, mode_t mode, - char **op_errstr); +glusterd_validate_volume_id (dict_t *op_dict, glusterd_volinfo_t *volinfo); + +int +glusterd_defrag_volume_status_update (glusterd_volinfo_t *volinfo, + dict_t *rsp_dict); + +int +glusterd_check_files_identical (char *filename1, char *filename2, + gf_boolean_t *identical); + +int +glusterd_check_topology_identical (const char *filename1, + const char *filename2, + gf_boolean_t *identical); + +void +glusterd_volinfo_reset_defrag_stats (glusterd_volinfo_t *volinfo); +int +glusterd_volset_help (dict_t *dict, char **op_errstr); + +int32_t +glusterd_sync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int32_t +glusterd_gsync_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict, char *op_errstr); +int32_t +glusterd_rb_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int +glusterd_profile_volume_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int +glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict); +int +glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int +glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int +glusterd_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int +glusterd_sys_exec_output_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int +glusterd_snap_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int32_t +glusterd_handle_node_rsp (dict_t *req_ctx, void *pending_entry, + glusterd_op_t op, 
dict_t *rsp_dict, dict_t *op_ctx, + char **op_errstr, gd_node_type type); +int +glusterd_volume_rebalance_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int +glusterd_volume_heal_use_rsp_dict (dict_t *aggr, dict_t *rsp_dict); +int +_profile_volume_add_brick_rsp (dict_t *this, char *key, data_t *value, + void *data); +int +glusterd_profile_volume_brick_rsp (void *pending_entry, + dict_t *rsp_dict, dict_t *op_ctx, + char **op_errstr, gd_node_type type); + +gf_boolean_t +glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, + struct list_head *peers, + char **down_peerstr); + +/* Should be used only when an operation is in progress, as that is the only + * time a lock_owner is set + */ +gf_boolean_t +is_origin_glusterd (dict_t *dict); + +gf_boolean_t +glusterd_is_quorum_changed (dict_t *options, char *option, char *value); + +int +glusterd_do_quorum_action (); + +int +glusterd_get_quorum_cluster_counts (xlator_t *this, int *active_count, + int *quorum_count); + +int +glusterd_get_next_global_opt_version_str (dict_t *opts, char **version_str); +gf_boolean_t +glusterd_is_quorum_option (char *option); +gf_boolean_t +glusterd_is_volume_in_server_quorum (glusterd_volinfo_t *volinfo); +gf_boolean_t +glusterd_is_any_volume_in_server_quorum (xlator_t *this); +gf_boolean_t +does_gd_meet_server_quorum (xlator_t *this); + +int +glusterd_generate_and_set_task_id (dict_t *dict, char *key); + +int +glusterd_copy_uuid_to_dict (uuid_t uuid, dict_t *dict, char *key); + +gf_boolean_t +glusterd_is_same_address (char *name1, char *name2); + +void +gd_update_volume_op_versions (glusterd_volinfo_t *volinfo); + +char* +gd_peer_uuid_str (glusterd_peerinfo_t *peerinfo); + +gf_boolean_t +gd_is_remove_brick_committed (glusterd_volinfo_t *volinfo); + +gf_boolean_t +glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, + struct list_head *peers, + char **down_peerstr); + +int +glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo, dict_t *dict, + char **slave_ip, char **slave_vol, + char **conf_path, char **op_errstr); + +int +glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, + dict_t *resp_dict, char *path_list, + char *conf_path, gf_boolean_t is_force); + +int +glusterd_check_gsync_running_local (char *master, char *slave, + char *conf_path, + gf_boolean_t *is_run); + +gf_boolean_t +glusterd_is_status_tasks_op (glusterd_op_t op, dict_t *dict); + +#ifdef GF_LINUX_HOST_OS +char* +glusterd_get_brick_mount_details (glusterd_brickinfo_t *brickinfo); +struct mntent * +glusterd_get_mnt_entry_info (char *mnt_pt, FILE *mtab); +int +glusterd_get_brick_root (char *path, char **mount_point); +#endif //LINUX_HOST + +int +glusterd_compare_snap_time(struct list_head *, struct list_head *); + +int +glusterd_compare_snap_vol_time(struct list_head *, struct list_head *); + +int32_t +glusterd_snap_volinfo_restore (dict_t *rsp_dict, + glusterd_volinfo_t *new_volinfo, + glusterd_volinfo_t *snap_volinfo); +int32_t +glusterd_lvm_snapshot_remove (dict_t *rsp_dict, glusterd_volinfo_t *snap_vol); + +int32_t +glusterd_missed_snapinfo_new (glusterd_missed_snap_info **missed_snapinfo); + +int32_t +glusterd_missed_snap_op_new (glusterd_snap_op_t **snap_op); + +int32_t +glusterd_add_missed_snaps_to_dict (dict_t *rsp_dict, char *snap_uuid, + glusterd_brickinfo_t *brickinfo, + int32_t brick_number, int32_t op); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index c8846a4fa..6f3c69e7d 100644 --- 
a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is GF_FREE software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _CONFIG_H #define _CONFIG_H @@ -24,6 +14,13 @@ #endif #include <fnmatch.h> +#include <sys/wait.h> +#include <dlfcn.h> + +#if (HAVE_LIB_XML) +#include <libxml/encoding.h> +#include <libxml/xmlwriter.h> +#endif #include "xlator.h" #include "glusterd.h" @@ -31,128 +28,18 @@ #include "logging.h" #include "dict.h" #include "graph-utils.h" +#include "glusterd-store.h" +#include "glusterd-hooks.h" +#include "trie.h" #include "glusterd-mem-types.h" -#include "cli1.h" +#include "cli1-xdr.h" #include "glusterd-volgen.h" +#include "glusterd-op-sm.h" +#include "glusterd-utils.h" +#include "run.h" +#include "options.h" - -/* dispatch table for VOLUME SET - * ----------------------------- - * - * Format of entries: - * - * First field is the <key>, for the purpose of looking it up - * in volume dictionary. Each <key> is of the format "<domain>.<specifier>". - * - * Second field is <voltype>. - * - * Third field is <option>, if its unset, it's assumed to be - * the same as <specifier>. - * - * Fourth field is <value>. In this context they are used to specify - * a default. That is, even the volume dict doesn't have a value, - * we procced as if the default value were set for it. - * - * There are two type of entries: basic and special. - * - * - Basic entries are the ones where the <option> does _not_ start with - * the bang! character ('!'). - * - * In their case, <option> is understood as an option for an xlator of - * type <voltype>. Their effect is to copy over the volinfo->dict[<key>] - * value to all graph nodes of type <voltype> (if such a value is set). - * - * You are free to add entries of this type, they will become functional - * just by being present in the table. - * - * - Special entries where the <option> starts with the bang!. - * - * They are not applied to all graphs during generation, and you cannot - * extend them in a trivial way which could be just picked up. Better - * not touch them unless you know what you do. - * - * "NODOC" entries are not part of the public interface and are subject - * to change at any time. 
- */ - -struct volopt_map_entry { - char *key; - char *voltype; - char *option; - char *value; -}; - -static struct volopt_map_entry glusterd_volopt_map[] = { - {"cluster.lookup-unhashed", "cluster/distribute", }, /* NODOC */ - {"cluster.min-free-disk", "cluster/distribute", }, /* NODOC */ - - {"cluster.entry-change-log", "cluster/replicate", }, /* NODOC */ - {"cluster.read-subvolume", "cluster/replicate", }, /* NODOC */ - {"cluster.background-self-heal-count", "cluster/replicate", }, /* NODOC */ - {"cluster.metadata-self-heal", "cluster/replicate", }, /* NODOC */ - {"cluster.data-self-heal", "cluster/replicate", }, /* NODOC */ - {"cluster.entry-self-heal", "cluster/replicate", }, /* NODOC */ - {"cluster.strict-readdir", "cluster/replicate", }, /* NODOC */ - {"cluster.self-heal-window-size", "cluster/replicate", "data-self-heal-window-size",}, - {"cluster.data-change-log", "cluster/replicate", }, /* NODOC */ - {"cluster.metadata-change-log", "cluster/replicate", }, /* NODOC */ - - {"cluster.stripe-block-size", "cluster/stripe", "block-size",}, - - {"diagnostics.latency-measurement", "debug/io-stats", }, - {"diagnostics.dump-fd-stats", "debug/io-stats", }, - {"diagnostics.brick-log-level", "debug/io-stats", "!log-level",}, - {"diagnostics.client-log-level", "debug/io-stats", "!log-level",}, - - {"performance.cache-max-file-size", "performance/io-cache", "max-file-size",}, - {"performance.cache-min-file-size", "performance/io-cache", "min-file-size",}, - {"performance.cache-refresh-timeout", "performance/io-cache", "cache-timeout",}, - {"performance.cache-priority", "performance/io-cache", "priority",}, /* NODOC */ - {"performance.cache-size", "performance/io-cache", }, - {"performance.cache-size", "performance/quick-read", }, - {"performance.flush-behind", "performance/write-behind", "flush-behind",}, - - {"performance.io-thread-count", "performance/io-threads", "thread-count",}, - - {"performance.disk-usage-limit", "performance/quota", }, /* NODOC */ - {"performance.min-free-disk-limit", "performance/quota", }, /* NODOC */ - - {"performance.write-behind-window-size", "performance/write-behind", "cache-size",}, - - {"network.frame-timeout", "protocol/client", }, - {"network.ping-timeout", "protocol/client", }, - {"network.inode-lru-limit", "protocol/server", }, /* NODOC */ - - {"auth.allow", "protocol/server", "!server-auth", "*"}, - {"auth.reject", "protocol/server", "!server-auth",}, - - {"transport.keepalive", "protocol/server", "transport.socket.keepalive",}, - - {"performance.write-behind", "performance/write-behind", "!perf", "on"}, /* NODOC */ - {"performance.read-ahead", "performance/read-ahead", "!perf", "on"}, /* NODOC */ - {"performance.io-cache", "performance/io-cache", "!perf", "on"}, /* NODOC */ - {"performance.quick-read", "performance/quick-read", "!perf", "on"}, /* NODOC */ - {"performance.stat-prefetch", "performance/stat-prefetch", "!perf",}, /* NODOC */ - - {NULL, } -}; - - -#define VOLGEN_GET_NFS_DIR(path) \ - do { \ - glusterd_conf_t *priv = THIS->private; \ - snprintf (path, PATH_MAX, "%s/nfs", priv->workdir); \ - } while (0); \ - -#define VOLGEN_GET_VOLUME_DIR(path, volinfo) \ - do { \ - glusterd_conf_t *priv = THIS->private; \ - snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir, \ - volinfo->volname); \ - } while (0); \ - - - +extern struct volopt_map_entry glusterd_volopt_map[]; /********************************************* * @@ -161,6 +48,20 @@ static struct volopt_map_entry glusterd_volopt_map[] = { *********************************************/ +struct 
volgen_graph { + char **errstr; + glusterfs_graph_t graph; +}; +typedef struct volgen_graph volgen_graph_t; + +static void +set_graph_errstr (volgen_graph_t *graph, const char *str) +{ + if (!graph->errstr) + return; + + *graph->errstr = gf_strdup (str); +} static xlator_t * xlator_instantiate_va (const char *type, const char *format, va_list arg) @@ -188,13 +89,14 @@ xlator_instantiate_va (const char *type, const char *format, va_list arg) xl->name = volname; INIT_LIST_HEAD (&xl->volume_options); + xl->ctx = THIS->ctx; + return xl; error: gf_log ("", GF_LOG_ERROR, "creating xlator of type %s failed", type); - if (volname) - GF_FREE (volname); + GF_FREE (volname); if (xl) xlator_destroy (xl); @@ -232,13 +134,13 @@ volgen_xlator_link (xlator_t *pxl, xlator_t *cxl) } static int -volgen_graph_link (glusterfs_graph_t *graph, xlator_t *xl) +volgen_graph_link (volgen_graph_t *graph, xlator_t *xl) { int ret = 0; /* no need to care about graph->top here */ - if (graph->first) - ret = volgen_xlator_link (xl, graph->first); + if (graph->graph.first) + ret = volgen_xlator_link (xl, graph->graph.first); if (ret == -1) { gf_log ("", GF_LOG_ERROR, "failed to add graph entry %s", xl->name); @@ -250,7 +152,7 @@ volgen_graph_link (glusterfs_graph_t *graph, xlator_t *xl) } static xlator_t * -volgen_graph_add_as (glusterfs_graph_t *graph, const char *type, +volgen_graph_add_as (volgen_graph_t *graph, const char *type, const char *format, ...) { va_list arg; @@ -268,13 +170,13 @@ volgen_graph_add_as (glusterfs_graph_t *graph, const char *type, return NULL; } else - glusterfs_graph_set_first (graph, xl); + glusterfs_graph_set_first (&graph->graph, xl); return xl; } static xlator_t * -volgen_graph_add_nolink (glusterfs_graph_t *graph, const char *type, +volgen_graph_add_nolink (volgen_graph_t *graph, const char *type, const char *format, ...) 
{ va_list arg; @@ -287,13 +189,13 @@ volgen_graph_add_nolink (glusterfs_graph_t *graph, const char *type, if (!xl) return NULL; - glusterfs_graph_set_first (graph, xl); + glusterfs_graph_set_first (&graph->graph, xl); return xl; } static xlator_t * -volgen_graph_add (glusterfs_graph_t *graph, char *type, char *volname) +volgen_graph_add (volgen_graph_t *graph, char *type, char *volname) { char *shorttype = NULL; @@ -325,10 +227,203 @@ xlator_set_option (xlator_t *xl, char *key, char *value) return dict_set_dynstr (xl->options, key, dval); } +static int +xlator_get_option (xlator_t *xl, char *key, char **value) +{ + GF_ASSERT (xl); + return dict_get_str (xl->options, key, value); +} + static inline xlator_t * -first_of (glusterfs_graph_t *graph) +first_of (volgen_graph_t *graph) +{ + return (xlator_t *)graph->graph.first; +} + + + + +/************************** + * + * Trie glue + * + *************************/ + + +static int +volopt_selector (int lvl, char **patt, void *param, + int (*optcbk)(char *word, void *param)) +{ + struct volopt_map_entry *vme = NULL; + char *w = NULL; + int i = 0; + int len = 0; + int ret = 0; + char *dot = NULL; + + for (vme = glusterd_volopt_map; vme->key; vme++) { + w = vme->key; + + for (i = 0; i < lvl; i++) { + if (patt[i]) { + w = strtail (w, patt[i]); + GF_ASSERT (!w || *w); + if (!w || *w != '.') + goto next; + } else { + w = strchr (w, '.'); + GF_ASSERT (w); + } + w++; + } + + dot = strchr (w, '.'); + if (dot) { + len = dot - w; + w = gf_strdup (w); + if (!w) + return -1; + w[len] = '\0'; + } + ret = optcbk (w, param); + if (dot) + GF_FREE (w); + if (ret) + return -1; + next: + continue; + } + + return 0; +} + +static int +volopt_trie_cbk (char *word, void *param) +{ + return trie_add ((trie_t *)param, word); +} + +static int +process_nodevec (struct trienodevec *nodevec, char **hint) +{ + int ret = 0; + char *hint1 = NULL; + char *hint2 = NULL; + char *hintinfx = ""; + trienode_t **nodes = nodevec->nodes; + + if (!nodes[0]) { + *hint = NULL; + return 0; + } + +#if 0 + /* Limit as in git */ + if (trienode_get_dist (nodes[0]) >= 6) { + *hint = NULL; + return 0; + } +#endif + + if (trienode_get_word (nodes[0], &hint1)) + return -1; + + if (nodevec->cnt < 2 || !nodes[1]) { + *hint = hint1; + return 0; + } + + if (trienode_get_word (nodes[1], &hint2)) + return -1; + + if (*hint) + hintinfx = *hint; + ret = gf_asprintf (hint, "%s or %s%s", hint1, hintinfx, hint2); + if (ret > 0) + ret = 0; + return ret; +} + +static int +volopt_trie_section (int lvl, char **patt, char *word, char **hint, int hints) { - return (xlator_t *)graph->first; + trienode_t *nodes[] = { NULL, NULL }; + struct trienodevec nodevec = { nodes, 2}; + trie_t *trie = NULL; + int ret = 0; + + trie = trie_new (); + if (!trie) + return -1; + + if (volopt_selector (lvl, patt, trie, &volopt_trie_cbk)) { + trie_destroy (trie); + + return -1; + } + + GF_ASSERT (hints <= 2); + nodevec.cnt = hints; + ret = trie_measure_vec (trie, word, &nodevec); + if (ret || !nodevec.nodes[0]) + trie_destroy (trie); + + ret = process_nodevec (&nodevec, hint); + trie_destroy (trie); + + return ret; +} + +static int +volopt_trie (char *key, char **hint) +{ + char *patt[] = { NULL }; + char *fullhint = NULL; + char *dot = NULL; + char *dom = NULL; + int len = 0; + int ret = 0; + + *hint = NULL; + + dot = strchr (key, '.'); + if (!dot) + return volopt_trie_section (1, patt, key, hint, 2); + + len = dot - key; + dom = gf_strdup (key); + if (!dom) + return -1; + dom[len] = '\0'; + + ret = volopt_trie_section (0, NULL, dom, 
patt, 1); + GF_FREE (dom); + if (ret) { + patt[0] = NULL; + goto out; + } + if (!patt[0]) + goto out; + + *hint = "..."; + ret = volopt_trie_section (1, patt, dot + 1, hint, 2); + if (ret) + goto out; + if (*hint) { + ret = gf_asprintf (&fullhint, "%s.%s", patt[0], *hint); + GF_FREE (*hint); + if (ret >= 0) { + ret = 0; + *hint = fullhint; + } + } + + out: + GF_FREE (patt[0]); + if (ret) + *hint = NULL; + + return ret; } @@ -341,40 +436,36 @@ first_of (glusterfs_graph_t *graph) **************************/ -typedef int (*volgen_opthandler_t) (glusterfs_graph_t *graph, +typedef int (*volgen_opthandler_t) (volgen_graph_t *graph, struct volopt_map_entry *vme, void *param); struct opthandler_data { - glusterfs_graph_t *graph; + volgen_graph_t *graph; volgen_opthandler_t handler; struct volopt_map_entry *vme; gf_boolean_t found; gf_boolean_t data_t_fake; int rv; + char *volname; void *param; }; -#define pattern_match_options 0 - - -static void -process_option (dict_t *dict, char *key, data_t *value, void *param) +static int +process_option (char *key, data_t *value, void *param) { struct opthandler_data *odt = param; struct volopt_map_entry vme = {0,}; if (odt->rv) - return; -#if pattern_match_options - if (fnmatch (odt->vme->key, key, 0) != 0) - return; -#endif + return 0; odt->found = _gf_true; vme.key = key; vme.voltype = odt->vme->voltype; vme.option = odt->vme->option; + vme.op_version = odt->vme->op_version; + if (!vme.option) { vme.option = strrchr (key, '.'); if (vme.option) @@ -388,10 +479,11 @@ process_option (dict_t *dict, char *key, data_t *value, void *param) vme.value = value->data; odt->rv = odt->handler (odt->graph, &vme, odt->param); + return 0; } static int -volgen_graph_set_options_generic (glusterfs_graph_t *graph, dict_t *dict, +volgen_graph_set_options_generic (volgen_graph_t *graph, dict_t *dict, void *param, volgen_opthandler_t handler) { struct volopt_map_entry *vme = NULL; @@ -408,14 +500,10 @@ volgen_graph_set_options_generic (glusterfs_graph_t *graph, dict_t *dict, odt.found = _gf_false; odt.data_t_fake = _gf_false; -#if pattern_match_options - dict_foreach (dict, process_option, &odt); -#else data = dict_get (dict, vme->key); if (data) - process_option (dict, vme->key, data, &odt); -#endif + process_option (vme->key, data, &odt); if (odt.rv) return odt.rv; @@ -429,8 +517,7 @@ volgen_graph_set_options_generic (glusterfs_graph_t *graph, dict_t *dict, * in this context */ odt.data_t_fake = _gf_true; - process_option (NULL, vme->key, (data_t *)vme->value, - &odt); + process_option (vme->key, (data_t *)vme->value, &odt); if (odt.rv) return odt.rv; } @@ -440,36 +527,46 @@ volgen_graph_set_options_generic (glusterfs_graph_t *graph, dict_t *dict, } static int -basic_option_handler (glusterfs_graph_t *graph, struct volopt_map_entry *vme, - void *param) +no_filter_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) { xlator_t *trav; int ret = 0; - if (vme->option[0] == '!') - return 0; - for (trav = first_of (graph); trav; trav = trav->next) { if (strcmp (trav->type, vme->voltype) != 0) continue; ret = xlator_set_option (trav, vme->option, vme->value); if (ret) - return -1; + break; } + return ret; +} - return 0; +static int +basic_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + int ret = 0; + + if (vme->option[0] == '!') + goto out; + + ret = no_filter_option_handler (graph, vme, param); +out: + return ret; } static int -volgen_graph_set_options (glusterfs_graph_t *graph, dict_t *dict) 
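/*
 * Editor's note on the option-dispatch walk above: for every entry in
 * glusterd_volopt_map, volgen_graph_set_options_generic() looks the key
 * up in the volume dict and hands the value to a handler; when the dict
 * has no value but the map entry carries a default, that default is fed
 * to the handler as if it had been set (the data_t_fake path). A
 * minimal, self-contained sketch of the same table-driven idea -- all
 * demo_* names are hypothetical and not part of this patch:
 */
#include <stdio.h>
#include <string.h>

struct demo_opt { const char *key; const char *def; };

typedef int (*demo_handler_t) (const char *key, const char *value);

static int
demo_apply_options (const struct demo_opt *map, const char *set_key,
                    const char *set_value, demo_handler_t handler)
{
        const struct demo_opt *o = NULL;
        const char *val = NULL;

        for (o = map; o->key; o++) {
                /* prefer an explicitly set value, else the default */
                val = (set_key && strcmp (o->key, set_key) == 0)
                        ? set_value : o->def;
                if (!val)
                        continue;       /* neither set nor defaulted */
                if (handler (o->key, val))
                        return -1;      /* handler failed: abort walk */
        }
        return 0;
}

static int
demo_print_handler (const char *key, const char *value)
{
        printf ("apply %s = %s\n", key, value);
        return 0;
}

int
main (void)
{
        struct demo_opt map[] = {
                { "performance.write-behind", "on" },
                { "network.ping-timeout",     NULL },   /* no default */
                { NULL, NULL }
        };

        /* simulates "volume set <vol> network.ping-timeout 42" */
        return demo_apply_options (map, "network.ping-timeout", "42",
                                   demo_print_handler);
}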
+volgen_graph_set_options (volgen_graph_t *graph, dict_t *dict) { return volgen_graph_set_options_generic (graph, dict, NULL, &basic_option_handler); } static int -optget_option_handler (glusterfs_graph_t *graph, struct volopt_map_entry *vme, +optget_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, void *param) { struct volopt_map_entry *vme2 = param; @@ -480,16 +577,118 @@ optget_option_handler (glusterfs_graph_t *graph, struct volopt_map_entry *vme, return 0; } -/* This getter considers defaults also. */ +static glusterd_server_xlator_t +get_server_xlator (char *xlator) +{ + glusterd_server_xlator_t subvol = GF_XLATOR_NONE; + + if (strcmp (xlator, "posix") == 0) + subvol = GF_XLATOR_POSIX; + if (strcmp (xlator, "acl") == 0) + subvol = GF_XLATOR_ACL; + if (strcmp (xlator, "locks") == 0) + subvol = GF_XLATOR_LOCKS; + if (strcmp (xlator, "io-threads") == 0) + subvol = GF_XLATOR_IOT; + if (strcmp (xlator, "index") == 0) + subvol = GF_XLATOR_INDEX; + if (strcmp (xlator, "marker") == 0) + subvol = GF_XLATOR_MARKER; + if (strcmp (xlator, "io-stats") == 0) + subvol = GF_XLATOR_IO_STATS; + if (strcmp (xlator, "bd") == 0) + subvol = GF_XLATOR_BD; + + return subvol; +} + +static glusterd_client_xlator_t +get_client_xlator (char *xlator) +{ + glusterd_client_xlator_t subvol = GF_CLNT_XLATOR_NONE; + + if (strcmp (xlator, "client") == 0) + subvol = GF_CLNT_XLATOR_FUSE; + + return subvol; +} + +static int +debugxl_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + char *volname = NULL; + gf_boolean_t enabled = _gf_false; + + volname = param; + + if (strcmp (vme->option, "!debug") != 0) + return 0; + + if (!strcmp (vme->key , "debug.trace") || + !strcmp (vme->key, "debug.error-gen")) { + if (get_server_xlator (vme->value) == GF_XLATOR_NONE && + get_client_xlator (vme->value) == GF_CLNT_XLATOR_NONE) + return 0; + else + goto add_graph; + } + + if (gf_string2boolean (vme->value, &enabled) == -1) + return -1; + if (!enabled) + return 0; + +add_graph: + if (volgen_graph_add (graph, vme->voltype, volname)) + return 0; + else + return -1; +} + int -glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value) +check_and_add_debug_xl (volgen_graph_t *graph, dict_t *set_dict, char *volname, + char *xlname) +{ + int ret = 0; + char *value_str = NULL; + + ret = dict_get_str (set_dict, "debug.trace", &value_str); + if (!ret) { + if (strcmp (xlname, value_str) == 0) { + ret = volgen_graph_set_options_generic (graph, set_dict, volname, + &debugxl_option_handler); + if (ret) + goto out; + } + } + + ret = dict_get_str (set_dict, "debug.error-gen", &value_str); + if (!ret) { + if (strcmp (xlname, value_str) == 0) { + ret = volgen_graph_set_options_generic (graph, set_dict, volname, + &debugxl_option_handler); + if (ret) + goto out; + } + } + + ret = 0; + +out: + return ret; +} + +/* This getter considers defaults also. 
*/ +static int +volgen_dict_get (dict_t *dict, char *key, char **value) { struct volopt_map_entry vme = {0,}; int ret = 0; vme.key = key; - ret = volgen_graph_set_options_generic (NULL, volinfo->dict, &vme, + ret = volgen_graph_set_options_generic (NULL, dict, &vme, &optget_option_handler); if (ret) { gf_log ("", GF_LOG_ERROR, "Out of memory"); @@ -502,47 +701,178 @@ glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value) return 0; } -static char * -option_complete (char *key) +static int +option_complete (char *key, char **completion) { struct volopt_map_entry *vme = NULL; - char *completion = NULL; + *completion = NULL; for (vme = glusterd_volopt_map; vme->key; vme++) { if (strcmp (strchr (vme->key, '.') + 1, key) != 0) continue; - if (completion) - return NULL; - else - completion = vme->key; + if (*completion && strcmp (*completion, vme->key) != 0) { + /* cancel on non-unique match */ + *completion = NULL; + + return 0; + } else + *completion = vme->key; } - return completion; + if (*completion) { + /* For sake of unified API we want + * have the completion to be a to-be-freed + * string. + */ + *completion = gf_strdup (*completion); + return -!*completion; + } + + return 0; +} + +int +glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value) +{ + return volgen_dict_get (volinfo->dict, key, value); +} + +int +glusterd_volinfo_get_boolean (glusterd_volinfo_t *volinfo, char *key) +{ + char *val = NULL; + gf_boolean_t boo = _gf_false; + int ret = 0; + + ret = glusterd_volinfo_get (volinfo, key, &val); + if (ret) + return -1; + + if (val) + ret = gf_string2boolean (val, &boo); + if (ret) { + gf_log ("", GF_LOG_ERROR, "value for %s option is not valid", key); + + return -1; + } + + return boo; +} + +gf_boolean_t +glusterd_check_voloption_flags (char *key, int32_t flags) +{ + char *completion = NULL; + struct volopt_map_entry *vmep = NULL; + int ret = 0; + + COMPLETE_OPTION(key, completion, ret); + for (vmep = glusterd_volopt_map; vmep->key; vmep++) { + if (strcmp (vmep->key, key) == 0) { + if (vmep->flags & flags) + return _gf_true; + else + return _gf_false; + } + } + + return _gf_false; +} + +gf_boolean_t +glusterd_check_globaloption (char *key) +{ + char *completion = NULL; + struct volopt_map_entry *vmep = NULL; + int ret = 0; + + COMPLETE_OPTION(key, completion, ret); + for (vmep = glusterd_volopt_map; vmep->key; vmep++) { + if (strcmp (vmep->key, key) == 0) { + if ((vmep->type == GLOBAL_DOC) || + (vmep->type == GLOBAL_NO_DOC)) + return _gf_true; + else + return _gf_false; + } + } + + return _gf_false; +} + +gf_boolean_t +glusterd_check_localoption (char *key) +{ + char *completion = NULL; + struct volopt_map_entry *vmep = NULL; + int ret = 0; + + COMPLETE_OPTION(key, completion, ret); + for (vmep = glusterd_volopt_map; vmep->key; vmep++) { + if (strcmp (vmep->key, key) == 0) { + if ((vmep->type == DOC) || + (vmep->type == NO_DOC)) + return _gf_true; + else + return _gf_false; + } + } + + return _gf_false; +} + +int +glusterd_check_voloption (char *key) +{ + char *completion = NULL; + struct volopt_map_entry *vmep = NULL; + int ret = 0; + + COMPLETE_OPTION(key, completion, ret); + for (vmep = glusterd_volopt_map; vmep->key; vmep++) { + if (strcmp (vmep->key, key) == 0) { + if ((vmep->type == DOC) || + (vmep->type == DOC)) + return _gf_true; + else + return _gf_false; + } + } + + return _gf_false; + } int glusterd_check_option_exists (char *key, char **completion) { - dict_t *dict = NULL; struct volopt_map_entry vme = {0,}; struct volopt_map_entry 
*vmep = NULL; int ret = 0; + xlator_t *this = THIS; (void)vme; (void)vmep; - (void)dict; if (!strchr (key, '.')) { if (completion) { - *completion = option_complete (key); + ret = option_complete (key, completion); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + return -1; + } - return !!*completion; + ret = !!*completion; + if (ret) + return ret; + else + goto trie; } else return 0; } -#if !pattern_match_options for (vmep = glusterd_volopt_map; vmep->key; vmep++) { if (strcmp (vmep->key, key) == 0) { ret = 1; @@ -550,61 +880,147 @@ glusterd_check_option_exists (char *key, char **completion) } } - return ret; -#else - vme.key = key; + if (ret || !completion) + return ret; - /* We are getting a bit anal here to avoid typing - * fnmatch one more time. Orthogonality foremost! - * The internal logic of looking up in the volopt_map table - * should be coded exactly once. - * - * [[Ha-ha-ha, so now if I ever change the internals then I'll - * have to update the fnmatch in this comment also :P ]] - */ - dict = get_new_dict (); - if (!dict || dict_set_str (dict, key, "")) - goto oom; + trie: + ret = volopt_trie (key, completion); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Some error occurred during keyword hinting"); + } - ret = volgen_graph_set_options_generic (NULL, dict, &vme, - &optget_option_handler); - dict_destroy (dict); - if (ret) - goto oom; + return ret; +} - return !!vme.value; +char* +glusterd_get_trans_type_rb (gf_transport_type ttype) +{ + char *trans_type = NULL; + + switch (ttype) { + case GF_TRANSPORT_RDMA: + gf_asprintf (&trans_type, "rdma"); + break; + case GF_TRANSPORT_TCP: + case GF_TRANSPORT_BOTH_TCP_RDMA: + gf_asprintf (&trans_type, "tcp"); + break; + default: + gf_log (THIS->name, GF_LOG_ERROR, "Unknown " + "transport type"); + } - oom: - gf_log ("", GF_LOG_ERROR, "Out of memory"); + return trans_type; +} - return -1; -#endif +static int +_xl_link_children (xlator_t *parent, xlator_t *children, size_t child_count) +{ + xlator_t *trav = NULL; + size_t seek = 0; + int ret = -1; + + if (child_count == 0) + goto out; + seek = child_count; + for (trav = children; --seek; trav = trav->next); + for (; child_count--; trav = trav->prev) { + ret = volgen_xlator_link (parent, trav); + if (ret) + goto out; + } + ret = 0; +out: + return ret; } static int -volgen_graph_merge_sub (glusterfs_graph_t *dgraph, glusterfs_graph_t *sgraph) +volgen_graph_merge_sub (volgen_graph_t *dgraph, volgen_graph_t *sgraph, + size_t child_count) { xlator_t *trav = NULL; + int ret = 0; - GF_ASSERT (dgraph->first); + GF_ASSERT (dgraph->graph.first); - if (volgen_xlator_link (first_of (dgraph), first_of (sgraph)) == -1) - return -1; + ret = _xl_link_children (first_of (dgraph), first_of (sgraph), + child_count); + if (ret) + goto out; for (trav = first_of (dgraph); trav->next; trav = trav->next); - trav->next = sgraph->first; + trav->next = first_of (sgraph); trav->next->prev = trav; - dgraph->xl_count += sgraph->xl_count; + dgraph->graph.xl_count += sgraph->graph.xl_count; - return 0; +out: + return ret; +} + +static void +volgen_apply_filters (char *orig_volfile) +{ + DIR *filterdir = NULL; + struct dirent entry = {0,}; + struct dirent *next = NULL; + char *filterpath = NULL; + struct stat statbuf = {0,}; + + filterdir = opendir(FILTERDIR); + if (!filterdir) { + return; + } + + while ((readdir_r(filterdir,&entry,&next) == 0) && next) { + if (!strncmp(entry.d_name,".",sizeof(entry.d_name))) { + continue; + } + if (!strncmp(entry.d_name,"..",sizeof(entry.d_name))) { + continue; 
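/*
 * Editor's note on the filter hook being introduced in this loop: after
 * a volfile is (re)written, every regular, executable file found under
 * FILTERDIR is executed as "<filter> <volfile>" (see the runcmd() call
 * below), so a filter may rewrite the volfile in place before it is
 * served. A hypothetical example filter -- not shipped by this patch --
 * could be as small as:
 */
#include <stdio.h>

int
main (int argc, char *argv[])
{
        FILE *f = NULL;

        if (argc < 2)                   /* glusterd passes the volfile */
                return 1;               /* path as the sole argument   */
        f = fopen (argv[1], "a");
        if (!f)
                return 1;
        /* trivial in-place edit: append an audit comment */
        fprintf (f, "# annotated by example filter\n");
        return fclose (f) == 0 ? 0 : 1;
}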
+ } + /* + * d_type isn't guaranteed to be present/valid on all systems, + * so do an explicit stat instead. + */ + if (gf_asprintf(&filterpath,"%s/%.*s",FILTERDIR, + sizeof(entry.d_name), entry.d_name) == (-1)) { + continue; + } + /* Deliberately use stat instead of lstat to allow symlinks. */ + if (stat(filterpath,&statbuf) == (-1)) { + goto free_fp; + } + if (!S_ISREG(statbuf.st_mode)) { + goto free_fp; + } + /* + * We could check the mode in statbuf directly, or just skip + * this entirely and check for EPERM after exec fails, but this + * is cleaner. + */ + if (access(filterpath,X_OK) != 0) { + goto free_fp; + } + if (runcmd(filterpath,orig_volfile,NULL)) { + gf_log("",GF_LOG_ERROR,"failed to run filter %.*s", + (int)sizeof(entry.d_name), entry.d_name); + } +free_fp: + GF_FREE(filterpath); + } } static int -volgen_write_volfile (glusterfs_graph_t *graph, char *filename) +volgen_write_volfile (volgen_graph_t *graph, char *filename) { - char *ftmp = NULL; - FILE *f = NULL; + char *ftmp = NULL; + FILE *f = NULL; + int fd = 0; + xlator_t *this = NULL; + + this = THIS; if (gf_asprintf (&ftmp, "%s.tmp", filename) == -1) { ftmp = NULL; @@ -612,37 +1028,60 @@ volgen_write_volfile (glusterfs_graph_t *graph, char *filename) goto error; } + fd = creat (ftmp, S_IRUSR | S_IWUSR); + if (fd < 0) { + gf_log (this->name, GF_LOG_ERROR, "%s", + strerror (errno)); + goto error; + } + + close (fd); + f = fopen (ftmp, "w"); if (!f) goto error; - if (glusterfs_graph_print_file (f, graph) == -1) + if (glusterfs_graph_print_file (f, &graph->graph) == -1) goto error; - if (fclose (f) == -1) + if (fclose (f) != 0) { + gf_log (THIS->name, GF_LOG_ERROR, "fclose on the file %s " + "failed (%s)", ftmp, strerror (errno)); + /* + * Even though fclose has failed here, we have to set f to NULL. + * Otherwise when the code path goes to error, there again we + * try to close it which might cause undefined behavior such as + * process crash. 
+ */ + f = NULL; goto error; + } + + f = NULL; if (rename (ftmp, filename) == -1) goto error; GF_FREE (ftmp); + volgen_apply_filters(filename); + return 0; error: - if (ftmp) - GF_FREE (ftmp); + GF_FREE (ftmp); if (f) fclose (f); - gf_log ("", GF_LOG_ERROR, "failed to create volfile %s", filename); + gf_log (this->name, GF_LOG_ERROR, + "failed to create volfile %s", filename); return -1; } static void -volgen_graph_free (glusterfs_graph_t *graph) +volgen_graph_free (volgen_graph_t *graph) { xlator_t *trav = NULL; xlator_t *trav_old = NULL; @@ -659,9 +1098,9 @@ volgen_graph_free (glusterfs_graph_t *graph) } static int -build_graph_generic (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, +build_graph_generic (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *mod_dict, void *param, - int (*builder) (glusterfs_graph_t *graph, + int (*builder) (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, void *param)) { @@ -672,10 +1111,11 @@ build_graph_generic (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, set_dict = dict_copy (volinfo->dict, NULL); if (!set_dict) return -1; - dict_copy (mod_dict, set_dict); - /* XXX dict_copy swallows errors */ - } else + dict_copy (mod_dict, set_dict); + /* XXX dict_copy swallows errors */ + } else { set_dict = volinfo->dict; + } ret = builder (graph, volinfo, set_dict, param); if (!ret) @@ -687,16 +1127,86 @@ build_graph_generic (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, return ret; } +static gf_transport_type +transport_str_to_type (char *tt) +{ + gf_transport_type type = GF_TRANSPORT_TCP; + + if (!strcmp ("tcp", tt)) + type = GF_TRANSPORT_TCP; + else if (!strcmp ("rdma", tt)) + type = GF_TRANSPORT_RDMA; + else if (!strcmp ("tcp,rdma", tt)) + type = GF_TRANSPORT_BOTH_TCP_RDMA; + return type; +} + +static void +transport_type_to_str (gf_transport_type type, char *tt) +{ + switch (type) { + case GF_TRANSPORT_RDMA: + strcpy (tt, "rdma"); + break; + case GF_TRANSPORT_TCP: + strcpy (tt, "tcp"); + break; + case GF_TRANSPORT_BOTH_TCP_RDMA: + strcpy (tt, "tcp,rdma"); + break; + } +} + static void get_vol_transport_type (glusterd_volinfo_t *volinfo, char *tt) { - volinfo->transport_type == GF_TRANSPORT_RDMA ? - strcpy (tt, "rdma"): - strcpy (tt, "tcp"); + transport_type_to_str (volinfo->transport_type, tt); +} + +static void +get_vol_nfs_transport_type (glusterd_volinfo_t *volinfo, char *tt) +{ + if (volinfo->nfs_transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) { + gf_log ("", GF_LOG_ERROR, "%s:nfs transport cannot be both" + " tcp and rdma", volinfo->volname); + GF_ASSERT (0); + } + transport_type_to_str (volinfo->nfs_transport_type, tt); +} + +/* gets the volinfo, dict, a character array for filling in + * the transport type and a boolean option which says whether + * the transport type is required for nfs or not. 
If its not + * for nfs, then it is considered as the client transport + * and client transport type is filled in the character array + */ +static void +get_transport_type (glusterd_volinfo_t *volinfo, dict_t *set_dict, + char *transt, gf_boolean_t is_nfs) +{ + int ret = -1; + char *tt = NULL; + char *key = NULL; + typedef void (*transport_type) (glusterd_volinfo_t *volinfo, char *tt); + transport_type get_transport; + + if (is_nfs == _gf_false) { + key = "client-transport-type"; + get_transport = get_vol_transport_type; + } else { + key = "nfs.transport-type"; + get_transport = get_vol_nfs_transport_type; + } + + ret = dict_get_str (set_dict, key, &tt); + if (ret) + get_transport (volinfo, transt); + if (!ret) + strcpy (transt, tt); } static int -server_auth_option_handler (glusterfs_graph_t *graph, +server_auth_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, void *param) { xlator_t *xl = NULL; @@ -728,14 +1238,15 @@ server_auth_option_handler (glusterfs_graph_t *graph, } static int -loglevel_option_handler (glusterfs_graph_t *graph, +loglevel_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, void *param) { char *role = param; struct volopt_map_entry vme2 = {0,}; - if (strcmp (vme->option, "!log-level") != 0 || - !strstr (vme->key, role)) + if ( (strcmp (vme->option, "!client-log-level") != 0 && + strcmp (vme->option, "!brick-log-level") != 0) + || !strstr (vme->key, role)) return 0; memcpy (&vme2, vme, sizeof (vme2)); @@ -745,35 +1256,193 @@ loglevel_option_handler (glusterfs_graph_t *graph, } static int -server_spec_option_handler (glusterfs_graph_t *graph, +server_check_marker_off (volgen_graph_t *graph, struct volopt_map_entry *vme, + glusterd_volinfo_t *volinfo) +{ + gf_boolean_t bool = _gf_false; + int ret = 0; + + GF_ASSERT (volinfo); + GF_ASSERT (vme); + + if (strcmp (vme->option, "!xtime") != 0) + return 0; + + ret = gf_string2boolean (vme->value, &bool); + if (ret || bool) + goto out; + + ret = glusterd_volinfo_get_boolean (volinfo, VKEY_MARKER_XTIME); + if (ret < 0) { + gf_log ("", GF_LOG_WARNING, "failed to get the marker status"); + ret = -1; + goto out; + } + + if (ret) { + bool = _gf_false; + ret = glusterd_check_gsync_running (volinfo, &bool); + + if (bool) { + gf_log ("", GF_LOG_WARNING, GEOREP" sessions active" + "for the volume %s, cannot disable marker " + ,volinfo->volname); + set_graph_errstr (graph, + VKEY_MARKER_XTIME" cannot be disabled " + "while "GEOREP" sessions exist"); + ret = -1; + goto out; + } + + if (ret) { + gf_log ("", GF_LOG_WARNING, "Unable to get the status" + " of active gsync session"); + goto out; + } + } + + ret = 0; + out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; + +} + +static int +sys_loglevel_option_handler (volgen_graph_t *graph, + struct volopt_map_entry *vme, + void *param) +{ + char *role = NULL; + struct volopt_map_entry vme2 = {0,}; + + role = (char *) param; + + if (strcmp (vme->option, "!sys-log-level") != 0 || + !strstr (vme->key, role)) + return 0; + + memcpy (&vme2, vme, sizeof (vme2)); + vme2.option = "sys-log-level"; + + return basic_option_handler (graph, &vme2, NULL); +} + +static int +volgen_graph_set_xl_options (volgen_graph_t *graph, dict_t *dict) +{ + int32_t ret = -1; + char *xlator = NULL; + char xlator_match[1024] = {0,}; /* for posix* -> *posix* */ + char *loglevel = NULL; + xlator_t *trav = NULL; + + ret = dict_get_str (dict, "xlator", &xlator); + if (ret) + goto out; + + ret = dict_get_str (dict, "loglevel", &loglevel); + if (ret) + goto out; + + snprintf 
(xlator_match, 1024, "*%s", xlator); + + for (trav = first_of (graph); trav; trav = trav->next) { + if (fnmatch(xlator_match, trav->type, FNM_NOESCAPE) == 0) { + gf_log ("glusterd", GF_LOG_DEBUG, "Setting log level for xlator: %s", + trav->type); + ret = xlator_set_option (trav, "log-level", loglevel); + if (ret) + break; + } + } + + out: + return ret; +} + +static int +server_spec_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, void *param) { - int ret = 0; + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + + volinfo = param; ret = server_auth_option_handler (graph, vme, NULL); if (!ret) + ret = server_check_marker_off (graph, vme, volinfo); + + if (!ret) ret = loglevel_option_handler (graph, vme, "brick"); + if (!ret) + ret = sys_loglevel_option_handler (graph, vme, "brick"); + return ret; } static int -server_graph_builder (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, - dict_t *set_dict, void *param) +server_spec_extended_option_handler (volgen_graph_t *graph, + struct volopt_map_entry *vme, void *param) { - char *volname = NULL; - char *path = NULL; - int pump = 0; - xlator_t *xl = NULL; - xlator_t *txl = NULL; - xlator_t *rbxl = NULL; - int ret = 0; - char transt[16] = {0,}; + int ret = 0; + dict_t *dict = NULL; - path = param; + GF_ASSERT (param); + dict = (dict_t *)param; + + ret = server_auth_option_handler (graph, vme, NULL); + if (!ret) + ret = volgen_graph_set_xl_options (graph, dict); + + return ret; +} + +static void get_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo); + +static int +server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param) +{ + char *volname = NULL; + char *path = NULL; + int pump = 0; + xlator_t *xl = NULL; + xlator_t *txl = NULL; + xlator_t *rbxl = NULL; + char transt[16] = {0,}; + char *ptranst = NULL; + char volume_id[64] = {0,}; + char tstamp_file[PATH_MAX] = {0,}; + int ret = 0; + char *xlator = NULL; + char *loglevel = NULL; + char *username = NULL; + char *password = NULL; + char index_basepath[PATH_MAX] = {0}; + char key[1024] = {0}; + glusterd_brickinfo_t *brickinfo = NULL; + char changelog_basepath[PATH_MAX] = {0,}; + + brickinfo = param; + path = brickinfo->path; volname = volinfo->volname; get_vol_transport_type (volinfo, transt); + ret = dict_get_str (set_dict, "xlator", &xlator); + + /* got a cli log level request */ + if (!ret) { + ret = dict_get_str (set_dict, "loglevel", &loglevel); + if (ret) { + gf_log ("glusterd", GF_LOG_ERROR, "could not get both" + " translator name and loglevel for log level request"); + goto out; + } + } + xl = volgen_graph_add (graph, "storage/posix", volname); if (!xl) return -1; @@ -782,19 +1451,87 @@ server_graph_builder (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, if (ret) return -1; + ret = xlator_set_option (xl, "volume-id", + uuid_utoa (volinfo->volume_id)); + if (ret) + return -1; + + ret = check_and_add_debug_xl (graph, set_dict, volname, + "posix"); + if (ret) + return -1; +#ifdef HAVE_BD_XLATOR + if (*brickinfo->vg != '\0') { + /* Now add BD v2 xlator if volume is BD type */ + xl = volgen_graph_add (graph, "storage/bd", volname); + if (!xl) + return -1; + + ret = xlator_set_option (xl, "device", "vg"); + if (ret) + return -1; + ret = xlator_set_option (xl, "export", brickinfo->vg); + if (ret) + return -1; + + ret = check_and_add_debug_xl (graph, set_dict, volname, "bd"); + if (ret) + return -1; + + } +#endif + + xl = volgen_graph_add (graph, "features/changelog", volname); + if (!xl) + return -1; + 
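/*
 * Editor's note: server_graph_builder() assembles the brick-side stack
 * bottom-up; in this excerpt the order so far is storage/posix ->
 * [storage/bd] -> features/changelog -> features/access-control ->
 * features/locks -> performance/io-threads, with index, marker and
 * debug/io-stats following further down. Because volgen_graph_add()
 * links each new xlator on top of the current graph head, a stack like
 * this can be expressed as a simple table walk. An illustrative helper
 * only (not part of the patch), reusing this file's volgen API:
 */
static int
demo_build_brick_stack (volgen_graph_t *graph, char *volname)
{
        static char *types[] = { "storage/posix", "features/changelog",
                                 "features/access-control",
                                 "features/locks",
                                 "performance/io-threads", NULL };
        int i = 0;

        /* bottom-to-top: each add puts the xlator above the last one */
        for (i = 0; types[i]; i++) {
                if (!volgen_graph_add (graph, types[i], volname))
                        return -1;
        }
        return 0;
}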
+ ret = xlator_set_option (xl, "changelog-brick", path); + if (ret) + return -1; + + snprintf (changelog_basepath, sizeof (changelog_basepath), + "%s/%s", path, ".glusterfs/changelogs"); + ret = xlator_set_option (xl, "changelog-dir", changelog_basepath); + if (ret) + return -1; + + ret = check_and_add_debug_xl (graph, set_dict, volname, "changelog"); + if (ret) + return -1; + xl = volgen_graph_add (graph, "features/access-control", volname); if (!xl) return -1; + ret = check_and_add_debug_xl (graph, set_dict, volname, "acl"); + if (ret) + return -1; + xl = volgen_graph_add (graph, "features/locks", volname); if (!xl) return -1; + ret = check_and_add_debug_xl (graph, set_dict, volname, "locks"); + if (ret) + return -1; + + xl = volgen_graph_add (graph, "performance/io-threads", volname); + if (!xl) + return -1; + + ret = check_and_add_debug_xl (graph, set_dict, volname, "io-threads"); + if (ret) + return -1; + ret = dict_get_int32 (volinfo->dict, "enable-pump", &pump); if (ret == -ENOENT) ret = pump = 0; if (ret) return -1; + + username = glusterd_auth_get_username (volinfo); + password = glusterd_auth_get_password (volinfo); + if (pump) { txl = first_of (graph); @@ -802,9 +1539,28 @@ server_graph_builder (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, "%s-replace-brick", volname); if (!rbxl) return -1; - ret = xlator_set_option (rbxl, "transport-type", transt); + + ptranst = glusterd_get_trans_type_rb (volinfo->transport_type); + if (NULL == ptranst) + return -1; + + if (username) { + ret = xlator_set_option (rbxl, "username", username); + if (ret) + return -1; + } + + if (password) { + ret = xlator_set_option (rbxl, "password", password); + if (ret) + return -1; + } + + ret = xlator_set_option (rbxl, "transport-type", ptranst); + GF_FREE (ptranst); if (ret) return -1; + xl = volgen_graph_add_nolink (graph, "cluster/pump", "%s-pump", volname); if (!xl) @@ -817,13 +1573,77 @@ server_graph_builder (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, return -1; } - xl = volgen_graph_add (graph, "performance/io-threads", volname); + xl = volgen_graph_add (graph, "features/index", volname); + if (!xl) + return -1; + + snprintf (index_basepath, sizeof (index_basepath), "%s/%s", + path, ".glusterfs/indices"); + ret = xlator_set_option (xl, "index-base", index_basepath); + if (ret) + return -1; + + ret = check_and_add_debug_xl (graph, set_dict, volname, + "index"); + if (ret) + return -1; + + xl = volgen_graph_add (graph, "features/marker", volname); if (!xl) return -1; - ret = xlator_set_option (xl, "thread-count", "16"); + + uuid_unparse (volinfo->volume_id, volume_id); + ret = xlator_set_option (xl, "volume-uuid", volume_id); + if (ret) + return -1; + get_vol_tstamp_file (tstamp_file, volinfo); + ret = xlator_set_option (xl, "timestamp-file", tstamp_file); if (ret) return -1; + ret = check_and_add_debug_xl (graph, set_dict, volname, "marker"); + if (ret) + return -1; + + if (dict_get_str_boolean (set_dict, "features.read-only", 0) && + dict_get_str_boolean (set_dict, "features.worm",0)) { + gf_log (THIS->name, GF_LOG_ERROR, + "read-only and worm cannot be set together"); + ret = -1; + goto out; + } + + /* Check for read-only volume option, and add it to the graph */ + if (dict_get_str_boolean (set_dict, "features.read-only", 0) + || volinfo -> is_snap_volume) { + xl = volgen_graph_add (graph, "features/read-only", volname); + if (!xl) { + ret = -1; + goto out; + } + } + + /* Check for worm volume option, and add it to the graph */ + if (dict_get_str_boolean (set_dict, 
"features.worm", 0)) { + xl = volgen_graph_add (graph, "features/worm", volname); + if (!xl) { + ret = -1; + goto out; + } + } + + /* Check for compress volume option, and add it to the graph on server side */ + if (dict_get_str_boolean (set_dict, "features.compress", 0)) { + xl = volgen_graph_add (graph, "features/cdc", volname); + if (!xl) { + ret = -1; + goto out; + } + ret = dict_set_str (set_dict, "compress.mode", "server"); + if (ret) + goto out; + } + xl = volgen_graph_add_as (graph, "debug/io-stats", path); if (!xl) return -1; @@ -835,24 +1655,86 @@ server_graph_builder (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, if (ret) return -1; - ret = volgen_graph_set_options_generic (graph, set_dict, NULL, - &server_spec_option_handler); + /*In the case of running multiple glusterds on a single machine, + * we should ensure that bricks don't listen on all IPs on that + * machine and break the IP based separation being brought about.*/ + if (dict_get (THIS->options, "transport.socket.bind-address")) { + ret = xlator_set_option (xl, "transport.socket.bind-address", + brickinfo->hostname); + if (ret) + return -1; + } + if (username) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "auth.login.%s.allow", path); + + ret = xlator_set_option (xl, key, username); + if (ret) + return -1; + } + + if (password) { + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "auth.login.%s.password", + username); + + ret = xlator_set_option (xl, key, password); + if (ret) + return -1; + } + + ret = volgen_graph_set_options_generic (graph, set_dict, + (xlator && loglevel) ? (void *)set_dict : volinfo, + (xlator && loglevel) ? &server_spec_extended_option_handler : + &server_spec_option_handler); + + out: return ret; } /* builds a graph for server role , with option overrides in mod_dict */ static int -build_server_graph (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, - dict_t *mod_dict, char *path) +build_server_graph (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *mod_dict, glusterd_brickinfo_t *brickinfo) { - return build_graph_generic (graph, volinfo, mod_dict, path, + return build_graph_generic (graph, volinfo, mod_dict, brickinfo, &server_graph_builder); } static int -perfxl_option_handler (glusterfs_graph_t *graph, struct volopt_map_entry *vme, +perfxl_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) +{ + gf_boolean_t enabled = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + + GF_ASSERT (param); + volinfo = param; + + if (strcmp (vme->option, "!perf") != 0) + return 0; + + if (gf_string2boolean (vme->value, &enabled) == -1) + return -1; + if (!enabled) + return 0; + + /* Check op-version before adding the 'open-behind' xlator in the graph + */ + if (!strcmp (vme->key, "performance.open-behind") && + (vme->op_version > volinfo->client_op_version)) + return 0; + + if (volgen_graph_add (graph, vme->voltype, volinfo->volname)) + return 0; + else + return -1; +} + +static int +nfsperfxl_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, void *param) { char *volname = NULL; @@ -860,7 +1742,7 @@ perfxl_option_handler (glusterfs_graph_t *graph, struct volopt_map_entry *vme, volname = param; - if (strcmp (vme->option, "!perf") != 0) + if (strcmp (vme->option, "!nfsperf") != 0) return 0; if (gf_string2boolean (vme->value, &enabled) == -1) @@ -874,236 +1756,1334 @@ perfxl_option_handler (glusterfs_graph_t *graph, struct volopt_map_entry *vme, return -1; } +#if (HAVE_LIB_XML) static int 
-client_graph_builder (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, - dict_t *set_dict, void *param) +end_sethelp_xml_doc (xmlTextWriterPtr writer) { - int replicate_count = 0; - int stripe_count = 0; - int dist_count = 0; - int num_bricks = 0; - char transt[16] = {0,}; - int cluster_count = 0; - char *volname = NULL; - dict_t *dict = NULL; - glusterd_brickinfo_t *brick = NULL; - char *replicate_args[] = {"cluster/replicate", - "%s-replicate-%d"}; - char *stripe_args[] = {"cluster/stripe", - "%s-stripe-%d"}; - char **cluster_args = NULL; - int i = 0; - int j = 0; - int ret = 0; - xlator_t *xl = NULL; - xlator_t *txl = NULL; - xlator_t *trav = NULL; + int ret = -1; - volname = volinfo->volname; - dict = volinfo->dict; - GF_ASSERT (dict); - get_vol_transport_type (volinfo, transt); + ret = xmlTextWriterEndElement(writer); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, "Could not end an " + "xmlElemetnt"); + ret = -1; + goto out; + } + ret = xmlTextWriterEndDocument (writer); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, "Could not end an " + "xmlDocument"); + ret = -1; + goto out; + } + ret = 0; + out: + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; + +} + +static int +init_sethelp_xml_doc (xmlTextWriterPtr *writer, xmlBufferPtr *buf) +{ + int ret; + + *buf = xmlBufferCreateSize (8192); + if (buf == NULL) { + gf_log ("glusterd", GF_LOG_ERROR, "Error creating the xml " + "buffer"); + ret = -1; + goto out; + } + + xmlBufferSetAllocationScheme (*buf,XML_BUFFER_ALLOC_DOUBLEIT); + + *writer = xmlNewTextWriterMemory(*buf, 0); + if (writer == NULL) { + gf_log ("glusterd", GF_LOG_ERROR, " Error creating the xml " + "writer"); + ret = -1; + goto out; + } + + ret = xmlTextWriterStartDocument(*writer, "1.0", "UTF-8", "yes"); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, "Error While starting the " + "xmlDoc"); + goto out; + } + + ret = xmlTextWriterStartElement(*writer, (xmlChar *)"options"); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, "Could not create an " + "xmlElemetnt"); + ret = -1; + goto out; + } + + + ret = 0; + + out: + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; + +} + +static int +xml_add_volset_element (xmlTextWriterPtr writer, const char *name, + const char *def_val, const char *dscrpt) +{ - list_for_each_entry (brick, &volinfo->bricks, brick_list) - num_bricks++; - - if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) { - if (volinfo->brick_count <= volinfo->sub_count) { - gf_log ("", GF_LOG_DEBUG, - "Volfile is plain replicated"); - replicate_count = volinfo->sub_count; - dist_count = num_bricks / replicate_count; - if (!dist_count) { - replicate_count = num_bricks; - dist_count = num_bricks / replicate_count; + int ret = -1; + + GF_ASSERT (name); + + ret = xmlTextWriterStartElement(writer, (xmlChar *) "option"); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, "Could not create an " + "xmlElemetnt"); + ret = -1; + goto out; + } + + ret = xmlTextWriterWriteFormatElement(writer, (xmlChar*)"defaultValue", + "%s", def_val); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, "Could not create an " + "xmlElemetnt"); + ret = -1; + goto out; + } + + ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"description", + "%s", dscrpt ); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, "Could not create an " + "xmlElemetnt"); + ret = -1; + goto out; + } + + ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *) "name", "%s", + name); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, "Could not 
create an " + "xmlElemetnt"); + ret = -1; + goto out; + } + + ret = xmlTextWriterEndElement(writer); + if (ret < 0) { + gf_log ("glusterd", GF_LOG_ERROR, "Could not end an " + "xmlElemetnt"); + ret = -1; + goto out; + } + + ret = 0; + out: + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; + +} + +#endif + +static int +_get_xlator_opt_key_from_vme ( struct volopt_map_entry *vme, char **key) +{ + int ret = 0; + + GF_ASSERT (vme); + GF_ASSERT (key); + + + if (!strcmp (vme->key, AUTH_ALLOW_MAP_KEY)) + *key = gf_strdup (AUTH_ALLOW_OPT_KEY); + else if (!strcmp (vme->key, AUTH_REJECT_MAP_KEY)) + *key = gf_strdup (AUTH_REJECT_OPT_KEY); + else if (!strcmp (vme->key, NFS_DISABLE_MAP_KEY)) + *key = gf_strdup (NFS_DISABLE_OPT_KEY); + else { + if (vme->option) { + if (vme->option[0] == '!') { + *key = vme->option + 1; + if (!*key[0]) + ret = -1; + } else { + *key = vme->option; } } else { - gf_log ("", GF_LOG_DEBUG, - "Volfile is distributed-replicated"); - replicate_count = volinfo->sub_count; - dist_count = num_bricks / replicate_count; + *key = strchr (vme->key, '.'); + if (*key) { + (*key) ++; + if (!*key[0]) + ret = -1; + } else { + ret = -1; + } } + } + if (ret) + gf_log ("glusterd", GF_LOG_ERROR, "Wrong entry found in " + "glusterd_volopt_map entry %s", vme->key); + else + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + +static void +_free_xlator_opt_key (char *key) +{ + GF_ASSERT (key); + + if (!strcmp (key, AUTH_ALLOW_OPT_KEY) || + !strcmp (key, AUTH_REJECT_OPT_KEY) || + !strcmp (key, NFS_DISABLE_OPT_KEY)) + GF_FREE (key); + + return; +} + +int +glusterd_get_volopt_content (dict_t * ctx, gf_boolean_t xml_out) +{ + void *dl_handle = NULL; + volume_opt_list_t vol_opt_handle = {{0},}; + char *key = NULL; + struct volopt_map_entry *vme = NULL; + int ret = -1; + char *def_val = NULL; + char *descr = NULL; + char output_string[25600] = {0, }; + char *output = NULL; + char tmp_str[2048] = {0, }; +#if (HAVE_LIB_XML) + xmlTextWriterPtr writer = NULL; + xmlBufferPtr buf = NULL; + + if (xml_out) { + ret = init_sethelp_xml_doc (&writer, &buf); + if (ret) /*logging done in init_xml_lib*/ + goto out; + } +#endif + + INIT_LIST_HEAD (&vol_opt_handle.list); - } else if (GF_CLUSTER_TYPE_STRIPE == volinfo->type) { - if (volinfo->brick_count == volinfo->sub_count) { - gf_log ("", GF_LOG_DEBUG, - "Volfile is plain striped"); - stripe_count = volinfo->sub_count; - dist_count = num_bricks / stripe_count; + for (vme = &glusterd_volopt_map[0]; vme->key; vme++) { + + if ((vme->type == NO_DOC) || (vme->type == GLOBAL_NO_DOC)) + continue; + + if (vme->description) { + descr = vme->description; + def_val = vme->value; } else { - gf_log ("", GF_LOG_DEBUG, - "Volfile is distributed-striped"); - stripe_count = volinfo->sub_count; - dist_count = num_bricks / stripe_count; + if (_get_xlator_opt_key_from_vme (vme, &key)) { + gf_log ("glusterd", GF_LOG_DEBUG, "Failed to " + "get %s key from volume option entry", + vme->key); + goto out; /*Some error while geting key*/ + } + + ret = xlator_volopt_dynload (vme->voltype, + &dl_handle, + &vol_opt_handle); + + if (ret) { + gf_log ("glusterd", GF_LOG_DEBUG, + "xlator_volopt_dynload error(%d)", ret); + ret = 0; + goto cont; + } + + ret = xlator_option_info_list (&vol_opt_handle, key, + &def_val, &descr); + if (ret) { /*Swallow Error i.e if option not found*/ + gf_log ("glusterd", GF_LOG_DEBUG, + "Failed to get option for %s key", key); + ret = 0; + goto cont; + } } - } else { - gf_log ("", GF_LOG_DEBUG, - "Volfile is plain 
distributed"); - dist_count = num_bricks; + + if (xml_out) { +#if (HAVE_LIB_XML) + if (xml_add_volset_element (writer,vme->key, + def_val, descr)) { + ret = -1; + goto cont; + } +#else + gf_log ("glusterd", GF_LOG_ERROR, "Libxml not present"); +#endif + } else { + snprintf (tmp_str, sizeof (tmp_str), "Option: %s\nDefault " + "Value: %s\nDescription: %s\n\n", + vme->key, def_val, descr); + strcat (output_string, tmp_str); + } +cont: + if (dl_handle) { + dlclose (dl_handle); + dl_handle = NULL; + vol_opt_handle.given_opt = NULL; + } + if (key) { + _free_xlator_opt_key (key); + key = NULL; + } + if (ret) + goto out; } - if (stripe_count && replicate_count) { - gf_log ("", GF_LOG_DEBUG, - "Striped Replicate config not allowed"); - return -1; +#if (HAVE_LIB_XML) + if ((xml_out) && + (ret = end_sethelp_xml_doc (writer))) + goto out; +#else + if (xml_out) + gf_log ("glusterd", GF_LOG_ERROR, "Libxml not present"); +#endif + + if (!xml_out) + output = gf_strdup (output_string); + else +#if (HAVE_LIB_XML) + output = gf_strdup ((char *)buf->content); +#else + gf_log ("glusterd", GF_LOG_ERROR, "Libxml not present"); +#endif + + if (NULL == output) { + ret = -1; + goto out; } - if (replicate_count > 1) { - cluster_count = replicate_count; - cluster_args = replicate_args; + + ret = dict_set_dynstr (ctx, "help-str", output); +out: + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; + +} + +static int +volgen_graph_build_clients (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param) +{ + int i = 0; + int ret = -1; + uint32_t client_type = GF_CLIENT_OTHER; + char transt[16] = {0,}; + char *volname = NULL; + char *str = NULL; + glusterd_brickinfo_t *brick = NULL; + xlator_t *xl = NULL; + char *ssl_str = NULL; + gf_boolean_t ssl_bool; + + volname = volinfo->volname; + + if (volinfo->brick_count == 0) { + gf_log ("", GF_LOG_ERROR, + "volume inconsistency: brick count is 0"); + goto out; } - if (stripe_count > 1) { - cluster_count = stripe_count; - cluster_args = stripe_args; + + if ((volinfo->dist_leaf_count < volinfo->brick_count) && + ((volinfo->brick_count % volinfo->dist_leaf_count) != 0)) { + gf_log ("", GF_LOG_ERROR, + "volume inconsistency: " + "total number of bricks (%d) is not divisible with " + "number of bricks per cluster (%d) in a multi-cluster " + "setup", + volinfo->brick_count, volinfo->dist_leaf_count); + goto out; } + get_transport_type (volinfo, set_dict, transt, _gf_false); + + if (!strcmp (transt, "tcp,rdma")) + strcpy (transt, "tcp"); + i = 0; + ret = -1; list_for_each_entry (brick, &volinfo->bricks, brick_list) { + ret = -1; xl = volgen_graph_add_nolink (graph, "protocol/client", "%s-client-%d", volname, i); if (!xl) - return -1; + goto out; ret = xlator_set_option (xl, "remote-host", brick->hostname); if (ret) - return -1; + goto out; ret = xlator_set_option (xl, "remote-subvolume", brick->path); if (ret) - return -1; + goto out; ret = xlator_set_option (xl, "transport-type", transt); if (ret) - return -1; + goto out; + + ret = dict_get_uint32 (set_dict, "trusted-client", + &client_type); + + if (!ret && client_type == GF_CLIENT_TRUSTED) { + str = NULL; + str = glusterd_auth_get_username (volinfo); + if (str) { + ret = xlator_set_option (xl, "username", + str); + if (ret) + goto out; + } + + str = glusterd_auth_get_password (volinfo); + if (str) { + ret = xlator_set_option (xl, "password", + str); + if (ret) + goto out; + } + } + + if (dict_get_str(set_dict,"client.ssl",&ssl_str) == 0) { + if (gf_string2boolean(ssl_str,&ssl_bool) == 
0) { + if (ssl_bool) { + ret = xlator_set_option(xl, + "transport.socket.ssl-enabled", + "true"); + if (ret) { + goto out; + } + } + } + } i++; } - if (cluster_count > 1) { - j = 0; - i = 0; - txl = first_of (graph); - for (trav = txl; trav->next; trav = trav->next); - for (;; trav = trav->prev) { - if (i % cluster_count == 0) { - xl = volgen_graph_add_nolink (graph, - cluster_args[0], - cluster_args[1], - volname, j); - if (!xl) - return -1; - j++; + if (i != volinfo->brick_count) { + gf_log ("", GF_LOG_ERROR, + "volume inconsistency: actual number of bricks (%d) " + "differs from brick count (%d)", i, + volinfo->brick_count); + + ret = -1; + goto out; + } + ret = 0; +out: + return ret; +} + +static int +volgen_graph_build_clusters (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, char *xl_type, + char *xl_namefmt, size_t child_count, + size_t sub_count) +{ + int i = 0; + int j = 0; + xlator_t *txl = NULL; + xlator_t *xl = NULL; + xlator_t *trav = NULL; + char *volname = NULL; + int ret = -1; + + if (child_count == 0) + goto out; + volname = volinfo->volname; + txl = first_of (graph); + for (trav = txl; --child_count; trav = trav->next); + for (;; trav = trav->prev) { + if ((i % sub_count) == 0) { + xl = volgen_graph_add_nolink (graph, xl_type, + xl_namefmt, volname, j); + if (!xl) { + ret = -1; + goto out; } + j++; + } - ret = volgen_xlator_link (xl, trav); - if (ret) - return -1; + ret = volgen_xlator_link (xl, trav); + if (ret) + goto out; - if (trav == txl) - break; - i++; + if (trav == txl) + break; + + i++; + } + + ret = j; +out: + return ret; +} + +gf_boolean_t +_xl_is_client_decommissioned (xlator_t *xl, glusterd_volinfo_t *volinfo) +{ + int ret = 0; + gf_boolean_t decommissioned = _gf_false; + char *hostname = NULL; + char *path = NULL; + + GF_ASSERT (!strcmp (xl->type, "protocol/client")); + ret = xlator_get_option (xl, "remote-host", &hostname); + if (ret) { + GF_ASSERT (0); + gf_log ("glusterd", GF_LOG_ERROR, "Failed to get remote-host " + "from client %s", xl->name); + goto out; + } + ret = xlator_get_option (xl, "remote-subvolume", &path); + if (ret) { + GF_ASSERT (0); + gf_log ("glusterd", GF_LOG_ERROR, "Failed to get remote-host " + "from client %s", xl->name); + goto out; + } + + decommissioned = glusterd_is_brick_decommissioned (volinfo, hostname, + path); +out: + return decommissioned; +} + +gf_boolean_t +_xl_has_decommissioned_clients (xlator_t *xl, glusterd_volinfo_t *volinfo) +{ + xlator_list_t *xl_child = NULL; + gf_boolean_t decommissioned = _gf_false; + xlator_t *cxl = NULL; + + if (!xl) + goto out; + + if (!strcmp (xl->type, "protocol/client")) { + decommissioned = _xl_is_client_decommissioned (xl, volinfo); + goto out; + } + + xl_child = xl->children; + while (xl_child) { + cxl = xl_child->xlator; + /* this can go into 2 depths if the volume type + is stripe-replicate */ + decommissioned = _xl_has_decommissioned_clients (cxl, volinfo); + if (decommissioned) + break; + + xl_child = xl_child->next; + } +out: + return decommissioned; +} + +static int +_graph_get_decommissioned_children (xlator_t *dht, glusterd_volinfo_t *volinfo, + char **children) +{ + int ret = -1; + xlator_list_t *xl_child = NULL; + xlator_t *cxl = NULL; + gf_boolean_t comma = _gf_false; + + *children = NULL; + xl_child = dht->children; + while (xl_child) { + cxl = xl_child->xlator; + if (_xl_has_decommissioned_clients (cxl, volinfo)) { + if (!*children) { + *children = GF_CALLOC (16 * GF_UNIT_KB, 1, + gf_common_mt_char); + if (!*children) + goto out; + } + + if (comma) + strcat (*children, 
","); + strcat (*children, cxl->name); + comma = _gf_true; } + + xl_child = xl_child->next; } + ret = 0; +out: + return ret; +} - if (dist_count > 1) { - xl = volgen_graph_add_nolink (graph, "cluster/distribute", - "%s-dht", volname); - if (!xl) - return -1; +static int +volgen_graph_build_dht_cluster (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, size_t child_count) +{ + int32_t clusters = 0; + int ret = -1; + char *decommissioned_children = NULL; + xlator_t *dht = NULL; + char *voltype = "cluster/distribute"; + + /* NUFA and Switch section */ + if (dict_get_str_boolean (volinfo->dict, "cluster.nufa", 0) && + dict_get_str_boolean (volinfo->dict, "cluster.switch", 0)) { + gf_log (THIS->name, GF_LOG_ERROR, + "nufa and switch cannot be set together"); + ret = -1; + goto out; + } - trav = xl; - for (i = 0; i < dist_count; i++) - trav = trav->next; - for (; trav != xl; trav = trav->prev) { - ret = volgen_xlator_link (xl, trav); - if (ret) - return -1; + /* Check for NUFA volume option, and change the voltype */ + if (dict_get_str_boolean (volinfo->dict, "cluster.nufa", 0)) + voltype = "cluster/nufa"; + + /* Check for switch volume option, and change the voltype */ + if (dict_get_str_boolean (volinfo->dict, "cluster.switch", 0)) + voltype = "cluster/switch"; + + clusters = volgen_graph_build_clusters (graph, volinfo, + voltype, + "%s-dht", + child_count, + child_count); + if (clusters < 0) + goto out; + + dht = first_of (graph); + ret = _graph_get_decommissioned_children (dht, volinfo, + &decommissioned_children); + if (ret) + goto out; + if (decommissioned_children) { + ret = xlator_set_option (dht, "decommissioned-bricks", + decommissioned_children); + if (ret) + goto out; + } + ret = 0; +out: + GF_FREE (decommissioned_children); + return ret; +} + +static int +volume_volgen_graph_build_clusters (volgen_graph_t *graph, + glusterd_volinfo_t *volinfo) +{ + char *replicate_args[] = {"cluster/replicate", + "%s-replicate-%d"}; + char *stripe_args[] = {"cluster/stripe", + "%s-stripe-%d"}; + int rclusters = 0; + int clusters = 0; + int dist_count = 0; + int ret = -1; + + if (!volinfo->dist_leaf_count) + goto out; + + if (volinfo->dist_leaf_count == 1) + goto build_distribute; + + /* All other cases, it will have one or the other cluster type */ + switch (volinfo->type) { + case GF_CLUSTER_TYPE_REPLICATE: + clusters = volgen_graph_build_clusters (graph, volinfo, + replicate_args[0], + replicate_args[1], + volinfo->brick_count, + volinfo->replica_count); + if (clusters < 0) + goto out; + break; + case GF_CLUSTER_TYPE_STRIPE: + clusters = volgen_graph_build_clusters (graph, volinfo, + stripe_args[0], + stripe_args[1], + volinfo->brick_count, + volinfo->stripe_count); + if (clusters < 0) + goto out; + break; + case GF_CLUSTER_TYPE_STRIPE_REPLICATE: + /* Replicate after the clients, then stripe */ + if (volinfo->replica_count == 0) + goto out; + clusters = volgen_graph_build_clusters (graph, volinfo, + replicate_args[0], + replicate_args[1], + volinfo->brick_count, + volinfo->replica_count); + if (clusters < 0) + goto out; + + rclusters = volinfo->brick_count / volinfo->replica_count; + GF_ASSERT (rclusters == clusters); + clusters = volgen_graph_build_clusters (graph, volinfo, + stripe_args[0], + stripe_args[1], + rclusters, + volinfo->stripe_count); + if (clusters < 0) + goto out; + break; + default: + gf_log ("", GF_LOG_ERROR, "volume inconsistency: " + "unrecognized clustering type"); + goto out; + } + +build_distribute: + dist_count = volinfo->brick_count / volinfo->dist_leaf_count; + if 
(!dist_count) { + ret = -1; + goto out; + } + + ret = volgen_graph_build_dht_cluster (graph, volinfo, + dist_count); + if (ret == -1) + goto out; + + ret = 0; +out: + return ret; +} + +static int client_graph_set_perf_options(volgen_graph_t *graph, + glusterd_volinfo_t *volinfo, + dict_t *set_dict) +{ + data_t *tmp_data = NULL; + char *volname = NULL; + + /* + * Logic to make sure NFS doesn't have performance translators by + * default for a volume + */ + volname = volinfo->volname; + tmp_data = dict_get (set_dict, "nfs-volume-file"); + if (!tmp_data) + return volgen_graph_set_options_generic(graph, set_dict, + volname, + &perfxl_option_handler); + else + return volgen_graph_set_options_generic(graph, set_dict, + volname, + &nfsperfxl_option_handler); +} + +static int +client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + dict_t *set_dict, void *param) +{ + int ret = 0; + xlator_t *xl = NULL; + char *volname = NULL; + + volname = volinfo->volname; + ret = volgen_graph_build_clients (graph, volinfo, set_dict, param); + if (ret) + goto out; + + ret = volume_volgen_graph_build_clusters (graph, volinfo); + if (ret == -1) + goto out; + + /* Check for compress volume option, and add it to the graph on client side */ + if (dict_get_str_boolean (set_dict, "features.compress", 0)) { + xl = volgen_graph_add (graph, "features/cdc", volname); + if (!xl) { + ret = -1; + goto out; + } + ret = dict_set_str (set_dict, "compress.mode", "client"); + if (ret) + goto out; + + } + + ret = glusterd_volinfo_get_boolean (volinfo, "features.encryption"); + if (ret == -1) + goto out; + if (ret) { + xl = volgen_graph_add (graph, "encryption/crypt", volname); + + if (!xl) { + ret = -1; + goto out; } } - ret = volgen_graph_set_options_generic (graph, set_dict, volname, - &perfxl_option_handler); + ret = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA); + if (ret == -1) + goto out; + if (ret) { + xl = volgen_graph_add (graph, "features/quota", volname); + + if (!xl) { + ret = -1; + goto out; + } + } + + + ret = glusterd_volinfo_get_boolean (volinfo, "features.file-snapshot"); + if (ret == -1) + goto out; + if (ret) { + xl = volgen_graph_add (graph, "features/qemu-block", volname); + + if (!xl) { + ret = -1; + goto out; + } + } + + ret = client_graph_set_perf_options(graph, volinfo, set_dict); + if (ret) + goto out; + + /* add debug translators depending on the options */ + ret = check_and_add_debug_xl (graph, set_dict, volname, + "client"); if (ret) return -1; + ret = -1; xl = volgen_graph_add_as (graph, "debug/io-stats", volname); if (!xl) - return -1; + goto out; ret = volgen_graph_set_options_generic (graph, set_dict, "client", &loglevel_option_handler); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, "changing client log level" + " failed"); + + ret = volgen_graph_set_options_generic (graph, set_dict, "client", + &sys_loglevel_option_handler); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, "changing client syslog " + "level failed"); +out: return ret; } /* builds a graph for client role , with option overrides in mod_dict */ static int -build_client_graph (glusterfs_graph_t *graph, glusterd_volinfo_t *volinfo, +build_client_graph (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *mod_dict) { return build_graph_generic (graph, volinfo, mod_dict, NULL, &client_graph_builder); } +char *gd_shd_options[] = { + "!self-heal-daemon", + "!heal-timeout", + NULL +}; + +char* +gd_get_matching_option (char **options, char *option) +{ + while (*options && strcmp (*options, option)) + 
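client_graph_builder() now appends the optional client-side feature xlators one after another, each gated on a volume option and each aborting the whole build if volgen_graph_add() fails. The option-to-xlator pairs below are taken from the diff; the two callbacks are simplified stand-ins for glusterd_volinfo_get_boolean() and volgen_graph_add() (the real compress branch additionally reads set_dict and sets compress.mode to "client"):

```c
#include <stddef.h>

struct feature {
        const char *option;
        const char *xlator;
};

static const struct feature features[] = {
        { "features.compress",      "features/cdc"        },
        { "features.encryption",    "encryption/crypt"    },
        { "features.quota",         "features/quota"      },
        { "features.file-snapshot", "features/qemu-block" },
};

/* Append each enabled feature xlator; any failure aborts the build,
 * mirroring the ret = -1; goto out; paths in client_graph_builder(). */
static int
add_feature_xlators (int (*option_enabled) (const char *option),
                     int (*graph_add) (const char *xlator))
{
        size_t i = 0;

        for (i = 0; i < sizeof (features) / sizeof (features[0]); i++) {
                if (!option_enabled (features[i].option))
                        continue;
                if (graph_add (features[i].xlator) != 0)
                        return -1;
        }
        return 0;
}
```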
options++; + return *options; +} -/* builds a graph for nfs server role */ static int -build_nfs_graph (glusterfs_graph_t *graph) +shd_option_handler (volgen_graph_t *graph, struct volopt_map_entry *vme, + void *param) { - glusterfs_graph_t cgraph = {{0,},}; + int ret = 0; + struct volopt_map_entry new_vme = {0}; + char *shd_option = NULL; + + shd_option = gd_get_matching_option (gd_shd_options, vme->option); + if ((vme->option[0] == '!') && !shd_option) + goto out; + new_vme = *vme; + if (shd_option) { + new_vme.option = shd_option + 1;//option with out '!' + } + + ret = no_filter_option_handler (graph, &new_vme, param); +out: + return ret; +} + +static int +nfs_option_handler (volgen_graph_t *graph, + struct volopt_map_entry *vme, void *param) +{ + xlator_t *xl = NULL; + char *aa = NULL; + int ret = 0; + glusterd_volinfo_t *volinfo = NULL; + + volinfo = param; + + xl = first_of (graph); + +/* if (vme->type == GLOBAL_DOC || vme->type == GLOBAL_NO_DOC) { + + ret = xlator_set_option (xl, vme->key, vme->value); + }*/ + if (!volinfo || (volinfo->volname[0] == '\0')) + return 0; + + if (! strcmp (vme->option, "!rpc-auth.addr.*.allow")) { + ret = gf_asprintf (&aa, "rpc-auth.addr.%s.allow", + volinfo->volname); + + if (ret != -1) { + ret = xlator_set_option (xl, aa, vme->value); + GF_FREE (aa); + } + + if (ret) + return -1; + } + + if (! strcmp (vme->option, "!rpc-auth.addr.*.reject")) { + ret = gf_asprintf (&aa, "rpc-auth.addr.%s.reject", + volinfo->volname); + + if (ret != -1) { + ret = xlator_set_option (xl, aa, vme->value); + GF_FREE (aa); + } + + if (ret) + return -1; + } + + if (! strcmp (vme->option, "!rpc-auth.auth-unix.*")) { + ret = gf_asprintf (&aa, "rpc-auth.auth-unix.%s", + volinfo->volname); + + if (ret != -1) { + ret = xlator_set_option (xl, aa, vme->value); + GF_FREE (aa); + } + + if (ret) + return -1; + } + if (! strcmp (vme->option, "!rpc-auth.auth-null.*")) { + ret = gf_asprintf (&aa, "rpc-auth.auth-null.%s", + volinfo->volname); + + if (ret != -1) { + ret = xlator_set_option (xl, aa, vme->value); + GF_FREE (aa); + } + + if (ret) + return -1; + } + + if (! strcmp (vme->option, "!nfs3.*.trusted-sync")) { + ret = gf_asprintf (&aa, "nfs3.%s.trusted-sync", + volinfo->volname); + + if (ret != -1) { + ret = xlator_set_option (xl, aa, vme->value); + GF_FREE (aa); + } + + if (ret) + return -1; + } + + if (! strcmp (vme->option, "!nfs3.*.trusted-write")) { + ret = gf_asprintf (&aa, "nfs3.%s.trusted-write", + volinfo->volname); + + if (ret != -1) { + ret = xlator_set_option (xl, aa, vme->value); + GF_FREE (aa); + } + + if (ret) + return -1; + } + + if (! strcmp (vme->option, "!nfs3.*.volume-access")) { + ret = gf_asprintf (&aa, "nfs3.%s.volume-access", + volinfo->volname); + + if (ret != -1) { + ret = xlator_set_option (xl, aa, vme->value); + GF_FREE (aa); + } + + if (ret) + return -1; + } + + if (! strcmp (vme->option, "!nfs3.*.export-dir")) { + ret = gf_asprintf (&aa, "nfs3.%s.export-dir", + volinfo->volname); + + if (ret != -1) { + ret = gf_canonicalize_path (vme->value); + if (ret) + return -1; + + ret = xlator_set_option (xl, aa, vme->value); + GF_FREE (aa); + } + + if (ret) + return -1; + } + + + + if (! strcmp (vme->option, "!rpc-auth.ports.*.insecure")) { + ret = gf_asprintf (&aa, "rpc-auth.ports.%s.insecure", + volinfo->volname); + + if (ret != -1) { + ret = xlator_set_option (xl, aa, vme->value); + GF_FREE (aa); + } + + if (ret) + return -1; + } + + + if (! 
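Every branch of nfs_option_handler() repeats the same three steps: expand a wildcard key such as !rpc-auth.addr.*.allow into a per-volume key with gf_asprintf(), set it on the nfs xlator, and free the key. A standalone sketch of that expansion using asprintf(3); driving a table of templates through a helper like this would collapse the dozen near-identical if blocks:

```c
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>

/* Expand a per-volume option template, e.g.
 * "rpc-auth.addr.%s.allow" + "myvol" -> "rpc-auth.addr.myvol.allow",
 * then hand the concrete key to the setter, as each branch of
 * nfs_option_handler() does with gf_asprintf()/xlator_set_option(). */
static int
set_volume_scoped_option (const char *tmpl, const char *volname,
                          const char *value,
                          int (*set_option) (const char *key,
                                             const char *value))
{
        char *key = NULL;
        int   ret = -1;

        if (asprintf (&key, tmpl, volname) < 0)
                return -1;
        ret = set_option (key, value);
        free (key);
        return ret ? -1 : 0;
}
```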
strcmp (vme->option, "!nfs-disable")) { + ret = gf_asprintf (&aa, "nfs.%s.disable", + volinfo->volname); + + if (ret != -1) { + ret = xlator_set_option (xl, aa, vme->value); + GF_FREE (aa); + } + + if (ret) + return -1; + } + + if ( (strcmp (vme->voltype, "nfs/server") == 0) && + (vme->option && vme->option[0]!='!') ) { + ret = xlator_set_option (xl, vme->option, vme->value); + if (ret) + return -1; + } + + + /*key = strchr (vme->key, '.') + 1; + + for (trav = xl->children; trav; trav = trav->next) { + ret = gf_asprintf (&aa, "auth.addr.%s.%s", trav->xlator->name, + key); + if (ret != -1) { + ret = xlator_set_option (xl, aa, vme->value); + GF_FREE (aa); + } + if (ret) + return -1; + }*/ + + return 0; +} + +static int +volgen_graph_set_iam_shd (volgen_graph_t *graph) +{ + xlator_t *trav; + int ret = 0; + + for (trav = first_of (graph); trav; trav = trav->next) { + if (strcmp (trav->type, "cluster/replicate") != 0) + continue; + + ret = xlator_set_option (trav, "iam-self-heal-daemon", "yes"); + if (ret) + break; + } + return ret; +} + +static int +build_shd_graph (volgen_graph_t *graph, dict_t *mod_dict) +{ + volgen_graph_t cgraph = {0}; glusterd_volinfo_t *voliter = NULL; xlator_t *this = NULL; glusterd_conf_t *priv = NULL; + dict_t *set_dict = NULL; + int ret = 0; + gf_boolean_t valid_config = _gf_false; + xlator_t *iostxl = NULL; + int rclusters = 0; + int replica_count = 0; + gf_boolean_t graph_check = _gf_false; + + this = THIS; + priv = this->private; + + set_dict = dict_new (); + if (!set_dict) { + ret = -ENOMEM; + goto out; + } + + graph_check = dict_get_str_boolean (mod_dict, "graph-check", 0); + iostxl = volgen_graph_add_as (graph, "debug/io-stats", "glustershd"); + if (!iostxl) { + ret = -1; + goto out; + } + + list_for_each_entry (voliter, &priv->volumes, vol_list) { + if (!graph_check && + (voliter->status != GLUSTERD_STATUS_STARTED)) + continue; + + if (!glusterd_is_volume_replicate (voliter)) + continue; + + replica_count = voliter->replica_count; + + valid_config = _gf_true; + + ret = dict_set_str (set_dict, "cluster.self-heal-daemon", "on"); + if (ret) + goto out; + + ret = dict_set_uint32 (set_dict, "trusted-client", + GF_CLIENT_TRUSTED); + if (ret) + goto out; + + dict_copy (voliter->dict, set_dict); + if (mod_dict) + dict_copy (mod_dict, set_dict); + + memset (&cgraph, 0, sizeof (cgraph)); + ret = volgen_graph_build_clients (&cgraph, voliter, set_dict, + NULL); + if (ret) + goto out; + + rclusters = volgen_graph_build_clusters (&cgraph, voliter, + "cluster/replicate", + "%s-replicate-%d", + voliter->brick_count, + replica_count); + if (rclusters < 0) { + ret = -1; + goto out; + } + + ret = volgen_graph_set_options_generic (&cgraph, set_dict, voliter, + shd_option_handler); + if (ret) + goto out; + + ret = volgen_graph_set_iam_shd (&cgraph); + if (ret) + goto out; + + ret = volgen_graph_merge_sub (graph, &cgraph, rclusters); + if (ret) + goto out; + + ret = volgen_graph_set_options_generic (graph, set_dict, + "client", + &loglevel_option_handler); + + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, "changing loglevel " + "of self-heal daemon failed"); + + ret = volgen_graph_set_options_generic (graph, set_dict, + "client", + &sys_loglevel_option_handler); + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, "changing syslog " + "level of self-heal daemon failed"); + + ret = dict_reset (set_dict); + if (ret) + goto out; + } +out: + if (set_dict) + dict_unref (set_dict); + if (!valid_config) + ret = -EINVAL; + return ret; +} + +/* builds a graph for nfs server role, with option 
overrides in mod_dict */ +static int +build_nfs_graph (volgen_graph_t *graph, dict_t *mod_dict) +{ + volgen_graph_t cgraph = {0,}; + glusterd_volinfo_t *voliter = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + dict_t *set_dict = NULL; xlator_t *nfsxl = NULL; char *skey = NULL; - char volume_id[64] = {0,}; int ret = 0; + char nfs_xprt[16] = {0,}; + char *volname = NULL; + data_t *data = NULL; this = THIS; GF_ASSERT (this); priv = this->private; GF_ASSERT (priv); - nfsxl = volgen_graph_add_as (graph, "nfs/server", "nfs-server"); - if (!nfsxl) + set_dict = dict_new (); + if (!set_dict) { + gf_log ("", GF_LOG_ERROR, "Out of memory"); return -1; + } + + nfsxl = volgen_graph_add_as (graph, "nfs/server", "nfs-server"); + if (!nfsxl) { + ret = -1; + goto out; + } ret = xlator_set_option (nfsxl, "nfs.dynamic-volumes", "on"); if (ret) - return -1; + goto out; + + ret = xlator_set_option (nfsxl, "nfs.nlm", "on"); + if (ret) + goto out; + + ret = xlator_set_option (nfsxl, "nfs.drc", "on"); + if (ret) + goto out; list_for_each_entry (voliter, &priv->volumes, vol_list) { if (voliter->status != GLUSTERD_STATUS_STARTED) continue; + if (dict_get_str_boolean (voliter->dict, "nfs.disable", 0)) + continue; + ret = gf_asprintf (&skey, "rpc-auth.addr.%s.allow", voliter->volname); - if (ret == -1) - goto oom; + if (ret == -1) { + gf_log ("", GF_LOG_ERROR, "Out of memory"); + goto out; + } ret = xlator_set_option (nfsxl, skey, "*"); GF_FREE (skey); if (ret) - return -1; + goto out; ret = gf_asprintf (&skey, "nfs3.%s.volume-id", voliter->volname); - if (ret == -1) - goto oom; - uuid_unparse (voliter->volume_id, volume_id); - ret = xlator_set_option (nfsxl, skey, volume_id); + if (ret == -1) { + gf_log ("", GF_LOG_ERROR, "Out of memory"); + goto out; + } + ret = xlator_set_option (nfsxl, skey, uuid_utoa (voliter->volume_id)); GF_FREE (skey); if (ret) - return -1; + goto out; + /* If both RDMA and TCP are the transport_type, use RDMA + for NFS client protocols */ memset (&cgraph, 0, sizeof (cgraph)); - ret = build_client_graph (&cgraph, voliter, NULL); + if (mod_dict) + get_transport_type (voliter, mod_dict, nfs_xprt, _gf_true); + else + get_transport_type (voliter, voliter->dict, nfs_xprt, _gf_true); + + ret = dict_set_str (set_dict, "performance.stat-prefetch", "off"); if (ret) - return -1; - ret = volgen_graph_merge_sub (graph, &cgraph); + goto out; + + ret = dict_set_str (set_dict, "performance.client-io-threads", + "off"); + if (ret) + goto out; + + ret = dict_set_str (set_dict, "client-transport-type", + nfs_xprt); + if (ret) + goto out; + + ret = dict_set_uint32 (set_dict, "trusted-client", + GF_CLIENT_TRUSTED); + if (ret) + goto out; + + ret = dict_set_str (set_dict, "nfs-volume-file", "yes"); + if (ret) + goto out; + + if (mod_dict && (data = dict_get (mod_dict, "volume-name"))) { + volname = data->data; + if (strcmp (volname, voliter->volname) == 0) + dict_copy (mod_dict, set_dict); + } + + ret = build_client_graph (&cgraph, voliter, set_dict); + if (ret) + goto out; + + if (mod_dict) { + dict_copy (mod_dict, set_dict); + ret = volgen_graph_set_options_generic (&cgraph, set_dict, voliter, + basic_option_handler); + } else { + ret = volgen_graph_set_options_generic (&cgraph, voliter->dict, voliter, + basic_option_handler); + } + + if (ret) + goto out; + + ret = volgen_graph_merge_sub (graph, &cgraph, 1); + if (ret) + goto out; + ret = dict_reset (set_dict); + if (ret) + goto out; } - return ret; + list_for_each_entry (voliter, &priv->volumes, vol_list) { - oom: - gf_log ("", GF_LOG_ERROR, 
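The nfs3.&lt;volname&gt;.volume-id change swaps a stack buffer plus uuid_unparse() for uuid_utoa(), a glusterfs convenience wrapper that formats the uuid into an internal buffer. The underlying libuuid call looks like this (compile with -luuid):

```c
#include <stdio.h>
#include <uuid/uuid.h>

int main (void)
{
        uuid_t id;
        char   buf[37]; /* 36 characters + NUL, as uuid_unparse() needs */

        uuid_generate (id);
        uuid_unparse (id, buf);
        printf ("nfs3.<volname>.volume-id = %s\n", buf);
        return 0;
}
```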
"Out of memory"); + if (mod_dict) { + ret = volgen_graph_set_options_generic (graph, mod_dict, voliter, + nfs_option_handler); + } else { + ret = volgen_graph_set_options_generic (graph, voliter->dict, voliter, + nfs_option_handler); + } - return -1; + if (ret) + gf_log ("glusterd", GF_LOG_WARNING, "Could not set " + "vol-options for the volume %s", voliter->volname); + } + + out: + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + dict_destroy (set_dict); + + return ret; } @@ -1122,9 +3102,12 @@ get_brick_filepath (char *filename, glusterd_volinfo_t *volinfo, { char path[PATH_MAX] = {0,}; char brick[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, brick); - VOLGEN_GET_VOLUME_DIR (path, volinfo); + GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); snprintf (filename, PATH_MAX, "%s/%s.%s.%s.vol", path, volinfo->volname, @@ -1132,11 +3115,49 @@ get_brick_filepath (char *filename, glusterd_volinfo_t *volinfo, brick); } +gf_boolean_t +glusterd_is_valid_volfpath (char *volname, char *brick) +{ + char volfpath[PATH_MAX] = {0,}; + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_volinfo_t *volinfo = NULL; + int32_t ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to create brickinfo" + " for brick %s", brick ); + ret = 0; + goto out; + } + ret = glusterd_volinfo_new (&volinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to create volinfo"); + ret = 0; + goto out; + } + strncpy (volinfo->volname, volname, sizeof (volinfo->volname)); + get_brick_filepath (volfpath, volinfo, brickinfo); + + ret = (strlen (volfpath) < _POSIX_PATH_MAX); + +out: + if (brickinfo) + glusterd_brickinfo_delete (brickinfo); + if (volinfo) + glusterd_volinfo_delete (volinfo); + return ret; +} + static int glusterd_generate_brick_volfile (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo) { - glusterfs_graph_t graph = {{0,},}; + volgen_graph_t graph = {0,}; char filename[PATH_MAX] = {0,}; int ret = -1; @@ -1145,7 +3166,7 @@ glusterd_generate_brick_volfile (glusterd_volinfo_t *volinfo, get_brick_filepath (filename, volinfo, brickinfo); - ret = build_server_graph (&graph, volinfo, NULL, brickinfo->path); + ret = build_server_graph (&graph, volinfo, NULL, brickinfo); if (!ret) ret = volgen_write_volfile (&graph, filename); @@ -1154,11 +3175,54 @@ glusterd_generate_brick_volfile (glusterd_volinfo_t *volinfo, return ret; } -static int +static void +get_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo) +{ + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + + GLUSTERD_GET_VOLUME_DIR (filename, volinfo, priv); + strncat (filename, "/marker.tstamp", + PATH_MAX - strlen(filename) - 1); +} + +int generate_brick_volfiles (glusterd_volinfo_t *volinfo) { glusterd_brickinfo_t *brickinfo = NULL; - int ret = -1; + char tstamp_file[PATH_MAX] = {0,}; + int ret = -1; + + ret = glusterd_volinfo_get_boolean (volinfo, VKEY_MARKER_XTIME); + if (ret == -1) + return -1; + + get_vol_tstamp_file (tstamp_file, volinfo); + + if (ret) { + ret = open (tstamp_file, O_WRONLY|O_CREAT|O_EXCL, 0600); + if (ret == -1 && errno == EEXIST) { + gf_log ("", GF_LOG_DEBUG, "timestamp file exist"); + ret = -2; + } + if (ret == -1) { + gf_log ("", GF_LOG_ERROR, "failed to create %s (%s)", + tstamp_file, strerror (errno)); + return -1; + } + if (ret >= 0) + close (ret); + } else { + ret = unlink 
(tstamp_file); + if (ret == -1 && errno == ENOENT) + ret = 0; + if (ret == -1) { + gf_log ("", GF_LOG_ERROR, "failed to unlink %s (%s)", + tstamp_file, strerror (errno)); + return -1; + } + } list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { gf_log ("", GF_LOG_DEBUG, @@ -1178,32 +3242,84 @@ out: return ret; } -static void -get_client_filepath (char *filename, glusterd_volinfo_t *volinfo) +static int +generate_single_transport_client_volfile (glusterd_volinfo_t *volinfo, + char *filepath, dict_t *dict) { - char path[PATH_MAX] = {0,}; + volgen_graph_t graph = {0,}; + int ret = -1; - VOLGEN_GET_VOLUME_DIR (path, volinfo); + ret = build_client_graph (&graph, volinfo, dict); + if (!ret) + ret = volgen_write_volfile (&graph, filepath); + + volgen_graph_free (&graph); - snprintf (filename, PATH_MAX, "%s/%s-fuse.vol", - path, volinfo->volname); + return ret; } -static int -generate_client_volfile (glusterd_volinfo_t *volinfo) +static void +enumerate_transport_reqs (gf_transport_type type, char **types) { - glusterfs_graph_t graph = {{0,},}; - char filename[PATH_MAX] = {0,}; - int ret = -1; + switch (type) { + case GF_TRANSPORT_TCP: + types[0] = "tcp"; + break; + case GF_TRANSPORT_RDMA: + types[0] = "rdma"; + break; + case GF_TRANSPORT_BOTH_TCP_RDMA: + types[0] = "tcp"; + types[1] = "rdma"; + break; + } +} - get_client_filepath (filename, volinfo); +int +generate_client_volfiles (glusterd_volinfo_t *volinfo, + glusterd_client_type_t client_type) +{ + char filepath[PATH_MAX] = {0,}; + int ret = -1; + char *types[] = {NULL, NULL, NULL}; + int i = 0; + dict_t *dict = NULL; + gf_transport_type type = GF_TRANSPORT_TCP; + + enumerate_transport_reqs (volinfo->transport_type, types); + dict = dict_new (); + if (!dict) + goto out; + for (i = 0; types[i]; i++) { + memset (filepath, 0, sizeof (filepath)); + ret = dict_set_str (dict, "client-transport-type", types[i]); + if (ret) + goto out; + type = transport_str_to_type (types[i]); - ret = build_client_graph (&graph, volinfo, NULL); - if (!ret) - ret = volgen_write_volfile (&graph, filename); + ret = dict_set_uint32 (dict, "trusted-client", client_type); + if (ret) + goto out; - volgen_graph_free (&graph); + if (client_type == GF_CLIENT_TRUSTED) { + glusterd_get_trusted_client_filepath (filepath, + volinfo, + type); + } else { + glusterd_get_client_filepath (filepath, + volinfo, + type); + } + ret = generate_single_transport_client_volfile (volinfo, + filepath, + dict); + if (ret) + goto out; + } +out: + if (dict) + dict_unref (dict); return ret; } @@ -1215,7 +3331,7 @@ glusterd_create_rb_volfiles (glusterd_volinfo_t *volinfo, ret = glusterd_generate_brick_volfile (volinfo, brickinfo); if (!ret) - ret = generate_client_volfile (volinfo); + ret = generate_client_volfiles (volinfo, GF_CLIENT_TRUSTED); if (!ret) ret = glusterd_fetchspec_notify (THIS); @@ -1223,54 +3339,203 @@ glusterd_create_rb_volfiles (glusterd_volinfo_t *volinfo, } int -glusterd_create_volfiles (glusterd_volinfo_t *volinfo) +glusterd_create_volfiles_and_notify_services (glusterd_volinfo_t *volinfo) { - int ret = -1; + int ret = -1; + xlator_t *this = NULL; + + this = THIS; ret = generate_brick_volfiles (volinfo); if (ret) { - gf_log ("", GF_LOG_ERROR, + gf_log (this->name, GF_LOG_ERROR, "Could not generate volfiles for bricks"); goto out; } - ret = generate_client_volfile (volinfo); + ret = generate_client_volfiles (volinfo, GF_CLIENT_TRUSTED); if (ret) { - gf_log ("", GF_LOG_ERROR, - "Could not generate volfile for client"); + gf_log (this->name, GF_LOG_ERROR, + "Could not 
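generate_client_volfiles() replaces the old single fuse volfile with one volfile per transport the volume supports, with enumerate_transport_reqs() producing the NULL-terminated transport list. A runnable sketch of that enumeration and loop; the printed filename pattern is illustrative, not the exact glusterd path:

```c
#include <stdio.h>

enum transport { TRANSPORT_TCP, TRANSPORT_RDMA, TRANSPORT_BOTH };

/* Mirrors enumerate_transport_reqs(): fill a NULL-terminated list of
 * transport names for the volume's transport type. */
static void
enumerate_transports (enum transport t, const char **types)
{
        switch (t) {
        case TRANSPORT_TCP:
                types[0] = "tcp";
                break;
        case TRANSPORT_RDMA:
                types[0] = "rdma";
                break;
        case TRANSPORT_BOTH:
                types[0] = "tcp";
                types[1] = "rdma";
                break;
        }
}

int main (void)
{
        const char *types[3] = { NULL, NULL, NULL };
        int         i;

        enumerate_transports (TRANSPORT_BOTH, types);
        for (i = 0; types[i]; i++) /* one client volfile per transport */
                printf ("generate <volname>.%s-fuse.vol\n", types[i]);
        return 0;
}
```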
generate trusted client volfiles"); + goto out; + } + + ret = generate_client_volfiles (volinfo, GF_CLIENT_OTHER); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Could not generate client volfiles"); goto out; } - ret = glusterd_fetchspec_notify (THIS); + ret = glusterd_fetchspec_notify (this); out: return ret; } -void -glusterd_get_nfs_filepath (char *filename) +int +glusterd_create_global_volfile (int (*builder) (volgen_graph_t *graph, + dict_t *set_dict), + char *filepath, dict_t *mod_dict) { - char path[PATH_MAX] = {0,}; + volgen_graph_t graph = {0,}; + int ret = -1; + + ret = builder (&graph, mod_dict); + if (!ret) + ret = volgen_write_volfile (&graph, filepath); - VOLGEN_GET_NFS_DIR (path); + volgen_graph_free (&graph); - snprintf (filename, PATH_MAX, "%s/nfs-server.vol", path); + return ret; } int glusterd_create_nfs_volfile () { - glusterfs_graph_t graph = {{0,},}; - char filename[PATH_MAX] = {0,}; - int ret = -1; + char filepath[PATH_MAX] = {0,}; + glusterd_conf_t *conf = THIS->private; - glusterd_get_nfs_filepath (filename); + glusterd_get_nodesvc_volfile ("nfs", conf->workdir, + filepath, sizeof (filepath)); + return glusterd_create_global_volfile (build_nfs_graph, + filepath, NULL); +} - ret = build_nfs_graph (&graph); - if (!ret) - ret = volgen_write_volfile (&graph, filename); +int +glusterd_create_shd_volfile () +{ + char filepath[PATH_MAX] = {0,}; + int ret = -1; + glusterd_conf_t *conf = THIS->private; + dict_t *mod_dict = NULL; - volgen_graph_free (&graph); + mod_dict = dict_new (); + if (!mod_dict) + goto out; + + ret = dict_set_uint32 (mod_dict, "cluster.background-self-heal-count", 0); + if (ret) + goto out; + + ret = dict_set_str (mod_dict, "cluster.data-self-heal", "on"); + if (ret) + goto out; + + ret = dict_set_str (mod_dict, "cluster.metadata-self-heal", "on"); + if (ret) + goto out; + + ret = dict_set_str (mod_dict, "cluster.entry-self-heal", "on"); + if (ret) + goto out; + + glusterd_get_nodesvc_volfile ("glustershd", conf->workdir, + filepath, sizeof (filepath)); + ret = glusterd_create_global_volfile (build_shd_graph, filepath, + mod_dict); +out: + if (mod_dict) + dict_unref (mod_dict); + return ret; +} + +int +glusterd_check_nfs_topology_identical (gf_boolean_t *identical) +{ + char nfsvol[PATH_MAX] = {0,}; + char tmpnfsvol[PATH_MAX] = {0,}; + glusterd_conf_t *conf = NULL; + xlator_t *this = THIS; + int ret = -1; + int tmpclean = 0; + int tmpfd = -1; + + if ((!identical) || (!this) || (!this->private)) + goto out; + + conf = (glusterd_conf_t *) this->private; + + /* Fetch the original NFS volfile */ + glusterd_get_nodesvc_volfile ("nfs", conf->workdir, + nfsvol, sizeof (nfsvol)); + + /* Create the temporary NFS volfile */ + snprintf (tmpnfsvol, sizeof (tmpnfsvol), "/tmp/gnfs-XXXXXX"); + tmpfd = mkstemp (tmpnfsvol); + if (tmpfd < 0) { + gf_log (this->name, GF_LOG_WARNING, + "Unable to create temp file %s: (%s)", + tmpnfsvol, strerror (errno)); + goto out; + } + + tmpclean = 1; /* SET the flag to unlink() tmpfile */ + + ret = glusterd_create_global_volfile (build_nfs_graph, + tmpnfsvol, NULL); + if (ret) + goto out; + + /* Compare the topology of volfiles */ + ret = glusterd_check_topology_identical (nfsvol, tmpnfsvol, + identical); +out: + if (tmpfd >= 0) + close (tmpfd); + if (tmpclean) + unlink (tmpnfsvol); + return ret; +} + +int +glusterd_check_nfs_volfile_identical (gf_boolean_t *identical) +{ + char nfsvol[PATH_MAX] = {0,}; + char tmpnfsvol[PATH_MAX] = {0,}; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + int ret = -1; + int need_unlink = 
0; + int tmp_fd = -1; + + this = THIS; + + GF_ASSERT (this); + GF_ASSERT (identical); + + conf = this->private; + + glusterd_get_nodesvc_volfile ("nfs", conf->workdir, + nfsvol, sizeof (nfsvol)); + + snprintf (tmpnfsvol, sizeof (tmpnfsvol), "/tmp/gnfs-XXXXXX"); + + tmp_fd = mkstemp (tmpnfsvol); + if (tmp_fd < 0) { + gf_log ("", GF_LOG_WARNING, "Unable to create temp file %s: " + "(%s)", tmpnfsvol, strerror (errno)); + goto out; + } + + need_unlink = 1; + + ret = glusterd_create_global_volfile (build_nfs_graph, + tmpnfsvol, NULL); + if (ret) + goto out; + + ret = glusterd_check_files_identical (nfsvol, tmpnfsvol, + identical); + if (ret) + goto out; + +out: + if (need_unlink) + unlink (tmpnfsvol); + + if (tmp_fd >= 0) + close (tmp_fd); return ret; } @@ -1279,29 +3544,121 @@ int glusterd_delete_volfile (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo) { + int ret = 0; char filename[PATH_MAX] = {0,}; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); get_brick_filepath (filename, volinfo, brickinfo); - return unlink (filename); + ret = unlink (filename); + if (ret) + gf_log ("glusterd", GF_LOG_ERROR, "failed to delete file: %s, " + "reason: %s", filename, strerror (errno)); + return ret; } int -validate_clientopts (glusterd_volinfo_t *volinfo, - dict_t *val_dict, +validate_shdopts (glusterd_volinfo_t *volinfo, + dict_t *val_dict, + char **op_errstr) +{ + volgen_graph_t graph = {0,}; + int ret = -1; + + graph.errstr = op_errstr; + + if (!glusterd_is_volume_replicate (volinfo)) { + ret = 0; + goto out; + } + ret = dict_set_str (val_dict, "graph-check", "on"); + if (ret) + goto out; + ret = build_shd_graph (&graph, val_dict); + if (!ret) + ret = graph_reconf_validateopt (&graph.graph, op_errstr); + + volgen_graph_free (&graph); + + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); +out: + dict_del (val_dict, "graph-check"); + return ret; +} + +int +validate_nfsopts (glusterd_volinfo_t *volinfo, + dict_t *val_dict, char **op_errstr) { - glusterfs_graph_t graph = {{0,},}; + volgen_graph_t graph = {0,}; int ret = -1; + char transport_type[16] = {0,}; + char *tt = NULL; + char err_str[4096] = {0,}; + xlator_t *this = THIS; + + GF_ASSERT (this); + + graph.errstr = op_errstr; + + get_vol_transport_type (volinfo, transport_type); + ret = dict_get_str (val_dict, "nfs.transport-type", &tt); + if (!ret) { + if (volinfo->transport_type != GF_TRANSPORT_BOTH_TCP_RDMA) { + snprintf (err_str, sizeof (err_str), "Changing nfs " + "transport type is allowed only for volumes " + "of transport type tcp,rdma"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + *op_errstr = gf_strdup (err_str); + ret = -1; + goto out; + } + if (strcmp (tt,"tcp") && strcmp (tt,"rdma")) { + snprintf (err_str, sizeof (err_str), "wrong transport " + "type %s", tt); + *op_errstr = gf_strdup (err_str); + ret = -1; + goto out; + } + } + + ret = dict_set_str (val_dict, "volume-name", volinfo->volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set volume name"); + goto out; + } + + ret = build_nfs_graph (&graph, val_dict); + if (!ret) + ret = graph_reconf_validateopt (&graph.graph, op_errstr); + + volgen_graph_free (&graph); + +out: + if (dict_get (val_dict, "volume-name")) + dict_del (val_dict, "volume-name"); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + + +int +validate_clientopts (glusterd_volinfo_t *volinfo, + dict_t *val_dict, + char **op_errstr) +{ + volgen_graph_t graph = {0,}; + int ret = -1; GF_ASSERT (volinfo); + graph.errstr = op_errstr; ret = build_client_graph 
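Both glusterd_check_nfs_volfile_identical() and the new topology variant follow the same regenerate-and-compare pattern: write a fresh NFS volfile into a mkstemp(3) file, compare it with the installed one, and clean up the temp file on every path. A condensed standalone version with the regenerate and compare steps abstracted behind callbacks:

```c
#include <stdlib.h>
#include <unistd.h>

/* Regenerate into a temp file, compare against the installed volfile,
 * and always close and unlink the temp file, as both checkers do. */
static int
check_identical (int (*regenerate) (const char *path),
                 int (*compare) (const char *a, const char *b,
                                 int *identical),
                 const char *installed, int *identical)
{
        char tmpl[] = "/tmp/gnfs-XXXXXX";
        int  fd     = mkstemp (tmpl);
        int  ret    = -1;

        if (fd < 0)
                return -1;
        if (regenerate (tmpl) == 0)
                ret = compare (installed, tmpl, identical);
        close (fd);
        unlink (tmpl);
        return ret;
}
```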
(&graph, volinfo, val_dict); if (!ret) - ret = graph_reconf_validateopt (&graph, op_errstr); + ret = graph_reconf_validateopt (&graph.graph, op_errstr); volgen_graph_free (&graph); @@ -1310,21 +3667,21 @@ validate_clientopts (glusterd_volinfo_t *volinfo, } int -validate_brickopts (glusterd_volinfo_t *volinfo, - char *brickinfo_path, +validate_brickopts (glusterd_volinfo_t *volinfo, + glusterd_brickinfo_t *brickinfo, dict_t *val_dict, char **op_errstr) { - glusterfs_graph_t graph = {{0,},}; - int ret = -1; + volgen_graph_t graph = {0,}; + int ret = -1; GF_ASSERT (volinfo); + graph.errstr = op_errstr; - - ret = build_server_graph (&graph, volinfo, val_dict, brickinfo_path); + ret = build_server_graph (&graph, volinfo, val_dict, brickinfo); if (!ret) - ret = graph_reconf_validateopt (&graph, op_errstr); + ret = graph_reconf_validateopt (&graph.graph, op_errstr); volgen_graph_free (&graph); @@ -1339,31 +3696,105 @@ glusterd_validate_brickreconf (glusterd_volinfo_t *volinfo, { glusterd_brickinfo_t *brickinfo = NULL; int ret = -1; - + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { gf_log ("", GF_LOG_DEBUG, "Validating %s", brickinfo->hostname); - ret = validate_brickopts (volinfo, brickinfo->path, val_dict, + ret = validate_brickopts (volinfo, brickinfo, val_dict, op_errstr); if (ret) goto out; } ret = 0; + out: - - return ret; + return ret; +} + +static int +_check_globalopt (dict_t *this, char *key, data_t *value, void *ret_val) +{ + int *ret = NULL; + + ret = ret_val; + if (*ret) + return 0; + if (!glusterd_check_globaloption (key)) + *ret = 1; + + return 0; } int -glusterd_validate_reconfopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, - char **op_errstr) +glusterd_validate_globalopts (glusterd_volinfo_t *volinfo, + dict_t *val_dict, char **op_errstr) { - int ret = -1; + int ret = 0; + + dict_foreach (val_dict, _check_globalopt, &ret); + if (ret) { + *op_errstr = gf_strdup ( "option specified is not a global option"); + return -1; + } + ret = glusterd_validate_brickreconf (volinfo, val_dict, op_errstr); - gf_log ("", GF_LOG_DEBUG, "Inside Validate reconfigure options"); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not Validate bricks"); + goto out; + } + + ret = validate_clientopts (volinfo, val_dict, op_errstr); + if (ret) { + gf_log ("", GF_LOG_DEBUG, + "Could not Validate client"); + goto out; + } + ret = validate_nfsopts (volinfo, val_dict, op_errstr); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Could not Validate nfs"); + goto out; + } + + ret = validate_shdopts (volinfo, val_dict, op_errstr); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Could not Validate self-heald"); + goto out; + } + +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static int +_check_localopt (dict_t *this, char *key, data_t *value, void *ret_val) +{ + int *ret = NULL; + + ret = ret_val; + if (*ret) + return 0; + if (!glusterd_check_localoption (key)) + *ret = 1; + + return 0; +} + +int +glusterd_validate_reconfopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, + char **op_errstr) +{ + int ret = 0; + + dict_foreach (val_dict, _check_localopt, &ret); + if (ret) { + *op_errstr = gf_strdup ( "option specified is not a local option"); + return -1; + } ret = glusterd_validate_brickreconf (volinfo, val_dict, op_errstr); if (ret) { @@ -1371,7 +3802,7 @@ glusterd_validate_reconfopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, "Could not Validate bricks"); goto out; } - + ret = validate_clientopts (volinfo, val_dict, op_errstr); if (ret) { gf_log ("", GF_LOG_DEBUG, @@ 
-1379,8 +3810,256 @@ glusterd_validate_reconfopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, goto out; } + ret = validate_nfsopts (volinfo, val_dict, op_errstr); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Could not Validate nfs"); + goto out; + } + + + ret = validate_shdopts (volinfo, val_dict, op_errstr); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Could not Validate self-heald"); + goto out; + } + out: - gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); - return ret; + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static struct volopt_map_entry * +_gd_get_vmep (char *key) { + char *completion = NULL; + struct volopt_map_entry *vmep = NULL; + int ret = 0; + + COMPLETE_OPTION ((char *)key, completion, ret); + for (vmep = glusterd_volopt_map; vmep->key; vmep++) { + if (strcmp (vmep->key, key) == 0) + return vmep; + } + + return NULL; +} + +uint32_t +glusterd_get_op_version_for_key (char *key) +{ + struct volopt_map_entry *vmep = NULL; + + GF_ASSERT (key); + + vmep = _gd_get_vmep (key); + if (vmep) + return vmep->op_version; + + return 0; +} + +gf_boolean_t +gd_is_client_option (char *key) +{ + struct volopt_map_entry *vmep = NULL; + + GF_ASSERT (key); + + vmep = _gd_get_vmep (key); + if (vmep && (vmep->flags & OPT_FLAG_CLIENT_OPT)) + return _gf_true; + + return _gf_false; +} + +gf_boolean_t +gd_is_xlator_option (char *key) +{ + struct volopt_map_entry *vmep = NULL; + + GF_ASSERT (key); + + vmep = _gd_get_vmep (key); + if (vmep && (vmep->flags & OPT_FLAG_XLATOR_OPT)) + return _gf_true; + + return _gf_false; +} + +volume_option_type_t +_gd_get_option_type (char *key) +{ + struct volopt_map_entry *vmep = NULL; + void *dl_handle = NULL; + volume_opt_list_t vol_opt_list = {{0},}; + int ret = -1; + volume_option_t *opt = NULL; + char *xlopt_key = NULL; + volume_option_type_t opt_type = GF_OPTION_TYPE_MAX; + + GF_ASSERT (key); + + vmep = _gd_get_vmep (key); + + if (vmep) { + INIT_LIST_HEAD (&vol_opt_list.list); + ret = xlator_volopt_dynload (vmep->voltype, &dl_handle, + &vol_opt_list); + if (ret) + goto out; + + if (_get_xlator_opt_key_from_vme (vmep, &xlopt_key)) + goto out; + + opt = xlator_volume_option_get_list (&vol_opt_list, xlopt_key); + _free_xlator_opt_key (xlopt_key); + + if (opt) + opt_type = opt->type; + } + +out: + if (dl_handle) { + dlclose (dl_handle); + dl_handle = NULL; + } + + return opt_type; +} + +gf_boolean_t +gd_is_boolean_option (char *key) +{ + GF_ASSERT (key); + + if (GF_OPTION_TYPE_BOOL == _gd_get_option_type (key)) + return _gf_true; + + return _gf_false; +} + +/* This function will restore origin volume to it's snap. + * The restore operation will simply replace the Gluster origin + * volume with the snap volume. + * TODO: Multi-volume delete to be done. + * Cleanup in case of restore failure is pending. 
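glusterd_get_op_version_for_key(), gd_is_client_option() and gd_is_xlator_option() all funnel through _gd_get_vmep(), a linear scan of glusterd_volopt_map after COMPLETE_OPTION() has expanded bare option names. A sketch of the lookup over a trimmed-down entry type; the 0x04 flag value matches OPT_FLAG_CLIENT_OPT in the header diff below:

```c
#include <stdint.h>
#include <string.h>

struct vmep_sketch {
        const char *key;
        uint32_t    op_version;
        uint32_t    flags;
};

#define SKETCH_FLAG_CLIENT_OPT 0x04

/* Linear lookup over a NULL-key-terminated table, as _gd_get_vmep()
 * does over glusterd_volopt_map. */
static const struct vmep_sketch *
get_vmep (const struct vmep_sketch *map, const char *key)
{
        for (; map->key; map++)
                if (strcmp (map->key, key) == 0)
                        return map;
        return NULL;
}

static int
is_client_option (const struct vmep_sketch *map, const char *key)
{
        const struct vmep_sketch *e = get_vmep (map, key);

        return (e && (e->flags & SKETCH_FLAG_CLIENT_OPT)) ? 1 : 0;
}
```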
+ * + * @param orig_vol volinfo of origin volume + * @param snap_vol volinfo of snapshot volume + * + * @return 0 on success and negative value on error + */ +int +gd_restore_snap_volume (dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, + glusterd_volinfo_t *snap_vol) +{ + int ret = -1; + glusterd_volinfo_t *new_volinfo = NULL; + glusterd_snap_t *snap = NULL; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + glusterd_volinfo_t *temp_volinfo = NULL; + glusterd_volinfo_t *voliter = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (rsp_dict); + conf = this->private; + GF_ASSERT (conf); + + GF_VALIDATE_OR_GOTO (this->name, orig_vol, out); + GF_VALIDATE_OR_GOTO (this->name, snap_vol, out); + snap = snap_vol->snapshot; + GF_VALIDATE_OR_GOTO (this->name, snap, out); + + /* Snap volume must be stopped before performing the + * restore operation. + */ + ret = glusterd_stop_volume (snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop " + "snap volume"); + goto out; + } + + /* Create a new volinfo for the restored volume */ + ret = glusterd_volinfo_dup (snap_vol, &new_volinfo, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to create volinfo"); + goto out; + } + + /* Following entries need to be derived from origin volume. */ + strcpy (new_volinfo->volname, orig_vol->volname); + uuid_copy (new_volinfo->volume_id, orig_vol->volume_id); + new_volinfo->snap_count = orig_vol->snap_count; + new_volinfo->snap_max_hard_limit = orig_vol->snap_max_hard_limit; + new_volinfo->is_volume_restored = _gf_true; + + /* Bump the version of the restored volume, so that nodes * + * which are done can sync during handshake */ + new_volinfo->version = orig_vol->version; + + list_for_each_entry_safe (voliter, temp_volinfo, + &orig_vol->snap_volumes, snapvol_list) { + list_add_tail (&voliter->snapvol_list, + &new_volinfo->snap_volumes); + } + /* Copy the snap vol info to the new_volinfo.*/ + ret = glusterd_snap_volinfo_restore (rsp_dict, new_volinfo, snap_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to restore snap"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + + /* If the orig_vol is already restored then we should delete + * the backend LVMs */ + if (orig_vol->is_volume_restored) { + ret = glusterd_lvm_snapshot_remove (rsp_dict, orig_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to remove " + "LVM backend"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + } + + /* Once the new_volinfo is completely constructed then delete + * the original volinfo + */ + ret = glusterd_volinfo_delete (orig_vol); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to delete volinfo"); + (void)glusterd_volinfo_delete (new_volinfo); + goto out; + } + + /* New volinfo always shows the status as created. Therefore + * set the status to stopped. */ + glusterd_set_volume_status (new_volinfo, GLUSTERD_STATUS_STOPPED); + + list_add_tail (&new_volinfo->vol_list, &conf->volumes); + + /* Now delete the snap entry. As a first step delete the snap + * volume information stored in store. 
*/ + ret = glusterd_snap_remove (rsp_dict, snap, _gf_false, _gf_true); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "Failed to delete " + "snap %s", snap->snapname); + goto out; + } + + ret = glusterd_store_volinfo (new_volinfo, + GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store volinfo"); + goto out; + } + + ret = 0; +out: + + return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index e84e183ee..fcbaaf93e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _GLUSTERD_VOLGEN_H_ #define _GLUSTERD_VOLGEN_H_ @@ -27,18 +17,160 @@ #include "glusterd.h" +/* volopt map key name definitions */ + +#define VKEY_DIAG_CNT_FOP_HITS "diagnostics.count-fop-hits" +#define VKEY_DIAG_LAT_MEASUREMENT "diagnostics.latency-measurement" +#define VKEY_FEATURES_LIMIT_USAGE "features.limit-usage" +#define VKEY_MARKER_XTIME GEOREP".indexing" +#define VKEY_MARKER_XTIME_FORCE GEOREP".ignore-pid-check" +#define VKEY_CHANGELOG "changelog.changelog" +#define VKEY_FEATURES_QUOTA "features.quota" + +#define AUTH_ALLOW_MAP_KEY "auth.allow" +#define AUTH_REJECT_MAP_KEY "auth.reject" +#define NFS_DISABLE_MAP_KEY "nfs.disable" +#define AUTH_ALLOW_OPT_KEY "auth.addr.*.allow" +#define AUTH_REJECT_OPT_KEY "auth.addr.*.reject" +#define NFS_DISABLE_OPT_KEY "nfs.*.disable" + + +typedef enum { + GF_CLIENT_TRUSTED, + GF_CLIENT_OTHER +} glusterd_client_type_t; + +#define COMPLETE_OPTION(key, completion, ret) \ + do { \ + if (!strchr (key, '.')) { \ + ret = option_complete (key, &completion); \ + if (ret) { \ + gf_log ("", GF_LOG_ERROR, "Out of memory"); \ + return _gf_false; \ + } \ + \ + if (!completion) { \ + gf_log ("", GF_LOG_ERROR, "option %s does not" \ + "exist", key); \ + return _gf_false; \ + } \ + } \ + \ + if (completion) \ + GF_FREE (completion); \ + } while (0); + +typedef enum gd_volopt_flags_ { + OPT_FLAG_NONE, + OPT_FLAG_FORCE = 0x01, // option needs force to be reset + OPT_FLAG_XLATOR_OPT = 0x02, // option enables/disables xlators + OPT_FLAG_CLIENT_OPT = 0x04, // option affects clients +} gd_volopt_flags_t; + +typedef enum { + GF_XLATOR_POSIX = 0, + GF_XLATOR_ACL, + GF_XLATOR_LOCKS, + GF_XLATOR_IOT, + GF_XLATOR_INDEX, + GF_XLATOR_MARKER, + GF_XLATOR_IO_STATS, + GF_XLATOR_BD, + GF_XLATOR_NONE, +} glusterd_server_xlator_t; 
+ +/* As of now debug xlators can be loaded only below fuse in the client + * graph via cli. More xlators can be added below when the cli option + * for adding debug xlators anywhere in the client graph has to be made + * available. + */ +typedef enum { + GF_CLNT_XLATOR_FUSE = 0, + GF_CLNT_XLATOR_NONE, +} glusterd_client_xlator_t; + +typedef enum { DOC, NO_DOC, GLOBAL_DOC, GLOBAL_NO_DOC } option_type_t; + +typedef int (*vme_option_validation) (dict_t *dict, char *key, char *value, + char **op_errstr); + +struct volopt_map_entry { + char *key; + char *voltype; + char *option; + char *value; + option_type_t type; + uint32_t flags; + uint32_t op_version; + char *description; + vme_option_validation validate_fn; + /* If client_option is true, the option affects clients. + * this is used to calculate client-op-version of volumes + */ + //gf_boolean_t client_option; +}; + int glusterd_create_rb_volfiles (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo); -int glusterd_create_volfiles (glusterd_volinfo_t *volinfo); +int glusterd_create_volfiles_and_notify_services (glusterd_volinfo_t *volinfo); void glusterd_get_nfs_filepath (char *filename); +void glusterd_get_shd_filepath (char *filename); + int glusterd_create_nfs_volfile (); +int glusterd_create_shd_volfile (); int glusterd_delete_volfile (glusterd_volinfo_t *volinfo, glusterd_brickinfo_t *brickinfo); +int +glusterd_delete_snap_volfile (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t *snap_volinfo, + glusterd_brickinfo_t *brickinfo); int glusterd_volinfo_get (glusterd_volinfo_t *volinfo, char *key, char **value); +int glusterd_volinfo_get_boolean (glusterd_volinfo_t *volinfo, char *key); + +int glusterd_validate_globalopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, char **op_errstr); + +int glusterd_validate_localopts (dict_t *val_dict, char **op_errstr); +gf_boolean_t glusterd_check_globaloption (char *key); +gf_boolean_t +glusterd_check_voloption_flags (char *key, int32_t flags); +gf_boolean_t +glusterd_is_valid_volfpath (char *volname, char *brick); +int generate_brick_volfiles (glusterd_volinfo_t *volinfo); +int generate_snap_brick_volfiles (glusterd_volinfo_t *volinfo, + glusterd_volinfo_t *snap_volinfo); +int generate_client_volfiles (glusterd_volinfo_t *volinfo, + glusterd_client_type_t client_type); +int +generate_snap_client_volfiles (glusterd_volinfo_t *actual_volinfo, + glusterd_volinfo_t *snap_volinfo, + glusterd_client_type_t client_type, + gf_boolean_t vol_restore); +int glusterd_get_volopt_content (dict_t *dict, gf_boolean_t xml_out); +char* +glusterd_get_trans_type_rb (gf_transport_type ttype); +int +glusterd_check_nfs_volfile_identical (gf_boolean_t *identical); +int +glusterd_check_nfs_topology_identical (gf_boolean_t *identical); + +uint32_t +glusterd_get_op_version_for_key (char *key); + +gf_boolean_t +gd_is_client_option (char *key); + +gf_boolean_t +gd_is_xlator_option (char *key); + +gf_boolean_t +gd_is_boolean_option (char *key); +int gd_restore_snap_volume (dict_t *rsp_dict, + glusterd_volinfo_t *orig_vol, + glusterd_volinfo_t *snap_vol); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c new file mode 100644 index 000000000..0d322b9ad --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -0,0 +1,2225 @@ +/* + Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_BD_XLATOR +#include <lvm2app.h> +#endif + +#include "common-utils.h" +#include "syscall.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "run.h" + +#define glusterd_op_start_volume_args_get(dict, volname, flags) \ + glusterd_op_stop_volume_args_get (dict, volname, flags) + + +int +__glusterd_handle_create_volume (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + dict_t *dict = NULL; + char *bricks = NULL; + char *volname = NULL; + int brick_count = 0; + void *cli_rsp = NULL; + char err_str[2048] = {0,}; + gf_cli_rsp rsp = {0,}; + xlator_t *this = NULL; + char *free_ptr = NULL; + char *trans_type = NULL; + uuid_t volume_id = {0,}; + uuid_t tmp_uuid = {0}; + int32_t type = 0; + char *username = NULL; + char *password = NULL; + + GF_ASSERT (req); + + this = THIS; + GF_ASSERT(this); + + ret = -1; + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + snprintf (err_str, sizeof (err_str), "Failed to decode request " + "received from cli"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received create volume req"); + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } + + ret = dict_get_str (dict, "volname", &volname); + + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get volume " + "name"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + if ((ret = glusterd_check_volume_exists (volname))) { + snprintf (err_str, sizeof (err_str), "Volume %s already exists", + volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + ret = dict_get_int32 (dict, "count", &brick_count); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get brick count" + " for volume %s", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + ret = dict_get_int32 (dict, "type", &type); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get type of " + "volume %s", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + ret = dict_get_str (dict, "transport", &trans_type); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get " + "transport-type of volume %s", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + ret = dict_get_str (dict, "bricks", &bricks); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get bricks for " + "volume %s", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + if (!dict_get (dict, "force")) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get 'force' flag"); 
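All of the __glusterd_handle_cli_* handlers in this file share one control flow: decode the XDR request, unserialize the embedded dict, pull the required keys, kick off a synctask, and route every failure through a single CLI error response. The skeleton, with the glusterd calls abstracted behind callbacks:

```c
#include <stddef.h>

struct cli_ops {
        int  (*decode_xdr) (void *req);     /* xdr_to_generic () */
        int  (*unserialize) (void *req);    /* dict_unserialize () */
        int  (*fetch_keys) (void *req);     /* dict_get_str ("volname", ...) */
        int  (*begin_synctask) (void *req); /* glusterd_op_begin_synctask () */
        void (*send_error) (void *req);     /* glusterd_op_send_cli_response () */
};

/* Common shape of the CLI volume-op handlers added in this file. */
static int
handle_cli_op (void *req, const struct cli_ops *ops)
{
        int ret = -1;

        ret = ops->decode_xdr (req);
        if (ret < 0)
                goto out;
        ret = ops->unserialize (req);
        if (ret < 0)
                goto out;
        ret = ops->fetch_keys (req);
        if (ret)
                goto out;
        ret = ops->begin_synctask (req);
out:
        if (ret)
                ops->send_error (req);
        return ret;
}
```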
+ goto out; + } + + uuid_generate (volume_id); + free_ptr = gf_strdup (uuid_utoa (volume_id)); + ret = dict_set_dynstr (dict, "volume-id", free_ptr); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to set volume " + "id of volume %s", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + free_ptr = NULL; + + /* generate internal username and password */ + + uuid_generate (tmp_uuid); + username = gf_strdup (uuid_utoa (tmp_uuid)); + ret = dict_set_dynstr (dict, "internal-username", username); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set username for " + "volume %s", volname); + goto out; + } + + uuid_generate (tmp_uuid); + password = gf_strdup (uuid_utoa (tmp_uuid)); + ret = dict_set_dynstr (dict, "internal-password", password); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to set password for " + "volume %s", volname); + goto out; + } + + ret = glusterd_op_begin_synctask (req, GD_OP_CREATE_VOLUME, dict); + +out: + if (ret) { + rsp.op_ret = -1; + rsp.op_errno = 0; + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + rsp.op_errstr = err_str; + cli_rsp = &rsp; + glusterd_to_cli (req, cli_rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gf_cli_rsp, dict); + ret = 0; //Client response sent, prevent second response + } + + GF_FREE(free_ptr); + + return ret; +} + +int +glusterd_handle_create_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_create_volume); +} + +int +__glusterd_handle_cli_start_volume (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + char *volname = NULL; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_START_VOLUME; + char errstr[2048] = {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf (errstr, sizeof (errstr), "Failed to decode message " + "received from cli"); + req->rpc_err = GARBAGE_ARGS; + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf (errstr, sizeof (errstr), "Unable to decode " + "the command"); + goto out; + } + } + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (errstr, sizeof (errstr), "Unable to get volume name"); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received start vol req" + " for volume %s", volname); + + ret = glusterd_op_begin_synctask (req, GD_OP_START_VOLUME, dict); + +out: + free (cli_req.dict.dict_val); //its malloced by xdr + + if (ret) { + if (errstr[0] == '\0') + snprintf (errstr, sizeof (errstr), "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, errstr); + } + + return ret; +} + +int +glusterd_handle_cli_start_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_start_volume); +} + +int +__glusterd_handle_cli_stop_volume (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + char *dup_volname = NULL; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_STOP_VOLUME; + xlator_t *this = NULL; + char err_str[2048] = {0,}; + + this 
= THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf (err_str, sizeof (err_str), "Failed to decode message " + "received from cli"); + req->rpc_err = GARBAGE_ARGS; + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } + } + + ret = dict_get_str (dict, "volname", &dup_volname); + + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to get volume " + "name"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received stop vol req " + "for volume %s", dup_volname); + + ret = glusterd_op_begin_synctask (req, GD_OP_STOP_VOLUME, dict); + +out: + free (cli_req.dict.dict_val); //its malloced by xdr + + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + + return ret; +} + +int +glusterd_handle_cli_stop_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_stop_volume); +} + +int +__glusterd_handle_cli_delete_volume (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,},}; + glusterd_op_t cli_op = GD_OP_DELETE_VOLUME; + dict_t *dict = NULL; + char *volname = NULL; + char err_str[2048]= {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + snprintf (err_str, sizeof (err_str), "Failed to decode request " + "received from cli"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to decode " + "the command"); + goto out; + } + } + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (err_str, sizeof (err_str), "Failed to get volume " + "name"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received delete vol req " + "for volume %s", volname); + + ret = glusterd_op_begin_synctask (req, GD_OP_DELETE_VOLUME, dict); + +out: + free (cli_req.dict.dict_val); //its malloced by xdr + + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + + return ret; +} + +int +glusterd_handle_cli_delete_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_delete_volume); +} + +int +__glusterd_handle_cli_heal_volume (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + dict_t *dict = NULL; + glusterd_op_t cli_op = GD_OP_HEAL_VOLUME; + char *volname = NULL; + 
glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + char op_errstr[2048] = {0,}; + + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + this = THIS; + GF_ASSERT (this); + + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf (op_errstr, sizeof (op_errstr), + "Unable to decode the command"); + goto out; + } else { + dict->extra_stdfree = cli_req.dict.dict_val; + } + } + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (op_errstr, sizeof (op_errstr), "Unable to find " + "volume name"); + gf_log (this->name, GF_LOG_ERROR, "%s", op_errstr); + goto out; + } + + gf_log (this->name, GF_LOG_INFO, "Received heal vol req " + "for volume %s", volname); + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (op_errstr, sizeof (op_errstr), + "Volume %s does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", op_errstr); + goto out; + } + + ret = glusterd_add_bricks_hname_path_to_dict (dict, volinfo); + if (ret) + goto out; + + ret = dict_set_int32 (dict, "count", volinfo->brick_count); + if (ret) + goto out; + + ret = glusterd_op_begin_synctask (req, GD_OP_HEAL_VOLUME, dict); + +out: + if (ret) { + if (op_errstr[0] == '\0') + snprintf (op_errstr, sizeof (op_errstr), + "operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, op_errstr); + } + + return ret; +} + +int +glusterd_handle_cli_heal_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_heal_volume); +} + +int +__glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req) +{ + int32_t ret = -1; + gf_cli_req cli_req = {{0,}}; + char *volname = NULL; + char *options = NULL; + dict_t *dict = NULL; + int32_t option_cnt = 0; + glusterd_op_t cli_op = GD_OP_STATEDUMP_VOLUME; + char err_str[2048] = {0,}; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + GF_ASSERT (req); + + ret = -1; + ret = xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req); + if (ret < 0) { + req->rpc_err = GARBAGE_ARGS; + goto out; + } + if (cli_req.dict.dict_len) { + /* Unserialize the dictionary */ + dict = dict_new (); + + ret = dict_unserialize (cli_req.dict.dict_val, + cli_req.dict.dict_len, + &dict); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to " + "unserialize req-buffer to dictionary"); + snprintf (err_str, sizeof (err_str), "Unable to " + "decode the command"); + goto out; + } + } + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get the volume " + "name"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + ret = dict_get_str (dict, "options", &options); + if (ret) { + snprintf (err_str, sizeof (err_str), "Unable to get options"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + ret = dict_get_int32 (dict, "option_cnt", &option_cnt); + if (ret) { + snprintf (err_str , sizeof (err_str), "Unable to get option " + "count"); + gf_log (this->name, GF_LOG_ERROR, "%s", err_str); + goto out; + } + + + gf_log (this->name, GF_LOG_INFO, "Received statedump request for " + "volume %s with options %s", volname, 
options); + + ret = glusterd_op_begin_synctask (req, GD_OP_STATEDUMP_VOLUME, dict); + +out: + if (ret) { + if (err_str[0] == '\0') + snprintf (err_str, sizeof (err_str), + "Operation failed"); + ret = glusterd_op_send_cli_response (cli_op, ret, 0, req, + dict, err_str); + } + free (cli_req.dict.dict_val); + + return ret; +} + +int +glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + __glusterd_handle_cli_statedump_volume); +} + +#ifdef HAVE_BD_XLATOR +/* + * Validates if given VG in the brick exists or not. Also checks if VG has + * GF_XATTR_VOL_ID_KEY tag set to avoid using same VG for multiple bricks. + * Tag is checked only during glusterd_op_stage_create_volume. Tag is set during + * glusterd_validate_and_create_brickpath(). + * @brick - brick info, @check_tag - check for VG tag or not + * @msg - Error message to return to caller + */ +int +glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg) +{ + lvm_t handle = NULL; + vg_t vg = NULL; + char *vg_name = NULL; + int retval = 0; + char *p = NULL; + char *ptr = NULL; + struct dm_list *dm_lvlist = NULL; + struct dm_list *dm_seglist = NULL; + struct lvm_lv_list *lv_list = NULL; + struct lvm_property_value prop = {0, }; + struct lvm_lvseg_list *seglist = NULL; + struct dm_list *taglist = NULL; + struct lvm_str_list *strl = NULL; + + handle = lvm_init (NULL); + if (!handle) { + sprintf (msg, "lvm_init failed, could not validate vg"); + return -1; + } + if (*brick->vg == '\0') { /* BD xlator has vg in brick->path */ + p = gf_strdup (brick->path); + vg_name = strtok_r (p, "/", &ptr); + } else + vg_name = brick->vg; + + vg = lvm_vg_open (handle, vg_name, "r", 0); + if (!vg) { + sprintf (msg, "no such vg: %s", vg_name); + retval = -1; + goto out; + } + if (!check_tag) + goto next; + + taglist = lvm_vg_get_tags (vg); + if (!taglist) + goto next; + + dm_list_iterate_items (strl, taglist) { + if (!strncmp(strl->str, GF_XATTR_VOL_ID_KEY, + strlen (GF_XATTR_VOL_ID_KEY))) { + sprintf (msg, "VG %s is already part of" + " a brick", vg_name); + retval = -1; + goto out; + } + } +next: + + brick->caps = CAPS_BD | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; + + dm_lvlist = lvm_vg_list_lvs (vg); + if (!dm_lvlist) + goto out; + + dm_list_iterate_items (lv_list, dm_lvlist) { + dm_seglist = lvm_lv_list_lvsegs (lv_list->lv); + dm_list_iterate_items (seglist, dm_seglist) { + prop = lvm_lvseg_get_property (seglist->lvseg, + "segtype"); + if (!prop.is_valid || !prop.value.string) + continue; + if (!strcmp (prop.value.string, "thin-pool")) { + brick->caps |= CAPS_THIN; + gf_log (THIS->name, GF_LOG_INFO, "Thin Pool " + "\"%s\" will be used for thin LVs", + lvm_lv_get_name (lv_list->lv)); + break; + } + } + } + + retval = 0; +out: + if (vg) + lvm_vg_close (vg); + lvm_quit (handle); + if (p) + GF_FREE (p); + return retval; +} +#endif + +/* op-sm */ +int +glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + gf_boolean_t exists = _gf_false; + char *bricks = NULL; + char *brick_list = NULL; + char *free_ptr = NULL; + glusterd_brickinfo_t *brick_info = NULL; + int32_t brick_count = 0; + int32_t i = 0; + char *brick = NULL; + char *tmpptr = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + char msg[2048] = {0}; + uuid_t volume_uuid; + char *volume_uuid_str; + gf_boolean_t is_force = _gf_false; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "volname", &volname); + 
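+        /* This staging function re-reads the dict assembled by
+         * __glusterd_handle_create_volume above: "volname", "count",
+         * "volume-id", "bricks" and the optional "force" key are all
+         * expected to be present. An illustrative sketch of how such a
+         * dict is populated (values are placeholders, not a real
+         * request):
+         *
+         *     dict_t *dict = dict_new ();
+         *     ret = dict_set_str (dict, "volname", "testvol");
+         *     if (!ret)
+         *             ret = dict_set_int32 (dict, "count", 2);
+         *     if (!ret)
+         *             ret = dict_set_str (dict, "bricks",
+         *                                 " host1:/export/b1 host2:/export/b2");
+         */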
if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + exists = glusterd_check_volume_exists (volname); + if (exists) { + snprintf (msg, sizeof (msg), "Volume %s already exists", + volname); + ret = -1; + goto out; + } else { + ret = 0; + } + + ret = dict_get_int32 (dict, "count", &brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count " + "for volume %s", volname); + goto out; + } + + ret = dict_get_str (dict, "volume-id", &volume_uuid_str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume id of " + "volume %s", volname); + goto out; + } + + ret = uuid_parse (volume_uuid_str, volume_uuid); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to parse volume id of" + " volume %s", volname); + goto out; + } + + ret = dict_get_str (dict, "bricks", &bricks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get bricks for " + "volume %s", volname); + goto out; + } + + is_force = dict_get_str_boolean (dict, "force", _gf_false); + + if (bricks) { + brick_list = gf_strdup (bricks); + if (!brick_list) { + ret = -1; + goto out; + } else { + free_ptr = brick_list; + } + } + + while ( i < brick_count) { + i++; + brick= strtok_r (brick_list, " \n", &tmpptr); + brick_list = tmpptr; + + if (!glusterd_store_is_valid_brickpath (volname, brick) || + !glusterd_is_valid_volfpath (volname, brick)) { + snprintf (msg, sizeof (msg), "brick path %s is too " + "long.", brick); + ret = -1; + goto out; + } + + ret = glusterd_brickinfo_new_from_brick (brick, &brick_info); + if (ret) + goto out; + + ret = glusterd_new_brick_validate (brick, brick_info, msg, + sizeof (msg)); + if (ret) + goto out; + + ret = glusterd_resolve_brick (brick_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK, + brick_info->hostname, brick_info->path); + goto out; + } + + if (!uuid_compare (brick_info->uuid, MY_UUID)) { + +#ifdef HAVE_BD_XLATOR + if (brick_info->vg[0]) { + ret = glusterd_is_valid_vg (brick_info, 1, msg); + if (ret) + goto out; + } +#endif + ret = glusterd_validate_and_create_brickpath (brick_info, + volume_uuid, op_errstr, + is_force); + if (ret) + goto out; + brick_list = tmpptr; + } + glusterd_brickinfo_delete (brick_info); + brick_info = NULL; + } +out: + GF_FREE (free_ptr); + if (brick_info) + glusterd_brickinfo_delete (brick_info); + + if (msg[0] != '\0') { + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + } + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + +int +glusterd_op_stop_volume_args_get (dict_t *dict, char** volname, int *flags) +{ + int ret = -1; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + if (!dict || !volname || !flags) + goto out; + + ret = dict_get_str (dict, "volname", volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = dict_get_int32 (dict, "flags", flags); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get flags"); + goto out; + } +out: + return ret; +} + +int +glusterd_op_statedump_volume_args_get (dict_t *dict, char **volname, + char **options, int *option_cnt) +{ + int ret = -1; + + if (!dict || !volname || !options || !option_cnt) + goto out; + + ret = dict_get_str (dict, "volname", volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volname"); + goto out; + } + + ret = dict_get_str (dict, "options", options); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get options"); + goto out; + } 
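+        /* Callers retrieve the whole triple through this helper; e.g.
+         * glusterd_op_statedump_volume () below does:
+         *
+         *     ret = glusterd_op_statedump_volume_args_get (dict, &volname,
+         *                                                  &options,
+         *                                                  &option_cnt);
+         */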
+ + ret = dict_get_int32 (dict, "option_cnt", option_cnt); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get option count"); + goto out; + } + +out: + return ret; +} + +int +glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + int flags = 0; + gf_boolean_t exists = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + char msg[2048]; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + uuid_t volume_id = {0,}; + char volid[50] = {0,}; + char xattr_volid[50] = {0,}; + int caps = 0; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = glusterd_op_start_volume_args_get (dict, &volname, &flags); + if (ret) + goto out; + + exists = glusterd_check_volume_exists (volname); + + if (!exists) { + snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname); + ret = -1; + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS, + volname); + goto out; + } + + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + + if (!(flags & GF_CLI_FLAG_OP_FORCE)) { + if (glusterd_is_volume_started (volinfo)) { + snprintf (msg, sizeof (msg), "Volume %s already " + "started", volname); + ret = -1; + goto out; + } + } + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = glusterd_resolve_brick (brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK, + brickinfo->hostname, brickinfo->path); + goto out; + } + + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + ret = gf_lstat_dir (brickinfo->path, NULL); + if (ret && (flags & GF_CLI_FLAG_OP_FORCE)) { + continue; + } else if (ret) { + snprintf (msg, sizeof (msg), "Failed to find " + "brick directory %s for volume %s. " + "Reason : %s", brickinfo->path, + volname, strerror (errno)); + goto out; + } + ret = sys_lgetxattr (brickinfo->path, GF_XATTR_VOL_ID_KEY, + volume_id, 16); + if (ret < 0 && (!(flags & GF_CLI_FLAG_OP_FORCE))) { + snprintf (msg, sizeof (msg), "Failed to get " + "extended attribute %s for brick dir %s. " + "Reason : %s", GF_XATTR_VOL_ID_KEY, + brickinfo->path, strerror (errno)); + ret = -1; + goto out; + } else if (ret < 0) { + ret = sys_lsetxattr (brickinfo->path, + GF_XATTR_VOL_ID_KEY, + volinfo->volume_id, 16, + XATTR_CREATE); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to set " + "extended attribute %s on %s. Reason: " + "%s", GF_XATTR_VOL_ID_KEY, + brickinfo->path, strerror (errno)); + goto out; + } else { + continue; + } + } + if (uuid_compare (volinfo->volume_id, volume_id)) { + snprintf (msg, sizeof (msg), "Volume id mismatch for " + "brick %s:%s. 
Expected volume id %s, "
+                                  "volume id %s found", brickinfo->hostname,
+                                  brickinfo->path,
+                                  uuid_utoa_r (volinfo->volume_id, volid),
+                                  uuid_utoa_r (volume_id, xattr_volid));
+                        ret = -1;
+                        goto out;
+                }
+#ifdef HAVE_BD_XLATOR
+                if (brickinfo->vg[0])
+                        caps = CAPS_BD | CAPS_THIN |
+                                CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT;
+                /* Check for VG/thin pool if it is a BD volume */
+                if (brickinfo->vg[0]) {
+                        ret = glusterd_is_valid_vg (brickinfo, 0, msg);
+                        if (ret)
+                                goto out;
+                        /* if any one of the bricks does not have thin
+                           support, disable it for the entire volume */
+                        caps &= brickinfo->caps;
+                } else
+                        caps = 0;
+#endif
+        }
+
+        volinfo->caps = caps;
+        ret = 0;
+out:
+        if (ret && (msg[0] != '\0')) {
+                gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+                *op_errstr = gf_strdup (msg);
+        }
+        return ret;
+}
+
+int
+glusterd_op_stage_stop_volume (dict_t *dict, char **op_errstr)
+{
+        int                                     ret = -1;
+        char                                   *volname = NULL;
+        int                                     flags = 0;
+        gf_boolean_t                            exists = _gf_false;
+        gf_boolean_t                            is_run = _gf_false;
+        glusterd_volinfo_t                     *volinfo = NULL;
+        char                                    msg[2048] = {0};
+        xlator_t                               *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags);
+        if (ret)
+                goto out;
+
+        exists = glusterd_check_volume_exists (volname);
+
+        if (!exists) {
+                snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+                gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+                ret = -1;
+                goto out;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+                gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+                goto out;
+        }
+
+        ret = glusterd_validate_volume_id (dict, volinfo);
+        if (ret)
+                goto out;
+
+        /* If 'force' flag is given, no check is required */
+        if (flags & GF_CLI_FLAG_OP_FORCE)
+                goto out;
+
+        if (_gf_false == glusterd_is_volume_started (volinfo)) {
+                snprintf (msg, sizeof(msg), "Volume %s "
+                          "is not in the started state", volname);
+                gf_log (this->name, GF_LOG_ERROR, "%s", msg);
+                ret = -1;
+                goto out;
+        }
+        ret = glusterd_check_gsync_running (volinfo, &is_run);
+        if (ret && (is_run == _gf_false))
+                gf_log (this->name, GF_LOG_WARNING, "Unable to get the status"
+                        " of active "GEOREP" session");
+        if (is_run) {
+                gf_log (this->name, GF_LOG_WARNING, GEOREP" sessions active "
+                        "for the volume %s ", volname);
+                snprintf (msg, sizeof(msg), GEOREP" sessions are active "
+                          "for the volume '%s'.\nUse 'volume "GEOREP" "
+                          "status' command for more info. Use 'force' "
+                          "option to ignore and stop the volume.",
+                          volname);
+                ret = -1;
+                goto out;
+        }
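+
+        /* Stopping is likewise refused below while a replace-brick or a
+         * rebalance session is active on the volume; like the geo-rep
+         * check above, all of these checks are bypassed when the CLI
+         * passes the 'force' flag (the GF_CLI_FLAG_OP_FORCE test
+         * earlier), e.g. (illustrative CLI usage):
+         *
+         *     # gluster volume stop <volname> force
+         */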
+
+        if (glusterd_is_rb_ongoing (volinfo)) {
+                snprintf (msg, sizeof (msg), "Replace brick is in progress on "
+                          "volume %s. Please retry after replace-brick "
+                          "operation is committed or aborted", volname);
+                gf_log (this->name, GF_LOG_WARNING, "replace-brick in progress "
+                        "on volume %s", volname);
+                ret = -1;
+                goto out;
+        }
+
+        if (glusterd_is_defrag_on (volinfo)) {
+                snprintf (msg, sizeof(msg), "rebalance session is "
+                          "in progress for the volume '%s'", volname);
+                gf_log (this->name, GF_LOG_WARNING, "%s", msg);
+                ret = -1;
+                goto out;
+        }
+        if (volinfo->rep_brick.rb_status != GF_RB_STATUS_NONE) {
+                snprintf (msg, sizeof(msg), "replace-brick session is "
+                          "in progress for the volume '%s'", volname);
+                gf_log (this->name, GF_LOG_WARNING, "%s", msg);
+                ret = -1;
+                goto out;
+        }
+
+out:
+        if (msg[0] != 0)
+                *op_errstr = gf_strdup (msg);
+        gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+        return ret;
+}
+
+int
+glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr)
+{
+        int                             ret = 0;
+        char                           *volname = NULL;
+        gf_boolean_t                    exists = _gf_false;
+        glusterd_volinfo_t             *volinfo = NULL;
+        char                            msg[2048] = {0};
+        xlator_t                       *this = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+
+        ret = dict_get_str (dict, "volname", &volname);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
+                goto out;
+        }
+
+        exists = glusterd_check_volume_exists (volname);
+        if (!exists) {
+                snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+                ret = -1;
+                goto out;
+        } else {
+                ret = 0;
+        }
+
+        ret = glusterd_volinfo_find (volname, &volinfo);
+        if (ret) {
+                snprintf (msg, sizeof (msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+                goto out;
+        }
+
+        ret = glusterd_validate_volume_id (dict, volinfo);
+        if (ret)
+                goto out;
+
+        if (glusterd_is_volume_started (volinfo)) {
+                snprintf (msg, sizeof (msg), "Volume %s has been started. "
+                          "Volume needs to be stopped before deletion.",
+                          volname);
+                ret = -1;
+                goto out;
+        }
+
+        if (volinfo->snap_count > 0 || !list_empty(&volinfo->snap_volumes)) {
+                snprintf (msg, sizeof (msg), "Cannot delete volume %s, "
+                          "as it has %ld snapshots. 
" + "To delete the volume, " + "first delete all the snapshots under it.", + volname, volinfo->snap_count); + ret = -1; + goto out; + } + + ret = 0; + +out: + if (msg[0] != '\0') { + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + } + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + +int +glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + gf_boolean_t enabled = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + char msg[2048]; + glusterd_conf_t *priv = NULL; + dict_t *opt_dict = NULL; + gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID; + xlator_t *this = NULL; + + this = THIS; + priv = this->private; + if (!priv) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "priv is NULL"); + goto out; + } + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + ret = -1; + snprintf (msg, sizeof (msg), "Volume %s does not exist", volname); + gf_log (this->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + + if (!glusterd_is_volume_replicate (volinfo)) { + ret = -1; + snprintf (msg, sizeof (msg), "Volume %s is not of type " + "replicate", volname); + *op_errstr = gf_strdup (msg); + gf_log (this->name, GF_LOG_WARNING, "%s", msg); + goto out; + } + + if (!glusterd_is_volume_started (volinfo)) { + ret = -1; + snprintf (msg, sizeof (msg), "Volume %s is not started.", + volname); + gf_log (THIS->name, GF_LOG_WARNING, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + + opt_dict = volinfo->dict; + if (!opt_dict) { + ret = 0; + goto out; + } + + enabled = dict_get_str_boolean (opt_dict, "cluster.self-heal-daemon", + 1); + if (!enabled) { + ret = -1; + snprintf (msg, sizeof (msg), "Self-heal-daemon is " + "disabled. Heal will not be triggered on volume %s", + volname); + gf_log (this->name, GF_LOG_WARNING, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + + ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op); + if (ret || (heal_op == GF_AFR_OP_INVALID)) { + ret = -1; + *op_errstr = gf_strdup("Invalid heal-op"); + gf_log (this->name, GF_LOG_WARNING, "%s", "Invalid heal-op"); + goto out; + } + + switch (heal_op) { + case GF_AFR_OP_INDEX_SUMMARY: + case GF_AFR_OP_STATISTICS_HEAL_COUNT: + case GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA: + break; + default: + if (!glusterd_is_nodesvc_online("glustershd")){ + ret = -1; + *op_errstr = gf_strdup ("Self-heal daemon is " + "not running. Check self-heal " + "daemon log file."); + gf_log (this->name, GF_LOG_WARNING, "%s", + "Self-heal daemon is not running." 
+ "Check self-heal daemon log file."); + goto out; + } + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + +int +glusterd_op_stage_statedump_volume (dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + char *options = NULL; + int option_cnt = 0; + gf_boolean_t is_running = _gf_false; + glusterd_volinfo_t *volinfo = NULL; + char msg[2408] = {0,}; + + ret = glusterd_op_statedump_volume_args_get (dict, &volname, &options, + &option_cnt); + if (ret) + goto out; + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (msg, sizeof(msg), "Volume %s does not exist", + volname); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + + is_running = glusterd_is_volume_started (volinfo); + if (!is_running) { + snprintf (msg, sizeof(msg), "Volume %s is not in a started" + " state", volname); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + ret = -1; + goto out; + } + +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_stage_clearlocks_volume (dict_t *dict, char **op_errstr) +{ + int ret = -1; + char *volname = NULL; + char *path = NULL; + char *type = NULL; + char *kind = NULL; + glusterd_volinfo_t *volinfo = NULL; + char msg[2048] = {0,}; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + snprintf (msg, sizeof(msg), "Failed to get volume name"); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + + ret = dict_get_str (dict, "path", &path); + if (ret) { + snprintf (msg, sizeof(msg), "Failed to get path"); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + + ret = dict_get_str (dict, "kind", &kind); + if (ret) { + snprintf (msg, sizeof(msg), "Failed to get kind"); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + + ret = dict_get_str (dict, "type", &type); + if (ret) { + snprintf (msg, sizeof(msg), "Failed to get type"); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (msg, sizeof(msg), "Volume %s does not exist", + volname); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + + ret = glusterd_validate_volume_id (dict, volinfo); + if (ret) + goto out; + + if (!glusterd_is_volume_started (volinfo)) { + snprintf (msg, sizeof(msg), "Volume %s is not started", + volname); + gf_log ("", GF_LOG_ERROR, "%s", msg); + *op_errstr = gf_strdup (msg); + goto out; + } + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_op_create_volume (dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + gf_boolean_t vol_added = _gf_false; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + char *brick = NULL; + int32_t count = 0; + int32_t i = 1; + char *bricks = NULL; + char *brick_list = NULL; + char *free_ptr = NULL; + char *saveptr = NULL; + char *trans_type = NULL; + char *str = NULL; + char *username = NULL; + char *password = NULL; + int caps = 0; + char msg[1024] __attribute__((unused)) = {0, }; + + this = THIS; + GF_ASSERT (this); + + priv = this->private; + GF_ASSERT (priv); + + ret = glusterd_volinfo_new 
(&volinfo); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to allocate memory for volinfo"); + goto out; + } + + ret = dict_get_str (dict, "volname", &volname); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME); + GF_ASSERT (volinfo->volname); + + ret = dict_get_int32 (dict, "type", &volinfo->type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get type of volume" + " %s", volname); + goto out; + } + + ret = dict_get_int32 (dict, "count", &volinfo->brick_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get brick count of" + " volume %s", volname); + goto out; + } + + ret = dict_get_int32 (dict, "port", &volinfo->port); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get port"); + goto out; + } + + count = volinfo->brick_count; + + ret = dict_get_str (dict, "bricks", &bricks); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get bricks for " + "volume %s", volname); + goto out; + } + + /* replica-count 1 means, no replication, file is in one brick only */ + volinfo->replica_count = 1; + /* stripe-count 1 means, no striping, file is present as a whole */ + volinfo->stripe_count = 1; + + if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) { + ret = dict_get_int32 (dict, "replica-count", + &volinfo->replica_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "replica count for volume %s", volname); + goto out; + } + } else if (GF_CLUSTER_TYPE_STRIPE == volinfo->type) { + ret = dict_get_int32 (dict, "stripe-count", + &volinfo->stripe_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get stripe" + " count for volume %s", volname); + goto out; + } + } else if (GF_CLUSTER_TYPE_STRIPE_REPLICATE == volinfo->type) { + ret = dict_get_int32 (dict, "stripe-count", + &volinfo->stripe_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get stripe" + " count for volume %s", volname); + goto out; + } + ret = dict_get_int32 (dict, "replica-count", + &volinfo->replica_count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to get " + "replica count for volume %s", volname); + goto out; + } + } + + /* dist-leaf-count is the count of brick nodes for a given + subvolume of distribute */ + volinfo->dist_leaf_count = glusterd_get_dist_leaf_count (volinfo); + + /* subvol_count is the count of number of subvolumes present + for a given distribute volume */ + volinfo->subvol_count = (volinfo->brick_count / + volinfo->dist_leaf_count); + + /* Keep sub-count same as earlier, for the sake of backward + compatibility */ + if (volinfo->dist_leaf_count > 1) + volinfo->sub_count = volinfo->dist_leaf_count; + + ret = dict_get_str (dict, "transport", &trans_type); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get transport type of volume %s", volname); + goto out; + } + + ret = dict_get_str (dict, "volume-id", &str); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get volume-id of volume %s", volname); + goto out; + } + ret = uuid_parse (str, volinfo->volume_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "unable to parse uuid %s of volume %s", str, volname); + goto out; + } + + ret = dict_get_str (dict, "internal-username", &username); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "unable to get internal username of volume %s", + volname); + goto out; + } + glusterd_auth_set_username (volinfo, username); + + ret = dict_get_str (dict, 
"internal-password", &password); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "unable to get internal password of volume %s", + volname); + goto out; + } + glusterd_auth_set_password (volinfo, password); + + if (strcasecmp (trans_type, "rdma") == 0) { + volinfo->transport_type = GF_TRANSPORT_RDMA; + volinfo->nfs_transport_type = GF_TRANSPORT_RDMA; + } else if (strcasecmp (trans_type, "tcp") == 0) { + volinfo->transport_type = GF_TRANSPORT_TCP; + volinfo->nfs_transport_type = GF_TRANSPORT_TCP; + } else { + volinfo->transport_type = GF_TRANSPORT_BOTH_TCP_RDMA; + volinfo->nfs_transport_type = GF_DEFAULT_NFS_TRANSPORT; + } + + if (bricks) { + brick_list = gf_strdup (bricks); + free_ptr = brick_list; + } + + if (count) + brick = strtok_r (brick_list+1, " \n", &saveptr); + caps = CAPS_BD | CAPS_THIN | CAPS_OFFLOAD_COPY | CAPS_OFFLOAD_SNAPSHOT; + + while ( i <= count) { + ret = glusterd_brickinfo_new_from_brick (brick, &brickinfo); + if (ret) + goto out; + + ret = glusterd_resolve_brick (brickinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, FMTSTR_RESOLVE_BRICK, + brickinfo->hostname, brickinfo->path); + goto out; + } + +#ifdef HAVE_BD_XLATOR + if (!uuid_compare (brickinfo->uuid, MY_UUID)) { + if (brickinfo->vg[0]) { + ret = glusterd_is_valid_vg (brickinfo, 0, msg); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "%s", + msg); + goto out; + } + + /* if anyone of the brick does not have thin + support, disable it for entire volume */ + caps &= brickinfo->caps; + + + } else + caps = 0; + } +#endif + + list_add_tail (&brickinfo->brick_list, &volinfo->bricks); + brick = strtok_r (NULL, " \n", &saveptr); + i++; + } + + gd_update_volume_op_versions (volinfo); + + volinfo->caps = caps; + + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + glusterd_store_delete_volume (volinfo); + *op_errstr = gf_strdup ("Failed to store the Volume information"); + goto out; + } + + ret = glusterd_create_volfiles_and_notify_services (volinfo); + if (ret) { + *op_errstr = gf_strdup ("Failed to create volume files"); + goto out; + } + + volinfo->rebal.defrag_status = 0; + list_add_tail (&volinfo->vol_list, &priv->volumes); + vol_added = _gf_true; + +out: + GF_FREE(free_ptr); + if (!vol_added && volinfo) + glusterd_volinfo_delete (volinfo); + return ret; +} + +int +glusterd_op_start_volume (dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + int flags = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + ret = glusterd_op_start_volume_args_get (dict, &volname, &flags); + if (ret) + goto out; + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS, + volname); + goto out; + } + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = glusterd_brick_start (volinfo, brickinfo, _gf_true); + /* If 'force' try to start all bricks regardless of success or + * failure + */ + if (!(flags & GF_CLI_FLAG_OP_FORCE) && ret) + goto out; + } + + glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STARTED); + + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) + goto out; + + ret = glusterd_nodesvcs_handle_graph_change (volinfo); + +out: + gf_log (this->name, GF_LOG_DEBUG, "returning %d ", ret); + return ret; +} + +int +glusterd_stop_volume (glusterd_volinfo_t *volinfo) +{ + int ret = -1; + glusterd_brickinfo_t *brickinfo = NULL; + xlator_t *this 
= NULL; + + this = THIS; + GF_ASSERT (this); + + GF_VALIDATE_OR_GOTO (this->name, volinfo, out); + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + ret = glusterd_brick_stop (volinfo, brickinfo, _gf_false); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop " + "brick (%s)", brickinfo->path); + goto out; + } + } + + glusterd_set_volume_status (volinfo, GLUSTERD_STATUS_STOPPED); + + ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to store volinfo of " + "%s volume", volinfo->volname); + goto out; + } + + ret = glusterd_nodesvcs_handle_graph_change (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to notify graph " + "change for %s volume", volinfo->volname); + goto out; + } + +out: + return ret; +} + + +int +glusterd_op_stop_volume (dict_t *dict) +{ + int ret = 0; + int flags = 0; + char *volname = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + + ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags); + if (ret) + goto out; + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS, + volname); + goto out; + } + + ret = glusterd_stop_volume (volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stop %s volume", + volname); + goto out; + } +out: + return ret; +} + +int +glusterd_op_delete_volume (dict_t *dict) +{ + int ret = 0; + char *volname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, FMTSTR_CHECK_VOL_EXISTS, + volname); + goto out; + } + + ret = glusterd_delete_volume (volinfo); +out: + gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret); + return ret; +} + +int +glusterd_op_heal_volume (dict_t *dict, char **op_errstr) +{ + int ret = 0; + /* Necessary subtasks of heal are completed in brick op */ + + return ret; +} + +int +glusterd_op_statedump_volume (dict_t *dict, char **op_errstr) +{ + int ret = 0; + char *volname = NULL; + char *options = NULL; + int option_cnt = 0; + glusterd_volinfo_t *volinfo = NULL; + glusterd_brickinfo_t *brickinfo = NULL; + + ret = glusterd_op_statedump_volume_args_get (dict, &volname, &options, + &option_cnt); + if (ret) + goto out; + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) + goto out; + gf_log ("", GF_LOG_DEBUG, "Performing statedump on volume %s", volname); + if (strstr (options, "nfs") != NULL) { + ret = glusterd_nfs_statedump (options, option_cnt, op_errstr); + if (ret) + goto out; + } else { + list_for_each_entry (brickinfo, &volinfo->bricks, + brick_list) { + ret = glusterd_brick_statedump (volinfo, brickinfo, + options, option_cnt, + op_errstr); + /* Let us take the statedump of other bricks instead of + * exiting, if statedump of this brick fails. + */ + if (ret) + gf_log (THIS->name, GF_LOG_WARNING, "could not " + "take the statedump of the brick %s:%s." 
+ " Proceeding to other bricks", + brickinfo->hostname, brickinfo->path); + } + } + +out: + return ret; +} + +int +glusterd_clearlocks_send_cmd (glusterd_volinfo_t *volinfo, char *cmd, + char *path, char *result, char *errstr, + int err_len, char *mntpt) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + char abspath[PATH_MAX] = {0, }; + + priv = THIS->private; + + snprintf (abspath, sizeof (abspath), "%s/%s", mntpt, path); + ret = sys_lgetxattr (abspath, cmd, result, PATH_MAX); + if (ret < 0) { + snprintf (errstr, err_len, "clear-locks getxattr command " + "failed. Reason: %s", strerror (errno)); + gf_log (THIS->name, GF_LOG_DEBUG, "%s", errstr); + goto out; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_clearlocks_rmdir_mount (glusterd_volinfo_t *volinfo, char *mntpt) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + + priv = THIS->private; + + ret = rmdir (mntpt); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, "rmdir failed"); + goto out; + } + + ret = 0; +out: + return ret; +} + +void +glusterd_clearlocks_unmount (glusterd_volinfo_t *volinfo, char *mntpt) +{ + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + int ret = 0; + + priv = THIS->private; + + /*umount failures are ignored. Using stat we could have avoided + * attempting to unmount a non-existent filesystem. But a failure of + * stat() on mount can be due to network failures.*/ + + runinit (&runner); + runner_add_args (&runner, "/bin/umount", "-f", NULL); + runner_argprintf (&runner, "%s", mntpt); + + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret) { + ret = 0; + gf_log ("", GF_LOG_DEBUG, + "umount failed on maintenance client"); + } + + return; +} + +int +glusterd_clearlocks_create_mount (glusterd_volinfo_t *volinfo, char **mntpt) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + char template[PATH_MAX] = {0,}; + char *tmpl = NULL; + + priv = THIS->private; + + snprintf (template, sizeof (template), "/tmp/%s.XXXXXX", + volinfo->volname); + tmpl = mkdtemp (template); + if (!tmpl) { + gf_log (THIS->name, GF_LOG_DEBUG, "Couldn't create temporary " + "mount directory. 
Reason %s", strerror (errno)); + goto out; + } + + *mntpt = gf_strdup (tmpl); + ret = 0; +out: + return ret; +} + +int +glusterd_clearlocks_mount (glusterd_volinfo_t *volinfo, char **xl_opts, + char *mntpt) +{ + int ret = -1; + int i = 0; + glusterd_conf_t *priv = NULL; + runner_t runner = {0,}; + char client_volfpath[PATH_MAX] = {0,}; + char self_heal_opts[3][1024] = {"*replicate*.data-self-heal=off", + "*replicate*.metadata-self-heal=off", + "*replicate*.entry-self-heal=off"}; + + priv = THIS->private; + + runinit (&runner); + glusterd_get_trusted_client_filepath (client_volfpath, volinfo, + volinfo->transport_type); + runner_add_args (&runner, SBIN_DIR"/glusterfs", "-f", NULL); + runner_argprintf (&runner, "%s", client_volfpath); + runner_add_arg (&runner, "-l"); + runner_argprintf (&runner, DEFAULT_LOG_FILE_DIRECTORY + "/%s-clearlocks-mnt.log", volinfo->volname); + if (volinfo->memory_accounting) + runner_add_arg (&runner, "--mem-accounting"); + + for (i = 0; i < volinfo->brick_count && xl_opts[i]; i++) { + runner_add_arg (&runner, "--xlator-option"); + runner_argprintf (&runner, "%s", xl_opts[i]); + } + + for (i = 0; i < 3; i++) { + runner_add_args (&runner, "--xlator-option", + self_heal_opts[i], NULL); + } + + runner_argprintf (&runner, "%s", mntpt); + synclock_unlock (&priv->big_lock); + ret = runner_run (&runner); + synclock_lock (&priv->big_lock); + if (ret) { + gf_log (THIS->name, GF_LOG_DEBUG, + "Could not start glusterfs"); + goto out; + } + gf_log (THIS->name, GF_LOG_DEBUG, + "Started glusterfs successfully"); + +out: + return ret; +} + +int +glusterd_clearlocks_get_local_client_ports (glusterd_volinfo_t *volinfo, + char **xl_opts) +{ + glusterd_brickinfo_t *brickinfo = NULL; + glusterd_conf_t *priv = NULL; + int index = 0; + int ret = -1; + int i = 0; + int port = 0; + + GF_ASSERT (xl_opts); + if (!xl_opts) { + gf_log (THIS->name, GF_LOG_DEBUG, "Should pass non-NULL " + "xl_opts"); + goto out; + } + + priv = THIS->private; + + index = -1; + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + index++; + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + port = pmap_registry_search (THIS, brickinfo->path, + GF_PMAP_PORT_BRICKSERVER); + if (!port) { + ret = -1; + gf_log (THIS->name, GF_LOG_DEBUG, "Couldn't get port " + " for brick %s:%s", brickinfo->hostname, + brickinfo->path); + goto out; + } + + ret = gf_asprintf (&xl_opts[i], "%s-client-%d.remote-port=%d", + volinfo->volname, index, port); + if (ret == -1) { + xl_opts[i] = NULL; + goto out; + } + i++; + } + + ret = 0; +out: + return ret; +} + +int +glusterd_op_clearlocks_volume (dict_t *dict, char **op_errstr, dict_t *rsp_dict) +{ + int32_t ret = -1; + int i = 0; + char *volname = NULL; + char *path = NULL; + char *kind = NULL; + char *type = NULL; + char *opts = NULL; + char *cmd_str = NULL; + char *free_ptr = NULL; + char msg[PATH_MAX] = {0,}; + char result[PATH_MAX] = {0,}; + char *mntpt = NULL; + char **xl_opts = NULL; + glusterd_volinfo_t *volinfo = NULL; + + ret = dict_get_str (dict, "volname", &volname); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get volume name"); + goto out; + } + gf_log ("", GF_LOG_DEBUG, "Performing clearlocks on volume %s", volname); + + ret = dict_get_str (dict, "path", &path); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get path"); + goto out; + } + + ret = dict_get_str (dict, "kind", &kind); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Failed to get kind"); + goto out; + } + + ret = dict_get_str (dict, "type", &type); + if (ret) { 
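+                /* "kind" and "type" (together with the optional "opts"
+                 * below) select which locks to clear; they are folded into
+                 * a virtual getxattr key of the form
+                 * GF_XATTR_CLRLK_CMD".t<type>.k<kind>[.<opts>]", which
+                 * glusterd_clearlocks_send_cmd () later issues against the
+                 * maintenance mount via sys_lgetxattr (). For instance
+                 * (illustrative values), type "inode" and kind "granted"
+                 * yield GF_XATTR_CLRLK_CMD".tinode.kgranted".
+                 */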
+ gf_log (THIS->name, GF_LOG_ERROR, "Failed to get type"); + goto out; + } + + ret = dict_get_str (dict, "opts", &opts); + if (ret) + ret = 0; + + gf_log (THIS->name, GF_LOG_INFO, "Received clear-locks request for " + "volume %s with kind %s type %s and options %s", volname, + kind, type, opts); + + if (opts) + ret = gf_asprintf (&cmd_str, GF_XATTR_CLRLK_CMD".t%s.k%s.%s", + type, kind, opts); + else + ret = gf_asprintf (&cmd_str, GF_XATTR_CLRLK_CMD".t%s.k%s", + type, kind); + if (ret == -1) + goto out; + + ret = glusterd_volinfo_find (volname, &volinfo); + if (ret) { + snprintf (msg, sizeof (msg), "Volume %s doesn't exist.", + volname); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + goto out; + } + + xl_opts = GF_CALLOC (volinfo->brick_count+1, sizeof (char*), + gf_gld_mt_charptr); + if (!xl_opts) + goto out; + + ret = glusterd_clearlocks_get_local_client_ports (volinfo, xl_opts); + if (ret) { + snprintf (msg, sizeof (msg), "Couldn't get port numbers of " + "local bricks"); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + goto out; + } + + ret = glusterd_clearlocks_create_mount (volinfo, &mntpt); + if (ret) { + snprintf (msg, sizeof (msg), "Creating mount directory " + "for clear-locks failed."); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + goto out; + } + + ret = glusterd_clearlocks_mount (volinfo, xl_opts, mntpt); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to mount clear-locks " + "maintenance client."); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + goto out; + } + + ret = glusterd_clearlocks_send_cmd (volinfo, cmd_str, path, result, + msg, sizeof (msg), mntpt); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + goto umount; + } + + free_ptr = gf_strdup(result); + if (dict_set_dynstr (rsp_dict, "lk-summary", free_ptr)) { + GF_FREE (free_ptr); + snprintf (msg, sizeof (msg), "Failed to set clear-locks " + "result"); + gf_log (THIS->name, GF_LOG_ERROR, "%s", msg); + } + +umount: + glusterd_clearlocks_unmount (volinfo, mntpt); + + if (glusterd_clearlocks_rmdir_mount (volinfo, mntpt)) + gf_log (THIS->name, GF_LOG_WARNING, "Couldn't unmount " + "clear-locks mount point"); + +out: + if (ret) + *op_errstr = gf_strdup (msg); + + if (xl_opts) { + for (i = 0; i < volinfo->brick_count && xl_opts[i]; i++) + GF_FREE (xl_opts[i]); + GF_FREE (xl_opts); + } + + GF_FREE (cmd_str); + + GF_FREE (mntpt); + + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c new file mode 100644 index 000000000..665a8b298 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -0,0 +1,1452 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterd-volgen.h" +#include "glusterd-utils.h" + +static int +check_dict_key_value (dict_t *dict, char *key, char *value) +{ + glusterd_conf_t *priv = NULL; + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, "Received Empty Dict."); + ret = -1; + goto out; + } + + if (!key) { + gf_log (this->name, GF_LOG_ERROR, "Received Empty Key."); + ret = -1; + goto out; + } + + if (!value) { + gf_log (this->name, GF_LOG_ERROR, "Received Empty Value."); + ret = -1; + goto out; + } + +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + +static int +get_volname_volinfo (dict_t *dict, char **volname, glusterd_volinfo_t **volinfo) +{ + glusterd_conf_t *priv = NULL; + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_str (dict, "volname", volname); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name"); + goto out; + } + + ret = glusterd_volinfo_find (*volname, volinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory"); + goto out; + } + +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + +static int +validate_cache_max_min_size (dict_t *dict, char *key, char *value, + char **op_errstr) +{ + char *current_max_value = NULL; + char *current_min_value = NULL; + char errstr[2048] = ""; + char *volname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + uint64_t max_value = 0; + uint64_t min_value = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = check_dict_key_value (dict, key, value); + if (ret) + goto out; + + ret = get_volname_volinfo (dict, &volname, &volinfo); + if (ret) + goto out; + + if ((!strcmp (key, "performance.cache-min-file-size")) || + (!strcmp (key, "cache-min-file-size"))) { + glusterd_volinfo_get (volinfo, + "performance.cache-max-file-size", + ¤t_max_value); + if (current_max_value) { + gf_string2bytesize (current_max_value, &max_value); + gf_string2bytesize (value, &min_value); + current_min_value = value; + } + } else if ((!strcmp (key, "performance.cache-max-file-size")) || + (!strcmp (key, "cache-max-file-size"))) { + glusterd_volinfo_get (volinfo, + "performance.cache-min-file-size", + ¤t_min_value); + if (current_min_value) { + gf_string2bytesize (current_min_value, &min_value); + gf_string2bytesize (value, &max_value); + current_max_value = value; + } + } + + if (min_value > max_value) { + snprintf (errstr, sizeof (errstr), + "cache-min-file-size (%s) is greater than " + "cache-max-file-size (%s)", + current_min_value, current_max_value); + gf_log (this->name, GF_LOG_ERROR, "%s", errstr); + *op_errstr = gf_strdup (errstr); + ret = -1; + goto out; + } + +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + + return ret; +} + +static int +validate_quota (dict_t *dict, char *key, char *value, + char **op_errstr) +{ + char errstr[2048] = ""; + char *volname = NULL; + glusterd_conf_t *priv = NULL; + glusterd_volinfo_t *volinfo = NULL; + int ret = 0; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + ret = check_dict_key_value (dict, key, value); + if (ret) + goto out; + + ret = get_volname_volinfo 
(dict, &volname, &volinfo);
+        if (ret)
+                goto out;
+
+        ret = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA);
+        if (ret == -1) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to get the quota status");
+                goto out;
+        }
+
+        if (ret == _gf_false) {
+                snprintf (errstr, sizeof (errstr),
+                          "Cannot set %s. Enable quota first.", key);
+                gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+                *op_errstr = gf_strdup (errstr);
+                ret = -1;
+                goto out;
+        }
+
+        ret = 0;
+out:
+        gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+        return ret;
+}
+
+static int
+validate_stripe (dict_t *dict, char *key, char *value, char **op_errstr)
+{
+        char                 errstr[2048] = "";
+        char                *volname      = NULL;
+        glusterd_conf_t     *priv         = NULL;
+        glusterd_volinfo_t  *volinfo      = NULL;
+        int                  ret          = 0;
+        xlator_t            *this         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = check_dict_key_value (dict, key, value);
+        if (ret)
+                goto out;
+
+        ret = get_volname_volinfo (dict, &volname, &volinfo);
+        if (ret)
+                goto out;
+
+        if (volinfo->stripe_count == 1) {
+                snprintf (errstr, sizeof (errstr),
+                          "Cannot set %s for a non-stripe volume.", key);
+                gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
+                *op_errstr = gf_strdup (errstr);
+                ret = -1;
+                goto out;
+        }
+
+out:
+        gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+        return ret;
+}
+
+static int
+validate_subvols_per_directory (dict_t *dict, char *key, char *value,
+                                char **op_errstr)
+{
+        char                 errstr[2048] = "";
+        char                *volname      = NULL;
+        glusterd_conf_t     *priv         = NULL;
+        glusterd_volinfo_t  *volinfo      = NULL;
+        int                  ret          = 0;
+        int                  subvols      = 0;
+        xlator_t            *this         = NULL;
+
+        this = THIS;
+        GF_ASSERT (this);
+        priv = this->private;
+        GF_ASSERT (priv);
+
+        ret = check_dict_key_value (dict, key, value);
+        if (ret)
+                goto out;
+
+        ret = get_volname_volinfo (dict, &volname, &volinfo);
+        if (ret)
+                goto out;
+
+        subvols = atoi(value);
+
+        /* Check whether subvols-per-directory exceeds the total
+           number of subvolumes. */
+        if (subvols > volinfo->subvol_count) {
+                snprintf (errstr, sizeof(errstr),
+                          "subvols-per-directory(%d) is greater "
+                          "than the number of subvolumes(%d).",
+                          subvols, volinfo->subvol_count);
+                gf_log (this->name, GF_LOG_ERROR,
+                        "%s.", errstr);
+                *op_errstr = gf_strdup (errstr);
+                ret = -1;
+                goto out;
+        }
+
+out:
+        gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
+
+        return ret;
+}
+
+
+/* dispatch table for VOLUME SET
+ * -----------------------------
+ *
+ * Format of entries:
+ *
+ * First field is the <key>, for the purpose of looking it up
+ * in volume dictionary. Each <key> is of the format "<domain>.<specifier>".
+ *
+ * Second field is <voltype>.
+ *
+ * Third field is <option>; if it is unset, it is assumed to be
+ * the same as <specifier>.
+ *
+ * Fourth field is <value>. In this context it is used to specify
+ * a default: even if the volume dict doesn't have a value, we
+ * proceed as if that default value were set for it.
+ *
+ * Fifth field is <doctype>, which decides if the option is public and
+ * available in "set help" or not. "NO_DOC" entries are not part of the
+ * public interface and are subject to change at any time. This also
+ * decides if an option is global (applies to all volumes) or normal
+ * (applies only to the specified volume).
+ *
+ * Sixth field is <flags>.
+ *
+ * Seventh field is <op-version>.
+ *
+ * Eighth field is the description of the option: if NULL, it is fetched
+ * from the translator code's xlator_options table.
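+ *
+ * For instance, a minimal basic entry combining these fields (mirroring
+ * the actual "performance.cache-size" entry further down in this table)
+ * looks like:
+ *
+ *     { .key        = "performance.cache-size",
+ *       .voltype    = "performance/io-cache",
+ *       .op_version = 1,
+ *       .flags      = OPT_FLAG_CLIENT_OPT
+ *     },
+ *
+ * The one remaining field, described next, attaches a validation hook.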
+ * + * Nineth field is validation function: If NULL, xlator's option specific + * validation will be tried, otherwise tried at glusterd code itself. + * + * There are two type of entries: basic and special. + * + * - Basic entries are the ones where the <option> does _not_ start with + * the bang! character ('!'). + * + * In their case, <option> is understood as an option for an xlator of + * type <voltype>. Their effect is to copy over the volinfo->dict[<key>] + * value to all graph nodes of type <voltype> (if such a value is set). + * + * You are free to add entries of this type, they will become functional + * just by being present in the table. + * + * - Special entries where the <option> starts with the bang!. + * + * They are not applied to all graphs during generation, and you cannot + * extend them in a trivial way which could be just picked up. Better + * not touch them unless you know what you do. + * + * + * Another kind of grouping for options, according to visibility: + * + * - Exported: one which is used in the code. These are characterized by + * being used a macro as <key> (of the format VKEY_..., defined in + * glusterd-volgen.h + * + * - Non-exported: the rest; these have string literal <keys>. + * + * Adhering to this policy, option name changes shall be one-liners. + * + */ + +struct volopt_map_entry glusterd_volopt_map[] = { + /* DHT xlator options */ + { .key = "cluster.lookup-unhashed", + .voltype = "cluster/distribute", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.min-free-disk", + .voltype = "cluster/distribute", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.min-free-inodes", + .voltype = "cluster/distribute", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.rebalance-stats", + .voltype = "cluster/distribute", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.subvols-per-directory", + .voltype = "cluster/distribute", + .option = "directory-layout-spread", + .op_version = 2, + .validate_fn = validate_subvols_per_directory, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.readdir-optimize", + .voltype = "cluster/distribute", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.rsync-hash-regex", + .voltype = "cluster/distribute", + .type = NO_DOC, + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.extra-hash-regex", + .voltype = "cluster/distribute", + .type = NO_DOC, + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.dht-xattr-name", + .voltype = "cluster/distribute", + .option = "xattr-name", + .type = NO_DOC, + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* NUFA xlator options (Distribute special case) */ + { .key = "cluster.nufa", + .voltype = "cluster/distribute", + .option = "!nufa", + .type = NO_DOC, + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.local-volume-name", + .voltype = "cluster/nufa", + .option = "local-volume-name", + .type = NO_DOC, + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* Switch xlator options (Distribute special case) */ + { .key = "cluster.switch", + .voltype = "cluster/distribute", + .option = "!switch", + .type = NO_DOC, + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.switch-pattern", + .voltype = "cluster/switch", + .option = "pattern.switch.case", + .type = NO_DOC, + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* AFR xlator options */ + { .key = 
"cluster.entry-change-log", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.read-subvolume", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.read-subvolume-index", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.read-hash-mode", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.background-self-heal-count", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.metadata-self-heal", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.data-self-heal", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.entry-self-heal", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.self-heal-daemon", + .voltype = "cluster/replicate", + .option = "!self-heal-daemon", + .op_version = 1 + }, + { .key = "cluster.heal-timeout", + .voltype = "cluster/replicate", + .option = "!heal-timeout", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.strict-readdir", + .voltype = "cluster/replicate", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.self-heal-window-size", + .voltype = "cluster/replicate", + .option = "data-self-heal-window-size", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.data-change-log", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.metadata-change-log", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.data-self-heal-algorithm", + .voltype = "cluster/replicate", + .option = "data-self-heal-algorithm", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.eager-lock", + .voltype = "cluster/replicate", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.quorum-type", + .voltype = "cluster/replicate", + .option = "quorum-type", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.quorum-count", + .voltype = "cluster/replicate", + .option = "quorum-count", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.choose-local", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.self-heal-readdir-size", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.post-op-delay-secs", + .voltype = "cluster/replicate", + .type = NO_DOC, + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.readdir-failover", + .voltype = "cluster/replicate", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.ensure-durability", + .voltype = "cluster/replicate", + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* Stripe xlator options */ + { .key = "cluster.stripe-block-size", + .voltype = "cluster/stripe", + .option = "block-size", + .op_version = 1, + .validate_fn = validate_stripe, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "cluster.stripe-coalesce", + .voltype = "cluster/stripe", + .option = "coalesce", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* IO-stats xlator options */ + { .key = 
VKEY_DIAG_LAT_MEASUREMENT, + .voltype = "debug/io-stats", + .option = "latency-measurement", + .value = "off", + .op_version = 1 + }, + { .key = "diagnostics.dump-fd-stats", + .voltype = "debug/io-stats", + .op_version = 1 + }, + { .key = VKEY_DIAG_CNT_FOP_HITS, + .voltype = "debug/io-stats", + .option = "count-fop-hits", + .value = "off", + .type = NO_DOC, + .op_version = 1 + }, + { .key = "diagnostics.brick-log-level", + .voltype = "debug/io-stats", + .option = "!brick-log-level", + .op_version = 1 + }, + { .key = "diagnostics.client-log-level", + .voltype = "debug/io-stats", + .option = "!client-log-level", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "diagnostics.brick-sys-log-level", + .voltype = "debug/io-stats", + .option = "!sys-log-level", + .op_version = 1 + }, + { .key = "diagnostics.client-sys-log-level", + .voltype = "debug/io-stats", + .option = "!sys-log-level", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* IO-cache xlator options */ + { .key = "performance.cache-max-file-size", + .voltype = "performance/io-cache", + .option = "max-file-size", + .op_version = 1, + .validate_fn = validate_cache_max_min_size, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.cache-min-file-size", + .voltype = "performance/io-cache", + .option = "min-file-size", + .op_version = 1, + .validate_fn = validate_cache_max_min_size, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.cache-refresh-timeout", + .voltype = "performance/io-cache", + .option = "cache-timeout", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.cache-priority", + .voltype = "performance/io-cache", + .option = "priority", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.cache-size", + .voltype = "performance/io-cache", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* IO-threads xlator options */ + { .key = "performance.io-thread-count", + .voltype = "performance/io-threads", + .option = "thread-count", + .op_version = 1 + }, + { .key = "performance.high-prio-threads", + .voltype = "performance/io-threads", + .op_version = 1 + }, + { .key = "performance.normal-prio-threads", + .voltype = "performance/io-threads", + .op_version = 1 + }, + { .key = "performance.low-prio-threads", + .voltype = "performance/io-threads", + .op_version = 1 + }, + { .key = "performance.least-prio-threads", + .voltype = "performance/io-threads", + .op_version = 1 + }, + { .key = "performance.enable-least-priority", + .voltype = "performance/io-threads", + .op_version = 1 + }, + { .key = "performance.least-rate-limit", + .voltype = "performance/io-threads", + .op_version = 2 + }, + + /* Other perf xlators' options */ + { .key = "performance.cache-size", + .voltype = "performance/quick-read", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.flush-behind", + .voltype = "performance/write-behind", + .option = "flush-behind", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.write-behind-window-size", + .voltype = "performance/write-behind", + .option = "cache-size", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.strict-o-direct", + .voltype = "performance/write-behind", + .option = "strict-O_DIRECT", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.strict-write-ordering", + .voltype = "performance/write-behind", + .option = "strict-write-ordering", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key 
= "performance.lazy-open", + .voltype = "performance/open-behind", + .option = "lazy-open", + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.read-ahead-page-count", + .voltype = "performance/read-ahead", + .option = "page-count", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.md-cache-timeout", + .voltype = "performance/md-cache", + .option = "md-cache-timeout", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* Crypt xlator options */ + + { .key = "features.encryption", + .voltype = "encryption/crypt", + .option = "!feat", + .value = "off", + .op_version = 3, + .description = "enable/disable client-side encryption for " + "the volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + + { .key = "encryption.master-key", + .voltype = "encryption/crypt", + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "encryption.data-key-size", + .voltype = "encryption/crypt", + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "encryption.block-size", + .voltype = "encryption/crypt", + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* Client xlator options */ + { .key = "network.frame-timeout", + .voltype = "protocol/client", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "network.ping-timeout", + .voltype = "protocol/client", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "network.tcp-window-size", + .voltype = "protocol/client", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "features.lock-heal", + .voltype = "protocol/client", + .option = "lk-heal", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "features.grace-timeout", + .voltype = "protocol/client", + .option = "grace-timeout", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "client.ssl", + .voltype = "protocol/client", + .option = "transport.socket.ssl-enabled", + .type = NO_DOC, + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "network.remote-dio", + .voltype = "protocol/client", + .option = "filter-O_DIRECT", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* Server xlator options */ + { .key = "network.tcp-window-size", + .voltype = "protocol/server", + .op_version = 1 + }, + { .key = "network.inode-lru-limit", + .voltype = "protocol/server", + .op_version = 1 + }, + { .key = AUTH_ALLOW_MAP_KEY, + .voltype = "protocol/server", + .option = "!server-auth", + .value = "*", + .op_version = 1 + }, + { .key = AUTH_REJECT_MAP_KEY, + .voltype = "protocol/server", + .option = "!server-auth", + .op_version = 1 + }, + { .key = "transport.keepalive", + .voltype = "protocol/server", + .option = "transport.socket.keepalive", + .type = NO_DOC, + .op_version = 1 + }, + { .key = "server.allow-insecure", + .voltype = "protocol/server", + .option = "rpc-auth-allow-insecure", + .type = NO_DOC, + .op_version = 1 + }, + { .key = "server.root-squash", + .voltype = "protocol/server", + .option = "root-squash", + .op_version = 2 + }, + { .key = "server.statedump-path", + .voltype = "protocol/server", + .option = "statedump-path", + .op_version = 1 + }, + { .key = "server.outstanding-rpc-limit", + .voltype = "protocol/server", + .option = "rpc.outstanding-rpc-limit", + .type = GLOBAL_DOC, + .op_version = 3 + }, + { .key = "features.lock-heal", + .voltype = "protocol/server", + .option = "lk-heal", + .type = NO_DOC, + .op_version = 1 + }, + { .key = "features.grace-timeout", + .voltype = "protocol/server", + .option = 
"grace-timeout", + .type = NO_DOC, + .op_version = 1 + }, + { .key = "server.ssl", + .voltype = "protocol/server", + .option = "transport.socket.ssl-enabled", + .type = NO_DOC, + .op_version = 2 + }, + + /* Performance xlators enable/disbable options */ + { .key = "performance.write-behind", + .voltype = "performance/write-behind", + .option = "!perf", + .value = "on", + .op_version = 1, + .description = "enable/disable write-behind translator in the " + "volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.read-ahead", + .voltype = "performance/read-ahead", + .option = "!perf", + .value = "on", + .op_version = 1, + .description = "enable/disable read-ahead translator in the volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.readdir-ahead", + .voltype = "performance/readdir-ahead", + .option = "!perf", + .value = "off", + .op_version = 3, + .description = "enable/disable readdir-ahead translator in the volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + + { .key = "performance.io-cache", + .voltype = "performance/io-cache", + .option = "!perf", + .value = "on", + .op_version = 1, + .description = "enable/disable io-cache translator in the volume.", + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "performance.quick-read", + .voltype = "performance/quick-read", + .option = "!perf", + .value = "on", + .op_version = 1, + .description = "enable/disable quick-read translator in the volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + + }, + { .key = "performance.open-behind", + .voltype = "performance/open-behind", + .option = "!perf", + .value = "on", + .op_version = 2, + .description = "enable/disable open-behind translator in the volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + + }, + { .key = "performance.stat-prefetch", + .voltype = "performance/md-cache", + .option = "!perf", + .value = "on", + .op_version = 1, + .description = "enable/disable meta-data caching translator in the " + "volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.client-io-threads", + .voltype = "performance/io-threads", + .option = "!perf", + .value = "off", + .op_version = 1, + .description = "enable/disable io-threads translator in the client " + "graph of volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.nfs.write-behind", + .voltype = "performance/write-behind", + .option = "!nfsperf", + .value = "on", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.nfs.read-ahead", + .voltype = "performance/read-ahead", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.nfs.io-cache", + .voltype = "performance/io-cache", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.nfs.quick-read", + .voltype = "performance/quick-read", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.nfs.stat-prefetch", + .voltype = "performance/md-cache", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.nfs.io-threads", + .voltype = "performance/io-threads", + .option = "!nfsperf", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = 
OPT_FLAG_XLATOR_OPT + }, + { .key = "performance.force-readdirp", + .voltype = "performance/md-cache", + .option = "force-readdirp", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* Feature translators */ + { .key = "features.file-snapshot", + .voltype = "features/qemu-block", + .option = "!feat", + .value = "off", + .op_version = 3, + .description = "enable/disable file-snapshot feature in the " + "volume.", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + +#ifdef HAVE_LIB_Z + /* Compressor-decompressor xlator options + * defaults used from xlator/feature/compress/src/cdc.h + */ + { .key = "features.compress", + .voltype = "features/cdc", + .option = "!compress", + .value = "off", + .type = NO_DOC, + .op_version = 2, + .description = "enable/disable compression translator" + }, + { .key = "compress.mode", + .voltype = "features/cdc", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "compress.window-size", + .voltype = "features/cdc", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "compress.mem-level", + .voltype = "features/cdc", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "compress.min-size", + .voltype = "features/cdc", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "compress.compression-level", + .voltype = "features/cdc", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "compress.debug", + .voltype = "features/cdc", + .type = NO_DOC, + .op_version = 2 + }, + #endif + + /* Quota xlator options */ + { .key = VKEY_FEATURES_LIMIT_USAGE, + .voltype = "features/quota", + .option = "limit-set", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "features.quota-timeout", + .voltype = "features/quota", + .option = "timeout", + .value = "0", + .op_version = 1, + .validate_fn = validate_quota, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "features.quota-deem-statfs", + .voltype = "features/quota", + .option = "deem-statfs", + .value = "off", + .type = DOC, + .op_version = 3, + .validate_fn = validate_quota, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* Marker xlator options */ + { .key = VKEY_MARKER_XTIME, + .voltype = "features/marker", + .option = "xtime", + .value = "off", + .type = NO_DOC, + .flags = OPT_FLAG_FORCE, + .op_version = 1 + }, + { .key = VKEY_MARKER_XTIME, + .voltype = "features/marker", + .option = "!xtime", + .value = "off", + .type = NO_DOC, + .flags = OPT_FLAG_FORCE, + .op_version = 1 + }, + { .key = VKEY_MARKER_XTIME_FORCE, + .voltype = "features/marker", + .option = "gsync-force-xtime", + .value = "off", + .type = NO_DOC, + .flags = OPT_FLAG_FORCE, + .op_version = 2 + }, + { .key = VKEY_MARKER_XTIME_FORCE, + .voltype = "features/marker", + .option = "!gsync-force-xtime", + .value = "off", + .type = NO_DOC, + .flags = OPT_FLAG_FORCE, + .op_version = 2 + }, + { .key = VKEY_FEATURES_QUOTA, + .voltype = "features/marker", + .option = "quota", + .value = "off", + .type = NO_DOC, + .flags = OPT_FLAG_FORCE, + .op_version = 1 + }, + + /* Debug xlators options */ + { .key = "debug.trace", + .voltype = "debug/trace", + .option = "!debug", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "debug.log-history", + .voltype = "debug/trace", + .option = "log-history", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "debug.log-file", + .voltype = "debug/trace", + .option = "log-file", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "debug.exclude-ops", + .voltype = "debug/trace", + .option = "exclude-ops", + .type = NO_DOC, + .op_version = 2 + }, + { 
.key = "debug.include-ops", + .voltype = "debug/trace", + .option = "include-ops", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "debug.error-gen", + .voltype = "debug/error-gen", + .option = "!debug", + .value = "off", + .type = NO_DOC, + .op_version = 1, + .flags = OPT_FLAG_XLATOR_OPT + }, + { .key = "debug.error-failure", + .voltype = "debug/error-gen", + .option = "failure", + .type = NO_DOC, + .op_version = 3 + }, + { .key = "debug.error-number", + .voltype = "debug/error-gen", + .option = "error-no", + .type = NO_DOC, + .op_version = 3 + }, + { .key = "debug.random-failure", + .voltype = "debug/error-gen", + .option = "random-failure", + .type = NO_DOC, + .op_version = 3 + }, + { .key = "debug.error-fops", + .voltype = "debug/error-gen", + .option = "enable", + .type = NO_DOC, + .op_version = 3 + }, + + + /* NFS xlator options */ + { .key = "nfs.enable-ino32", + .voltype = "nfs/server", + .option = "nfs.enable-ino32", + .type = GLOBAL_DOC, + .op_version = 1 + }, + { .key = "nfs.mem-factor", + .voltype = "nfs/server", + .option = "nfs.mem-factor", + .type = GLOBAL_DOC, + .op_version = 1 + }, + { .key = "nfs.export-dirs", + .voltype = "nfs/server", + .option = "nfs3.export-dirs", + .type = GLOBAL_DOC, + .op_version = 1 + }, + { .key = "nfs.export-volumes", + .voltype = "nfs/server", + .option = "nfs3.export-volumes", + .type = GLOBAL_DOC, + .op_version = 1 + }, + { .key = "nfs.addr-namelookup", + .voltype = "nfs/server", + .option = "rpc-auth.addr.namelookup", + .type = GLOBAL_DOC, + .op_version = 1 + }, + { .key = "nfs.dynamic-volumes", + .voltype = "nfs/server", + .option = "nfs.dynamic-volumes", + .type = GLOBAL_DOC, + .op_version = 1 + }, + { .key = "nfs.register-with-portmap", + .voltype = "nfs/server", + .option = "rpc.register-with-portmap", + .type = GLOBAL_DOC, + .op_version = 1 + }, + { .key = "nfs.outstanding-rpc-limit", + .voltype = "nfs/server", + .option = "rpc.outstanding-rpc-limit", + .type = GLOBAL_DOC, + .op_version = 3 + }, + { .key = "nfs.port", + .voltype = "nfs/server", + .option = "nfs.port", + .type = GLOBAL_DOC, + .op_version = 1 + }, + { .key = "nfs.rpc-auth-unix", + .voltype = "nfs/server", + .option = "!rpc-auth.auth-unix.*", + .op_version = 1 + }, + { .key = "nfs.rpc-auth-null", + .voltype = "nfs/server", + .option = "!rpc-auth.auth-null.*", + .op_version = 1 + }, + { .key = "nfs.rpc-auth-allow", + .voltype = "nfs/server", + .option = "!rpc-auth.addr.*.allow", + .op_version = 1 + }, + { .key = "nfs.rpc-auth-reject", + .voltype = "nfs/server", + .option = "!rpc-auth.addr.*.reject", + .op_version = 1 + }, + { .key = "nfs.ports-insecure", + .voltype = "nfs/server", + .option = "!rpc-auth.ports.*.insecure", + .op_version = 1 + }, + { .key = "nfs.transport-type", + .voltype = "nfs/server", + .option = "!nfs.transport-type", + .value = "tcp", + .op_version = 1, + .description = "Specifies the nfs transport type. Valid " + "transport types are 'tcp' and 'rdma'." 
+ }, + { .key = "nfs.trusted-sync", + .voltype = "nfs/server", + .option = "!nfs3.*.trusted-sync", + .op_version = 1 + }, + { .key = "nfs.trusted-write", + .voltype = "nfs/server", + .option = "!nfs3.*.trusted-write", + .op_version = 1 + }, + { .key = "nfs.volume-access", + .voltype = "nfs/server", + .option = "!nfs3.*.volume-access", + .op_version = 1 + }, + { .key = "nfs.export-dir", + .voltype = "nfs/server", + .option = "!nfs3.*.export-dir", + .op_version = 1 + }, + { .key = NFS_DISABLE_MAP_KEY, + .voltype = "nfs/server", + .option = "!nfs-disable", + .op_version = 1 + }, + { .key = "nfs.nlm", + .voltype = "nfs/server", + .option = "nfs.nlm", + .type = GLOBAL_DOC, + .op_version = 1 + }, + { .key = "nfs.acl", + .voltype = "nfs/server", + .option = "nfs.acl", + .type = GLOBAL_DOC, + .op_version = 3 + }, + { .key = "nfs.mount-udp", + .voltype = "nfs/server", + .option = "nfs.mount-udp", + .type = GLOBAL_DOC, + .op_version = 1 + }, + { .key = "nfs.mount-rmtab", + .voltype = "nfs/server", + .option = "nfs.mount-rmtab", + .type = GLOBAL_DOC, + .op_version = 1 + }, + { .key = "nfs.server-aux-gids", + .voltype = "nfs/server", + .option = "nfs.server-aux-gids", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "nfs.drc", + .voltype = "nfs/server", + .option = "nfs.drc", + .type = GLOBAL_DOC, + .op_version = 3 + }, + { .key = "nfs.drc-size", + .voltype = "nfs/server", + .option = "nfs.drc-size", + .type = GLOBAL_DOC, + .op_version = 3 + }, + { .key = "nfs.read-size", + .voltype = "nfs/server", + .option = "nfs3.read-size", + .type = GLOBAL_DOC, + .op_version = 3 + }, + { .key = "nfs.write-size", + .voltype = "nfs/server", + .option = "nfs3.write-size", + .type = GLOBAL_DOC, + .op_version = 3 + }, + { .key = "nfs.readdir-size", + .voltype = "nfs/server", + .option = "nfs3.readdir-size", + .type = GLOBAL_DOC, + .op_version = 3 + }, + + /* Other options which don't fit any place above */ + { .key = "features.read-only", + .voltype = "features/read-only", + .option = "!read-only", + .value = "off", + .op_version = 1, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "features.worm", + .voltype = "features/worm", + .option = "!worm", + .value = "off", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "storage.linux-aio", + .voltype = "storage/posix", + .op_version = 1 + }, + { .key = "storage.batch-fsync-mode", + .voltype = "storage/posix", + .op_version = 3 + }, + { .key = "storage.batch-fsync-delay-usec", + .voltype = "storage/posix", + .op_version = 3 + }, + { .key = "storage.owner-uid", + .voltype = "storage/posix", + .option = "brick-uid", + .op_version = 1 + }, + { .key = "storage.owner-gid", + .voltype = "storage/posix", + .option = "brick-gid", + .op_version = 1 + }, + { .key = "storage.node-uuid-pathinfo", + .voltype = "storage/posix", + .op_version = 3 + }, + { .key = "storage.health-check-interval", + .voltype = "storage/posix", + .op_version = 3 + }, + { .key = "storage.bd-aio", + .voltype = "storage/bd", + .op_version = 3 + }, + { .key = "config.memory-accounting", + .voltype = "configuration", + .option = "!config", + .op_version = 2, + .flags = OPT_FLAG_CLIENT_OPT + }, + { .key = "config.transport", + .voltype = "configuration", + .option = "!config", + .op_version = 2 + }, + { .key = GLUSTERD_QUORUM_TYPE_KEY, + .voltype = "mgmt/glusterd", + .value = "off", + .op_version = 2 + }, + { .key = GLUSTERD_QUORUM_RATIO_KEY, + .voltype = "mgmt/glusterd", + .value = "0", + .op_version = 2 + }, + /* changelog translator - global tunables 
*/ + { .key = "changelog.changelog", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "changelog.changelog-dir", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "changelog.encoding", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "changelog.rollover-time", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 2 + }, + { .key = "changelog.fsync-interval", + .voltype = "features/changelog", + .type = NO_DOC, + .op_version = 2 + }, + { .key = NULL + } +}; diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 9d9820e84..59288ada0 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1,28 +1,19 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is GF_FREE software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" #endif #include <time.h> +#include <grp.h> #include <sys/uio.h> #include <sys/resource.h> @@ -43,13 +34,29 @@ #include "glusterd-sm.h" #include "glusterd-op-sm.h" #include "glusterd-store.h" +#include "glusterd-hooks.h" +#include "glusterd-utils.h" +#include "glusterd-locks.h" +#include "common-utils.h" +#include "run.h" + +#include "syncop.h" + +#include "glusterd-mountbroker.h" -static uuid_t glusterd_uuid; -extern struct rpcsvc_program glusterd1_mop_prog; extern struct rpcsvc_program gluster_handshake_prog; -extern struct rpc_clnt_program glusterd3_1_mgmt_prog; +extern struct rpcsvc_program gluster_cli_getspec_prog; extern struct rpcsvc_program gluster_pmap_prog; extern glusterd_op_info_t opinfo; +extern struct rpcsvc_program gd_svc_mgmt_prog; +extern struct rpcsvc_program gd_svc_mgmt_v3_prog; +extern struct rpcsvc_program gd_svc_peer_prog; +extern struct rpcsvc_program gd_svc_cli_prog; +extern struct rpcsvc_program gd_svc_cli_prog_ro; +extern struct rpc_clnt_program gd_brick_prog; +extern struct rpcsvc_program glusterd_mgmt_hndsk_prog; + +extern char snap_mount_folder[PATH_MAX]; rpcsvc_cbk_program_t glusterd_cbk_prog = { .progname = "Gluster Callback", @@ -57,49 +64,92 @@ rpcsvc_cbk_program_t glusterd_cbk_prog = { .progver = GLUSTER_CBK_VERSION, }; +struct rpcsvc_program *gd_inet_programs[] = { + &gd_svc_peer_prog, + &gd_svc_cli_prog_ro, + &gd_svc_mgmt_prog, + &gd_svc_mgmt_v3_prog, + &gluster_pmap_prog, + &gluster_handshake_prog, + &glusterd_mgmt_hndsk_prog, +}; +int gd_inet_programs_count = (sizeof (gd_inet_programs) / + sizeof (gd_inet_programs[0])); + +struct rpcsvc_program *gd_uds_programs[] = { + &gd_svc_cli_prog, + &gluster_cli_getspec_prog, +}; +int gd_uds_programs_count = (sizeof (gd_uds_programs) / + sizeof (gd_uds_programs[0])); + +const char *gd_op_list[GD_OP_MAX + 1] = { + [GD_OP_NONE] = "Invalid op", + [GD_OP_CREATE_VOLUME] = "Create", + [GD_OP_START_BRICK] = "Start Brick", + [GD_OP_STOP_BRICK] = "Stop Brick", + [GD_OP_DELETE_VOLUME] = "Delete", + [GD_OP_START_VOLUME] = "Start", + [GD_OP_STOP_VOLUME] = "Stop", + [GD_OP_DEFRAG_VOLUME] = "Rebalance", + [GD_OP_ADD_BRICK] = "Add brick", + [GD_OP_REMOVE_BRICK] = "Remove brick", + [GD_OP_REPLACE_BRICK] = "Replace brick", + [GD_OP_SET_VOLUME] = "Set", + [GD_OP_RESET_VOLUME] = "Reset", + [GD_OP_SYNC_VOLUME] = "Sync", + [GD_OP_LOG_ROTATE] = "Log rotate", + [GD_OP_GSYNC_SET] = "Geo-replication", + [GD_OP_PROFILE_VOLUME] = "Profile", + [GD_OP_QUOTA] = "Quota", + [GD_OP_STATUS_VOLUME] = "Status", + [GD_OP_REBALANCE] = "Rebalance", + [GD_OP_HEAL_VOLUME] = "Heal", + [GD_OP_STATEDUMP_VOLUME] = "Statedump", + [GD_OP_LIST_VOLUME] = "Lists", + [GD_OP_CLEARLOCKS_VOLUME] = "Clear locks", + [GD_OP_DEFRAG_BRICK_VOLUME] = "Rebalance", + [GD_OP_COPY_FILE] = "Copy File", + [GD_OP_SYS_EXEC] = "Execute system commands", + [GD_OP_GSYNC_CREATE] = "Geo-replication Create", + [GD_OP_SNAP] = "Snapshot", + [GD_OP_MAX] = "Invalid op" +}; static int glusterd_opinfo_init () { int32_t ret = -1; - ret = pthread_mutex_init (&opinfo.lock, NULL); + opinfo.op = GD_OP_NONE; + ret = 0; return ret; } -static int -glusterd_uuid_init (int flag) + +int +glusterd_uuid_init () { int ret = -1; - char str[50] = {0,}; + xlator_t *this = NULL; glusterd_conf_t *priv = NULL; - priv = THIS->private; + this = THIS; + GF_ASSERT (this); + priv = this->private; - if (!flag) { - ret = glusterd_retrieve_uuid (); - if (!ret) { - uuid_unparse (priv->uuid, str); - uuid_copy (glusterd_uuid, priv->uuid); - gf_log ("glusterd", GF_LOG_NORMAL, 
- "retrieved UUID: %s", str); - return 0; - } + ret = glusterd_retrieve_uuid (); + if (ret == 0) { + gf_log (this->name, GF_LOG_INFO, + "retrieved UUID: %s", uuid_utoa (priv->uuid)); + return 0; } - uuid_generate (glusterd_uuid); - uuid_unparse (glusterd_uuid, str); - - gf_log ("glusterd", GF_LOG_NORMAL, - "generated UUID: %s",str); - uuid_copy (priv->uuid, glusterd_uuid); - - ret = glusterd_store_uuid (); + ret = glusterd_uuid_generate_save (); if (ret) { gf_log ("glusterd", GF_LOG_ERROR, - "Unable to store generated UUID"); + "Unable to generate and save new UUID"); return ret; } @@ -107,6 +157,63 @@ glusterd_uuid_init (int flag) } int +glusterd_uuid_generate_save () +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); + + uuid_generate (priv->uuid); + + gf_log (this->name, GF_LOG_INFO, "generated UUID: %s", + uuid_utoa (priv->uuid)); + + ret = glusterd_store_global_info (this); + + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to store the generated uuid %s", + uuid_utoa (priv->uuid)); + + return ret; +} + +int +glusterd_options_init (xlator_t *this) +{ + int ret = -1; + glusterd_conf_t *priv = NULL; + char *initial_version = "0"; + + priv = this->private; + + priv->opts = dict_new (); + if (!priv->opts) + goto out; + + ret = glusterd_store_retrieve_options (this); + if (ret == 0) + goto out; + + ret = dict_set_str (priv->opts, GLUSTERD_GLOBAL_OPT_VERSION, + initial_version); + if (ret) + goto out; + ret = glusterd_store_options (this, priv->opts); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Unable to store version"); + return ret; + } +out: + + return 0; +} +int glusterd_fetchspec_notify (xlator_t *this) { int ret = -1; @@ -115,10 +222,15 @@ glusterd_fetchspec_notify (xlator_t *this) priv = this->private; - list_for_each_entry (trans, &priv->xprt_list, list) { - rpcsvc_callback_submit (priv->rpc, trans, &glusterd_cbk_prog, - GF_CBK_FETCHSPEC, NULL, 0); + pthread_mutex_lock (&priv->xprt_lock); + { + list_for_each_entry (trans, &priv->xprt_list, list) { + rpcsvc_callback_submit (priv->rpc, trans, + &glusterd_cbk_prog, + GF_CBK_FETCHSPEC, NULL, 0); + } } + pthread_mutex_unlock (&priv->xprt_lock); ret = 0; @@ -176,12 +288,16 @@ glusterd_rpcsvc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, { INIT_LIST_HEAD (&xprt->list); + pthread_mutex_lock (&priv->xprt_lock); list_add_tail (&xprt->list, &priv->xprt_list); + pthread_mutex_unlock (&priv->xprt_lock); break; } case RPCSVC_EVENT_DISCONNECT: { + pthread_mutex_lock (&priv->xprt_lock); list_del (&xprt->list); + pthread_mutex_unlock (&priv->xprt_lock); pmap_registry_remove (this, 0, NULL, GF_PMAP_PORT_NONE, xprt); break; } @@ -214,6 +330,810 @@ out: return ret; } +int +glusterd_rpcsvc_options_build (dict_t *options) +{ + int ret = 0; + uint32_t backlog = 0; + + ret = dict_get_uint32 (options, "transport.socket.listen-backlog", + &backlog); + + if (ret) { + backlog = GLUSTERD_SOCKET_LISTEN_BACKLOG; + ret = dict_set_uint32 (options, + "transport.socket.listen-backlog", + backlog); + if (ret) + goto out; + } + + gf_log ("", GF_LOG_DEBUG, "listen-backlog value: %d", backlog); + +out: + return ret; +} + +#if SYNCDAEMON_COMPILE +static int +glusterd_check_gsync_present (int *valid_state) +{ + char buff[PATH_MAX] = {0, }; + runner_t runner = {0,}; + char *ptr = NULL; + int ret = 0; + + runinit (&runner); + runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--version", NULL); + runner_redir (&runner, STDOUT_FILENO, RUN_PIPE); + 
ret = runner_start (&runner); + if (ret == -1) { + if (errno == ENOENT) { + gf_log ("glusterd", GF_LOG_INFO, GEOREP + " module not installed in the system"); + *valid_state = 0; + } + else { + gf_log ("glusterd", GF_LOG_ERROR, GEOREP + " module not working as desired"); + *valid_state = -1; + } + goto out; + } + + ptr = fgets(buff, sizeof(buff), runner_chio (&runner, STDOUT_FILENO)); + if (ptr) { + if (!strstr (buff, "gsyncd")) { + ret = -1; + gf_log ("glusterd", GF_LOG_ERROR, GEOREP" module not " + "working as desired"); + *valid_state = -1; + goto out; + } + } else { + ret = -1; + gf_log ("glusterd", GF_LOG_ERROR, GEOREP" module not " + "working as desired"); + *valid_state = -1; + goto out; + } + + ret = 0; + out: + + runner_end (&runner); + + gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); + return ret; + +} + +static int +group_write_allow (char *path, gid_t gid) +{ + struct stat st = {0,}; + int ret = 0; + + ret = stat (path, &st); + if (ret == -1) + goto out; + GF_ASSERT (S_ISDIR (st.st_mode)); + + ret = chown (path, -1, gid); + if (ret == -1) + goto out; + + ret = chmod (path, (st.st_mode & ~S_IFMT) | S_IWGRP|S_IXGRP|S_ISVTX); + + out: + if (ret == -1) + gf_log ("", GF_LOG_CRITICAL, + "failed to set up write access to %s for group %d (%s)", + path, gid, strerror (errno)); + return ret; +} + +static int +glusterd_crt_georep_folders (char *georepdir, glusterd_conf_t *conf) +{ + char *greplg_s = NULL; + struct group *gr = NULL; + int ret = 0; + + GF_ASSERT (georepdir); + GF_ASSERT (conf); + + if (strlen (conf->workdir)+2 > PATH_MAX-strlen(GEOREP)) { + ret = -1; + gf_log ("glusterd", GF_LOG_CRITICAL, + "directory path %s/"GEOREP" is longer than PATH_MAX", + conf->workdir); + goto out; + } + + snprintf (georepdir, PATH_MAX, "%s/"GEOREP, conf->workdir); + ret = mkdir_p (georepdir, 0777, _gf_true); + if (-1 == ret) { + gf_log ("glusterd", GF_LOG_CRITICAL, + "Unable to create "GEOREP" directory %s", + georepdir); + goto out; + } + + if (strlen (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP) >= PATH_MAX) { + ret = -1; + gf_log ("glusterd", GF_LOG_CRITICAL, + "directory path "DEFAULT_LOG_FILE_DIRECTORY"/" + GEOREP" is longer than PATH_MAX"); + goto out; + } + ret = mkdir_p (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP, 0777, _gf_true); + if (-1 == ret) { + gf_log ("glusterd", GF_LOG_CRITICAL, + "Unable to create "GEOREP" log directory"); + goto out; + } + + /* Slave log file directory */ + if (strlen(DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves") >= PATH_MAX) { + ret = -1; + gf_log ("glusterd", GF_LOG_CRITICAL, + "directory path "DEFAULT_LOG_FILE_DIRECTORY"/" + GEOREP"-slaves"" is longer than PATH_MAX"); + goto out; + } + ret = mkdir_p (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves", 0777, + _gf_true); + if (-1 == ret) { + gf_log ("glusterd", GF_LOG_CRITICAL, + "Unable to create "GEOREP" slave log directory"); + goto out; + } + + /* MountBroker log file directory */ + if (strlen(DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr") >= PATH_MAX) { + ret = -1; + gf_log ("glusterd", GF_LOG_CRITICAL, + "directory path "DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP + "-slaves/mbr"" is longer than PATH_MAX"); + goto out; + } + ret = mkdir_p (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr", 0777, + _gf_true); + if (-1 == ret) { + gf_log ("glusterd", GF_LOG_CRITICAL, + "Unable to create "GEOREP" mountbroker slave log directory"); + goto out; + } + + ret = dict_get_str (THIS->options, GEOREP"-log-group", &greplg_s); + if (ret) + ret = 0; + else { + gr = getgrnam (greplg_s); + if (!gr) { + gf_log ("glusterd", GF_LOG_CRITICAL, + 
"group "GEOREP"-log-group %s does not exist", greplg_s); + ret = -1; + goto out; + } + + ret = group_write_allow (DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP, + gr->gr_gid); + if (ret == 0) + ret = group_write_allow (DEFAULT_LOG_FILE_DIRECTORY"/" + GEOREP"-slaves", gr->gr_gid); + if (ret == 0) + ret = group_write_allow (DEFAULT_LOG_FILE_DIRECTORY"/" + GEOREP"-slaves/mbr", gr->gr_gid); + } + + out: + gf_log("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static void +runinit_gsyncd_setrx (runner_t *runner, glusterd_conf_t *conf) +{ + runinit (runner); + runner_add_args (runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL); + runner_argprintf (runner, "%s/"GSYNC_CONF_TEMPLATE, conf->workdir); + runner_add_arg (runner, "--config-set-rx"); +} + +static int +configure_syncdaemon (glusterd_conf_t *conf) +#define RUN_GSYNCD_CMD do { \ + ret = runner_run_reuse (&runner); \ + if (ret == -1) { \ + runner_log (&runner, "glusterd", GF_LOG_ERROR, "command failed"); \ + runner_end (&runner); \ + goto out; \ + } \ + runner_end (&runner); \ +} while (0) +{ + int ret = 0; + runner_t runner = {0,}; + char georepdir[PATH_MAX] = {0,}; + int valid_state = 0; + + ret = setenv ("_GLUSTERD_CALLED_", "1", 1); + if (ret < 0) { + ret = 0; + goto out; + } + valid_state = -1; + ret = glusterd_check_gsync_present (&valid_state); + if (-1 == ret) { + ret = valid_state; + goto out; + } + + glusterd_crt_georep_folders (georepdir, conf); + if (ret) { + ret = 0; + goto out; + } + + /************ + * master pre-configuration + ************/ + + /* remote-gsyncd */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, "remote-gsyncd", GSYNCD_PREFIX"/gsyncd", ".", ".", NULL); + RUN_GSYNCD_CMD; + + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, "remote-gsyncd", "/nonexistent/gsyncd", + ".", "^ssh:", NULL); + RUN_GSYNCD_CMD; + + /* gluster-command-dir */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/", + ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-params */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, "gluster-params", + "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* ssh-command */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "ssh-command"); + runner_argprintf (&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/secret.pem", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* pid-file */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "pid-file"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.pid", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-file */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "state-file"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.status", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-detail-file */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "state-detail-file"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status", + georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-detail-file */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "state-detail-file"); + runner_argprintf (&runner, 
"%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status", + georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* state-socket */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "state-socket-unencoded"); + runner_argprintf (&runner, "%s/${mastervol}/${eSlave}.socket", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* socketdir */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, "socketdir", GLUSTERD_SOCK_DIR, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* log-file */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, + "log-file", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}.log", + ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-log-file */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, + "gluster-log-file", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"/${mastervol}/${eSlave}${local_id}.gluster.log", + ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* ignore-deletes */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, "ignore-deletes", "true", ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* special-sync-mode */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, "special-sync-mode", "partial", ".", ".", NULL); + RUN_GSYNCD_CMD; + + /* change-detector == changelog */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args(&runner, "change-detector", "changelog", ".", ".", NULL); + RUN_GSYNCD_CMD; + + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg(&runner, "working-dir"); + runner_argprintf(&runner, "%s/${mastervol}/${eSlave}", + DEFAULT_VAR_RUN_DIRECTORY); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + + /************ + * slave pre-configuration + ************/ + + /* gluster-command-dir */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, "gluster-command-dir", SBIN_DIR"/", + ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-params */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, "gluster-params", + "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + ".", NULL); + RUN_GSYNCD_CMD; + + /* log-file */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, + "log-file", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.log", + ".", NULL); + RUN_GSYNCD_CMD; + + /* MountBroker log-file */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, + "log-file-mbr", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/mbr/${session_owner}:${eSlave}.log", + ".", NULL); + RUN_GSYNCD_CMD; + + /* gluster-log-file */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_args (&runner, + "gluster-log-file", + DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"-slaves/${session_owner}:${eSlave}.gluster.log", + ".", NULL); + RUN_GSYNCD_CMD; + + out: + return ret ? 
-1 : 0; +} +#undef RUN_GSYNCD_CMD +#else /* SYNCDAEMON_COMPILE */ +static int +configure_syncdaemon (glusterd_conf_t *conf) +{ + return 0; +} +#endif /* !SYNCDAEMON_COMPILE */ + + +static int +check_prepare_mountbroker_root (char *mountbroker_root) +{ + int dfd0 = -1; + int dfd = -1; + int dfd2 = -1; + struct stat st = {0,}; + struct stat st2 = {0,}; + int ret = 0; + + ret = open (mountbroker_root, O_RDONLY); + if (ret != -1) { + dfd = ret; + ret = fstat (dfd, &st); + } + if (ret == -1 || !S_ISDIR (st.st_mode)) { + gf_log ("", GF_LOG_ERROR, + "cannot access mountbroker-root directory %s", + mountbroker_root); + ret = -1; + goto out; + } + if (st.st_uid != 0 || + (st.st_mode & (S_IWGRP|S_IWOTH))) { + gf_log ("", GF_LOG_ERROR, + "permissions on mountbroker-root directory %s are " + "too liberal", mountbroker_root); + ret = -1; + goto out; + } + if (!(st.st_mode & (S_IXGRP|S_IXOTH))) { + gf_log ("", GF_LOG_WARNING, + "permissions on mountbroker-root directory %s are " + "probably too strict", mountbroker_root); + } + + dfd0 = dup (dfd); + + for (;;) { + ret = openat (dfd, "..", O_RDONLY); + if (ret != -1) { + dfd2 = ret; + ret = fstat (dfd2, &st2); + } + if (ret == -1) { + gf_log ("", GF_LOG_ERROR, + "error while checking mountbroker-root ancestors " + "%d (%s)", errno, strerror (errno)); + goto out; + } + + if (st2.st_ino == st.st_ino) + break; /* arrived to root */ + + if (st2.st_uid != 0 || + ((st2.st_mode & (S_IWGRP|S_IWOTH)) && + !(st2.st_mode & S_ISVTX))) { + gf_log ("", GF_LOG_ERROR, + "permissions on ancestors of mountbroker-root " + "directory are too liberal"); + ret = -1; + goto out; + } + if (!(st2.st_mode & (S_IXGRP|S_IXOTH))) { + gf_log ("", GF_LOG_WARNING, + "permissions on ancestors of mountbroker-root " + "directory are probably too strict"); + } + + close (dfd); + dfd = dfd2; + st = st2; + } + + ret = mkdirat (dfd0, MB_HIVE, 0711); + if (ret == -1 && errno == EEXIST) + ret = 0; + if (ret != -1) + ret = fstatat (dfd0, MB_HIVE, &st, AT_SYMLINK_NOFOLLOW); + if (ret == -1 || st.st_mode != (S_IFDIR|0711)) { + gf_log ("", GF_LOG_ERROR, + "failed to set up mountbroker-root directory %s", + mountbroker_root); + ret = -1; + goto out; + } + + ret = 0; + + out: + if (dfd0 != -1) + close (dfd0); + if (dfd != -1) + close (dfd); + if (dfd2 != -1) + close (dfd2); + + return ret; +} + +static int +_install_mount_spec (dict_t *opts, char *key, data_t *value, void *data) +{ + glusterd_conf_t *priv = THIS->private; + char *label = NULL; + gf_boolean_t georep = _gf_false; + gf_boolean_t ghadoop = _gf_false; + char *pdesc = value->data; + char *volname = NULL; + int rv = 0; + gf_mount_spec_t *mspec = NULL; + char *user = NULL; + char *volfile_server = NULL; + + label = strtail (key, "mountbroker."); + + /* check for presence of geo-rep/hadoop label */ + if (!label) { + label = strtail (key, "mountbroker-"GEOREP"."); + if (label) + georep = _gf_true; + else { + label = strtail (key, "mountbroker-"GHADOOP"."); + if (label) + ghadoop = _gf_true; + } + } + + if (!label) + return 0; + + mspec = GF_CALLOC (1, sizeof (*mspec), gf_gld_mt_mount_spec); + if (!mspec) + goto err; + mspec->label = label; + + if (georep || ghadoop) { + volname = gf_strdup (pdesc); + if (!volname) + goto err; + user = strchr (volname, ':'); + if (user) { + *user = '\0'; + user++; + } else + user = label; + + if (georep) + rv = make_georep_mountspec (mspec, volname, user); + + if (ghadoop) { + volfile_server = strchr (user, ':'); + if (volfile_server) + *volfile_server++ = '\0'; + else + volfile_server = "localhost"; + + rv = 
make_ghadoop_mountspec (mspec, volname, user, volfile_server); + } + + GF_FREE (volname); + if (rv != 0) + goto err; + } else if (parse_mount_pattern_desc (mspec, pdesc) != 0) + goto err; + + list_add_tail (&mspec->speclist, &priv->mount_specs); + + return 0; + err: + + gf_log ("", GF_LOG_ERROR, + "adding %smount spec failed: label: %s desc: %s", + georep ? GEOREP" " : "", label, pdesc); + + return -1; +} + + +static int +gd_default_synctask_cbk (int ret, call_frame_t *frame, void *opaque) +{ + glusterd_conf_t *priv = THIS->private; + synclock_unlock (&priv->big_lock); + return ret; +} + +static void +glusterd_launch_synctask (synctask_fn_t fn, void *opaque) +{ + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int ret = -1; + + this = THIS; + priv = this->private; + + synclock_lock (&priv->big_lock); + ret = synctask_new (this->ctx->env, fn, gd_default_synctask_cbk, NULL, + opaque); + if (ret) + gf_log (this->name, GF_LOG_CRITICAL, "Failed to spawn bricks" + " and other volume related services"); +} + +int +glusterd_uds_rpcsvc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event, + void *data) +{ + /* glusterd_rpcsvc_notify() does stuff that calls coming in from the + * unix domain socket don't need. This is just an empty function to be + * used for the uds listener. This will be used later if required. + */ + return 0; +} + +/* The glusterd unix domain socket listener only listens for cli */ +rpcsvc_t * +glusterd_init_uds_listener (xlator_t *this) +{ + int ret = -1; + dict_t *options = NULL; + rpcsvc_t *rpc = NULL; + data_t *sock_data = NULL; + char sockfile[PATH_MAX+1] = {0,}; + int i = 0; + + + GF_ASSERT (this); + + sock_data = dict_get (this->options, "glusterd-sockfile"); + if (!sock_data) { + strncpy (sockfile, DEFAULT_GLUSTERD_SOCKFILE, PATH_MAX); + } else { + strncpy (sockfile, sock_data->data, PATH_MAX); + } + + options = dict_new (); + if (!options) + goto out; + + ret = rpcsvc_transport_unix_options_build (&options, sockfile); + if (ret) + goto out; + + rpc = rpcsvc_init (this, this->ctx, options, 8); + if (rpc == NULL) { + ret = -1; + goto out; + } + + ret = rpcsvc_register_notify (rpc, glusterd_uds_rpcsvc_notify, + this); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "Failed to register notify function"); + goto out; + } + + ret = rpcsvc_create_listeners (rpc, options, this->name); + if (ret != 1) { + gf_log (this->name, GF_LOG_DEBUG, "Failed to create listener"); + goto out; + } + ret = 0; + + for (i = 0; i < gd_uds_programs_count; i++) { + ret = glusterd_program_register (this, rpc, gd_uds_programs[i]); + if (ret) { + i--; + for (; i >= 0; i--) + rpcsvc_program_unregister (rpc, + gd_uds_programs[i]); + + goto out; + } + } + +out: + if (options) + dict_unref (options); + + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to start glusterd " + "unix domain socket listener."); + if (rpc) { + GF_FREE (rpc); + rpc = NULL; + } + } + return rpc; +} + +void +glusterd_stop_uds_listener (xlator_t *this) +{ + glusterd_conf_t *conf = NULL; + rpcsvc_listener_t *listener = NULL; + rpcsvc_listener_t *next = NULL; + + GF_ASSERT (this); + conf = this->private; + + (void) rpcsvc_program_unregister (conf->uds_rpc, &gd_svc_cli_prog); + (void) rpcsvc_program_unregister (conf->uds_rpc, &gluster_cli_getspec_prog); + + list_for_each_entry_safe (listener, next, &conf->uds_rpc->listeners, + list) { + rpcsvc_listener_destroy (listener); + } + + (void) rpcsvc_unregister_notify (conf->uds_rpc, glusterd_rpcsvc_notify, + this); + + unlink (DEFAULT_GLUSTERD_SOCKFILE); + + GF_FREE 
(conf->uds_rpc); + conf->uds_rpc = NULL; + + return; +} + +static int +glusterd_init_snap_folder (xlator_t *this) +{ + int ret = -1; + struct stat buf = {0,}; + + GF_ASSERT (this); + + /* Snapshot volumes are mounted under /var/run/gluster/snaps folder. + * But /var/run is normally a symbolic link to /run folder, which + * creates problems as the entry point in the mtab for the mount point + * and glusterd maintained entry point will be different. Therefore + * identify the correct run folder and use it for snap volume mounting. + */ + ret = lstat (GLUSTERD_VAR_RUN_DIR, &buf); + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, + "stat fails on %s, exiting. (errno = %d)", + GLUSTERD_VAR_RUN_DIR, errno); + goto out; + } + + /* If /var/run is symlink then use /run folder */ + if (S_ISLNK (buf.st_mode)) { + strcpy (snap_mount_folder, GLUSTERD_RUN_DIR); + } else { + strcpy (snap_mount_folder, GLUSTERD_VAR_RUN_DIR); + } + + strcat (snap_mount_folder, GLUSTERD_DEFAULT_SNAPS_BRICK_DIR); + + ret = stat (snap_mount_folder, &buf); + if ((ret != 0) && (ENOENT != errno)) { + gf_log (this->name, GF_LOG_ERROR, + "stat fails on %s, exiting. (errno = %d)", + snap_mount_folder, errno); + ret = -1; + goto out; + } + + if ((!ret) && (!S_ISDIR(buf.st_mode))) { + gf_log (this->name, GF_LOG_CRITICAL, + "Provided snap path %s is not a directory, " + "exiting", snap_mount_folder); + ret = -1; + goto out; + } + + if ((-1 == ret) && (ENOENT == errno)) { + /* Create missing folders */ + ret = mkdir_p (snap_mount_folder, 0777, _gf_false); + + if (-1 == ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to create directory %s" + " ,errno = %d", snap_mount_folder, errno); + goto out; + } + } + +out: + return ret; +} + /* * init - called during glusterd initialization * @@ -225,55 +1145,70 @@ init (xlator_t *this) { int32_t ret = -1; rpcsvc_t *rpc = NULL; + rpcsvc_t *uds_rpc = NULL; glusterd_conf_t *conf = NULL; data_t *dir_data = NULL; struct stat buf = {0,}; - char voldir [PATH_MAX] = {0,}; - char dirname [PATH_MAX]; + char storedir [PATH_MAX] = {0,}; + char workdir [PATH_MAX] = {0,}; + char hooks_dir [PATH_MAX] = {0,}; char cmd_log_filename [PATH_MAX] = {0,}; int first_time = 0; + char *mountbroker_root = NULL; + int i = 0; + char *valgrind_str = NULL; dir_data = dict_get (this->options, "working-directory"); if (!dir_data) { //Use default working dir - strncpy (dirname, GLUSTERD_DEFAULT_WORKDIR, PATH_MAX); + strncpy (workdir, GLUSTERD_DEFAULT_WORKDIR, PATH_MAX); } else { - strncpy (dirname, dir_data->data, PATH_MAX); + strncpy (workdir, dir_data->data, PATH_MAX); } - ret = stat (dirname, &buf); + ret = stat (workdir, &buf); if ((ret != 0) && (ENOENT != errno)) { gf_log (this->name, GF_LOG_ERROR, "stat fails on %s, exiting. 
(errno = %d)", - dirname, errno); + workdir, errno); exit (1); } if ((!ret) && (!S_ISDIR(buf.st_mode))) { gf_log (this->name, GF_LOG_CRITICAL, "Provided working area %s is not a directory," - "exiting", dirname); + "exiting", workdir); exit (1); } if ((-1 == ret) && (ENOENT == errno)) { - ret = mkdir (dirname, 0777); + ret = mkdir (workdir, 0777); if (-1 == ret) { gf_log (this->name, GF_LOG_CRITICAL, "Unable to create directory %s" - " ,errno = %d", dirname, errno); + " ,errno = %d", workdir, errno); exit (1); } + first_time = 1; } - gf_log (this->name, GF_LOG_NORMAL, "Using %s as working directory", - dirname); + setenv ("GLUSTERD_WORKING_DIR", workdir, 1); + gf_log (this->name, GF_LOG_INFO, "Using %s as working directory", + workdir); + + ret = glusterd_init_snap_folder (this); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, "Unable to create " + "snap backend folder"); + exit (1); + } - snprintf (cmd_log_filename, PATH_MAX,"%s/.cmd_log_history",dirname); + snprintf (cmd_log_filename, PATH_MAX,"%s/.cmd_log_history", + DEFAULT_LOG_FILE_DIRECTORY); ret = gf_cmd_log_init (cmd_log_filename); if (ret == -1) { @@ -282,56 +1217,68 @@ init (xlator_t *this) exit (1); } - snprintf (voldir, PATH_MAX, "%s/vols", dirname); + snprintf (storedir, PATH_MAX, "%s/vols", workdir); - ret = mkdir (voldir, 0777); + ret = mkdir (storedir, 0777); if ((-1 == ret) && (errno != EEXIST)) { gf_log (this->name, GF_LOG_CRITICAL, "Unable to create volume directory %s" - " ,errno = %d", voldir, errno); + " ,errno = %d", storedir, errno); exit (1); } - snprintf (voldir, PATH_MAX, "%s/peers", dirname); + snprintf (storedir, PATH_MAX, "%s/peers", workdir); - ret = mkdir (voldir, 0777); + ret = mkdir (storedir, 0777); if ((-1 == ret) && (errno != EEXIST)) { gf_log (this->name, GF_LOG_CRITICAL, "Unable to create peers directory %s" - " ,errno = %d", voldir, errno); + " ,errno = %d", storedir, errno); exit (1); } - snprintf (voldir, PATH_MAX, "%s/logs", dirname); - ret = symlink (DEFAULT_LOG_FILE_DIRECTORY, voldir); + snprintf (storedir, PATH_MAX, "%s/bricks", DEFAULT_LOG_FILE_DIRECTORY); + ret = mkdir (storedir, 0777); if ((-1 == ret) && (errno != EEXIST)) { gf_log (this->name, GF_LOG_CRITICAL, - "Unable to create symlink to logs directory %s" - " ,errno = %d", voldir, errno); + "Unable to create logs directory %s" + " ,errno = %d", storedir, errno); exit (1); } - snprintf (voldir, PATH_MAX, "%s/logs/bricks", dirname); - ret = mkdir (voldir, 0777); + snprintf (storedir, PATH_MAX, "%s/nfs", workdir); + ret = mkdir (storedir, 0777); if ((-1 == ret) && (errno != EEXIST)) { gf_log (this->name, GF_LOG_CRITICAL, - "Unable to create logs directory %s" - " ,errno = %d", voldir, errno); + "Unable to create nfs directory %s" + " ,errno = %d", storedir, errno); exit (1); } - snprintf (voldir, PATH_MAX, "%s/nfs", dirname); - ret = mkdir (voldir, 0777); + snprintf (storedir, PATH_MAX, "%s/glustershd", workdir); + ret = mkdir (storedir, 0777); if ((-1 == ret) && (errno != EEXIST)) { gf_log (this->name, GF_LOG_CRITICAL, - "Unable to create nfs directory %s" - " ,errno = %d", voldir, errno); + "Unable to create glustershd directory %s" + " ,errno = %d", storedir, errno); + exit (1); + } + + snprintf (storedir, PATH_MAX, "%s/groups", workdir); + ret = mkdir (storedir, 0777); + if ((-1 == ret) && (errno != EEXIST)) { + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to create glustershd directory %s" + " ,errno = %d", storedir, errno); exit (1); } - rpc = rpcsvc_init (this->ctx, this->options); + ret = glusterd_rpcsvc_options_build 
(this->options); + if (ret) + goto out; + rpc = rpcsvc_init (this, this->ctx, this->options, 64); if (rpc == NULL) { gf_log (this->name, GF_LOG_ERROR, "failed to init rpc"); @@ -357,59 +1304,148 @@ init (xlator_t *this) goto out; } - ret = glusterd_program_register (this, rpc, &glusterd1_mop_prog); - if (ret) { - goto out; - } + for (i = 0; i < gd_inet_programs_count; i++) { + ret = glusterd_program_register (this, rpc, + gd_inet_programs[i]); + if (ret) { + i--; + for (; i >= 0; i--) + rpcsvc_program_unregister (rpc, + gd_inet_programs[i]); - ret = glusterd_program_register (this, rpc, &gluster_pmap_prog); - if (ret) { - rpcsvc_program_unregister (rpc, &glusterd1_mop_prog); - goto out; + goto out; + } } - ret = glusterd_program_register (this, rpc, &gluster_handshake_prog); - if (ret) { - rpcsvc_program_unregister (rpc, &glusterd1_mop_prog); - rpcsvc_program_unregister (rpc, &gluster_handshake_prog); + /* Start a unix domain socket listener just for cli commands + * This should prevent ports from being wasted by being in TIMED_WAIT + * when cli commands are done continuously + */ + uds_rpc = glusterd_init_uds_listener (this); + if (uds_rpc == NULL) { + ret = -1; goto out; } conf = GF_CALLOC (1, sizeof (glusterd_conf_t), gf_gld_mt_glusterd_conf_t); GF_VALIDATE_OR_GOTO(this->name, conf, out); + conf->shd = GF_CALLOC (1, sizeof (nodesrv_t), + gf_gld_mt_nodesrv_t); + GF_VALIDATE_OR_GOTO(this->name, conf->shd, out); + conf->nfs = GF_CALLOC (1, sizeof (nodesrv_t), + gf_gld_mt_nodesrv_t); + GF_VALIDATE_OR_GOTO(this->name, conf->nfs, out); + INIT_LIST_HEAD (&conf->peers); INIT_LIST_HEAD (&conf->volumes); + INIT_LIST_HEAD (&conf->snapshots); + INIT_LIST_HEAD (&conf->missed_snaps_list); + pthread_mutex_init (&conf->mutex, NULL); conf->rpc = rpc; - conf->mgmt = &glusterd3_1_mgmt_prog; - strncpy (conf->workdir, dirname, PATH_MAX); + conf->uds_rpc = uds_rpc; + conf->gfs_mgmt = &gd_brick_prog; + strncpy (conf->workdir, workdir, PATH_MAX); + synclock_init (&conf->big_lock); + pthread_mutex_init (&conf->xprt_lock, NULL); INIT_LIST_HEAD (&conf->xprt_list); + glusterd_friend_sm_init (); + glusterd_op_sm_init (); + glusterd_opinfo_init (); + glusterd_mgmt_v3_lock_init (); + glusterd_txn_opinfo_dict_init (); + ret = glusterd_sm_tr_log_init (&conf->op_sm_log, + glusterd_op_sm_state_name_get, + glusterd_op_sm_event_name_get, + GLUSTERD_TR_LOG_SIZE); + if (ret) + goto out; + + conf->base_port = GF_IANA_PRIV_PORTS_START; + if (dict_get_uint32(this->options, "base-port", &conf->base_port) == 0) { + gf_log (this->name, GF_LOG_INFO, + "base-port override: %d", conf->base_port); + } + + /* Set option to run bricks on valgrind if enabled in glusterd.vol */ + conf->valgrind = _gf_false; + ret = dict_get_str (this->options, "run-with-valgrind", &valgrind_str); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "cannot get run-with-valgrind value"); + } + if (valgrind_str) { + if (gf_string2boolean (valgrind_str, &(conf->valgrind))) { + gf_log (this->name, GF_LOG_WARNING, + "run-with-valgrind value not a boolean string"); + } + } + this->private = conf; - //this->ctx->top = this; + (void) glusterd_nodesvc_set_online_status ("glustershd", _gf_false); - ret = glusterd_uuid_init (first_time); - if (ret < 0) + GLUSTERD_GET_HOOKS_DIR (hooks_dir, GLUSTERD_HOOK_VER, conf); + if (stat (hooks_dir, &buf)) { + ret = glusterd_hooks_create_hooks_directory (conf->workdir); + if (-1 == ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to create hooks directory "); + exit (1); + } + } + + INIT_LIST_HEAD (&conf->mount_specs); 
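The base-port and run-with-valgrind handling just above follows one pattern: start from a compiled-in default, override it only when the option is present in this->options and parses cleanly, and log a warning rather than abort on a malformed value. Below is a self-contained sketch of that pattern; parse_bool() is a stand-in for gf_string2boolean() and the option value is invented for illustration, so this is not the actual glusterd code path.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for gf_string2boolean(): 0 on success, -1 on junk input. */
static int
parse_bool (const char *s, bool *out)
{
        if (!strcmp (s, "on") || !strcmp (s, "true") || !strcmp (s, "1"))
                *out = true;
        else if (!strcmp (s, "off") || !strcmp (s, "false") || !strcmp (s, "0"))
                *out = false;
        else
                return -1;
        return 0;
}

int
main (void)
{
        bool        valgrind = false;    /* compiled-in default      */
        const char *opt      = "maybe";  /* pretend configured value */

        /* Override the default only when the value parses; on junk,
         * warn and keep the default, as init() does above. */
        if (opt && parse_bool (opt, &valgrind) != 0)
                fprintf (stderr,
                         "run-with-valgrind value not a boolean string\n");

        printf ("valgrind mode: %s\n", valgrind ? "on" : "off");
        return 0;
}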
+ + ret = dict_foreach (this->options, _install_mount_spec, NULL); + if (ret) + goto out; + ret = dict_get_str (this->options, "mountbroker-root", + &mountbroker_root); + if (ret) + ret = 0; + else + ret = check_prepare_mountbroker_root (mountbroker_root); + if (ret) + goto out; + + ret = configure_syncdaemon (conf); + if (ret) goto out; ret = glusterd_restore (); if (ret < 0) goto out; - glusterd_friend_sm_init (); - glusterd_op_sm_init (); - glusterd_opinfo_init (); + /* If there are no 'friends', this would be the best time to + * spawn process/bricks that may need (re)starting since last + * time (this) glusterd was up.*/ + + if (list_empty (&conf->peers)) { + glusterd_launch_synctask (glusterd_spawn_daemons, NULL); + } + ret = glusterd_options_init (this); + if (ret < 0) + goto out; + + ret = glusterd_handle_upgrade_downgrade (this->options, conf); + if (ret) + goto out; + + ret = glusterd_hooks_spawn_worker (this); + if (ret) + goto out; - glusterd_restart_bricks (conf); ret = 0; out: - if (ret == -1) { + if (ret < 0) { if (this->private != NULL) { GF_FREE (this->private); this->private = NULL; } + } return ret; @@ -433,11 +1469,17 @@ fini (xlator_t *this) goto out; conf = this->private; - if (conf->pmap) - FREE (conf->pmap); + + glusterd_stop_uds_listener (this); + + FREE (conf->pmap); if (conf->handle) - glusterd_store_handle_destroy (conf->handle); + gf_store_handle_destroy (conf->handle); + glusterd_sm_tr_log_delete (&conf->op_sm_log); + glusterd_mgmt_v3_lock_fini (); + glusterd_txn_opinfo_dict_fini (); GF_FREE (conf); + this->private = NULL; out: return; @@ -475,11 +1517,9 @@ notify (xlator_t *this, int32_t event, void *data, ...) } -struct xlator_fops fops = { -}; +struct xlator_fops fops; -struct xlator_cbks cbks = { -}; +struct xlator_cbks cbks; struct xlator_dumpops dumpops = { .priv = glusterd_priv, @@ -499,6 +1539,59 @@ struct volume_options options[] = { { .key = {"rpc-auth.*"}, .type = GF_OPTION_TYPE_ANY, }, - + { .key = {"rpc-auth-allow-insecure"}, + .type = GF_OPTION_TYPE_BOOL, + }, + { .key = {"upgrade"}, + .type = GF_OPTION_TYPE_BOOL, + }, + { .key = {"downgrade"}, + .type = GF_OPTION_TYPE_BOOL, + }, + { .key = {"bind-insecure"}, + .type = GF_OPTION_TYPE_BOOL, + }, + { .key = {"mountbroker-root"}, + .type = GF_OPTION_TYPE_PATH, + }, + { .key = {"mountbroker.*"}, + .type = GF_OPTION_TYPE_ANY, + }, + { .key = {"mountbroker-"GEOREP".*"}, + .type = GF_OPTION_TYPE_ANY, + }, + { .key = {"mountbroker-"GHADOOP".*"}, + .type = GF_OPTION_TYPE_ANY, + }, + { .key = {GEOREP"-log-group"}, + .type = GF_OPTION_TYPE_ANY, + }, + { .key = {"run-with-valgrind"}, + .type = GF_OPTION_TYPE_BOOL, + }, + { .key = {"server-quorum-type"}, + .type = GF_OPTION_TYPE_STR, + .value = { "none", "server"}, + .description = "If set to server, enables the specified " + "volume to participate in quorum." + }, + { .key = {"server-quorum-ratio"}, + .type = GF_OPTION_TYPE_PERCENT, + .description = "Sets the quorum percentage for the trusted " + "storage pool." + }, + { .key = {"glusterd-sockfile"}, + .type = GF_OPTION_TYPE_PATH, + .description = "The socket file on which glusterd should listen for " + "cli requests. Default is "DEFAULT_GLUSTERD_SOCKFILE "." 
+ }, + { .key = {"base-port"}, + .type = GF_OPTION_TYPE_INT, + .description = "Sets the base port for portmap query" + }, + { .key = {"snap-brick-path"}, + .type = GF_OPTION_TYPE_STR, + .description = "directory where the bricks for the snapshots will be created" + }, { .key = {NULL} }, }; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index dfa8b73e5..3aa395ebc 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _GLUSTERD_H_ #define _GLUSTERD_H_ @@ -45,10 +35,48 @@ #include "glusterd1-xdr.h" #include "protocol-common.h" #include "glusterd-pmap.h" - +#include "cli1-xdr.h" +#include "syncop.h" +#include "store.h" #define GLUSTERD_MAX_VOLUME_NAME 1000 -#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs" +#define GLUSTERD_TR_LOG_SIZE 50 +#define GLUSTERD_NAME "glusterd" +#define GLUSTERD_SOCKET_LISTEN_BACKLOG 128 +#define GLUSTERD_QUORUM_TYPE_KEY "cluster.server-quorum-type" +#define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio" +#define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version" +#define GLUSTERD_COMMON_PEM_PUB_FILE "/geo-replication/common_secret.pem.pub" +#define GEO_CONF_MAX_OPT_VALS 5 +#define GLUSTERD_CREATE_HOOK_SCRIPT "/hooks/1/gsync-create/post/" \ + "S56glusterd-geo-rep-create-post.sh" + + +#define GLUSTERD_SNAPS_MAX_HARD_LIMIT 256 +#define GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT 90 +#define GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT 100 +#define GLUSTERD_SERVER_QUORUM "server" + +#define FMTSTR_CHECK_VOL_EXISTS "Volume %s does not exist" +#define FMTSTR_RESOLVE_BRICK "Could not find peer on which brick %s:%s resides" + +#define LOGSTR_FOUND_BRICK "Found brick %s:%s in volume %s" +#define LOGSTR_BUILD_PAYLOAD "Failed to build payload for operation 'Volume %s'" +#define LOGSTR_STAGE_FAIL "Staging of operation 'Volume %s' failed on %s %s %s" +#define LOGSTR_COMMIT_FAIL "Commit of operation 'Volume %s' failed on %s %s %s" + +#define OPERRSTR_BUILD_PAYLOAD "Failed to build payload. Please check the log "\ + "file for more details." +#define OPERRSTR_STAGE_FAIL "Staging failed on %s. Please check the log file " \ + "for more details." +#define OPERRSTR_COMMIT_FAIL "Commit failed on %s. Please check the log file "\ + "for more details." 
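
[annotation] The FMTSTR_*/OPERRSTR_* macros above centralize the user-facing error strings that staging/commit handlers copy into op_errstr buffers. The sketch below shows the assumed expansion pattern only; it is not code from this patch, and the err buffer and "testvol"/"host1" arguments are illustrative. The macro bodies are repeated locally so the sketch compiles on its own.

#include <stdio.h>

/* Mirrors the definitions in glusterd.h above */
#define FMTSTR_CHECK_VOL_EXISTS "Volume %s does not exist"
#define OPERRSTR_STAGE_FAIL "Staging failed on %s. Please check the log file " \
                            "for more details."

int
main (void)
{
        char err[512] = {0,};

        /* a stage handler rejecting an unknown volume name */
        snprintf (err, sizeof (err), FMTSTR_CHECK_VOL_EXISTS, "testvol");
        printf ("%s\n", err);

        /* the originator reporting which peer failed staging */
        snprintf (err, sizeof (err), OPERRSTR_STAGE_FAIL, "host1");
        printf ("%s\n", err);

        return 0;
}

[end annotation]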
+ +struct glusterd_volinfo_; +typedef struct glusterd_volinfo_ glusterd_volinfo_t; + +struct glusterd_snap_; +typedef struct glusterd_snap_ glusterd_snap_t; typedef enum glusterd_op_ { GD_OP_NONE = 0, @@ -58,7 +86,6 @@ typedef enum glusterd_op_ { GD_OP_DELETE_VOLUME, GD_OP_START_VOLUME, GD_OP_STOP_VOLUME, - GD_OP_RENAME_VOLUME, GD_OP_DEFRAG_VOLUME, GD_OP_ADD_BRICK, GD_OP_REMOVE_BRICK, @@ -66,53 +93,106 @@ typedef enum glusterd_op_ { GD_OP_SET_VOLUME, GD_OP_RESET_VOLUME, GD_OP_SYNC_VOLUME, - GD_OP_LOG_FILENAME, - GD_OP_LOG_LOCATE, GD_OP_LOG_ROTATE, + GD_OP_GSYNC_SET, + GD_OP_PROFILE_VOLUME, + GD_OP_QUOTA, + GD_OP_STATUS_VOLUME, + GD_OP_REBALANCE, + GD_OP_HEAL_VOLUME, + GD_OP_STATEDUMP_VOLUME, + GD_OP_LIST_VOLUME, + GD_OP_CLEARLOCKS_VOLUME, + GD_OP_DEFRAG_BRICK_VOLUME, + GD_OP_COPY_FILE, + GD_OP_SYS_EXEC, + GD_OP_GSYNC_CREATE, + GD_OP_SNAP, GD_OP_MAX, } glusterd_op_t; +extern const char * gd_op_list[]; -struct glusterd_store_iter_ { - int fd; - FILE *file; +struct glusterd_volgen { + dict_t *dict; }; -typedef struct glusterd_store_iter_ glusterd_store_iter_t; +typedef struct { + struct rpc_clnt *rpc; + gf_boolean_t online; +} nodesrv_t; + +typedef struct { + gf_boolean_t quorum; + double quorum_ratio; + uint64_t gl_opt_version; +} gd_global_opts_t; typedef struct { - struct _volfile_ctx *volfile; - pthread_mutex_t mutex; - struct list_head peers; -// struct list_head pending_peers; - gf_boolean_t verify_volfile_checksum; - gf_boolean_t trace; - uuid_t uuid; - char workdir[PATH_MAX]; - rpcsvc_t *rpc; - rpc_clnt_prog_t *mgmt; - struct pmap_registry *pmap; - struct list_head volumes; - struct list_head xprt_list; - glusterd_store_handle_t *handle; - gf_timer_t *timer; + struct _volfile_ctx *volfile; + pthread_mutex_t mutex; + struct list_head peers; + struct list_head xaction_peers; + gf_boolean_t verify_volfile_checksum; + gf_boolean_t trace; + uuid_t uuid; + char workdir[PATH_MAX]; + rpcsvc_t *rpc; + nodesrv_t *shd; + nodesrv_t *nfs; + struct pmap_registry *pmap; + struct list_head volumes; + struct list_head snapshots; /*List of snap volumes */ + pthread_mutex_t xprt_lock; + struct list_head xprt_list; + gf_store_handle_t *handle; + gf_timer_t *timer; + glusterd_sm_tr_log_t op_sm_log; + struct rpc_clnt_program *gfs_mgmt; + + struct list_head mount_specs; + gf_boolean_t valgrind; + pthread_t brick_thread; + void *hooks_priv; + /* need for proper handshake_t */ + int op_version; /* Starts with 1 for 3.3.0 */ + xlator_t *xl; /* Should be set to 'THIS' before creating thread */ + gf_boolean_t pending_quorum_action; + dict_t *opts; + synclock_t big_lock; + gf_boolean_t restart_done; + rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */ + uint32_t base_port; + uint64_t snap_max_hard_limit; + uint64_t snap_max_soft_limit; + char *snap_bricks_directory; + gf_store_handle_t *missed_snaps_list_shandle; + struct list_head missed_snaps_list; } glusterd_conf_t; + typedef enum gf_brick_status { GF_BRICK_STOPPED, GF_BRICK_STARTED, } gf_brick_status_t; struct glusterd_brickinfo { - char hostname[1024]; - char path[PATH_MAX]; - struct list_head brick_list; - uuid_t uuid; - int port; - char *logfile; - gf_boolean_t signed_in; - glusterd_store_handle_t *shandle; - gf_brick_status_t status; + char hostname[1024]; + char path[PATH_MAX]; + char device_path[PATH_MAX]; + struct list_head brick_list; + uuid_t uuid; + int port; + int rdma_port; + char *logfile; + gf_boolean_t signed_in; + gf_store_handle_t *shandle; + gf_brick_status_t status; + struct rpc_clnt *rpc; + int decommissioned; + char vg[PATH_MAX]; 
/* FIXME: Use max size for length of vg */ + int caps; /* Capability */ + int32_t snap_status; }; typedef struct glusterd_brickinfo glusterd_brickinfo_t; @@ -123,24 +203,25 @@ struct gf_defrag_brickinfo_ { int size; }; -typedef enum gf_defrag_status_ { - GF_DEFRAG_STATUS_NOT_STARTED, - GF_DEFRAG_STATUS_STARTED, - GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE, - GF_DEFRAG_STATUS_STOPED, - GF_DEFRAG_STATUS_COMPLETE, - GF_DEFRAG_STATUS_FAILED, -} gf_defrag_status_t; +typedef int (*defrag_cbk_fn_t) (glusterd_volinfo_t *volinfo, + gf_defrag_status_t status); struct glusterd_defrag_info_ { uint64_t total_files; uint64_t total_data; uint64_t num_files_lookedup; + uint64_t total_failures; gf_lock_t lock; + int cmd; pthread_t th; + gf_defrag_status_t defrag_status; + struct rpc_clnt *rpc; + uint32_t connected; char mount[1024]; char databuf[131072]; struct gf_defrag_brickinfo_ *bricks; /* volinfo->brick_count */ + + defrag_cbk_fn_t cbk_fn; }; @@ -149,8 +230,10 @@ typedef struct glusterd_defrag_info_ glusterd_defrag_info_t; typedef enum gf_transport_type_ { GF_TRANSPORT_TCP, //DEFAULT GF_TRANSPORT_RDMA, + GF_TRANSPORT_BOTH_TCP_RDMA, } gf_transport_type; +#define GF_DEFAULT_NFS_TRANSPORT GF_TRANSPORT_RDMA typedef enum gf_rb_status_ { GF_RB_STATUS_NONE, @@ -158,40 +241,181 @@ typedef enum gf_rb_status_ { GF_RB_STATUS_PAUSED, } gf_rb_status_t; +struct _auth { + char *username; + char *password; +}; + +typedef struct _auth auth_t; + +/* Capabilities of xlator */ +#define CAPS_BD 0x00000001 +#define CAPS_THIN 0x00000002 +#define CAPS_OFFLOAD_COPY 0x00000004 +#define CAPS_OFFLOAD_SNAPSHOT 0x00000008 + +struct glusterd_rebalance_ { + gf_defrag_status_t defrag_status; + uint64_t rebalance_files; + uint64_t rebalance_data; + uint64_t lookedup_files; + uint64_t skipped_files; + glusterd_defrag_info_t *defrag; + gf_cli_defrag_type defrag_cmd; + uint64_t rebalance_failures; + uuid_t rebalance_id; + double rebalance_time; + glusterd_op_t op; + dict_t *dict; /* Dict to store misc information + * like list of bricks being removed */ +}; + +typedef struct glusterd_rebalance_ glusterd_rebalance_t; + +struct glusterd_replace_brick_ { + gf_rb_status_t rb_status; + glusterd_brickinfo_t *src_brick; + glusterd_brickinfo_t *dst_brick; + uuid_t rb_id; +}; + +typedef struct glusterd_replace_brick_ glusterd_replace_brick_t; + struct glusterd_volinfo_ { - char volname[GLUSTERD_MAX_VOLUME_NAME]; - int type; - int brick_count; - struct list_head vol_list; - struct list_head bricks; - glusterd_volume_status status; - int sub_count; - int port; - glusterd_store_handle_t *shandle; + gf_lock_t lock; + char volname[GLUSTERD_MAX_VOLUME_NAME]; + gf_boolean_t is_snap_volume; + glusterd_snap_t *snapshot; + gf_boolean_t is_volume_restored; + char parent_volname[GLUSTERD_MAX_VOLUME_NAME]; + /* In case of a snap volume + i.e (is_snap_volume == TRUE) this + field will contain the name of + the volume which is snapped. In + case of a non-snap volume, this + field will be initialized as N/A */ + int type; + int brick_count; + uint64_t snap_count; + uint64_t snap_max_hard_limit; + struct list_head vol_list; + /* In case of a snap volume + i.e (is_snap_volume == TRUE) this + is linked to glusterd_snap_t->volumes. + In case of a non-snap volume, this is + linked to glusterd_conf_t->volumes */ + struct list_head snapvol_list; + /* This is a current pointer for + glusterd_volinfo_t->snap_volumes */ + struct list_head bricks; + struct list_head snap_volumes; + /* TODO : Need to remove this, as this + * is already part of snapshot object. 
+ */ + glusterd_volume_status status; + int sub_count; /* backward compatibility */ + int stripe_count; + int replica_count; + int subvol_count; /* Number of subvolumes in a + distribute volume */ + int dist_leaf_count; /* Number of bricks in one + distribute subvolume */ + int port; + gf_store_handle_t *shandle; + gf_store_handle_t *rb_shandle; + gf_store_handle_t *node_state_shandle; /* Defrag/rebalance related */ - gf_defrag_status_t defrag_status; - uint64_t rebalance_files; - uint64_t rebalance_data; - uint64_t lookedup_files; - glusterd_defrag_info_t *defrag; + glusterd_rebalance_t rebal; /* Replace brick status */ - gf_rb_status_t rb_status; - glusterd_brickinfo_t *src_brick; - glusterd_brickinfo_t *dst_brick; + glusterd_replace_brick_t rep_brick; + + int version; + uint32_t cksum; + gf_transport_type transport_type; + gf_transport_type nfs_transport_type; + + dict_t *dict; + + uuid_t volume_id; + auth_t auth; + char *logdir; + + dict_t *gsync_slaves; - int version; - uint32_t cksum; - gf_transport_type transport_type; + int decommission_in_progress; + xlator_t *xl; - /* All xlator options */ - dict_t *dict; + gf_boolean_t memory_accounting; + int caps; /* Capability */ - uuid_t volume_id; + int op_version; + int client_op_version; }; -typedef struct glusterd_volinfo_ glusterd_volinfo_t; +typedef enum gd_snap_status_ { + GD_SNAP_STATUS_NONE, + GD_SNAP_STATUS_INIT, + GD_SNAP_STATUS_IN_USE, + GD_SNAP_STATUS_DECOMMISSION, + GD_SNAP_STATUS_RESTORED, +} gd_snap_status_t; + +struct glusterd_snap_ { + gf_lock_t lock; + struct list_head volumes; + struct list_head snap_list; + char snapname[GLUSTERD_MAX_SNAP_NAME]; + uuid_t snap_id; + char *description; + time_t time_stamp; + gf_boolean_t snap_restored; + gd_snap_status_t snap_status; + gf_store_handle_t *shandle; +}; + +typedef struct glusterd_snap_op_ { + int32_t brick_num; + char *brick_path; + int32_t op; + int32_t status; + struct list_head snap_ops_list; +} glusterd_snap_op_t; + +typedef struct glusterd_missed_snap_ { + char *node_snap_info; + struct list_head missed_snaps; + struct list_head snap_ops; +} glusterd_missed_snap_info; + +typedef enum gd_node_type_ { + GD_NODE_NONE, + GD_NODE_BRICK, + GD_NODE_SHD, + GD_NODE_REBALANCE, + GD_NODE_NFS, +} gd_node_type; + +typedef enum missed_snap_stat { + GD_MISSED_SNAP_NONE, + GD_MISSED_SNAP_PENDING, + GD_MISSED_SNAP_DONE, +} missed_snap_stat; + +typedef struct glusterd_pending_node_ { + struct list_head list; + void *node; + gd_node_type type; + int32_t index; +} glusterd_pending_node_t; + +struct gsync_config_opt_vals_ { + char *op_name; + int no_of_pos_vals; + gf_boolean_t case_sensitive; + char *values[GEO_CONF_MAX_OPT_VALS]; +}; enum glusterd_op_ret { GLUSTERD_CONNECTION_AWAITED = 100, @@ -204,46 +428,62 @@ enum glusterd_vol_comp_status_ { GLUSTERD_VOL_COMP_RJT, }; -#define GLUSTERD_DEFAULT_WORKDIR "/etc/glusterd" +#define GLUSTERD_DEFAULT_WORKDIR "/var/lib/glusterd" #define GLUSTERD_DEFAULT_PORT GF_DEFAULT_BASE_PORT #define GLUSTERD_INFO_FILE "glusterd.info" #define GLUSTERD_VOLUME_DIR_PREFIX "vols" #define GLUSTERD_PEER_DIR_PREFIX "peers" #define GLUSTERD_VOLUME_INFO_FILE "info" +#define GLUSTERD_SNAP_INFO_FILE "info" +#define GLUSTERD_VOLUME_RBSTATE_FILE "rbstate" #define GLUSTERD_BRICK_INFO_DIR "bricks" #define GLUSTERD_CKSUM_FILE "cksum" +#define GLUSTERD_TRASH "trash" +#define GLUSTERD_NODE_STATE_FILE "node_state.info" +#define GLUSTERD_MISSED_SNAPS_LIST_FILE "missed_snaps_list" +#define GLUSTERD_VOL_SNAP_DIR_PREFIX "snaps" + +#define GLUSTERD_DEFAULT_SNAPS_BRICK_DIR 
"/gluster/snaps" +#define GLUSTERD_VAR_RUN_DIR "/var/run" +#define GLUSTERD_RUN_DIR "/run" -/*All definitions related to replace brick */ -#define RB_PUMP_START_CMD "trusted.glusterfs.pump.start" -#define RB_PUMP_PAUSE_CMD "trusted.glusterfs.pump.pause" -#define RB_PUMP_ABORT_CMD "trusted.glusterfs.pump.abort" -#define RB_PUMP_STATUS_CMD "trusted.glusterfs.pump.status" +/* definitions related to replace brick */ #define RB_CLIENT_MOUNTPOINT "rb_mount" #define RB_CLIENTVOL_FILENAME "rb_client.vol" #define RB_DSTBRICK_PIDFILE "rb_dst_brick.pid" #define RB_DSTBRICKVOL_FILENAME "rb_dst_brick.vol" +#define RB_PUMP_DEF_ARG "default" #define GLUSTERD_UUID_LEN 50 typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); -#define GLUSTERD_GET_NFS_DIR(path, priv) \ - do { \ - snprintf (path, PATH_MAX, "%s/nfs", priv->workdir);\ - } while (0); \ - -#define GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv) \ - snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir,\ - volinfo->volname); +#define GLUSTERD_GET_VOLUME_DIR(path, volinfo, priv) \ + if (volinfo->is_snap_volume) { \ + snprintf (path, PATH_MAX, "%s/snaps/%s/%s", priv->workdir, \ + volinfo->snapshot->snapname, volinfo->volname); \ + } else { \ + snprintf (path, PATH_MAX, "%s/vols/%s", priv->workdir, \ + volinfo->volname); \ + } -#define GLUSTERD_GET_BRICK_DIR(path, volinfo, priv) \ - snprintf (path, PATH_MAX, "%s/%s/%s/%s", priv->workdir,\ - GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname, \ - GLUSTERD_BRICK_INFO_DIR); +#define GLUSTERD_GET_SNAP_DIR(path, snap, priv) \ + snprintf (path, PATH_MAX, "%s/snaps/%s", priv->workdir, \ + snap->snapname); + +#define GLUSTERD_GET_BRICK_DIR(path, volinfo, priv) \ + if (volinfo->is_snap_volume) { \ + snprintf (path, PATH_MAX, "%s/snaps/%s/%s/%s", priv->workdir, \ + volinfo->snapshot->snapname, volinfo->volname, \ + GLUSTERD_BRICK_INFO_DIR); \ + } else { \ + snprintf (path, PATH_MAX, "%s/%s/%s/%s", priv->workdir, \ + GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname, \ + GLUSTERD_BRICK_INFO_DIR); \ + } -#define GLUSTERD_GET_NFS_PIDFILE(pidfile) \ - snprintf (pidfile, PATH_MAX, "%s/nfs/run/nfs.pid", \ - priv->workdir); \ +#define GLUSTERD_GET_NFS_DIR(path, priv) \ + snprintf (path, PATH_MAX, "%s/nfs", priv->workdir); #define GLUSTERD_REMOVE_SLASH_FROM_PATH(path,string) do { \ int i = 0; \ @@ -254,31 +494,96 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); } \ } while (0) -#define GLUSTERD_GET_BRICK_PIDFILE(pidfile,volpath,hostname,brickpath) { \ - char exp_path[PATH_MAX] = {0,}; \ - GLUSTERD_REMOVE_SLASH_FROM_PATH (brickpath, exp_path); \ - snprintf (pidfile, PATH_MAX, "%s/run/%s-%s.pid", \ - volpath, hostname, exp_path); \ +#define GLUSTERD_GET_BRICK_PIDFILE(pidfile,volinfo,brickinfo, priv) do { \ + char exp_path[PATH_MAX] = {0,}; \ + char volpath[PATH_MAX] = {0,}; \ + GLUSTERD_GET_VOLUME_DIR (volpath, volinfo, priv); \ + GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); \ + snprintf (pidfile, PATH_MAX, "%s/run/%s-%s.pid", \ + volpath, brickinfo->hostname, exp_path); \ + } while (0) + +#define GLUSTERD_GET_NFS_PIDFILE(pidfile,nfspath) { \ + snprintf (pidfile, PATH_MAX, "%s/run/nfs.pid", \ + nfspath); \ } #define GLUSTERD_STACK_DESTROY(frame) do {\ - void *__local = NULL; \ - xlator_t *__xl = NULL; \ - __xl = frame->this; \ - __local = frame->local; \ - frame->local = NULL; \ - STACK_DESTROY (frame->root);\ - } while (0) + frame->local = NULL; \ + STACK_DESTROY (frame->root); \ + } while (0) + +#define GLUSTERD_GET_DEFRAG_DIR(path, volinfo, priv) do { \ + char 
vol_path[PATH_MAX]; \ + GLUSTERD_GET_VOLUME_DIR(vol_path, volinfo, priv); \ + snprintf (path, PATH_MAX, "%s/rebalance",vol_path); \ + } while (0) + +#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo, priv) do { \ + char defrag_path[PATH_MAX]; \ + GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \ + snprintf (path, PATH_MAX, "%s/%s.sock", defrag_path, \ + uuid_utoa(MY_UUID)); \ + } while (0) + +#define GLUSTERD_GET_DEFRAG_PID_FILE(path, volinfo, priv) do { \ + char defrag_path[PATH_MAX]; \ + GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \ + snprintf (path, PATH_MAX, "%s/%s.pid", defrag_path, \ + uuid_utoa(MY_UUID)); \ + } while (0) + +#define GLUSTERD_GET_UUID_NOHYPHEN(ret_string, uuid) do { \ + char *snap_volname_ptr = ret_string; \ + char *snap_volid_ptr = uuid_utoa(uuid); \ + while (*snap_volid_ptr) { \ + if (*snap_volid_ptr == '-') { \ + snap_volid_ptr++; \ + } else { \ + (*snap_volname_ptr++) = \ + (*snap_volid_ptr++); \ + } \ + } \ + *snap_volname_ptr = '\0'; \ + } while (0) + +int glusterd_uuid_init(); + +int glusterd_uuid_generate_save (); + +#define MY_UUID (__glusterd_uuid()) + +static inline unsigned char * +__glusterd_uuid() +{ + glusterd_conf_t *priv = THIS->private; + + if (uuid_is_null (priv->uuid)) + glusterd_uuid_init(); + return &priv->uuid[0]; +} + +int glusterd_big_locked_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, + void *data, rpc_clnt_notify_t notify_fn); + +int +glusterd_big_locked_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe, fop_cbk_fn_t fn); + +int glusterd_big_locked_handler (rpcsvc_request_t *req, rpcsvc_actor actor_fn); int32_t glusterd_brick_from_brickinfo (glusterd_brickinfo_t *brickinfo, char **new_brick); int -glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port); +glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port, + dict_t *dict); int -glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *hostname, - int port, int32_t op_ret, int32_t op_errno); +glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *myhostname, + char *remote_hostname, int port, int32_t op_ret, + int32_t op_errno); int glusterd_friend_find (uuid_t uuid, char *hostname, @@ -287,12 +592,13 @@ glusterd_friend_find (uuid_t uuid, char *hostname, int glusterd_friend_add (const char *hoststr, int port, glusterd_friend_sm_state_t state, - uuid_t *uuid, struct rpc_clnt *rpc, - glusterd_peerinfo_t **friend, - gf_boolean_t restore, - glusterd_peerctx_args_t *args); + uuid_t *uuid, glusterd_peerinfo_t **friend, + gf_boolean_t restore, glusterd_peerctx_args_t *args); int +glusterd_friend_rpc_create (xlator_t *this, glusterd_peerinfo_t *peerinfo, + glusterd_peerctx_args_t *args); +int glusterd_friend_remove (uuid_t uuid, char *hostname); int @@ -302,6 +608,14 @@ int glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status); int +glusterd_op_mgmt_v3_lock_send_resp (rpcsvc_request_t *req, + uuid_t *txn_id, int32_t status); + +int +glusterd_op_mgmt_v3_unlock_send_resp (rpcsvc_request_t *req, + uuid_t *txn_id, int32_t status); + +int glusterd_op_stage_send_resp (rpcsvc_request_t *req, int32_t op, int32_t status, char *op_errstr, dict_t *rsp_dict); @@ -314,10 +628,6 @@ int32_t glusterd_create_volume (rpcsvc_request_t *req, dict_t *dict); int -glusterd_rpc_notify (struct rpc_clnt *rpc, void *mydata, - rpc_clnt_event_t event, - void *data); -int glusterd_handle_incoming_friend_req (rpcsvc_request_t *req); int @@ -345,8 +655,12 @@ int glusterd_handle_defrag_volume 
(rpcsvc_request_t *req); int +glusterd_handle_defrag_volume_v2 (rpcsvc_request_t *req); + +int glusterd_xfer_cli_probe_resp (rpcsvc_request_t *req, int32_t op_ret, - int32_t op_errno, char *hostname, int port); + int32_t op_errno, char *op_errstr, char *hostname, + int port, dict_t *dict); int glusterd_op_commit_send_resp (rpcsvc_request_t *req, @@ -358,7 +672,7 @@ glusterd_xfer_friend_remove_resp (rpcsvc_request_t *req, char *hostname, int por int glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port, - uuid_t uuid); + uuid_t uuid, dict_t *dict); int glusterd_handle_cli_deprobe (rpcsvc_request_t *req); @@ -381,9 +695,6 @@ glusterd_handle_friend_update (rpcsvc_request_t *req); int glusterd_handle_cli_stop_volume (rpcsvc_request_t *req); -int32_t -glusterd_delete_volume (rpcsvc_request_t *req, char *volname, int flags); - int glusterd_handle_cli_delete_volume (rpcsvc_request_t *req); @@ -393,15 +704,9 @@ glusterd_handle_cli_get_volume (rpcsvc_request_t *req); int32_t glusterd_get_volumes (rpcsvc_request_t *req, dict_t *dict, int32_t flags); -int32_t -glusterd_add_brick (rpcsvc_request_t *req, dict_t *dict); - int glusterd_handle_add_brick (rpcsvc_request_t *req); -int32_t -glusterd_replace_brick (rpcsvc_request_t *req, dict_t *dict); - int glusterd_handle_replace_brick (rpcsvc_request_t *req); @@ -409,10 +714,6 @@ int glusterd_handle_remove_brick (rpcsvc_request_t *req); int -glusterd_handle_log_filename (rpcsvc_request_t *req); -int -glusterd_handle_log_locate (rpcsvc_request_t *req); -int glusterd_handle_log_rotate (rpcsvc_request_t *req); int @@ -433,6 +734,12 @@ glusterd_set_volume (rpcsvc_request_t *req, dict_t *dict); int32_t glusterd_reset_volume (rpcsvc_request_t *req, dict_t *dict); +int32_t +glusterd_gsync_set (rpcsvc_request_t *req, dict_t *dict); + +int32_t +glusterd_quota (rpcsvc_request_t *req, dict_t *dict); + int glusterd_handle_set_volume (rpcsvc_request_t *req); @@ -440,26 +747,206 @@ int glusterd_handle_reset_volume (rpcsvc_request_t *req); int +glusterd_handle_copy_file (rpcsvc_request_t *req); + +int +glusterd_handle_sys_exec (rpcsvc_request_t *req); + +int +glusterd_handle_gsync_set (rpcsvc_request_t *req); + +int +glusterd_handle_quota (rpcsvc_request_t *req); + +int +glusterd_handle_fsm_log (rpcsvc_request_t *req); + +int glusterd_xfer_cli_deprobe_resp (rpcsvc_request_t *req, int32_t op_ret, - int32_t op_errno, char *hostname); + int32_t op_errno, char *op_errstr, + char *hostname, dict_t *dict); int glusterd_fetchspec_notify (xlator_t *this); -int32_t -glusterd_sync_volume (rpcsvc_request_t *req, dict_t *ctx); - int glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo, dict_t *volumes, int count); int -glusterd_restart_bricks(glusterd_conf_t *conf); +glusterd_restart_bricks (glusterd_conf_t *conf); int32_t glusterd_volume_txn (rpcsvc_request_t *req, char *volname, int flags, glusterd_op_t op); int +glusterd_peer_dump_version (xlator_t *this, struct rpc_clnt *rpc, + glusterd_peerctx_t *peerctx); + +int glusterd_validate_reconfopts (glusterd_volinfo_t *volinfo, dict_t *val_dict, char **op_errstr); +int +glusterd_handle_cli_profile_volume (rpcsvc_request_t *req); + +int +glusterd_handle_getwd (rpcsvc_request_t *req); + +int32_t +glusterd_set_volume (rpcsvc_request_t *req, dict_t *dict); +int +glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, + void *data); +int +glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data); + +int +glusterd_nodesvc_rpc_notify 
(struct rpc_clnt *rpc, void *mydata, + rpc_clnt_event_t event, void *data); + +int +glusterd_rpc_create (struct rpc_clnt **rpc, dict_t *options, + rpc_clnt_notify_t notify_fn, void *notify_data); + + +/* handler functions */ +int32_t glusterd_op_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, + char *err_str, size_t size); + +/* removed other definitions as they have been defined elsewhere in this file*/ + +int glusterd_handle_cli_statedump_volume (rpcsvc_request_t *req); +int glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req); + +int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, + size_t len, int cmd, defrag_cbk_fn_t cbk, + glusterd_op_t op); +int +glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, + glusterd_conf_t *priv, int cmd); + +int glusterd_handle_cli_heal_volume (rpcsvc_request_t *req); + +int glusterd_handle_cli_list_volume (rpcsvc_request_t *req); + +int +glusterd_handle_snapshot (rpcsvc_request_t *req); + +/* op-sm functions */ +int glusterd_op_stage_heal_volume (dict_t *dict, char **op_errstr); +int glusterd_op_heal_volume (dict_t *dict, char **op_errstr); +int glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr); +int glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int glusterd_op_stage_copy_file (dict_t *dict, char **op_errstr); +int glusterd_op_copy_file (dict_t *dict, char **op_errstr); +int glusterd_op_stage_sys_exec (dict_t *dict, char **op_errstr); +int glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int glusterd_op_stage_gsync_create (dict_t *dict, char **op_errstr); +int glusterd_op_gsync_create (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int glusterd_op_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int glusterd_op_stage_quota (dict_t *dict, char **op_errstr); +int glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, + dict_t *rsp_dict); +int glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict); +int glusterd_op_log_rotate (dict_t *dict); +int glusterd_op_stage_log_rotate (dict_t *dict, char **op_errstr); +int glusterd_op_stage_create_volume (dict_t *dict, char **op_errstr); +int glusterd_op_stage_start_volume (dict_t *dict, char **op_errstr); +int glusterd_op_stage_stop_volume (dict_t *dict, char **op_errstr); +int glusterd_op_stage_delete_volume (dict_t *dict, char **op_errstr); +int glusterd_op_create_volume (dict_t *dict, char **op_errstr); +int glusterd_op_start_volume (dict_t *dict, char **op_errstr); +int glusterd_op_stop_volume (dict_t *dict); +int glusterd_op_delete_volume (dict_t *dict); + +int glusterd_op_add_brick (dict_t *dict, char **op_errstr); +int glusterd_op_remove_brick (dict_t *dict, char **op_errstr); +int glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr); +int glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr); + +int glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr); +int glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict); + +int glusterd_op_stage_statedump_volume (dict_t *dict, char **op_errstr); +int glusterd_op_statedump_volume (dict_t *dict, char **op_errstr); + +int glusterd_op_stage_clearlocks_volume (dict_t *dict, char **op_errstr); +int glusterd_op_clearlocks_volume (dict_t *dict, char **op_errstr, + dict_t *rsp_dict); + +/* misc */ +void glusterd_do_replace_brick (void *data); +int glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick, + int force, int *need_migrate); +int 
glusterd_op_stop_volume_args_get (dict_t *dict, char** volname, int *flags); +int glusterd_op_statedump_volume_args_get (dict_t *dict, char **volname, + char **options, int *option_cnt); + +int glusterd_op_gsync_args_get (dict_t *dict, char **op_errstr, + char **master, char **slave, char **host_uuid); +int glusterd_stop_volume (glusterd_volinfo_t *volinfo); + +/* Synctask part */ +int32_t glusterd_op_begin_synctask (rpcsvc_request_t *req, glusterd_op_t op, + void *dict); +int32_t +glusterd_defrag_event_notify_handle (dict_t *dict); + +int32_t +glusterd_txn_opinfo_dict_init (); + +void +glusterd_txn_opinfo_dict_fini (); + +void +glusterd_txn_opinfo_init (); + +/* snapshot */ +glusterd_snap_t* +glusterd_new_snap_object(); + +int32_t +glusterd_list_add_snapvol (glusterd_volinfo_t *origin_vol, + glusterd_volinfo_t *snap_vol); + +glusterd_snap_t* +glusterd_remove_snap_by_id (uuid_t snap_id); + +glusterd_snap_t* +glusterd_remove_snap_by_name (char *snap_name); + +glusterd_snap_t* +glusterd_find_snap_by_name (char *snap_name); + +glusterd_snap_t* +glusterd_find_snap_by_id (uuid_t snap_id); + +int +glusterd_snapshot_prevalidate (dict_t *dict, char **op_errstr, + dict_t *rsp_dict); +int +glusterd_snapshot_brickop (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int +glusterd_snapshot (dict_t *dict, char **op_errstr, dict_t *rsp_dict); +int +glusterd_snapshot_postvalidate (dict_t *dict, int32_t op_ret, char **op_errstr, + dict_t *rsp_dict); +char * +glusterd_build_snap_device_path (char *device, char *snapname); +int32_t +glusterd_snap_remove (dict_t *rsp_dict, glusterd_snap_t *snap, + gf_boolean_t remove_lvm, gf_boolean_t force); +int32_t +glusterd_snapshot_cleanup (dict_t *dict, char **op_errstr, dict_t *rsp_dict); + +int32_t +glusterd_add_missed_snaps_to_list (dict_t *dict, int32_t missed_snap_count); + +int32_t +glusterd_store_missed_snaps_list (char *missed_info, int32_t brick_num, + char *brick_path, int32_t snap_op, + int32_t snap_status); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd3_1-mops.c b/xlators/mgmt/glusterd/src/glusterd3_1-mops.c deleted file mode 100644 index b8abebf06..000000000 --- a/xlators/mgmt/glusterd/src/glusterd3_1-mops.c +++ /dev/null @@ -1,1581 +0,0 @@ -/* - Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. 
-*/ - - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "rpc-clnt.h" -#include "glusterd1-xdr.h" -#include "glusterd1.h" - -#include "compat-errno.h" -#include "glusterd-op-sm.h" -#include "glusterd-sm.h" -#include "glusterd.h" -#include "protocol-common.h" -#include "glusterd-utils.h" -#include <sys/uio.h> - - -#define SERVER_PATH_MAX (16 * 1024) - - -extern glusterd_op_info_t opinfo; - -int -glusterd_null (rpcsvc_request_t *req) -{ - - return 0; -} - -int -glusterd3_1_probe_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) -{ - gd1_mgmt_probe_rsp rsp = {{0},}; - glusterd_conf_t *conf = NULL; - int ret = 0; - char str[50] = {0,}; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_peerinfo_t *dup_peerinfo = NULL; - glusterd_friend_sm_event_t *event = NULL; - glusterd_peer_hostname_t *name = NULL; - glusterd_probe_ctx_t *ctx = NULL; - - conf = THIS->private; - - if (-1 == req->rpc_status) { - goto out; - } - - ret = gd_xdr_to_mgmt_probe_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - //rsp.op_ret = -1; - //rsp.op_errno = EINVAL; - goto out; - } - uuid_unparse (rsp.uuid, str); - - gf_log ("glusterd", GF_LOG_NORMAL, - "Received probe resp from uuid: %s, host: %s", - str, rsp.hostname); - if (rsp.op_ret != 0) { - ctx = ((call_frame_t *)myframe)->local; - ((call_frame_t *)myframe)->local = NULL; - - GF_ASSERT (ctx); - - if (ctx->req) { - glusterd_xfer_cli_probe_resp (ctx->req, rsp.op_ret, - rsp.op_errno, - ctx->hostname, ctx->port); - } - - glusterd_destroy_probe_ctx (ctx); - (void) glusterd_friend_remove (rsp.uuid, rsp.hostname); - ret = rsp.op_ret; - goto out; - } - ret = glusterd_friend_find (rsp.uuid, rsp.hostname, &peerinfo); - if (ret) { - GF_ASSERT (0); - } - - if (list_empty (&peerinfo->hostnames)) { - glusterd_friend_find (NULL, rsp.hostname, &dup_peerinfo); - GF_ASSERT (dup_peerinfo); - peerinfo->hostname = gf_strdup (rsp.hostname); - glusterd_peer_hostname_new (rsp.hostname, &name); - list_add_tail (&name->hostname_list, &peerinfo->hostnames); - peerinfo->rpc = dup_peerinfo->rpc; - peerinfo->connected = dup_peerinfo->connected; - glusterd_peer_destroy (dup_peerinfo); - } - if (!peerinfo->hostname) - peerinfo->hostname = gf_strdup (rsp.hostname); - uuid_copy (peerinfo->uuid, rsp.uuid); - - ret = glusterd_friend_sm_new_event - (GD_FRIEND_EVENT_INIT_FRIEND_REQ, &event); - - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to get event"); - goto out; - } - - event->peerinfo = peerinfo; - event->ctx = ((call_frame_t *)myframe)->local; - ((call_frame_t *)myframe)->local = NULL; - ret = glusterd_friend_sm_inject_event (event); - - - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - - gf_log ("glusterd", GF_LOG_NORMAL, "Received resp to probe req"); - -out: - if (rsp.hostname) - free (rsp.hostname);//malloced by xdr - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; -} - -int -glusterd3_1_friend_add_cbk (struct rpc_req * req, struct iovec *iov, - int count, void *myframe) -{ - gd1_mgmt_friend_rsp rsp = {{0},}; - glusterd_conf_t *conf = NULL; - int ret = -1; - glusterd_friend_sm_event_t *event = NULL; - glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; - char str[50] = {0,}; - int32_t op_ret = -1; - int32_t op_errno = -1; - glusterd_probe_ctx_t *ctx = NULL; - glusterd_friend_update_ctx_t *ev_ctx = NULL; - - conf = THIS->private; - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - 
} - - ret = gd_xdr_to_mgmt_friend_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } - uuid_unparse (rsp.uuid, str); - - op_ret = rsp.op_ret; - op_errno = rsp.op_errno; - - gf_log ("glusterd", GF_LOG_NORMAL, - "Received %s from uuid: %s, host: %s, port: %d", - (op_ret)?"RJT":"ACC", str, rsp.hostname, rsp.port); - - ret = glusterd_friend_find (rsp.uuid, rsp.hostname, &peerinfo); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "received friend add response from" - " unknown peer uuid: %s", str); - goto out; - } - - if (op_ret) - event_type = GD_FRIEND_EVENT_RCVD_RJT; - else - event_type = GD_FRIEND_EVENT_RCVD_ACC; - - ret = glusterd_friend_sm_new_event (event_type, &event); - - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to get event"); - goto out; - } - event->peerinfo = peerinfo; - ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), - gf_gld_mt_friend_update_ctx_t); - if (!ev_ctx) { - ret = -1; - goto out; - } - - uuid_copy (ev_ctx->uuid, rsp.uuid); - ev_ctx->hostname = gf_strdup (rsp.hostname); - - event->ctx = ev_ctx; - ret = glusterd_friend_sm_inject_event (event); - - if (ret) - goto out; - -out: - ctx = ((call_frame_t *)myframe)->local; - ((call_frame_t *)myframe)->local = NULL; - - GF_ASSERT (ctx); - - if (ctx->req)//reverse probe doesnt have req - ret = glusterd_xfer_cli_probe_resp (ctx->req, op_ret, op_errno, - ctx->hostname, ctx->port); - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - if (ctx) - glusterd_destroy_probe_ctx (ctx); - if (rsp.hostname) - free (rsp.hostname);//malloced by xdr - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; -} - -int -glusterd3_1_friend_remove_cbk (struct rpc_req * req, struct iovec *iov, - int count, void *myframe) -{ - gd1_mgmt_friend_rsp rsp = {{0},}; - glusterd_conf_t *conf = NULL; - int ret = -1; - glusterd_friend_sm_event_t *event = NULL; - glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; - char str[50] = {0,}; - int32_t op_ret = -1; - int32_t op_errno = -1; - glusterd_probe_ctx_t *ctx = NULL; - - conf = THIS->private; - GF_ASSERT (conf); - - ctx = ((call_frame_t *)myframe)->local; - ((call_frame_t *)myframe)->local = NULL; - GF_ASSERT (ctx); - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto inject; - } - - ret = gd_xdr_to_mgmt_friend_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto respond; - } - uuid_unparse (rsp.uuid, str); - - op_ret = rsp.op_ret; - op_errno = rsp.op_errno; - - gf_log ("glusterd", GF_LOG_NORMAL, - "Received %s from uuid: %s, host: %s, port: %d", - (op_ret)?"RJT":"ACC", str, rsp.hostname, rsp.port); - -inject: - ret = glusterd_friend_find (rsp.uuid, ctx->hostname, &peerinfo); - - if (ret) { - //can happen as part of rpc clnt connection cleanup - //when the frame timeout happens after 30 minutes - goto respond; - } - - event_type = GD_FRIEND_EVENT_REMOVE_FRIEND; - - ret = glusterd_friend_sm_new_event (event_type, &event); - - if (ret) { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to get event"); - goto respond; - } - event->peerinfo = peerinfo; - - ret = glusterd_friend_sm_inject_event (event); - - if (ret) - goto respond; - - glusterd_friend_sm (); - glusterd_op_sm (); - - op_ret = 0; - - -respond: - ret = glusterd_xfer_cli_deprobe_resp (ctx->req, op_ret, op_errno, - ctx->hostname); - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - 
- if (ctx) { - glusterd_broadcast_friend_delete (ctx->hostname, NULL); - glusterd_destroy_probe_ctx (ctx); - } - - if (rsp.hostname) - free (rsp.hostname);//malloced by xdr - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; -} - -int32_t -glusterd3_1_friend_update_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) -{ - gd1_mgmt_cluster_lock_rsp rsp = {{0},}; - int ret = -1; - int32_t op_ret = 0; - char str[50] = {0,}; - - GF_ASSERT (req); - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } - -/* ret = gd_xdr_to_mgmt_friend_update_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } - uuid_unparse (rsp.uuid, str); - - op_ret = rsp.op_ret; -*/ - gf_log ("glusterd", GF_LOG_NORMAL, - "Received %s from uuid: %s", - (op_ret)?"RJT":"ACC", str); - -out: - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; -} - -int32_t -glusterd3_1_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) -{ - gd1_mgmt_cluster_lock_rsp rsp = {{0},}; - int ret = -1; - int32_t op_ret = -1; - glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; - char str[50] = {0,}; - - GF_ASSERT (req); - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } - - ret = gd_xdr_to_mgmt_cluster_lock_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } - uuid_unparse (rsp.uuid, str); - - op_ret = rsp.op_ret; - - gf_log ("glusterd", GF_LOG_NORMAL, - "Received %s from uuid: %s", - (op_ret)?"RJT":"ACC", str); - - ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); - - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Lock response received from " - "unknown peer: %s", str); - } - - if (op_ret) { - event_type = GD_OP_EVENT_RCVD_RJT; - opinfo.op_ret = op_ret; - } else { - event_type = GD_OP_EVENT_RCVD_ACC; - } - - ret = glusterd_op_sm_inject_event (event_type, NULL); - - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - -out: - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; -} - -int32_t -glusterd3_1_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) -{ - gd1_mgmt_cluster_lock_rsp rsp = {{0},}; - int ret = -1; - int32_t op_ret = -1; - glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; - char str[50] = {0,}; - - - GF_ASSERT (req); - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } - - ret = gd_xdr_to_mgmt_cluster_unlock_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - goto out; - } - uuid_unparse (rsp.uuid, str); - - op_ret = rsp.op_ret; - - gf_log ("glusterd", GF_LOG_NORMAL, - "Received %s from uuid: %s", - (op_ret)?"RJT":"ACC", str); - - ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); - - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Unlock response received from " - "unknown peer %s", str); - } - - if (op_ret) { - event_type = GD_OP_EVENT_RCVD_RJT; - opinfo.op_ret = op_ret; - } else { - event_type = GD_OP_EVENT_RCVD_ACC; - } - - ret = glusterd_op_sm_inject_event (event_type, NULL); - - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - -out: - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; -} - -static int32_t 
-glusterd_rb_use_rsp_dict (dict_t *rsp_dict) -{ - int32_t src_port = 0; - int32_t dst_port = 0; - int ret = 0; - dict_t *ctx = NULL; - - - ctx = glusterd_op_get_ctx (GD_OP_REPLACE_BRICK); - if (!ctx) { - gf_log ("", GF_LOG_ERROR, - "Operation Context is not present"); - GF_ASSERT (0); - } - - if (rsp_dict) { - ret = dict_get_int32 (rsp_dict, "src-brick-port", &src_port); - if (ret == 0) { - gf_log ("", GF_LOG_DEBUG, - "src-brick-port=%d found", src_port); - } - - ret = dict_get_int32 (rsp_dict, "dst-brick-port", &dst_port); - if (ret == 0) { - gf_log ("", GF_LOG_DEBUG, - "dst-brick-port=%d found", dst_port); - } - - } - - if (src_port) { - ret = dict_set_int32 (ctx, "src-brick-port", - src_port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set src-brick"); - goto out; - } - } - - if (dst_port) { - ret = dict_set_int32 (ctx, "dst-brick-port", - dst_port); - if (ret) { - gf_log ("", GF_LOG_DEBUG, - "Could not set dst-brick"); - goto out; - } - - } - -out: - return ret; - -} - -int32_t -glusterd3_1_stage_op_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *myframe) -{ - gd1_mgmt_stage_op_rsp rsp = {{0},}; - int ret = -1; - int32_t op_ret = -1; - glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; - char str[50] = {0,}; - dict_t *dict = NULL; - - GF_ASSERT (req); - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - rsp.op_errstr = "error"; - goto out; - } - - ret = gd_xdr_to_mgmt_stage_op_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - rsp.op_errstr = "error"; - goto out; - } - uuid_unparse (rsp.uuid, str); - - if (rsp.dict.dict_len) { - /* Unserialize the dictionary */ - dict = dict_new (); - - ret = dict_unserialize (rsp.dict.dict_val, - rsp.dict.dict_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize rsp-buffer to dictionary"); - event_type = GD_OP_EVENT_RCVD_RJT; - goto out; - } else { - dict->extra_stdfree = rsp.dict.dict_val; - } - } - - op_ret = rsp.op_ret; - - gf_log ("glusterd", GF_LOG_NORMAL, - "Received %s from uuid: %s", - (op_ret)?"RJT":"ACC", str); - - ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); - - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Stage response received from " - "unknown peer: %s", str); - } - - if (op_ret) { - event_type = GD_OP_EVENT_RCVD_RJT; - opinfo.op_ret = op_ret; - opinfo.op_errstr = gf_strdup(rsp.op_errstr); - if (!opinfo.op_errstr) { - gf_log ("", GF_LOG_ERROR, "memory allocation failed"); - ret = -1; - goto out; - } - } else { - event_type = GD_OP_EVENT_RCVD_ACC; - } - - switch (rsp.op) { - case GD_OP_REPLACE_BRICK: - glusterd_rb_use_rsp_dict (dict); - break; - } - - ret = glusterd_op_sm_inject_event (event_type, NULL); - - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - -out: - if (rsp.op_errstr && strcmp (rsp.op_errstr, "error")) - free (rsp.op_errstr); //malloced by xdr - if (dict) { - if (!dict->extra_stdfree && rsp.dict.dict_val) - free (rsp.dict.dict_val); //malloced by xdr - dict_unref (dict); - } else { - if (rsp.dict.dict_val) - free (rsp.dict.dict_val); //malloced by xdr - } - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; -} - -static int32_t -glusterd_sync_use_rsp_dict (dict_t *rsp_dict) -{ - int ret = 0; - - GF_ASSERT (rsp_dict); - - if (!rsp_dict) { - goto out; - } - - ret = glusterd_import_friend_volumes (rsp_dict); -out: - return ret; - -} - -int32_t -glusterd3_1_commit_op_cbk (struct 
rpc_req *req, struct iovec *iov, - int count, void *myframe) -{ - gd1_mgmt_commit_op_rsp rsp = {{0},}; - int ret = -1; - int32_t op_ret = -1; - glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; - glusterd_peerinfo_t *peerinfo = NULL; - char str[50] = {0,}; - dict_t *dict = NULL; - - - GF_ASSERT (req); - - if (-1 == req->rpc_status) { - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - rsp.op_errstr = "error"; - event_type = GD_OP_EVENT_RCVD_RJT; - goto out; - } - - ret = gd_xdr_to_mgmt_commit_op_rsp (*iov, &rsp); - if (ret < 0) { - gf_log ("", GF_LOG_ERROR, "error"); - rsp.op_ret = -1; - rsp.op_errno = EINVAL; - rsp.op_errstr = "error"; - event_type = GD_OP_EVENT_RCVD_RJT; - goto out; - } - uuid_unparse (rsp.uuid, str); - - if (rsp.dict.dict_len) { - /* Unserialize the dictionary */ - dict = dict_new (); - - ret = dict_unserialize (rsp.dict.dict_val, - rsp.dict.dict_len, - &dict); - if (ret < 0) { - gf_log ("glusterd", GF_LOG_ERROR, - "failed to " - "unserialize rsp-buffer to dictionary"); - event_type = GD_OP_EVENT_RCVD_RJT; - goto out; - } else { - dict->extra_stdfree = rsp.dict.dict_val; - } - } - - op_ret = rsp.op_ret; - - gf_log ("glusterd", GF_LOG_NORMAL, - "Received %s from uuid: %s", - (op_ret)?"RJT":"ACC", str); - - ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); - - if (ret) { - gf_log ("", GF_LOG_CRITICAL, "Commit response received from " - "unknown peer: %s", str); - } - - if (op_ret) { - event_type = GD_OP_EVENT_RCVD_RJT; - opinfo.op_ret = op_ret; - opinfo.op_errstr = gf_strdup(rsp.op_errstr); - if (!opinfo.op_errstr) { - gf_log ("", GF_LOG_ERROR, "memory allocation failed"); - ret = -1; - goto out; - } - } else { - event_type = GD_OP_EVENT_RCVD_ACC; - switch (rsp.op) { - case GD_OP_REPLACE_BRICK: - ret = glusterd_rb_use_rsp_dict (dict); - if (ret) - goto out; - break; - case GD_OP_SYNC_VOLUME: - ret = glusterd_sync_use_rsp_dict (dict); - if (ret) - goto out; - break; - default: - break; - } - } - -out: - ret = glusterd_op_sm_inject_event (event_type, NULL); - - if (!ret) { - glusterd_friend_sm (); - glusterd_op_sm (); - } - - if (dict) - dict_unref (dict); - if (rsp.op_errstr && strcmp (rsp.op_errstr, "error")) - free (rsp.op_errstr); //malloced by xdr - GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); - return ret; -} - - - -int32_t -glusterd3_1_probe (call_frame_t *frame, xlator_t *this, - void *data) -{ - gd1_mgmt_probe_req req = {{0},}; - int ret = 0; - int port = 0; - char *hostname = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - dict_t *dict = NULL; - - if (!frame || !this || !data) { - ret = -1; - goto out; - } - - dict = data; - priv = this->private; - - GF_ASSERT (priv); - ret = dict_get_str (dict, "hostname", &hostname); - if (ret) - goto out; - ret = dict_get_int32 (dict, "port", &port); - if (ret) - port = GF_DEFAULT_BASE_PORT; - - ret = glusterd_friend_find (NULL, hostname, &peerinfo); - - if (ret) { - //We should not reach this state ideally - GF_ASSERT (0); - goto out; - } - - uuid_copy (req.uuid, priv->uuid); - req.hostname = gf_strdup (hostname); - req.port = port; - - ret = glusterd_submit_request (peerinfo, &req, frame, priv->mgmt, - GD_MGMT_PROBE_QUERY, - NULL, gd_xdr_from_mgmt_probe_req, - this, glusterd3_1_probe_cbk); - -out: - if (req.hostname) - GF_FREE (req.hostname); - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - - -int32_t -glusterd3_1_friend_add (call_frame_t *frame, xlator_t *this, - void *data) -{ - gd1_mgmt_friend_req req = {{0},}; - int ret = 0; - glusterd_peerinfo_t 
*peerinfo = NULL; - glusterd_conf_t *priv = NULL; - glusterd_friend_sm_event_t *event = NULL; - glusterd_friend_req_ctx_t *ctx = NULL; - dict_t *vols = NULL; - - - if (!frame || !this || !data) { - ret = -1; - goto out; - } - - event = data; - priv = this->private; - - GF_ASSERT (priv); - - ctx = event->ctx; - - peerinfo = event->peerinfo; - - ret = glusterd_build_volume_dict (&vols); - if (ret) - goto out; - - uuid_copy (req.uuid, priv->uuid); - req.hostname = peerinfo->hostname; - req.port = peerinfo->port; - - ret = dict_allocate_and_serialize (vols, &req.vols.vols_val, - (size_t *)&req.vols.vols_len); - if (ret) - goto out; - - ret = glusterd_submit_request (peerinfo, &req, frame, priv->mgmt, - GD_MGMT_FRIEND_ADD, - NULL, gd_xdr_from_mgmt_friend_req, - this, glusterd3_1_friend_add_cbk); - - -out: - if (req.vols.vols_val) - GF_FREE (req.vols.vols_val); - - if (vols) - dict_unref (vols); - - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - -int32_t -glusterd3_1_friend_remove (call_frame_t *frame, xlator_t *this, - void *data) -{ - gd1_mgmt_friend_req req = {{0},}; - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - glusterd_friend_sm_event_t *event = NULL; - glusterd_probe_ctx_t *ctx = NULL; - glusterd_friend_sm_event_t *new_event = NULL; - glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE; - - - if (!frame || !this || !data) { - ret = -1; - goto out; - } - - event = data; - priv = this->private; - - GF_ASSERT (priv); - - ctx = event->ctx; - - peerinfo = event->peerinfo; - - ret = -1; - if (peerinfo->connected) { - uuid_copy (req.uuid, priv->uuid); - req.hostname = peerinfo->hostname; - req.port = peerinfo->port; - ret = glusterd_submit_request (peerinfo, &req, frame, priv->mgmt, - GD_MGMT_FRIEND_REMOVE, - NULL, gd_xdr_from_mgmt_friend_req, - this, glusterd3_1_friend_remove_cbk); - } else { - event_type = GD_FRIEND_EVENT_REMOVE_FRIEND; - - ret = glusterd_friend_sm_new_event (event_type, &new_event); - - if (!ret) { - new_event->peerinfo = peerinfo; - ret = glusterd_friend_sm_inject_event (new_event); - } else { - gf_log ("glusterd", GF_LOG_ERROR, - "Unable to get event"); - } - - if (ctx) - ret = glusterd_xfer_cli_deprobe_resp (ctx->req, ret, 0, - ctx->hostname); - glusterd_friend_sm (); - glusterd_op_sm (); - - if (ctx) { - glusterd_broadcast_friend_delete (ctx->hostname, NULL); - glusterd_destroy_probe_ctx (ctx); - } - } - -out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - - -int32_t -glusterd3_1_friend_update (call_frame_t *frame, xlator_t *this, - void *data) -{ - gd1_mgmt_friend_update req = {{0},}; - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - glusterd_friend_update_ctx_t *ctx = NULL; - dict_t *friends = NULL; - char key[100] = {0,}; - char uuid_buf[50] = {0,}; - char *dup_buf = NULL; - int32_t count = 0; - char *dict_buf = NULL; - size_t len = -1; - call_frame_t *dummy_frame = NULL; - - - if ( !this || !data) { - ret = -1; - goto out; - } - - ctx = data; - friends = dict_new (); - if (!friends) - goto out; - - priv = this->private; - - GF_ASSERT (priv); - - snprintf (key, sizeof (key), "op"); - ret = dict_set_int32 (friends, key, ctx->op); - if (ret) - goto out; - - if (GD_FRIEND_UPDATE_ADD == ctx->op) { - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - count++; - uuid_unparse (peerinfo->uuid, uuid_buf); - snprintf (key, sizeof (key), "friend%d.uuid", count); - dup_buf = gf_strdup (uuid_buf); - ret = dict_set_dynstr 
(friends, key, dup_buf); - if (ret) - goto out; - snprintf (key, sizeof (key), "friend%d.hostname", count); - ret = dict_set_str (friends, key, peerinfo->hostname); - if (ret) - goto out; - gf_log ("", GF_LOG_NORMAL, "Added uuid: %s, host: %s", - dup_buf, peerinfo->hostname); - } - } else { - snprintf (key, sizeof (key), "hostname"); - ret = dict_set_str (friends, key, ctx->hostname); - if (ret) - goto out; - } - - ret = dict_set_int32 (friends, "count", count); - if (ret) - goto out; - - - ret = dict_allocate_and_serialize (friends, &dict_buf, (size_t *)&len); - - if (ret) - goto out; - - req.friends.friends_val = dict_buf; - req.friends.friends_len = len; - - uuid_copy (req.uuid, priv->uuid); - - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - if (!peerinfo->connected) - continue; - dummy_frame = create_frame (this, this->ctx->pool); - ret = glusterd_submit_request (peerinfo, &req, dummy_frame, - priv->mgmt, - GD_MGMT_FRIEND_UPDATE, - NULL, gd_xdr_from_mgmt_friend_update, - this, glusterd3_1_friend_update_cbk); - } - -out: - if (friends) - dict_unref (friends); - if (req.friends.friends_val) - GF_FREE (req.friends.friends_val); - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - -int32_t -glusterd3_1_cluster_lock (call_frame_t *frame, xlator_t *this, - void *data) -{ - gd1_mgmt_cluster_lock_req req = {{0},}; - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - call_frame_t *dummy_frame = NULL; - int32_t pending_lock = 0; - - if (!this) { - ret = -1; - goto out; - } - - priv = this->private; - glusterd_get_uuid (&req.uuid); - - GF_ASSERT (priv); - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - GF_ASSERT (peerinfo); - - if (!peerinfo->connected) - continue; - if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && - (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) - continue; - - dummy_frame = create_frame (this, this->ctx->pool); - - if (!dummy_frame) - continue; - - ret = glusterd_submit_request (peerinfo, &req, dummy_frame, - priv->mgmt, GD_MGMT_CLUSTER_LOCK, - NULL, - gd_xdr_from_mgmt_cluster_lock_req, - this, glusterd3_1_cluster_lock_cbk); - if (!ret) - pending_lock++; - //TODO: Instead of keeping count, maintain a list of locked - //UUIDs. - } - - gf_log ("glusterd", GF_LOG_NORMAL, "Sent lock req to %d peers", - pending_lock); - opinfo.pending_count = pending_lock; -out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - -int32_t -glusterd3_1_cluster_unlock (call_frame_t *frame, xlator_t *this, - void *data) -{ - gd1_mgmt_cluster_lock_req req = {{0},}; - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - int32_t pending_unlock = 0; - call_frame_t *dummy_frame = NULL; - - if (!this ) { - ret = -1; - goto out; - } - - priv = this->private; - - glusterd_get_uuid (&req.uuid); - - GF_ASSERT (priv); - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - GF_ASSERT (peerinfo); - - if (!peerinfo->connected) - continue; - if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && - (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) - continue; - - dummy_frame = create_frame (this, this->ctx->pool); - - if (!dummy_frame) - continue; - - ret = glusterd_submit_request (peerinfo, &req, dummy_frame, - priv->mgmt, GD_MGMT_CLUSTER_UNLOCK, - NULL, - gd_xdr_from_mgmt_cluster_unlock_req, - this, glusterd3_1_cluster_unlock_cbk); - if (!ret) - pending_unlock++; - //TODO: Instead of keeping count, maintain a list of locked - //UUIDs. 
- } - - gf_log ("glusterd", GF_LOG_NORMAL, "Sent unlock req to %d peers", - pending_unlock); - opinfo.pending_count = pending_unlock; - -out: - gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - -int32_t -glusterd3_1_stage_op (call_frame_t *frame, xlator_t *this, - void *data) -{ - gd1_mgmt_stage_op_req *req = NULL; - int ret = 0; - glusterd_peerinfo_t *peerinfo = NULL; - glusterd_conf_t *priv = NULL; - int32_t pending_peer = 0; - int i = 0; - call_frame_t *dummy_frame = NULL; - char *op_errstr = NULL; - - if (!this) { - ret = -1; - goto out; - } - - priv = this->private; - - GF_ASSERT (priv); - - for ( i = GD_OP_NONE; i < GD_OP_MAX; i++) { - if (opinfo.pending_op[i]) - break; - } - - if (GD_OP_MAX == i) { - - //No pending ops, inject stage_acc - - ret = glusterd_op_sm_inject_event - (GD_OP_EVENT_STAGE_ACC, NULL); - - return ret; - } - - glusterd_op_clear_pending_op (i); - - - ret = glusterd_op_build_payload (i, &req); - - if (ret) - goto out; - - /* rsp_dict NULL from source */ - ret = glusterd_op_stage_validate (req, &op_errstr, NULL); - - if (ret) { - gf_log ("", GF_LOG_ERROR, "Staging failed"); - opinfo.op_errstr = op_errstr; - goto out; - } - - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { - GF_ASSERT (peerinfo); - - if (!peerinfo->connected) - continue; - if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) && - (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) - continue; - - dummy_frame = create_frame (this, this->ctx->pool); - - if (!dummy_frame) - continue; - - ret = glusterd_submit_request (peerinfo, req, dummy_frame, - priv->mgmt, GD_MGMT_STAGE_OP, - NULL, - gd_xdr_from_mgmt_stage_op_req, - this, glusterd3_1_stage_op_cbk); - if (!ret) - pending_peer++; - //TODO: Instead of keeping count, maintain a list of pending - //UUIDs. 
-        }
-
-        gf_log ("glusterd", GF_LOG_NORMAL, "Sent op req to %d peers",
-                pending_peer);
-        opinfo.pending_count = pending_peer;
-
-out:
-        if (ret) {
-                glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL);
-                opinfo.op_ret = ret;
-        }
-        if (req) {
-                GF_FREE (req->buf.buf_val);
-                GF_FREE (req);
-        }
-        gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
-        return ret;
-}
-
-int32_t
-glusterd3_1_commit_op (call_frame_t *frame, xlator_t *this,
-                       void *data)
-{
-        gd1_mgmt_commit_op_req          *req = NULL;
-        int                             ret = 0;
-        glusterd_peerinfo_t             *peerinfo = NULL;
-        glusterd_conf_t                 *priv = NULL;
-        int32_t                         pending_peer = 0;
-        int                             i = 0;
-        call_frame_t                    *dummy_frame = NULL;
-        char                            *op_errstr = NULL;
-
-        if (!this) {
-                ret = -1;
-                goto out;
-        }
-
-        priv = this->private;
-
-        GF_ASSERT (priv);
-
-        for ( i = GD_OP_NONE; i < GD_OP_MAX; i++) {
-                if (opinfo.commit_op[i])
-                        break;
-        }
-
-        if (GD_OP_MAX == i) {
-                //No pending ops, return
-                return ret;
-        }
-
-        glusterd_op_clear_commit_op (i);
-
-        ret = glusterd_op_build_payload (i, (gd1_mgmt_stage_op_req **)&req);
-
-        if (ret)
-                goto out;
-
-        ret = glusterd_op_commit_perform ((gd1_mgmt_stage_op_req *)req, &op_errstr,
-                                          NULL);//rsp_dict invalid for source
-
-        if (ret) {
-                gf_log ("", GF_LOG_ERROR, "Commit failed");
-                opinfo.op_errstr = op_errstr;
-                goto out;
-        }
-
-        list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
-                GF_ASSERT (peerinfo);
-
-                if (!peerinfo->connected)
-                        continue;
-                if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
-                    (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
-                        continue;
-
-                dummy_frame = create_frame (this, this->ctx->pool);
-
-                if (!dummy_frame)
-                        continue;
-
-                ret = glusterd_submit_request (peerinfo, req, dummy_frame,
-                                               priv->mgmt, GD_MGMT_COMMIT_OP,
-                                               NULL,
-                                               gd_xdr_from_mgmt_commit_op_req,
-                                               this, glusterd3_1_commit_op_cbk);
-                if (!ret)
-                        pending_peer++;
-                //TODO: Instead of keeping count, maintain a list of pending
-                //UUIDs.
-        }
-
-        gf_log ("glusterd", GF_LOG_NORMAL, "Sent op req to %d peers",
-                pending_peer);
-        opinfo.pending_count = pending_peer;
-
-out:
-        if (ret) {
-                glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL);
-                opinfo.op_ret = ret;
-        }
-        if (req) {
-                GF_FREE (req->buf.buf_val);
-                GF_FREE (req);
-        }
-        gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
-        return ret;
-}
-
-
-int
-glusterd_handle_rpc_msg (rpcsvc_request_t *req)
-{
-        int             ret = -1;
-        gf_boolean_t    is_cli_req = _gf_false;
-        char            *op_errstr = NULL;
-
-        GF_ASSERT (req);
-
-        is_cli_req = glusterd_is_cli_op_req (req->procnum);
-
-        if (is_cli_req) {
-                ret = glusterd_op_set_cli_op (req->procnum);
-
-                if (ret) {
-                        gf_log ("", GF_LOG_ERROR, "Unable to set cli op: %d",
-                                ret);
-                        goto out;
-                }
-        }
-
-        switch (req->procnum) {
-                case GD_MGMT_PROBE_QUERY:
-                        ret = glusterd_handle_probe_query (req);
-                        break;
-
-                case GD_MGMT_FRIEND_ADD:
-                        ret = glusterd_handle_incoming_friend_req (req);
-                        if (ret == GLUSTERD_CONNECTION_AWAITED)
-                                return 0;
-                        break;
-
-                case GD_MGMT_CLUSTER_LOCK:
-                        ret = glusterd_handle_cluster_lock (req);
-                        break;
-
-                case GD_MGMT_CLUSTER_UNLOCK:
-                        ret = glusterd_handle_cluster_unlock (req);
-                        break;
-
-                case GD_MGMT_STAGE_OP:
-                        ret = glusterd_handle_stage_op (req);
-                        break;
-
-                case GD_MGMT_COMMIT_OP:
-                        ret = glusterd_handle_commit_op (req);
-                        break;
-
-                case GD_MGMT_CLI_PROBE:
-                        ret = glusterd_handle_cli_probe (req);
-                        if (ret == GLUSTERD_CONNECTION_AWAITED)
-                                return 0;
-                        break;
-
-                case GD_MGMT_CLI_CREATE_VOLUME:
-                        ret = glusterd_handle_create_volume (req);
-                        break;
-
-                case GD_MGMT_CLI_DEPROBE:
-                        ret = glusterd_handle_cli_deprobe (req);
-                        break;
-
-                case GD_MGMT_FRIEND_REMOVE:
-                        ret = glusterd_handle_incoming_unfriend_req (req);
-                        break;
-
-                case GD_MGMT_CLI_LIST_FRIENDS:
-                        ret = glusterd_handle_cli_list_friends (req);
-                        break;
-
-                case GD_MGMT_CLI_START_VOLUME:
-                        ret = glusterd_handle_cli_start_volume (req);
-                        break;
-
-                case GD_MGMT_CLI_STOP_VOLUME:
-                        ret = glusterd_handle_cli_stop_volume (req);
-                        break;
-
-                case GD_MGMT_CLI_DELETE_VOLUME:
-                        ret = glusterd_handle_cli_delete_volume (req);
-                        break;
-
-                case GD_MGMT_FRIEND_UPDATE:
-                        ret = glusterd_handle_friend_update (req);
-                        break;
-
-                case GD_MGMT_CLI_GET_VOLUME:
-                        ret = glusterd_handle_cli_get_volume (req);
-                        break;
-
-                case GD_MGMT_CLI_DEFRAG_VOLUME:
-                        ret = glusterd_handle_defrag_volume (req);
-                        break;
-
-                case GD_MGMT_CLI_ADD_BRICK:
-                        ret = glusterd_handle_add_brick (req);
-                        break;
-
-                case GD_MGMT_CLI_REPLACE_BRICK:
-                        ret = glusterd_handle_replace_brick (req);
-                        break;
-
-                case GD_MGMT_CLI_REMOVE_BRICK:
-                        ret = glusterd_handle_remove_brick (req);
-                        break;
-
-                case GD_MGMT_CLI_LOG_FILENAME:
-                        ret = glusterd_handle_log_filename (req);
-                        break;
-
-                case GD_MGMT_CLI_LOG_LOCATE:
-                        ret = glusterd_handle_log_locate (req);
-                        break;
-
-                case GD_MGMT_CLI_LOG_ROTATE:
-                        ret = glusterd_handle_log_rotate (req);
-                        break;
-
-                case GD_MGMT_CLI_SET_VOLUME:
-                        ret = glusterd_handle_set_volume (req);
-                        break;
-
-                case GD_MGMT_CLI_SYNC_VOLUME:
-                        ret = glusterd_handle_sync_volume (req);
-                        break;
-
-                case GD_MGMT_CLI_RESET_VOLUME:
-                        ret = glusterd_handle_reset_volume (req);
-                        break;
-
-                default:
-                        gf_log("", GF_LOG_ERROR, "Recieved Invalid procnum:%d",
-                               req->procnum);
-                        GF_ASSERT (0);
-        }
-
-        glusterd_friend_sm ();
-        glusterd_op_sm ();
-
-out:
-        if (ret && is_cli_req) {
-                /* if we are sending a reply here, then return value should
-                   be 0, and we should not point to any RPC errors, because
-                   otherwise rpcsvc.c will send an error reply for the same
-                   request, which causes double replies */
-                ret = glusterd_op_send_cli_response (req->procnum, ret, 0, req, NULL, op_errstr);
-        }
-        if (op_errstr && (strcmp (op_errstr, "")))
-                GF_FREE (op_errstr);
-        if (ret)
-                gf_log ("", GF_LOG_WARNING, "Returning %d", ret);
-
-        return ret;
-}
-
-
-rpcsvc_actor_t glusterd1_mgmt_actors[] = {
-        [GD_MGMT_NULL] = { "NULL", GD_MGMT_NULL, glusterd_null, NULL, NULL},
-        [GD_MGMT_PROBE_QUERY] = { "PROBE_QUERY", GD_MGMT_PROBE_QUERY, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_FRIEND_ADD] = { "FRIEND_ADD", GD_MGMT_FRIEND_ADD, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_FRIEND_REMOVE] = { "FRIEND_REMOVE", GD_MGMT_FRIEND_REMOVE, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_FRIEND_UPDATE] = { "FRIEND_UPDATE", GD_MGMT_FRIEND_UPDATE, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLUSTER_LOCK] = { "CLUSTER_LOCK", GD_MGMT_CLUSTER_LOCK, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLUSTER_UNLOCK] = { "CLUSTER_UNLOCK", GD_MGMT_CLUSTER_UNLOCK, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_STAGE_OP] = { "STAGE_OP", GD_MGMT_STAGE_OP, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_COMMIT_OP] = { "COMMIT_OP", GD_MGMT_COMMIT_OP, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_PROBE] = { "CLI_PROBE", GD_MGMT_CLI_PROBE, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_CREATE_VOLUME] = { "CLI_CREATE_VOLUME", GD_MGMT_CLI_CREATE_VOLUME, glusterd_handle_rpc_msg, NULL,NULL},
-        [GD_MGMT_CLI_DEFRAG_VOLUME] = { "CLI_DEFRAG_VOLUME", GD_MGMT_CLI_DEFRAG_VOLUME, glusterd_handle_defrag_volume, NULL,NULL},
-        [GD_MGMT_CLI_DEPROBE] = { "FRIEND_REMOVE", GD_MGMT_CLI_DEPROBE, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_LIST_FRIENDS] = { "LIST_FRIENDS", GD_MGMT_CLI_LIST_FRIENDS, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_START_VOLUME] = { "START_VOLUME", GD_MGMT_CLI_START_VOLUME, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_STOP_VOLUME] = { "STOP_VOLUME", GD_MGMT_CLI_STOP_VOLUME, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GD_MGMT_CLI_DELETE_VOLUME, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_GET_VOLUME] = { "GET_VOLUME", GD_MGMT_CLI_GET_VOLUME, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_ADD_BRICK] = { "ADD_BRICK", GD_MGMT_CLI_ADD_BRICK, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GD_MGMT_CLI_REPLACE_BRICK, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GD_MGMT_CLI_REMOVE_BRICK, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_LOG_FILENAME] = { "LOG FILENAME", GD_MGMT_CLI_LOG_FILENAME, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_LOG_LOCATE] = { "LOG LOCATE", GD_MGMT_CLI_LOG_LOCATE, glusterd_handle_log_locate, NULL, NULL},
-        [GD_MGMT_CLI_LOG_ROTATE] = { "LOG FILENAME", GD_MGMT_CLI_LOG_ROTATE, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_SET_VOLUME] = { "SET_VOLUME", GD_MGMT_CLI_SET_VOLUME, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_SYNC_VOLUME] = { "SYNC_VOLUME", GD_MGMT_CLI_SYNC_VOLUME, glusterd_handle_rpc_msg, NULL, NULL},
-        [GD_MGMT_CLI_RESET_VOLUME] = { "RESET_VOLUME", GD_MGMT_CLI_RESET_VOLUME, glusterd_handle_rpc_msg, NULL, NULL}
-};
-
-/*rpcsvc_actor_t glusterd1_mgmt_actors[] = {
-        [GD_MGMT_NULL] = { "NULL", GD_MGMT_NULL, glusterd_null, NULL, NULL},
-        [GD_MGMT_PROBE_QUERY] = { "PROBE_QUERY", GD_MGMT_PROBE_QUERY, glusterd_handle_probe_query, NULL, NULL},
-        [GD_MGMT_FRIEND_ADD] = { "FRIEND_ADD", GD_MGMT_FRIEND_ADD, glusterd_handle_incoming_friend_req, NULL, NULL},
-        [GD_MGMT_CLUSTER_LOCK] = { "CLUSTER_LOCK", GD_MGMT_CLUSTER_LOCK, glusterd_handle_cluster_lock, NULL, NULL},
"CLUSTER_LOCK", GD_MGMT_CLUSTER_LOCK, glusterd_handle_cluster_lock, NULL, NULL}, - [GD_MGMT_CLUSTER_UNLOCK] = { "CLUSTER_UNLOCK", GD_MGMT_CLUSTER_UNLOCK, glusterd_handle_cluster_unlock, NULL, NULL}, - [GD_MGMT_STAGE_OP] = { "STAGE_OP", GD_MGMT_STAGE_OP, glusterd_handle_stage_op, NULL, NULL}, - [GD_MGMT_COMMIT_OP] = { "COMMIT_OP", GD_MGMT_COMMIT_OP, glusterd_handle_commit_op, NULL, NULL}, - [GD_MGMT_CLI_PROBE] = { "CLI_PROBE", GD_MGMT_CLI_PROBE, glusterd_handle_cli_probe, NULL, NULL}, -};*/ - - -struct rpcsvc_program glusterd1_mop_prog = { - .progname = "GlusterD0.0.1", - .prognum = GLUSTERD1_MGMT_PROGRAM, - .progver = GLUSTERD1_MGMT_VERSION, - .numactors = GLUSTERD1_MGMT_PROCCNT, - .actors = glusterd1_mgmt_actors, -}; - - -struct rpc_clnt_procedure glusterd3_1_clnt_mgmt_actors[GD_MGMT_MAXVALUE] = { - [GD_MGMT_NULL] = {"NULL", NULL }, - [GD_MGMT_PROBE_QUERY] = { "PROBE_QUERY", glusterd3_1_probe}, - [GD_MGMT_FRIEND_ADD] = { "FRIEND_ADD", glusterd3_1_friend_add }, - [GD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK", glusterd3_1_cluster_lock}, - [GD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK", glusterd3_1_cluster_unlock}, - [GD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd3_1_stage_op}, - [GD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd3_1_commit_op}, - [GD_MGMT_FRIEND_REMOVE] = { "FRIEND_REMOVE", glusterd3_1_friend_remove}, - [GD_MGMT_FRIEND_UPDATE] = { "FRIEND_UPDATE", glusterd3_1_friend_update}, -}; - - - -struct rpc_clnt_program glusterd3_1_mgmt_prog = { - .progname = "Mgmt 3.1", - .prognum = GLUSTERD1_MGMT_PROGRAM, - .progver = GLUSTERD1_MGMT_VERSION, - .proctable = glusterd3_1_clnt_mgmt_actors, - .numproc = GLUSTERD1_MGMT_PROCCNT, -}; |

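The deleted glusterd_handle_rpc_msg() demultiplexes every procedure through a single switch before running the state machines, even though the rpcsvc_actor_t table beside it could dispatch directly; most of its entries simply point back at that one handler. A table-driven dispatcher removes the switch entirely, which is the direction the per-procedure actors elsewhere in this patch take. The sketch below uses invented procedure numbers and handlers rather than the real GD_MGMT_* values.

#include <stdio.h>

/* Invented procedure numbers; the real code uses the GD_MGMT_* enum. */
enum { PROC_NULL, PROC_LOCK, PROC_UNLOCK, PROC_MAX };

typedef int (*actor_fn) (void);

typedef struct {
        const char *name;
        actor_fn    fn;
} actor_t;

static int handle_null (void)   { puts ("null");   return 0; }
static int handle_lock (void)   { puts ("lock");   return 0; }
static int handle_unlock (void) { puts ("unlock"); return 0; }

/* One entry per procedure, indexed by procnum, as rpcsvc actor tables are. */
static actor_t actors[PROC_MAX] = {
        [PROC_NULL]   = { "NULL",   handle_null },
        [PROC_LOCK]   = { "LOCK",   handle_lock },
        [PROC_UNLOCK] = { "UNLOCK", handle_unlock },
};

static int
dispatch (int procnum)
{
        /* The bounds check replaces the switch's default: case. */
        if (procnum < 0 || procnum >= PROC_MAX || !actors[procnum].fn) {
                fprintf (stderr, "received invalid procnum: %d\n", procnum);
                return -1;
        }
        return actors[procnum].fn ();
}

int
main (void)
{
        dispatch (PROC_LOCK);
        dispatch (42);          /* rejected, like the switch's default */
        return 0;
}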