diff options
Diffstat (limited to 'xlators/mgmt')
-rw-r--r-- | xlators/mgmt/glusterd/src/Makefile.am | 5 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-etcd.c | 86 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-etcd.h | 23 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 12 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-sm.c | 16 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 3 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 343 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.h | 4 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 13 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.c | 16 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 20 |
11 files changed, 528 insertions, 13 deletions
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index a6f49ae01..cbb6353f8 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -11,7 +11,8 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ glusterd-volgen.c glusterd-rebalance.c glusterd-quota.c \ glusterd-geo-rep.c glusterd-replace-brick.c glusterd-log-ops.c \ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ - glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c + glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \ + glusterd-etcd.c glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la \ @@ -21,7 +22,7 @@ glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \ glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \ - glusterd-syncop.h glusterd-hooks.h + glusterd-syncop.h glusterd-hooks.h glusterd-etcd.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \ diff --git a/xlators/mgmt/glusterd/src/glusterd-etcd.c b/xlators/mgmt/glusterd/src/glusterd-etcd.c new file mode 100644 index 000000000..3382e20ae --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-etcd.c @@ -0,0 +1,86 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <errno.h> +#include <signal.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include "glusterfs.h" +#include "run.h" +#include "glusterd-etcd.h" + +#define GLUSTERD_ETCD_DIR "/var/lib/glusterd/etcd" +#define GLUSTERD_ETCD_CMD "/root/etcd/etcd" + +pid_t +start_etcd (char *this_host, char *other_host) +{ + runner_t runner; + char me[256]; + + if (gethostname(me,sizeof(me)-1) != 0) { + gf_log (__func__, GF_LOG_ERROR, "gethostname failed?!?"); + return -1; + } + me[sizeof(me)-1] = '\0'; + + if ((mkdir(GLUSTERD_ETCD_DIR,0700) < 0) && (errno != EEXIST)) { + gf_log (__func__, GF_LOG_ERROR, + "failed to create %s", GLUSTERD_ETCD_DIR); + return -1; + } + + runinit (&runner); + runner_add_args (&runner, GLUSTERD_ETCD_CMD, + "-n", this_host, "-d", GLUSTERD_ETCD_DIR, + "-c", NULL); + runner_argprintf( &runner, "%s:4001", me); + runner_add_arg (&runner, "-s"); + runner_argprintf (&runner, "%s:7001", me); + if (other_host) { + runner_add_arg (&runner, "-C"); + runner_argprintf (&runner, "%s:7001", other_host); + gf_log (__func__, GF_LOG_INFO, "starting etcd via %s", other_host); + } else { + gf_log (__func__, GF_LOG_INFO, "starting etcd standalone"); + } + + /* + * Runner_run would wait for it. Runner_run_nowait would not wait, + * but would detach it so thoroughly that it won't die when we do. + * Also, runner->chpid would be the PID of the transient middle + * process, not the one we might actually need to kill later. This + * seems to do exactly what we need. + */ + if (runner_start(&runner) != 0) { + gf_log (__func__, GF_LOG_ERROR, + "failed to start %s", GLUSTERD_ETCD_CMD); + return -1; + } + + return runner.chpid; +} + +void +stop_etcd (pid_t pid) +{ + if (pid > 0) { + gf_log (__func__, GF_LOG_INFO, "killing etcd %d", pid); + (void)kill(pid,SIGKILL); + (void)waitpid(pid,NULL,0); + } +} + +void +nuke_etcd_dir (void) +{ + (void)runcmd("rm","-rf",GLUSTERD_ETCD_DIR,NULL); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-etcd.h b/xlators/mgmt/glusterd/src/glusterd-etcd.h new file mode 100644 index 000000000..9459f6bbd --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-etcd.h @@ -0,0 +1,23 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_ETCD_H_ +#define _GLUSTERD_ETCD_H_ + +#include <sys/types.h> +#include "glusterfs.h" + +pid_t start_etcd (char *this_host, char *other_host); + +void stop_etcd (pid_t pid); + +void nuke_etcd_dir (void); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 0407741bb..e296509d8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -49,6 +49,7 @@ #include "globals.h" #include "glusterd-syncop.h" +#include "glusterd-etcd.h" #ifdef HAVE_BD_XLATOR #include <lvm2app.h> @@ -2380,7 +2381,18 @@ __glusterd_handle_probe_query (rpcsvc_request_t *req) gf_log ("", GF_LOG_ERROR, "Failed to add peer %s", remote_hostname); rsp.op_errno = GF_PROBE_ADD_FAILED; + goto respond; } + gf_log (THIS->name, GF_LOG_INFO, + "joining, should point etcd at %s", remote_hostname); + /* + * We should have started a standalone etcd before. Now we + * need a new one, with a new config. + */ + stop_etcd(conf->etcd_pid); + nuke_etcd_dir(); + conf->etcd_pid = start_etcd (uuid_utoa(MY_UUID), + remote_hostname); } respond: diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index c671edf68..2490ba665 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -34,6 +34,7 @@ #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" +#include "glusterd-etcd.h" static struct list_head gd_friend_sm_queue; @@ -596,6 +597,9 @@ glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event, "Peer detach cleanup was not successful"); ret = 0; } + gf_log (THIS->name, GF_LOG_INFO, "detached, stopping etcd"); + stop_etcd(priv->etcd_pid); + nuke_etcd_dir(); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); @@ -642,6 +646,11 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) int status = 0; int32_t op_ret = -1; int32_t op_errno = 0; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + + this = THIS; + priv = this->private; GF_ASSERT (ctx); ev_ctx = ctx; @@ -692,6 +701,13 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) peerinfo->hostname, ev_ctx->port, op_ret, op_errno); + // apply a deterministic function to decide via whom we should join the cluster + if (strcmp(peerinfo->hostname, ev_ctx->hostname) > 0) { + stop_etcd(priv->etcd_pid); + nuke_etcd_dir(); + priv->etcd_pid = start_etcd (uuid_utoa(MY_UUID), peerinfo->hostname); + } + out: gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 4a88b5b35..fc4018190 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1534,6 +1534,7 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, glusterd_conf_t *priv = NULL; char pidfile[PATH_MAX] = {0,}; int ret = 0; + glusterd_conf_t *conf = NULL; GF_ASSERT (volinfo); GF_ASSERT (brickinfo); @@ -1552,6 +1553,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, if (ret == 0) { glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); (void) glusterd_brick_unlink_socket_file (volinfo, brickinfo); + GLUSTERD_GET_BRICK_RECON_PIDFILE (pidfile, volinfo, brickinfo, priv); + ret = glusterd_service_stop ("recon", pidfile, SIGTERM, _gf_false); } } diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 0a6746349..a94a47af3 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -101,7 +101,6 @@ xlator_instantiate_va (const char *type, const char *format, va_list arg) return NULL; } -#ifdef __not_used_as_of_now_ static xlator_t * xlator_instantiate (const char *type, const char *format, ...) { @@ -114,7 +113,6 @@ xlator_instantiate (const char *type, const char *format, ...) return xl; } -#endif static int volgen_xlator_link (xlator_t *pxl, xlator_t *cxl) @@ -1400,6 +1398,308 @@ server_spec_extended_option_handler (volgen_graph_t *graph, static void get_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo); +xlator_t * +add_one_peer (volgen_graph_t *graph, glusterd_brickinfo_t *peer, + char *volname, uint16_t index) +{ + xlator_t *kid; + + kid = volgen_graph_add_nolink (graph, "protocol/client", + "%s-client-%u", volname, + index++); + if (!kid) { + return NULL; + } + + /* TBD: figure out where to get the proper transport list */ + if (xlator_set_option(kid,"transport-type","socket")) { + return NULL; + } + if (xlator_set_option(kid,"remote-host",peer->hostname)) { + return NULL; + } + if (xlator_set_option(kid,"remote-subvolume",peer->path)) { + return NULL; + } + /* TBD: deal with RDMA, SSL */ + + return kid; +} + +void +assign_groups (glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + uint16_t group_num = 0; + int in_group = 0; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + brickinfo->group = group_num; + if (++in_group >= volinfo->replica_count) { + in_group = 0; + ++group_num; + } + } +} + +int +add_nsr_stuff (volgen_graph_t *graph, char *volname, + glusterd_brickinfo_t *brickinfo, glusterd_volinfo_t *volinfo, + char *changelog_basepath) +{ + xlator_t *me; + xlator_t *kid; + glusterd_brickinfo_t *peer; + uint16_t index = 0; + //uint32_t i=0; + char *leader_opt; + uint32_t replica_group_size = 1; + char dst[NSR_MAX_PATH_SIZE]; + char local_path[NSR_MAX_PATH_SIZE]; + char local_name[NSR_MAX_PATH_SIZE]; + char hosts[NSR_MAX_PATH_SIZE * NSR_MAX_REPLICA_GROUP_SIZE]; + char remote_names[NSR_MAX_REPLICA_GROUP_SIZE * NSR_MAX_PATH_SIZE]; + char filepath[PATH_MAX] = {0,}; + char lp[PATH_MAX] = {0,}; + xlator_t *xl = NULL; + char s[256]; + char transt[16] = {0,}; + char auth[256]; + char c_d[NSR_MAX_PATH_SIZE]; + char *username = NULL, *password = NULL; + gf_boolean_t enable_recon = _gf_false; +#ifdef HAVE_ETCD + uint32_t nsr_port = 27000; +#else + static uint32_t nsr_port = 27000; +#endif + + + if (glusterd_volinfo_get_boolean(volinfo,"cluster.nsr.recon") > 0) { + enable_recon = _gf_true; + } + + volgen_graph_t ng = {0,}; + char path[PATH_MAX] = {0,}; + char *ptr = NULL, *this = NULL, *that = NULL; + glusterd_conf_t *priv = NULL; + + + priv = THIS->private; + remote_names[0] = '\0'; + that = gf_strdup (brickinfo->hostname); + this = gf_strdup (brickinfo->path); + ptr = strchr (this, '/'); + while (ptr) { + *ptr = '-'; + ptr = strchr (this, '/'); + } + GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); + snprintf (dst, PATH_MAX, + "%s/%s/%s:%s", + path, + GLUSTERD_BRICK_INFO_DIR, + that, + this); + + /* Create the NSR xlator, but defer linkage for now. */ + me = xlator_instantiate ("cluster/nsr", "%s-nsr", volname); + if (!me || volgen_xlator_link(me,first_of(graph))) { + return -1; + } + + strcpy(local_name, brickinfo->hostname); + strcpy(local_path, brickinfo->hostname); + strcat(local_name, ":"); + strcat(local_name, brickinfo->path); + strcpy(hosts, brickinfo->hostname); + + peer = list_prev (brickinfo, &volinfo->bricks, + glusterd_brickinfo_t, brick_list); + /* Check leader status while we have this pointer in hand. */ + leader_opt = (!peer || (peer->group != brickinfo->group)) ? "yes" + : "no"; + if (xlator_set_option(me,"vol-name",volname)) + return -1; + if (xlator_set_option(me,"my-name",local_name)) + return -1; + if (xlator_set_option(me,"leader",leader_opt)) + return -1; + +#define FILL_REMOTE_NAMES { \ + strcat(remote_names, \ + peer->hostname); \ + strcat(remote_names, \ + ":"); \ + strcat(remote_names, \ + peer->path); \ + strcat(remote_names, \ + ","); \ + strcat(hosts, ","); \ + strcat(hosts, \ + peer->hostname); \ + replica_group_size++; \ +} + + /* Now get on with the show. */ + while (peer) { + if (peer->group != brickinfo->group) { + break; + } + gf_log ("glusterd", GF_LOG_INFO, + "%s:%s needs client for %s:%s", + brickinfo->hostname, brickinfo->path, + peer->hostname, peer->path); + kid = add_one_peer (graph, peer, volname, index++); + if (!kid || volgen_xlator_link(me,kid)) { + return -1; + } + FILL_REMOTE_NAMES; + peer = list_prev (peer, &volinfo->bricks, + glusterd_brickinfo_t, brick_list); + } + + peer = list_next (brickinfo, &volinfo->bricks, + glusterd_brickinfo_t, brick_list); + while (peer) { + if (peer->group != brickinfo->group) { + break; + } + gf_log ("glusterd", GF_LOG_INFO, + "%s:%s needs client for %s:%s", + brickinfo->hostname, brickinfo->path, + peer->hostname, peer->path); + kid = add_one_peer (graph, peer, volname, index++); + if (!kid || volgen_xlator_link(me,kid)) { + return -1; + } + FILL_REMOTE_NAMES; + peer = list_next (peer, &volinfo->bricks, + glusterd_brickinfo_t, brick_list); + } + + // to remove the final "," + if (strlen(remote_names)) { + remote_names[strlen(remote_names) - 1] = '\0'; + } + if (xlator_set_option(me,"etcd-servers",hosts)) + return -1; + + // Finish linkage to client file + glusterfs_graph_set_first(&graph->graph,me); + + if (enable_recon == _gf_false) + return 0; + + /* Now fill in the various files required for reeconciliation */ + snprintf (filepath, PATH_MAX, + "%s-nsr-recon.vol", + dst); + gf_log ("glusterd", GF_LOG_INFO, + "writing nsr recon volfile in %s\n", + filepath); +#if 0 + strcpy(lp, local_name); +#else + strcpy(lp, brickinfo->path); +#endif + strcat(lp,"/recon"); + bzero(&ng, sizeof(ng)); + xl = volgen_graph_add_as (&ng, "cluster/nsr_recon",lp); + if (!xl) + return -1; + sprintf(s,"%d",replica_group_size); + if (xlator_set_option(xl, "replica-group-size", s) == -1) + return -1; + if (xlator_set_option(xl, "local-member", local_name) == -1) + return -1; + if (xlator_set_option(xl, "replica-group-members", remote_names) == -1) + return -1; + if (xlator_set_option(xl,"vol-name",volname)) + return -1; + if (xlator_set_option(xl,"changelog-dir",changelog_basepath)) + return -1; + if (xlator_set_option(xl,"base-dir",brickinfo->path)) + return -1; + + xl = volgen_graph_add (&ng, "protocol/server", lp); + if (!xl) + return -1; + get_vol_transport_type (volinfo, transt); + if(xlator_set_option (xl, "transport-type", transt) == -1) + return -1; + sprintf(s,"%d",nsr_port); + if(xlator_set_option (xl, "transport.socket.listen-port", s) == -1) + return -1; + strcpy(auth, "auth.addr."); + strcat(auth, lp); + strcat(auth, ".allow"); + if(xlator_set_option (xl, auth, "*") == -1) + return -1; + if(xlator_set_option (xl, "rpc-auth.auth-null", "off") == -1) + return -1; + if(xlator_set_option (xl, "rpc-auth.auth-unix", "off") == -1) + return -1; + if(xlator_set_option (xl, "rpc-auth.auth-glusterfs", "off") == -1) + return -1; + if(volgen_write_volfile(&ng, filepath) == -1) + return -1; + + bzero(&ng, sizeof(ng)); + kid = volgen_graph_add_nolink (&ng, "protocol/client", + "%s-client-%u", lp, 0); + if (!kid) + return -1; + if (xlator_set_option(kid,"remote-host",brickinfo->hostname)) + return -1; +#if 0 + strcpy(lp, brickinfo->path); + strcat(lp,"/recon"); +#endif + if (xlator_set_option(kid,"remote-subvolume",lp)) + return -1; + if(xlator_set_option (kid, "transport-type", transt) == -1) + return -1; + sprintf(s,"%d",nsr_port++); + if(xlator_set_option (kid, "remote-port", s) == -1) + return -1; + snprintf (c_d, PATH_MAX, + "%s/%s/con:%s:%s", + path, + GLUSTERD_BRICK_INFO_DIR, + that, this); + if (volgen_write_volfile(&ng, c_d)) + return -1; + + bzero(&ng, sizeof(ng)); + kid = volgen_graph_add_nolink (&ng, "protocol/client", + "%s-client-%u", lp, 0); + if (!kid) + return -1; + if (xlator_set_option(kid,"remote-host",brickinfo->hostname)) + return -1; + if (xlator_set_option(kid,"remote-subvolume",brickinfo->path)) + return -1; + if(xlator_set_option (kid, "transport-type", transt) == -1) + return -1; + username = glusterd_auth_get_username (volinfo); + password = glusterd_auth_get_password (volinfo); + if(xlator_set_option (kid, "username", username) == -1) + return -1; + if(xlator_set_option (kid, "password", password) == -1) + return -1; + snprintf (c_d, PATH_MAX, + "%s/%s/data:%s:%s", + path, + GLUSTERD_BRICK_INFO_DIR, that, + this); + if (volgen_write_volfile(&ng, c_d)) + return -1; + + return 0; + +} + static int server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, void *param) @@ -1506,10 +1806,17 @@ server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, if (ret) return -1; + if (glusterd_volinfo_get_boolean(volinfo,"cluster.nsr") > 0) { + ret = xlator_set_option (xl, "encoding", "ascii"); + if (ret) + return -1; + } + ret = check_and_add_debug_xl (graph, set_dict, volname, "changelog"); if (ret) return -1; + xl = volgen_graph_add (graph, "features/access-control", volname); if (!xl) return -1; @@ -1584,9 +1891,19 @@ server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, return -1; } - xl = volgen_graph_add (graph, "features/index", volname); - if (!xl) - return -1; + /* TBD: conditionalize on NSR being enabled */ + if (glusterd_volinfo_get_boolean(volinfo,"cluster.nsr") > 0) { + assign_groups(volinfo); + ret = add_nsr_stuff (graph, volname, brickinfo, volinfo, changelog_basepath); + if (ret) { + return -1; + } + } + else { + xl = volgen_graph_add (graph, "features/index", volname); + if (!xl) + return -1; + } snprintf (index_basepath, sizeof (index_basepath), "%s/%s", path, ".glusterfs/indices"); @@ -2407,8 +2724,8 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, gf_boolean_t is_quotad) { - char *replicate_args[] = {"cluster/replicate", - "%s-replicate-%d"}; + char *replicate_type = "cluster/replicate"; + char *replicate_fmt = "%s-replicate-%d"; char *stripe_args[] = {"cluster/stripe", "%s-stripe-%d"}; int rclusters = 0; @@ -2422,12 +2739,16 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, if (volinfo->dist_leaf_count == 1) goto build_distribute; + if (glusterd_volinfo_get_boolean(volinfo,"cluster.nsr") > 0) { + replicate_type = "cluster/nsrc"; + } + /* All other cases, it will have one or the other cluster type */ switch (volinfo->type) { case GF_CLUSTER_TYPE_REPLICATE: clusters = volgen_graph_build_clusters (graph, volinfo, - replicate_args[0], - replicate_args[1], + replicate_type, + replicate_fmt, volinfo->brick_count, volinfo->replica_count); if (clusters < 0) @@ -2447,8 +2768,8 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, if (volinfo->replica_count == 0) goto out; clusters = volgen_graph_build_clusters (graph, volinfo, - replicate_args[0], - replicate_args[1], + replicate_type, + replicate_fmt, volinfo->brick_count, volinfo->replica_count); if (clusters < 0) diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index 1683f9050..4411bc4de 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -35,6 +35,10 @@ #define AUTH_REJECT_OPT_KEY "auth.addr.*.reject" #define NFS_DISABLE_OPT_KEY "nfs.*.disable" +// TBD - bring this from a common conf file +#define NSR_MAX_REPLICA_GROUP_SIZE 8 +#define NSR_MAX_PATH_SIZE (1024 + PATH_MAX) +#define NSR_CONF_PATH "/var/lib/glusterd/nsr/" typedef enum { GF_CLIENT_TRUSTED, diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index b1989567a..520b0f774 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -870,6 +870,19 @@ struct volopt_map_entry glusterd_volopt_map[] = { .type = NO_DOC, .op_version = 2 }, + { .key = "cluster.nsr", + .voltype = "cluster/nsr", + .option = "!nsr", + .op_version = 3, + .description = "enable NSR instead of AFR for replication", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "cluster.nsr.recon", + .voltype = "cluster/nsr", + .op_version = 3, + .description = "enable NSR reconciliation", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, /* Performance xlators enable/disbable options */ { .key = "performance.write-behind", diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index d59aaa44a..834a39968 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1371,7 +1371,21 @@ init (xlator_t *this) if (list_empty (&conf->peers)) { glusterd_launch_synctask (glusterd_spawn_daemons, NULL); + gf_log (this->name, GF_LOG_INFO, + "no peers, should start FRESH etcd"); + /* + * We might not have any peers now, but if we did once before + * then we don't want to start up with a config that still has + * references to them. + */ + nuke_etcd_dir(); } + else { + gf_log (this->name, GF_LOG_INFO, + "have peers, should start etcd with old config"); + } + conf->etcd_pid = start_etcd(uuid_utoa(MY_UUID),NULL); + ret = glusterd_options_init (this); if (ret < 0) goto out; @@ -1417,6 +1431,8 @@ fini (xlator_t *this) conf = this->private; glusterd_stop_uds_listener (this); + stop_etcd(conf->etcd_pid); + nuke_etcd_dir(); FREE (conf->pmap); if (conf->handle) diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 9b6e2fb33..e704de44b 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -38,6 +38,7 @@ #include "cli1-xdr.h" #include "syncop.h" #include "store.h" +#include "glusterd-etcd.h" #define GLUSTERD_MAX_VOLUME_NAME 1000 #define GLUSTERD_TR_LOG_SIZE 50 @@ -157,6 +158,7 @@ typedef struct { gf_boolean_t restart_done; rpcsvc_t *uds_rpc; /* RPCSVC for the unix domain socket */ uint32_t base_port; + pid_t etcd_pid; } glusterd_conf_t; @@ -180,6 +182,15 @@ struct glusterd_brickinfo { int decommissioned; char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */ int caps; /* Capability */ + /* + * The group is used to identify which bricks are part of the same + * replica set during brick-volfile generation, so that NSR volfiles + * can "cross-connect" the bricks to one another. This same approach + * could be used to make client-volfile generation much simpler and + * more efficient too, though it would require some further adaptation + * to support more than one layer of hierarchy. + */ + uint16_t group; }; typedef struct glusterd_brickinfo glusterd_brickinfo_t; @@ -418,6 +429,15 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); volpath, brickinfo->hostname, exp_path); \ } while (0) +#define GLUSTERD_GET_BRICK_RECON_PIDFILE(pidfile,volinfo,brickinfo, priv) do { \ + char exp_path[PATH_MAX] = {0,}; \ + char volpath[PATH_MAX] = {0,}; \ + GLUSTERD_GET_VOLUME_DIR (volpath, volinfo, priv); \ + GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); \ + snprintf (pidfile, PATH_MAX, "%s/run/%s:-%s-recon.pid", \ + volpath, brickinfo->hostname, exp_path); \ + } while (0) + #define GLUSTERD_GET_NFS_PIDFILE(pidfile,nfspath) { \ snprintf (pidfile, PATH_MAX, "%s/run/nfs.pid", \ nfspath); \ |