diff options
Diffstat (limited to 'xlators/mgmt/glusterd')
-rw-r--r-- | xlators/mgmt/glusterd/src/Makefile.am | 4 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-etcd.c | 87 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-etcd.h | 23 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-handler.c | 12 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-sm.c | 11 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-utils.c | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 351 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.h | 4 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 19 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.c | 16 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd.h | 21 |
11 files changed, 537 insertions, 13 deletions
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index b89ec6ddc..9b33edf4d 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -13,7 +13,7 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \ glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \ - glusterd-mgmt.c + glusterd-mgmt.c glusterd-etcd.c glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la \ @@ -24,7 +24,7 @@ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \ glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \ glusterd-syncop.h glusterd-hooks.h glusterd-locks.h \ - glusterd-mgmt.h + glusterd-mgmt.h glusterd-etcd.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \ diff --git a/xlators/mgmt/glusterd/src/glusterd-etcd.c b/xlators/mgmt/glusterd/src/glusterd-etcd.c new file mode 100644 index 000000000..656ea3b9b --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-etcd.c @@ -0,0 +1,87 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <errno.h> +#include <signal.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> +#include "glusterfs.h" +#include "run.h" +#include "glusterd-etcd.h" + +#define GLUSTERD_ETCD_DIR "/var/lib/glusterd/etcd" +#define GLUSTERD_ETCD_CMD "/root/etcd/bin/etcd" + +pid_t +start_etcd (char *this_host, char *other_host) +{ + runner_t runner; + char me[256]; + + if (gethostname(me,sizeof(me)-1) != 0) { + gf_log (__func__, GF_LOG_ERROR, "gethostname failed?!?"); + return -1; + } + me[sizeof(me)-1] = '\0'; + + if ((mkdir(GLUSTERD_ETCD_DIR,0700) < 0) && (errno != EEXIST)) { + gf_log (__func__, GF_LOG_ERROR, + "failed to create %s", GLUSTERD_ETCD_DIR); + return -1; + } + + runinit (&runner); + runner_add_args (&runner, GLUSTERD_ETCD_CMD, + "-name", this_host, + "-data-dir", GLUSTERD_ETCD_DIR, + "-bind-addr", NULL); + runner_argprintf( &runner, "%s:4001", me); + runner_add_arg (&runner, "-peer-addr"); + runner_argprintf (&runner, "%s:7001", me); + if (other_host) { + runner_add_arg (&runner, "-peers"); + runner_argprintf (&runner, "%s:7001", other_host); + gf_log (__func__, GF_LOG_INFO, "starting etcd via %s", other_host); + } else { + gf_log (__func__, GF_LOG_INFO, "starting etcd standalone"); + } + + /* + * Runner_run would wait for it. Runner_run_nowait would not wait, + * but would detach it so thoroughly that it won't die when we do. + * Also, runner->chpid would be the PID of the transient middle + * process, not the one we might actually need to kill later. This + * seems to do exactly what we need. + */ + if (runner_start(&runner) != 0) { + gf_log (__func__, GF_LOG_ERROR, + "failed to start %s", GLUSTERD_ETCD_CMD); + return -1; + } + + return runner.chpid; +} + +void +stop_etcd (pid_t pid) +{ + if (pid > 0) { + gf_log (__func__, GF_LOG_INFO, "killing etcd %d", pid); + (void)kill(pid,SIGKILL); + (void)waitpid(pid,NULL,0); + } +} + +void +nuke_etcd_dir (void) +{ + (void)runcmd("rm","-rf",GLUSTERD_ETCD_DIR,NULL); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-etcd.h b/xlators/mgmt/glusterd/src/glusterd-etcd.h new file mode 100644 index 000000000..9459f6bbd --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-etcd.h @@ -0,0 +1,23 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _GLUSTERD_ETCD_H_ +#define _GLUSTERD_ETCD_H_ + +#include <sys/types.h> +#include "glusterfs.h" + +pid_t start_etcd (char *this_host, char *other_host); + +void stop_etcd (pid_t pid); + +void nuke_etcd_dir (void); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index b8202b233..e0373c774 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -50,6 +50,7 @@ #include "globals.h" #include "glusterd-syncop.h" +#include "glusterd-etcd.h" #ifdef HAVE_BD_XLATOR #include <lvm2app.h> @@ -2631,7 +2632,18 @@ __glusterd_handle_probe_query (rpcsvc_request_t *req) gf_log ("", GF_LOG_ERROR, "Failed to add peer %s", remote_hostname); rsp.op_errno = GF_PROBE_ADD_FAILED; + goto respond; } + gf_log (THIS->name, GF_LOG_INFO, + "joining, should point etcd at %s", remote_hostname); + /* + * We should have started a standalone etcd before. Now we + * need a new one, with a new config. + */ + stop_etcd(conf->etcd_pid); + nuke_etcd_dir(); + conf->etcd_pid = start_etcd (uuid_utoa(MY_UUID), + remote_hostname); } respond: diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c index e6ad34335..7a8b2c94f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-sm.c @@ -34,6 +34,7 @@ #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" +#include "glusterd-etcd.h" static struct list_head gd_friend_sm_queue; @@ -605,6 +606,9 @@ glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event, "Peer detach cleanup was not successful"); ret = 0; } + gf_log (THIS->name, GF_LOG_INFO, "detached, stopping etcd"); + stop_etcd(priv->etcd_pid); + nuke_etcd_dir(); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); @@ -733,6 +737,13 @@ glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx) peerinfo->hostname, ev_ctx->port, op_ret, op_errno); + // apply a deterministic function to decide via whom we should join the cluster + if (strcmp(peerinfo->hostname, ev_ctx->hostname) > 0) { + stop_etcd(conf->etcd_pid); + nuke_etcd_dir(); + conf->etcd_pid = start_etcd (uuid_utoa(MY_UUID), peerinfo->hostname); + } + out: gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 896827244..7883a98bf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1905,6 +1905,8 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t *volinfo, if (ret == 0) { glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); (void) glusterd_brick_unlink_socket_file (volinfo, brickinfo); + GLUSTERD_GET_BRICK_RECON_PIDFILE (pidfile, volinfo, brickinfo, priv); + ret = glusterd_service_stop ("recon", pidfile, SIGTERM, _gf_false); } } diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index ae095bf7c..f42d596ba 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -103,7 +103,6 @@ xlator_instantiate_va (const char *type, const char *format, va_list arg) return NULL; } -#ifdef __not_used_as_of_now_ static xlator_t * xlator_instantiate (const char *type, const char *format, ...) { @@ -116,7 +115,6 @@ xlator_instantiate (const char *type, const char *format, ...) return xl; } -#endif static int volgen_xlator_link (xlator_t *pxl, xlator_t *cxl) @@ -1445,6 +1443,312 @@ server_spec_extended_option_handler (volgen_graph_t *graph, static void get_vol_tstamp_file (char *filename, glusterd_volinfo_t *volinfo); +xlator_t * +add_one_peer (volgen_graph_t *graph, glusterd_brickinfo_t *peer, + char *volname, uint16_t index) +{ + xlator_t *kid; + + kid = volgen_graph_add_nolink (graph, "protocol/client", + "%s-client-%u", volname, + index++); + if (!kid) { + return NULL; + } + + /* TBD: figure out where to get the proper transport list */ + if (xlator_set_option(kid,"transport-type","socket")) { + return NULL; + } + if (xlator_set_option(kid,"remote-host",peer->hostname)) { + return NULL; + } + if (xlator_set_option(kid,"remote-subvolume",peer->path)) { + return NULL; + } + /* TBD: deal with RDMA, SSL */ + + return kid; +} + +void +assign_groups (glusterd_volinfo_t *volinfo) +{ + glusterd_brickinfo_t *brickinfo = NULL; + uint16_t group_num = 0; + int in_group = 0; + uuid_t tmp_uuid; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (in_group == 0) { + uuid_generate(tmp_uuid); + } + brickinfo->group = group_num; + uuid_copy(brickinfo->nsr_uuid,tmp_uuid); + if (++in_group >= volinfo->replica_count) { + in_group = 0; + ++group_num; + } + } +} + +int +add_nsr_stuff (volgen_graph_t *graph, char *volname, + glusterd_brickinfo_t *brickinfo, glusterd_volinfo_t *volinfo, + char *changelog_basepath) +{ + xlator_t *me; + xlator_t *kid; + glusterd_brickinfo_t *peer; + uint16_t index = 0; + //uint32_t i=0; + char *leader_opt; + uint32_t replica_group_size = 1; + char dst[NSR_MAX_PATH_SIZE]; + char local_path[NSR_MAX_PATH_SIZE]; + char local_name[NSR_MAX_PATH_SIZE]; + char hosts[NSR_MAX_PATH_SIZE * NSR_MAX_REPLICA_GROUP_SIZE]; + char remote_names[NSR_MAX_REPLICA_GROUP_SIZE * NSR_MAX_PATH_SIZE]; + char filepath[PATH_MAX] = {0,}; + char lp[PATH_MAX] = {0,}; + xlator_t *xl = NULL; + char s[256]; + char transt[16] = {0,}; + char auth[256]; + char c_d[NSR_MAX_PATH_SIZE]; + char *username = NULL, *password = NULL; + gf_boolean_t enable_recon = _gf_false; + static uint32_t nsr_port = 27000; + + if (glusterd_volinfo_get_boolean(volinfo,"cluster.nsr.recon") > 0) { + enable_recon = _gf_true; + } + + volgen_graph_t ng = {0,}; + char path[PATH_MAX] = {0,}; + char *ptr = NULL, *this = NULL, *that = NULL; + glusterd_conf_t *priv = NULL; + + + priv = THIS->private; + remote_names[0] = '\0'; + that = gf_strdup (brickinfo->hostname); + this = gf_strdup (brickinfo->path); + ptr = strchr (this, '/'); + while (ptr) { + *ptr = '-'; + ptr = strchr (this, '/'); + } + GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv); + snprintf (dst, PATH_MAX, + "%s/%s/%s:%s", + path, + GLUSTERD_BRICK_INFO_DIR, + that, + this); + + /* Create the NSR xlator, but defer linkage for now. */ + me = xlator_instantiate ("cluster/nsr", "%s-nsr", volname); + if (!me || volgen_xlator_link(me,first_of(graph))) { + return -1; + } + + strcpy(local_name, brickinfo->hostname); + strcpy(local_path, brickinfo->hostname); + strcat(local_name, ":"); + strcat(local_name, brickinfo->path); + strcpy(hosts, brickinfo->hostname); + + peer = list_prev (brickinfo, &volinfo->bricks, + glusterd_brickinfo_t, brick_list); + /* Check leader status while we have this pointer in hand. */ + leader_opt = (!peer || (peer->group != brickinfo->group)) ? "yes" + : "no"; + if (xlator_set_option(me,"vol-name",volname)) + return -1; + if (xlator_set_option(me,"my-name",local_name)) + return -1; + if (xlator_set_option(me,"leader",leader_opt)) + return -1; + if (xlator_set_option(me,"subvol-uuid", + uuid_utoa(brickinfo->nsr_uuid))) { + return -1; + } + +#define FILL_REMOTE_NAMES { \ + strcat(remote_names, \ + peer->hostname); \ + strcat(remote_names, \ + ":"); \ + strcat(remote_names, \ + peer->path); \ + strcat(remote_names, \ + ","); \ + strcat(hosts, ","); \ + strcat(hosts, \ + peer->hostname); \ + replica_group_size++; \ +} + + /* Now get on with the show. */ + while (peer) { + if (peer->group != brickinfo->group) { + break; + } + gf_log ("glusterd", GF_LOG_INFO, + "%s:%s needs client for %s:%s", + brickinfo->hostname, brickinfo->path, + peer->hostname, peer->path); + kid = add_one_peer (graph, peer, volname, index++); + if (!kid || volgen_xlator_link(me,kid)) { + return -1; + } + FILL_REMOTE_NAMES; + peer = list_prev (peer, &volinfo->bricks, + glusterd_brickinfo_t, brick_list); + } + + peer = list_next (brickinfo, &volinfo->bricks, + glusterd_brickinfo_t, brick_list); + while (peer) { + if (peer->group != brickinfo->group) { + break; + } + gf_log ("glusterd", GF_LOG_INFO, + "%s:%s needs client for %s:%s", + brickinfo->hostname, brickinfo->path, + peer->hostname, peer->path); + kid = add_one_peer (graph, peer, volname, index++); + if (!kid || volgen_xlator_link(me,kid)) { + return -1; + } + FILL_REMOTE_NAMES; + peer = list_next (peer, &volinfo->bricks, + glusterd_brickinfo_t, brick_list); + } + + // to remove the final "," + if (strlen(remote_names)) { + remote_names[strlen(remote_names) - 1] = '\0'; + } + if (xlator_set_option(me,"etcd-servers",hosts)) + return -1; + + // Finish linkage to client file + glusterfs_graph_set_first(&graph->graph,me); + + if (enable_recon == _gf_false) + return 0; + + /* Now fill in the various files required for reconciliation */ + snprintf (filepath, PATH_MAX, + "%s-nsr-recon.vol", + dst); + gf_log ("glusterd", GF_LOG_INFO, + "writing nsr recon volfile in %s\n", + filepath); +#if 0 + strcpy(lp, local_name); +#else + strcpy(lp, brickinfo->path); +#endif + strcat(lp,"/recon"); + bzero(&ng, sizeof(ng)); + xl = volgen_graph_add_as (&ng, "cluster/nsr_recon",lp); + if (!xl) + return -1; + sprintf(s,"%d",replica_group_size); + if (xlator_set_option(xl, "replica-group-size", s) == -1) + return -1; + if (xlator_set_option(xl, "local-member", local_name) == -1) + return -1; + if (xlator_set_option(xl, "replica-group-members", remote_names) == -1) + return -1; + if (xlator_set_option(xl,"vol-name",volname)) + return -1; + if (xlator_set_option(xl,"changelog-dir",changelog_basepath)) + return -1; + if (xlator_set_option(xl,"base-dir",brickinfo->path)) + return -1; + + xl = volgen_graph_add (&ng, "protocol/server", lp); + if (!xl) + return -1; + get_vol_transport_type (volinfo, transt); + if(xlator_set_option (xl, "transport-type", transt) == -1) + return -1; + sprintf(s,"%d",nsr_port); + if(xlator_set_option (xl, "transport.socket.listen-port", s) == -1) + return -1; + strcpy(auth, "auth.addr."); + strcat(auth, lp); + strcat(auth, ".allow"); + if(xlator_set_option (xl, auth, "*") == -1) + return -1; + if(xlator_set_option (xl, "rpc-auth.auth-null", "off") == -1) + return -1; + if(xlator_set_option (xl, "rpc-auth.auth-unix", "off") == -1) + return -1; + if(xlator_set_option (xl, "rpc-auth.auth-glusterfs", "off") == -1) + return -1; + if(volgen_write_volfile(&ng, filepath) == -1) + return -1; + + bzero(&ng, sizeof(ng)); + kid = volgen_graph_add_nolink (&ng, "protocol/client", + "%s-client-%u", lp, 0); + if (!kid) + return -1; + if (xlator_set_option(kid,"remote-host",brickinfo->hostname)) + return -1; +#if 0 + strcpy(lp, brickinfo->path); + strcat(lp,"/recon"); +#endif + if (xlator_set_option(kid,"remote-subvolume",lp)) + return -1; + if(xlator_set_option (kid, "transport-type", transt) == -1) + return -1; + sprintf(s,"%d",nsr_port++); + if(xlator_set_option (kid, "remote-port", s) == -1) + return -1; + snprintf (c_d, PATH_MAX, + "%s/%s/con:%s:%s", + path, + GLUSTERD_BRICK_INFO_DIR, + that, this); + if (volgen_write_volfile(&ng, c_d)) + return -1; + + bzero(&ng, sizeof(ng)); + kid = volgen_graph_add_nolink (&ng, "protocol/client", + "%s-client-%u", lp, 0); + if (!kid) + return -1; + if (xlator_set_option(kid,"remote-host",brickinfo->hostname)) + return -1; + if (xlator_set_option(kid,"remote-subvolume",brickinfo->path)) + return -1; + if(xlator_set_option (kid, "transport-type", transt) == -1) + return -1; + username = glusterd_auth_get_username (volinfo); + password = glusterd_auth_get_password (volinfo); + if(xlator_set_option (kid, "username", username) == -1) + return -1; + if(xlator_set_option (kid, "password", password) == -1) + return -1; + snprintf (c_d, PATH_MAX, + "%s/%s/data:%s:%s", + path, + GLUSTERD_BRICK_INFO_DIR, that, + this); + if (volgen_write_volfile(&ng, c_d)) + return -1; + + return 0; + +} + static int server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, dict_t *set_dict, void *param) @@ -1561,10 +1865,17 @@ server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, if (ret) return -1; + if (glusterd_volinfo_get_boolean(volinfo,"cluster.nsr") > 0) { + ret = xlator_set_option (xl, "encoding", "ascii"); + if (ret) + return -1; + } + ret = check_and_add_debug_xl (graph, set_dict, volname, "changelog"); if (ret) return -1; + xl = volgen_graph_add (graph, "features/access-control", volname); if (!xl) return -1; @@ -1643,9 +1954,19 @@ server_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, return -1; } - xl = volgen_graph_add (graph, "features/index", volname); - if (!xl) - return -1; + /* TBD: conditionalize on NSR being enabled */ + if (glusterd_volinfo_get_boolean(volinfo,"cluster.nsr") > 0) { + ret = add_nsr_stuff (graph, volname, brickinfo, volinfo, + changelog_basepath); + if (ret) { + return -1; + } + } + else { + xl = volgen_graph_add (graph, "features/index", volname); + if (!xl) + return -1; + } snprintf (index_basepath, sizeof (index_basepath), "%s/%s", path, ".glusterfs/indices"); @@ -2470,8 +2791,8 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, gf_boolean_t is_quotad) { - char *replicate_args[] = {"cluster/replicate", - "%s-replicate-%d"}; + char *replicate_type = "cluster/replicate"; + char *replicate_fmt = "%s-replicate-%d"; char *stripe_args[] = {"cluster/stripe", "%s-stripe-%d"}; int rclusters = 0; @@ -2485,12 +2806,16 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, if (volinfo->dist_leaf_count == 1) goto build_distribute; + if (glusterd_volinfo_get_boolean(volinfo,"cluster.nsr") > 0) { + replicate_type = "cluster/nsrc"; + } + /* All other cases, it will have one or the other cluster type */ switch (volinfo->type) { case GF_CLUSTER_TYPE_REPLICATE: clusters = volgen_graph_build_clusters (graph, volinfo, - replicate_args[0], - replicate_args[1], + replicate_type, + replicate_fmt, volinfo->brick_count, volinfo->replica_count); if (clusters < 0) @@ -2510,8 +2835,8 @@ volume_volgen_graph_build_clusters (volgen_graph_t *graph, if (volinfo->replica_count == 0) goto out; clusters = volgen_graph_build_clusters (graph, volinfo, - replicate_args[0], - replicate_args[1], + replicate_type, + replicate_fmt, volinfo->brick_count, volinfo->replica_count); if (clusters < 0) @@ -3539,6 +3864,10 @@ generate_brick_volfiles (glusterd_volinfo_t *volinfo) } } + if (glusterd_volinfo_get_boolean(volinfo,"cluster.nsr") > 0) { + assign_groups(volinfo); + } + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { gf_log ("", GF_LOG_DEBUG, "Found a brick - %s:%s", brickinfo->hostname, diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.h b/xlators/mgmt/glusterd/src/glusterd-volgen.h index ef92087fc..f4703c288 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.h +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.h @@ -35,6 +35,10 @@ #define AUTH_REJECT_OPT_KEY "auth.addr.*.reject" #define NFS_DISABLE_OPT_KEY "nfs.*.disable" +// TBD - bring this from a common conf file +#define NSR_MAX_REPLICA_GROUP_SIZE 8 +#define NSR_MAX_PATH_SIZE (1024 + PATH_MAX) +#define NSR_CONF_PATH "/var/lib/glusterd/nsr/" typedef enum { GF_CLIENT_TRUSTED, diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index e3af4e7f5..1374e82cd 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -898,6 +898,25 @@ struct volopt_map_entry glusterd_volopt_map[] = { .type = NO_DOC, .op_version = 2 }, + { .key = "cluster.nsr", + .voltype = "cluster/nsr", + .option = "!nsr", + .op_version = 3, + .description = "enable NSR instead of AFR for replication", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "cluster.nsr.recon", + .voltype = "cluster/nsr", + .op_version = 3, + .description = "enable NSR reconciliation", + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_XLATOR_OPT + }, + { .key = "cluster.nsr.quorum-percent", + .voltype = "cluster/nsr", + .option = "quorum-percent", + .op_version = 3, + .description = "percent of rep_count-1 bricks that must be up" + }, /* Performance xlators enable/disbable options */ { .key = "performance.write-behind", diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 1a6aa81d3..4d09d7fd9 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1475,7 +1475,21 @@ init (xlator_t *this) if (list_empty (&conf->peers)) { glusterd_launch_synctask (glusterd_spawn_daemons, NULL); + gf_log (this->name, GF_LOG_INFO, + "no peers, should start FRESH etcd"); + /* + * We might not have any peers now, but if we did once before + * then we don't want to start up with a config that still has + * references to them. + */ + nuke_etcd_dir(); } + else { + gf_log (this->name, GF_LOG_INFO, + "have peers, should start etcd with old config"); + } + conf->etcd_pid = start_etcd(uuid_utoa(MY_UUID),NULL); + ret = glusterd_options_init (this); if (ret < 0) goto out; @@ -1521,6 +1535,8 @@ fini (xlator_t *this) conf = this->private; glusterd_stop_uds_listener (this); + stop_etcd(conf->etcd_pid); + nuke_etcd_dir(); FREE (conf->pmap); if (conf->handle) diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index b53d8e412..7157bee64 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -38,6 +38,7 @@ #include "cli1-xdr.h" #include "syncop.h" #include "store.h" +#include "glusterd-etcd.h" #define GLUSTERD_MAX_VOLUME_NAME 1000 #define GLUSTERD_TR_LOG_SIZE 50 @@ -178,6 +179,7 @@ typedef struct { char *snap_bricks_directory; gf_store_handle_t *missed_snaps_list_shandle; struct list_head missed_snaps_list; + pid_t etcd_pid; } glusterd_conf_t; @@ -204,6 +206,16 @@ struct glusterd_brickinfo { char vg[PATH_MAX]; /* FIXME: Use max size for length of vg */ int caps; /* Capability */ int32_t snap_status; + /* + * The group is used to identify which bricks are part of the same + * replica set during brick-volfile generation, so that NSR volfiles + * can "cross-connect" the bricks to one another. This same approach + * could be used to make client-volfile generation much simpler and + * more efficient too, though it would require some further adaptation + * to support more than one layer of hierarchy. + */ + uint16_t group; + uuid_t nsr_uuid; }; typedef struct glusterd_brickinfo glusterd_brickinfo_t; @@ -532,6 +544,15 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); volpath, brickinfo->hostname, exp_path); \ } while (0) +#define GLUSTERD_GET_BRICK_RECON_PIDFILE(pidfile,volinfo,brickinfo, priv) do { \ + char exp_path[PATH_MAX] = {0,}; \ + char volpath[PATH_MAX] = {0,}; \ + GLUSTERD_GET_VOLUME_DIR (volpath, volinfo, priv); \ + GLUSTERD_REMOVE_SLASH_FROM_PATH (brickinfo->path, exp_path); \ + snprintf (pidfile, PATH_MAX, "%s/run/%s:-%s-recon.pid", \ + volpath, brickinfo->hostname, exp_path); \ + } while (0) + #define GLUSTERD_GET_NFS_PIDFILE(pidfile,nfspath) { \ snprintf (pidfile, PATH_MAX, "%s/run/nfs.pid", \ nfspath); \ |