diff options
author | Harshavardhana <harsha@harshavardhana.net> | 2013-07-24 13:16:08 -0700 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2013-09-06 15:56:34 -0700 |
commit | b610f1be7cd71b8f3e51c224c8b6fe0e7366c8cf (patch) | |
tree | 131c4eed6ab553961340013335bc17b634597031 | |
parent | 94fdc3dd79fa052ba3eb8951fe5c4ddf9dd1c6e8 (diff) |
glusterfsd: Round-robin DNS should not be relied upon for
config service availability for clients.
The backup volfile server as it stands is slow and prone to errors
with the mount script and in its combination with RRDNS. Instead, in
theory, it should use all the available nodes in the 'trusted pool'
by default (right now we don't have a mechanism in place for
this).
Nevertheless, this patch makes it possible to provide a list of
volfile servers on the command line, as shown below:
-----------------------------------------------------------------
$ glusterfs -s server1 .. -s serverN --volfile-id=<volname> \
<mount_point>
-----------------------------------------------------------------
OR
-----------------------------------------------------------------
$ mount -t glusterfs -obackup-volfile-servers=<server2>: \
<server3>:...:<serverN> <server1>:/<volname> <mount_point>
-----------------------------------------------------------------
Here ':' is used as the separator for mount script parsing.
These servers are now remembered and attempted one after another for
fetching the volfile until the list is exhausted. This ensures that
clients get 'volume' configs in a consistent manner, avoiding the
need to poll through RRDNS.
Change-Id: If808bb8a52e6034c61574cdae3ac4e7e83513a40
BUG: 986429
Signed-off-by: Harshavardhana <harsha@harshavardhana.net>
Reviewed-on: http://review.gluster.org/5400
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r-- | glusterfsd/src/glusterfsd-mem-types.h | 1 | ||||
-rw-r--r-- | glusterfsd/src/glusterfsd-mgmt.c | 49 | ||||
-rw-r--r-- | glusterfsd/src/glusterfsd.c | 82 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs.h | 29 | ||||
-rw-r--r-- | rpc/rpc-transport/socket/src/socket.c | 2 | ||||
-rw-r--r-- | tests/bugs/bug-986429.t | 19 | ||||
-rwxr-xr-x | xlators/mount/fuse/utils/mount.glusterfs.in | 151 |
7 files changed, 211 insertions, 122 deletions
diff --git a/glusterfsd/src/glusterfsd-mem-types.h b/glusterfsd/src/glusterfsd-mem-types.h index 83f97b5d3..7135c0ada 100644 --- a/glusterfsd/src/glusterfsd-mem-types.h +++ b/glusterfsd/src/glusterfsd-mem-types.h @@ -17,6 +17,7 @@ enum gfd_mem_types_ { gfd_mt_xlator_list_t = GF_MEM_TYPE_START, gfd_mt_xlator_t, + gfd_mt_server_cmdline_t, gfd_mt_xlator_cmdline_option_t, gfd_mt_char, gfd_mt_call_pool_t, diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index 4b9b82fff..e40d19b08 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -1821,30 +1821,52 @@ static int mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, void *data) { - xlator_t *this = NULL; - cmd_args_t *cmd_args = NULL; - glusterfs_ctx_t *ctx = NULL; + xlator_t *this = NULL; + glusterfs_ctx_t *ctx = NULL; int ret = 0; - int need_term = 0; - int emval = 0; + server_cmdline_t *server = NULL; + rpc_transport_t *rpc_trans = NULL; + int need_term = 0; + int emval = 0; this = mydata; + rpc_trans = rpc->conn.trans; ctx = this->ctx; - cmd_args = &ctx->cmd_args; + switch (event) { case RPC_CLNT_DISCONNECT: if (!ctx->active) { - cmd_args->max_connect_attempts--; gf_log ("glusterfsd-mgmt", GF_LOG_ERROR, - "failed to connect with remote-host: %s", + "failed to connect with remote-host: %s (%s)", + ctx->cmd_args.volfile_server, strerror (errno)); - gf_log ("glusterfsd-mgmt", GF_LOG_INFO, - "%d connect attempts left", - cmd_args->max_connect_attempts); - if (0 >= cmd_args->max_connect_attempts) { + server = ctx->cmd_args.curr_server; + if (server->list.next == &ctx->cmd_args.volfile_servers) { + need_term = 1; + emval = ENOTCONN; + gf_log("glusterfsd-mgmt", GF_LOG_INFO, + "Exhausted all volfile servers"); + break; + } + server = list_entry (server->list.next, typeof(*server), + list); + ctx->cmd_args.curr_server = server; + ctx->cmd_args.volfile_server = server->volfile_server; + + ret = dict_set_str (rpc_trans->options, + 
"remote-host", + server->volfile_server); + if (ret != 0) { + gf_log ("glusterfsd-mgmt", GF_LOG_ERROR, + "failed to set remote-host: %s", + server->volfile_server); need_term = 1; emval = ENOTCONN; + break; } + gf_log ("glusterfsd-mgmt", GF_LOG_INFO, + "connecting to next volfile server %s", + server->volfile_server); } break; case RPC_CLNT_CONNECT: @@ -2048,7 +2070,8 @@ glusterfs_mgmt_init (glusterfs_ctx_t *ctx) ret = rpc_clnt_register_notify (rpc, mgmt_rpc_notify, THIS); if (ret) { - gf_log (THIS->name, GF_LOG_WARNING, "failed to register notify function"); + gf_log (THIS->name, GF_LOG_WARNING, + "failed to register notify function"); goto out; } diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index 7b2ccd6eb..e8ebdae14 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -98,10 +98,6 @@ static struct argp_option gf_options[] = { {"volfile-server", ARGP_VOLFILE_SERVER_KEY, "SERVER", 0, "Server to get the volume file from. This option overrides " "--volfile option"}, - {"volfile-max-fetch-attempts", ARGP_VOLFILE_MAX_FETCH_ATTEMPTS, - "MAX-ATTEMPTS", 0, "Maximum number of connect attempts to server. 
" - "This option should be provided with --volfile-server option" - "[default: 1]"}, {"volfile", ARGP_VOLUME_FILE_KEY, "VOLFILE", 0, "File to use as VOLUME_FILE"}, {"spec-file", ARGP_VOLUME_FILE_KEY, "VOLFILE", OPTION_HIDDEN, @@ -582,7 +578,58 @@ get_volfp (glusterfs_ctx_t *ctx) } static int -gf_remember_xlator_option (struct list_head *options, char *arg) +gf_remember_backup_volfile_server (char *arg) +{ + glusterfs_ctx_t *ctx = NULL; + cmd_args_t *cmd_args = NULL; + int ret = -1; + server_cmdline_t *server = NULL; + + ctx = glusterfsd_ctx; + if (!ctx) + goto out; + cmd_args = &ctx->cmd_args; + + if(!cmd_args) + goto out; + + server = GF_CALLOC (1, sizeof (server_cmdline_t), + gfd_mt_server_cmdline_t); + if (!server) + goto out; + + INIT_LIST_HEAD(&server->list); + + server->volfile_server = gf_strdup(arg); + + if (!cmd_args->volfile_server) { + cmd_args->volfile_server = server->volfile_server; + cmd_args->curr_server = server; + } + + if (!server->volfile_server) { + gf_log ("", GF_LOG_WARNING, + "xlator option %s is invalid", arg); + goto out; + } + + list_add_tail (&server->list, &cmd_args->volfile_servers); + + ret = 0; +out: + if (ret == -1) { + if (server) { + GF_FREE (server->volfile_server); + GF_FREE (server); + } + } + + return ret; + +} + +static int +gf_remember_xlator_option (char *arg) { glusterfs_ctx_t *ctx = NULL; cmd_args_t *cmd_args = NULL; @@ -673,19 +720,8 @@ parse_opts (int key, char *arg, struct argp_state *state) switch (key) { case ARGP_VOLFILE_SERVER_KEY: - cmd_args->volfile_server = gf_strdup (arg); - break; - - case ARGP_VOLFILE_MAX_FETCH_ATTEMPTS: - n = 0; + gf_remember_backup_volfile_server (arg); - if (gf_string2uint_base10 (arg, &n) == 0) { - cmd_args->max_connect_attempts = n; - break; - } - - argp_failure (state, -1, 0, - "Invalid limit on connect attempts %s", arg); break; case ARGP_READ_ONLY_KEY: @@ -694,14 +730,12 @@ parse_opts (int key, char *arg, struct argp_state *state) case ARGP_ACL_KEY: cmd_args->acl = 1; - 
gf_remember_xlator_option (&cmd_args->xlator_options, - "*-md-cache.cache-posix-acl=true"); + gf_remember_xlator_option ("*-md-cache.cache-posix-acl=true"); break; case ARGP_SELINUX_KEY: cmd_args->selinux = 1; - gf_remember_xlator_option (&cmd_args->xlator_options, - "*-md-cache.cache-selinux=true"); + gf_remember_xlator_option ("*-md-cache.cache-selinux=true"); break; case ARGP_AUX_GFID_MOUNT_KEY: @@ -906,8 +940,9 @@ parse_opts (int key, char *arg, struct argp_state *state) break; case ARGP_XLATOR_OPTION_KEY: - if (gf_remember_xlator_option (&cmd_args->xlator_options, arg)) - argp_failure (state, -1, 0, "invalid xlator option %s", arg); + if (gf_remember_xlator_option (arg)) + argp_failure (state, -1, 0, "invalid xlator option %s", + arg); break; @@ -1297,6 +1332,7 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx) cmd_args->fuse_entry_timeout = -1; INIT_LIST_HEAD (&cmd_args->xlator_options); + INIT_LIST_HEAD (&cmd_args->volfile_servers); lim.rlim_cur = RLIM_INFINITY; lim.rlim_max = RLIM_INFINITY; diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index ede208b38..068b307e1 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -294,6 +294,11 @@ struct _xlator_cmdline_option { }; typedef struct _xlator_cmdline_option xlator_cmdline_option_t; +struct _server_cmdline { + struct list_head list; + char *volfile_server; +}; +typedef struct _server_cmdline server_cmdline_t; #define GF_OPTION_ENABLE _gf_true #define GF_OPTION_DISABLE _gf_false @@ -301,9 +306,12 @@ typedef struct _xlator_cmdline_option xlator_cmdline_option_t; struct _cmd_args { /* basic options */ - char *volfile_server; - char *volfile; - char *log_server; + char *volfile_server; + server_cmdline_t *curr_server; + /* List of backup volfile servers, including original */ + struct list_head volfile_servers; + char *volfile; + char *log_server; gf_loglevel_t log_level; char *log_file; int32_t max_connect_attempts; @@ -322,14 +330,14 @@ struct _cmd_args { 
int enable_ino32; int worm; int mac_compat; - int fopen_keep_cache; - int gid_timeout; + int fopen_keep_cache; + int gid_timeout; int aux_gfid_mount; - struct list_head xlator_options; /* list of xlator_option_t */ + struct list_head xlator_options; /* list of xlator_option_t */ - /* fuse options */ - int fuse_direct_io_mode; - char *use_readdirp; + /* fuse options */ + int fuse_direct_io_mode; + char *use_readdirp; int volfile_check; double fuse_entry_timeout; double fuse_negative_timeout; @@ -343,7 +351,7 @@ struct _cmd_args { unsigned uid_map_root; int background_qlen; int congestion_threshold; - char *fuse_mountopts; + char *fuse_mountopts; /* key args */ char *mount_point; @@ -353,7 +361,6 @@ struct _cmd_args { int brick_port; char *brick_name; int brick_port2; - }; typedef struct _cmd_args cmd_args_t; diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c index e63c27018..06b74b204 100644 --- a/rpc/rpc-transport/socket/src/socket.c +++ b/rpc/rpc-transport/socket/src/socket.c @@ -2162,8 +2162,6 @@ socket_connect_finish (rpc_transport_t *this) priv->connect_finish_log = 1; } __socket_disconnect (this); - notify_rpc = 1; - event = RPC_TRANSPORT_DISCONNECT; goto unlock; } diff --git a/tests/bugs/bug-986429.t b/tests/bugs/bug-986429.t new file mode 100644 index 000000000..6e43f72b7 --- /dev/null +++ b/tests/bugs/bug-986429.t @@ -0,0 +1,19 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. 
$(dirname $0)/../volume.rc + +## This tests failover achieved by providing multiple +## servers from the trusted pool for fetching volume +## specification + +cleanup; + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 $H0:$B0/$V0 +TEST $CLI volume start $V0 +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s non-existent -s $H0 --volfile-id=/$V0 $M0 + +cleanup; diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index 3e8fa4042..a192d6059 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -27,6 +27,8 @@ _init () LOG_DEBUG=DEBUG; LOG_TRACE=TRACE; + HOST_NAME_MAX=64; + prefix="@prefix@"; exec_prefix=@exec_prefix@; cmd_line=$(echo "@sbindir@/glusterfs"); @@ -53,51 +55,69 @@ _init () UPDATEDBCONF=/etc/updatedb.conf } +parse_backup_volfile_servers () +{ + local server_list=$1 + local servers="" + local new_servers="" + + servers=$(echo ${server_list} | sed 's/\:/ /g') + for server in ${servers}; do + length=$(echo $server | wc -c) + if [ ${length} -gt ${HOST_NAME_MAX} ]; then + echo "Hostname:${server} provided is too long.. 
skipping" + continue + fi + new_servers=$(echo "$new_servers $server") + done + echo ${new_servers} +} + start_glusterfs () { # lets the comparsion be case insensitive for all strings if [ -n "$log_level_str" ]; then - case "$( echo $log_level_str | tr '[a-z]' '[A-Z]')" in - "ERROR") - log_level=$LOG_ERROR; - ;; + case "$( echo $log_level_str | tr '[a-z]' '[A-Z]')" in + "ERROR") + log_level=$LOG_ERROR; + ;; "INFO") log_level=$LOG_INFO ;; - "DEBUG") - log_level=$LOG_DEBUG; - ;; - "CRITICAL") - log_level=$LOG_CRITICAL; - ;; - "WARNING") - log_level=$LOG_WARNING; - ;; - "TRACE") - log_level=$LOG_TRACE; - ;; - "NONE") - log_level=$LOG_NONE; - ;; - *) - echo "invalid log level $log_level_str, using INFO"; - log_level=$LOG_INFO; - ;; - esac + "DEBUG") + log_level=$LOG_DEBUG; + ;; + "CRITICAL") + log_level=$LOG_CRITICAL; + ;; + "WARNING") + log_level=$LOG_WARNING; + ;; + "TRACE") + log_level=$LOG_TRACE; + ;; + "NONE") + log_level=$LOG_NONE; + ;; + *) + echo "invalid log level $log_level_str, using INFO"; + log_level=$LOG_INFO; + ;; + esac fi #options without values start here if [ -n "$read_only" ]; then - cmd_line=$(echo "$cmd_line --read-only"); + cmd_line=$(echo "$cmd_line --read-only"); fi if [ -n "$acl" ]; then - cmd_line=$(echo "$cmd_line --acl"); + cmd_line=$(echo "$cmd_line --acl"); fi if [ -n "$selinux" ]; then - cmd_line=$(echo "$cmd_line --selinux"); + cmd_line=$(echo "$cmd_line --selinux"); fi if [ -n "$enable_ino32" ]; then @@ -109,11 +129,11 @@ start_glusterfs () fi if [ -n "$fopen_keep_cache" ]; then - cmd_line=$(echo "$cmd_line --fopen-keep-cache"); + cmd_line=$(echo "$cmd_line --fopen-keep-cache"); fi if [ -n "$volfile_check" ]; then - cmd_line=$(echo "$cmd_line --volfile-check"); + cmd_line=$(echo "$cmd_line --volfile-check"); fi if [ -n "$mem_accounting" ]; then @@ -130,15 +150,15 @@ start_glusterfs () fi if [ -n "$log_file" ]; then - cmd_line=$(echo "$cmd_line --log-file=$log_file"); + cmd_line=$(echo "$cmd_line --log-file=$log_file"); fi if [ -n 
"$direct_io_mode" ]; then - cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode"); + cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode"); fi if [ -n "$use_readdirp" ]; then - cmd_line=$(echo "$cmd_line --use-readdirp=$use_readdirp"); + cmd_line=$(echo "$cmd_line --use-readdirp=$use_readdirp"); fi if [ -n "$volume_name" ]; then @@ -158,19 +178,19 @@ start_glusterfs () fi if [ -n "$gid_timeout" ]; then - cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout"); + cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout"); fi if [ -n "$bg_qlen" ]; then - cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); + cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); fi if [ -n "$cong_threshold" ]; then - cmd_line=$(echo "$cmd_line --congestion-threshold=$cong_threshold"); + cmd_line=$(echo "$cmd_line --congestion-threshold=$cong_threshold"); fi if [ -n "$fuse_mountopts" ]; then - cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts"); + cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts"); fi if [ -n "$xlator_option" ]; then @@ -184,36 +204,40 @@ start_glusterfs () if [ -z "$volfile_loc" ]; then if [ -n "$server_ip" ]; then + + cmd_line=$(echo "$cmd_line --volfile-server=$server_ip"); + + if [ -n "$backup_volfile_servers" ]; then + servers=$(parse_backup_volfile_servers ${backup_volfile_servers}) + for i in $(echo ${servers}); do + cmd_line=$(echo "$cmd_line --volfile-server=$i"); + done + fi + if [ -n "$server_port" ]; then cmd_line=$(echo "$cmd_line --volfile-server-port=$server_port"); fi - if [ -n "$transport" ]; then + + if [ -n "$transport" ]; then cmd_line=$(echo "$cmd_line --volfile-server-transport=$transport"); if [ "$transport" = "rdma" ]; then volume_id_rdma=".rdma"; fi fi + if [ -n "$volume_id" ]; then if [ -n "$volume_id_rdma" ]; then volume_id="$volume_id$volume_id_rdma"; fi cmd_line=$(echo "$cmd_line --volfile-id=$volume_id"); fi - - if [ -n "$backupvolfile_server" ]; then - cmd_line1=$(echo "$cmd_line 
--volfile-server=$backupvolfile_server"); - fi - if [ -n "$volfile_max_fetch_attempts" ]; then - cmd_line=$(echo "$cmd_line --volfile-max-fetch-attempts=$volfile_max_fetch_attempts"); - fi - cmd_line=$(echo "$cmd_line --volfile-server=$server_ip"); fi else cmd_line=$(echo "$cmd_line --volfile=$volfile_loc"); fi if [ -n "$fuse_mountopts" ]; then - cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts"); + cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts"); fi cmd_line=$(echo "$cmd_line $mount_point"); @@ -228,24 +252,8 @@ start_glusterfs () inode="0"; fi - # retry the failover - # if [ $? != "0" ]; then # <--- TODO: Once glusterfs returns proper error code, change it. if [ $inode -ne 1 ]; then err=1; - if [ -n "$cmd_line1" ]; then - cmd_line1=$(echo "$cmd_line1 $mount_point"); - $cmd_line1; - err=0; - - inode=$( ${getinode} $mount_point 2>/dev/null); - # this is required if the stat returns error - if [ -z "$inode" ]; then - inode="0"; - fi - if [ $inode -ne 1 ]; then - err=1; - fi - fi fi if [ $err -eq "1" ]; then @@ -346,7 +354,7 @@ main () "acl") acl=1 ;; "selinux") selinux=1 ;; "worm") worm=1 ;; - "fopen-keep-cache") fopen_keep_cache=1 ;; + "fopen-keep-cache") fopen_keep_cache=1 ;; "enable-ino32") enable_ino32=1 ;; "mem-accounting") mem_accounting=1;; "aux-gfid-mount") @@ -373,19 +381,16 @@ main () "volume-id") volume_id=$value ;; "volfile-check") volfile_check=$value ;; "server-port") server_port=$value ;; - "fetch-attempts") - volfile_max_fetch_attempts=$value ;; - "backupvolfile-server") - backupvolfile_server=$value ;; "attribute-timeout") attribute_timeout=$value ;; "entry-timeout") entry_timeout=$value ;; "negative-timeout") negative_timeout=$value ;; - "gid-timeout") gid_timeout=$value ;; - "background-qlen") bg_qlen=$value ;; - "congestion-threshold") cong_threshold=$value ;; - "xlator-option") xlator_option=$xlator_option" "$pair ;; - "fuse-mountopts") fuse_mountopts=$value ;; + "gid-timeout") gid_timeout=$value ;; + "background-qlen") 
bg_qlen=$value ;; + "backup-volfile-servers") backup_volfile_servers=$value ;; + "congestion-threshold") cong_threshold=$value ;; + "xlator-option") xlator_option=$xlator_option" "$pair ;; + "fuse-mountopts") fuse_mountopts=$value ;; "use-readdirp") use_readdirp=$value ;; *) # Passthru @@ -418,7 +423,7 @@ main () [ -n "$test_str" ] && { volume_id="$test_str"; } - volfile_loc=""; + volfile_loc=""; } # |