From 493008a299cd1197df0caee72eacd12c1a54606b Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Wed, 11 Dec 2013 15:19:25 +0530 Subject: cluster/dht: Make sure gf_defrag_migrate_data is not optimized Problem: Whenever there syncop_xxx() is used inside a synctask and gcc optimizes it when compiled with -O2 there is a problem where 'errno' would not work as expected. Fix: Until http://review.gluster.com/6475 is reviewed and merged we are making sure the function is not going to be optimized. Change-Id: I504c18c8a7789f0c776a56f0aa60db3618b21601 BUG: 1040356 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/6481 Tested-by: Gluster Build System Reviewed-by: Shyamsundar Ranganathan Reviewed-by: Anand Avati --- xlators/cluster/dht/src/dht-rebalance.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 3e471edca..9446dbe03 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -1105,6 +1105,10 @@ gf_defrag_pattern_match (gf_defrag_info_t *defrag, char *name, uint64_t size) * have been fixed */ +#ifdef GF_LINUX_HOST_OS +#pragma GCC push_options +#pragma GCC optimize ("O0") +#endif int gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc, dict_t *migrate_data) @@ -1371,6 +1375,9 @@ out: return ret; } +#ifdef GF_LINUX_HOST_OS +#pragma GCC pop_options +#endif int -- cgit From 8f2fc6fb3a63ca87d82b6fa933f94fb1e3283a26 Mon Sep 17 00:00:00 2001 From: Ajeet Jha Date: Mon, 2 Dec 2013 13:04:51 +0530 Subject: features/changelog: more changelog fixes. -> log additional records. -> include FOP number for metadata. -> prevent crash if inode is not found in a fop. Change-Id: I9edd4b71819ebd68c6a2b4150ae279c471d129da BUG: 1036536 Signed-off-by: Ajeet Jha Reviewed-on: http://review.gluster.org/6403 Tested-by: Gluster Build System Reviewed-by: Amar Tumballi Reviewed-by: Venky Shankar --- .../changelog/lib/examples/c/get-changes.c | 2 +- .../changelog/lib/src/gf-changelog-process.c | 79 ++++++++++--- .../features/changelog/src/changelog-encoders.c | 21 ++++ .../features/changelog/src/changelog-encoders.h | 2 + xlators/features/changelog/src/changelog-helpers.c | 3 + xlators/features/changelog/src/changelog-helpers.h | 12 +- xlators/features/changelog/src/changelog-misc.h | 2 +- xlators/features/changelog/src/changelog.c | 129 ++++++++++++++++++--- 8 files changed, 212 insertions(+), 38 deletions(-) diff --git a/xlators/features/changelog/lib/examples/c/get-changes.c b/xlators/features/changelog/lib/examples/c/get-changes.c index 14562585a..6d0d0357d 100644 --- a/xlators/features/changelog/lib/examples/c/get-changes.c +++ b/xlators/features/changelog/lib/examples/c/get-changes.c @@ -40,7 +40,7 @@ main (int argc, char ** argv) char fbuf[PATH_MAX] = {0,}; /* get changes for brick "/home/vshankar/export/yow/yow-1" */ - ret = gf_changelog_register ("/home/vshankar/export/yow/yow-1", + ret = gf_changelog_register ("/home/vshankar/exports/yow/yow-1", "/tmp/scratch", "/tmp/change.log", 9, 5); if (ret) { handle_error ("register failed"); diff --git a/xlators/features/changelog/lib/src/gf-changelog-process.c b/xlators/features/changelog/lib/src/gf-changelog-process.c index df7204931..3ea2700c6 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-process.c +++ b/xlators/features/changelog/lib/src/gf-changelog-process.c @@ -36,6 +36,17 @@ int nr_gfids[] = { [GF_FOP_CREATE] = 1, }; +int nr_extra_recs[] = { + [GF_FOP_MKNOD] = 3, + 
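/* [editor's note -- annotation, not part of the patch] nr_extra_recs[]
 * gives the number of extra space-separated fields that now follow the
 * FOP name in an ASCII entry record: MKNOD/MKDIR/CREATE embed mode, uid
 * and gid (three fields) in the changelog itself, so the consumer no
 * longer needs to stat() the gfid handle for them; the name-only ops
 * (UNLINK, RMDIR, RENAME, LINK, SYMLINK) carry none. */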
[GF_FOP_MKDIR] = 3, + [GF_FOP_UNLINK] = 0, + [GF_FOP_RMDIR] = 0, + [GF_FOP_SYMLINK] = 0, + [GF_FOP_RENAME] = 0, + [GF_FOP_LINK] = 0, + [GF_FOP_CREATE] = 3, +}; + static char * binary_to_ascii (uuid_t uuid) { @@ -211,20 +222,20 @@ gf_changelog_parse_ascii (xlator_t *this, gf_changelog_t *gfc, int from_fd, int to_fd, size_t start_offset, struct stat *stbuf) { - int ng = 0; - int ret = -1; - int fop = 0; - int len = 0; - off_t off = 0; - off_t nleft = 0; - char *ptr = NULL; - char *eptr = NULL; - char *start = NULL; - char *mover = NULL; - int parse_err = 0; - char current_mover = ' '; - char ascii[LINE_BUFSIZE] = {0,}; - const char *fopname = NULL; + int ng = 0; + int ret = -1; + int fop = 0; + int len = 0; + off_t off = 0; + off_t nleft = 0; + char *ptr = NULL; + char *eptr = NULL; + char *start = NULL; + char *mover = NULL; + int parse_err = 0; + char current_mover = ' '; + char ascii[LINE_BUFSIZE] = {0,}; + const char *fopname = NULL; nleft = stbuf->st_size; @@ -249,7 +260,6 @@ gf_changelog_parse_ascii (xlator_t *this, switch (current_mover) { case 'D': - case 'M': MOVER_MOVE (mover, nleft, 1); /* target gfid */ @@ -258,6 +268,32 @@ gf_changelog_parse_ascii (xlator_t *this, FILL_AND_MOVE(ptr, ascii, off, mover, nleft, UUID_CANONICAL_FORM_LEN); break; + case 'M': + MOVER_MOVE (mover, nleft, 1); + + /* target gfid */ + PARSE_GFID (mover, ptr, UUID_CANONICAL_FORM_LEN, + conv_noop, parse_err); + FILL_AND_MOVE (ptr, ascii, off, + mover, nleft, UUID_CANONICAL_FORM_LEN); + FILL_AND_MOVE (" ", ascii, off, mover, nleft, 1); + + /* fop */ + len = strlen (mover); + VERIFY_SEPARATOR (mover, len, parse_err); + + fop = atoi (mover); + if ( (fopname = gf_fop_list[fop]) == NULL) { + parse_err = 1; + break; + } + + MOVER_MOVE (mover, nleft, len); + + len = strlen (fopname); + GF_CHANGELOG_FILL_BUFFER (fopname, ascii, off, len); + + break; case 'E': MOVER_MOVE (mover, nleft, 1); @@ -285,6 +321,17 @@ gf_changelog_parse_ascii (xlator_t *this, len = strlen (fopname); GF_CHANGELOG_FILL_BUFFER (fopname, ascii, off, len); + ng = nr_extra_recs[fop]; + for (;ng > 0; ng--) { + MOVER_MOVE (mover, nleft, 1); + len = strlen (mover); + VERIFY_SEPARATOR (mover, len, parse_err); + + GF_CHANGELOG_FILL_BUFFER (" ", ascii, off, 1); + FILL_AND_MOVE (mover, ascii, + off, mover, nleft, len); + } + /* pargfid + bname */ ng = nr_gfids[fop]; while (ng-- > 0) { @@ -320,7 +367,7 @@ gf_changelog_parse_ascii (xlator_t *this, if (gf_changelog_write (to_fd, ascii, off) != off) { gf_log (this->name, GF_LOG_ERROR, "processing ascii changelog failed due to " - " wrror in writing change (reason: %s)", + " error in writing change (reason: %s)", strerror (errno)); break; } diff --git a/xlators/features/changelog/src/changelog-encoders.c b/xlators/features/changelog/src/changelog-encoders.c index 553eec85c..08626ee2f 100644 --- a/xlators/features/changelog/src/changelog-encoders.c +++ b/xlators/features/changelog/src/changelog-encoders.c @@ -56,6 +56,24 @@ fop_fn (void *data, char *buffer, gf_boolean_t encode) return bufsz; } +size_t +number_fn (void *data, char *buffer, gf_boolean_t encode) +{ + size_t bufsz = 0; + unsigned int nr = 0; + char buf[20] = {0,}; + + nr = *(unsigned int *) data; + + if (encode) { + (void) snprintf (buf, sizeof (buf), "%u", nr); + CHANGELOG_FILL_BUFFER (buffer, bufsz, buf, strlen (buf)); + } else + CHANGELOG_FILL_BUFFER (buffer, bufsz, &nr, sizeof (unsigned int)); + + return bufsz; +} + void entry_free_fn (void *data) { @@ -94,6 +112,9 @@ changelog_encode_write_xtra (changelog_log_data_t *cld, case 
CHANGELOG_OPT_REC_ENTRY: data = &co->co_entry; break; + case CHANGELOG_OPT_REC_UINT32: + data = &co->co_uint32; + break; } if (co->co_convert) diff --git a/xlators/features/changelog/src/changelog-encoders.h b/xlators/features/changelog/src/changelog-encoders.h index a3efbee05..c5dcc8a77 100644 --- a/xlators/features/changelog/src/changelog-encoders.h +++ b/xlators/features/changelog/src/changelog-encoders.h @@ -34,6 +34,8 @@ size_t entry_fn (void *data, char *buffer, gf_boolean_t encode); size_t fop_fn (void *data, char *buffer, gf_boolean_t encode); +size_t +number_fn (void *data, char *buffer, gf_boolean_t encode); void entry_free_fn (void *data); int diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c index 7ab0091b5..91c43a16c 100644 --- a/xlators/features/changelog/src/changelog-helpers.c +++ b/xlators/features/changelog/src/changelog-helpers.c @@ -55,6 +55,9 @@ changelog_get_usable_buffer (changelog_local_t *local) { changelog_log_data_t *cld = NULL; + if (!local) + return NULL; + cld = &local->cld; if (!cld->cld_iobuf) return NULL; diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h index ad79636b0..16d60b99b 100644 --- a/xlators/features/changelog/src/changelog-helpers.h +++ b/xlators/features/changelog/src/changelog-helpers.h @@ -225,6 +225,7 @@ typedef struct changelog_inode_ctx { typedef enum { CHANGELOG_OPT_REC_FOP, CHANGELOG_OPT_REC_ENTRY, + CHANGELOG_OPT_REC_UINT32, } changelog_optional_rec_type_t; struct changelog_entry_fields { @@ -253,7 +254,8 @@ typedef struct { size_t co_len; union { - glusterfs_fop_t co_fop; + unsigned int co_uint32; + glusterfs_fop_t co_fop; struct changelog_entry_fields co_entry; }; } changelog_opt_t; @@ -346,6 +348,14 @@ changelog_forget (xlator_t *this, inode_t *inode); } \ } while (0) +#define CHANGELOG_FILL_UINT32(co, number, converter, xlen) do { \ + co->co_convert = converter; \ + co->co_free = NULL; \ + co->co_type = CHANGELOG_OPT_REC_UINT32; \ + co->co_uint32 = number; \ + xlen += sizeof (unsigned int); \ + } while (0) + #define CHANGLOG_FILL_FOP_NUMBER(co, fop, converter, xlen) do { \ co->co_convert = converter; \ co->co_free = NULL; \ diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h index 0712a3771..127b03e2e 100644 --- a/xlators/features/changelog/src/changelog-misc.h +++ b/xlators/features/changelog/src/changelog-misc.h @@ -18,7 +18,7 @@ #define CHANGELOG_FILE_NAME "CHANGELOG" #define CHANGELOG_VERSION_MAJOR 1 -#define CHANGELOG_VERSION_MINOR 0 +#define CHANGELOG_VERSION_MINOR 1 #define CHANGELOG_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY"/changelog-%s.sock" diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c index cea0e8c70..5fe3b4362 100644 --- a/xlators/features/changelog/src/changelog.c +++ b/xlators/features/changelog/src/changelog.c @@ -329,19 +329,28 @@ changelog_mkdir (call_frame_t *frame, xlator_t *this, } uuid_copy (gfid, uuid_req); - CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2); + CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 5); co = changelog_get_usable_buffer (frame->local); if (!co) goto wind; CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32 (co, S_IFDIR | mode, number_fn, xtra_len); + co++; + CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len); co++; + + CHANGELOG_FILL_UINT32 (co, frame->root->gid, 
number_fn, xtra_len); + co++; + CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, entry_fn, entry_free_fn, xtra_len, wind); - changelog_set_usable_record_and_length (frame->local, xtra_len, 2); + changelog_set_usable_record_and_length (frame->local, xtra_len, 5); wind: STACK_WIND (frame, changelog_mkdir_cbk, @@ -405,8 +414,8 @@ changelog_symlink (call_frame_t *frame, xlator_t *this, goto wind; CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); - co++; + CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, entry_fn, entry_free_fn, xtra_len, wind); @@ -467,19 +476,28 @@ changelog_mknod (call_frame_t *frame, } uuid_copy (gfid, uuid_req); - CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2); + CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 5); co = changelog_get_usable_buffer (frame->local); if (!co) goto wind; CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32 (co, mode, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len); + co++; + CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len); co++; + CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, entry_fn, entry_free_fn, xtra_len, wind); - changelog_set_usable_record_and_length (frame->local, xtra_len, 2); + changelog_set_usable_record_and_length (frame->local, xtra_len, 5); wind: STACK_WIND (frame, changelog_mknod_cbk, @@ -539,7 +557,7 @@ changelog_create (call_frame_t *frame, xlator_t *this, uuid_copy (gfid, uuid_req); /* init with two extra records */ - CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 2); + CHANGELOG_INIT_NOCHECK (this, frame->local, NULL, gfid, 5); if (!frame->local) goto wind; @@ -548,12 +566,21 @@ changelog_create (call_frame_t *frame, xlator_t *this, goto wind; CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + co++; + CHANGELOG_FILL_UINT32 (co, mode, number_fn, xtra_len); co++; + + CHANGELOG_FILL_UINT32 (co, frame->root->uid, number_fn, xtra_len); + co++; + + CHANGELOG_FILL_UINT32 (co, frame->root->gid, number_fn, xtra_len); + co++; + CHANGELOG_FILL_ENTRY (co, loc->pargfid, loc->name, entry_fn, entry_free_fn, xtra_len, wind); - changelog_set_usable_record_and_length (frame->local, xtra_len, 2); + changelog_set_usable_record_and_length (frame->local, xtra_len, 5); wind: STACK_WIND (frame, changelog_create_cbk, @@ -601,13 +628,25 @@ changelog_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; priv = this->private; CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); CHANGELOG_INIT (this, frame->local, - fd->inode, fd->inode->gfid, 0); + fd->inode, fd->inode->gfid, 1); + if (!frame->local) + goto wind; + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 1); wind: STACK_WIND (frame, changelog_fsetattr_cbk, @@ -646,13 +685,25 @@ changelog_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; priv = this->private; CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); CHANGELOG_INIT (this, frame->local, - loc->inode, loc->inode->gfid, 0); + loc->inode, 
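/* [editor's note -- annotation, not part of the patch] setattr/fsetattr
 * and the (f)setxattr/(f)removexattr fops below now reserve one optional
 * record and fill it with the FOP number, so a metadata record is encoded
 * as "M <gfid> <fop-number>" -- the layout the new 'M' branch of
 * gf_changelog_parse_ascii() above decodes. Illustrative reading of the
 * surrounding hunks, not authoritative. */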
loc->inode->gfid, 1); + if (!frame->local) + goto wind; + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 1); wind: STACK_WIND (frame, changelog_setattr_cbk, @@ -688,13 +739,23 @@ int32_t changelog_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; priv = this->private; CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); CHANGELOG_INIT (this, frame->local, - fd->inode, fd->inode->gfid, 0); + fd->inode, fd->inode->gfid, 1); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 1); wind: STACK_WIND (frame, changelog_fremovexattr_cbk, @@ -728,13 +789,23 @@ int32_t changelog_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; priv = this->private; CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); CHANGELOG_INIT (this, frame->local, - loc->inode, loc->inode->gfid, 0); + loc->inode, loc->inode->gfid, 1); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 1); wind: STACK_WIND (frame, changelog_removexattr_cbk, @@ -771,13 +842,23 @@ changelog_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; priv = this->private; CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); CHANGELOG_INIT (this, frame->local, - loc->inode, loc->inode->gfid, 0); + loc->inode, loc->inode->gfid, 1); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 1); wind: STACK_WIND (frame, changelog_setxattr_cbk, @@ -812,13 +893,23 @@ changelog_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata) { - changelog_priv_t *priv = NULL; + changelog_priv_t *priv = NULL; + changelog_opt_t *co = NULL; + size_t xtra_len = 0; priv = this->private; CHANGELOG_NOT_ACTIVE_THEN_GOTO (frame, priv, wind); CHANGELOG_INIT (this, frame->local, - fd->inode, fd->inode->gfid, 0); + fd->inode, fd->inode->gfid, 1); + + co = changelog_get_usable_buffer (frame->local); + if (!co) + goto wind; + + CHANGLOG_FILL_FOP_NUMBER (co, frame->root->op, fop_fn, xtra_len); + + changelog_set_usable_record_and_length (frame->local, xtra_len, 1); wind: STACK_WIND (frame, changelog_fsetxattr_cbk, -- cgit From f999c17da5a5353196e68e7a68af64f91df6b902 Mon Sep 17 00:00:00 2001 From: Ajeet Jha Date: Mon, 2 Dec 2013 12:37:34 +0530 Subject: gsyncd / geo-rep: geo-replication fixes -> "threaded" hybrid crawl. -> Enabling metatadata synchronization. -> Handling EINVAL/ESTALE gracefully while syncing metadata. -> Improvments to changelog crawl code. -> Initial crawl changelog generation format. 
-> No gsyncd restart when checkpoint updated. -> Fix symlink handling in hybrid crawl. -> Slave's xtime key is 'stime'. -> tar+ssh as data synchronization. -> Instead of 'raise', just log in warning level for xtime missing cases. -> Fix for JSON object load failure -> Get new config value after config value reset. -> Skip already processed changelogs. -> Saving status of each individual worker thread. -> GFID fetch on slave for purges. -> Add tar ssh keys and config options. -> Fix nlink count when using backend. -> Include "data" operation for hardlink. -> Use changelog time prefix as slave's time. -> Process changelogs in parallel. Change-Id: I09fcbb2e2e418149a6d8435abd2ac6b2f015bb06 BUG: 1036539 Signed-off-by: Ajeet Jha Reviewed-on: http://review.gluster.org/6404 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- geo-replication/src/peer_gsec_create.in | 10 +- geo-replication/syncdaemon/configinterface.py | 41 +- geo-replication/syncdaemon/gsyncd.py | 10 + geo-replication/syncdaemon/master.py | 799 ++++++++++++++++++-------- geo-replication/syncdaemon/resource.py | 176 ++++-- geo-replication/syncdaemon/syncdutils.py | 2 +- 6 files changed, 769 insertions(+), 269 deletions(-) diff --git a/geo-replication/src/peer_gsec_create.in b/geo-replication/src/peer_gsec_create.in index ef630bd44..a39fdbfb5 100755 --- a/geo-replication/src/peer_gsec_create.in +++ b/geo-replication/src/peer_gsec_create.in @@ -8,5 +8,11 @@ if [ ! -f "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem.pub ]; then ssh-keygen -N '' -f "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem > /dev/null fi -output=`echo command=\"@libexecdir@/glusterfs/gsyncd\" " "``cat "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem.pub` -echo $output +if [ ! -f "$GLUSTERD_WORKING_DIR"/geo-replication/tar_ssh.pem.pub ]; then + \rm -rf "$GLUSTERD_WORKING_DIR"/geo-replication/tar_ssh.pem* + ssh-keygen -N '' -f "$GLUSTERD_WORKING_DIR"/geo-replication/tar_ssh.pem > /dev/null +fi + +output1=`echo command=\"${exec_prefix}/libexec/glusterfs/gsyncd\" " "``cat "$GLUSTERD_WORKING_DIR"/geo-replication/secret.pem.pub` +output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" " "``cat "$GLUSTERD_WORKING_DIR"/geo-replication/tar_ssh.pem.pub` +echo -e "$output1\n$output2" diff --git a/geo-replication/syncdaemon/configinterface.py b/geo-replication/syncdaemon/configinterface.py index a326e8246..0f764c47a 100644 --- a/geo-replication/syncdaemon/configinterface.py +++ b/geo-replication/syncdaemon/configinterface.py @@ -5,6 +5,10 @@ except ImportError: import configparser as ConfigParser import re from string import Template +import os +import errno +import sys +from stat import ST_DEV, ST_INO, ST_MTIME from syncdutils import escape, unescape, norm, update_file, GsyncdError @@ -65,8 +69,38 @@ class GConffile(object): self.auxdicts = dd self.config = ConfigParser.RawConfigParser() self.config.read(path) + self.dev, self.ino, self.mtime = -1, -1, -1 self._normconfig() + def _load(self): + try: + sres = os.stat(self.path) + self.dev = sres[ST_DEV] + self.ino = sres[ST_INO] + self.mtime = sres[ST_MTIME] + except (OSError, IOError): + if sys.exc_info()[1].errno == errno.ENOENT: + sres = None + + self.config.read(self.path) + self._normconfig() + + def get_realtime(self, opt): + try: + sres = os.stat(self.path) + except (OSError, IOError): + if sys.exc_info()[1].errno == errno.ENOENT: + sres = None + else: + raise + + # compare file system stat with that of our stream file handle + if not sres or sres[ST_DEV] != self.dev or \ + sres[ST_INO] != self.ino 
or self.mtime != sres[ST_MTIME]: + self._load() + + return self.get(opt, printValue=False) + def section(self, rx=False): """get the section name of the section representing .peers in .config""" peers = self.peers @@ -162,7 +196,7 @@ class GConffile(object): if self.config.has_section(self.section()): update_from_sect(self.section(), MultiDict(dct, *self.auxdicts)) - def get(self, opt=None): + def get(self, opt=None, printValue=True): """print the matching key/value pairs from .config, or if @opt given, the value for @opt (according to the logic described in .update_to) @@ -173,7 +207,10 @@ class GConffile(object): opt = norm(opt) v = d.get(opt) if v: - print(v) + if printValue: + print(v) + else: + return v else: for k, v in d.iteritems(): if k == '__name__': diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py index 7fcc3165a..64c26a5d2 100644 --- a/geo-replication/syncdaemon/gsyncd.py +++ b/geo-replication/syncdaemon/gsyncd.py @@ -191,6 +191,7 @@ def main_i(): op.add_option('--log-file-mbr', metavar='LOGF', type=str, action='callback', callback=store_abs) op.add_option('--state-file', metavar='STATF', type=str, action='callback', callback=store_abs) op.add_option('--state-detail-file', metavar='STATF', type=str, action='callback', callback=store_abs) + op.add_option('--georep-session-working-dir', metavar='STATF', type=str, action='callback', callback=store_abs) op.add_option('--ignore-deletes', default=False, action='store_true') op.add_option('--isolated-slave', default=False, action='store_true') op.add_option('--use-rsync-xattrs', default=False, action='store_true') @@ -202,6 +203,7 @@ def main_i(): op.add_option('--local-id', metavar='ID', help=SUPPRESS_HELP, default='') op.add_option('--local-path', metavar='PATH', help=SUPPRESS_HELP, default='') op.add_option('-s', '--ssh-command', metavar='CMD', default='ssh') + op.add_option('--ssh-command-tar', metavar='CMD', default='ssh') op.add_option('--rsync-command', metavar='CMD', default='rsync') op.add_option('--rsync-options', metavar='OPTS', default='') op.add_option('--rsync-ssh-options', metavar='OPTS', default='--compress') @@ -228,6 +230,7 @@ def main_i(): op.add_option('--change-interval', metavar='SEC', type=int, default=3) # working directory for changelog based mechanism op.add_option('--working-dir', metavar='DIR', type=str, action='callback', callback=store_abs) + op.add_option('--use-tarssh', default=False, action='store_true') op.add_option('-c', '--config-file', metavar='CONF', type=str, action='callback', callback=store_local) # duh. 
need to specify dest or value will be mapped to None :S @@ -474,8 +477,15 @@ def main_i(): GLogger._gsyncd_loginit(log_file=gconf.log_file, label='conf') if confdata.op == 'set': logging.info('checkpoint %s set' % confdata.val) + gcnf.delete('checkpoint_completed') + gcnf.delete('checkpoint_target') elif confdata.op == 'del': logging.info('checkpoint info was reset') + # if it is removing 'checkpoint' then we need + # to remove 'checkpoint_completed' and 'checkpoint_target' too + gcnf.delete('checkpoint_completed') + gcnf.delete('checkpoint_target') + except IOError: if sys.exc_info()[1].errno == ENOENT: # directory of log path is not present, diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py index 95810a61e..721fe18bd 100644 --- a/geo-replication/syncdaemon/master.py +++ b/geo-replication/syncdaemon/master.py @@ -10,15 +10,16 @@ import socket import string import errno from shutil import copyfileobj -from errno import ENOENT, ENODATA, EPIPE, EEXIST +from errno import ENOENT, ENODATA, EPIPE, EEXIST, errorcode from threading import currentThread, Condition, Lock from datetime import datetime +from libcxattr import Xattr from gconf import gconf from tempfile import mkdtemp, NamedTemporaryFile from syncdutils import FreeObject, Thread, GsyncdError, boolify, escape, \ unescape, select, gauxpfx, md5hex, selfkill, entry2pb, \ - lstat, errno_wrap + lstat, errno_wrap, update_file URXTIME = (-1, 0) @@ -59,7 +60,8 @@ def gmaster_builder(excrawl=None): crawlmixin = getattr(this, 'GMaster' + changemixin.capitalize() + 'Mixin') sendmarkmixin = boolify(gconf.use_rsync_xattrs) and SendmarkRsyncMixin or SendmarkNormalMixin purgemixin = boolify(gconf.ignore_deletes) and PurgeNoopMixin or PurgeNormalMixin - class _GMaster(crawlmixin, modemixin, sendmarkmixin, purgemixin): + syncengine = boolify(gconf.use_tarssh) and TarSSHEngine or RsyncEngine + class _GMaster(crawlmixin, modemixin, sendmarkmixin, purgemixin, syncengine): pass return _GMaster @@ -101,14 +103,17 @@ class NormalMixin(object): if not 'default_xtime' in opts: opts['default_xtime'] = URXTIME - def xtime_low(self, server, path, **opts): - xt = server.xtime(path, self.uuid) + def xtime_low(self, rsc, path, **opts): + if rsc == self.master: + xt = rsc.server.xtime(path, self.uuid) + else: + xt = rsc.server.stime(path, self.uuid) if isinstance(xt, int) and xt != ENODATA: return xt if xt == ENODATA or xt < self.volmark: if opts['create']: xt = _xtime_now() - server.aggregated.set_xtime(path, self.uuid, xt) + rsc.server.aggregated.set_xtime(path, self.uuid, xt) else: xt = opts['default_xtime'] return xt @@ -140,7 +145,7 @@ class NormalMixin(object): return xte > xtrd def set_slave_xtime(self, path, mark): - self.slave.server.set_xtime(path, self.uuid, mark) + self.slave.server.set_stime(path, self.uuid, mark) self.slave.server.set_xtime_remote(path, self.uuid, mark) class PartialMixin(NormalMixin): @@ -190,6 +195,65 @@ class PurgeNoopMixin(object): def purge_missing(self, path, names): pass +class TarSSHEngine(object): + """Sync engine that uses tar(1) piped over ssh(1) + for data transfers. Good for lots of small files. 
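    ([editor's sketch, not authoritative] the slave-side transport is
    expected to behave roughly like
        tar -cf - --files-from=<batch> | ssh -i tar_ssh.pem <slave> tar -xf - -C <mount>
    which is why peer_gsec_create.in above pins the tar_ssh.pem public key
    to a forced "tar ${SSH_ORIGINAL_COMMAND#* }" command on the slave.)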
+ """ + def a_syncdata(self, files): + logging.debug('files: %s' % (files)) + for f in files: + pb = self.syncer.add(f) + def regjob(se, xte, pb): + rv = pb.wait() + if rv[0]: + logging.debug('synced ' + se) + return True + else: + # stat check for file presence + st = lstat(se) + if isinstance(st, int): + return True + logging.warn('tar+ssh: %s [errcode: %d]' % (se, rv[1])) + self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb) + + def syncdata_wait(self): + if self.wait(self.FLAT_DIR_HIERARCHY, None): + return True + + def syncdata(self, files): + self.a_syncdata(files) + self.syncdata_wait() + +class RsyncEngine(object): + """Sync engine that uses rsync(1) for data transfers""" + def a_syncdata(self, files): + logging.debug('files: %s' % (files)) + for f in files: + logging.debug('candidate for syncing %s' % f) + pb = self.syncer.add(f) + def regjob(se, xte, pb): + rv = pb.wait() + if rv[0]: + logging.debug('synced ' + se) + return True + else: + if rv[1] in [23, 24]: + # stat to check if the file exist + st = lstat(se) + if isinstance(st, int): + # file got unlinked in the interim + return True + logging.warn('Rsync: %s [errcode: %d]' % (se, rv[1])) + self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb) + + def syncdata_wait(self): + if self.wait(self.FLAT_DIR_HIERARCHY, None): + return True + + def syncdata(self, files): + self.a_syncdata(files) + self.syncdata_wait() + class GMasterCommon(object): """abstract class impementling master role""" @@ -234,7 +298,7 @@ class GMasterCommon(object): else: rsc = self.master self.make_xtime_opts(rsc == self.master, opts) - return self.xtime_low(rsc.server, path, **opts) + return self.xtime_low(rsc, path, **opts) def get_initial_crawl_data(self): # while persisting only 'files_syncd' is non-zero, rest of @@ -243,18 +307,26 @@ class GMasterCommon(object): default_data = {'files_syncd': 0, 'files_remaining': 0, 'bytes_remaining': 0, - 'purges_remaining': 0} + 'purges_remaining': 0, + 'total_files_skipped': 0} if getattr(gconf, 'state_detail_file', None): try: - return json.load(open(gconf.state_detail_file)) - except (IOError, OSError): + with open(gconf.state_detail_file, 'r+') as f: + loaded_data= json.load(f) + diff_data = set(default_data) - set (loaded_data) + if len(diff_data): + for i in diff_data: + loaded_data[i] = default_data[i] + return loaded_data + except (IOError): ex = sys.exc_info()[1] - if ex.errno == ENOENT: - # Create file with initial data + logging.warn ('Creating new gconf.state_detail_file.') + # Create file with initial data + try: with open(gconf.state_detail_file, 'wb') as f: json.dump(default_data, f) return default_data - else: + except: raise return default_data @@ -264,6 +336,8 @@ class GMasterCommon(object): same_dir = os.path.dirname(gconf.state_detail_file) with NamedTemporaryFile(dir=same_dir, delete=False) as tmp: json.dump(self.total_crawl_stats, tmp) + tmp.flush() + os.fsync(tmp.fileno()) os.rename(tmp.name, gconf.state_detail_file) except (IOError, OSError): raise @@ -272,7 +346,13 @@ class GMasterCommon(object): self.master = master self.slave = slave self.jobtab = {} - self.syncer = Syncer(slave) + if boolify(gconf.use_tarssh): + logging.info("using 'tar over ssh' as the sync engine") + self.syncer = Syncer(slave, self.slave.tarssh) + else: + logging.info("using 'rsync' as the sync engine") + # partial transfer (cf. rsync(1)), that's normal + self.syncer = Syncer(slave, self.slave.rsync, [23, 24]) # crawls vs. 
turns: # - self.crawls is simply the number of crawl() invocations on root # - one turn is a maximal consecutive sequence of crawls so that each @@ -294,6 +374,8 @@ class GMasterCommon(object): self.terminate = False self.sleep_interval = 1 self.checkpoint_thread = None + self.current_files_skipped_count = 0 + self.skipped_gfid_list = [] def init_keep_alive(cls): """start the keep-alive thread """ @@ -336,7 +418,8 @@ class GMasterCommon(object): gconf.configinterface.set('volume_id', self.uuid) if self.volinfo: if self.volinfo['retval']: - raise GsyncdError("master is corrupt") + logging.warn("master cluster's info may not be valid %d" % \ + self.volinfo['retval']) self.start_checkpoint_thread() else: raise GsyncdError("master volinfo unavailable") @@ -349,7 +432,7 @@ class GMasterCommon(object): while not self.terminate: if self.start: logging.debug("... crawl #%d done, took %.6f seconds" % \ - (self.crawls, time.time() - self.start)) + (self.crawls, time.time() - self.start)) self.start = time.time() should_display_info = self.start - self.lastreport['time'] >= 60 if should_display_info: @@ -363,9 +446,20 @@ class GMasterCommon(object): if int(t1 - t0) >= 60: #lets hardcode this check to 60 seconds crawl = self.should_crawl() t0 = t1 + self.update_worker_remote_node() if not crawl: + self.update_worker_health("Passive") + # bring up _this_ brick to the cluster stime + # which is min of cluster (but max of the replicas) + brick_stime = self.xtime('.', self.slave) + cluster_stime = self.master.server.aggregated.stime_mnt('.', '.'.join([str(self.uuid), str(gconf.slave_id)])) + logging.debug("Cluster stime: %s | Brick stime: %s" % (repr(cluster_stime), repr(brick_stime))) + if not isinstance(cluster_stime, int): + if brick_stime < cluster_stime: + self.slave.server.set_stime(self.FLAT_DIR_HIERARCHY, self.uuid, cluster_stime) time.sleep(5) continue + self.update_worker_health("Active") self.crawl() if oneshot: return @@ -375,7 +469,7 @@ class GMasterCommon(object): def _checkpt_param(cls, chkpt, prm, xtimish=True): """use config backend to lookup a parameter belonging to checkpoint @chkpt""" - cprm = getattr(gconf, 'checkpoint_' + prm, None) + cprm = gconf.configinterface.get_realtime('checkpoint_' + prm) if not cprm: return chkpt_mapped, val = cprm.split(':', 1) @@ -402,17 +496,6 @@ class GMasterCommon(object): ts += '.' 
+ str(tpair[1]) return ts - def get_extra_info(self): - str_info = '\nUptime=%s;FilesSyncd=%d;FilesPending=%d;BytesPending=%d;DeletesPending=%d;' % \ - (self._crawl_time_format(datetime.now() - self.crawl_start), \ - self.total_crawl_stats['files_syncd'], \ - self.total_crawl_stats['files_remaining'], \ - self.total_crawl_stats['bytes_remaining'], \ - self.total_crawl_stats['purges_remaining']) - str_info += '\0' - logging.debug(str_info) - return str_info - def _crawl_time_format(self, crawl_time): # Ex: 5 years, 4 days, 20:23:10 years, days = divmod(crawl_time.days, 365.25) @@ -431,27 +514,49 @@ class GMasterCommon(object): date += "%s:%s:%s" % (string.zfill(h, 2), string.zfill(m, 2), string.zfill(s, 2)) return date - def checkpt_service(self, chan, chkpt, tgt): + def checkpt_service(self, chan, chkpt): """checkpoint service loop monitor and verify checkpoint status for @chkpt, and listen for incoming requests for whom we serve a pretty-formatted status report""" - if not chkpt: - # dummy loop for the case when there is no checkpt set - while True: + while True: + chkpt = gconf.configinterface.get_realtime("checkpoint") + if not chkpt: + gconf.configinterface.delete("checkpoint_completed") + gconf.configinterface.delete("checkpoint_target") + # dummy loop for the case when there is no checkpt set select([chan], [], []) conn, _ = chan.accept() - conn.send(self.get_extra_info()) + conn.send('\0') conn.close() - completed = self._checkpt_param(chkpt, 'completed', xtimish=False) - if completed: - completed = tuple(int(x) for x in completed.split('.')) - while True: + continue + + checkpt_tgt = self._checkpt_param(chkpt, 'target') + if not checkpt_tgt: + checkpt_tgt = self.xtime('.') + if isinstance(checkpt_tgt, int): + raise GsyncdError("master root directory is unaccessible (%s)", + os.strerror(checkpt_tgt)) + self._set_checkpt_param(chkpt, 'target', checkpt_tgt) + logging.debug("checkpoint target %s has been determined for checkpoint %s" % \ + (repr(checkpt_tgt), chkpt)) + + # check if the label is 'now' + chkpt_lbl = chkpt + try: + x1,x2 = chkpt.split(':') + if x1 == 'now': + chkpt_lbl = "as of " + self.humantime(x2) + except: + pass + completed = self._checkpt_param(chkpt, 'completed', xtimish=False) + if completed: + completed = tuple(int(x) for x in completed.split('.')) s,_,_ = select([chan], [], [], (not completed) and 5 or None) # either request made and we re-check to not # give back stale data, or we still hunting for completion - if self.native_xtime(tgt) and self.native_xtime(tgt) < self.volmark: + if self.native_xtime(checkpt_tgt) and self.native_xtime(checkpt_tgt) < self.volmark: # indexing has been reset since setting the checkpoint status = "is invalid" else: @@ -459,12 +564,12 @@ class GMasterCommon(object): if isinstance(xtr, int): raise GsyncdError("slave root directory is unaccessible (%s)", os.strerror(xtr)) - ncompleted = self.xtime_geq(xtr, tgt) + ncompleted = self.xtime_geq(xtr, checkpt_tgt) if completed and not ncompleted: # stale data logging.warn("completion time %s for checkpoint %s became stale" % \ (self.humantime(*completed), chkpt)) completed = None - gconf.confdata.delete('checkpoint-completed') + gconf.configinterface.delete('checkpoint_completed') if ncompleted and not completed: # just reaching completion completed = "%.6f" % time.time() self._set_checkpt_param(chkpt, 'completed', completed, xtimish=False) @@ -478,7 +583,7 @@ class GMasterCommon(object): try: conn, _ = chan.accept() try: - conn.send(" | checkpoint %s %s %s" % (chkpt, status, 
self.get_extra_info())) + conn.send("checkpoint %s is %s\0" % (chkpt_lbl, status)) except: exc = sys.exc_info()[1] if (isinstance(exc, OSError) or isinstance(exc, IOError)) and \ @@ -505,18 +610,8 @@ class GMasterCommon(object): pass chan.bind(state_socket) chan.listen(1) - checkpt_tgt = None - if gconf.checkpoint: - checkpt_tgt = self._checkpt_param(gconf.checkpoint, 'target') - if not checkpt_tgt: - checkpt_tgt = self.xtime('.') - if isinstance(checkpt_tgt, int): - raise GsyncdError("master root directory is unaccessible (%s)", - os.strerror(checkpt_tgt)) - self._set_checkpt_param(gconf.checkpoint, 'target', checkpt_tgt) - logging.debug("checkpoint target %s has been determined for checkpoint %s" % \ - (repr(checkpt_tgt), gconf.checkpoint)) - t = Thread(target=self.checkpt_service, args=(chan, gconf.checkpoint, checkpt_tgt)) + chkpt = gconf.configinterface.get_realtime("checkpoint") + t = Thread(target=self.checkpt_service, args=(chan, chkpt)) t.start() self.checkpoint_thread = t @@ -567,15 +662,11 @@ class GMasterChangelogMixin(GMasterCommon): POS_GFID = 0 POS_TYPE = 1 - POS_ENTRY1 = 2 - POS_ENTRY2 = 3 # renames - - _CL_TYPE_DATA_PFX = "D " - _CL_TYPE_METADATA_PFX = "M " - _CL_TYPE_ENTRY_PFX = "E " + POS_ENTRY1 = -1 - TYPE_GFID = [_CL_TYPE_DATA_PFX] # ignoring metadata ops - TYPE_ENTRY = [_CL_TYPE_ENTRY_PFX] + TYPE_META = "M " + TYPE_GFID = "D " + TYPE_ENTRY = "E " # flat directory heirarchy for gfid based access FLAT_DIR_HIERARCHY = '.' @@ -594,39 +685,11 @@ class GMasterChangelogMixin(GMasterCommon): logging.debug('changelog working dir %s (log: %s)' % (workdir, logfile)) return (workdir, logfile) - # update stats from *this* crawl - def update_cumulative_stats(self, files_pending): - self.total_crawl_stats['files_remaining'] = files_pending['count'] - self.total_crawl_stats['bytes_remaining'] = files_pending['bytes'] - self.total_crawl_stats['purges_remaining'] = files_pending['purge'] - - # sync data - def syncdata(self, datas): - logging.debug('datas: %s' % (datas)) - for data in datas: - logging.debug('candidate for syncing %s' % data) - pb = self.syncer.add(data) - def regjob(se, xte, pb): - rv = pb.wait() - if rv[0]: - logging.debug('synced ' + se) - return True - else: - if rv[1] in [23, 24]: - # stat to check if the file exist - st = lstat(se) - if isinstance(st, int): - # file got unlinked in the interim - return True - logging.warn('Rsync: %s [errcode: %d]' % (se, rv[1])) - self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, data, None, pb) - if self.wait(self.FLAT_DIR_HIERARCHY, None): - return True - def process_change(self, change, done, retry): pfx = gauxpfx() clist = [] entries = [] + meta_gfid = set() datas = set() # basic crawl stats: files and bytes @@ -652,136 +715,351 @@ class GMasterChangelogMixin(GMasterCommon): dct[k] = ed[k] return dct - # regular file update: bytes & count - def _update_reg(entry, size): - if not entry in files_pending['files']: - files_pending['count'] += 1 - files_pending['bytes'] += size - files_pending['files'].append(entry) - # updates for directories, symlinks etc.. 
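# [editor's note -- annotation, not part of the patch] the removed helpers
# above tracked per-file byte counts via stat(); with stat data now carried
# in (or deferred by) the changelog records themselves, accounting reduces
# to the simple entry_update()/purge_update() counters that replace them.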
- def _update_rest(): + # entry counts (not purges) + def entry_update(): files_pending['count'] += 1 - # entry count - def entry_update(entry, size, mode): - if stat.S_ISREG(mode): - _update_reg(entry, size) - else: - _update_rest() # purge count def purge_update(): files_pending['purge'] += 1 for e in clist: e = e.strip() - et = e[self.IDX_START:self.IDX_END] - ec = e[self.IDX_END:].split(' ') - if et in self.TYPE_ENTRY: + et = e[self.IDX_START:self.IDX_END] # entry type + ec = e[self.IDX_END:].split(' ') # rest of the bits + + if et == self.TYPE_ENTRY: + # extract information according to the type of + # the entry operation. create(), mkdir() and mknod() + # have mode, uid, gid information in the changelog + # itself, so no need to stat()... ty = ec[self.POS_TYPE] + + # PARGFID/BNAME en = unescape(os.path.join(pfx, ec[self.POS_ENTRY1])) + # GFID of the entry gfid = ec[self.POS_GFID] - # definitely need a better way bucketize entry ops + if ty in ['UNLINK', 'RMDIR']: purge_update() entries.append(edct(ty, gfid=gfid, entry=en)) - continue - go = os.path.join(pfx, gfid) - st = lstat(go) - if isinstance(st, int): - if ty == 'RENAME': - entries.append(edct('UNLINK', gfid=gfid, entry=en)) - else: - logging.debug('file %s got purged in the interim' % go) - continue - entry_update(go, st.st_size, st.st_mode) - if ty in ['CREATE', 'MKDIR', 'MKNOD']: - entries.append(edct(ty, stat=st, entry=en, gfid=gfid)) - elif ty == 'LINK': - entries.append(edct(ty, stat=st, entry=en, gfid=gfid)) - elif ty == 'SYMLINK': - rl = errno_wrap(os.readlink, [en], [ENOENT]) - if isinstance(rl, int): - continue - entries.append(edct(ty, stat=st, entry=en, gfid=gfid, link=rl)) - elif ty == 'RENAME': - e2 = unescape(os.path.join(pfx, ec[self.POS_ENTRY2])) - entries.append(edct(ty, gfid=gfid, entry=en, entry1=e2, stat=st)) + elif ty in ['CREATE', 'MKDIR', 'MKNOD']: + entry_update() + # stat information present in the changelog itself + entries.append(edct(ty, gfid=gfid, entry=en, mode=int(ec[2]),\ + uid=int(ec[3]), gid=int(ec[4]))) else: - logging.warn('ignoring %s [op %s]' % (gfid, ty)) - elif et in self.TYPE_GFID: - go = os.path.join(pfx, ec[0]) - st = lstat(go) - if isinstance(st, int): - logging.debug('file %s got purged in the interim' % go) - continue - entry_update(go, st.st_size, st.st_mode) - datas.update([go]) + # stat() to get mode and other information + go = os.path.join(pfx, gfid) + st = lstat(go) + if isinstance(st, int): + if ty == 'RENAME': # special hack for renames... + entries.append(edct('UNLINK', gfid=gfid, entry=en)) + else: + logging.debug('file %s got purged in the interim' % go) + continue + + if ty == 'LINK': + entry_update() + entries.append(edct(ty, stat=st, entry=en, gfid=gfid)) + elif ty == 'SYMLINK': + rl = errno_wrap(os.readlink, [en], [ENOENT]) + if isinstance(rl, int): + continue + entry_update() + entries.append(edct(ty, stat=st, entry=en, gfid=gfid, link=rl)) + elif ty == 'RENAME': + entry_update() + e1 = unescape(os.path.join(pfx, ec[self.POS_ENTRY1 - 1])) + entries.append(edct(ty, gfid=gfid, entry=e1, entry1=en, stat=st)) + else: + logging.warn('ignoring %s [op %s]' % (gfid, ty)) + elif et == self.TYPE_GFID: + datas.add(os.path.join(pfx, ec[0])) + elif et == self.TYPE_META: + if ec[1] == 'SETATTR': # only setattr's for now... 
+ meta_gfid.add(os.path.join(pfx, ec[0])) + else: + logging.warn('got invalid changelog type: %s' % (et)) logging.debug('entries: %s' % repr(entries)) if not retry: - self.update_cumulative_stats(files_pending) + self.update_worker_cumilitive_status(files_pending) # sync namespace if (entries): self.slave.server.entry_ops(entries) + # sync metadata + if (meta_gfid): + meta_entries = [] + for go in meta_gfid: + st = lstat(go) + if isinstance(st, int): + logging.debug('file %s got purged in the interim' % go) + continue + meta_entries.append(edct('META', go=go, stat=st)) + if meta_entries: + self.slave.server.meta_ops(meta_entries) # sync data - if self.syncdata(datas): - if done: - self.master.server.changelog_done(change) - return True - - def sync_done(self): - self.total_crawl_stats['files_syncd'] += self.total_crawl_stats['files_remaining'] - self.total_crawl_stats['files_remaining'] = 0 - self.total_crawl_stats['bytes_remaining'] = 0 - self.total_crawl_stats['purges_remaining'] = 0 - self.update_crawl_data() + if datas: + self.a_syncdata(datas) def process(self, changes, done=1): - for change in changes: - tries = 0 - retry = False - while True: - logging.debug('processing change %s' % change) - if self.process_change(change, done, retry): - self.sync_done() - break - retry = True - tries += 1 - if tries == self.MAX_RETRIES: - logging.warn('changelog %s could not be processed - moving on...' % os.path.basename(change)) - self.sync_done() - if done: - self.master.server.changelog_done(change) - break - # it's either entry_ops() or Rsync that failed to do it's - # job. Mostly it's entry_ops() [which currently has a problem - # of failing to create an entry but failing to return an errno] - # Therefore we do not know if it's either Rsync or the freaking - # entry_ops() that failed... so we retry the _whole_ changelog - # again. - # TODO: remove entry retries when it's gets fixed. - logging.warn('incomplete sync, retrying changelog: %s' % change) - time.sleep(0.5) - self.turns += 1 + tries = 0 + retry = False - def upd_stime(self, stime): + while True: + self.skipped_gfid_list = [] + self.current_files_skipped_count = 0 + + # first, fire all changelog transfers in parallel. entry and metadata + # are performed synchronously, therefore in serial. However at the end + # of each changelog, data is synchronized with syncdata_async() - which + # means it is serial w.r.t entries/metadata of that changelog but + # happens in parallel with data of other changelogs. + + for change in changes: + logging.debug('processing change %s' % change) + self.process_change(change, done, retry) + if not retry: + self.turns += 1 # number of changelogs processed in the batch + + # Now we wait for all the data transfers fired off in the above step + # to complete. Note that this is not ideal either. Ideally we want to + # trigger the entry/meta-data transfer of the next batch while waiting + # for the data transfer of the current batch to finish. + + # Note that the reason to wait for the data transfer (vs doing it + # completely in the background and call the changelog_done() + # asynchronously) is because this waiting acts as a "backpressure" + # and prevents a spiraling increase of wait stubs from consuming + # unbounded memory and resources. + + # update the slave's time with the timestamp of the _last_ changelog + # file time suffix. 
Since, the changelog prefix time is the time when + # the changelog was rolled over, introduce a tolerence of 1 second to + # counter the small delta b/w the marker update and gettimeofday(). + # NOTE: this is only for changelog mode, not xsync. + + # @change is the last changelog (therefore max time for this batch) + if self.syncdata_wait(): + if done: + xtl = (int(change.split('.')[-1]) - 1, 0) + self.upd_stime(xtl) + map(self.master.server.changelog_done, changes) + self.update_worker_files_syncd() + break + + # We do not know which changelog transfer failed, retry everything. + retry = True + tries += 1 + if tries == self.MAX_RETRIES: + logging.warn('changelogs %s could not be processed - moving on...' % \ + ' '.join(map(os.path.basename, changes))) + self.update_worker_total_files_skipped(self.current_files_skipped_count) + logging.warn('SKIPPED GFID = %s' % ','.join(self.skipped_gfid_list)) + self.update_worker_files_syncd() + if done: + xtl = (int(change.split('.')[-1]) - 1, 0) + self.upd_stime(xtl) + map(self.master.server.changelog_done, changes) + break + # it's either entry_ops() or Rsync that failed to do it's + # job. Mostly it's entry_ops() [which currently has a problem + # of failing to create an entry but failing to return an errno] + # Therefore we do not know if it's either Rsync or the freaking + # entry_ops() that failed... so we retry the _whole_ changelog + # again. + # TODO: remove entry retries when it's gets fixed. + logging.warn('incomplete sync, retrying changelogs: %s' % \ + ' '.join(map(os.path.basename, changes))) + time.sleep(0.5) + + def upd_stime(self, stime, path=None): + if not path: + path = self.FLAT_DIR_HIERARCHY if not stime == URXTIME: - self.sendmark(self.FLAT_DIR_HIERARCHY, stime) + self.sendmark(path, stime) + + def get_worker_status_file(self): + file_name = gconf.local_path+'.status' + file_name = file_name.replace("/", "_") + worker_status_file = gconf.georep_session_working_dir+file_name + return worker_status_file + + def update_worker_status(self, key, value): + default_data = {"remote_node":"N/A", + "worker status":"Not Started", + "crawl status":"N/A", + "files_syncd": 0, + "files_remaining": 0, + "bytes_remaining": 0, + "purges_remaining": 0, + "total_files_skipped": 0} + worker_status_file = self.get_worker_status_file() + try: + with open(worker_status_file, 'r+') as f: + loaded_data = json.load(f) + loaded_data[key] = value + os.ftruncate(f.fileno(), 0) + os.lseek(f.fileno(), 0, os.SEEK_SET) + json.dump(loaded_data, f) + f.flush() + os.fsync(f.fileno()) + except (IOError, OSError, ValueError): + logging.info ('Creating new %s' % worker_status_file) + try: + with open(worker_status_file, 'wb') as f: + default_data[key] = value + json.dump(default_data, f) + f.flush() + os.fsync(f.fileno()) + except: + raise + + def update_worker_cumilitive_status(self, files_pending): + default_data = {"remote_node":"N/A", + "worker status":"Not Started", + "crawl status":"N/A", + "files_syncd": 0, + "files_remaining": 0, + "bytes_remaining": 0, + "purges_remaining": 0, + "total_files_skipped": 0} + worker_status_file = self.get_worker_status_file() + try: + with open(worker_status_file, 'r+') as f: + loaded_data = json.load(f) + loaded_data['files_remaining'] = files_pending['count'] + loaded_data['bytes_remaining'] = files_pending['bytes'] + loaded_data['purges_remaining'] = files_pending['purge'] + os.ftruncate(f.fileno(), 0) + os.lseek(f.fileno(), 0, os.SEEK_SET) + json.dump(loaded_data, f) + f.flush() + os.fsync(f.fileno()) + except (IOError, 
OSError, ValueError): + logging.info ('Creating new %s' % worker_status_file) + try: + with open(worker_status_file, 'wb') as f: + default_data['files_remaining'] = files_pending['count'] + default_data['bytes_remaining'] = files_pending['bytes'] + default_data['purges_remaining'] = files_pending['purge'] + json.dump(default_data, f) + f.flush() + os.fsync(f.fileno()) + except: + raise + + def update_worker_remote_node (self): + node = sys.argv[-1] + node = node.split("@")[-1] + remote_node_ip = node.split(":")[0] + remote_node_vol = node.split(":")[3] + remote_node = remote_node_ip + '::' + remote_node_vol + self.update_worker_status ('remote_node', remote_node) + + def update_worker_health (self, state): + self.update_worker_status ('worker status', state) + + def update_worker_crawl_status (self, state): + self.update_worker_status ('crawl status', state) + + def update_worker_files_syncd (self): + default_data = {"remote_node":"N/A", + "worker status":"Not Started", + "crawl status":"N/A", + "files_syncd": 0, + "files_remaining": 0, + "bytes_remaining": 0, + "purges_remaining": 0, + "total_files_skipped": 0} + worker_status_file = self.get_worker_status_file() + try: + with open(worker_status_file, 'r+') as f: + loaded_data = json.load(f) + loaded_data['files_syncd'] += loaded_data['files_remaining'] + loaded_data['files_remaining'] = 0 + loaded_data['bytes_remaining'] = 0 + loaded_data['purges_remaining'] = 0 + os.ftruncate(f.fileno(), 0) + os.lseek(f.fileno(), 0, os.SEEK_SET) + json.dump(loaded_data, f) + f.flush() + os.fsync(f.fileno()) + except (IOError, OSError, ValueError): + logging.info ('Creating new %s' % worker_status_file) + try: + with open(worker_status_file, 'wb') as f: + json.dump(default_data, f) + f.flush() + os.fsync(f.fileno()) + except: + raise + + def update_worker_files_remaining (self, state): + self.update_worker_status ('files_remaining', state) + + def update_worker_bytes_remaining (self, state): + self.update_worker_status ('bytes_remaining', state) + + def update_worker_purges_remaining (self, state): + self.update_worker_status ('purges_remaining', state) + + def update_worker_total_files_skipped (self, value): + default_data = {"remote_node":"N/A", + "worker status":"Not Started", + "crawl status":"N/A", + "files_syncd": 0, + "files_remaining": 0, + "bytes_remaining": 0, + "purges_remaining": 0, + "total_files_skipped": 0} + worker_status_file = self.get_worker_status_file() + try: + with open(worker_status_file, 'r+') as f: + loaded_data = json.load(f) + loaded_data['total_files_skipped'] = value + loaded_data['files_remaining'] -= value + os.ftruncate(f.fileno(), 0) + os.lseek(f.fileno(), 0, os.SEEK_SET) + json.dump(loaded_data, f) + f.flush() + os.fsync(f.fileno()) + except (IOError, OSError, ValueError): + logging.info ('Creating new %s' % worker_status_file) + try: + with open(worker_status_file, 'wb') as f: + default_data['total_files_skipped'] = value + json.dump(default_data, f) + f.flush() + os.fsync(f.fileno()) + except: + raise def crawl(self): + self.update_worker_crawl_status("Changelog Crawl") changes = [] + # get stime (from the brick) and purge changelogs + # that are _historical_ to that time. 
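# [editor's note] "historical" means a changelog whose rollover suffix
# CHANGELOG.<T> satisfies T < purge_time[0] (the stime already recorded on
# the slave) -- exactly the test the skip loop below applies before calling
# changelog_done() on already-processed changelogs.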
+ purge_time = self.xtime('.', self.slave) + if isinstance(purge_time, int): + purge_time = None try: self.master.server.changelog_scan() self.crawls += 1 except OSError: self.fallback_xsync() + self.update_worker_crawl_status("Hybrid Crawl") changes = self.master.server.changelog_getchanges() if changes: - xtl = self.xtime(self.FLAT_DIR_HIERARCHY) - if isinstance(xtl, int): - raise GsyncdError('master is corrupt') + if purge_time: + logging.info("slave's time: %s" % repr(purge_time)) + processed = [x for x in changes if int(x.split('.')[-1]) < purge_time[0]] + for pr in processed: + logging.info('skipping already processed change: %s...' % os.path.basename(pr)) + self.master.server.changelog_done(pr) + changes.remove(pr) logging.debug('processing changes %s' % repr(changes)) self.process(changes) - self.upd_stime(xtl) def register(self): (workdir, logfile) = self.setup_working_dir() @@ -799,17 +1077,20 @@ class GMasterChangelogMixin(GMasterCommon): class GMasterXsyncMixin(GMasterChangelogMixin): """ - This crawl needs to be xtime based (as of now it's not. this is beacuse we generate CHANGELOG file during each crawl which is then processed by process_change()). For now it's used as a one-shot initial sync mechanism and only syncs directories, regular - files and symlinks. + files, hardlinks and symlinks. """ + XSYNC_MAX_ENTRIES = 1<<13 + def register(self): + self.counter = 0 + self.comlist = [] self.sleep_interval = 60 self.tempdir = self.setup_working_dir()[0] self.tempdir = os.path.join(self.tempdir, 'xsync') @@ -823,6 +1104,36 @@ class GMasterXsyncMixin(GMasterChangelogMixin): else: raise + def crawl(self): + """ + event dispatcher thread + + this thread dispatches either changelog or synchronizes stime. + additionally terminates itself on recieving a 'finale' event + """ + def Xsyncer(): + self.Xcrawl() + t = Thread(target=Xsyncer) + t.start() + logging.info('starting hybrid crawl...') + self.update_worker_crawl_status("Hybrid Crawl") + while True: + try: + item = self.comlist.pop(0) + if item[0] == 'finale': + logging.info('finished hybrid crawl syncing') + break + elif item[0] == 'xsync': + logging.info('processing xsync changelog %s' % (item[1])) + self.process([item[1]], 0) + elif item[0] == 'stime': + logging.debug('setting slave time: %s' % repr(item[1])) + self.upd_stime(item[1][1], item[1][0]) + else: + logging.warn('unknown tuple in comlist (%s)' % repr(item)) + except IndexError: + time.sleep(1) + def write_entry_change(self, prefix, data=[]): self.fh.write("%s %s\n" % (prefix, ' '.join(data))) @@ -839,24 +1150,61 @@ class GMasterXsyncMixin(GMasterChangelogMixin): def fname(self): return self.xsync_change - def crawl(self, path='.', xtr=None, done=0): - """ generate a CHANGELOG file consumable by process_change """ + def put(self, mark, item): + self.comlist.append((mark, item)) + + def sync_xsync(self, last): + """schedule a processing of changelog""" + self.close() + self.put('xsync', self.fname()) + self.counter = 0 + if not last: + time.sleep(1) # make sure changelogs are 1 second apart + self.open() + + def sync_stime(self, stime=None, last=False): + """schedule a stime synchronization""" + if stime: + self.put('stime', stime) + if last: + self.put('finale', None) + + def sync_done(self, stime=None, last=False): + self.sync_xsync(last) + if stime: + self.sync_stime(stime, last) + + def Xcrawl(self, path='.', xtr_root=None): + """ + generate a CHANGELOG file consumable by process_change. 
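        ([editor's note] the generated file reuses the ASCII changelog
        grammar -- "E <gfid> MKDIR <mode> <uid> <gid> <pargfid>/<bname>",
        "E <gfid> LINK ...", "D <gfid>" -- so process_change() can consume
        xsync batches and real changelogs identically; the format shown is
        illustrative, inferred from the write_entry_change() calls below.)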
+ + slave's xtime (stime) is _cached_ for comparisons across + the filesystem tree, but set after directory synchronization. + """ if path == '.': self.open() self.crawls += 1 - if not xtr: + if not xtr_root: # get the root stime and use it for all comparisons - xtr = self.xtime('.', self.slave) - if isinstance(xtr, int): - if xtr != ENOENT: - raise GsyncdError('slave is corrupt') - xtr = self.minus_infinity + xtr_root = self.xtime('.', self.slave) + if isinstance(xtr_root, int): + if xtr_root != ENOENT: + logging.warn("slave cluster not returning the " \ + "correct xtime for root (%d)" % xtr_root) + xtr_root = self.minus_infinity xtl = self.xtime(path) if isinstance(xtl, int): - raise GsyncdError('master is corrupt') - if xtr == xtl: + logging.warn("master cluster's xtime not found") + xtr = self.xtime(path, self.slave) + if isinstance(xtr, int): + if xtr != ENOENT: + logging.warn("slave cluster not returning the " \ + "correct xtime for %s (%d)" % (path, xtr)) + xtr = self.minus_infinity + xtr = max(xtr, xtr_root) + if not self.need_sync(path, xtl, xtr): if path == '.': - self.close() + self.sync_done((path, xtl), True) return self.xtime_reversion_hook(path, xtl, xtr) logging.debug("entering " + path) @@ -867,43 +1215,42 @@ class GMasterXsyncMixin(GMasterChangelogMixin): for e in dem: bname = e e = os.path.join(path, e) - st = lstat(e) + xte = self.xtime(e) + if isinstance(xte, int): + logging.warn("irregular xtime for %s: %s" % (e, errno.errorcode[xte])) + continue + if not self.need_sync(e, xte, xtr): + continue + st = self.master.server.lstat(e) if isinstance(st, int): - logging.warn('%s got purged in the interim..' % e) + logging.warn('%s got purged in the interim ...' % e) continue gfid = self.master.server.gfid(e) if isinstance(gfid, int): - logging.warn('skipping entry %s..' % (e)) - continue - xte = self.xtime(e) - if isinstance(xte, int): - raise GsyncdError('master is corrupt') - if not self.need_sync(e, xte, xtr): + logging.warn('skipping entry %s..' % e) continue mo = st.st_mode + self.counter += 1 + if self.counter == self.XSYNC_MAX_ENTRIES: + self.sync_done() if stat.S_ISDIR(mo): - self.write_entry_change("E", [gfid, 'MKDIR', escape(os.path.join(pargfid, bname))]) - self.crawl(e, xtr) + self.write_entry_change("E", [gfid, 'MKDIR', str(mo), str(st.st_uid), str(st.st_gid), escape(os.path.join(pargfid, bname))]) + self.Xcrawl(e, xtr_root) + self.sync_done((e, xte), False) elif stat.S_ISLNK(mo): - rl = errno_wrap(os.readlink, [en], [ENOENT]) - if isinstance(rl, int): - continue - self.write_entry_change("E", [gfid, 'SYMLINK', escape(os.path.join(pargfid, bname)), rl]) - else: + self.write_entry_change("E", [gfid, 'SYMLINK', escape(os.path.join(pargfid, bname))]) + elif stat.S_ISREG(mo): + nlink = st.st_nlink + nlink -= 1 # fixup backend stat link count # if a file has a hardlink, create a Changelog entry as 'LINK' so the slave # side will decide if to create the new entry, or to create link. 
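# [editor's note] the nlink -= 1 fixup above discounts the extra hard link
# a gluster brick keeps for every regular file (the .glusterfs/<gfid>
# handle), so nlink == 1 below really means "no user-created hardlinks";
# an editorial reading of the "fixup backend stat link count" comment.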
- if st.st_nlink == 1: - self.write_entry_change("E", [gfid, 'MKNOD', escape(os.path.join(pargfid, bname))]) + if nlink == 1: + self.write_entry_change("E", [gfid, 'MKNOD', str(mo), str(st.st_uid), str(st.st_gid), escape(os.path.join(pargfid, bname))]) else: self.write_entry_change("E", [gfid, 'LINK', escape(os.path.join(pargfid, bname))]) - if stat.S_ISREG(mo): - self.write_entry_change("D", [gfid]) - + self.write_entry_change("D", [gfid]) if path == '.': - logging.info('processing xsync changelog %s' % self.fname()) - self.close() - self.process([self.fname()], done) - self.upd_stime(xtl) + self.sync_done((path, xtl), True) class BoxClosedErr(Exception): pass @@ -979,12 +1326,13 @@ class Syncer(object): each completed syncjob. """ - def __init__(self, slave): + def __init__(self, slave, sync_engine, resilient_errnos=[]): """spawn worker threads""" self.slave = slave self.lock = Lock() self.pb = PostBox() - self.bytes_synced = 0 + self.sync_engine = sync_engine + self.errnos_ok = resilient_errnos for i in range(int(gconf.sync_jobs)): t = Thread(target=self.syncjob) t.start() @@ -1002,11 +1350,10 @@ class Syncer(object): break time.sleep(0.5) pb.close() - po = self.slave.rsync(pb) + po = self.sync_engine(pb) if po.returncode == 0: ret = (True, 0) - elif po.returncode in (23, 24): - # partial transfer (cf. rsync(1)), that's normal + elif po.returncode in self.errnos_ok: ret = (False, po.returncode) else: po.errfail() diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py index faf62f868..8deb5114b 100644 --- a/geo-replication/syncdaemon/resource.py +++ b/geo-replication/syncdaemon/resource.py @@ -265,6 +265,9 @@ class Server(object): FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0' + GFID_XATTR = 'trusted.gfid' # for backend gfid fetch, do not use GX_NSPACE_PFX + GFID_FMTSTR = "!" + "B"*16 + local_path = '' @classmethod @@ -305,6 +308,38 @@ class Server(object): raise OSError(ENOTDIR, os.strerror(ENOTDIR)) return os.listdir(path) + + @classmethod + @_pathguard + def lstat(cls, path): + try: + return os.lstat(path) + except (IOError, OSError): + ex = sys.exc_info()[1] + if ex.errno == ENOENT: + return ex.errno + else: + raise + + + @classmethod + @_pathguard + def gfid(cls, path): + try: + buf = Xattr.lgetxattr(path, cls.GFID_XATTR, 16) + m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join(['%02x' % x for x in struct.unpack(cls.GFID_FMTSTR, buf)])) + return '-'.join(m.groups()) + except (IOError, OSError): + ex = sys.exc_info()[1] + if ex.errno == ENOENT: + return ex.errno + else: + raise + + @classmethod + def gfid_mnt(cls, gfidpath): + return errno_wrap(Xattr.lgetxattr, [gfidpath, 'glusterfs.gfid.string', cls.GX_GFID_CANONICAL_LEN], [ENOENT]) + @classmethod @_pathguard def purge(cls, path, entries=None): @@ -397,8 +432,42 @@ class Server(object): raise @classmethod - def gfid(cls, gfidpath): - return errno_wrap(Xattr.lgetxattr, [gfidpath, 'glusterfs.gfid.string', cls.GX_GFID_CANONICAL_LEN], [ENOENT]) + @_pathguard + def stime_mnt(cls, path, uuid): + """query xtime extended attribute + + Return xtime of @path for @uuid as a pair of integers. + "Normal" errors due to non-existent @path or extended attribute + are tolerated and errno is returned in such a case. 
+ """ + + try: + return struct.unpack('!II', Xattr.lgetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'stime']), 8)) + except OSError: + ex = sys.exc_info()[1] + if ex.errno in (ENOENT, ENODATA, ENOTDIR): + return ex.errno + else: + raise + + @classmethod + @_pathguard + def stime(cls, path, uuid): + """query xtime extended attribute + + Return xtime of @path for @uuid as a pair of integers. + "Normal" errors due to non-existent @path or extended attribute + are tolerated and errno is returned in such a case. + """ + + try: + return struct.unpack('!II', Xattr.lgetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'stime']), 8)) + except OSError: + ex = sys.exc_info()[1] + if ex.errno in (ENOENT, ENODATA, ENOTDIR): + return ex.errno + else: + raise @classmethod def node_uuid(cls, path='.'): @@ -409,21 +478,10 @@ class Server(object): raise @classmethod - def xtime_vec(cls, path, *uuids): - """vectored version of @xtime - - accepts a list of uuids and returns a dictionary - with uuid as key(s) and xtime as value(s) - """ - xt = {} - for uuid in uuids: - xtu = cls.xtime(path, uuid) - if xtu == ENODATA: - xtu = None - if isinstance(xtu, int): - return xtu - xt[uuid] = xtu - return xt + @_pathguard + def set_stime(cls, path, uuid, mark): + """set @mark as stime for @uuid on @path""" + Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'stime']), struct.pack('!II', *mark)) @classmethod @_pathguard @@ -443,21 +501,17 @@ class Server(object): """ Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), struct.pack('!II', *mark)) - @classmethod - def set_xtime_vec(cls, path, mark_dct): - """vectored (or dictered) version of set_xtime - - ignore values that match @ignore - """ - for u,t in mark_dct.items(): - cls.set_xtime(path, u, t) - @classmethod def entry_ops(cls, entries): pfx = gauxpfx() logging.debug('entries: %s' % repr(entries)) # regular file - def entry_pack_reg(gf, bn, st): + def entry_pack_reg(gf, bn, mo, uid, gid): + blen = len(bn) + return struct.pack(cls._fmt_mknod(blen), + uid, gid, gf, mo, bn, + stat.S_IMODE(mo), 0, umask()) + def entry_pack_reg_stat(gf, bn, st): blen = len(bn) mo = st['mode'] return struct.pack(cls._fmt_mknod(blen), @@ -465,12 +519,10 @@ class Server(object): gf, mo, bn, stat.S_IMODE(mo), 0, umask()) # mkdir - def entry_pack_mkdir(gf, bn, st): + def entry_pack_mkdir(gf, bn, mo, uid, gid): blen = len(bn) - mo = st['mode'] return struct.pack(cls._fmt_mkdir(blen), - st['uid'], st['gid'], - gf, mo, bn, + uid, gid, gf, mo, bn, stat.S_IMODE(mo), umask()) #symlink def entry_pack_symlink(gf, bn, lnk, st): @@ -485,7 +537,7 @@ class Server(object): # to be purged is the GFID gotten from the changelog. # (a stat(changelog_gfid) would also be valid here) # The race here is between the GFID check and the purge. 
- disk_gfid = cls.gfid(entry) + disk_gfid = cls.gfid_mnt(entry) if isinstance(disk_gfid, int): return if not gfid == disk_gfid: @@ -510,15 +562,15 @@ class Server(object): else: break elif op in ['CREATE', 'MKNOD']: - blob = entry_pack_reg(gfid, bname, e['stat']) + blob = entry_pack_reg(gfid, bname, e['mode'], e['uid'], e['uid']) elif op == 'MKDIR': - blob = entry_pack_mkdir(gfid, bname, e['stat']) + blob = entry_pack_mkdir(gfid, bname, e['mode'], e['uid'], e['uid']) elif op == 'LINK': slink = os.path.join(pfx, gfid) st = lstat(slink) if isinstance(st, int): (pg, bname) = entry2pb(entry) - blob = entry_pack_reg(gfid, bname, e['stat']) + blob = entry_pack_reg_stat(gfid, bname, e['stat']) else: errno_wrap(os.link, [slink, entry], [ENOENT, EEXIST]) elif op == 'SYMLINK': @@ -528,12 +580,23 @@ class Server(object): st = lstat(entry) if isinstance(st, int): (pg, bname) = entry2pb(en) - blob = entry_pack_reg(gfid, bname, e['stat']) + blob = entry_pack_reg_stat(gfid, bname, e['stat']) else: errno_wrap(os.rename, [entry, en], [ENOENT, EEXIST]) if blob: errno_wrap(Xattr.lsetxattr_l, [pg, 'glusterfs.gfid.newfile', blob], [EEXIST], [ENOENT, ESTALE, EINVAL]) + @classmethod + def meta_ops(cls, meta_entries): + logging.debug('Meta-entries: %s' % repr(meta_entries)) + for e in meta_entries: + mode = e['stat']['mode'] + uid = e['stat']['uid'] + gid = e['stat']['gid'] + go = e['go'] + errno_wrap(os.chmod, [go, mode], [ENOENT], [ESTALE, EINVAL]) + errno_wrap(os.chown, [go, uid, gid], [ENOENT], [ESTALE, EINVAL]) + @classmethod def changelog_register(cls, cl_brick, cl_dir, cl_log, cl_level, retries = 0): Changes.cl_register(cl_brick, cl_dir, cl_log, cl_level, retries) @@ -699,6 +762,29 @@ class SlaveRemote(object): return po + def tarssh(self, files, slaveurl): + """invoke tar+ssh + -z (compress) can be use if needed, but ommitting it now + as it results in wierd error (tar+ssh errors out (errcode: 2) + """ + if not files: + raise GsyncdError("no files to sync") + logging.debug("files: " + ", ".join(files)) + (host, rdir) = slaveurl.split(':') + tar_cmd = ["tar", "-cf", "-", "--files-from", "-"] + ssh_cmd = gconf.ssh_command_tar.split() + [host, "tar", "--overwrite", "-xf", "-", "-C", rdir] + p0 = Popen(tar_cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, stderr=subprocess.PIPE) + p1 = Popen(ssh_cmd, stdin=p0.stdout, stderr=subprocess.PIPE) + for f in files: + p0.stdin.write(f) + p0.stdin.write('\n') + p0.stdin.close() + p0.wait() + + p1.wait() + p1.terminate_geterr(fail_on_err = False) + + return p1 class AbstractUrl(object): """abstract base class for url scheme classes""" @@ -1041,12 +1127,20 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): except ValueError: pass return e + @classmethod + def lstat(cls, e): + """ path based backend stat """ + return super(brickserver, cls).lstat(e) + @classmethod + def gfid(cls, e): + """ path based backend gfid fetch """ + return super(brickserver, cls).gfid(e) if gconf.slave_id: # define {,set_}xtime in slave, thus preempting # the call to remote, so that it takes data from # the local brick - slave.server.xtime = types.MethodType(lambda _self, path, uuid: brickserver.xtime(path, uuid + '.' + gconf.slave_id), slave.server) - slave.server.set_xtime = types.MethodType(lambda _self, path, uuid, mark: brickserver.set_xtime(path, uuid + '.' + gconf.slave_id, mark), slave.server) + slave.server.stime = types.MethodType(lambda _self, path, uuid: brickserver.stime(path, uuid + '.' 
+ gconf.slave_id), slave.server) + slave.server.set_stime = types.MethodType(lambda _self, path, uuid, mark: brickserver.set_stime(path, uuid + '.' + gconf.slave_id, mark), slave.server) (g1, g2) = self.gmaster_instantiate_tuple(slave) g1.master.server = brickserver g2.master.server = brickserver @@ -1067,6 +1161,9 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): def rsync(self, files): return sup(self, files, self.slavedir) + def tarssh(self, files): + return sup(self, files, self.slavedir) + class SSH(AbstractUrl, SlaveRemote): """scheme class for ssh:// urls @@ -1170,3 +1267,6 @@ class SSH(AbstractUrl, SlaveRemote): def rsync(self, files): return sup(self, files, '-e', " ".join(gconf.ssh_command.split() + gconf.ssh_ctl_args), *(gconf.rsync_ssh_options.split() + [self.slaveurl])) + + def tarssh(self, files): + return sup(self, files, self.slaveurl) diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py index 348eb38c1..1b5684c6d 100644 --- a/geo-replication/syncdaemon/syncdutils.py +++ b/geo-replication/syncdaemon/syncdutils.py @@ -227,7 +227,7 @@ def log_raise_exception(excont): logging.warn("!!!!!!!!!!!!!") logging.warn('!!! getting "No such file or directory" errors ' "is most likely due to MISCONFIGURATION, please consult " - "http://access.redhat.com/knowledge/docs/en-US/Red_Hat_Storage/2.0/html/Administration_Guide/chap-User_Guide-Geo_Rep-Preparation-Settingup_Environment.html") + "https://access.redhat.com/site/documentation/en-US/Red_Hat_Storage/2.1/html/Administration_Guide/chap-User_Guide-Geo_Rep-Preparation-Settingup_Environment.html") logging.warn("!!!!!!!!!!!!!") gconf.transport.terminate_geterr() elif isinstance(exc, OSError) and exc.errno in (ENOTCONN, ECONNABORTED): -- cgit From b198e072cda4bbb98e19701399c4bb4f0743cf20 Mon Sep 17 00:00:00 2001 From: Ajeet Jha Date: Mon, 2 Dec 2013 12:55:18 +0530 Subject: glusterd/geo-rep: more glusterd and cli fixes for geo-rep. -> handle option validation cases in reset case. -> Creating valid conf path when glusterd restarts. -> Reading the gsyncd worker thread status and displaying it. -> Displaying status-detail per worker. -> Fetch checkpoint info in geo-rep status. -> use-tarssh value validation added. misc: misc geo-rep fixes based on cluster, logrotate etc.. -> cluster/dht: fix 'stime' getxattr getting overwritten. -> cluster/afr: return max of 'stime' values in subvol. -> geo-rep-logrotate: Sending SIGHUP to geo-rep auxiliary. -> cluster/dht: fix convoluted logic while aggregating. -> cluster/*: fix 'stime' min/max fetch logic. 
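For readers skimming the stime changes above: an stime is carried as
a pair of 32-bit integers (seconds, microseconds), so choosing the
minimum or maximum requires comparing the two fields selectively; as
the in-tree helpers note, a plain min()/max() macro cannot do this.
A minimal standalone sketch of the ordering (hypothetical helper
name, not the patch code itself):

    #include <stdint.h>

    /* non-zero when stime 'a' is strictly newer than 'b';
     * index 0 holds seconds, index 1 holds microseconds */
    static int
    stime_newer (const uint32_t a[2], const uint32_t b[2])
    {
            return (a[0] > b[0]) ||
                   ((a[0] == b[0]) && (a[1] > b[1]));
    }

gf_get_max_stime() in libxlator.c applies this ordering, and
gf_get_min_stime() its mirror image, before updating the aggregated
dict value.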
Change-Id: I811acea0bbd6194797a3e55d89295d1ea021ac85 BUG: 1036552 Signed-off-by: Ajeet Jha Reviewed-on: http://review.gluster.org/6405 Tested-by: Gluster Build System Reviewed-by: Amar Tumballi Reviewed-by: Anand Avati --- cli/src/cli-cmd-parser.c | 12 +- cli/src/cli-rpc-ops.c | 258 +++-------- cli/src/cli.h | 14 - extras/glusterfs-georep-logrotate | 18 + libglusterfs/src/mem-types.h | 3 +- rpc/rpc-lib/src/protocol-common.h | 17 + xlators/cluster/afr/src/afr-inode-read.c | 2 +- xlators/cluster/dht/src/dht-common.c | 8 +- xlators/lib/src/libxlator.c | 55 +++ xlators/lib/src/libxlator.h | 3 + xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 563 +++++++++++++++-------- xlators/mgmt/glusterd/src/glusterd-mountbroker.c | 1 - xlators/mgmt/glusterd/src/glusterd-utils.c | 136 +++--- xlators/mgmt/glusterd/src/glusterd-utils.h | 11 + xlators/mgmt/glusterd/src/glusterd.c | 21 +- xlators/mgmt/glusterd/src/glusterd.h | 2 +- 16 files changed, 646 insertions(+), 478 deletions(-) diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 8a37d8e78..0aeaccd0c 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -1737,13 +1737,13 @@ config_parse (const char **words, int wordcount, dict_t *dict, } append_str[append_len - 2] = '\0'; /* "checkpoint now" is special: we resolve that "now" */ - if (strcmp (words[cmdi + 1], "checkpoint") == 0 && - strcmp (append_str, "now") == 0) { + if ((strcmp (words[cmdi + 1], "checkpoint") == 0) && + (strcmp (append_str, "now") == 0)) { struct timeval tv = {0,}; ret = gettimeofday (&tv, NULL); if (ret == -1) - goto out; /* FIXME: free append_str? */ + goto out; GF_FREE (append_str); append_str = GF_CALLOC (1, 300, cli_mt_append_str); @@ -1751,10 +1751,8 @@ config_parse (const char **words, int wordcount, dict_t *dict, ret = -1; goto out; } - strcpy (append_str, "as of "); - gf_time_fmt (append_str + strlen ("as of "), - 300 - strlen ("as of "), - tv.tv_sec, gf_timefmt_FT); + snprintf (append_str, 300, "now:%ld.%06ld", + tv.tv_sec, tv.tv_usec); } ret = dict_set_dynstr (dict, "op_value", append_str); diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 2cb0ba3d4..d1b39014d 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -3874,85 +3874,22 @@ gf_cli_gsync_config_command (dict_t *dict) return runner_run (&runner); } -int -gf_cli_fetch_gsyncd_status_values (char *status, - gf_cli_gsync_status_t *sts_val) -{ - int32_t ret = -1; - char *tmp = NULL; - char *save_ptr = NULL; - char *key = NULL; - char *value = NULL; - - if (!status || !sts_val) { - gf_log ("", GF_LOG_ERROR, "status or sts_val is null"); - goto out; - } - - tmp = strtok_r (status, "\n", &save_ptr); - - if (tmp) - sts_val->health = gf_strdup (tmp); - - while (tmp) { - key = strtok_r (tmp, "=", &value); - - if ((key) && (!strcmp(key, "Uptime"))) - sts_val->uptime = gf_strdup (value); - - if ((key) && (!strcmp(key, "FilesSyncd"))) - sts_val->files_syncd = gf_strdup (value); - - if ((key) && (!strcmp(key, "FilesPending"))) - sts_val->files_pending = gf_strdup (value); - - if ((key) && (!strcmp(key, "BytesPending"))) { - value = gf_uint64_2human_readable(atol(value)); - sts_val->bytes_pending = gf_strdup (value); - } - - if ((key) && (!strcmp(key, "DeletesPending"))) - sts_val->deletes_pending = gf_strdup (value); - - tmp = strtok_r (NULL, ";", &save_ptr); - } - - if (sts_val->health) - ret = 0; - - if (!sts_val->uptime) - sts_val->uptime = gf_strdup ("N/A"); - - if (!sts_val->files_syncd) - sts_val->files_syncd = gf_strdup ("N/A"); - - if (!sts_val->files_pending) - 
sts_val->files_pending = gf_strdup ("N/A"); - - if (!sts_val->bytes_pending) - sts_val->bytes_pending = gf_strdup ("N/A"); - - if (!sts_val->deletes_pending) - sts_val->deletes_pending = gf_strdup ("N/A"); - -out: - gf_log ("", GF_LOG_DEBUG, "Returning %d.", ret); - return ret; -} - char* -get_struct_variable (int mem_num, gf_cli_gsync_status_t *sts_val) +get_struct_variable (int mem_num, gf_gsync_status_t *sts_val) { switch (mem_num) { - case 0: return (sts_val->node); - case 1: return (sts_val->master); - case 2: return (sts_val->slave); - case 3: return (sts_val->health); - case 4: return (sts_val->uptime); - case 5: return (sts_val->files_syncd); - case 6: return (sts_val->files_pending); - case 7: return (sts_val->bytes_pending); - case 8: return (sts_val->deletes_pending); + case 0: return (sts_val->node); + case 1: return (sts_val->master); + case 2: return (sts_val->brick); + case 3: return (sts_val->slave_node); + case 4: return (sts_val->worker_status); + case 5: return (sts_val->checkpoint_status); + case 6: return (sts_val->crawl_status); + case 7: return (sts_val->files_syncd); + case 8: return (sts_val->files_remaining); + case 9: return (sts_val->bytes_remaining); + case 10: return (sts_val->purges_remaining); + case 11: return (sts_val->total_files_skipped); default: goto out; } @@ -3963,28 +3900,23 @@ out: int gf_cli_print_status (char **title_values, - gf_cli_gsync_status_t **sts_vals, + gf_gsync_status_t **sts_vals, int *spacing, int gsync_count, int number_of_fields, int is_detail) { - int indents = 0; int i = 0; int j = 0; int ret = 0; + int status_fields = 6; /* Indexed at 0 */ int total_spacing = 0; char **output_values = NULL; char *tmp = NULL; char *hyphens = NULL; - char heading[PATH_MAX] = {0, }; - char indent_spaces[PATH_MAX] = {0, }; /* calculating spacing for hyphens */ for (i = 0; i < number_of_fields; i++) { - /* Suppressing master and slave output for status detail */ - if ((is_detail) && ((i == 1) || (i == 2))) { - total_spacing++; - continue; - } else if ((!is_detail) && (i > 4)) { + /* Suppressing detail output for status */ + if ((!is_detail) && (i > status_fields)) { /* Suppressing detailed output for * status */ continue; @@ -4018,64 +3950,29 @@ gf_cli_print_status (char **title_values, goto out; } - ret = snprintf(heading, sizeof(heading), "MASTER: %s SLAVE: %s", - sts_vals[0]->master, sts_vals[0]->slave); - if (ret) { - if (ret < sizeof(heading)) - heading[ret] = '\0'; - else - heading[sizeof(heading) - 1] = '\0'; - ret = 0; - } else { - ret = -1; - goto out; - } - - if (is_detail) { - cli_out (" "); - if (strlen(heading) > total_spacing) - cli_out ("%s", heading); - else { - /* Printing the heading with centre justification */ - indents = (total_spacing - strlen(heading)) / 2; - memset (indent_spaces, ' ', indents); - indent_spaces[indents] = '\0'; - ret = snprintf (hyphens, total_spacing, "%s%s", - indent_spaces, heading); - if (ret) { - hyphens[ret] = '\0'; - cli_out ("%s", hyphens); - ret = 0; - } else { - ret = -1; - goto out; - } - } - cli_out (" "); - } + cli_out (" "); /* setting the title "NODE", "MASTER", etc. 
from title_values[] and printing the same */ for (j = 0; j < number_of_fields; j++) { - /* Suppressing master and slave output for status detail */ - if ((is_detail) && ((j == 1) || (j == 2))) { - output_values[j][0] = '\0'; - continue; - } else if ((!is_detail) && (j > 4)) { + if ((!is_detail) && (j > status_fields)) { /* Suppressing detailed output for * status */ output_values[j][0] = '\0'; - continue; + continue; } memset (output_values[j], ' ', spacing[j]); memcpy (output_values[j], title_values[j], strlen(title_values[j])); output_values[j][spacing[j]] = '\0'; } - cli_out ("%s %s %s %s %s %s %s %s %s", output_values[0], - output_values[1], output_values[2], output_values[3], - output_values[4], output_values[5], output_values[6], - output_values[7], output_values[8]); + cli_out ("%s %s %s %s %s %s %s %s %s %s %s %s", + output_values[0], output_values[1], + output_values[2], output_values[3], + output_values[4], output_values[5], + output_values[6], output_values[7], + output_values[8], output_values[9], + output_values[10], output_values[11]); /* setting and printing the hyphens */ memset (hyphens, '-', total_spacing); @@ -4084,12 +3981,7 @@ gf_cli_print_status (char **title_values, for (i = 0; i < gsync_count; i++) { for (j = 0; j < number_of_fields; j++) { - /* Suppressing master and slave output for - * status detail */ - if ((is_detail) && ((j == 1) || (j == 2))) { - output_values[j][0] = '\0'; - continue; - } else if ((!is_detail) && (j > 4)) { + if ((!is_detail) && (j > status_fields)) { /* Suppressing detailed output for * status */ output_values[j][0] = '\0'; @@ -4107,10 +3999,13 @@ gf_cli_print_status (char **title_values, output_values[j][spacing[j]] = '\0'; } - cli_out ("%s %s %s %s %s %s %s %s %s", output_values[0], - output_values[1], output_values[2], output_values[3], - output_values[4], output_values[5], output_values[6], - output_values[7], output_values[8]); + cli_out ("%s %s %s %s %s %s %s %s %s %s %s %s", + output_values[0], output_values[1], + output_values[2], output_values[3], + output_values[4], output_values[5], + output_values[6], output_values[7], + output_values[8], output_values[9], + output_values[10], output_values[11]); } out: @@ -4130,47 +4025,23 @@ out: int gf_cli_read_status_data (dict_t *dict, - gf_cli_gsync_status_t **sts_vals, + gf_gsync_status_t **sts_vals, int *spacing, int gsync_count, int number_of_fields) { - int ret = 0; - int i = 0; - int j = 0; - char mst[PATH_MAX] = {0, }; - char slv[PATH_MAX] = {0, }; - char sts[PATH_MAX] = {0, }; - char nds[PATH_MAX] = {0, }; - char *status = NULL; - char *tmp = NULL; + char *tmp = NULL; + char sts_val_name[PATH_MAX] = ""; + int ret = 0; + int i = 0; + int j = 0; /* Storing per node status info in each object */ for (i = 0; i < gsync_count; i++) { - snprintf (nds, sizeof(nds), "node%d", i + 1); - snprintf (mst, sizeof(mst), "master%d", i + 1); - snprintf (slv, sizeof(slv), "slave%d", i + 1); - snprintf (sts, sizeof(sts), "status%d", i + 1); + snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i); /* Fetching the values from dict, and calculating the max length for each field */ - ret = dict_get_str (dict, nds, &(sts_vals[i]->node)); - if (ret) - goto out; - - ret = dict_get_str (dict, mst, &(sts_vals[i]->master)); - if (ret) - goto out; - - ret = dict_get_str (dict, slv, &(sts_vals[i]->slave)); - if (ret) - goto out; - - ret = dict_get_str (dict, sts, &status); - if (ret) - goto out; - - /* Fetching health and uptime from sts_val */ - ret = gf_cli_fetch_gsyncd_status_values (status, 
sts_vals[i]); + ret = dict_get_bin (dict, sts_val_name, (void **)&(sts_vals[i])); if (ret) goto out; @@ -4192,25 +4063,23 @@ out: } int -gf_cli_gsync_status_output (dict_t *dict, int status_detail) +gf_cli_gsync_status_output (dict_t *dict, gf_boolean_t is_detail) { int gsync_count = 0; int i = 0; - int j = 0; int ret = 0; - int spacing[10] = {0}; - int num_of_fields = 9; + int spacing[13] = {0}; + int num_of_fields = 12; char errmsg[1024] = ""; char *master = NULL; char *slave = NULL; - char *tmp = NULL; - char *title_values[] = {"NODE", "MASTER", "SLAVE", - "HEALTH", "UPTIME", - "FILES SYNCD", - "FILES PENDING", - "BYTES PENDING", - "DELETES PENDING"}; - gf_cli_gsync_status_t **sts_vals = NULL; + char *title_values[] = {"MASTER NODE", "MASTER VOL", + "MASTER BRICK", "SLAVE", + "STATUS", "CHECKPOINT STATUS", + "CRAWL STATUS", "FILES SYNCD", + "FILES PENDING", "BYTES PENDING", + "DELETES PENDING", "FILES SKIPPED"}; + gf_gsync_status_t **sts_vals = NULL; /* Checks if any session is active or not */ ret = dict_get_int32 (dict, "gsync-count", &gsync_count); @@ -4244,14 +4113,14 @@ gf_cli_gsync_status_output (dict_t *dict, int status_detail) /* gsync_count = number of nodes reporting output. each sts_val object will store output of each node */ - sts_vals = GF_CALLOC (gsync_count, sizeof (gf_cli_gsync_status_t *), + sts_vals = GF_CALLOC (gsync_count, sizeof (gf_gsync_status_t *), gf_common_mt_char); if (!sts_vals) { ret = -1; goto out; } for (i = 0; i < gsync_count; i++) { - sts_vals[i] = GF_CALLOC (1, sizeof (gf_cli_gsync_status_t), + sts_vals[i] = GF_CALLOC (1, sizeof (gf_gsync_status_t), gf_common_mt_char); if (!sts_vals[i]) { ret = -1; @@ -4267,23 +4136,15 @@ gf_cli_gsync_status_output (dict_t *dict, int status_detail) } ret = gf_cli_print_status (title_values, sts_vals, spacing, gsync_count, - num_of_fields, status_detail); + num_of_fields, is_detail); if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to print status output"); goto out; } out: - if (sts_vals) { - for (i = 0; i < gsync_count; i++) { - for (j = 3; j < num_of_fields; j++) { - tmp = get_struct_variable(j, sts_vals[i]); - if (tmp) - GF_FREE (tmp); - } - } + if (sts_vals) GF_FREE (sts_vals); - } return ret; } @@ -4602,7 +4463,10 @@ gf_cli_gsync_set_cbk (struct rpc_req *req, struct iovec *iov, status_detail = dict_get_str_boolean (dict, "status-detail", _gf_false); - ret = gf_cli_gsync_status_output (dict, status_detail); + if (status_detail) + ret = gf_cli_gsync_status_output (dict, status_detail); + else + ret = gf_cli_gsync_status_output (dict, status_detail); break; case GF_GSYNC_OPTION_TYPE_DELETE: diff --git a/cli/src/cli.h b/cli/src/cli.h index f5db29383..53537c642 100644 --- a/cli/src/cli.h +++ b/cli/src/cli.h @@ -148,18 +148,6 @@ struct cli_local { #endif }; -struct gf_cli_gsync_detailed_status_ { - char *node; - char *master; - char *slave; - char *health; - char *uptime; - char *files_syncd; - char *files_pending; - char *bytes_pending; - char *deletes_pending; -}; - struct cli_volume_status { int port; int online; @@ -178,8 +166,6 @@ struct cli_volume_status { #endif }; -typedef struct gf_cli_gsync_detailed_status_ gf_cli_gsync_status_t; - typedef struct cli_volume_status cli_volume_status_t; typedef struct cli_local cli_local_t; diff --git a/extras/glusterfs-georep-logrotate b/extras/glusterfs-georep-logrotate index 6a69ab1e3..85e69d2c0 100644 --- a/extras/glusterfs-georep-logrotate +++ b/extras/glusterfs-georep-logrotate @@ -7,12 +7,30 @@ delaycompress notifempty /var/log/glusterfs/geo-replication/*/*.log { + 
sharedscripts + postrotate + for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do + /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true + done + endscript } /var/log/glusterfs/geo-replication-slaves/*.log { + sharedscripts + postrotate + for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do + /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true + done + endscript } /var/log/glusterfs/geo-replication-slaves/*/*.log { + sharedscripts + postrotate + for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do + /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true + done + endscript } diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h index fc0aa9018..726d38eb6 100644 --- a/libglusterfs/src/mem-types.h +++ b/libglusterfs/src/mem-types.h @@ -118,6 +118,7 @@ enum gf_common_mem_types_ { gf_common_mt_auxgids = 102, gf_common_mt_syncopctx = 103, gf_common_mt_iobrefs = 104, - gf_common_mt_end = 105 + gf_common_mt_gsync_status_t = 105, + gf_common_mt_end = 106 }; #endif diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index d64f280cf..5876a500b 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -221,6 +221,23 @@ typedef enum { GF_AFR_OP_STATISTICS_HEAL_COUNT_PER_REPLICA, } gf_xl_afr_op_t ; +struct gf_gsync_detailed_status_ { + char node[NAME_MAX]; + char master[NAME_MAX]; + char brick[NAME_MAX]; + char slave_node[NAME_MAX]; + char worker_status[NAME_MAX]; + char checkpoint_status[NAME_MAX]; + char crawl_status[NAME_MAX]; + char files_syncd[NAME_MAX]; + char files_remaining[NAME_MAX]; + char bytes_remaining[NAME_MAX]; + char purges_remaining[NAME_MAX]; + char total_files_skipped[NAME_MAX]; +}; + +typedef struct gf_gsync_detailed_status_ gf_gsync_status_t; + #define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */ #define GLUSTER_HNDSK_VERSION 2 /* 0.0.2 */ diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 8a2853319..3e80129f9 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -1348,7 +1348,7 @@ afr_aggregate_stime_xattr (dict_t *this, char *key, data_t *value, void *data) int ret = 0; if (fnmatch (GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) - ret = gf_get_min_stime (THIS, data, key, value); + ret = gf_get_max_stime (THIS, data, key, value); return ret; } diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 898f41f0e..ed4babd32 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2015,13 +2015,7 @@ dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local->xattr) { local->xattr = dict_copy_with_ref (xattr, NULL); } else { - /* first aggregate everything into xattr and then copy into - * local->xattr. This is required as we want to have - * 'local->xattr' as the proper final dictionary passed above - * distribute xlator. 
- */ - dht_aggregate_xattr (xattr, local->xattr); - local->xattr = dict_copy (xattr, local->xattr); + dht_aggregate_xattr (local->xattr, xattr); } out: if (is_last_call (this_call_cnt)) { diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c index 9e5357255..63e9bcf9f 100644 --- a/xlators/lib/src/libxlator.c +++ b/xlators/lib/src/libxlator.c @@ -452,6 +452,61 @@ gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value) /* can't use 'min()' macro here as we need to compare two fields in the array, selectively */ + if ((host_value_timebuf[0] < host_timebuf[0]) || + ((host_value_timebuf[0] == host_timebuf[0]) && + (host_value_timebuf[1] < host_timebuf[1]))) { + update_timebuf (value_timebuf, net_timebuf); + } + + ret = 0; +out: + return ret; +error: + /* To be used only when net_timebuf is not set in the dict */ + if (net_timebuf) + GF_FREE (net_timebuf); + + return ret; +} + +int +gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value) +{ + int ret = -1; + uint32_t *net_timebuf = NULL; + uint32_t *value_timebuf = NULL; + uint32_t host_timebuf[2] = {0,}; + uint32_t host_value_timebuf[2] = {0,}; + + /* stime should be maximum of all the other nodes */ + ret = dict_get_bin (dst, key, (void **)&net_timebuf); + if (ret < 0) { + net_timebuf = GF_CALLOC (1, sizeof (int64_t), + gf_common_mt_char); + if (!net_timebuf) + goto out; + + ret = dict_set_bin (dst, key, net_timebuf, sizeof (int64_t)); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "key=%s: dict set failed", key); + goto error; + } + } + + value_timebuf = data_to_bin (value); + if (!value_timebuf) { + gf_log (this->name, GF_LOG_WARNING, + "key=%s: getting value of stime failed", key); + ret = -1; + goto out; + } + + get_hosttime (value_timebuf, host_value_timebuf); + get_hosttime (net_timebuf, host_timebuf); + + /* can't use 'max()' macro here as we need to compare two fields + in the array, selectively */ if ((host_value_timebuf[0] > host_timebuf[0]) || ((host_value_timebuf[0] == host_timebuf[0]) && (host_value_timebuf[1] > host_timebuf[1]))) { diff --git a/xlators/lib/src/libxlator.h b/xlators/lib/src/libxlator.h index 08bd77b91..175d3141d 100644 --- a/xlators/lib/src/libxlator.h +++ b/xlators/lib/src/libxlator.h @@ -151,4 +151,7 @@ match_uuid_local (const char *name, char *uuid); int gf_get_min_stime (xlator_t *this, dict_t *dst, char *key, data_t *value); +int +gf_get_max_stime (xlator_t *this, dict_t *dst, char *key, data_t *value); + #endif /* !_LIBXLATOR_H */ diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 5786694bd..5d724cc2a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -28,17 +28,6 @@ static int dict_get_param (dict_t *dict, char *key, char **param); -static int -glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, - char *conf_path, char **statefile); - -static int -glusterd_get_slave_info (char *slave, char **slave_ip, - char **slave_vol, char **op_errstr); - -static int -glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen); - struct gsync_config_opt_vals_ gsync_confopt_vals[] = { {.op_name = "change_detector", .no_of_pos_vals = 2, @@ -55,6 +44,11 @@ struct gsync_config_opt_vals_ gsync_confopt_vals[] = { .case_sensitive = _gf_false, .values = {"critical", "error", "warning", "info", "debug"} }, + {.op_name = "use-tarssh", + .no_of_pos_vals = 6, + .case_sensitive = _gf_false, + .values = {"true", "false", "0", 
"1", "yes", "no"} + }, {.op_name = NULL, }, }; @@ -74,6 +68,11 @@ static char *gsync_reserved_opts[] = { NULL }; +static char *gsync_no_restart_opts[] = { + "checkpoint", + NULL +}; + int __glusterd_handle_sys_exec (rpcsvc_request_t *req) { @@ -899,6 +898,8 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname) } if (op_match) { + if (!op_value) + goto out; val_match = _gf_false; for (i = 0; i < conf_vals->no_of_pos_vals; i++) { if(conf_vals->case_sensitive){ @@ -912,7 +913,7 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname) if (!val_match) { ret = snprintf (errmsg, sizeof(errmsg) - 1, - "Invalid values (%s) for" + "Invalid value(%s) for" " option %s", op_value, op_name); errmsg[ret] = '\0'; @@ -923,7 +924,7 @@ gsync_verify_config_options (dict_t *dict, char **op_errstr, char *volname) } } } - +out: return 0; } @@ -1581,7 +1582,7 @@ out: return ret; } -static int +int glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, char *conf_path, char **statefile) { @@ -1736,7 +1737,7 @@ glusterd_verify_slave (char *volname, char *slave_ip, char *slave, gf_log ("", GF_LOG_ERROR, "Not a valid slave"); ret = glusterd_gsync_read_frm_status (log_file_path, buf, sizeof(buf)); - if (ret) { + if (ret <= 0) { gf_log ("", GF_LOG_ERROR, "Unable to read from %s", log_file_path); goto out; @@ -2391,6 +2392,8 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave, char *slave_ip = NULL; char *slave_vol = NULL; struct stat stbuf = {0, }; + gf_boolean_t restart_required = _gf_true; + char **resopt = NULL; GF_ASSERT (slave); GF_ASSERT (op_errstr); @@ -2495,18 +2498,28 @@ glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave, out: if (!ret && volinfo) { + for (resopt = gsync_no_restart_opts; *resopt; resopt++) { + restart_required = _gf_true; + if (!strcmp ((*resopt), op_name)){ + restart_required = _gf_false; + break; + } + } + + if (restart_required) { ret = glusterd_check_restart_gsync_session (volinfo, slave, resp_dict, path_list, conf_path, 0); if (ret) - *op_errstr = gf_strdup ("internal error"); + *op_errstr = gf_strdup ("internal error"); + } } gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } -static int +int glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen) { int ret = 0; @@ -2530,7 +2543,6 @@ glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen) char *p = buf + len - 1; while (isspace (*p)) *p-- = '\0'; - ret = 0; } } else if (ret < 0) gf_log ("", GF_LOG_ERROR, "Status file of gsyncd is corrupt"); @@ -2540,20 +2552,146 @@ glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen) } static int -glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen) +dict_get_param (dict_t *dict, char *key, char **param) +{ + char *dk = NULL; + char *s = NULL; + char x = '\0'; + int ret = 0; + + if (dict_get_str (dict, key, param) == 0) + return 0; + + dk = gf_strdup (key); + if (!key) + return -1; + + s = strpbrk (dk, "-_"); + if (!s) + return -1; + x = (*s == '-') ? 
'_' : '-'; + *s++ = x; + while ((s = strpbrk (s, "-_"))) + *s++ = x; + + ret = dict_get_str (dict, dk, param); + + GF_FREE (dk); + return ret; +} + +static int +glusterd_parse_gsync_status (char *buf, gf_gsync_status_t *sts_val) +{ + int ret = -1; + int i = -1; + int num_of_fields = 8; + char *token = NULL; + char **tokens = NULL; + char **ptr = NULL; + char *save_ptr = NULL; + char na_buf[] = "N/A"; + + if (!buf) { + gf_log ("", GF_LOG_ERROR, "Empty buf"); + goto out; + } + + tokens = calloc (num_of_fields, sizeof (char *)); + if (!tokens) { + gf_log ("", GF_LOG_ERROR, "Out of memory"); + goto out; + } + + ptr = tokens; + + for (token = strtok_r (buf, ",", &save_ptr); token; + token = strtok_r (NULL, ",", &save_ptr)) { + *ptr = gf_strdup(token); + if (!*ptr) { + gf_log ("", GF_LOG_ERROR, "Out of memory"); + goto out; + } + ptr++; + } + + for (i = 0; i < num_of_fields; i++) { + token = strtok_r (tokens[i], ":", &save_ptr); + token = strtok_r (NULL, "\0", &save_ptr); + token++; + + /* token NULL check */ + if (!token && (i != 0) && + (i != 5) && (i != 7)) + token = na_buf; + + if (i == 0) { + if (!token) + token = na_buf; + else { + token++; + if (!token) + token = na_buf; + else + token[strlen(token) - 1] = '\0'; + } + memcpy (sts_val->slave_node, token, strlen(token)); + } + if (i == 1) + memcpy (sts_val->files_syncd, token, strlen(token)); + if (i == 2) + memcpy (sts_val->purges_remaining, token, strlen(token)); + if (i == 3) + memcpy (sts_val->total_files_skipped, token, strlen(token)); + if (i == 4) + memcpy (sts_val->files_remaining, token, strlen(token)); + if (i == 5) { + if (!token) + token = na_buf; + else { + token++; + if (!token) + token = na_buf; + else + token[strlen(token) - 1] = '\0'; + } + memcpy (sts_val->worker_status, token, strlen(token)); + } + if (i == 6) + memcpy (sts_val->bytes_remaining, token, strlen(token)); + if (i == 7) { + if (!token) + token = na_buf; + else { + token++; + if (!token) + token = na_buf; + else + token[strlen(token) - 2] = '\0'; + } + memcpy (sts_val->crawl_status, token, strlen(token)); + } + } + + ret = 0; +out: + for (i = 0; i< num_of_fields; i++) + if (tokens[i]) + GF_FREE(tokens[i]); + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +static int +glusterd_gsync_fetch_status_extra (char *path, gf_gsync_status_t *sts_val) { char sockpath[PATH_MAX] = {0,}; struct sockaddr_un sa = {0,}; - size_t l = 0; int s = -1; struct pollfd pfd = {0,}; int ret = 0; - l = strlen (buf); - /* seek to end of data in buf */ - buf += l; - blen -= l; - glusterd_set_socket_filepath (path, sockpath, sizeof (sockpath)); strncpy(sa.sun_path, sockpath, sizeof(sa.sun_path)); @@ -2581,66 +2719,40 @@ glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen) ret = -1; goto out; } - ret = read(s, buf, blen); + ret = read(s, sts_val->checkpoint_status, + sizeof(sts_val->checkpoint_status)); /* we expect a terminating 0 byte */ - if (ret == 0 || (ret > 0 && buf[ret - 1])) + if (ret == 0 || (ret > 0 && sts_val->checkpoint_status[ret - 1])) ret = -1; - if (ret > 0) + if (ret > 0) { ret = 0; + } - out: +out: close (s); return ret; } -static int -dict_get_param (dict_t *dict, char *key, char **param) -{ - char *dk = NULL; - char *s = NULL; - char x = '\0'; - int ret = 0; - - if (dict_get_str (dict, key, param) == 0) - return 0; - - dk = gf_strdup (key); - if (!key) - return -1; - - s = strpbrk (dk, "-_"); - if (!s) - return -1; - x = (*s == '-') ? 
'_' : '-'; - *s++ = x; - while ((s = strpbrk (s, "-_"))) - *s++ = x; - - ret = dict_get_str (dict, dk, param); - - GF_FREE (dk); - return ret; -} - -static int +int glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, char *conf_path, dict_t *dict, char *node) { - glusterd_conf_t *priv = NULL; - int ret = 0; - char *statefile = NULL; - char *master = NULL; - char buf[1024] = "defunct"; - char nds[1024] = {0, }; - char mst[1024] = {0, }; - char slv[1024] = {0, }; - char sts[1024] = {0, }; - char *bufp = NULL; - dict_t *confd = NULL; - int gsync_count = 0; - int status = 0; - char *dyn_node = NULL; - char *path_list = NULL; + char brick_state_file[PATH_MAX] = ""; + char brick_path[PATH_MAX] = ""; + char *georep_session_wrkng_dir = NULL; + char *master = NULL; + char tmp[1024] = ""; + char sts_val_name[1024] = ""; + char monitor_status[PATH_MAX] = ""; + char *statefile = NULL; + char *socketfile = NULL; + dict_t *confd = NULL; + int gsync_count = 0; + int i = 0; + int ret = 0; + glusterd_brickinfo_t *brickinfo = NULL; + gf_gsync_status_t *sts_val = NULL; + glusterd_conf_t *priv = NULL; GF_ASSERT (THIS); GF_ASSERT (THIS->private); @@ -2661,7 +2773,7 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, if (ret) { gf_log ("", GF_LOG_ERROR, "Unable to get configuration data" "for %s(master), %s(slave)", master, slave); - goto done; + goto out; } @@ -2670,120 +2782,168 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, gf_log ("", GF_LOG_ERROR, "Unable to get state_file's name " "for %s(master), %s(slave). Please check gsync " "config file.", master, slave); - goto done; + goto out; } - ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf)); - if (ret) { + + ret = glusterd_gsync_read_frm_status (statefile, monitor_status, + sizeof (monitor_status)); + if (ret <= 0) { gf_log ("", GF_LOG_ERROR, "Unable to read the status" "file for %s(master), %s(slave)", master, slave); - strncpy (buf, "defunct", sizeof (buf)); - goto done; - } - - ret = gsync_status (master, slave, conf_path, &status); - if (ret == 0 && status == -1) { - if ((strcmp (buf, "Not Started")) && - (strcmp (buf, "Stopped"))) - strncpy (buf, "defunct", sizeof (buf)); - goto done; - } else if (ret == -1) { - gf_log ("", GF_LOG_ERROR, "Unable to get gsync status"); - goto done; + strncpy (monitor_status, "defunct", sizeof (monitor_status)); } - if (strcmp (buf, "Stable") != 0) - goto done; - - ret = dict_get_param (confd, "state_socket_unencoded", &statefile); + ret = dict_get_param (confd, "georep_session_working_dir", + &georep_session_wrkng_dir); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to get state_socket_unencoded" - " filepath. Please check gsync config file."); - goto done; + gf_log ("", GF_LOG_ERROR, "Unable to get geo-rep session's " + "working directory name for %s(master), %s(slave). 
" + "Please check gsync config file.", master, slave); + goto out; } - ret = glusterd_gsync_fetch_status_extra (statefile, buf, sizeof (buf)); + + ret = dict_get_param (confd, "state_socket_unencoded", &socketfile); if (ret) { - gf_log ("", GF_LOG_ERROR, "Unable to fetch extra status" - "for %s(master), %s(slave)", master, slave); - /* there is a slight chance that this occurs due to race - * -- in that case, the following options all seem bad: - * - * - suppress irregurlar behavior by just leaving status - * on "OK" - * - freak out users with a misleading "defunct" - * - overload the meaning of the regular error signal - * mechanism of gsyncd, that is, when status is "faulty" - * - * -- so we just come up with something new... - */ - strncpy (buf, "N/A", sizeof (buf)); - goto done; + gf_log ("", GF_LOG_ERROR, "Unable to get socket file's name " + "for %s(master), %s(slave). Please check gsync " + "config file.", master, slave); + goto out; } - done: - if ((!strcmp (buf, "defunct")) || - (!strcmp (buf, "Not Started")) || - (!strcmp (buf, "Stopped"))) { - ret = glusterd_get_local_brickpaths (volinfo, &path_list); - if (!path_list) { - gf_log ("", GF_LOG_DEBUG, "This node not being part of" - " volume should not be running gsyncd. Hence" - " shouldn't display status for this node."); - ret = 0; + ret = dict_get_int32 (dict, "gsync-count", &gsync_count); + if (ret) + gsync_count = 0; + + list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) { + if (uuid_compare (brickinfo->uuid, MY_UUID)) + continue; + + sts_val = GF_CALLOC (1, sizeof(gf_gsync_status_t), + gf_common_mt_gsync_status_t); + if (!sts_val) { + gf_log ("", GF_LOG_ERROR, "Out Of Memory"); goto out; } - } - ret = dict_get_int32 (dict, "gsync-count", &gsync_count); + /* Creating the brick state file's path */ + memset(brick_state_file, '\0', PATH_MAX); + memcpy (brick_path, brickinfo->path, PATH_MAX - 1); + for (i = 0; i < strlen(brick_path) - 1; i++) + if (brick_path[i] == '/') + brick_path[i] = '_'; + ret = snprintf(brick_state_file, PATH_MAX - 1, "%s%s.status", + georep_session_wrkng_dir, brick_path); + brick_state_file[ret] = '\0'; + + gf_log ("", GF_LOG_DEBUG, "brick_state_file = %s", brick_state_file); + + memset (tmp, '\0', sizeof(tmp)); + + ret = glusterd_gsync_read_frm_status (brick_state_file, + tmp, sizeof (tmp)); + if (ret <= 0) { + gf_log ("", GF_LOG_ERROR, "Unable to read the status" + "file for %s brick for %s(master), %s(slave) " + "session", brickinfo->path, master, slave); + memcpy (sts_val->slave_node, slave, strlen(slave)); + sts_val->slave_node[strlen(slave)] = '\0'; + ret = snprintf (sts_val->worker_status, sizeof(sts_val->worker_status), "N/A"); + sts_val->worker_status[ret] = '\0'; + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + sts_val->crawl_status[ret] = '\0'; + ret = snprintf (sts_val->files_syncd, sizeof(sts_val->files_syncd), "N/A"); + sts_val->files_syncd[ret] = '\0'; + ret = snprintf (sts_val->purges_remaining, sizeof(sts_val->purges_remaining), "N/A"); + sts_val->purges_remaining[ret] = '\0'; + ret = snprintf (sts_val->total_files_skipped, sizeof(sts_val->total_files_skipped), "N/A"); + sts_val->total_files_skipped[ret] = '\0'; + ret = snprintf (sts_val->files_remaining, sizeof(sts_val->files_remaining), "N/A"); + sts_val->files_remaining[ret] = '\0'; + ret = snprintf (sts_val->bytes_remaining, sizeof(sts_val->bytes_remaining), 
"N/A"); + sts_val->bytes_remaining[ret] = '\0'; + goto store_status; + } - if (ret) - gsync_count = 1; - else - gsync_count++; + ret = glusterd_gsync_fetch_status_extra (socketfile, sts_val); + if (ret || strlen(sts_val->checkpoint_status) == 0) { + gf_log ("", GF_LOG_DEBUG, "No checkpoint status" + "for %s(master), %s(slave)", master, slave); + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + } - (void) snprintf (nds, sizeof (nds), "node%d", gsync_count); - dyn_node = gf_strdup (node); - if (!dyn_node) - goto out; - ret = dict_set_dynstr (dict, nds, dyn_node); - if (ret) { - GF_FREE (dyn_node); - goto out; - } + ret = glusterd_parse_gsync_status (tmp, sts_val); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to parse the gsync status for %s", + brickinfo->path); + memcpy (sts_val->slave_node, slave, strlen(slave)); + sts_val->slave_node[strlen(slave)] = '\0'; + ret = snprintf (sts_val->worker_status, sizeof(sts_val->worker_status), "N/A"); + sts_val->worker_status[ret] = '\0'; + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + sts_val->crawl_status[ret] = '\0'; + ret = snprintf (sts_val->files_syncd, sizeof(sts_val->files_syncd), "N/A"); + sts_val->files_syncd[ret] = '\0'; + ret = snprintf (sts_val->purges_remaining, sizeof(sts_val->purges_remaining), "N/A"); + sts_val->purges_remaining[ret] = '\0'; + ret = snprintf (sts_val->total_files_skipped, sizeof(sts_val->total_files_skipped), "N/A"); + sts_val->total_files_skipped[ret] = '\0'; + ret = snprintf (sts_val->files_remaining, sizeof(sts_val->files_remaining), "N/A"); + sts_val->files_remaining[ret] = '\0'; + ret = snprintf (sts_val->bytes_remaining, sizeof(sts_val->bytes_remaining), "N/A"); + sts_val->bytes_remaining[ret] = '\0'; + } - snprintf (mst, sizeof (mst), "master%d", gsync_count); - master = gf_strdup (master); - if (!master) - goto out; - ret = dict_set_dynstr (dict, mst, master); - if (ret) { - GF_FREE (master); - goto out; - } +store_status: + if ((strcmp (monitor_status, "Stable"))) { + memcpy (sts_val->worker_status, monitor_status, strlen(monitor_status)); + sts_val->worker_status[strlen(monitor_status)] = '\0'; + ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + sts_val->crawl_status[ret] = '\0'; + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + } - snprintf (slv, sizeof (slv), "slave%d", gsync_count); - slave = gf_strdup (slave); - if (!slave) - goto out; - ret = dict_set_dynstr (dict, slv, slave); - if (ret) { - GF_FREE (slave); - goto out; - } + if (strcmp (sts_val->worker_status, "Active")) { + ret = snprintf (sts_val->checkpoint_status, sizeof(sts_val->checkpoint_status), "N/A"); + sts_val->checkpoint_status[ret] = '\0'; + ret = snprintf (sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A"); + sts_val->crawl_status[ret] = '\0'; + } - snprintf (sts, sizeof (slv), "status%d", gsync_count); - bufp = gf_strdup (buf); - if (!bufp) - goto out; - ret = dict_set_dynstr (dict, sts, bufp); - if (ret) { - GF_FREE (bufp); - goto out; + if (!strcmp (sts_val->slave_node, "N/A")) { + memcpy (sts_val->slave_node, slave, strlen(slave)); + sts_val->slave_node[strlen(slave)] = '\0'; + } + + memcpy (sts_val->node, node, strlen(node)); + sts_val->node[strlen(node)] = '\0'; 
+ memcpy (sts_val->brick, brickinfo->path, strlen(brickinfo->path)); + sts_val->brick[strlen(brickinfo->path)] = '\0'; + memcpy (sts_val->master, master, strlen(master)); + sts_val->master[strlen(master)] = '\0'; + + snprintf (sts_val_name, sizeof (sts_val_name), "status_value%d", gsync_count); + ret = dict_set_bin (dict, sts_val_name, sts_val, sizeof(gf_gsync_status_t)); + if (ret) { + GF_FREE (sts_val); + goto out; + } + + gsync_count++; + sts_val = NULL; } + ret = dict_set_int32 (dict, "gsync-count", gsync_count); if (ret) goto out; - out: +out: dict_destroy (confd); return 0; @@ -3246,30 +3406,32 @@ glusterd_op_sys_exec (dict_t *dict, char **op_errstr, dict_t *rsp_dict) goto out; } - ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO)); - if (ptr) { - ret = dict_get_int32 (rsp_dict, "output_count", &output_count); - if (ret) - output_count = 1; - else - output_count++; - memset (output_name, '\0', sizeof (output_name)); - snprintf (output_name, sizeof (output_name), - "output_%d", output_count); - if (buf[strlen(buf) - 1] == '\n') - buf[strlen(buf) - 1] = '\0'; - bufp = gf_strdup (buf); - if (!bufp) - gf_log ("", GF_LOG_ERROR, "gf_strdup failed."); - ret = dict_set_dynstr (rsp_dict, output_name, bufp); - if (ret) { - GF_FREE (bufp); - gf_log ("", GF_LOG_ERROR, "output set failed."); + do { + ptr = fgets(buf, sizeof(buf), runner_chio (&runner, STDOUT_FILENO)); + if (ptr) { + ret = dict_get_int32 (rsp_dict, "output_count", &output_count); + if (ret) + output_count = 1; + else + output_count++; + memset (output_name, '\0', sizeof (output_name)); + snprintf (output_name, sizeof (output_name), + "output_%d", output_count); + if (buf[strlen(buf) - 1] == '\n') + buf[strlen(buf) - 1] = '\0'; + bufp = gf_strdup (buf); + if (!bufp) + gf_log ("", GF_LOG_ERROR, "gf_strdup failed."); + ret = dict_set_dynstr (rsp_dict, output_name, bufp); + if (ret) { + GF_FREE (bufp); + gf_log ("", GF_LOG_ERROR, "output set failed."); + } + ret = dict_set_int32 (rsp_dict, "output_count", output_count); + if (ret) + gf_log ("", GF_LOG_ERROR, "output_count set failed."); } - ret = dict_set_int32 (rsp_dict, "output_count", output_count); - if (ret) - gf_log ("", GF_LOG_ERROR, "output_count set failed."); - } + } while (ptr); ret = runner_end (&runner); if (ret) { @@ -3708,7 +3870,7 @@ out: } -static int +int glusterd_get_slave_info (char *slave, char **slave_ip, char **slave_vol, char **op_errstr) { @@ -3888,7 +4050,7 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf_path); runner_add_args (&runner, "gluster-params", - "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount", ".", ".", NULL); RUN_GSYNCD_CMD; @@ -3902,6 +4064,16 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path) runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; + /* ssh-command tar */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, "ssh-command-tar"); + runner_argprintf (&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/tar_ssh.pem", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /* pid-file */ runinit_gsyncd_setrx (&runner, conf_path); runner_add_arg (&runner, "pid-file"); @@ -3909,6 +4081,13 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path) runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; + /* geo-rep-working-dir */ + runinit_gsyncd_setrx (&runner, conf_path); + runner_add_arg (&runner, 
"georep-session-working-dir"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /* state-file */ runinit_gsyncd_setrx (&runner, conf_path); runner_add_arg (&runner, "state-file"); @@ -3986,7 +4165,7 @@ create_conf_file (glusterd_conf_t *conf, char *conf_path) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf_path); runner_add_args (&runner, "gluster-params", - "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount", ".", NULL); RUN_GSYNCD_CMD; diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c index 0d67d1303..4ce441da8 100644 --- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c +++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c @@ -231,7 +231,6 @@ parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc) const char *georep_mnt_desc_template = "SUP(" - "xlator-option=\\*-dht.assert-no-child-down=true " "volfile-server=localhost " "client-pid=%d " "user-map-root=%s " diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 15c40f3e4..8cf9f790f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -4591,14 +4591,24 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) { char *path_list = NULL; char *slave = NULL; + char *slave_ip = NULL; + char *slave_vol = NULL; + char *statefile = NULL; + char buf[1024] = "faulty"; int uuid_len = 0; int ret = 0; char uuid_str[64] = {0}; - glusterd_volinfo_t *volinfo = NULL; - char *conf_path = NULL; + glusterd_volinfo_t *volinfo = NULL; + char confpath[PATH_MAX] = ""; + char *op_errstr = NULL; + glusterd_conf_t *priv = NULL; + + GF_ASSERT (THIS); + priv = THIS->private; + GF_ASSERT (priv); + GF_ASSERT (data); volinfo = data; - GF_ASSERT (volinfo); slave = strchr(value->data, ':'); if (slave) slave ++; @@ -4608,22 +4618,63 @@ _local_gsyncd_start (dict_t *this, char *key, data_t *value, void *data) strncpy (uuid_str, (char*)value->data, uuid_len); + /* Getting Local Brickpaths */ ret = glusterd_get_local_brickpaths (volinfo, &path_list); - ret = dict_get_str (this, "conf_path", &conf_path); + /*Generating the conf file path needed by gsyncd */ + ret = glusterd_get_slave_info (slave, &slave_ip, + &slave_vol, &op_errstr); if (ret) { gf_log ("", GF_LOG_ERROR, - "Unable to fetch conf file path."); + "Unable to fetch slave details."); + ret = -1; goto out; } - glusterd_start_gsync (volinfo, slave, path_list, conf_path, - uuid_str, NULL); + ret = snprintf (confpath, sizeof(confpath) - 1, + "%s/"GEOREP"/%s_%s_%s/gsyncd.conf", + priv->workdir, volinfo->volname, + slave_ip, slave_vol); + confpath[ret] = '\0'; + + /* Fetching the last status of the node */ + ret = glusterd_get_statefile_name (volinfo, slave, + confpath, &statefile); + if (ret) { + if (!strstr(slave, "::")) + gf_log ("", GF_LOG_INFO, + "%s is not a valid slave url.", slave); + else + gf_log ("", GF_LOG_INFO, "Unable to get" + " statefile's name"); + goto out; + } + + ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf)); + if (ret < 0) { + gf_log ("", GF_LOG_ERROR, "Unable to read the status"); + goto out; + } - GF_FREE (path_list); - path_list = NULL; + /* Looks for the last status, to find if the sessiom was running + * when the node went down. 
If the session was not started or + * not started, do not restart the geo-rep session */ + if ((!strcmp (buf, "Not Started")) || + (!strcmp (buf, "Stopped"))) { + gf_log ("", GF_LOG_INFO, + "Geo-Rep Session was not started between " + "%s and %s::%s. Not Restarting", volinfo->volname, + slave_ip, slave_vol); + goto out; + } + + glusterd_start_gsync (volinfo, slave, path_list, confpath, + uuid_str, NULL); out: + if (path_list) + GF_FREE (path_list); + return ret; } @@ -7205,21 +7256,16 @@ glusterd_append_gsync_status (dict_t *dst, dict_t *src) } -static int32_t +int32_t glusterd_append_status_dicts (dict_t *dst, dict_t *src) { - int dst_count = 0; - int src_count = 0; - int i = 0; - int ret = 0; - char mst[PATH_MAX] = {0,}; - char slv[PATH_MAX] = {0, }; - char sts[PATH_MAX] = {0, }; - char nds[PATH_MAX] = {0, }; - char *mst_val = NULL; - char *slv_val = NULL; - char *sts_val = NULL; - char *nds_val = NULL; + char sts_val_name[PATH_MAX] = {0, }; + int dst_count = 0; + int src_count = 0; + int i = 0; + int ret = 0; + gf_gsync_status_t *sts_val = NULL; + gf_gsync_status_t *dst_sts_val = NULL; GF_ASSERT (dst); @@ -7237,49 +7283,29 @@ glusterd_append_status_dicts (dict_t *dst, dict_t *src) goto out; } - for (i = 1; i <= src_count; i++) { - snprintf (nds, sizeof(nds), "node%d", i); - snprintf (mst, sizeof(mst), "master%d", i); - snprintf (slv, sizeof(slv), "slave%d", i); - snprintf (sts, sizeof(sts), "status%d", i); - - ret = dict_get_str (src, nds, &nds_val); - if (ret) - goto out; - - ret = dict_get_str (src, mst, &mst_val); - if (ret) - goto out; - - ret = dict_get_str (src, slv, &slv_val); - if (ret) - goto out; + for (i = 0; i < src_count; i++) { + memset (sts_val_name, '\0', sizeof(sts_val_name)); + snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i); - ret = dict_get_str (src, sts, &sts_val); + ret = dict_get_bin (src, sts_val_name, (void **) &sts_val); if (ret) goto out; - snprintf (nds, sizeof(nds), "node%d", i+dst_count); - snprintf (mst, sizeof(mst), "master%d", i+dst_count); - snprintf (slv, sizeof(slv), "slave%d", i+dst_count); - snprintf (sts, sizeof(sts), "status%d", i+dst_count); - - ret = dict_set_dynstr (dst, nds, gf_strdup (nds_val)); - if (ret) + dst_sts_val = GF_CALLOC (1, sizeof(gf_gsync_status_t), + gf_common_mt_gsync_status_t); + if (!dst_sts_val) { + gf_log ("", GF_LOG_ERROR, "Out Of Memory"); goto out; + } - ret = dict_set_dynstr (dst, mst, gf_strdup (mst_val)); - if (ret) - goto out; + memcpy (dst_sts_val, sts_val, sizeof(gf_gsync_status_t)); - ret = dict_set_dynstr (dst, slv, gf_strdup (slv_val)); - if (ret) - goto out; + memset (sts_val_name, '\0', sizeof(sts_val_name)); + snprintf (sts_val_name, sizeof(sts_val_name), "status_value%d", i + dst_count); - ret = dict_set_dynstr (dst, sts, gf_strdup (sts_val)); + ret = dict_set_bin (dst, sts_val_name, dst_sts_val, sizeof(gf_gsync_status_t)); if (ret) goto out; - } ret = dict_set_int32 (dst, "gsync-count", dst_count+src_count); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 970b1f8a6..20cd00cbe 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -582,6 +582,17 @@ glusterd_get_slave_details_confpath (glusterd_volinfo_t *volinfo, dict_t *dict, char **slave_ip, char **slave_vol, char **conf_path, char **op_errstr); +int +glusterd_get_slave_info (char *slave, char **slave_ip, + char **slave_vol, char **op_errstr); + +int +glusterd_get_statefile_name (glusterd_volinfo_t *volinfo, char *slave, + char 
*conf_path, char **statefile); + +int +glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen); + int glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave, dict_t *resp_dict, char *path_list, diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index c2be2c9da..58833869a 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -594,7 +594,7 @@ configure_syncdaemon (glusterd_conf_t *conf) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf); runner_add_args (&runner, "gluster-params", - "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount", ".", ".", NULL); RUN_GSYNCD_CMD; @@ -608,6 +608,16 @@ configure_syncdaemon (glusterd_conf_t *conf) runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; + /* ssh-command tar */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "ssh-command-tar"); + runner_argprintf (&runner, + "ssh -oPasswordAuthentication=no " + "-oStrictHostKeyChecking=no " + "-i %s/tar_ssh.pem", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /* pid-file */ runinit_gsyncd_setrx (&runner, conf); runner_add_arg (&runner, "pid-file"); @@ -615,6 +625,13 @@ configure_syncdaemon (glusterd_conf_t *conf) runner_add_args (&runner, ".", ".", NULL); RUN_GSYNCD_CMD; + /* geo-rep working dir */ + runinit_gsyncd_setrx (&runner, conf); + runner_add_arg (&runner, "georep-session-working-dir"); + runner_argprintf (&runner, "%s/${mastervol}_${remotehost}_${slavevol}/", georepdir); + runner_add_args (&runner, ".", ".", NULL); + RUN_GSYNCD_CMD; + /* state-file */ runinit_gsyncd_setrx (&runner, conf); runner_add_arg (&runner, "state-file"); @@ -701,7 +718,7 @@ configure_syncdaemon (glusterd_conf_t *conf) /* gluster-params */ runinit_gsyncd_setrx (&runner, conf); runner_add_args (&runner, "gluster-params", - "aux-gfid-mount xlator-option=*-dht.assert-no-child-down=true", + "aux-gfid-mount", ".", NULL); RUN_GSYNCD_CMD; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index e1e9e591f..23b4205b0 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -47,7 +47,7 @@ #define GLUSTERD_QUORUM_RATIO_KEY "cluster.server-quorum-ratio" #define GLUSTERD_GLOBAL_OPT_VERSION "global-option-version" #define GLUSTERD_COMMON_PEM_PUB_FILE "/geo-replication/common_secret.pem.pub" -#define GEO_CONF_MAX_OPT_VALS 5 +#define GEO_CONF_MAX_OPT_VALS 6 #define GLUSTERD_CREATE_HOOK_SCRIPT "/hooks/1/gsync-create/post/" \ "S56glusterd-geo-rep-create-post.sh" -- cgit From 8bdc329e892f35ca19dfd07b542aa81afd855fce Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Tue, 10 Dec 2013 14:28:22 -0800 Subject: posix: if brick-uid or brick-gid is not specified, do not set MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current code would set owner uid/gid explicitly to 0/0 on start even if none was specified. Fix it. 
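
For illustration only (not part of the original commit message): the idea of
the fix, as a self-contained sketch. Here set_owner() is a hypothetical
stand-in for the patched posix_set_owner(), using plain lstat()/chown()
rather than the sys_* wrappers, and -1 is the "option not specified"
sentinel the patch introduces.

~~~
#include <stdint.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

/* Sketch only: skip the chown both when no owner was configured and
 * when the on-disk owner already matches what was requested. */
static int
set_owner (const char *path, int32_t uid, int32_t gid)
{
        struct stat st;

        if (uid == -1 && gid == -1)
                return 0;               /* neither option specified */

        if (lstat (path, &st) != 0)
                return -1;              /* cannot stat the brick path */

        if ((uid == -1 || st.st_uid == (uid_t)uid) &&
            (gid == -1 || st.st_gid == (gid_t)gid))
                return 0;               /* ownership already correct */

        /* (uid_t)-1 / (gid_t)-1 tell chown(2) to leave that id alone */
        return chown (path, (uid_t)uid, (gid_t)gid);
}
~~~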
Change-Id: I72dec9e79c51bd1eb3af5334c42b7c23b01d0258 BUG: 1040275 Signed-off-by: Anand Avati Reviewed-on: http://review.gluster.org/6476 Tested-by: Gluster Build System Tested-by: Lukáš Bezdička Reviewed-by: Krishnan Parthasarathi Reviewed-by: Vijay Bellur --- tests/bugs/brick-uid-reset-on-volume-restart.t | 47 ++++++++++++++++++++++++++ xlators/storage/posix/src/posix.c | 41 +++++++++++++++------- 2 files changed, 76 insertions(+), 12 deletions(-) create mode 100755 tests/bugs/brick-uid-reset-on-volume-restart.t diff --git a/tests/bugs/brick-uid-reset-on-volume-restart.t b/tests/bugs/brick-uid-reset-on-volume-restart.t new file mode 100755 index 000000000..99629733f --- /dev/null +++ b/tests/bugs/brick-uid-reset-on-volume-restart.t @@ -0,0 +1,47 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +function get_uid() { + stat -c '%u' $1; +} + +function get_gid() { + stat -c '%g' $1; +} + + +cleanup; + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8}; + +EXPECT "$V0" volinfo_field $V0 'Volume Name'; +EXPECT 'Created' volinfo_field $V0 'Status'; +EXPECT '8' brick_count $V0 + +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +TEST glusterfs -s $H0 --volfile-id $V0 $M0; + +EXPECT 0 get_uid $M0; +EXPECT 0 get_gid $M0; + +TEST chown 100:101 $M0; + +EXPECT 100 get_uid $M0; +EXPECT 101 get_gid $M0; + +TEST $CLI volume stop $V0; +TEST $CLI volume start $V0; + +sleep 10; + +EXPECT 100 get_uid $M0; +EXPECT 101 get_gid $M0; + +cleanup; diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index b72cd5e0f..1ebf42150 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -5093,9 +5093,22 @@ posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) { struct posix_private *priv = NULL; int ret = -1; + struct stat st = {0,}; priv = this->private; + ret = sys_lstat (priv->base_path, &st); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to stat " + "brick path %s (%s)", + priv->base_path, strerror (errno)); + return ret; + } + + if ((uid == -1 || st.st_uid == uid) && + (gid == -1 || st.st_gid == gid)) + return 0; + ret = sys_chown (priv->base_path, uid, gid); if (ret) gf_log (this->name, GF_LOG_ERROR, "Failed to set " @@ -5131,15 +5144,16 @@ reconfigure (xlator_t *this, dict_t *options) { int ret = -1; struct posix_private *priv = NULL; - uid_t uid = -1; - gid_t gid = -1; + int32_t uid = -1; + int32_t gid = -1; char *batch_fsync_mode_str = NULL; priv = this->private; - GF_OPTION_RECONF ("brick-uid", uid, options, uint32, out); - GF_OPTION_RECONF ("brick-gid", gid, options, uint32, out); - posix_set_owner (this, uid, gid); + GF_OPTION_RECONF ("brick-uid", uid, options, int32, out); + GF_OPTION_RECONF ("brick-gid", gid, options, int32, out); + if (uid != -1 || gid != -1) + posix_set_owner (this, uid, gid); GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, options, uint32, out); @@ -5205,8 +5219,8 @@ init (xlator_t *this) uuid_t gfid = {0,}; uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; char *guuid = NULL; - uid_t uid = -1; - gid_t gid = -1; + int32_t uid = -1; + int32_t gid = -1; char *batch_fsync_mode_str; dir_data = dict_get (this->options, "directory"); @@ -5541,9 +5555,10 @@ init (xlator_t *this) _private->aio_init_done = _gf_false; _private->aio_capable = _gf_false; - GF_OPTION_INIT ("brick-uid", uid, uint32, out); - GF_OPTION_INIT ("brick-gid", gid, uint32, out); - posix_set_owner 
(this, uid, gid);
+        GF_OPTION_INIT ("brick-uid", uid, int32, out);
+        GF_OPTION_INIT ("brick-gid", gid, int32, out);
+        if (uid != -1 || gid != -1)
+                posix_set_owner (this, uid, gid);

         GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out);

@@ -5704,15 +5719,17 @@ struct volume_options options[] = {
         { .key  = {"brick-uid"},
           .type = GF_OPTION_TYPE_INT,
-          .min = 0,
+          .min = -1,
           .validate = GF_OPT_VALIDATE_MIN,
+          .default_value = "-1",
           .description = "Support for setting uid of brick's owner"
         },
         { .key  = {"brick-gid"},
           .type = GF_OPTION_TYPE_INT,
-          .min = 0,
+          .min = -1,
           .validate = GF_OPT_VALIDATE_MIN,
+          .default_value = "-1",
           .description = "Support for setting gid of brick's owner"
         },
         { .key = {"node-uuid-pathinfo"},
-- cgit

From ea89a25b0b4e8796c421c32fb6dbc4661081f6e1 Mon Sep 17 00:00:00 2001
From: Anand Avati
Date: Thu, 12 Dec 2013 15:43:28 -0800
Subject: dht: handle ESTALE/ENOENT in dht_access

Had missed out dht_access in the previous round of cleanup

Change-Id: Ib255b9ad13ca62a8bc2eea225c46632aff8e820f
BUG: 1032894
Signed-off-by: Anand Avati
Reviewed-on: http://review.gluster.org/6496
Tested-by: Gluster Build System
Reviewed-by: Amar Tumballi
---
 xlators/cluster/dht/src/dht-inode-read.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index 12a551505..e8a9a7196 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -531,7 +531,7 @@ dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                           &local->loc, local->rebalance.flags, NULL);
                 return 0;
         }
-        if ((op_ret == -1) && (op_errno == ENOENT)) {
+        if ((op_ret == -1) && dht_inode_missing(op_errno)) {
                 /* File would be migrated to other node */
                 local->op_errno = op_errno;
                 local->rebalance.target_op_fn = dht_access2;
-- cgit

From 0404be9ca1d9fa15c83bc4132561091c1c839d84 Mon Sep 17 00:00:00 2001
From: Harshavardhana
Date: Sat, 14 Sep 2013 19:51:13 -0700
Subject: mount.glusterfs: getopts support and cleanup

This patch is an attempt to provide some much-needed cleanup for
future maintenance of `mount.glusterfs`

- Add checks for command failures
- Splitting large code into several simpler functions
- Standardized variables
- use 'bash' instead of 'sh' - since string manipulation and
  variable handling are far superior
- Overall code cleanup and Copyright change to Red Hat, Inc.
- Add new style of mounting with a comma separated list
  ~~~
  $ mount -t glusterfs ,,..:/
  ~~~
- Update age old `manpage` with new options :-)

Change-Id: I294e4d078a067d67d9a67eb8dde5eb2634cc0e45
BUG: 1040348
Signed-off-by: Harshavardhana
Reviewed-on: http://review.gluster.org/5931
Tested-by: Gluster Build System
Reviewed-by: Amar Tumballi
Reviewed-by: Vijay Bellur
---
 doc/mount.glusterfs.8                       |  85 ++++-
 xlators/mount/fuse/utils/mount.glusterfs.in | 527 +++++++++++++++++-----------
 2 files changed, 389 insertions(+), 223 deletions(-)

diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
index 01b7f7554..e6061ffc6 100644
--- a/doc/mount.glusterfs.8
+++ b/doc/mount.glusterfs.8
@@ -1,4 +1,4 @@
-.\" Copyright (c) 2008-2012 Red Hat, Inc.
+.\" Copyright (c) 2008-2013 Red Hat, Inc.
 .\" This file is part of GlusterFS.
 .\"
 .\" This file is licensed to you under your choice of the GNU Lesser
@@ -8,21 +8,24 @@
 .\"
 .\"
 .\"
-.TH GlusterFS 8 "Cluster Filesystem" "18 March 2010" "Gluster Inc."
+.TH GlusterFS 8 "Cluster Filesystem" "14 September 2013" "Red Hat, Inc."
.SH NAME -mount.glusterfs - script to mount native GlusterFS volume +.B mount.glusterfs - script to mount native GlusterFS volume .SH SYNOPSIS -.B mount -t glusterfs -.I [-o ] : +.B mount -t glusterfs [-o ] :/ +.B .TP -.B mount -t glusterfs -.I [-o ] +.B mount -t glusterfs [-o ] ,, +.B ,..:/ +.TP +.TP +.B mount -t glusterfs [-o ] .PP .SH DESCRIPTION This tool is part of \fBglusterfs\fR(8) package, which is used to mount using GlusterFS native binary. -\fBmount.glusterfs\fR is meant to be used by the mount(8) command for mounting +\fBmount.glusterfs\fR is meant to be used by the mount(8) command for mounting native GlusterFS client. This subcommand, however, can also be used as a standalone command with limited functionality. @@ -38,17 +41,46 @@ File to use for logging [default:/var/log/glusterfs/glusterfs.log] Logging severity. Valid options are TRACE, DEBUG, WARNING, ERROR, CRITICAL INFO and NONE [default: INFO] .TP +\fBacl +Mount the filesystem with POSIX ACL support +.TP +\fBfopen\-keep\-cache +Do not purge the cache on file open +.TP +\fBselinux +Enable SELinux label (extened attributes) support on inodes +.TP +\fBworm +Mount the filesystem in 'worm' mode +.TP +\fBaux\-gfid\-mount +Enable access to filesystem through gfid directly +.TP \fBro\fR Mount the filesystem read-only +.TP +\fBenable\-ino32=\fRBOOL +Use 32-bit inodes when mounting to workaround broken applications that don't +support 64-bit inodes + .PP .SS "Advanced options" .PP .TP -\fBvolfile\-id=\fRKEY -Volume key or name of the volume file to be fetched from server +\fBattribute\-timeout=\fRSECONDS +Set attribute timeout to SECONDS for inodes in fuse kernel module [default: 1] +.TP +\fBentry\-timeout=\fRSECONDS +Set entry timeout to SECONDS in fuse kernel module [default: 1] +.TP +\fBbackground\-qlen=\fRN +Set fuse module's background queue length to N [default: 64] .TP -\fBtransport=\fRTRANSPORT-TYPE -Transport type to get volume file from server [default: tcp] +\fBgid\-timeout=\fRSECONDS +Set auxilary group list timeout to SECONDS for fuse translator [default: 0] +.TP +\fBnegative\-timeout=\fRSECONDS +Set negative timeout to SECONDS in fuse kernel module [default: 0] .TP \fBvolume\-name=\fRVOLUME-NAME Volume name to be used for MOUNT-POINT [default: top most volume in @@ -56,6 +88,25 @@ VOLUME-FILE] .TP \fBdirect\-io\-mode=\fRdisable Disable direct I/O mode in fuse kernel module +.TP +\fBcongestion\-threshold=\fRN +Set fuse module's congestion threshold to N [default: 48] +.TP +.TP +\fBbackup\-volfile\-servers=\fRSERVERLIST +Provide list of backup volfile servers in the following format [default: None] + +\fB$ mount -t glusterfs -obackup-volfile-servers=:\fR +\fB :...: :/ \fR + +.TP +.TP +\fBbackupvolfile\-server=\fRSERVER +Provide list of backup volfile servers in the following format [default: None] + +\fB $ mount -t glusterfs -obackupvolfile-server= +\fB :/ + .TP .PP .SH FILES @@ -63,16 +114,16 @@ Disable direct I/O mode in fuse kernel module .I /etc/fstab A typical GlusterFS entry in /etc/fstab looks like below -server1.gluster.com:mirror /mnt/mirror glusterfs log-file=/var/log/mirror.vol,ro,defaults 0 0 +\fBserver1:/mirror /mnt/mirror glusterfs log-file=/var/log/mirror.log,acl,selinux 0 0\fR .TP -.I /etc/mtab -An example entry of a GlusterFS mountpoint in /etc/mtab looks like below +.I /proc/mounts +An example entry of a GlusterFS mountpoint in /proc/mounts looks like below -mirror.vol /mnt/glusterfs fuse.glusterfs rw,allow_other,default_permissions,max_read=131072 0 0 +\fBserver1:/mirror /mnt/glusterfs 
fuse.glusterfs rw,allow_other,default_permissions,max_read=131072 0 0\fR .SH SEE ALSO \fBglusterfs\fR(8), \fBmount\fR(8), \fBgluster\fR(8) .SH COPYRIGHT -Copyright(c) 2006-2011 Gluster, Inc. +Copyright(c) 2006-2013 Red Hat, Inc. diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index bf89e9d52..d5993618c 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -1,20 +1,12 @@ -#!/bin/sh -# (C) 2006, 2007, 2008 Gluster Inc. +#!/bin/bash # -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; either version 2 of -# the License, or (at your option) any later version. +# Copyright (c) 2013 Red Hat, Inc. +# This file is part of GlusterFS. # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public -# License along with this program; if not, write to the Free -# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301 USA +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. _init () { @@ -33,28 +25,43 @@ _init () exec_prefix=@exec_prefix@; cmd_line=$(echo "@sbindir@/glusterfs"); - case `uname -s` in - NetBSD) - getinode="stat -f %i" - getdev="stat -f %d" - lgetinode="${getinode} -L" - lgetdev="${getdev} -L" - - mounttab=/proc/mounts - ;; - Linux) - getinode="stat -c %i $i" - getdev="stat -c %d $d" - lgetinode="${getinode} -L" - lgetdev="${getdev} -L" - - mounttab=/etc/mtab - ;; + # check whether getfattr exists + getfattr=$(which getfattr 2>/dev/null); + if [ $? -ne 0 ]; then + echo "WARNING: getfattr not found, certain checks will be skipped.." + fi + + alias lsL='ls -L' + mounttab=/proc/mounts + uname_s=`uname -s` + case ${uname_s} in + NetBSD) + getinode="stat -f %i" + getdev="stat -f %d" + lgetinode="${getinode} -L" + lgetdev="${getdev} -L" + ;; + Linux) + getinode="stat -c %i" + getdev="stat -c %d" + lgetinode="${getinode} -L" + lgetdev="${getdev} -L" + ;; esac UPDATEDBCONF=/etc/updatedb.conf } +is_valid_hostname () +{ + local server=$1 + + length=$(echo $server | wc -c) + if [ ${length} -gt ${HOST_NAME_MAX} ]; then + return 1 + fi +} + parse_backup_volfile_servers () { local server_list=$1 @@ -63,40 +70,43 @@ parse_backup_volfile_servers () servers=$(echo ${server_list} | sed 's/\:/ /g') for server in ${servers}; do - length=$(echo $server | wc -c) - if [ ${length} -gt ${HOST_NAME_MAX} ]; then - echo "Hostname:${server} provided is too long.. skipping" + is_valid_hostname ${server} + if [ $? -eq 1 ]; then continue fi - new_servers=$(echo "$new_servers $server") + new_servers=$(echo "${new_servers} ${server}") done + echo ${new_servers} } -parse_backupvolfile_server () +parse_volfile_servers () { - local server=$1 + local server_list=$1 + local servers="" + local new_servers="" - length=$(echo $server | wc -c) - if [ ${length} -gt ${HOST_NAME_MAX} ]; then - echo "Hostname:${server} provided is too long.. 
exiting" - exit 1 - fi + servers=$(echo ${server_list} | sed 's/,/ /g') + for server in ${servers}; do + is_valid_hostname ${server} + if [ $? -eq 1 ]; then + continue + fi + new_servers=$(echo "${new_servers} ${server}") + done - echo ${server} + echo ${new_servers} } start_glusterfs () { - # lets the comparsion be case insensitive for all strings - if [ -n "$log_level_str" ]; then - case "$( echo $log_level_str | tr '[a-z]' '[A-Z]')" in + case "$( echo $log_level_str | awk '{print toupper($0)}')" in "ERROR") log_level=$LOG_ERROR; ;; "INFO") - log_level=$LOG_INFO + log_level=$LOG_INFO; ;; "DEBUG") log_level=$LOG_DEBUG; @@ -120,7 +130,7 @@ start_glusterfs () esac fi -#options without values start here + # options without values start here if [ -n "$read_only" ]; then cmd_line=$(echo "$cmd_line --read-only"); fi @@ -130,7 +140,7 @@ start_glusterfs () fi if [ -n "$selinux" ]; then - cmd_line=$(echo "$cmd_line --selinux"); + cmd_line=$(echo "$cmd_line --selinux"); fi if [ -n "$enable_ino32" ]; then @@ -157,7 +167,7 @@ start_glusterfs () cmd_line=$(echo "$cmd_line --aux-gfid-mount"); fi -#options with values start here + # options with values start here if [ -n "$log_level" ]; then cmd_line=$(echo "$cmd_line --log-level=$log_level"); fi @@ -207,8 +217,7 @@ start_glusterfs () fi if [ -n "$xlator_option" ]; then - xlator_option=$(echo $xlator_option | sed s/"xlator-option="/"--xlator-option "/g) - cmd_line=$(echo "$cmd_line $xlator_option"); + cmd_line=$(echo "$cmd_line --xlator-option=$xlator_option"); fi # for rdma volume, we have to fetch volfile with '.rdma' added @@ -218,18 +227,31 @@ start_glusterfs () if [ -z "$volfile_loc" ]; then if [ -n "$server_ip" ]; then - cmd_line=$(echo "$cmd_line --volfile-server=$server_ip"); + servers=$(parse_volfile_servers ${server_ip}); + if [ -n "$servers" ]; then + for i in $(echo ${servers}); do + cmd_line=$(echo "$cmd_line --volfile-server=$i"); + done + else + echo "ERROR: No valid servers found on command line.. exiting" + print_usage + exit 1 + fi if [ -n "$backupvolfile_server" ]; then if [ -z "$backup_volfile_servers" ]; then - server=$(parse_backupvolfile_server ${backupvolfile_server}) - cmd_line=$(echo "$cmd_line --volfile-server=$server"); + is_valid_hostname ${backupvolfile_server}; + if [ $? -eq 1 ]; then + echo "ERROR: Invalid backup server specified.. exiting" + exit 1 + fi + cmd_line=$(echo "$cmd_line --volfile-server=$backupvolfile_server"); fi fi if [ -n "$backup_volfile_servers" ]; then - servers=$(parse_backup_volfile_servers ${backup_volfile_servers}) - for i in $(echo ${servers}); do + backup_servers=$(parse_backup_volfile_servers ${backup_volfile_servers}) + for i in $(echo ${backup_servers}); do cmd_line=$(echo "$cmd_line --volfile-server=$i"); done fi @@ -261,205 +283,307 @@ start_glusterfs () fi cmd_line=$(echo "$cmd_line $mount_point"); - err=0; - $cmd_line; + $cmd_line; inode=$( ${getinode} $mount_point 2>/dev/null); - # this is required if the stat returns error - if [ -z "$inode" ]; then - inode="0"; - fi - - if [ $inode -ne 1 ]; then - err=1; - fi - - if [ $err -eq "1" ]; then - echo "Mount failed. Please check the log file for more details." - umount $mount_point > /dev/null 2>&1; - exit 1; + if [ $? -ne 0 ]; then + echo "Mount failed. Please check the log file for more details." 
+ umount $mount_point > /dev/null 2>&1; + exit 1; fi } -usage () +print_usage () { - -echo "Usage: mount.glusterfs : -o +cat << EOF +Usage: $0 : -o Options: -man 8 mount.glusterfs - -To display the version number of the mount helper: -mount.glusterfs --version" - +man 8 $0 +To display the version number of the mount helper: $0 -V +EOF } # check for recursive mounts. i.e, mounting over an existing brick check_recursive_mount () { if [ $1 = "/" ]; then - echo Cannot mount over root; + echo "Cannot mount over root"; exit 2; fi + # GFID check first # remove trailing / from mount point mnt_dir=${1%/}; - export PATH; - # check whether getfattr exists - which getfattr > /dev/null 2>&1; - if [ $? -ne 0 ]; then - return; - fi - - getfattr -n trusted.gfid $mnt_dir 2>/dev/null | grep -iq "trusted.gfid="; - if [ $? -eq 0 ]; then - echo "ERROR: $mnt_dir is in use as a brick of a gluster volume"; - exit 2; + if [ -n ${getfattr} ]; then + ${getfattr} -n trusted.gfid $mnt_dir 2>/dev/null | grep -iq "trusted.gfid="; + if [ $? -eq 0 ]; then + echo "ERROR: $mnt_dir is in use as a brick of a gluster volume"; + exit 2; + fi fi # check if the mount point is a brick's parent directory GLUSTERD_WORKDIR="/var/lib/glusterd"; - ls -L "$GLUSTERD_WORKDIR"/vols/*/bricks/* > /dev/null 2>&1; + lsL "$GLUSTERD_WORKDIR"/vols/*/bricks/* > /dev/null 2>&1; if [ $? -ne 0 ]; then return; fi - brick_path=`grep ^path "$GLUSTERD_WORKDIR"/vols/*/bricks/* | cut -d "=" -f 2`; + brick_path=`grep ^path "$GLUSTERD_WORKDIR"/vols/*/bricks/* 2>/dev/null | cut -d "=" -f 2`; root_inode=`${lgetinode} /`; root_dev=`${lgetdev} /`; mnt_inode=`${lgetinode} $mnt_dir`; mnt_dev=`${lgetdev} $mnt_dir`; - for brick in "$brick_path"; - do + for brick in "$brick_path"; do # evaluate brick path to see if this is local, if non-local, skip iteration ls $brick > /dev/null 2>&1; if [ $? -ne 0 ]; then continue; fi - getfattr -n trusted.gfid "$brick" 2>/dev/null | grep -iq "trusted.gfid="; - if [ $? -ne 0 ]; then - continue; + + if [ -n ${getfattr} ]; then + ${getfattr} -n trusted.gfid "$brick" 2>/dev/null | grep -iq "trusted.gfid="; + if [ $? -eq 0 ]; then + # brick is local + while [ 1 ]; do + tmp_brick="$brick"; + brick="$brick"/..; + brick_dev=`${lgetdev} $brick`; + brick_inode=`${lgetinode} $brick`; + if [ "$mnt_inode" -eq "$brick_inode" \ + -a "$mnt_dev" -eq "$brick_dev" ]; then + echo "ERROR: ${mnt_dir} is a parent of the brick ${tmp_brick}"; + exit 2; + fi + [ "$root_inode" -ne "$brick_inode" \ + -o "$root_dev" -ne "$brick_dev" ] || break; + done; + else + continue; + fi else - # brick is local - while [ 1 ]; - do - tmp_brick="$brick"; - brick="$brick"/..; - brick_dev=`${lgetdev} $brick`; - brick_inode=`${lgetinode} $brick`; - if [ "$mnt_inode" -eq "$brick_inode" -a "$mnt_dev" -eq "$brick_dev" ]; then - echo ERROR: $mnt_dir is a parent of the brick $tmp_brick; - exit 2; - fi - [ "$root_inode" -ne "$brick_inode" -o "$root_dev" -ne "$brick_dev" ] || break; - done; + continue; fi done; } -main () +with_options() { - helper=$(echo "$@" | sed -n 's/.*\--[ ]*\([^ ]*\).*/\1/p'); - in_opt="no" - pos_args=0 - for opt in "$@"; do - if [ "$in_opt" = "yes" ]; then - for pair in $(echo "$opt" | tr "," " "); do - # Handle options without values. 
- case "$pair" in - "ro") read_only=1 ;; - "acl") acl=1 ;; - "selinux") selinux=1 ;; - "worm") worm=1 ;; - "fopen-keep-cache") fopen_keep_cache=1 ;; - "enable-ino32") enable_ino32=1 ;; - "mem-accounting") mem_accounting=1;; - "aux-gfid-mount") - if [ `uname -s` = "Linux" ]; then - aux_gfid_mount=1 - fi - ;; - # "mount -t glusterfs" sends this, but it's useless. - "rw") ;; - # these ones are interpreted during system initialization - "noauto") ;; - "_netdev") ;; - *) - key=$(echo "$pair" | cut -f1 -d'='); - value=$(echo "$pair" | cut -f2- -d'='); - - # Handle options with values. - case "$key" in - "log-level") log_level_str=$value ;; - "log-file") log_file=$value ;; - "transport") transport=$value ;; - "direct-io-mode") direct_io_mode=$value ;; - "volume-name") volume_name=$value ;; - "volume-id") volume_id=$value ;; - "volfile-check") volfile_check=$value ;; - "server-port") server_port=$value ;; - "attribute-timeout") - attribute_timeout=$value ;; - "entry-timeout") entry_timeout=$value ;; - "negative-timeout") negative_timeout=$value ;; - "gid-timeout") gid_timeout=$value ;; - "background-qlen") bg_qlen=$value ;; - "backup-volfile-servers") backup_volfile_servers=$value ;; - ## Place-holder backward compatibility - "backupvolfile-server") backupvolfile_server=$value ;; - "congestion-threshold") cong_threshold=$value ;; - "xlator-option") xlator_option=$xlator_option" "$pair ;; - "fuse-mountopts") fuse_mountopts=$value ;; - "use-readdirp") use_readdirp=$value ;; - *) - # Passthru - [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts," - fuse_mountopts="$fuse_mountopts$pair" - ;; - esac - esac - done - in_opt="no" - elif [ "$opt" = "-o" ]; then - in_opt="yes" + local key=$1 + local value=$2 + + # Handle options with values. + case "$key" in + "log-level") + log_level_str=$value + ;; + "log-file") + log_file=$value + ;; + "transport") + transport=$value + ;; + "direct-io-mode") + direct_io_mode=$value + ;; + "volume-name") + volume_name=$value + ;; + "volume-id") + volume_id=$value + ;; + "volfile-check") + volfile_check=$value + ;; + "server-port") + server_port=$value + ;; + "attribute-timeout") + attribute_timeout=$value + ;; + "entry-timeout") + entry_timeout=$value + ;; + "negative-timeout") + negative_timeout=$value + ;; + "gid-timeout") + gid_timeout=$value + ;; + "background-qlen") + bg_qlen=$value + ;; + "backup-volfile-servers") + backup_volfile_servers=$value + ;; + "backupvolfile-server") + backupvolfile_server=$value + ;; + "congestion-threshold") + cong_threshold=$value + ;; + "xlator-option") + xlator_option=$value + ;; + "fuse-mountopts") + fuse_mountopts=$value + ;; + "use-readdirp") + use_readdirp=$value + ;; + *) + echo "Invalid option: $key" + exit 0 + ;; + esac +} + +without_options() +{ + local option=$1 + # Handle options without values. + case "$option" in + "ro") + read_only=1 + ;; + "acl") + acl=1 + ;; + "selinux") + selinux=1 + ;; + "worm") + worm=1 + ;; + "fopen-keep-cache") + fopen_keep_cache=1 + ;; + "enable-ino32") + enable_ino32=1 + ;; + "mem-accounting") + mem_accounting=1 + ;; + "aux-gfid-mount") + if [ ${uname_s} = "Linux" ]; then + aux_gfid_mount=1 + fi + ;; + # "mount -t glusterfs" sends this, but it's useless. 
+ "rw") + ;; + # these ones are interpreted during system initialization + "noauto") + ;; + "_netdev") + ;; + *) + echo "Invalid option $option"; + exit 0 + ;; + esac +} + +parse_options() +{ + local optarg=${1} + for pair in $(echo ${optarg//,/ }); do + key=$(echo "$pair" | cut -f1 -d'='); + value=$(echo "$pair" | cut -f2- -d'='); + if [ "$key" = "$value" ]; then + without_options $pair; else - case $pos_args in - 0) volfile_loc=$opt ;; - 1) mount_point=$opt ;; - *) echo "extra arguments at end (ignored)" ;; - esac - pos_args=$((pos_args+1)) + with_options $key $value; fi done - if [ $in_opt = "yes" -o $pos_args -lt 2 ]; then - usage - exit 1 - fi +} + +update_updatedb() +{ + # Append fuse.glusterfs to PRUNEFS variable in updatedb.conf(5). + # updatedb(8) should not index files under GlusterFS, indexing + # GlusterFS is not necessary and should be avoided. + # Following code disables updatedb crawl on 'glusterfs' + test -f $UPDATEDBCONF && { + if ! grep -q 'glusterfs' $UPDATEDBCONF; then + sed 's/\(PRUNEFS.*\)"/\1 fuse.glusterfs"/' $UPDATEDBCONF \ + > ${UPDATEDBCONF}.bak + mv -f ${UPDATEDBCONF}.bak $UPDATEDBCONF + fi + } +} + +main () +{ + + volfile_loc=$1 + mount_point=$2 + + ## `mount` specifies options as a last argument + shift 2; + while getopts "Vo:h" opt; do + case "${opt}" in + o) + parse_options ${OPTARG}; + ;; + V) + ${cmd_line} -V; + exit 0; + ;; + h) + print_usage; + exit 0; + ;; + ?) + print_usage; + exit 0; + ;; + esac + done [ -r "$volfile_loc" ] || { server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:.\-]*\):.*/\1/p'); - test_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p'); - [ -n "$test_str" ] && { - volume_id="$test_str"; + volume_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p'); + [ -n "$volume_str" ] && { + volume_id="$volume_str"; } volfile_loc=""; } - # - [ -n "$helper" ] && { - cmd_line=$(echo "$cmd_line --$helper"); - exec $cmd_line; + [ -z "$volume_id" -o -z "$server_ip" ] && { + cat < ${UPDATEDBCONF}.bak - mv -f ${UPDATEDBCONF}.bak $UPDATEDBCONF - fi - } + update_updatedb; start_glusterfs; } -- cgit From 7fc2499db89e385332f09fb06c10cb524f761875 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Wed, 11 Dec 2013 17:30:13 +0530 Subject: glusterd: fix error in remove-brick-replica validation Problem: Reducing replica count of a volume using remove-brick command fails if bricks are specified in a random order. Fix: Modify subvol_matcher_verify() to permit order agnostic replica count reduction. Change-Id: I1f3d33e82a70d9b69c297f69c4c1b847937d1031 BUG: 1040408 Signed-off-by: Ravishankar N Reviewed-on: http://review.gluster.org/6489 Reviewed-by: Krishnan Parthasarathi Tested-by: Gluster Build System --- tests/bugs/bug-1040408.t | 31 +++++++++++++ xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 62 +++++++------------------- 2 files changed, 46 insertions(+), 47 deletions(-) create mode 100644 tests/bugs/bug-1040408.t diff --git a/tests/bugs/bug-1040408.t b/tests/bugs/bug-1040408.t new file mode 100644 index 000000000..2982d6a81 --- /dev/null +++ b/tests/bugs/bug-1040408.t @@ -0,0 +1,31 @@ +#!/bin/bash + +#Test case: Create a distributed replicate volume, and reduce +#replica count + +. $(dirname $0)/../include.rc +. 
$(dirname $0)/../volume.rc + +cleanup; + +#Basic checks +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info + +#Create a 2X3 distributed-replicate volume +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..6}; +TEST $CLI volume start $V0 + +# Reduce to 2x2 volume by specifying bricks in reverse order +function remove_brick_status { + $CLI volume remove-brick $V0 replica 2 \ + $H0:$B0/${V0}6 $H0:$B0/${V0}3 force 2>&1 |grep -oE "success|failed" +} +EXPECT "success" remove_brick_status; + +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0; +TEST ! $CLI volume info $V0; + +cleanup; diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 6c316af88..d2dc88ed6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -585,10 +585,23 @@ subvol_matcher_update (int *subvols, glusterd_volinfo_t *volinfo, static int subvol_matcher_verify (int *subvols, glusterd_volinfo_t *volinfo, char *err_str, - size_t err_len, char *vol_type) + size_t err_len, char *vol_type, int replica_count) { int i = 0; int ret = 0; + int count = volinfo->replica_count-replica_count; + + if (replica_count) { + for (i = 0; i < volinfo->subvol_count; i++) { + if (subvols[i] != count) { + ret = -1; + snprintf (err_str, err_len, "Remove exactly %d" + " brick(s) from each subvolume.", count); + break; + } + } + return ret; + } do { @@ -598,7 +611,6 @@ subvol_matcher_verify (int *subvols, glusterd_volinfo_t *volinfo, char *err_str, ret = -1; snprintf (err_str, err_len, "Bricks not from same subvol for %s", vol_type); - gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str); break; } } while (++i < volinfo->subvol_count); @@ -626,16 +638,11 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) glusterd_volinfo_t *volinfo = NULL; glusterd_brickinfo_t *brickinfo = NULL; int *subvols = NULL; - glusterd_brickinfo_t *tmp = NULL; char err_str[2048] = {0}; gf_cli_rsp rsp = {0,}; void *cli_rsp = NULL; char vol_type[256] = {0,}; int32_t replica_count = 0; - int32_t brick_index = 0; - int32_t tmp_brick_idx = 0; - int found = 0; - int diff_count = 0; char *volname = 0; xlator_t *this = NULL; @@ -826,45 +833,6 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) (volinfo->brick_count <= volinfo->dist_leaf_count)) continue; - if (replica_count) { - /* do the validation of bricks here */ - /* -2 because i++ is already done, and i starts with 1, - instead of 0 */ - diff_count = (volinfo->replica_count - replica_count); - brick_index = (((i -2) / diff_count) * volinfo->replica_count); - tmp_brick_idx = 0; - found = 0; - list_for_each_entry (tmp, &volinfo->bricks, brick_list) { - tmp_brick_idx++; - gf_log (this->name, GF_LOG_TRACE, - "validate brick %s:%s (%d %d %d)", - tmp->hostname, tmp->path, tmp_brick_idx, - brick_index, volinfo->replica_count); - if (tmp_brick_idx <= brick_index) - continue; - if (tmp_brick_idx > - (brick_index + volinfo->replica_count)) - break; - if ((!strcmp (tmp->hostname,brickinfo->hostname)) && - !strcmp (tmp->path, brickinfo->path)) { - found = 1; - break; - } - } - if (found) - continue; - - snprintf (err_str, sizeof (err_str), "Bricks are from " - "same subvol"); - gf_log (this->name, GF_LOG_INFO, - "failed to validate brick %s:%s (%d %d %d)", - tmp->hostname, tmp->path, tmp_brick_idx, - brick_index, volinfo->replica_count); - ret = -1; - /* brick order is not valid */ - goto out; - } - /* Find which subvolume the brick belongs to */ subvol_matcher_update (subvols, volinfo, brickinfo); } 
@@ -874,7 +842,7 @@ __glusterd_handle_remove_brick (rpcsvc_request_t *req) (volinfo->subvol_count > 1)) { ret = subvol_matcher_verify (subvols, volinfo, err_str, sizeof(err_str), - vol_type); + vol_type, replica_count); if (ret) goto out; } -- cgit From 51a614384e694f37d957a3aa3a562c34877e71d5 Mon Sep 17 00:00:00 2001 From: "Kaleb S. KEITHLEY" Date: Tue, 10 Dec 2013 07:26:46 -0500 Subject: build: Merge glusterfs.spec changes from Fedora sync with FedoraSCM glusterfs.spec N.B. the %doc hack for clear_xattrs.sh doesn't work in the CentOS 5 mock builds of the rpm.t regression test, thus I'm leaving that out for now Change-Id: I6fa85618c916a90fdbed5b231ea58bb51c9d2b70 BUG: 950083 Signed-off-by: Kaleb S. KEITHLEY Reviewed-on: http://review.gluster.org/6473 Tested-by: Gluster Build System Reviewed-by: Niels de Vos --- glusterfs.spec.in | 224 ++++++++++++++++++++++++++---------------------------- 1 file changed, 109 insertions(+), 115 deletions(-) diff --git a/glusterfs.spec.in b/glusterfs.spec.in index f7c2fc5b8..1bbb17f11 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -3,7 +3,7 @@ %global _for_fedora_koji_builds 0 # uncomment and add '%' to use the prereltag for pre-releases -# global prereltag beta4 +# %%global prereltag qa3 # if you wish to compile an rpm without rdma support, compile like this... # rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without rdma @@ -46,7 +46,7 @@ # rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without bd %{?_without_bd:%global _without_bd --disable-bd-xlator} -%if ( 0%{?rhel} && 0%{?rhel} < 6 ) +%if ( 0%{?rhel} && 0%{?rhel} < 6 || 0%{?sles_version} ) %define _without_bd --disable-bd-xlator %endif @@ -65,15 +65,15 @@ # From https://fedoraproject.org/wiki/Packaging:Python#Macros %if ( 0%{?rhel} && 0%{?rhel} <= 5 ) -%{!?python_sitelib: %global python_sitelib %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")} -%{!?python_sitearch: %global python_sitearch %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")} +%{!?python_sitelib: %global python_sitelib %(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")} +%{!?python_sitearch: %global python_sitearch %(python -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")} %endif Summary: Cluster File System %if ( 0%{_for_fedora_koji_builds} ) Name: glusterfs -Version: 3.4.1 -Release: 3%{?prereltag:.%{prereltag}}%{?dist} +Version: 3.5.0 +Release: 0.1%{?prereltag:.%{prereltag}}%{?dist} Vendor: Fedora Project %else Name: @PACKAGE_NAME@ @@ -94,9 +94,6 @@ Source5: glusterfsd.logrotate Source6: rhel5-load-fuse-modules Source11: glusterfsd.service Source13: glusterfsd.init -Patch0: %{name}-3.2.5.configure.ac.patch -Patch1: %{name}-3.3.0.libglusterfs.Makefile.patch -Patch2: %{name}-3.3.1.rpc.rpcxprt.rdma.name.c.patch %else Source0: @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz %endif @@ -118,7 +115,7 @@ Requires(postun): systemd-units %define _init_disable() /bin/systemctl disable %1.service ; %define _init_restart() /bin/systemctl try-restart %1.service ; %define _init_stop() /bin/systemctl stop %1.service ; -%define _init_install() %{__install} -D -p -m 0644 %1 %{buildroot}%{_unitdir}/%2.service ; +%define _init_install() install -D -p -m 0644 %1 %{buildroot}%{_unitdir}/%2.service ; # can't seem to make a generic macro that works %define _init_glusterd %{_unitdir}/glusterd.service %define _init_glusterfsd %{_unitdir}/glusterfsd.service @@ -134,7 +131,7 @@ Requires(postun): 
/sbin/service %define _init_disable() /sbin/chkconfig --del %1 ; %define _init_restart() /sbin/service %1 condrestart &>/dev/null ; %define _init_stop() /sbin/service %1 stop &>/dev/null ; -%define _init_install() %{__install} -D -p -m 0755 %1 %{buildroot}%{_sysconfdir}/init.d/%2 ; +%define _init_install() install -D -p -m 0755 %1 %{buildroot}%{_sysconfdir}/init.d/%2 ; # can't seem to make a generic macro that works %define _init_glusterd %{_sysconfdir}/init.d/glusterd %define _init_glusterfsd %{_sysconfdir}/init.d/glusterfsd @@ -158,10 +155,11 @@ BuildRequires: lvm2-devel BuildRequires: glib2-devel %endif -Obsoletes: hekafs <= 0.7 +Obsoletes: hekafs Obsoletes: %{name}-libs <= 2.0.0 Obsoletes: %{name}-common < %{version}-%{release} Obsoletes: %{name}-core < %{version}-%{release} +Obsoletes: %{name}-ufo Provides: %{name}-libs = %{version}-%{release} Provides: %{name}-common = %{version}-%{release} Provides: %{name}-core = %{version}-%{release} @@ -173,14 +171,16 @@ Provides: %{name}-core = %{version}-%{release} %if ( 0%{?rhel} == 6 ) # filter_setup exists in RHEL6 only %filter_provides_in %{_libdir}/glusterfs/%{version}/ - %global __filter_from_req %{?__filter_from_req} | %{__grep} -v -P '^(?!lib).*\.so.*$' + %global __filter_from_req %{?__filter_from_req} | grep -v -P '^(?!lib).*\.so.*$' %filter_setup %else - # modern rpm and current Fedora do not generate requires if the provides - # are filtered + # modern rpm and current Fedora do not generate requires when the + # provides are filtered %global __provides_exclude_from ^%{_libdir}/glusterfs/%{version}/.*$ %endif +%{!?_pkgdocdir: %global _pkgdocdir %{_docdir}/%{name}-%{version}} + %if ( 0%{?rhel} && 0%{?rhel} < 6 ) # _sharedstatedir is not provided by RHEL5 %define _sharedstatedir /var/lib @@ -346,7 +346,7 @@ This package provides the glusterfs libgfapi library. %package resource-agents Summary: OCF Resource Agents for GlusterFS License: GPLv3+ -%if ( ! ( 0%{?rhel} && 0%{?rhel} < 6 ) ) +%if ( ! ( 0%{?rhel} && 0%{?rhel} < 6 || 0%{?sles_version} ) ) # EL5 does not support noarch sub-packages BuildArch: noarch %endif @@ -423,13 +423,6 @@ regression testing of Gluster. %prep %setup -q -n %{name}-%{version}%{?prereltag} -%if ( 0%{_for_fedora_koji_builds} ) -#%patch0 -p0 -%patch1 -p0 -F4 -%if ( "%{version}" == "3.3.1" ) -%patch2 -p1 -%endif -%endif %build ./autogen.sh @@ -446,67 +439,74 @@ regression testing of Gluster. 
# fix hardening and remove rpath in shlibs %if ( 0%{?fedora} && 0%{?fedora} > 17 ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) -%{__sed} -i 's| \\\$compiler_flags |&\\\$LDFLAGS |' libtool +sed -i 's| \\\$compiler_flags |&\\\$LDFLAGS |' libtool %endif -%{__sed} -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|' libtool -%{__sed} -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|' libtool +sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|' libtool +sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|' libtool -%{__make} %{?_smp_mflags} +make %{?_smp_mflags} pushd api/examples -FLAGS="$RPM_OPT_FLAGS" %{__python} setup.py build +FLAGS="$RPM_OPT_FLAGS" python setup.py build popd %install -%{__rm} -rf %{buildroot} -%{__make} install DESTDIR=%{buildroot} +rm -rf %{buildroot} +make install DESTDIR=%{buildroot} # install the gfapi Python library in /usr/lib/python*/site-packages pushd api/examples -%{__python} setup.py install --skip-build --verbose --root %{buildroot} +python setup.py install --skip-build --verbose --root %{buildroot} popd # Install include directory -%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs -%{__install} -p -m 0644 libglusterfs/src/*.h \ +mkdir -p %{buildroot}%{_includedir}/glusterfs +install -p -m 0644 libglusterfs/src/*.h \ %{buildroot}%{_includedir}/glusterfs/ -%{__install} -p -m 0644 contrib/uuid/*.h \ +install -p -m 0644 contrib/uuid/*.h \ %{buildroot}%{_includedir}/glusterfs/ # Following needed by hekafs multi-tenant translator -%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs/rpc -%{__install} -p -m 0644 rpc/rpc-lib/src/*.h \ +mkdir -p %{buildroot}%{_includedir}/glusterfs/rpc +install -p -m 0644 rpc/rpc-lib/src/*.h \ %{buildroot}%{_includedir}/glusterfs/rpc/ -%{__install} -p -m 0644 rpc/xdr/src/*.h \ +install -p -m 0644 rpc/xdr/src/*.h \ %{buildroot}%{_includedir}/glusterfs/rpc/ -%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs/server -%{__install} -p -m 0644 xlators/protocol/server/src/*.h \ +mkdir -p %{buildroot}%{_includedir}/glusterfs/server +install -p -m 0644 xlators/protocol/server/src/*.h \ %{buildroot}%{_includedir}/glusterfs/server/ %if ( 0%{_for_fedora_koji_builds} ) -%{__install} -D -p -m 0644 %{SOURCE1} \ +install -D -p -m 0644 %{SOURCE1} \ %{buildroot}%{_sysconfdir}/sysconfig/glusterd -%{__install} -D -p -m 0644 %{SOURCE2} \ +install -D -p -m 0644 %{SOURCE2} \ %{buildroot}%{_sysconfdir}/sysconfig/glusterfsd %else -%{__install} -D -p -m 0644 extras/glusterd-sysconfig \ +install -D -p -m 0644 extras/glusterd-sysconfig \ %{buildroot}%{_sysconfdir}/sysconfig/glusterd %endif %if ( 0%{_for_fedora_koji_builds} ) %if ( 0%{?rhel} && 0%{?rhel} <= 5 ) -%{__install} -D -p -m 0755 %{SOURCE6} \ +install -D -p -m 0755 %{SOURCE6} \ %{buildroot}%{_sysconfdir}/sysconfig/modules/glusterfs-fuse.modules %endif %endif -%{__mkdir_p} %{buildroot}%{_localstatedir}/log/glusterd -%{__mkdir_p} %{buildroot}%{_localstatedir}/log/glusterfs -%{__mkdir_p} %{buildroot}%{_localstatedir}/log/glusterfsd -%{__mkdir_p} %{buildroot}%{_localstatedir}/run/gluster +mkdir -p %{buildroot}%{_localstatedir}/log/glusterd +mkdir -p %{buildroot}%{_localstatedir}/log/glusterfs +mkdir -p %{buildroot}%{_localstatedir}/log/glusterfsd +mkdir -p %{buildroot}%{_localstatedir}/run/gluster # Remove unwanted files from all the shared libraries find %{buildroot}%{_libdir} -name '*.a' -delete find %{buildroot}%{_libdir} -name '*.la' -delete -# Remove installed docs, they're included by %%doc -%{__rm} -rf %{buildroot}%{_datadir}/doc/glusterfs/ +# Remove installed docs, 
the ones we want are included by %%doc, in +# /usr/share/doc/glusterfs or /usr/share/doc/glusterfs-x.y.z depending +# on the distribution +%if ( 0%{?fedora} && 0%{?fedora} > 19 ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) +rm -rf %{buildroot}%{_pkgdocdir}/* +%else +rm -rf %{buildroot}%{_defaultdocdir}/%{name} +mkdir -p %{buildroot}%{_pkgdocdir} +%endif head -50 ChangeLog > ChangeLog.head && mv ChangeLog.head ChangeLog cat << EOM >> ChangeLog @@ -515,12 +515,12 @@ https://forge.gluster.org/glusterfs-core/glusterfs/commits/v%{version}%{?prerelt EOM # Remove benchmarking and other unpackaged files -%{__rm} -rf %{buildroot}/benchmarking -%{__rm} -f %{buildroot}/glusterfs-mode.el -%{__rm} -f %{buildroot}/glusterfs.vim +rm -rf %{buildroot}/benchmarking +rm -f %{buildroot}/glusterfs-mode.el +rm -f %{buildroot}/glusterfs.vim # Create working directory -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd # Update configuration file to /var/lib working directory sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sharedstatedir}/glusterd|g' \ @@ -533,41 +533,41 @@ sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sha %if ( 0%{_for_fedora_koji_builds} ) # Client logrotate entry -%{__install} -D -p -m 0644 %{SOURCE3} \ +install -D -p -m 0644 %{SOURCE3} \ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-fuse # Server logrotate entry -%{__install} -D -p -m 0644 %{SOURCE4} \ +install -D -p -m 0644 %{SOURCE4} \ %{buildroot}%{_sysconfdir}/logrotate.d/glusterd # Legacy server logrotate entry -%{__install} -D -p -m 0644 %{SOURCE5} \ +install -D -p -m 0644 %{SOURCE5} \ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfsd %else -%{__install} -D -p -m 0644 extras/glusterfs-logrotate \ +install -D -p -m 0644 extras/glusterfs-logrotate \ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs %endif %if ( 0%{!?_without_georeplication:1} ) # geo-rep ghosts -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/geo-replication +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf -%{__install} -D -p -m 0644 extras/glusterfs-georep-logrotate \ +install -D -p -m 0644 extras/glusterfs-georep-logrotate \ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep %endif %if ( 0%{!?_without_syslog:1} ) %if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 ) -%{__install} -D -p -m 0644 extras/gluster-rsyslog-7.2.conf \ +install -D -p -m 0644 extras/gluster-rsyslog-7.2.conf \ %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example %endif %if ( 0%{?rhel} && 0%{?rhel} == 6 ) -%{__install} -D -p -m 0644 extras/gluster-rsyslog-5.8.conf \ +install -D -p -m 0644 extras/gluster-rsyslog-5.8.conf \ %{buildroot}%{_sysconfdir}/rsyslog.d/gluster.conf.example %endif %if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 ) -%{__install} -D -p -m 0644 extras/logger.conf.example \ +install -D -p -m 0644 extras/logger.conf.example \ %{buildroot}%{_sysconfdir}/glusterfs/logger.conf.example %endif %endif @@ -575,47 +575,47 @@ touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.co # the rest of the ghosts touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info touch %{buildroot}%{_sharedstatedir}/glusterd/options -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1 -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop -%{__mkdir_p} 
%{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/post -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/pre -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/post -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/pre -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick/post -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick/post -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick/pre -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set/post -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set/pre -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create/post -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create/pre -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete/post -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete/pre -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file/post -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file/pre -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/post -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/pre -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/glustershd -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/peers -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/vols -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/groups -%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/nfs/run +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1 +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/post +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/stop/pre +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/post +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/start/pre +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick/post +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick/post +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/add-brick/pre +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set/post +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/set/pre +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create/post +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/create/pre 
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete/post +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/delete/pre +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file/post +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/copy-file/pre +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/post +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/pre +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/glustershd +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/peers +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/vols +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/groups +mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/nfs/run touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/nfs-server.vol touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/run/nfs.pid find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glusterfs %clean -%{__rm} -rf %{buildroot} +rm -rf %{buildroot} %post /sbin/ldconfig @@ -634,8 +634,7 @@ find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glus %endif %files -%defattr(-,root,root,-) -%doc ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL README THANKS +%doc ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL README THANKS extras/clear_xattrs.sh %config(noreplace) %{_sysconfdir}/logrotate.d/* %config(noreplace) %{_sysconfdir}/sysconfig/* %if ( 0%{!?_without_syslog:1} ) @@ -682,7 +681,6 @@ find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glus %if ( 0%{!?_without_rdma:1} ) %files rdma -%defattr(-,root,root,-) %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/rdma* %endif @@ -694,7 +692,6 @@ if [ $1 -ge 1 ]; then fi %files geo-replication -%defattr(-,root,root) %{_libexecdir}/glusterfs/gsyncd %{_libexecdir}/glusterfs/python/syncdaemon/* %{_libexecdir}/glusterfs/gverify.sh @@ -705,7 +702,6 @@ fi %endif %files fuse -%defattr(-,root,root,-) %if ( 0%{_for_fedora_koji_builds} ) %config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-fuse %endif @@ -721,8 +717,6 @@ fi %endif %files server -%defattr(-,root,root,-) -%doc extras/clear_xattrs.sh %if ( 0%{_for_fedora_koji_builds} ) %config(noreplace) %{_sysconfdir}/logrotate.d/glusterd %endif @@ -747,9 +741,9 @@ fi %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs* %ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options -# This is really ugly, but I have no idea how to mark these directories in an -# other way. They should belong to the glusterfs-server package, but don't -# exist after installation. They are generated on the first start... +# This is really ugly, but I have no idea how to mark these directories in +# any other way. They should belong to the glusterfs-server package, but +# don't exist after installation. They are generated on the first start... 
%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1 %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop @@ -796,13 +790,11 @@ fi %if ( 0%{!?_without_ocf:1} ) %files resource-agents -%defattr(-,root,root) # /usr/lib is the standard for OCF, also on x86_64 %{_prefix}/lib/ocf/resource.d/glusterfs %endif %files devel -%defattr(-,root,root,-) %{_includedir}/glusterfs %exclude %{_includedir}/glusterfs/y.tab.h %exclude %{_includedir}/glusterfs/api @@ -816,7 +808,6 @@ fi %{_includedir}/glusterfs/api/* %files regression-tests -%defattr(-,root,root,-) %{_prefix}/share/glusterfs/* %exclude %{_prefix}/share/glusterfs/tests/basic/rpm.t @@ -831,14 +822,14 @@ fi # in gluster.org RPMs.) Be careful to copy them on the off chance that # /etc and /var/lib are on separate file systems if [ -d /etc/glusterd -a ! -h %{_sharedstatedir}/glusterd ]; then - %{__mkdir_p} %{_sharedstatedir}/glusterd + mkdir -p %{_sharedstatedir}/glusterd cp -a /etc/glusterd %{_sharedstatedir}/glusterd rm -rf /etc/glusterd ln -sf %{_sharedstatedir}/glusterd /etc/glusterd fi # Rename old volfiles in an RPM-standard way. These aren't actually -# considered package config files, so %config doesn't work for them. +# considered package config files, so %%config doesn't work for them. if [ -d %{_sharedstatedir}/glusterd/vols ]; then for file in $(find %{_sharedstatedir}/glusterd/vols -name '*.vol'); do newfile=${file}.rpmsave @@ -883,6 +874,9 @@ if [ $1 -ge 1 ]; then fi %changelog +* Tue Dec 10 2013 Kaleb S. KEITHLEY +- Sync with Fedora glusterfs.spec 3.5.0-0.1.qa3 + * Wed Oct 11 2013 Harshavardhana - Add '_sharedstatedir' macro to `/var/lib` on <= RHEL5 (#1003184) @@ -912,16 +906,16 @@ fi - Sync with Fedora glusterfs.spec, add glusterfs-libs RPM for oVirt/qemu-kvm * Thu Jul 25 2013 Csaba Henk -- Added peer_add_secret_pub and peer_gsec_create to %{_libexecdir}/glusterfs +- Added peer_add_secret_pub and peer_gsec_create to %%{_libexecdir}/glusterfs * Thu Jul 25 2013 Aravinda VK -- Added gverify.sh to %{_libexecdir}/glusterfs directory. +- Added gverify.sh to %%{_libexecdir}/glusterfs directory. * Thu Jul 25 2013 Harshavardhana - Allow to build with '--without bd' to disable 'bd' xlator * Thu Jun 27 2013 Kaleb S. KEITHLEY -- fix the hardening fix for shlibs, use %%{__sed} macro, shorter ChangeLog +- fix the hardening fix for shlibs, use %%sed macro, shorter ChangeLog * Wed Jun 26 2013 Niels de Vos - move the mount/api xlator to glusterfs-api -- cgit From 44d644729b57ffbf71c70f21df0ffdd1e5fbb7c7 Mon Sep 17 00:00:00 2001 From: Raghavendra G Date: Wed, 11 Dec 2013 14:32:49 +0530 Subject: features/quota: log usage only if hard limit not exceeded. Change-Id: I60abf576999996e0d0d65534e1e416f6e10994c8 Signed-off-by: Raghavendra G BUG: 969461 Reviewed-on: http://review.gluster.org/6479 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/features/quota/src/quota.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 7156edcad..a531ab123 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -732,10 +732,6 @@ quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this, } UNLOCK (&ctx->lock); - /* We log usage only if quota limit is configured on - that inode. 
*/
-        quota_log_usage (this, ctx, _inode, delta);
-
         if (need_validate) {
                 ret = quota_validate (frame, _inode, this,
                                       quota_validate_cbk);
@@ -763,7 +759,16 @@ quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
                                         = space_available;
                         }

+
+                        if (space_available == 0) {
+                                op_errno = EDQUOT;
+                                goto err;
+                        }
                 }
+
+                /* We log usage only if quota limit is configured on
+                   that inode. */
+                quota_log_usage (this, ctx, _inode, delta);
         }

         if (__is_root_gfid (_inode->gfid)) {
-- cgit

From 7f70a9d2b2a0c3141ccdabb79401d39c871e47a9 Mon Sep 17 00:00:00 2001
From: Krutika Dhananjay
Date: Mon, 9 Dec 2013 17:12:49 +0530
Subject: glusterd: Fix incorrect remove-brick status

PROBLEM: 'remove-brick status' was reported to be showing the status
of a previous rebalance op that was aborted, on the node which doesn't
participate in the remove-brick operation.

FIX: Unconditionally reset defrag status to NOT_STARTED whenever a
remove-brick or a rebalance op is attempted.

Change-Id: Iddf3a14a2ef352e77e0f690fe65aa36ec3011257
BUG: 1040371
Signed-off-by: Krutika Dhananjay
Reviewed-on: http://review.gluster.org/6482
Tested-by: Gluster Build System
Reviewed-by: Krishnan Parthasarathi
Reviewed-by: Kaushal M
Reviewed-by: Vijay Bellur
---
 xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 5 +++++
 xlators/mgmt/glusterd/src/glusterd-rebalance.c | 6 ++++++
 2 files changed, 11 insertions(+)

diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index d2dc88ed6..d832cdf24 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1793,6 +1793,11 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
                 }

         case GF_OP_CMD_START:
+                /* Reset defrag status to 'NOT STARTED' whenever a
+                 * remove-brick/rebalance command is issued to remove
+                 * stale information from previous run.
+                 */
+                volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
                 ret = dict_get_str (dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
                 if (ret) {
                         gf_log (this->name, GF_LOG_DEBUG,
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 7911c3d21..ea8558894 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -634,6 +634,12 @@ glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
         case GF_DEFRAG_CMD_START:
         case GF_DEFRAG_CMD_START_LAYOUT_FIX:
         case GF_DEFRAG_CMD_START_FORCE:
+                /* Reset defrag status to 'NOT STARTED' whenever a
+                 * remove-brick/rebalance command is issued to remove
+                 * stale information from previous run.
+                 */
+                volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
+
                 ret = dict_get_str (dict, GF_REBALANCE_TID_KEY, &task_id_str);
                 if (ret) {
                         gf_log (this->name, GF_LOG_DEBUG, "Missing rebalance "
-- cgit

From f502e28e8b416f80bd9506ac204948681610b305 Mon Sep 17 00:00:00 2001
From: Kaushal M
Date: Tue, 10 Dec 2013 11:34:06 +0530
Subject: glusterd: Save/restore/sync rebalance dict

A dictionary was added to store additional information about a
rebalance process, like the bricks being removed in the case of a
rebalance started by remove-brick. This dictionary wasn't being
stored/restored or synced during volume sync, leading to errors like a
volume status command failing. These issues have been fixed in this
patch. The rebalance dict is now stored/restored and also
exported/imported during volume sync. Also, this makes sure that the
rebalance dict is only created on remove-brick start.
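
For illustration, the rebalance dict being saved has a deliberately flat
shape. A minimal sketch, assuming the libglusterfs dict API used in this
patch, with error handling omitted; "bricks[]" is a hypothetical array of
brick-name strings, not a real variable from the change:

~~~
/* A flat dict: a "count" key plus one "brickN" key per removed brick.
 * This is what gets written to the node_state store and exchanged
 * between peers during volume sync. */
dict_t *bricks_dict = dict_new ();

dict_set_int32 (bricks_dict, "count", count);
for (i = 1; i <= count; i++) {
        char key[256] = {0,};

        snprintf (key, sizeof (key), "brick%d", i);
        dict_set_dynstr (bricks_dict, key, gf_strdup (bricks[i]));
}

/* volinfo keeps its own reference, so the dict outlives this scope
 * and can be read back later by the store and sync code paths */
volinfo->rebal.dict = dict_ref (bricks_dict);
~~~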
This also adds each brick's decommissioned status to the information imported/exported during volume sync. Change-Id: I56fed23dc2de80a96648055fe705e9c3ffd55227 BUG: 1040809 Signed-off-by: Kaushal M Reviewed-on: http://review.gluster.org/6492 Tested-by: Gluster Build System Reviewed-by: Krishnan Parthasarathi Reviewed-by: Vijay Bellur --- xlators/mgmt/glusterd/src/glusterd-brick-ops.c | 60 ++++++------ xlators/mgmt/glusterd/src/glusterd-store.c | 100 +++++++++++++++----- xlators/mgmt/glusterd/src/glusterd-utils.c | 125 ++++++++++++++++++++----- 3 files changed, 208 insertions(+), 77 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index d832cdf24..26d608a2f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1842,19 +1842,22 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) goto out; } - /* Save the list of bricks for later usage. Right now this is required - * for displaying the task parameters with task status in volume status. + /* Save the list of bricks for later usage only on starting a + * remove-brick. Right now this is required for displaying the task + * parameters with task status in volume status. */ - bricks_dict = dict_new (); - if (!bricks_dict) { - ret = -1; - goto out; - } - ret = dict_set_int32 (bricks_dict, "count", count); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to save remove-brick count"); - goto out; + if (GF_OP_CMD_START == cmd) { + bricks_dict = dict_new (); + if (!bricks_dict) { + ret = -1; + goto out; + } + ret = dict_set_int32 (bricks_dict, "count", count); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to save remove-brick count"); + goto out; + } } while ( i <= count) { snprintf (key, 256, "brick%d", i); @@ -1865,20 +1868,22 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) goto out; } - brick_tmpstr = gf_strdup (brick); - if (!brick_tmpstr) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "Failed to duplicate brick name"); - goto out; - } - ret = dict_set_dynstr (bricks_dict, key, brick_tmpstr); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, - "Failed to add brick to dict"); - goto out; + if (GF_OP_CMD_START == cmd) { + brick_tmpstr = gf_strdup (brick); + if (!brick_tmpstr) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to duplicate brick name"); + goto out; + } + ret = dict_set_dynstr (bricks_dict, key, brick_tmpstr); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to add brick to dict"); + goto out; + } + brick_tmpstr = NULL; } - brick_tmpstr = NULL; ret = glusterd_op_perform_remove_brick (volinfo, brick, force, &need_rebalance); @@ -1886,6 +1891,9 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) goto out; i++; } + if (GF_OP_CMD_START == cmd) + volinfo->rebal.dict = dict_ref (bricks_dict); + ret = dict_get_int32 (dict, "replica-count", &replica_count); if (!ret) { gf_log (this->name, GF_LOG_INFO, @@ -1910,8 +1918,6 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) } } } - volinfo->rebal.dict = bricks_dict; - bricks_dict = NULL; ret = glusterd_create_volfiles_and_notify_services (volinfo); if (ret) { diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 5b51aabad..2ba2548d2 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -883,6 +883,19 @@ out: return ret; } +int +_gd_store_rebalance_dict (dict_t *dict, char *key,
data_t *value, void *data) +{ + int ret = -1; + int fd = 0; + + fd = *(int *)data; + + ret = gf_store_save_value (fd, key, value->data); + + return ret; +} + int32_t glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo) { @@ -907,9 +920,14 @@ glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo) if (ret) goto out; - if (volinfo->rebal.defrag_cmd) { - uuid_unparse (volinfo->rebal.rebalance_id, buf); - ret = gf_store_save_value (fd, GF_REBALANCE_TID_KEY, buf); + uuid_unparse (volinfo->rebal.rebalance_id, buf); + ret = gf_store_save_value (fd, GF_REBALANCE_TID_KEY, buf); + if (ret) + goto out; + + if (volinfo->rebal.dict) { + dict_foreach (volinfo->rebal.dict, _gd_store_rebalance_dict, + &fd); } out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -1726,17 +1744,22 @@ out: int32_t glusterd_store_retrieve_node_state (char *volname) { - int32_t ret = -1; - glusterd_volinfo_t *volinfo = NULL; - gf_store_iter_t *iter = NULL; - char *key = NULL; - char *value = NULL; - char volpath[PATH_MAX] = {0,}; - glusterd_conf_t *priv = NULL; - char path[PATH_MAX] = {0,}; - gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + int32_t ret = -1; + glusterd_volinfo_t *volinfo = NULL; + gf_store_iter_t *iter = NULL; + char *key = NULL; + char *value = NULL; + char *dup_value = NULL; + char volpath[PATH_MAX] = {0,}; + glusterd_conf_t *priv = NULL; + char path[PATH_MAX] = {0,}; + gf_store_op_errno_t op_errno = GD_STORE_SUCCESS; + dict_t *tmp_dict = NULL; + xlator_t *this = NULL; - priv = THIS->private; + this = THIS; + GF_ASSERT (this); + priv = this->private; ret = glusterd_volinfo_find (volname, &volinfo); if (ret) { @@ -1766,16 +1789,35 @@ glusterd_store_retrieve_node_state (char *volname) if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG, strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) { volinfo->rebal.defrag_cmd = atoi (value); - } - - if (volinfo->rebal.defrag_cmd) { - if (!strncmp (key, GF_REBALANCE_TID_KEY, - strlen (GF_REBALANCE_TID_KEY))) - uuid_parse (value, volinfo->rebal.rebalance_id); - - if (!strncmp (key, GLUSTERD_STORE_KEY_DEFRAG_OP, - strlen (GLUSTERD_STORE_KEY_DEFRAG_OP))) - volinfo->rebal.op = atoi (value); + } else if (!strncmp (key, GF_REBALANCE_TID_KEY, + strlen (GF_REBALANCE_TID_KEY))) { + uuid_parse (value, volinfo->rebal.rebalance_id); + } else if (!strncmp (key, GLUSTERD_STORE_KEY_DEFRAG_OP, + strlen (GLUSTERD_STORE_KEY_DEFRAG_OP))) { + volinfo->rebal.op = atoi (value); + } else { + if (!tmp_dict) { + tmp_dict = dict_new (); + if (!tmp_dict) { + ret = -1; + goto out; + } + } + dup_value = gf_strdup (value); + if (!dup_value) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Failed to strdup value string"); + goto out; + } + ret = dict_set_str (tmp_dict, key, dup_value); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Error setting data in rebal " + "dict."); + goto out; + } + dup_value = NULL; } GF_FREE (key); @@ -1785,9 +1827,13 @@ glusterd_store_retrieve_node_state (char *volname) ret = gf_store_iter_get_next (iter, &key, &value, &op_errno); } + if (tmp_dict) + volinfo->rebal.dict = dict_ref (tmp_dict); - if (op_errno != GD_STORE_EOF) + if (op_errno != GD_STORE_EOF) { + ret = -1; goto out; + } ret = gf_store_iter_destroy (iter); @@ -1795,6 +1841,12 @@ glusterd_store_retrieve_node_state (char *volname) goto out; out: + if (dup_value) + GF_FREE (dup_value); + if (ret && volinfo->rebal.dict) + dict_unref (volinfo->rebal.dict); + if (tmp_dict) + dict_unref (tmp_dict); gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); return ret; diff --git 
a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 8cf9f790f..f398e49dc 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1956,20 +1956,18 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; - if (volinfo->rebal.defrag_cmd) { - rebalance_id_str = gf_strdup (uuid_utoa - (volinfo->rebal.rebalance_id)); - if (!rebalance_id_str) { - ret = -1; - goto out; - } - memset (key, 0, sizeof (key)); - snprintf (key, 256, "volume%d.rebalance-id", count); - ret = dict_set_dynstr (dict, key, rebalance_id_str); - if (ret) - goto out; - rebalance_id_str = NULL; + rebalance_id_str = gf_strdup (uuid_utoa + (volinfo->rebal.rebalance_id)); + if (!rebalance_id_str) { + ret = -1; + goto out; } + memset (key, 0, sizeof (key)); + snprintf (key, 256, "volume%d.rebalance-id", count); + ret = dict_set_dynstr (dict, key, rebalance_id_str); + if (ret) + goto out; + rebalance_id_str = NULL; memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.rebalance-op", count); @@ -1977,6 +1975,23 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + if (volinfo->rebal.dict) { + snprintf (prefix, sizeof (prefix), "volume%d", count); + ctx.dict = dict; + ctx.prefix = prefix; + ctx.opt_count = 1; + ctx.key_name = "rebal-dict-key"; + ctx.val_name = "rebal-dict-value"; + + dict_foreach (volinfo->rebal.dict, _add_dict_to_prdict, &ctx); + ctx.opt_count--; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebal-dict-count", count); + ret = dict_set_int32 (dict, key, ctx.opt_count); + if (ret) + goto out; + } + memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); ret = dict_set_int32 (dict, key, volinfo->rep_brick.rb_status); @@ -2066,6 +2081,13 @@ glusterd_add_volume_to_dict (glusterd_volinfo_t *volinfo, if (ret) goto out; + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.brick%d.decommissioned", + count, i); + ret = dict_set_int32 (dict, key, brickinfo->decommissioned); + if (ret) + goto out; + i++; } @@ -2733,6 +2755,7 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, int ret = -1; char *hostname = NULL; char *path = NULL; + int decommissioned = 0; glusterd_brickinfo_t *new_brickinfo = NULL; char msg[2048] = {0}; @@ -2758,12 +2781,22 @@ glusterd_import_new_brick (dict_t *vols, int32_t vol_count, goto out; } + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.brick%d.decommissioned", + vol_count, brick_count); + ret = dict_get_int32 (vols, key, &decommissioned); + if (ret) { + /* For backward compatibility */ + ret = 0; + } + ret = glusterd_brickinfo_new (&new_brickinfo); if (ret) goto out; strcpy (new_brickinfo->path, path); strcpy (new_brickinfo->hostname, hostname); + new_brickinfo->decommissioned = decommissioned; //peerinfo might not be added yet (void) glusterd_resolve_brick (new_brickinfo); ret = 0; @@ -2908,6 +2941,43 @@ out: return ret; } +int +gd_import_friend_volume_rebal_dict (dict_t *dict, int count, + glusterd_volinfo_t *volinfo) +{ + int ret = -1; + char key[256] = {0,}; + int dict_count = 0; + char prefix[64] = {0}; + + GF_ASSERT (dict); + GF_ASSERT (volinfo); + + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebal-dict-count", count); + ret = dict_get_int32 (dict, key, &dict_count); + if (ret) { + /* Older peers will not have this dict */ + ret = 0; + goto out; + } + + volinfo->rebal.dict = 
dict_new (); + if(!volinfo->rebal.dict) { + ret = -1; + goto out; + } + + snprintf (prefix, sizeof (prefix), "volume%d", count); + ret = import_prdict_dict (dict, volinfo->rebal.dict, "rebal-dict-key", + "rebal-dict-value", dict_count, prefix); +out: + if (ret && volinfo->rebal.dict) + dict_unref (volinfo->rebal.dict); + gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); + return ret; +} + int32_t glusterd_import_volinfo (dict_t *vols, int count, glusterd_volinfo_t **volinfo) @@ -3071,19 +3141,16 @@ glusterd_import_volinfo (dict_t *vols, int count, goto out; } - if (new_volinfo->rebal.defrag_cmd) { - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "volume%d.rebalance-id", count); - ret = dict_get_str (vols, key, &rebalance_id_str); - if (ret) { - /* This is not present in older glusterfs versions, - * so don't error out - */ - ret = 0; - } else { - uuid_parse (rebalance_id_str, - new_volinfo->rebal.rebalance_id); - } + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "volume%d.rebalance-id", count); + ret = dict_get_str (vols, key, &rebalance_id_str); + if (ret) { + /* This is not present in older glusterfs versions, + * so don't error out + */ + ret = 0; + } else { + uuid_parse (rebalance_id_str, new_volinfo->rebal.rebalance_id); } memset (key, 0, sizeof (key)); @@ -3095,6 +3162,12 @@ glusterd_import_volinfo (dict_t *vols, int count, */ ret = 0; } + ret = gd_import_friend_volume_rebal_dict (vols, count, new_volinfo); + if (ret) { + snprintf (msg, sizeof (msg), "Failed to import rebalance dict " + "for volume."); + goto out; + } memset (key, 0, sizeof (key)); snprintf (key, 256, "volume%d."GLUSTERD_STORE_KEY_RB_STATUS, count); -- cgit From 40e13bc5b44d0b0cdaf7833c848d4a52352e0a13 Mon Sep 17 00:00:00 2001 From: Kaushal M Date: Thu, 8 Aug 2013 15:50:31 +0530 Subject: rpc,glusterd: Use rpc_clnt notifyfn to cleanup mydata rpc: - On a RPC_TRANSPORT_CLEANUP event, rpc_clnt_notify calls the registered notifyfn with a RPC_CLNT_DESTROY event. The notifyfn should properly cleanup the saved mydata on this event. - Break the reconnect chain when an rpc client is disabled. This will prevent new disconnect events which can lead to crashes. glusterd: - Added support for RPC_CLNT_DESTROY in glusterd_brick_rpc_notify - Use a common glusterd_rpc_clnt_unref() function throughout glusterd in place of rpc_clnt_unref(). This function correctly gives up the big-lock before performing the unref.
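For illustration, a notify callback written against this contract frees its private data only on the destroy event. A minimal sketch follows (the function name is illustrative; the signature, event names and GF_FREE come from the code in this patch):

    static int
    my_rpc_notify (struct rpc_clnt *rpc, void *mydata,
                   rpc_clnt_event_t event, void *data)
    {
            switch (event) {
            case RPC_CLNT_CONNECT:
            case RPC_CLNT_DISCONNECT:
                    /* Connection state changes: mydata is still owned
                     * by the rpc layer and must not be freed here. */
                    break;
            case RPC_CLNT_DESTROY:
                    /* rpc_clnt_notify detaches mydata from the client
                     * before raising this event, so this is the last
                     * chance to release it. */
                    GF_FREE (mydata);
                    break;
            default:
                    break;
            }
            return 0;
    }

This mirrors what __glusterd_brick_rpc_notify does in the hunks below.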
Change-Id: I93230441c5089039643fc9f5632477ef1b695348 BUG: 962619 Signed-off-by: Kaushal M Reviewed-on: http://review.gluster.org/5512 Tested-by: Gluster Build System Reviewed-by: Krishnan Parthasarathi Reviewed-by: Vijay Bellur --- rpc/rpc-lib/src/rpc-clnt.c | 9 ++++++++- rpc/rpc-lib/src/rpc-clnt.h | 3 ++- xlators/mgmt/glusterd/src/glusterd-handler.c | 6 ++++-- xlators/mgmt/glusterd/src/glusterd-rebalance.c | 2 +- xlators/mgmt/glusterd/src/glusterd-utils.c | 24 +++++++++++++++++++----- xlators/mgmt/glusterd/src/glusterd-utils.h | 3 +++ xlators/nfs/server/src/nlm4.c | 2 ++ 7 files changed, 39 insertions(+), 10 deletions(-) diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c index ac98a5c91..1e9f307be 100644 --- a/rpc/rpc-lib/src/rpc-clnt.c +++ b/rpc/rpc-lib/src/rpc-clnt.c @@ -416,7 +416,7 @@ rpc_clnt_reconnect (void *trans_ptr) conn->reconnect); conn->reconnect = 0; - if (conn->connected == 0) { + if ((conn->connected == 0) && !clnt->disabled) { ts.tv_sec = 3; ts.tv_nsec = 0; @@ -834,6 +834,7 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata, rpc_request_info_t *req_info = NULL; rpc_transport_pollin_t *pollin = NULL; struct timespec ts = {0, }; + void *clnt_mydata = NULL; conn = mydata; if (conn == NULL) { @@ -870,6 +871,12 @@ rpc_clnt_notify (rpc_transport_t *trans, void *mydata, } case RPC_TRANSPORT_CLEANUP: + if (clnt->notifyfn) { + clnt_mydata = clnt->mydata; + clnt->mydata = NULL; + ret = clnt->notifyfn (clnt, clnt_mydata, + RPC_CLNT_DESTROY, NULL); + } rpc_clnt_destroy (clnt); ret = 0; break; diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h index 584963ad0..2596c3508 100644 --- a/rpc/rpc-lib/src/rpc-clnt.h +++ b/rpc/rpc-lib/src/rpc-clnt.h @@ -19,7 +19,8 @@ typedef enum { RPC_CLNT_CONNECT, RPC_CLNT_DISCONNECT, - RPC_CLNT_MSG + RPC_CLNT_MSG, + RPC_CLNT_DESTROY } rpc_clnt_event_t; diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index 3aafa122b..0407741bb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -3713,10 +3713,12 @@ __glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata, "%s:%s", brickinfo->hostname, brickinfo->path); glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED); - if (rpc_clnt_is_disabled (rpc)) - GF_FREE (brickid); break; + case RPC_CLNT_DESTROY: + GF_FREE (mydata); + mydata = NULL; + break; default: gf_log (this->name, GF_LOG_TRACE, "got some other RPC event %d", event); diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index ea8558894..1ac9d64ce 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -152,7 +152,7 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, glusterd_store_perform_node_state_store (volinfo); if (defrag->rpc) { - rpc_clnt_unref (defrag->rpc); + glusterd_rpc_clnt_unref (priv, defrag->rpc); defrag->rpc = NULL; } if (defrag->cbk_fn) diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index f398e49dc..c11965d40 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1054,7 +1054,7 @@ glusterd_friend_cleanup (glusterd_peerinfo_t *peerinfo) peerctx = peerinfo->rpc->mydata; peerinfo->rpc->mydata = NULL; - peerinfo->rpc = rpc_clnt_unref (peerinfo->rpc); + peerinfo->rpc = glusterd_rpc_clnt_unref (priv, peerinfo->rpc); peerinfo->rpc = NULL; if (peerctx) 
{ GF_FREE (peerctx->errstr); @@ -1466,9 +1466,7 @@ glusterd_brick_disconnect (glusterd_brickinfo_t *brickinfo) brickinfo->rpc = NULL; if (rpc) { - synclock_unlock (&priv->big_lock); - rpc_clnt_unref (rpc); - synclock_lock (&priv->big_lock); + glusterd_rpc_clnt_unref (priv, rpc); } return 0; @@ -3894,12 +3892,13 @@ int32_t glusterd_nodesvc_disconnect (char *server) { struct rpc_clnt *rpc = NULL; + glusterd_conf_t *priv = THIS->private; rpc = glusterd_nodesvc_get_rpc (server); (void)glusterd_nodesvc_set_rpc (server, NULL); if (rpc) - rpc_clnt_unref (rpc); + glusterd_rpc_clnt_unref (priv, rpc); return 0; } @@ -9357,3 +9356,18 @@ out: return ret; } + +rpc_clnt_t * +glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc) +{ + rpc_clnt_t *ret = NULL; + + GF_ASSERT (conf); + GF_ASSERT (rpc); + synclock_unlock (&conf->big_lock); + ret = rpc_clnt_unref (rpc); + synclock_lock (&conf->big_lock); + + return ret; +} + diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 20cd00cbe..9ef09d7b0 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -635,4 +635,7 @@ glusterd_status_has_tasks (int cmd); int gd_stop_rebalance_process (glusterd_volinfo_t *volinfo); + +rpc_clnt_t * +glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc); #endif diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c index 5c5d87412..c186537ea 100644 --- a/xlators/nfs/server/src/nlm4.c +++ b/xlators/nfs/server/src/nlm4.c @@ -918,6 +918,8 @@ nlm_rpcclnt_notify (struct rpc_clnt *rpc_clnt, void *mydata, case RPC_CLNT_DISCONNECT: nlm_unset_rpc_clnt (rpc_clnt); break; + default: + break; } err: -- cgit From a9623ada6f7b39ac2d567f66a496072487d8e6ec Mon Sep 17 00:00:00 2001 From: Vijaykumar M Date: Thu, 12 Dec 2013 11:40:36 +0530 Subject: pathinfo: Provide user namespace access. Locality can now be queried by unprivileged users with key "glusterfs.pathinfo". Setting both "glusterfs.pathinfo" and "trusted.glusterfs.pathinfo" on disk is prevented with this patch.
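As a usage sketch, an unprivileged process can fetch locality with a plain getxattr(2) call on the mount (the mount path, file name and buffer size below are assumptions for illustration):

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/xattr.h>

    int
    main (void)
    {
            char buf[4096] = {0,};
            ssize_t len = 0;

            /* "glusterfs.pathinfo" is the user-accessible key; the
             * "trusted.glusterfs.pathinfo" key still requires
             * privileged access. */
            len = getxattr ("/mnt/glusterfs/file", "glusterfs.pathinfo",
                            buf, sizeof (buf) - 1);
            if (len == -1) {
                    perror ("getxattr");
                    return 1;
            }

            printf ("%s\n", buf);
            return 0;
    }

The same query can be made from the shell with getfattr -n glusterfs.pathinfo on a file in the mount.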
Original Author: Vijay Bellur Change-Id: I4f7a0db8ad59165c4aeda04b23173255157a8b79 Signed-off-by: Vijaykumar M Reviewed-on: http://review.gluster.org/5101 Reviewed-by: Krishnan Parthasarathi Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- libglusterfs/src/glusterfs.h | 9 ++++++--- xlators/cluster/afr/src/afr-inode-read.c | 3 ++- xlators/cluster/dht/src/dht-common.c | 4 ++-- xlators/cluster/stripe/src/stripe.c | 7 ++----- xlators/storage/posix/src/posix-helpers.c | 5 ++++- xlators/storage/posix/src/posix.c | 6 ++---- 6 files changed, 18 insertions(+), 16 deletions(-) diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index dfe443016..ed483d19c 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -84,15 +84,18 @@ #define GF_XATTR_NODE_UUID_KEY "trusted.glusterfs.node-uuid" #define GF_XATTR_VOL_ID_KEY "trusted.glusterfs.volume-id" #define GF_XATTR_LOCKINFO_KEY "trusted.glusterfs.lockinfo" -#define GF_XATTR_GET_REAL_FILENAME_KEY "user.glusterfs.get_real_filename:" +#define GF_XATTR_GET_REAL_FILENAME_KEY "glusterfs.get_real_filename:" +#define GF_XATTR_USER_PATHINFO_KEY "glusterfs.pathinfo" #define QUOTA_LIMIT_KEY "trusted.glusterfs.quota.limit-set" #define GF_READDIR_SKIP_DIRS "readdir-filter-directories" #define BD_XATTR_KEY "user.glusterfs" -#define XATTR_IS_PATHINFO(x) (strncmp (x, GF_XATTR_PATHINFO_KEY, \ - strlen (GF_XATTR_PATHINFO_KEY)) == 0) +#define XATTR_IS_PATHINFO(x) ((strncmp (x, GF_XATTR_PATHINFO_KEY, \ + strlen (x)) == 0) || \ + (strncmp (x, GF_XATTR_USER_PATHINFO_KEY, \ + strlen (x)) == 0)) #define XATTR_IS_NODE_UUID(x) (strncmp (x, GF_XATTR_NODE_UUID_KEY, \ strlen (GF_XATTR_NODE_UUID_KEY)) == 0) #define XATTR_IS_LOCKINFO(x) (strncmp (x, GF_XATTR_LOCKINFO_KEY, \ diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 3e80129f9..0cfebcb9d 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -1410,7 +1410,8 @@ afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk, goto out; } - if (!strcmp (name, GF_XATTR_PATHINFO_KEY)) { + if (!strcmp (name, GF_XATTR_PATHINFO_KEY) || + !strcmp (name, GF_XATTR_USER_PATHINFO_KEY)) { if (is_fgetxattr) { *cbk = afr_fgetxattr_pathinfo_cbk; } else { diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index ed4babd32..f59bc9667 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -2161,7 +2161,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, * (until inode_link() happens) */ if (key && DHT_IS_DIR(layout) && - ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0) + (XATTR_IS_PATHINFO (key) || (strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0))) { (void) strncpy (local->xsel, key, 256); cnt = local->call_cnt = layout->cnt; @@ -2176,7 +2176,7 @@ dht_getxattr (call_frame_t *frame, xlator_t *this, /* node-uuid or pathinfo for files */ if (key && ((strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0) - || (strcmp (key, GF_XATTR_PATHINFO_KEY) == 0))) { + || XATTR_IS_PATHINFO (key))) { cached_subvol = local->cached_subvol; (void) strncpy (local->xsel, key, 256); diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c index 8ac14cb3a..c98126225 100644 --- a/xlators/cluster/stripe/src/stripe.c +++ b/xlators/cluster/stripe/src/stripe.c @@ -5548,9 +5548,7 @@ stripe_getxattr (call_frame_t *frame, xlator_t *this, return 0; } - if (name && - ((strncmp (name, GF_XATTR_PATHINFO_KEY, - strlen 
(GF_XATTR_PATHINFO_KEY)) == 0))) { + if (name && (XATTR_IS_PATHINFO (name))) { if (IA_ISREG (loc->inode->ia_type)) { ret = inode_ctx_get (loc->inode, this, (uint64_t *) &local->fctx); @@ -5637,8 +5635,7 @@ stripe_is_special_xattr (const char *name) if (!strncmp (name, GF_XATTR_LOCKINFO_KEY, strlen (GF_XATTR_LOCKINFO_KEY)) - || !strncmp (name, GF_XATTR_PATHINFO_KEY, - strlen (GF_XATTR_PATHINFO_KEY))) + || XATTR_IS_PATHINFO (name)) is_spl = _gf_true; out: return is_spl; diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 0e187e020..86ce08cb3 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -781,7 +781,10 @@ posix_handle_pair (xlator_t *this, const char *real_path, int sys_ret = -1; int ret = 0; - if (ZR_FILE_CONTENT_REQUEST(key)) { + if (XATTR_IS_PATHINFO (key)) { + ret = -EACCES; + goto out; + } else if (ZR_FILE_CONTENT_REQUEST(key)) { ret = posix_set_file_contents (this, real_path, key, value, flags); } else { diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 1ebf42150..7695289fa 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -3430,8 +3430,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, } goto done; } - if (loc->inode && name && - (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) { + if (loc->inode && name && (XATTR_IS_PATHINFO (name))) { if (LOC_HAS_ABSPATH (loc)) MAKE_REAL_PATH (rpath, this, loc->path); else @@ -3451,8 +3450,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, goto done; } size = strlen (dyn_rpath) + 1; - ret = dict_set_dynstr (dict, GF_XATTR_PATHINFO_KEY, - dyn_rpath); + ret = dict_set_dynstr (dict, (char *)name, dyn_rpath); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, "could not set value (%s) in dictionary", -- cgit From 588185463d1bbf1b011e3b0471771b3d4f4aa145 Mon Sep 17 00:00:00 2001 From: Prashanth Pai Date: Fri, 13 Dec 2013 16:20:11 +0530 Subject: doc: Fix markdown format errors Made the following minor changes: * Fix broken links and point to correct image paths * Remove dead links and references * Fix table format to conform to Github Flavoured Markdown * Add few common terms to glossary * Maintain consistency of format in writing headings * Remove irrelevant files * Remove references to contact Red Hat support. 
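For orientation while reading the hunks below: the table fixes all convert pandoc-style whitespace-aligned tables into pipe-delimited GFM rows. A generic sketch of the target shape (the column names and values here are invented, not taken from any one file):

    Option | Default | Description
    ------ | ------- | -----------
    foo    | bar     | What the option does.

All of the reworked tables in this patch follow that pattern.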
Change-Id: I4aed4945d56b5d68b8ea133ce5fa3162bfc2864f Signed-off-by: Prashanth Pai Reviewed-on: http://review.gluster.org/6514 Reviewed-by: Vijay Bellur Tested-by: Vijay Bellur --- .../en-US/markdown/Administration_Guide.md | 1 - doc/admin-guide/en-US/markdown/Author_Group.md | 5 - doc/admin-guide/en-US/markdown/Book_Info.md | 1 - doc/admin-guide/en-US/markdown/Chapter.md | 18 -- doc/admin-guide/en-US/markdown/Preface.md | 22 -- doc/admin-guide/en-US/markdown/Revision_History.md | 4 - doc/admin-guide/en-US/markdown/admin_ACLs.md | 46 ++-- doc/admin-guide/en-US/markdown/admin_Hadoop.md | 60 ++--- doc/admin-guide/en-US/markdown/admin_UFO.md | 286 +++++++++------------ doc/admin-guide/en-US/markdown/admin_commandref.md | 180 ------------- doc/admin-guide/en-US/markdown/admin_console.md | 5 +- .../en-US/markdown/admin_directory_Quota.md | 26 +- .../en-US/markdown/admin_geo-replication.md | 134 +++------- .../en-US/markdown/admin_managing_volumes.md | 274 +++++++------------- .../en-US/markdown/admin_monitoring_workload.md | 118 +++------ .../en-US/markdown/admin_setting_volumes.md | 161 +++++------- .../en-US/markdown/admin_settingup_clients.md | 120 +++------ .../en-US/markdown/admin_start_stop_daemon.md | 34 +-- .../en-US/markdown/admin_storage_pools.md | 21 +- .../en-US/markdown/admin_troubleshooting.md | 214 ++++++--------- doc/admin-guide/en-US/markdown/gfs_introduction.md | 29 +-- doc/admin-guide/en-US/markdown/glossary.md | 104 ++++++-- 22 files changed, 634 insertions(+), 1229 deletions(-) delete mode 100644 doc/admin-guide/en-US/markdown/Administration_Guide.md delete mode 100644 doc/admin-guide/en-US/markdown/Author_Group.md delete mode 100644 doc/admin-guide/en-US/markdown/Book_Info.md delete mode 100644 doc/admin-guide/en-US/markdown/Chapter.md delete mode 100644 doc/admin-guide/en-US/markdown/Preface.md delete mode 100644 doc/admin-guide/en-US/markdown/Revision_History.md delete mode 100644 doc/admin-guide/en-US/markdown/admin_commandref.md diff --git a/doc/admin-guide/en-US/markdown/Administration_Guide.md b/doc/admin-guide/en-US/markdown/Administration_Guide.md deleted file mode 100644 index 8b1378917..000000000 --- a/doc/admin-guide/en-US/markdown/Administration_Guide.md +++ /dev/null @@ -1 +0,0 @@ - diff --git a/doc/admin-guide/en-US/markdown/Author_Group.md b/doc/admin-guide/en-US/markdown/Author_Group.md deleted file mode 100644 index ef2a5e677..000000000 --- a/doc/admin-guide/en-US/markdown/Author_Group.md +++ /dev/null @@ -1,5 +0,0 @@ -Divya -Muntimadugu -Red Hat -Engineering Content Services -divya@redhat.com diff --git a/doc/admin-guide/en-US/markdown/Book_Info.md b/doc/admin-guide/en-US/markdown/Book_Info.md deleted file mode 100644 index 8b1378917..000000000 --- a/doc/admin-guide/en-US/markdown/Book_Info.md +++ /dev/null @@ -1 +0,0 @@ - diff --git a/doc/admin-guide/en-US/markdown/Chapter.md b/doc/admin-guide/en-US/markdown/Chapter.md deleted file mode 100644 index 8420259c4..000000000 --- a/doc/admin-guide/en-US/markdown/Chapter.md +++ /dev/null @@ -1,18 +0,0 @@ -Test Chapter -============ - -This is a test paragraph - -Test Section 1 -============== - -This is a test paragraph in a section - -Test Section 2 -============== - -This is a test paragraph in Section 2 - -1. 
listitem text - - diff --git a/doc/admin-guide/en-US/markdown/Preface.md b/doc/admin-guide/en-US/markdown/Preface.md deleted file mode 100644 index f7e934ae8..000000000 --- a/doc/admin-guide/en-US/markdown/Preface.md +++ /dev/null @@ -1,22 +0,0 @@ -Preface -======= - -This guide describes how to configure, operate, and manage Gluster File -System (GlusterFS). - -Audience -======== - -This guide is intended for Systems Administrators interested in -configuring and managing GlusterFS. - -This guide assumes that you are familiar with the Linux operating -system, concepts of File System, GlusterFS concepts, and GlusterFS -Installation - -License -======= - -The License information is available at [][]. - - []: http://www.redhat.com/licenses/rhel_rha_eula.html diff --git a/doc/admin-guide/en-US/markdown/Revision_History.md b/doc/admin-guide/en-US/markdown/Revision_History.md deleted file mode 100644 index 2084309d1..000000000 --- a/doc/admin-guide/en-US/markdown/Revision_History.md +++ /dev/null @@ -1,4 +0,0 @@ -Revision History -================ - -1-0 Thu Apr 5 2012 Divya Muntimadugu Draft diff --git a/doc/admin-guide/en-US/markdown/admin_ACLs.md b/doc/admin-guide/en-US/markdown/admin_ACLs.md index 308e069ca..8fc4e1dae 100644 --- a/doc/admin-guide/en-US/markdown/admin_ACLs.md +++ b/doc/admin-guide/en-US/markdown/admin_ACLs.md @@ -1,5 +1,4 @@ -POSIX Access Control Lists -========================== +#POSIX Access Control Lists POSIX Access Control Lists (ACLs) allows you to assign different permissions for different users or groups even though they do not @@ -13,14 +12,12 @@ This means, in addition to the file owner, the file group, and others, additional users and groups can be granted or denied access by using POSIX ACLs. -Activating POSIX ACLs Support -============================= +##Activating POSIX ACLs Support To use POSIX ACLs for a file or directory, the partition of the file or directory must be mounted with POSIX ACLs support. -Activating POSIX ACLs Support on Sever --------------------------------------- +###Activating POSIX ACLs Support on Sever To mount the backend export directories for POSIX ACLs support, use the following command: @@ -36,8 +33,7 @@ the following entry for the partition to include the POSIX ACLs option: `LABEL=/work /export1 ext3 rw, acl 14 ` -Activating POSIX ACLs Support on Client ---------------------------------------- +###Activating POSIX ACLs Support on Client To mount the glusterfs volumes for POSIX ACLs support, use the following command: @@ -48,8 +44,7 @@ For example: `# mount -t glusterfs -o acl 198.192.198.234:glustervolume /mnt/gluster` -Setting POSIX ACLs -================== +##Setting POSIX ACLs You can set two types of POSIX ACLs, that is, access ACLs and default ACLs. You can use access ACLs to grant permission for a specific file or @@ -60,8 +55,7 @@ of the default ACLs of the directory. You can set ACLs for per user, per group, for users not in the user group for the file, and via the effective right mask. -Setting Access ACLs -------------------- +##Setting Access ACLs You can apply access ACLs to grant permission for both files and directories. @@ -80,12 +74,12 @@ Permissions must be a combination of the characters `r` (read), `w` following format and can specify multiple entry types separated by commas. - ACL Entry Description - ---------------------- -------------------------------------------------------------------------------------------------------------------------------------------------- - u:uid:\ Sets the access ACLs for a user. 
You can specify user name or UID - g:gid:\ Sets the access ACLs for a group. You can specify group name or GID. - m:\ Sets the effective rights mask. The mask is the combination of all access permissions of the owning group and all of the user and group entries. - o:\ Sets the access ACLs for users other than the ones in the group for the file. + ACL Entry | Description + --- | --- + u:uid:\ | Sets the access ACLs for a user. You can specify user name or UID + g:gid:\ | Sets the access ACLs for a group. You can specify group name or GID. + m:\ | Sets the effective rights mask. The mask is the combination of all access permissions of the owning group and all of the user and group entries. + o:\ | Sets the access ACLs for users other than the ones in the group for the file. If a file or directory already has an POSIX ACLs, and the setfacl command is used, the additional permissions are added to the existing @@ -95,8 +89,7 @@ For example, to give read and write permissions to user antony: `# setfacl -m u:antony:rw /mnt/gluster/data/testfile ` -Setting Default ACLs --------------------- +##Setting Default ACLs You can apply default ACLs only to directories. They determine the permissions of a file system objects that inherits from its parent @@ -126,11 +119,9 @@ default ACLs are passed to the files and subdirectories in it: - A subdirectory inherits the default ACLs of the parent directory both as its default ACLs and as an access ACLs. - - A file inherits the default ACLs as its access ACLs. -Retrieving POSIX ACLs -===================== +##Retrieving POSIX ACLs You can view the existing POSIX ACLs for a file or directory. @@ -169,8 +160,7 @@ You can view the existing POSIX ACLs for a file or directory. default:mask::rwx default:other::r-x -Removing POSIX ACLs -=================== +##Removing POSIX ACLs To remove all the permissions for a user, groups, or others, use the following command: @@ -181,16 +171,14 @@ For example, to remove all permissions from the user antony: `# setfacl -x u:antony /mnt/gluster/data/test-file` -Samba and ACLs -============== +##Samba and ACLs If you are using Samba to access GlusterFS FUSE mount, then POSIX ACLs are enabled by default. Samba has been compiled with the `--with-acl-support` option, so no special flags are required when accessing or mounting a Samba share. -NFS and ACLs -============ +##NFS and ACLs Currently we do not support ACLs configuration through NFS, i.e. setfacl and getfacl commands do not work. However, ACLs permissions set using diff --git a/doc/admin-guide/en-US/markdown/admin_Hadoop.md b/doc/admin-guide/en-US/markdown/admin_Hadoop.md index 2894fa713..742e8ad62 100644 --- a/doc/admin-guide/en-US/markdown/admin_Hadoop.md +++ b/doc/admin-guide/en-US/markdown/admin_Hadoop.md @@ -1,5 +1,4 @@ -Managing Hadoop Compatible Storage -================================== +#Managing Hadoop Compatible Storage GlusterFS provides compatibility for Apache Hadoop and it uses the standard file system APIs available in Hadoop to provide a new storage @@ -7,54 +6,44 @@ option for Hadoop deployments. Existing MapReduce based applications can use GlusterFS seamlessly. This new functionality opens up data within Hadoop deployments to any file-based or object-based application. 
-Architecture Overview -===================== +##Architecture Overview The following diagram illustrates Hadoop integration with GlusterFS: -Advantages -========== +![ Hadoop Architecture ](../images/Hadoop_Architecture.png) + +##Advantages The following are the advantages of Hadoop Compatible Storage with GlusterFS: - Provides simultaneous file-based and object-based access within Hadoop. - - Eliminates the centralized metadata server. - - Provides compatibility with MapReduce applications and rewrite is not required. - - Provides a fault tolerant file system. -Preparing to Install Hadoop Compatible Storage -============================================== +##Preparing to Install Hadoop Compatible Storage This section provides information on pre-requisites and list of dependencies that will be installed during installation of Hadoop compatible storage. -Pre-requisites --------------- +###Pre-requisites The following are the pre-requisites to install Hadoop Compatible Storage : - Hadoop 0.20.2 is installed, configured, and is running on all the machines in the cluster. - - Java Runtime Environment - - Maven (mandatory only if you are building the plugin from the source) - - JDK (mandatory only if you are building the plugin from the source) - - getfattr - command line utility -Installing, and Configuring Hadoop Compatible Storage -===================================================== +##Installing, and Configuring Hadoop Compatible Storage This section describes how to install and configure Hadoop Compatible Storage in your storage environment and verify that it is functioning @@ -70,9 +59,8 @@ correctly. The following files will be extracted: - - /usr/local/lib/glusterfs-Hadoop-version-gluster\_plugin\_version.jar - - - /usr/local/lib/conf/core-site.xml + - /usr/local/lib/glusterfs-Hadoop-version-gluster\_plugin\_version.jar + - /usr/local/lib/conf/core-site.xml 3. (Optional) To install Hadoop Compatible Storage in a different location, run the following command: @@ -116,22 +104,13 @@ correctly. The following are the configurable fields: - ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - Property Name Default Value Description - ---------------------- -------------------------- --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- - fs.default.name glusterfs://fedora1:9000 Any hostname in the cluster as the server and any port number. - - fs.glusterfs.volname hadoopvol GlusterFS volume to mount. - - fs.glusterfs.mount /mnt/glusterfs The directory used to fuse mount the volume. - - fs.glusterfs.server fedora2 Any hostname or IP address on the cluster except the client/master. - - quick.slave.io Off Performance tunable option. If this option is set to On, the plugin will try to perform I/O directly from the disk file system (like ext3 or ext4) the file resides on. Hence read performance will improve and job would run faster. 
- > **Note** - > - > This option is not tested widely - ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Property Name | Default Value | Description + --- | --- | --- + fs.default.name | glusterfs://fedora1:9000 | Any hostname in the cluster as the server and any port number. + fs.glusterfs.volname | hadoopvol | GlusterFS volume to mount. + fs.glusterfs.mount | /mnt/glusterfs | The directory used to fuse mount the volume. + fs.glusterfs.server | fedora2 | Any hostname or IP address on the cluster except the client/master. + quick.slave.io | Off | Performance tunable option. If this option is set to On, the plugin will try to perform I/O directly from the disk file system (like ext3 or ext4) the file resides on. Hence read performance will improve and job would run faster. **Note*: This option is not tested widely 5. Create a soft link in Hadoop’s library and configuration directory for the downloaded files (in Step 3) using the following commands: @@ -141,7 +120,6 @@ correctly. For example, `# ln –s /usr/local/lib/glusterfs-0.20.2-0.1.jar /lib/glusterfs-0.20.2-0.1.jar` - `# ln –s /usr/local/lib/conf/core-site.xml /conf/core-site.xml ` 6. (Optional) You can run the following command on Hadoop master to @@ -150,8 +128,7 @@ correctly. `# build-deploy-jar.py -d -c ` -Starting and Stopping the Hadoop MapReduce Daemon -================================================= +##Starting and Stopping the Hadoop MapReduce Daemon To start and stop MapReduce daemon @@ -164,7 +141,6 @@ To start and stop MapReduce daemon `# /bin/stop-mapred.sh ` > **Note** -> > You must start Hadoop MapReduce daemon on all servers. []: http://download.gluster.com/pub/gluster/glusterfs/qa-releases/3.3-beta-2/glusterfs-hadoop-0.20.2-0.1.x86_64.rpm diff --git a/doc/admin-guide/en-US/markdown/admin_UFO.md b/doc/admin-guide/en-US/markdown/admin_UFO.md index 3311eff01..882710410 100644 --- a/doc/admin-guide/en-US/markdown/admin_UFO.md +++ b/doc/admin-guide/en-US/markdown/admin_UFO.md @@ -1,5 +1,4 @@ -Managing Unified File and Object Storage -======================================== +#Managing Unified File and Object Storage Unified File and Object Storage (UFO) unifies NAS and object storage technology. It provides a system for data storage that enables users to @@ -35,8 +34,7 @@ a traditional file system. You will not be able to mount this system like traditional SAN or NAS volumes and perform POSIX compliant operations. -Components of Object Storage -============================ +##Components of Object Storage The major components of Object Storage are: @@ -88,35 +86,26 @@ objects within that account. If a user wants to access the content from another account, they must have API access key or a session token provided by their authentication system. -Advantages of using GlusterFS Unified File and Object Storage -============================================================= +##Advantages of using GlusterFS Unified File and Object Storage The following are the advantages of using GlusterFS UFO: - No limit on upload and download files sizes as compared to Open Stack Swift which limits the object size to 5GB. - - A unified view of data across NAS and Object Storage technologies. 
- - Using GlusterFS's UFO has other advantages like the following: - - High availability - - Scalability - - Replication - - Elastic Volume management -Preparing to Deploy Unified File and Object Storage -=================================================== +##Preparing to Deploy Unified File and Object Storage This section provides information on pre-requisites and list of dependencies that will be installed during the installation of Unified File and Object Storage. -Pre-requisites --------------- +###Pre-requisites GlusterFS's Unified File and Object Storage needs `user_xattr` support from the underlying disk file system. Use the following command to @@ -128,50 +117,33 @@ For example, `# mount –o remount,user_xattr /dev/hda1 ` -Dependencies +####Dependencies ------------ The following packages are installed on GlusterFS when you install Unified File and Object Storage: - curl - - memcached - - openssl - - xfsprogs - - python2.6 - - pyxattr - - python-configobj - - python-setuptools - - python-simplejson - - python-webob - - python-eventlet - - python-greenlet - - python-pastedeploy - - python-netifaces -Installing and Configuring Unified File and Object Storage -========================================================== +##Installing and Configuring Unified File and Object Storage This section provides instructions on how to install and configure Unified File and Object Storage in your storage environment. -Installing Unified File and Object Storage ------------------------------------------- - -To install Unified File and Object Storage: +##Installing Unified File and Object Storage 1. Download `rhel_install.sh` install script from [][] . @@ -197,15 +169,13 @@ To install Unified File and Object Storage: > use a load balancer like pound, nginx, and so on to distribute the > request across the machines. -Adding Users ------------- +###Adding Users The authentication system allows the administrator to grant different levels of access to different users based on the requirement. The following are the types of user permissions: - admin user - - normal user Admin user has read and write permissions on the account. By default, a @@ -228,10 +198,7 @@ For example, > the `proxy-server.conf` file. It is highly recommended that you remove > all the default sample user entries from the configuration file. -For more information on setting ACLs, see ?. - -Configuring Proxy Server ------------------------- +##Configuring Proxy Server The Proxy Server is responsible for connecting to the rest of the OpenStack Object Storage architecture. For each request, it looks up the @@ -251,7 +218,8 @@ The configurable options pertaining to proxy server are stored in account_autocreate=true [filter:tempauth] - use = egg:swift#tempauth user_admin_admin=admin.admin.reseller_admin + use = egg:swift#tempauth + user_admin_admin=admin.admin.reseller_admin user_test_tester=testing.admin user_test2_tester2=testing2.admin user_test_tester3=testing3 @@ -266,15 +234,12 @@ By default, GlusterFS's Unified File and Object Storage is configured to support HTTP protocol and uses temporary authentication to authenticate the HTTP requests. -Configuring Authentication System ---------------------------------- +###Configuring Authentication System -Proxy server must be configured to authenticate using ` - - `. +There are several different authentication system like tempauth, keystone, +swauth etc. Their respective documentation has detailed usage. 
-Configuring Proxy Server for HTTPS ----------------------------------- +###Configuring Proxy Server for HTTPS By default, proxy server only handles HTTP request. To configure the proxy server to process HTTPS requests, perform the following steps: @@ -288,8 +253,8 @@ proxy server to process HTTPS requests, perform the following steps: [DEFAULT] bind_port = 443 - cert_file = /etc/swift/cert.crt - key_file = /etc/swift/cert.key + cert_file = /etc/swift/cert.crt + key_file = /etc/swift/cert.key 3. Restart the servers using the following commands: @@ -298,41 +263,40 @@ proxy server to process HTTPS requests, perform the following steps: The following are the configurable options: - Option Default Description - ------------ ------------ ------------------------------- - bind\_ip 0.0.0.0 IP Address for server to bind - bind\_port 80 Port for server to bind - swift\_dir /etc/swift Swift configuration directory - workers 1 Number of workers to fork - user swift swift user - cert\_file Path to the ssl .crt - key\_file Path to the ssl .key + Option | Default | Description + ------------ | ------------ | ------------------------------- + bind\_ip | 0.0.0.0 | IP Address for server to bind + bind\_port | 80 | Port for server to bind + swift\_dir | /etc/swift | Swift configuration directory + workers | 1 | Number of workers to fork + user | swift | swift user + cert\_file | | Path to the ssl .crt + key\_file | | Path to the ssl .key : proxy-server.conf Default Options in the [DEFAULT] section - Option Default Description - ------------------------------- ----------------- ----------------------------------------------------------------------------------------------------------- - use paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`. - log\_name proxy-server Label used when logging - log\_facility LOG\_LOCAL0 Syslog log facility - log\_level INFO Log level - log\_headers True If True, log headers in each request - recheck\_account\_existence 60 Cache timeout in seconds to send memcached for account existence - recheck\_container\_existence 60 Cache timeout in seconds to send memcached for container existence - object\_chunk\_size 65536 Chunk size to read from object servers - client\_chunk\_size 65536 Chunk size to read from clients - memcache\_servers 127.0.0.1:11211 Comma separated list of memcached servers ip:port - node\_timeout 10 Request timeout to external services - client\_timeout 60 Timeout to read one chunk from a client - conn\_timeout 0.5 Connection timeout to external services - error\_suppression\_interval 60 Time in seconds that must elapse since the last error for a node to be considered no longer error limited - error\_suppression\_limit 10 Error count to consider a node error limited - allow\_account\_management false Whether account `PUT`s and `DELETE`s are even callable + Option | Default | Description + ------------------------------- | ----------------- | ----------------------------------------------------------------------- + use | | paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`. 
+ log\_name | proxy-server | Label used when logging + log\_facility | LOG\_LOCAL0 | Syslog log facility + log\_level | INFO | Log level + log\_headers | True | If True, log headers in each request + recheck\_account\_existence | 60 | Cache timeout in seconds to send memcached for account existence + recheck\_container\_existence | 60 | Cache timeout in seconds to send memcached for container existence + object\_chunk\_size | 65536 | Chunk size to read from object servers + client\_chunk\_size | 65536 | Chunk size to read from clients + memcache\_servers | 127.0.0.1:11211 | Comma separated list of memcached servers ip:port + node\_timeout | 10 | Request timeout to external services + client\_timeout | 60 | Timeout to read one chunk from a client + conn\_timeout | 0.5 | Connection timeout to external services + error\_suppression\_interval | 60 | Time in seconds that must elapse since the last error for a node to be considered no longer error limited + error\_suppression\_limit | 10 | Error count to consider a node error limited + allow\_account\_management | false | Whether account `PUT`s and `DELETE`s are even callable : proxy-server.conf Server Options in the [proxy-server] section -Configuring Object Server -------------------------- +##Configuring Object Server The Object Server is a very simple blob storage server that can store, retrieve, and delete objects stored on local devices. Objects are stored @@ -368,36 +332,35 @@ The configurable options pertaining Object Server are stored in the file The following are the configurable options: - Option Default Description - -------------- ------------ ---------------------------------------------------------------------------------------------------- - swift\_dir /etc/swift Swift configuration directory - devices /srv/node Mount parent directory where devices are mounted - mount\_check true Whether or not check if the devices are mounted to prevent accidentally writing to the root device - bind\_ip 0.0.0.0 IP Address for server to bind - bind\_port 6000 Port for server to bind - workers 1 Number of workers to fork + Option | Default | Description + -------------- | ------------ | ---------------------------------------------------------------------------------------------- + swift\_dir | /etc/swift | Swift configuration directory + devices | /srv/node | Mount parent directory where devices are mounted + mount\_check | true | Whether or not check if the devices are mounted to prevent accidentally writing to the root device + bind\_ip | 0.0.0.0 | IP Address for server to bind + bind\_port | 6000 | Port for server to bind + workers | 1 | Number of workers to fork : object-server.conf Default Options in the [DEFAULT] section - Option Default Description - ---------------------- --------------- ---------------------------------------------------------------------------------------------------- - use paste.deploy entry point for the object server. For most cases, this should be `egg:swift#object`. 
- log\_name object-server log name used when logging - log\_facility LOG\_LOCAL0 Syslog log facility - log\_level INFO Logging level - log\_requests True Whether or not to log each request - user swift swift user - node\_timeout 3 Request timeout to external services - conn\_timeout 0.5 Connection timeout to external services - network\_chunk\_size 65536 Size of chunks to read or write over the network - disk\_chunk\_size 65536 Size of chunks to read or write to disk - max\_upload\_time 65536 Maximum time allowed to upload an object - slow 0 If \> 0, Minimum time in seconds for a `PUT` or `DELETE` request to complete + Option | Default | Description + ---------------------- | --------------- | ------------ + use | | paste.deploy entry point for the object server. For most cases, this should be `egg:swift#object`. + log\_name | object-server | log name used when logging + log\_facility | LOG\_LOCAL0 | Syslog log facility + log\_level | INFO | Logging level + log\_requests | True | Whether or not to log each request + user | swift | swift user + node\_timeout | 3 | Request timeout to external services + conn\_timeout | 0.5 | Connection timeout to external services + network\_chunk\_size | 65536 | Size of chunks to read or write over the network + disk\_chunk\_size | 65536 | Size of chunks to read or write to disk + max\_upload\_time | 65536 | Maximum time allowed to upload an object + slow | 0 | If \> 0, Minimum time in seconds for a `PUT` or `DELETE` request to complete : object-server.conf Server Options in the [object-server] section -Configuring Container Server ----------------------------- +##Configuring Container Server The Container Server’s primary job is to handle listings of objects. The listing is done by querying the GlusterFS mount point with path. This @@ -430,32 +393,31 @@ The configurable options pertaining to container server are stored in The following are the configurable options: - Option Default Description - -------------- ------------ ---------------------------------------------------------------------------------------------------- - swift\_dir /etc/swift Swift configuration directory - devices /srv/node Mount parent directory where devices are mounted - mount\_check true Whether or not check if the devices are mounted to prevent accidentally writing to the root device - bind\_ip 0.0.0.0 IP Address for server to bind - bind\_port 6001 Port for server to bind - workers 1 Number of workers to fork - user swift Swift user + Option | Default | Description + -------------- | ------------ | ------------ + swift\_dir | /etc/swift | Swift configuration directory + devices | /srv/node | Mount parent directory where devices are mounted + mount\_check | true | Whether or not check if the devices are mounted to prevent accidentally writing to the root device + bind\_ip | 0.0.0.0 | IP Address for server to bind + bind\_port | 6001 | Port for server to bind + workers | 1 | Number of workers to fork + user | swift | Swift user : container-server.conf Default Options in the [DEFAULT] section - Option Default Description - --------------- ------------------ ---------------------------------------------------------------------------------------------------------- - use paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`. 
- log\_name container-server Label used when logging - log\_facility LOG\_LOCAL0 Syslog log facility - log\_level INFO Logging level - node\_timeout 3 Request timeout to external services - conn\_timeout 0.5 Connection timeout to external services + Option | Default | Description + --------------- | ------------------ | ------------ + use | | paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`. + log\_name | container-server | Label used when logging + log\_facility | LOG\_LOCAL0 | Syslog log facility + log\_level | INFO | Logging level + node\_timeout | 3 | Request timeout to external services + conn\_timeout | 0.5 | Connection timeout to external services : container-server.conf Server Options in the [container-server] section -Configuring Account Server --------------------------- +##Configuring Account Server The Account Server is very similar to the Container Server, except that it is responsible for listing of containers rather than objects. In UFO, @@ -489,29 +451,28 @@ The configurable options pertaining to account server are stored in The following are the configurable options: - Option Default Description - -------------- ------------ ---------------------------------------------------------------------------------------------------- - swift\_dir /etc/swift Swift configuration directory - devices /srv/node mount parent directory where devices are mounted - mount\_check true Whether or not check if the devices are mounted to prevent accidentally writing to the root device - bind\_ip 0.0.0.0 IP Address for server to bind - bind\_port 6002 Port for server to bind - workers 1 Number of workers to fork - user swift Swift user + Option | Default | Description + -------------- | ------------ | --------------------------- + swift\_dir | /etc/swift | Swift configuration directory + devices | /srv/node | mount parent directory where devices are mounted + mount\_check | true | Whether or not check if the devices are mounted to prevent accidentally writing to the root device + bind\_ip | 0.0.0.0 | IP Address for server to bind + bind\_port | 6002 | Port for server to bind + workers | 1 | Number of workers to fork + user | swift | Swift user : account-server.conf Default Options in the [DEFAULT] section - Option Default Description - --------------- ---------------- ---------------------------------------------------------------------------------------------------------- - use paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`. - log\_name account-server Label used when logging - log\_facility LOG\_LOCAL0 Syslog log facility - log\_level INFO Logging level + Option | Default | Description + --------------- | ---------------- | --------------------------- + use | | paste.deploy entry point for the container server. For most cases, this should be `egg:swift#container`. + log\_name | account-server | Label used when logging + log\_facility | LOG\_LOCAL0 | Syslog log facility + log\_level | INFO | Logging level : account-server.conf Server Options in the [account-server] section -Starting and Stopping Server ----------------------------- +##Starting and Stopping Server You must start the server manually when system reboots and whenever you update/modify the configuration files. @@ -524,16 +485,14 @@ update/modify the configuration files. 
`# swift_init main stop`

-Working with Unified File and Object Storage
-============================================
+##Working with Unified File and Object Storage

This section describes the REST API for administering and managing Object
Storage. All requests will be directed to the host and URL described in
the `X-Storage-URL` HTTP header obtained during successful
authentication.

-Configuring Authenticated Access
---------------------------------
+###Configuring Authenticated Access

Authentication is the process of proving identity to the system. To use
the REST interface, you must obtain an authorization token using GET
@@ -581,8 +540,7 @@ the headers of the response.
>
> The authentication tokens are valid for a 24 hour period.

-Working with Accounts
----------------------
+##Working with Accounts

This section describes the list of operations you can perform at the
account level of the URL.

@@ -593,11 +551,11 @@
You can list the objects of a specific container, or all containers, as
needed using the GET command. You can use the following optional parameters
with the GET request to refine the results:

  Parameter | Description
  ----------- | --------------------------------------------------------------------------
  limit | Limits the number of results to at most *n* values.
  marker | Returns object names greater in value than the specified marker.
  format | Specify either json or xml to return the respective serialized response.

**To display container information**

@@ -660,8 +618,7 @@ containers and the total bytes stored in the account.

        AUTH_tkde3ad38b087b49bbbac0494f7600a554'
        https://example.storage.com:443/v1/AUTH_test -k

-Working with Containers
------------------------
+##Working with Containers

This section describes the list of operations you can perform at the
container level of the URL.

@@ -706,14 +663,14 @@
You can list the objects of a container using the GET command. You can use
the following optional parameters with the GET request to refine the
results:

  Parameter | Description
  ----------- | --------------------------------------------------------------------------------------------------------------
  limit | Limits the number of results to at most *n* values.
  marker | Returns object names greater in value than the specified marker.
  prefix | Displays the results limited to object names beginning with the substring x.
  path | Returns the object names nested in the pseudo path.
  format | Specify either json or xml to return the respective serialized response.
  delimiter | Returns all the object names nested in the container.

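As an illustration of combining these parameters in a single request (a
sketch only: it reuses the sample token and account from the examples in
this section, while the container name `images` and the parameter values
are hypothetical):

    curl -v -X GET -H 'X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554' \
         'https://example.storage.com:443/v1/AUTH_test/images?limit=10&prefix=photo_&format=json' -k

This would return at most 10 object names from `images` that begin with
`photo_`, serialized as JSON.
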
To display objects of a container

@@ -896,8 +853,7 @@ container using cURL (for the above example), run the following command:

        https://example.storage.com:443/v1/AUTH_test/images
        -H 'X-Container-Read: .r:*' -k

-Working with Objects
---------------------
+##Working with Objects

An object represents the data and any metadata for the files stored in
the system. Through the REST interface, metadata for an object can be
diff --git a/doc/admin-guide/en-US/markdown/admin_commandref.md b/doc/admin-guide/en-US/markdown/admin_commandref.md
deleted file mode 100644
index 4ff05f4ef..000000000
--- a/doc/admin-guide/en-US/markdown/admin_commandref.md
+++ /dev/null
@@ -1,180 +0,0 @@
-Command Reference
-=================
-
-This section describes the available commands and includes the following
-section:
-
-- gluster Command
-
-    Gluster Console Manager (command line interpreter)
-
-- glusterd Daemon
-
-    Gluster elastic volume management daemon
-
-gluster Command
-===============
-
-**NAME**
-
-gluster - Gluster Console Manager (command line interpreter)
-
-**SYNOPSIS**
-
-To run the program and display the gluster prompt:
-
-**gluster**
-
-To specify a command directly: gluster [COMMANDS] [OPTIONS]
-
-**DESCRIPTION**
-
-The Gluster Console Manager is a command line utility for elastic volume
-management. You can run the gluster command on any export server. The
-command enables administrators to perform cloud operations such as
-creating, expanding, shrinking, rebalancing, and migrating volumes
-without needing to schedule server downtime.
-
-**COMMANDS**
-
-  ----------------------------------------------------------------------
-  Command                            Description
-  ---------------------------------- -----------------------------------
-  **Volume**
-
-  volume info [all | VOLNAME]        Displays information about all volumes, or the specified volume.
-
-  volume create NEW-VOLNAME [stripe COUNT] [replica COUNT] [transport tcp | rdma | tcp,rdma] NEW-BRICK ...   Creates a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).
-
-  volume delete VOLNAME              Deletes the specified volume.
-
-  volume start VOLNAME               Starts the specified volume.
-
-  volume stop VOLNAME [force]        Stops the specified volume.
-
-  volume rename VOLNAME NEW-VOLNAME  Renames the specified volume.
-
-  volume help                        Displays help for the volume command.
-
-  **Brick**
-
-  volume add-brick VOLNAME NEW-BRICK ...   Adds the specified brick to the specified volume.
-
-  volume replace-brick VOLNAME (BRICK NEW-BRICK) start | pause | abort | status   Replaces the specified brick.
-
-  volume remove-brick VOLNAME [(replica COUNT)|(stripe COUNT)] BRICK ...   Removes the specified brick from the specified volume.
-
-  **Rebalance**
-
-  volume rebalance VOLNAME start     Starts rebalancing the specified volume.
-
-  volume rebalance VOLNAME stop      Stops rebalancing the specified volume.
-
-  volume rebalance VOLNAME status    Displays the rebalance status of the specified volume.
-
-  **Log**
-
-  volume log filename VOLNAME [BRICK] DIRECTORY   Sets the log directory for the corresponding volume/brick.
-
-  volume log rotate VOLNAME [BRICK]  Rotates the log file for corresponding volume/brick.
-
-  volume log locate VOLNAME [BRICK]  Locates the log file for corresponding volume/brick.
-
-  **Peer**
-
-  peer probe HOSTNAME                Probes the specified peer.
-
-  peer detach HOSTNAME               Detaches the specified peer.
-
-  peer status                        Displays the status of peers.
-
-  peer help                          Displays help for the peer command.
-
-  **Geo-replication**
-
-  volume geo-replication MASTER SLAVE start   Start geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME.
-
-  You can specify a local slave volume as :VOLUME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.
-
-  volume geo-replication MASTER SLAVE stop   Stop geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME and a local master directory as /DIRECTORY/SUB-DIRECTORY.
-
-  You can specify a local slave volume as :VOLNAME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.
-
-  volume geo-replication MASTER SLAVE config [options]   Configure geo-replication options between the hosts specified by MASTER and SLAVE.
-
-  gluster-command COMMAND            The path where the gluster command is installed.
-
-  gluster-log-level LOGFILELEVEL     The log level for gluster processes.
-
-  log-file LOGFILE                   The path to the geo-replication log file.
-
-  log-level LOGFILELEVEL             The log level for geo-replication.
-
-  remote-gsyncd COMMAND              The path where the gsyncd binary is installed on the remote machine.
-
-  ssh-command COMMAND                The ssh command to use to connect to the remote machine (the default is ssh).
-
-  rsync-command COMMAND              The rsync command to use for synchronizing the files (the default is rsync).
-
-  volume\_id= UID                    The command to delete the existing master UID for the intermediate/slave node.
-
-  timeout SECONDS                    The timeout period.
-
-  sync-jobs N                        The number of simultaneous files/directories that can be synchronized.
-
-  ignore-deletes                     If this option is set to 1, a file deleted on master will not trigger a delete operation on the slave. Hence, the slave will remain as a superset of the master and can be used to recover the master in case of crash and/or accidental delete.
-
-  **Other**
-
-  help                               Display the command options.
-
-  quit                               Exit the gluster command line interface.
-  ----------------------------------------------------------------------
-
-**FILES**
-
-/var/lib/glusterd/\*
-
-**SEE ALSO**
-
-fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8),
-glusterd(8)
-
-glusterd Daemon
-===============
-
-**NAME**
-
-glusterd - Gluster elastic volume management daemon
-
-**SYNOPSIS**
-
-glusterd [OPTION...]
-
-**DESCRIPTION**
-
-The glusterd daemon is used for elastic volume management. The daemon
-must be run on all export servers.
-
-**OPTIONS**
-
-  Option                              Description
-  ----------------------------------- ----------------------------------
-  **Basic**
-  -l=LOGFILE, --log-file=LOGFILE      Files to use for logging (the default is /usr/local/var/log/glusterfs/glusterfs.log).
-  -L=LOGLEVEL, --log-level=LOGLEVEL   Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO).
-  --debug                             Runs the program in debug mode. This option sets --no-daemon, --log-level to DEBUG, and --log-file to console.
-  -N, --no-daemon                     Runs the program in the foreground.
-  **Miscellaneous**
-  -?, --help                          Displays this help.
-  --usage                             Displays a short usage message.
-  -V, --version                       Prints the program version.
-
-**FILES**
-
-/var/lib/glusterd/\*
-
-**SEE ALSO**
-
-fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8),
-gluster(8)
diff --git a/doc/admin-guide/en-US/markdown/admin_console.md b/doc/admin-guide/en-US/markdown/admin_console.md
index 9b69de02d..126b7e206 100644
--- a/doc/admin-guide/en-US/markdown/admin_console.md
+++ b/doc/admin-guide/en-US/markdown/admin_console.md
@@ -1,5 +1,4 @@
-Using the Gluster Console Manager – Command Line Utility
-========================================================
+##Using the Gluster Console Manager – Command Line Utility

The Gluster Console Manager is a single command line utility that
simplifies configuration and management of your storage environment. The
@@ -18,7 +17,7 @@ You can also use the commands to create scripts for automation, as well
as use the commands as an API to allow integration with third-party
applications.

-**Running the Gluster Console Manager**
+###Running the Gluster Console Manager

You can run the Gluster Console Manager on any GlusterFS server either
by invoking the commands or by running the Gluster CLI in interactive
diff --git a/doc/admin-guide/en-US/markdown/admin_directory_Quota.md b/doc/admin-guide/en-US/markdown/admin_directory_Quota.md
index 09c757781..21e42c669 100644
--- a/doc/admin-guide/en-US/markdown/admin_directory_Quota.md
+++ b/doc/admin-guide/en-US/markdown/admin_directory_Quota.md
@@ -1,5 +1,4 @@
-Managing Directory Quota
-========================
+#Managing Directory Quota

Directory quotas in GlusterFS allow you to set limits on usage of disk
space by directories or volumes. The storage administrators can control
@@ -19,9 +18,8 @@ the storage for the users depending on their role in the organization.
You can set the quota at the following levels:

-- Directory level – limits the usage at the directory level
-
-- Volume level – limits the usage at the volume level
+- **Directory level** – limits the usage at the directory level
+- **Volume level** – limits the usage at the volume level

> **Note**
>
> You can set the disk limit on the directory even if it is not created.
> The disk limit is enforced immediately after creating that directory.
> For more information on setting disk limit, see ?.

-Enabling Quota
-==============
+##Enabling Quota

You must enable Quota to set disk limits.

@@ -45,8 +42,7 @@ You must enable Quota to set disk limits.

        # gluster volume quota test-volume enable
        Quota is enabled on /test-volume

-Disabling Quota
-===============
+##Disabling Quota

You can disable Quota, if needed.

@@ -61,8 +57,7 @@ You can disable Quota, if needed.

        # gluster volume quota test-volume disable
        Quota translator is disabled on /test-volume

-Setting or Replacing Disk Limit
-===============================
+##Setting or Replacing Disk Limit

You can create new directories in your storage environment and set the
disk limit, or set the disk limit for existing directories. The directory
@@ -86,8 +81,7 @@ being treated as "/".

> **Note**
>
> In a multi-level directory hierarchy, the strictest disk limit
> will be considered for enforcement.

-Displaying Disk Limit Information
-=================================
+##Displaying Disk Limit Information

You can display disk limit information on all the directories on which
the limit is set.

@@ -119,8 +113,7 @@ the limit is set.

                  /Test/data    10 GB       6 GB

-Updating Memory Cache Size
-==========================
+##Updating Memory Cache Size

For performance reasons, quota caches the directory sizes on client. You
can set a timeout indicating the maximum valid duration of directory sizes
@@ -151,8 +144,7 @@ on client side.

        # gluster volume set test-volume features.quota-timeout 5
        Set volume successful

-Removing Disk Limit
-===================
+##Removing Disk Limit

You can remove a set disk limit, if you do not need quota anymore.
diff --git a/doc/admin-guide/en-US/markdown/admin_geo-replication.md b/doc/admin-guide/en-US/markdown/admin_geo-replication.md
index 849957244..47a2f6628 100644
--- a/doc/admin-guide/en-US/markdown/admin_geo-replication.md
+++ b/doc/admin-guide/en-US/markdown/admin_geo-replication.md
@@ -1,5 +1,4 @@
-Managing Geo-replication
-========================
+#Managing Geo-replication

Geo-replication provides a continuous, asynchronous, and incremental
replication service from one site to another over Local Area Networks
@@ -8,9 +7,9 @@ replication service from one site to another over Local Area Networks
Geo-replication uses a master–slave model, whereby replication and
mirroring occur between the following partners:

-- Master – a GlusterFS volume
+- **Master** – a GlusterFS volume

-- Slave – a slave which can be of the following types:
+- **Slave** – a slave which can be of the following types:

    - A local directory which can be represented as a file URL like
      `file:///path/to/dir`. You can use the shortened form, for example,
@@ -34,37 +33,24 @@
This section introduces Geo-replication, illustrates the various
deployment scenarios, and explains how to configure the system to
provide replication and mirroring in your environment.

-Replicated Volumes vs Geo-replication
-=====================================
+##Replicated Volumes vs Geo-replication

The following table lists the differences between replicated volumes and
geo-replication:

-  Replicated Volumes                                                                       Geo-replication
-  ---------------------------------------------------------------------------------------- ----------------------------------------
-  Mirrors data across clusters                                                             Mirrors data across geographically distributed clusters
-  Provides high-availability                                                               Ensures backing up of data for disaster recovery
-  Synchronous replication (each and every file operation is sent across all the bricks)   Asynchronous replication (checks for the changes in files periodically and syncs them on detecting differences)
+ Replicated Volumes | Geo-replication
+ --- | ---
+ Mirrors data across clusters | Mirrors data across geographically distributed clusters
+ Provides high-availability | Ensures backing up of data for disaster recovery
+ Synchronous replication (each and every file operation is sent across all the bricks) | Asynchronous replication (checks for the changes in files periodically and syncs them on detecting differences)

-Preparing to Deploy Geo-replication
-===================================
+##Preparing to Deploy Geo-replication

This section provides an overview of the Geo-replication deployment
scenarios, describes how you can check the minimum system requirements,
and explores common deployment scenarios.

-- ?
-
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-Exploring Geo-replication Deployment Scenarios
-----------------------------------------------
+##Exploring Geo-replication Deployment Scenarios

Geo-replication provides an incremental replication service over Local
Area Networks (LANs), Wide Area Networks (WANs), and across the Internet.
@@ -72,11 +58,8 @@
This section illustrates the most common deployment scenarios for
Geo-replication, including the following:

 - Geo-replication over LAN
-
 - Geo-replication over WAN
-
 - Geo-replication over the Internet
-
 - Multi-site cascading Geo-replication

**Geo-replication over LAN**

@@ -106,22 +89,15 @@ across multiple sites.

![ Multi-site cascading Geo-replication ][]

-Geo-replication Deployment Overview
------------------------------------
+##Geo-replication Deployment Overview

Deploying Geo-replication involves the following steps:

 1. Verify that your environment matches the minimum system requirements.
-    For more information, see ?.
-
-2. Determine the appropriate deployment scenario. For more information,
-    see ?.
+2. Determine the appropriate deployment scenario.
+3. Start Geo-replication on master and slave systems, as required.

-3. Start Geo-replication on master and slave systems, as required. For
-    more information, see ?.
-
-Checking Geo-replication Minimum Requirements
----------------------------------------------
+##Checking Geo-replication Minimum Requirements

Before deploying GlusterFS Geo-replication, verify that your systems
match the minimum requirements.

The following table outlines the minimum requirements for both master
and slave nodes within your environment:

-  Component                Master                                                                 Slave
-  ------------------------ ---------------------------------------------------------------------- ----------------------------------------
-  Operating System         GNU/Linux                                                              GNU/Linux
-  Filesystem               GlusterFS 3.2 or higher                                                GlusterFS 3.2 or higher (GlusterFS needs to be installed, but does not need to be running), ext3, ext4, or XFS (any other POSIX compliant file system would work, but has not been tested extensively)
-  Python                   Python 2.4 (with ctypes external module), or Python 2.5 (or higher)    Python 2.4 (with ctypes external module), or Python 2.5 (or higher)
-  Secure shell             OpenSSH version 4.0 (or higher)                                        SSH2-compliant daemon
-  Remote synchronization   rsync 3.0.7 or higher                                                  rsync 3.0.7 or higher
-  FUSE                     GlusterFS supported versions                                           GlusterFS supported versions
+ Component | Master | Slave
+ --- | --- | ---
+ Operating System | GNU/Linux | GNU/Linux
+ Filesystem | GlusterFS 3.2 or higher | GlusterFS 3.2 or higher (GlusterFS needs to be installed, but does not need to be running), ext3, ext4, or XFS (any other POSIX compliant file system would work, but has not been tested extensively)
+ Python | Python 2.4 (with ctypes external module), or Python 2.5 (or higher) | Python 2.4 (with ctypes external module), or Python 2.5 (or higher)
+ Secure shell | OpenSSH version 4.0 (or higher) | SSH2-compliant daemon
+ Remote synchronization | rsync 3.0.7 or higher | rsync 3.0.7 or higher
+ FUSE | GlusterFS supported versions | GlusterFS supported versions

-Setting Up the Environment for Geo-replication
-----------------------------------------------
+##Setting Up the Environment for Geo-replication

**Time Synchronization**

@@ -172,8 +147,7 @@ geo-replication Start command will be issued) and the remote machine

`# ssh-copy-id -i /var/lib/glusterd/geo-replication/secret.pem.pub USER@SLAVE-HOST`

-Setting Up the Environment for a Secure Geo-replication Slave
-------------------------------------------------------------
+##Setting Up the Environment for a Secure Geo-replication Slave

You can configure a secure slave using SSH so that the master is granted
restricted access. With GlusterFS, you need not specify configuration
@@ -366,25 +340,13 @@ following command:

`# gluster volume geo-replication '*' config allow-network ::1,127.0.0.1`

-Starting Geo-replication
-========================
+##Starting Geo-replication

This section describes how to configure and start Gluster
Geo-replication in your storage environment, and verify that it is
functioning correctly.

-- ?
-
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-Starting Geo-replication
-------------------------
+###Starting Geo-replication

To start Gluster Geo-replication

@@ -401,10 +363,9 @@ To start Gluster Geo-replication

    > **Note**
    >
    > You may need to configure the service before starting Gluster
-    > Geo-replication. For more information, see ?.
+    > Geo-replication.

-Verifying Successful Deployment
--------------------------------
+###Verifying Successful Deployment

You can use the gluster command to verify the status of Gluster
Geo-replication in your environment.

@@ -425,8 +386,7 @@ Geo-replication in your environment.

        ______    ______________________________     ____________
        Volume1   root@example.com:/data/remote_dir  Starting....
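
For reference, starting the same session would look like the following
(illustrative; the volume and slave names follow the examples above, and
the output text matches the messages shown later in this guide):

    # gluster volume geo-replication Volume1 example.com:/data/remote_dir start
    Starting geo-replication session between Volume1 &
    example.com:/data/remote_dir has been successful
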
-Displaying Geo-replication Status Information
----------------------------------------------
+###Displaying Geo-replication Status Information

You can display status information about a specific geo-replication
master session, or a particular master-slave session, or all
geo-replication sessions, as needed.
@@ -480,15 +440,13 @@ geo-replication sessions, as needed.

 - **OK**: The geo-replication session is in a stable state.

 - **Faulty**: The geo-replication session has witnessed some
-    abnormality and the situation has to be investigated further. For
-    further information, see ? section.
+    abnormality and the situation has to be investigated further.

 - **Corrupt**: The monitor thread which is monitoring the
    geo-replication session has died. This situation should not occur
-    normally, if it persists contact Red Hat Support[][1].
+    normally.

-Configuring Geo-replication
----------------------------
+##Configuring Geo-replication

To configure Gluster Geo-replication

@@ -496,16 +454,13 @@ To configure Gluster Geo-replication

 `# gluster volume geo-replication MASTER SLAVE config [options]`

-    For more information about the options, see ?.
-
 For example:

 To view the list of all option/value pairs, use the following command:

 `# gluster volume geo-replication Volume1 example.com:/data/remote_dir config`

-Stopping Geo-replication
-------------------------
+##Stopping Geo-replication

You can use the gluster command to stop Gluster Geo-replication (syncing
of data from Master to Slave) in your environment.

@@ -522,10 +477,7 @@ of data from Master to Slave) in your environment.

        Stopping geo-replication session between Volume1 and
        example.com:/data/remote_dir has been successful

-    See ? for more information about the gluster command.
-
-Restoring Data from the Slave
-=============================
+##Restoring Data from the Slave

You can restore data from the slave to the master volume, whenever the
master volume becomes faulty for reasons like hardware failure.

@@ -687,15 +639,13 @@ Run the following command on slave (example.com):

        Starting geo-replication session between Volume1 &
        example.com:/data/remote_dir has been successful

-Best Practices
-==============
+##Best Practices

**Manually Setting Time**

If you have to change the time on your bricks manually, then you must
-set uniform time on all bricks. This avoids the out-of-time sync issue
-described in ?. Setting time backward corrupts the geo-replication
-index, so the recommended way to set the time manually is:
+set uniform time on all bricks. Setting time backward corrupts the
+geo-replication index, so the recommended way to set the time manually is:

 1. Stop geo-replication between the master and slave using the
    following command:

@@ -730,9 +680,9 @@
machine / chroot/container type solution) by the administrator to run
the geo-replication slave in it. Enhancement in this regard will be
available in a follow-up minor release.
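
Putting the time-setting practice above into one sequence, a sketch (the
timestamp is hypothetical; the session names follow the examples in this
guide):

    # gluster volume geo-replication Volume1 example.com:/data/remote_dir stop
    # date -s '2013-12-11 15:19:25'    # set the same, current time on every brick
    # gluster volume geo-replication Volume1 example.com:/data/remote_dir start
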
-  [ Geo-replication over LAN ]: images/Geo-Rep_LAN.png
-  [ Geo-replication over WAN ]: images/Geo-Rep_WAN.png
-  [ Geo-replication over Internet ]: images/Geo-Rep03_Internet.png
-  [ Multi-site cascading Geo-replication ]: images/Geo-Rep04_Cascading.png
+  [ Geo-replication over LAN ]: ../images/Geo-Rep_LAN.png
+  [ Geo-replication over WAN ]: ../images/Geo-Rep_WAN.png
+  [ Geo-replication over Internet ]: ../images/Geo-Rep03_Internet.png
+  [ Multi-site cascading Geo-replication ]: ../images/Geo-Rep04_Cascading.png
  []: http://docs.redhat.com/docs/en-US/Red_Hat_Enterprise_Linux/6/html/Migration_Planning_Guide/ch04s07.html
  [1]: www.redhat.com/support/
diff --git a/doc/admin-guide/en-US/markdown/admin_managing_volumes.md b/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
index 6c06e27a0..f59134b80 100644
--- a/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
+++ b/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
@@ -1,167 +1,104 @@
-Managing GlusterFS Volumes
-==========================
+#Managing GlusterFS Volumes

This section describes how to perform common GlusterFS management
operations, including the following:

-- ?
+- [Tuning Volume Options](#tuning-options)
+- [Expanding Volumes](#expanding-volumes)
+- [Shrinking Volumes](#shrinking-volumes)
+- [Migrating Volumes](#migrating-volumes)
+- [Rebalancing Volumes](#rebalancing-volumes)
+- [Stopping Volumes](#stopping-volumes)
+- [Deleting Volumes](#deleting-volumes)
+- [Triggering Self-Heal on Replicate](#self-heal)
-
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-- ?
-
-Tuning Volume Options
-=====================
+
+##Tuning Volume Options

You can tune volume options, as needed, while the cluster is online and
available.

> **Note**
>
-> Red Hat recommends you to set server.allow-insecure option to ON if
+> It is recommended that you set the server.allow-insecure option to ON if
> there are too many bricks in each volume or if there are too many
> services which have already utilized all the privileged ports in the
> system. Turning this option ON allows ports to accept/reject messages
> from insecure ports. So, use this option only if your deployment
> requires it.

-To tune volume options
-
-- Tune volume options using the following command:
+Tune volume options using the following command:

 `# gluster volume set VOLNAME OPTION VALUE`

-    For example, to specify the performance cache size for test-volume:
-
-        # gluster volume set test-volume performance.cache-size 256MB
-        Set volume successful
-
-    The following table lists the Volume options along with its
-    description and default value:
-
-    > **Note**
-    >
-    > The default options given here are subject to modification at any
-    > given time and may not be the same for all versions.
-
-    ------------------------------------------------------------
-    Option    Description    Default Value    Available Options
-    ------------------------------------------------------------
-    auth.allow    IP addresses of the clients which should be allowed to access the volume.    \* (allow all)    Valid IP address which includes wild card patterns including \*, such as 192.168.1.\*
-
-    auth.reject    IP addresses of the clients which should be denied to access the volume.    NONE (reject none)    Valid IP address which includes wild card patterns including \*, such as 192.168.2.\*
-
-    client.grace-timeout    Specifies the duration for the lock state to be maintained on the client after a network disconnection.    10    10 - 1800 secs
-
-    cluster.self-heal-window-size    Specifies the maximum number of blocks per file on which self-heal would happen simultaneously.    16    0 - 1025 blocks
-
-    cluster.data-self-heal-algorithm    Specifies the type of self-heal. If you set the option as "full", the entire file is copied from source to destinations. If the option is set to "diff" the file blocks that are not in sync are copied to destinations. Reset uses a heuristic model. If the file does not exist on one of the subvolumes, or a zero-byte file exists (created by entry self-heal) the entire content has to be copied anyway, so there is no benefit from using the "diff" algorithm. If the file size is about the same as page size, the entire file can be read and written with a few operations, which will be faster than "diff" which has to read checksums and then read and write.    reset    full | diff | reset
-
-    cluster.min-free-disk    Specifies the percentage of disk space that must be kept free. Might be useful for non-uniform bricks.    10%    Percentage of required minimum free disk space
-
-    cluster.stripe-block-size    Specifies the size of the stripe unit that will be read from or written to.    128 KB (for all files)    size in bytes
-
-    cluster.self-heal-daemon    Allows you to turn-off proactive self-heal on replicated volumes.    on    On | Off
-
-    cluster.ensure-durability    This option makes sure the data/metadata is durable across abrupt shutdown of the brick.    on    On | Off
-
-    diagnostics.brick-log-level    Changes the log-level of the bricks.    INFO    DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE
-
-    diagnostics.client-log-level    Changes the log-level of the clients.    INFO    DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE
-
-    diagnostics.latency-measurement    Statistics related to the latency of each operation would be tracked.    off    On | Off
-
-    diagnostics.dump-fd-stats    Statistics related to file-operations would be tracked.    off    On | Off
-
-    feature.read-only    Enables you to mount the entire volume as read-only for all the clients (including NFS clients) accessing it.    off    On | Off
-
-    features.lock-heal    Enables self-healing of locks when the network disconnects.    on    On | Off
-
-    features.quota-timeout    For performance reasons, quota caches the directory sizes on client. You can set timeout indicating the maximum duration of directory sizes in cache, from the time they are populated, during which they are considered valid.    0    0 - 3600 secs
-
-    geo-replication.indexing    Use this option to automatically sync the changes in the filesystem from Master to Slave.    off    On | Off
-
-    network.frame-timeout    The time frame after which the operation has to be declared as dead, if the server does not respond for a particular operation.    1800 (30 mins)    1800 secs
-
-    network.ping-timeout    The time duration for which the client waits to check if the server is responsive. When a ping timeout happens, there is a network disconnect between the client and server. All resources held by server on behalf of the client get cleaned up. When a reconnection happens, all resources will need to be re-acquired before the client can resume its operations on the server. Additionally, the locks will be acquired and the lock tables updated.    42 Secs    42 Secs
-    This reconnect is a very expensive operation and should be avoided.
-
-    nfs.enable-ino32    For 32-bit nfs clients or applications that do not support 64-bit inode numbers or large files, use this option from the CLI to make Gluster NFS return 32-bit inode numbers instead of 64-bit inode numbers. Applications that will benefit are those that were either:    off    On | Off
-    \* Built 32-bit and run on 32-bit machines.
-
-    \* Built 32-bit on 64-bit systems.
-
-    \* Built 64-bit but use a library built 32-bit, especially relevant for python and perl scripts.
-
-    Either of the conditions above can lead to application on Linux NFS clients failing with "Invalid argument" or "Value too large for defined data type" errors.
-
-    nfs.volume-access    Set the access type for the specified sub-volume.    read-write    read-write|read-only
-
-    nfs.trusted-write    If there is an UNSTABLE write from the client, STABLE flag will be returned to force the client to not send a COMMIT request.    off    On | Off
-    In some environments, combined with a replicated GlusterFS setup, this option can improve write performance. This flag allows users to trust Gluster replication logic to sync data to the disks and recover when required. COMMIT requests if received will be handled in a default manner by fsyncing. STABLE writes are still handled in a sync manner.
-
-    nfs.trusted-sync    All writes and COMMIT requests are treated as async. This implies that no write requests are guaranteed to be on server disks when the write reply is received at the NFS client. Trusted sync includes trusted-write behavior.    off    On | Off
-
-    nfs.export-dir    This option can be used to export specified comma separated subdirectories in the volume. The path must be an absolute path. Along with path allowed list of IPs/hostname can be associated with each subdirectory. If provided connection will allowed only from these IPs. Format: \[(hostspec[|hostspec|...])][,...]. Where hostspec can be an IP address, hostname or an IP range in CIDR notation. **Note**: Care must be taken while configuring this option as invalid entries and/or unreachable DNS servers can introduce unwanted delay in all the mount calls.    No sub directory exported.    Absolute path with allowed list of IP/hostname.
-
-    nfs.export-volumes    Enable/Disable exporting entire volumes, instead if used in conjunction with nfs3.export-dir, can allow setting up only subdirectories as exports.    on    On | Off
-
-    nfs.rpc-auth-unix    Enable/Disable the AUTH\_UNIX authentication type. This option is enabled by default for better interoperability. However, you can disable it if required.    on    On | Off
-
-    nfs.rpc-auth-null    Enable/Disable the AUTH\_NULL authentication type. It is not recommended to change the default value for this option.    on    On | Off
-
-    nfs.rpc-auth-allow \<IP-Addresses\>    Allow a comma separated list of addresses and/or hostnames to connect to the server. By default, all clients are disallowed. This allows you to define a general rule for all exported volumes.    Reject All    IP address or Host name
-
-    nfs.rpc-auth-reject \<IP-Addresses\>    Reject a comma separated list of addresses and/or hostnames from connecting to the server. By default, all connections are disallowed. This allows you to define a general rule for all exported volumes.    Reject All    IP address or Host name
-
-    nfs.ports-insecure    Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option.    off    On | Off
-
-    nfs.addr-namelookup    Turn-off name lookup for incoming client connections using this option. In some setups, the name server can take too long to reply to DNS queries resulting in timeouts of mount requests. Use this option to turn off name lookups during address authentication. Note, turning this off will prevent you from using hostnames in rpc-auth.addr.\* filters.    on    On | Off
-
-    nfs.register-with-portmap    For systems that need to run multiple NFS servers, you need to prevent more than one from registering with portmap service. Use this option to turn off portmap registration for Gluster NFS.    on    On | Off
-
-    nfs.port \<PORT-NUMBER\>    Use this option on systems that need Gluster NFS to be associated with a non-default port number.    38465- 38467
-
-    nfs.disable    Turn-off volume being exported by NFS    off    On | Off
-
-    performance.write-behind-window-size    Size of the per-file write-behind buffer.    1 MB    Write-behind cache size
-
-    performance.io-thread-count    The number of threads in IO threads translator.    16    0 - 65
-
-    performance.flush-behind    If this option is set ON, instructs write-behind translator to perform flush in background, by returning success (or any errors, if any of previous writes were failed) to application even before flush is sent to backend filesystem.    On    On | Off
-
-    performance.cache-max-file-size    Sets the maximum file size cached by the io-cache translator. Can use the normal size descriptors of KB, MB, GB,TB or PB (for example, 6GB). Maximum size uint64.    2 \^ 64 -1 bytes    size in bytes
-
-    performance.cache-min-file-size    Sets the minimum file size cached by the io-cache translator. Values same as "max" above.    0B    size in bytes
-
-    performance.cache-refresh-timeout    The cached data for a file will be retained till 'cache-refresh-timeout' seconds, after which data re-validation is performed.    1 sec    0 - 61
-
-    performance.cache-size    Size of the read cache.    32 MB    size in bytes
-
-    server.allow-insecure    Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option.    on    On | Off
-
-    server.grace-timeout    Specifies the duration for the lock state to be maintained on the server after a network disconnection.    10    10 - 1800 secs
-
-    server.statedump-path    Location of the state dump file.    /tmp directory of the brick    New directory path
-
-    storage.health-check-interval    Number of seconds between health-checks done on the filesystem that is used for the brick(s). Defaults to 30 seconds, set to 0 to disable.    /tmp directory of the brick    New directory path
-    ------------------------------------------------------------
-
-    You can view the changed volume options using
-    the` # gluster volume info ` command. For more information, see ?.
-
-Expanding Volumes
-=================
+For example, to specify the performance cache size for test-volume:
+
+    # gluster volume set test-volume performance.cache-size 256MB
+    Set volume successful
+
+The following table lists the Volume options along with their
+descriptions and default values:
+
+> **Note**
+>
+> The default options given here are subject to modification at any
+> given time and may not be the same for all versions.
+
+
+Option | Description | Default Value | Available Options
+--- | --- | --- | ---
+auth.allow | IP addresses of the clients which should be allowed to access the volume. | \* (allow all) | Valid IP address which includes wild card patterns including \*, such as 192.168.1.\*
+auth.reject | IP addresses of the clients which should be denied to access the volume. | NONE (reject none) | Valid IP address which includes wild card patterns including \*, such as 192.168.2.\*
+client.grace-timeout | Specifies the duration for the lock state to be maintained on the client after a network disconnection. | 10 | 10 - 1800 secs
+cluster.self-heal-window-size | Specifies the maximum number of blocks per file on which self-heal would happen simultaneously. | 16 | 0 - 1025 blocks
+cluster.data-self-heal-algorithm | Specifies the type of self-heal. If you set the option as "full", the entire file is copied from source to destinations. If the option is set to "diff" the file blocks that are not in sync are copied to destinations. Reset uses a heuristic model. If the file does not exist on one of the subvolumes, or a zero-byte file exists (created by entry self-heal) the entire content has to be copied anyway, so there is no benefit from using the "diff" algorithm. If the file size is about the same as page size, the entire file can be read and written with a few operations, which will be faster than "diff" which has to read checksums and then read and write. | reset | full/diff/reset
+cluster.min-free-disk | Specifies the percentage of disk space that must be kept free. Might be useful for non-uniform bricks. | 10% | Percentage of required minimum free disk space
+cluster.stripe-block-size | Specifies the size of the stripe unit that will be read from or written to. | 128 KB (for all files) | size in bytes
+cluster.self-heal-daemon | Allows you to turn-off proactive self-heal on replicated volumes. | On | On/Off
+cluster.ensure-durability | This option makes sure the data/metadata is durable across abrupt shutdown of the brick. | On | On/Off
+diagnostics.brick-log-level | Changes the log-level of the bricks. | INFO | DEBUG/WARNING/ERROR/CRITICAL/NONE/TRACE
+diagnostics.client-log-level | Changes the log-level of the clients. | INFO | DEBUG/WARNING/ERROR/CRITICAL/NONE/TRACE
+diagnostics.latency-measurement | Statistics related to the latency of each operation would be tracked. | Off | On/Off
+diagnostics.dump-fd-stats | Statistics related to file-operations would be tracked. | Off | On/Off
+feature.read-only | Enables you to mount the entire volume as read-only for all the clients (including NFS clients) accessing it. | Off | On/Off
+features.lock-heal | Enables self-healing of locks when the network disconnects. | On | On/Off
+features.quota-timeout | For performance reasons, quota caches the directory sizes on client. You can set timeout indicating the maximum duration of directory sizes in cache, from the time they are populated, during which they are considered valid. | 0 | 0 - 3600 secs
+geo-replication.indexing | Use this option to automatically sync the changes in the filesystem from Master to Slave. | Off | On/Off
+network.frame-timeout | The time frame after which the operation has to be declared as dead, if the server does not respond for a particular operation. | 1800 (30 mins) | 1800 secs
+network.ping-timeout | The time duration for which the client waits to check if the server is responsive. When a ping timeout happens, there is a network disconnect between the client and server. All resources held by server on behalf of the client get cleaned up. When a reconnection happens, all resources will need to be re-acquired before the client can resume its operations on the server. Additionally, the locks will be acquired and the lock tables updated. This reconnect is a very expensive operation and should be avoided. | 42 Secs | 42 Secs
+nfs.enable-ino32 | For 32-bit nfs clients or applications that do not support 64-bit inode numbers or large files, use this option from the CLI to make Gluster NFS return 32-bit inode numbers instead of 64-bit inode numbers. | Off | On/Off
+nfs.volume-access | Set the access type for the specified sub-volume. | read-write | read-write/read-only
+nfs.trusted-write | If there is an UNSTABLE write from the client, STABLE flag will be returned to force the client to not send a COMMIT request. In some environments, combined with a replicated GlusterFS setup, this option can improve write performance. This flag allows users to trust Gluster replication logic to sync data to the disks and recover when required. COMMIT requests if received will be handled in a default manner by fsyncing. STABLE writes are still handled in a sync manner. | Off | On/Off
+nfs.trusted-sync | All writes and COMMIT requests are treated as async. This implies that no write requests are guaranteed to be on server disks when the write reply is received at the NFS client. Trusted sync includes trusted-write behavior. | Off | On/Off
+nfs.export-dir | This option can be used to export specified comma separated subdirectories in the volume. The path must be an absolute path. Along with path allowed list of IPs/hostname can be associated with each subdirectory. If provided connection will allowed only from these IPs. Format: \[(hostspec[hostspec...])][,...]. Where hostspec can be an IP address, hostname or an IP range in CIDR notation. **Note**: Care must be taken while configuring this option as invalid entries and/or unreachable DNS servers can introduce unwanted delay in all the mount calls. | No sub directory exported. | Absolute path with allowed list of IP/hostname
+nfs.export-volumes | Enable/Disable exporting entire volumes, instead if used in conjunction with nfs3.export-dir, can allow setting up only subdirectories as exports. | On | On/Off
+nfs.rpc-auth-unix | Enable/Disable the AUTH\_UNIX authentication type. This option is enabled by default for better interoperability. However, you can disable it if required. | On | On/Off
+nfs.rpc-auth-null | Enable/Disable the AUTH\_NULL authentication type. It is not recommended to change the default value for this option. | On | On/Off
+nfs.rpc-auth-allow \<IP-Addresses\> | Allow a comma separated list of addresses and/or hostnames to connect to the server. By default, all clients are disallowed. This allows you to define a general rule for all exported volumes. | Reject All | IP address or Host name
+nfs.rpc-auth-reject \<IP-Addresses\> | Reject a comma separated list of addresses and/or hostnames from connecting to the server. By default, all connections are disallowed. This allows you to define a general rule for all exported volumes. | Reject All | IP address or Host name
+nfs.ports-insecure | Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. | Off | On/Off
+nfs.addr-namelookup | Turn-off name lookup for incoming client connections using this option. In some setups, the name server can take too long to reply to DNS queries resulting in timeouts of mount requests. Use this option to turn off name lookups during address authentication. Note, turning this off will prevent you from using hostnames in rpc-auth.addr.\* filters. | On | On/Off
+nfs.register-with-portmap | For systems that need to run multiple NFS servers, you need to prevent more than one from registering with portmap service. Use this option to turn off portmap registration for Gluster NFS. | On | On/Off
+nfs.port \<PORT-NUMBER\> | Use this option on systems that need Gluster NFS to be associated with a non-default port number. | NA | 38465- 38467
+nfs.disable | Turn-off volume being exported by NFS. | Off | On/Off
+performance.write-behind-window-size | Size of the per-file write-behind buffer. | 1MB | Write-behind cache size
+performance.io-thread-count | The number of threads in IO threads translator. | 16 | 0-65
+performance.flush-behind | If this option is set ON, instructs write-behind translator to perform flush in background, by returning success (or any errors, if any of previous writes were failed) to application even before flush is sent to backend filesystem. | On | On/Off
+performance.cache-max-file-size | Sets the maximum file size cached by the io-cache translator. Can use the normal size descriptors of KB, MB, GB,TB or PB (for example, 6GB). Maximum size uint64. | 2 \^ 64 -1 bytes | size in bytes
+performance.cache-min-file-size | Sets the minimum file size cached by the io-cache translator. Values same as "max" above. | 0B | size in bytes
+performance.cache-refresh-timeout | The cached data for a file will be retained till 'cache-refresh-timeout' seconds, after which data re-validation is performed. | 1s | 0-61
+performance.cache-size | Size of the read cache. | 32 MB | size in bytes
+server.allow-insecure | Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. | On | On/Off
+server.grace-timeout | Specifies the duration for the lock state to be maintained on the server after a network disconnection. | 10 | 10 - 1800 secs
+server.statedump-path | Location of the state dump file. | tmp directory of the brick | New directory path
+storage.health-check-interval | Number of seconds between health-checks done on the filesystem that is used for the brick(s). Set to 0 to disable. | 30 secs | Number of seconds
+
+You can view the changed volume options using the following command:
+
+    `# gluster volume info VOLNAME`
+
+
+##Expanding Volumes

You can expand volumes, as needed, while the cluster is online and
available. For example, you might want to add a brick to a distributed
@@ -221,8 +158,8 @@ replicated volume, increasing the capacity of the GlusterFS volume.

You can use the rebalance command as described in ?.

-Shrinking Volumes
-=================
+
+##Shrinking Volumes

You can shrink volumes, as needed, while the cluster is online and
available. For example, you might need to remove a brick that has become
@@ -295,8 +232,8 @@ set).

You can use the rebalance command as described in ?.

-Migrating Volumes
-=================
+
+##Migrating Volumes

You can migrate the data from one brick to another, as needed, while the
cluster is online and available.
@@ -306,8 +243,6 @@ cluster is online and available.

 1. Make sure the new brick, server5 in this example, is successfully
    added to the cluster.
-    For more information, see ?.
-
 2. Migrate the data from one brick to another using the following
    command:

@@ -401,8 +336,8 @@ cluster is online and available.

In the above example, previously there were bricks 1, 2, 3, and 4; now
brick 3 is replaced by brick 5.

-Rebalancing Volumes
-===================
+
+##Rebalancing Volumes

After expanding or shrinking a volume (using the add-brick and
remove-brick commands respectively), you need to rebalance the data
@@ -414,15 +349,13 @@ layout and/or data.

This section describes how to rebalance GlusterFS volumes in your storage
environment, using the following common scenarios:

-- Fix Layout - Fixes the layout changes so that the files can actually
-    go to newly added nodes. For more information, see ?.
+- **Fix Layout** - Fixes the layout changes so that the files can actually
+  go to newly added nodes.
-- Fix Layout and Migrate Data - Rebalances volume by fixing the layout - changes and migrating the existing data. For more information, see - ?. +- **Fix Layout and Migrate Data** - Rebalances volume by fixing the layout + changes and migrating the existing data. -Rebalancing Volume to Fix Layout Changes ----------------------------------------- +###Rebalancing Volume to Fix Layout Changes Fixing the layout is necessary because the layout structure is static for a given directory. In a scenario where new bricks have been added to @@ -450,8 +383,7 @@ the servers. # gluster volume rebalance test-volume fix-layout start Starting rebalance on volume test-volume has been successful -Rebalancing Volume to Fix Layout and Migrate Data -------------------------------------------------- +###Rebalancing Volume to Fix Layout and Migrate Data After expanding or shrinking a volume (using the add-brick and remove-brick commands respectively), you need to rebalance the data @@ -479,14 +411,11 @@ among the servers. # gluster volume rebalance test-volume start force Starting rebalancing on volume test-volume has been successful -Displaying Status of Rebalance Operation ----------------------------------------- +###Displaying Status of Rebalance Operation You can display the status information about rebalance volume operation, as needed. -**To view status of rebalance volume** - - Check the status of the rebalance operation, using the following command: @@ -520,13 +449,10 @@ as needed. --------- ---------------- ---- ------- ----------- 617c923e-6450-4065-8e33-865e28d9428f 502 1873 334 completed -Stopping Rebalance Operation ----------------------------- +###Stopping Rebalance Operation You can stop the rebalance operation, as needed. -**To stop rebalance** - - Stop the rebalance operation using the following command: `# gluster volume rebalance stop` @@ -539,10 +465,8 @@ You can stop the rebalance operation, as needed. 617c923e-6450-4065-8e33-865e28d9428f 59 590 244 stopped Stopped rebalance process on volume test-volume -Stopping Volumes -================ - -To stop a volume + +##Stopping Volumes 1. Stop the volume using the following command: @@ -558,10 +482,8 @@ To stop a volume Stopping volume test-volume has been successful -Deleting Volumes -================ - -To delete a volume + +##Deleting Volumes 1. Delete the volume using the following command: @@ -577,8 +499,8 @@ To delete a volume Deleting volume test-volume has been successful -Triggering Self-Heal on Replicate -================================= + +##Triggering Self-Heal on Replicate In replicate module, previously you had to manually trigger a self-heal when a brick goes offline and comes back online, to bring all the diff --git a/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md b/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md index 0312bd048..c3ac0609b 100644 --- a/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md +++ b/doc/admin-guide/en-US/markdown/admin_monitoring_workload.md @@ -1,5 +1,4 @@ -Monitoring your GlusterFS Workload -================================== +#Monitoring your GlusterFS Workload You can monitor the GlusterFS volumes on different parameters. Monitoring volumes helps in capacity planning and performance tuning @@ -14,8 +13,7 @@ performance needs to be probed. You can also perform statedump of the brick processes and nfs server process of a volume, and also view volume status and volume information. 
-Running GlusterFS Volume Profile Command -======================================== +##Running GlusterFS Volume Profile Command GlusterFS Volume Profile command provides an interface to get the per-brick I/O information for each File Operation (FOP) of a volume. The @@ -25,21 +23,17 @@ system. This section describes how to run GlusterFS Volume Profile command by performing the following operations: -- ? +- [Start Profiling](#start-profiling) +- [Displaying the I/0 Information](#displaying-io) +- [Stop Profiling](#stop-profiling) -- ? - -- ? - -Start Profiling ---------------- + +###Start Profiling You must start the Profiling to view the File Operation information for each brick. -**To start profiling:** - -- Start profiling using the following command: +To start profiling, use following command: `# gluster volume profile start ` @@ -52,17 +46,12 @@ When profiling on the volume is started, the following additional options are displayed in the Volume Info: diagnostics.count-fop-hits: on - diagnostics.latency-measurement: on -Displaying the I/0 Information ------------------------------- - -You can view the I/O information of each brick. - -To display I/O information: + +###Displaying the I/0 Information -- Display the I/O information using the following command: +You can view the I/O information of each brick by using the following command: `# gluster volume profile info` @@ -117,26 +106,23 @@ For example, to see the I/O information on test-volume: BytesWritten : 195571980 -Stop Profiling --------------- + +###Stop Profiling You can stop profiling the volume, if you do not need profiling information anymore. -**To stop profiling** - -- Stop profiling using the following command: +Stop profiling using the following command: `# gluster volume profile stop` - For example, to stop profiling on test-volume: +For example, to stop profiling on test-volume: `# gluster volume profile stop` `Profiling stopped on test-volume` -Running GlusterFS Volume TOP Command -==================================== +##Running GlusterFS Volume TOP Command GlusterFS Volume Top command allows you to view the glusterfs bricks’ performance metrics like read, write, file open calls, file read calls, @@ -146,22 +132,16 @@ top command displays up to 100 results. This section describes how to run and view the results for the following GlusterFS Top commands: -- ? - -- ? - -- ? - -- ? - -- ? +- [Viewing Open fd Count and Maximum fd Count](#open-fd-count) +- [Viewing Highest File Read Calls](#file-read) +- [Viewing Highest File Write Calls](#file-write) +- [Viewing Highest Open Calls on Directories](#open-dir) +- [Viewing Highest Read Calls on Directory](#read-dir) +- [Viewing List of Read Performance on each Brick](#read-perf) +- [Viewing List of Write Performance on each Brick](#write-perf) -- ? - -- ? - -Viewing Open fd Count and Maximum fd Count ------------------------------------------- + +###Viewing Open fd Count and Maximum fd Count You can view both current open fd count (list of files that are currently the most opened and the count) on the brick and the maximum @@ -171,8 +151,6 @@ servers are up and running). If the brick name is not specified, then open fd metrics of all the bricks belonging to the volume will be displayed. -**To view open fd count and maximum fd count:** - - View open fd count and maximum fd count using the following command: `# gluster volume top open [brick ] [list-cnt ]` @@ -221,14 +199,12 @@ displayed. 
9 /clients/client8/~dmtmp/PARADOX/ STUDENTS.DB -Viewing Highest File Read Calls -------------------------------- + +###Viewing Highest File Read Calls You can view highest read calls on each brick. If brick name is not specified, then by default, list of 100 files will be displayed. -**To view highest file Read calls:** - - View highest file Read calls using the following command: `# gluster volume top read [brick ] [list-cnt ] ` @@ -265,15 +241,13 @@ specified, then by default, list of 100 files will be displayed. 54 /clients/client8/~dmtmp/SEED/LARGE.FIL -Viewing Highest File Write Calls --------------------------------- + +###Viewing Highest File Write Calls You can view list of files which has highest file write calls on each brick. If brick name is not specified, then by default, list of 100 files will be displayed. -**To view highest file Write calls:** - - View highest file Write calls using the following command: `# gluster volume top write [brick ] [list-cnt ] ` @@ -308,15 +282,13 @@ files will be displayed. 59 /clients/client3/~dmtmp/SEED/LARGE.FIL -Viewing Highest Open Calls on Directories ------------------------------------------ + +###Viewing Highest Open Calls on Directories You can view list of files which has highest open calls on directories of each brick. If brick name is not specified, then the metrics of all the bricks belonging to that volume will be displayed. -To view list of open calls on each directory - - View list of open calls on each directory using the following command: @@ -353,15 +325,13 @@ To view list of open calls on each directory 402 /clients/client4/~dmtmp -Viewing Highest Read Calls on Directory ---------------------------------------- + +###Viewing Highest Read Calls on Directory You can view list of files which has highest directory read calls on each brick. If brick name is not specified, then the metrics of all the bricks belonging to that volume will be displayed. -**To view list of highest directory read calls on each brick** - - View list of highest directory read calls on each brick using the following command: @@ -398,8 +368,8 @@ bricks belonging to that volume will be displayed. 800 /clients/client4/~dmtmp -Viewing List of Read Performance on each Brick ----------------------------------------------- + +###Viewing List of Read Performance on each Brick You can view the read throughput of files on each brick. If brick name is not specified, then the metrics of all the bricks belonging to that @@ -443,8 +413,6 @@ volume will be displayed. The output will be the read throughput. This command will initiate a dd for the specified count and block size and measures the corresponding throughput. -**To view list of read performance on each brick** - - View list of read performance on each brick using the following command: @@ -494,9 +462,8 @@ and measures the corresponding throughput. 2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31 BASEMACH.DOC 15:39:09.336572 - -Viewing List of Write Performance on each Brick ------------------------------------------------ + +###Viewing List of Write Performance on each Brick You can view list of write throughput of files on each brick. 
If brick name is not specified, then the metrics of all the bricks belonging to @@ -552,14 +519,11 @@ performance on each brick: 516.00 /clients/client6/~dmtmp/ACCESS/ -2011-01-31 FASTENER.MDB 15:39:01.797317 -Displaying Volume Information -============================= +##Displaying Volume Information You can display information about a specific volume, or all volumes, as needed. -**To display volume information** - - Display information about a specific volume using the following command: @@ -611,8 +575,7 @@ needed. Bricks: Brick: server:/brick6 -Performing Statedump on a Volume -================================ +##Performing Statedump on a Volume Statedump is a mechanism through which you can get details of all internal variables and state of the glusterfs process at the time of @@ -668,8 +631,7 @@ dumped: `# gluster volume info ` -Displaying Volume Status -======================== +##Displaying Volume Status You can display the status information about a specific volume, brick or all volumes, as needed. Status information can be used to understand the diff --git a/doc/admin-guide/en-US/markdown/admin_setting_volumes.md b/doc/admin-guide/en-US/markdown/admin_setting_volumes.md index 4038523c8..455238048 100644 --- a/doc/admin-guide/en-US/markdown/admin_setting_volumes.md +++ b/doc/admin-guide/en-US/markdown/admin_setting_volumes.md @@ -1,5 +1,4 @@ -Setting up GlusterFS Server Volumes -=================================== +#Setting up GlusterFS Server Volumes A volume is a logical collection of bricks where each brick is an export directory on a server in the trusted storage pool. Most of the gluster @@ -12,51 +11,46 @@ start it before attempting to mount it. - Volumes of the following types can be created in your storage environment: - - Distributed - Distributed volumes distributes files throughout + - **Distributed** - Distributed volumes distributes files throughout the bricks in the volume. You can use distributed volumes where the requirement is to scale storage and the redundancy is either not important or is provided by other hardware/software layers. - For more information, see ? . - - Replicated – Replicated volumes replicates files across bricks + - **Replicated** – Replicated volumes replicates files across bricks in the volume. You can use replicated volumes in environments - where high-availability and high-reliability are critical. For - more information, see ?. + where high-availability and high-reliability are critical. - - Striped – Striped volumes stripes data across bricks in the + - **Striped** – Striped volumes stripes data across bricks in the volume. For best results, you should use striped volumes only in - high concurrency environments accessing very large files. For - more information, see ?. + high concurrency environments accessing very large files. - - Distributed Striped - Distributed striped volumes stripe data + - **Distributed Striped** - Distributed striped volumes stripe data across two or more nodes in the cluster. You should use distributed striped volumes where the requirement is to scale storage and in high concurrency environments accessing very - large files is critical. For more information, see ?. + large files is critical. - - Distributed Replicated - Distributed replicated volumes + - **Distributed Replicated** - Distributed replicated volumes distributes files across replicated bricks in the volume. You can use distributed replicated volumes in environments where the requirement is to scale storage and high-reliability is critical. 
Distributed replicated volumes also offer improved - read performance in most environments. For more information, see - ?. + read performance in most environments. - - Distributed Striped Replicated – Distributed striped replicated + - **Distributed Striped Replicated** – Distributed striped replicated volumes distributes striped data across replicated bricks in the cluster. For best results, you should use distributed striped replicated volumes in highly concurrent environments where parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported - only for Map Reduce workloads. For more information, see ?. + only for Map Reduce workloads. - - Striped Replicated – Striped replicated volumes stripes data + - **Striped Replicated** – Striped replicated volumes stripes data across replicated bricks in the cluster. For best results, you should use striped replicated volumes in highly concurrent environments where there is parallel access of very large files and performance is critical. In this release, configuration of - this volume type is supported only for Map Reduce workloads. For - more information, see ?. + this volume type is supported only for Map Reduce workloads. **To create a new volume** @@ -71,16 +65,14 @@ start it before attempting to mount it. Creation of test-volume has been successful Please start the volume to access data. -Creating Distributed Volumes -============================ +##Creating Distributed Volumes In a distributed volumes files are spread randomly across the bricks in the volume. Use distributed volumes where you need to scale storage and redundancy is either not important or is provided by other hardware/software layers. -> **Note** -> +> **Note**: > Disk/server failure in distributed volumes can result in a serious > loss of data because directory contents are spread randomly across the > bricks in the volume. @@ -89,7 +81,7 @@ hardware/software layers. **To create a distributed volume** -1. Create a trusted storage pool as described earlier in ?. +1. Create a trusted storage pool. 2. Create the distributed volume: @@ -125,23 +117,19 @@ hardware/software layers. If the transport type is not specified, *tcp* is used as the default. You can also set additional options if required, such as - auth.allow or auth.reject. For more information, see ? + auth.allow or auth.reject. - > **Note** - > + > **Note**: > Make sure you start your volumes before you try to mount them or - > else client operations after the mount will hang, see ? for - > details. + > else client operations after the mount will hang. -Creating Replicated Volumes -=========================== +##Creating Replicated Volumes Replicated volumes create copies of files across multiple bricks in the volume. You can use replicated volumes in environments where high-availability and high-reliability are critical. -> **Note** -> +> **Note**: > The number of bricks should be equal to of the replica count for a > replicated volume. To protect against server and disk failures, it is > recommended that the bricks of the volume are from different servers. @@ -150,7 +138,7 @@ high-availability and high-reliability are critical. **To create a replicated volume** -1. Create a trusted storage pool as described earlier in ?. +1. Create a trusted storage pool. 2. Create the replicated volume: @@ -164,23 +152,19 @@ high-availability and high-reliability are critical. If the transport type is not specified, *tcp* is used as the default. 
You can also set additional options if required, such as - auth.allow or auth.reject. For more information, see ? + auth.allow or auth.reject. - > **Note** - > + > **Note**: > Make sure you start your volumes before you try to mount them or - > else client operations after the mount will hang, see ? for - > details. + > else client operations after the mount will hang. -Creating Striped Volumes -======================== +##Creating Striped Volumes Striped volumes stripes data across bricks in the volume. For best results, you should use striped volumes only in high concurrency environments accessing very large files. -> **Note** -> +> **Note**: > The number of bricks should be a equal to the stripe count for a > striped volume. @@ -188,7 +172,7 @@ environments accessing very large files. **To create a striped volume** -1. Create a trusted storage pool as described earlier in ?. +1. Create a trusted storage pool. 2. Create the striped volume: @@ -202,24 +186,20 @@ environments accessing very large files. If the transport type is not specified, *tcp* is used as the default. You can also set additional options if required, such as - auth.allow or auth.reject. For more information, see ? + auth.allow or auth.reject. - > **Note** - > + > **Note**: > Make sure you start your volumes before you try to mount them or - > else client operations after the mount will hang, see ? for - > details. + > else client operations after the mount will hang. -Creating Distributed Striped Volumes -==================================== +##Creating Distributed Striped Volumes Distributed striped volumes stripes files across two or more nodes in the cluster. For best results, you should use distributed striped volumes where the requirement is to scale storage and in high concurrency environments accessing very large files is critical. -> **Note** -> +> **Note**: > The number of bricks should be a multiple of the stripe count for a > distributed striped volume. @@ -227,7 +207,7 @@ concurrency environments accessing very large files is critical. **To create a distributed striped volume** -1. Create a trusted storage pool as described earlier in ?. +1. Create a trusted storage pool. 2. Create the distributed striped volume: @@ -242,16 +222,13 @@ concurrency environments accessing very large files is critical. If the transport type is not specified, *tcp* is used as the default. You can also set additional options if required, such as - auth.allow or auth.reject. For more information, see ? + auth.allow or auth.reject. - > **Note** - > + > **Note**: > Make sure you start your volumes before you try to mount them or - > else client operations after the mount will hang, see ? for - > details. + > else client operations after the mount will hang. -Creating Distributed Replicated Volumes -======================================= +##Creating Distributed Replicated Volumes Distributes files across replicated bricks in the volume. You can use distributed replicated volumes in environments where the requirement is @@ -259,8 +236,7 @@ to scale storage and high-reliability is critical. Distributed replicated volumes also offer improved read performance in most environments. -> **Note** -> +> **Note**: > The number of bricks should be a multiple of the replica count for a > distributed replicated volume. Also, the order in which bricks are > specified has a great effect on data protection. Each replica\_count @@ -274,7 +250,7 @@ environments. **To create a distributed replicated volume** -1. 
Create a trusted storage pool as described earlier in ?. +1. Create a trusted storage pool. 2. Create the distributed replicated volume: @@ -296,16 +272,13 @@ environments. If the transport type is not specified, *tcp* is used as the default. You can also set additional options if required, such as - auth.allow or auth.reject. For more information, see ? + auth.allow or auth.reject. - > **Note** - > + > **Note**: > Make sure you start your volumes before you try to mount them or - > else client operations after the mount will hang, see ? for - > details. + > else client operations after the mount will hang. -Creating Distributed Striped Replicated Volumes -=============================================== +##Creating Distributed Striped Replicated Volumes Distributed striped replicated volumes distributes striped data across replicated bricks in the cluster. For best results, you should use @@ -314,14 +287,13 @@ where parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. -> **Note** -> +> **Note**: > The number of bricks should be a multiples of number of stripe count > and replica count for a distributed striped replicated volume. **To create a distributed striped replicated volume** -1. Create a trusted storage pool as described earlier in ?. +1. Create a trusted storage pool. 2. Create a distributed striped replicated volume using the following command: @@ -337,16 +309,13 @@ Map Reduce workloads. If the transport type is not specified, *tcp* is used as the default. You can also set additional options if required, such as - auth.allow or auth.reject. For more information, see ? + auth.allow or auth.reject. - > **Note** - > + > **Note**: > Make sure you start your volumes before you try to mount them or - > else client operations after the mount will hang, see ? for - > details. + > else client operations after the mount will hang. -Creating Striped Replicated Volumes -=================================== +##Creating Striped Replicated Volumes Striped replicated volumes stripes data across replicated bricks in the cluster. For best results, you should use striped replicated volumes in @@ -354,8 +323,7 @@ highly concurrent environments where there is parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. -> **Note** -> +> **Note**: > The number of bricks should be a multiple of the replicate count and > stripe count for a striped replicated volume. @@ -366,8 +334,6 @@ of this volume type is supported only for Map Reduce workloads. 1. Create a trusted storage pool consisting of the storage servers that will comprise the volume. - For more information, see ?. - 2. Create a striped replicated volume : `# gluster volume create [stripe ] [replica ] [transport tcp | rdma | tcp,rdma] ` @@ -387,16 +353,13 @@ of this volume type is supported only for Map Reduce workloads. If the transport type is not specified, *tcp* is used as the default. You can also set additional options if required, such as - auth.allow or auth.reject. For more information, see ? + auth.allow or auth.reject. - > **Note** - > + > **Note**: > Make sure you start your volumes before you try to mount them or - > else client operations after the mount will hang, see ? for - > details. + > else client operations after the mount will hang. 
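To make the brick ordering for this volume type concrete, here is an
illustrative sketch (the server and export names are hypothetical) of an
eight-brick striped replicated volume with stripe count 4 and replica
count 2. Consecutive bricks form a replica set, so each pair below spans
two different servers:

    # gluster volume create test-volume stripe 4 replica 2 transport tcp \
          server1:/exp1 server2:/exp1 server3:/exp1 server4:/exp1 \
          server1:/exp2 server2:/exp2 server3:/exp2 server4:/exp2
    Creation of test-volume has been successful
    Please start the volume to access data.
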
-Starting Volumes -================ +##Starting Volumes You must start your volumes before you try to mount them. @@ -411,9 +374,9 @@ You must start your volumes before you try to mount them. # gluster volume start test-volume Starting test-volume has been successful - []: images/Distributed_Volume.png - [1]: images/Replicated_Volume.png - [2]: images/Striped_Volume.png - [3]: images/Distributed_Striped_Volume.png - [4]: images/Distributed_Replicated_Volume.png - [5]: images/Striped_Replicated_Volume.png + []: ../images/Distributed_Volume.png + [1]: ../images/Replicated_Volume.png + [2]: ../images/Striped_Volume.png + [3]: ../images/Distributed_Striped_Volume.png + [4]: ../images/Distributed_Replicated_Volume.png + [5]: ../images/Striped_Replicated_Volume.png diff --git a/doc/admin-guide/en-US/markdown/admin_settingup_clients.md b/doc/admin-guide/en-US/markdown/admin_settingup_clients.md index 85b28c952..bb45c8b89 100644 --- a/doc/admin-guide/en-US/markdown/admin_settingup_clients.md +++ b/doc/admin-guide/en-US/markdown/admin_settingup_clients.md @@ -1,5 +1,4 @@ -Accessing Data - Setting Up GlusterFS Client -============================================ +#Accessing Data - Setting Up GlusterFS Client You can access gluster volumes in multiple ways. You can use Gluster Native Client method for high concurrency, performance and transparent @@ -13,8 +12,7 @@ You can use CIFS to access volumes when using Microsoft Windows as well as SAMBA clients. For this access method, Samba packages need to be present on the client side. -Gluster Native Client -===================== +##Gluster Native Client The Gluster Native Client is a FUSE-based client running in user space. Gluster Native Client is the recommended method for accessing volumes @@ -25,8 +23,7 @@ install the software on client machines. This section also describes how to mount volumes on clients (both manually and automatically) and how to verify that the volume has mounted successfully. -Installing the Gluster Native Client ------------------------------------- +###Installing the Gluster Native Client Before you begin installing the Gluster Native Client, you need to verify that the FUSE module is loaded on the client and has access to @@ -39,7 +36,6 @@ the required modules as follows: 2. Verify that the FUSE module is loaded: `# dmesg | grep -i fuse ` - `fuse init (API version 7.13)` ### Installing on Red Hat Package Manager (RPM) Distributions @@ -59,7 +55,6 @@ To install Gluster Native Client on RPM distribution-based systems You can use the following chains with iptables: `$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT ` - `$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT` > **Note** @@ -80,9 +75,7 @@ To install Gluster Native Client on RPM distribution-based systems 4. Install Gluster Native Client on the client. `$ sudo rpm -i glusterfs-3.3.0qa30-1.x86_64.rpm ` - `$ sudo rpm -i glusterfs-fuse-3.3.0qa30-1.x86_64.rpm ` - `$ sudo rpm -i glusterfs-rdma-3.3.0qa30-1.x86_64.rpm` > **Note** @@ -134,7 +127,6 @@ To install Gluster Native Client on Debian-based distributions You can use the following chains with iptables: `$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT ` - `$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT` > **Note** @@ -150,7 +142,6 @@ To build and install Gluster Native Client from the source code 1. 
Create a new directory using the following commands: `# mkdir glusterfs ` - `# cd glusterfs` 2. Download the source code. @@ -165,21 +156,14 @@ To build and install Gluster Native Client from the source code `# ./configure ` - `GlusterFS configure summary ` - - `================== ` - - `FUSE client : yes ` - - `Infiniband verbs : yes ` - - `epoll IO multiplex : yes ` - - `argp-standalone : no ` - - `fusermount : no ` - - `readline : yes` + GlusterFS configure summary + =========================== + FUSE client : yes + Infiniband verbs : yes + epoll IO multiplex : yes + argp-standalone : no + fusermount : no + readline : yes The configuration summary shows the components that will be built with Gluster Native Client. @@ -188,7 +172,6 @@ To build and install Gluster Native Client from the source code commands: `# make ` - `# make install` 6. Verify that the correct version of Gluster Native Client is @@ -196,18 +179,13 @@ To build and install Gluster Native Client from the source code `# glusterfs –-version` -Mounting Volumes ----------------- +##Mounting Volumes After installing the Gluster Native Client, you need to mount Gluster volumes to access data. There are two methods you can choose: -- ? - -- ? - -After mounting a volume, you can test the mounted volume using the -procedure described in ?. +- [Manually Mounting Volumes](#manual-mount) +- [Automatically Mounting Volumes](#auto-mount) > **Note** > @@ -215,10 +193,9 @@ procedure described in ?. > in the client machine. You can use appropriate /etc/hosts entries or > DNS server to resolve server names to IP addresses. + ### Manually Mounting Volumes -To manually mount a Gluster volume - - To mount a volume, use the following command: `# mount -t glusterfs HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR` @@ -272,6 +249,7 @@ attempts to fetch volume files while mounting a volume. This option is useful when you mount a server with multiple IP addresses or when round-robin DNS is configured for the server-name.. + ### Automatically Mounting Volumes You can configure your system to automatically mount the Gluster volume @@ -282,8 +260,6 @@ gluster configuration volfile describing the volume name. Subsequently, the client will communicate directly with the servers mentioned in the volfile (which might not even include the one used for mount). -**To automatically mount a Gluster volume** - - To mount a volume, edit the /etc/fstab file and add the following line: @@ -337,17 +313,14 @@ To test mounted volumes following: `# cd MOUNTDIR ` - `# ls` - For example, `# cd /mnt/glusterfs ` - `# ls` -NFS -=== +#NFS You can use NFS v3 to access to gluster volumes. Extensive testing has be done on GNU/Linux clients and NFS implementation in other operating @@ -366,26 +339,23 @@ This section describes how to use NFS to mount Gluster volumes (both manually and automatically) and how to verify that the volume has been mounted successfully. -Using NFS to Mount Volumes +##Using NFS to Mount Volumes -------------------------- You can use either of the following methods to mount Gluster volumes: -- ? - -- ? +- [Manually Mounting Volumes Using NFS](#manual-nfs) +- [Automatically Mounting Volumes Using NFS](#auto-nfs) **Prerequisite**: Install nfs-common package on both servers and clients (only for Debian-based distribution), using the following command: `$ sudo aptitude install nfs-common ` -After mounting a volume, you can test the mounted volume using the -procedure described in ?. 
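Before attempting the mounts described below, it can help to confirm that
the Gluster NFS server has registered with the portmapper on the server
(`server1` here is only an example hostname):

    $ showmount -e server1
    $ rpcinfo -p server1
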
- + ### Manually Mounting Volumes Using NFS -To manually mount a Gluster volume using NFS +**To manually mount a Gluster volume using NFS** - To mount a volume, use the following command: @@ -423,6 +393,7 @@ To manually mount a Gluster volume using NFS ` # mount -o proto=tcp,vers=3 nfs://server1:38467/test-volume /mnt/glusterfs` + ### Automatically Mounting Volumes Using NFS You can configure your system to automatically mount Gluster volumes @@ -494,19 +465,9 @@ You can confirm that Gluster directories are mounting successfully. following: `# cd MOUNTDIR` - `# ls` - For example, - - ` - - ` - - `# ls` - -CIFS -==== +#CIFS You can use CIFS to access to volumes when using Microsoft Windows as well as SAMBA clients. For this access method, Samba packages need to be @@ -523,21 +484,18 @@ verify that the volume has mounted successfully. > can use the Mac OS X command line to access Gluster volumes using > CIFS. -Using CIFS to Mount Volumes ---------------------------- +##Using CIFS to Mount Volumes You can use either of the following methods to mount Gluster volumes: -- ? - -- ? - -After mounting a volume, you can test the mounted volume using the -procedure described in ?. +- [Exporting Gluster Volumes Through Samba](#export-samba) +- [Manually Mounting Volumes Using CIFS](#cifs-manual) +- [Automatically Mounting Volumes Using CIFS](#cifs-auto) You can also use Samba for exporting Gluster Volumes through CIFS protocol. + ### Exporting Gluster Volumes Through Samba We recommend you to use Samba for exporting Gluster volumes through the @@ -545,8 +503,7 @@ CIFS protocol. **To export volumes through CIFS protocol** -1. Mount a Gluster volume. For more information on mounting volumes, - see ?. +1. Mount a Gluster volume. 2. Setup Samba configuration to export the mount point of the Gluster volume. @@ -575,6 +532,7 @@ scripts (/etc/init.d/smb [re]start). > repeat these steps on each Gluster node. For more advanced > configurations, see Samba documentation. + ### Manually Mounting Volumes Using CIFS You can manually mount Gluster volumes using CIFS on Microsoft @@ -594,20 +552,10 @@ Windows-based client machines. The network drive (mapped to the volume) appears in the Computer window. -**Alternatively, to manually mount a Gluster volume using CIFS.** - -- Click **Start \> Run** and enter the following: - - ` - - ` - - For example: - - ` - - ` +Alternatively, to manually mount a Gluster volume using CIFS by going to +**Start \> Run** and entering Network path manually. + ### Automatically Mounting Volumes Using CIFS You can configure your system to automatically mount Gluster volumes diff --git a/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md b/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md index 43251cd01..a47ece8d9 100644 --- a/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md +++ b/doc/admin-guide/en-US/markdown/admin_start_stop_daemon.md @@ -1,5 +1,4 @@ -Managing the glusterd Service -============================= +#Managing the glusterd Service After installing GlusterFS, you must start glusterd service. The glusterd service serves as the Gluster elastic volume manager, @@ -10,16 +9,13 @@ servers non-disruptively. This section describes how to start the glusterd service in the following ways: -- ? +- [Starting and Stopping glusterd Manually](#manual) +- [Starting glusterd Automatically](#auto) -- ? +> **Note**: You must start glusterd on all GlusterFS servers. -> **Note** -> -> You must start glusterd on all GlusterFS servers. 
- -Starting and Stopping glusterd Manually -======================================= + +##Starting and Stopping glusterd Manually This section describes how to start and stop glusterd manually @@ -31,19 +27,13 @@ This section describes how to start and stop glusterd manually `# /etc/init.d/glusterd stop` -Starting glusterd Automatically -=============================== + +##Starting glusterd Automatically This section describes how to configure the system to automatically start the glusterd service every time the system boots. -To automatically start the glusterd service every time the system boots, -enter the following from the command line: - -`# chkconfig glusterd on ` - -Red Hat-based Systems ---------------------- +###Red Hat and Fedora distros To configure Red Hat-based systems to automatically start the glusterd service every time the system boots, enter the following from the @@ -51,8 +41,7 @@ command line: `# chkconfig glusterd on ` -Debian-based Systems --------------------- +###Debian and derivatives like Ubuntu To configure Debian-based systems to automatically start the glusterd service every time the system boots, enter the following from the @@ -60,8 +49,7 @@ command line: `# update-rc.d glusterd defaults` -Systems Other than Red Hat and Debain -------------------------------------- +###Systems Other than Red Hat and Debain To configure systems other than Red Hat or Debian to automatically start the glusterd service every time the system boots, enter the following diff --git a/doc/admin-guide/en-US/markdown/admin_storage_pools.md b/doc/admin-guide/en-US/markdown/admin_storage_pools.md index 2a35cbea5..a0d8837ff 100644 --- a/doc/admin-guide/en-US/markdown/admin_storage_pools.md +++ b/doc/admin-guide/en-US/markdown/admin_storage_pools.md @@ -1,5 +1,4 @@ -Setting up Trusted Storage Pools -================================ +#Setting up Trusted Storage Pools Before you can configure a GlusterFS volume, you must create a trusted storage pool consisting of the storage servers that provides bricks to a @@ -10,21 +9,18 @@ the first server, the storage pool consists of that server alone. To add additional storage servers to the storage pool, you can use the probe command from a storage server that is already trusted. -> **Note** -> -> Do not self-probe the first server/localhost. +> **Note**: Do not self-probe the first server/localhost. The GlusterFS service must be running on all storage servers that you want to add to the storage pool. See ? for more information. -Adding Servers to Trusted Storage Pool -====================================== +##Adding Servers to Trusted Storage Pool To create a trusted storage pool, add servers to the trusted storage pool -1. The hostnames used to create the storage pool must be resolvable by - DNS. +1. **The hostnames used to create the storage pool must be resolvable by + DNS** To add a server to the storage pool: @@ -42,8 +38,8 @@ pool # gluster peer probe server4 Probe successful -2. Verify the peer status from the first server using the following - commands: +2. 
**Verify the peer status from the first server using the following + commands:** # gluster peer status Number of Peers: 3 @@ -60,8 +56,7 @@ pool Uuid: 3e0caba-9df7-4f66-8e5d-cbc348f29ff7 State: Peer in Cluster (Connected) -Removing Servers from the Trusted Storage Pool -============================================== +##Removing Servers from the Trusted Storage Pool To remove a server from the storage pool: diff --git a/doc/admin-guide/en-US/markdown/admin_troubleshooting.md b/doc/admin-guide/en-US/markdown/admin_troubleshooting.md index 88fb85c24..fa19a2f71 100644 --- a/doc/admin-guide/en-US/markdown/admin_troubleshooting.md +++ b/doc/admin-guide/en-US/markdown/admin_troubleshooting.md @@ -1,60 +1,54 @@ -Troubleshooting GlusterFS -========================= +#Troubleshooting GlusterFS This section describes how to manage GlusterFS logs and most common troubleshooting scenarios related to GlusterFS. -Managing GlusterFS Logs -======================= +##Contents +* [Managing GlusterFS Logs](#logs) +* [Troubleshooting Geo-replication](#georep) +* [Troubleshooting POSIX ACLs](#posix-acls) +* [Troubleshooting Hadoop Compatible Storage](#hadoop) +* [Troubleshooting NFS](#nfs) +* [Troubleshooting File Locks](#file-locks) -This section describes how to manage GlusterFS logs by performing the -following operation: + +##Managing GlusterFS Logs -- Rotating Logs - -Rotating Logs -------------- +###Rotating Logs Administrators can rotate the log file in a volume, as needed. **To rotate a log file** -- Rotate the log file using the following command: - `# gluster volume log rotate ` - For example, to rotate the log file on test-volume: +For example, to rotate the log file on test-volume: - # gluster volume log rotate test-volume - log rotate successful + # gluster volume log rotate test-volume + log rotate successful - > **Note** - > - > When a log file is rotated, the contents of the current log file - > are moved to log-file- name.epoch-time-stamp. +> **Note** +> When a log file is rotated, the contents of the current log file +> are moved to log-file- name.epoch-time-stamp. -Troubleshooting Geo-replication -=============================== + +##Troubleshooting Geo-replication This section describes the most common troubleshooting scenarios related to GlusterFS Geo-replication. -Locating Log Files ------------------- +###Locating Log Files For every Geo-replication session, the following three log files are associated to it (four, if the slave is a gluster volume): -- Master-log-file - log file for the process which monitors the Master +- **Master-log-file** - log file for the process which monitors the Master volume - -- Slave-log-file - log file for process which initiates the changes in +- **Slave-log-file** - log file for process which initiates the changes in slave - -- Master-gluster-log-file - log file for the maintenance mount point +- **Master-gluster-log-file** - log file for the maintenance mount point that Geo-replication module uses to monitor the master volume - -- Slave-gluster-log-file - is the slave's counterpart of it +- **Slave-gluster-log-file** - is the slave's counterpart of it **Master Log File** @@ -87,8 +81,7 @@ running on slave machine), use the following commands: `/var/log/gluster/5f6e5200-756f-11e0-a1f0-0800200c9a66:remote-mirror.log` -Rotating Geo-replication Logs ------------------------------ +###Rotating Geo-replication Logs Administrators can rotate the log file of a particular master-slave session, as needed. 
When you run geo-replication's ` log-rotate` @@ -128,8 +121,7 @@ log file. # gluster volume geo-replication log rotate log rotate successful -Synchronization is not complete -------------------------------- +###Synchronization is not complete **Description**: GlusterFS Geo-replication did not synchronize the data completely but still the geo- replication status displayed is OK. @@ -138,39 +130,35 @@ completely but still the geo- replication status displayed is OK. index and restarting GlusterFS Geo- replication. After restarting, GlusterFS Geo-replication begins synchronizing all the data. All files are compared using checksum, which can be a lengthy and high resource -utilization operation on large data sets. If the error situation -persists, contact Red Hat Support. +utilization operation on large data sets. -For more information about erasing index, see ?. -Issues in Data Synchronization ------------------------------- +###Issues in Data Synchronization **Description**: Geo-replication display status as OK, but the files do not get synced, only directories and symlink gets synced with the following error message in the log: -[2011-05-02 13:42:13.467644] E [master:288:regjob] GMaster: failed to -sync ./some\_file\` + [2011-05-02 13:42:13.467644] E [master:288:regjob] GMaster: failed to + sync ./some\_file\` **Solution**: Geo-replication invokes rsync v3.0.0 or higher on the host and the remote machine. You must verify if you have installed the required version. -Geo-replication status displays Faulty very often -------------------------------------------------- +###Geo-replication status displays Faulty very often **Description**: Geo-replication displays status as faulty very often with a backtrace similar to the following: -2011-04-28 14:06:18.378859] E [syncdutils:131:log\_raise\_exception] -\: FAIL: Traceback (most recent call last): File -"/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line -152, in twraptf(\*aa) File -"/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in -listen rid, exc, res = recv(self.inf) File -"/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 42, in -recv return pickle.load(inf) EOFError + 2011-04-28 14:06:18.378859] E [syncdutils:131:log\_raise\_exception] + \: FAIL: Traceback (most recent call last): File + "/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py", line + 152, in twraptf(\*aa) File + "/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 118, in + listen rid, exc, res = recv(self.inf) File + "/usr/local/libexec/glusterfs/python/syncdaemon/repce.py", line 42, in + recv return pickle.load(inf) EOFError **Solution**: This error indicates that the RPC communication between the master gsyncd module and slave gsyncd module is broken and this can @@ -179,34 +167,28 @@ pre-requisites: - Password-less SSH is set up properly between the host and the remote machine. - - If FUSE is installed in the machine, because geo-replication module mounts the GlusterFS volume using FUSE to sync data. - - If the **Slave** is a volume, check if that volume is started. - - If the Slave is a plain directory, verify if the directory has been created already with the required permissions. - - If GlusterFS 3.2 or higher is not installed in the default location (in Master) and has been prefixed to be installed in a custom location, configure the `gluster-command` for it to point to the exact location. 
- - If GlusterFS 3.2 or higher is not installed in the default location (in slave) and has been prefixed to be installed in a custom location, configure the `remote-gsyncd-command` for it to point to the exact place where gsyncd is located. -Intermediate Master goes to Faulty State ----------------------------------------- +###Intermediate Master goes to Faulty State **Description**: In a cascading set-up, the intermediate master goes to faulty state with the following log: -raise RuntimeError ("aborting on uuid change from %s to %s" % \\ -RuntimeError: aborting on uuid change from af07e07c-427f-4586-ab9f- -4bf7d299be81 to de6b5040-8f4e-4575-8831-c4f55bd41154 + raise RuntimeError ("aborting on uuid change from %s to %s" % \\ + RuntimeError: aborting on uuid change from af07e07c-427f-4586-ab9f- + 4bf7d299be81 to de6b5040-8f4e-4575-8831-c4f55bd41154 **Solution**: In a cascading set-up the Intermediate master is loyal to the original primary master. The above log means that the @@ -214,50 +196,42 @@ geo-replication module has detected change in primary master. If this is the desired behavior, delete the config option volume-id in the session initiated from the intermediate master. -Troubleshooting POSIX ACLs -========================== + +##Troubleshooting POSIX ACLs This section describes the most common troubleshooting issues related to POSIX ACLs. -setfacl command fails with “setfacl: \: Operation not supported” error ------------------------------------------------------------------------------------------------ + setfacl command fails with “setfacl: \: Operation not supported” error You may face this error when the backend file systems in one of the servers is not mounted with the "-o acl" option. The same can be confirmed by viewing the following error message in the log file of the server "Posix access control list is not supported". -**Solution**: Remount the backend file system with "-o acl" option. For -more information, see ?. +**Solution**: Remount the backend file system with "-o acl" option. -Troubleshooting Hadoop Compatible Storage -========================================= + +##Troubleshooting Hadoop Compatible Storage -This section describes the most common troubleshooting issues related to -Hadoop Compatible Storage. - -Time Sync ---------- +###Time Sync -Running MapReduce job may throw exceptions if the time is out-of-sync on +**Problem**: Running MapReduce job may throw exceptions if the time is out-of-sync on the hosts in the cluster. **Solution**: Sync the time on all hosts using ntpd program. -Troubleshooting NFS -=================== + +##Troubleshooting NFS This section describes the most common troubleshooting issues related to NFS . -mount command on NFS client fails with “RPC Error: Program not registered” --------------------------------------------------------------------------- +###mount command on NFS client fails with “RPC Error: Program not registered” -Start portmap or rpcbind service on the NFS server. + Start portmap or rpcbind service on the NFS server. This error is encountered when the server has not started correctly. - On most Linux distributions this is fixed by starting portmap: `$ /etc/init.d/portmap start` @@ -270,8 +244,7 @@ following command is required: After starting portmap or rpcbind, gluster NFS server needs to be restarted. -NFS server start-up fails with “Port is already in use” error in the log file." 
-------------------------------------------------------------------------------- +###NFS server start-up fails with “Port is already in use” error in the log file. Another Gluster NFS server is running on the same machine. @@ -291,27 +264,21 @@ To resolve this error one of the Gluster NFS servers will have to be shutdown. At this time, Gluster NFS server does not support running multiple NFS servers on the same machine. -mount command fails with “rpc.statd” related error message ----------------------------------------------------------- +###mount command fails with “rpc.statd” related error message If the mount command fails with the following error message: -mount.nfs: rpc.statd is not running but is required for remote locking. -mount.nfs: Either use '-o nolock' to keep locks local, or start statd. - -Start rpc.statd + mount.nfs: rpc.statd is not running but is required for remote locking. + mount.nfs: Either use '-o nolock' to keep locks local, or start statd. For NFS clients to mount the NFS server, rpc.statd service must be -running on the clients. - -Start rpc.statd service by running the following command: +running on the clients. Start rpc.statd service by running the following command: `$ rpc.statd ` -mount command takes too long to finish. ---------------------------------------- +###mount command takes too long to finish. -Start rpcbind service on the NFS client. +**Start rpcbind service on the NFS client** The problem is that the rpcbind or portmap service is not running on the NFS client. The resolution for this is to start either of these services @@ -324,8 +291,7 @@ following command is required: `$ /etc/init.d/rpcbind start` -NFS server glusterfsd starts but initialization fails with “nfsrpc- service: portmap registration of program failed” error message in the log. ----------------------------------------------------------------------------------------------------------------------------------------------- +###NFS server glusterfsd starts but initialization fails with “nfsrpc- service: portmap registration of program failed” error message in the log. NFS start-up can succeed but the initialization of the NFS service can still fail preventing clients from accessing the mount points. Such a @@ -341,7 +307,7 @@ file: [2010-05-26 23:33:49] E [rpcsvc.c:2731:rpcsvc_program_unregister] rpc-service: portmap unregistration of program failed [2010-05-26 23:33:49] E [rpcsvc.c:2744:rpcsvc_program_unregister] rpc-service: Program unregistration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465 -1. Start portmap or rpcbind service on the NFS server. +1. **Start portmap or rpcbind service on the NFS server** On most Linux distributions, portmap can be started using the following command: @@ -356,7 +322,7 @@ file: After starting portmap or rpcbind, gluster NFS server needs to be restarted. -2. Stop another NFS server running on the same machine. +2. **Stop another NFS server running on the same machine** Such an error is also seen when there is another NFS server running on the same machine but it is not the Gluster NFS server. On Linux @@ -372,18 +338,17 @@ file: `$ /etc/init.d/nfs stop` -3. Restart Gluster NFS server. +3. **Restart Gluster NFS server** -mount command fails with NFS server failed error. -------------------------------------------------- +###mount command fails with NFS server failed error. 
mount command fails with following error -*mount: mount to NFS server '10.1.10.11' failed: timed out (retrying).* + *mount: mount to NFS server '10.1.10.11' failed: timed out (retrying).* Perform one of the following to resolve this issue: -1. Disable name lookup requests from NFS server to a DNS server. +1. **Disable name lookup requests from NFS server to a DNS server** The NFS server attempts to authenticate NFS clients by performing a reverse DNS lookup to match hostnames in the volume file with the @@ -400,16 +365,14 @@ Perform one of the following to resolve this issue: `option rpc-auth.addr.namelookup off ` - > **Note** - > - > Note: Remember that disabling the NFS server forces authentication + > **Note**: Remember that disabling the NFS server forces authentication > of clients to use only IP addresses and if the authentication > rules in the volume file use hostnames, those authentication rules > will fail and disallow mounting for those clients. - or + **OR** -2. NFS version used by the NFS client is other than version 3. +2. **NFS version used by the NFS client is other than version 3** Gluster NFS server supports version 3 of NFS protocol. In recent Linux kernels, the default NFS version has been changed from 3 to 4. @@ -421,18 +384,14 @@ Perform one of the following to resolve this issue: `$ mount -o vers=3 ` -showmount fails with clnt\_create: RPC: Unable to receive ---------------------------------------------------------- +###showmount fails with clnt\_create: RPC: Unable to receive Check your firewall setting to open ports 111 for portmap requests/replies and Gluster NFS server requests/replies. Gluster NFS server operates over the following port numbers: 38465, 38466, and 38467. -For more information, see ?. - -Application fails with "Invalid argument" or "Value too large for defined data type" error. -------------------------------------------------------------------------------------------- +###Application fails with "Invalid argument" or "Value too large for defined data type" error. These two errors generally happen for 32-bit nfs clients or applications that do not support 64-bit inode numbers or large files. Use the @@ -443,7 +402,6 @@ Applications that will benefit are those that were either: - built 32-bit and run on 32-bit machines such that they do not support large files by default - - built 32-bit on 64-bit systems This option is disabled by default so NFS returns 64-bit inode numbers @@ -454,8 +412,8 @@ using the following flag with gcc: ` -D_FILE_OFFSET_BITS=64` -Troubleshooting File Locks -========================== + +##Troubleshooting File Locks In GlusterFS 3.3 you can use `statedump` command to list the locks held on files. The statedump output also provides information on each lock @@ -463,16 +421,10 @@ with its range, basename, PID of the application holding the lock, and so on. You can analyze the output to know about the locks whose owner/application is no longer running or interested in that lock. After ensuring that the no application is using the file, you can clear the -lock using the following `clear lock` command: - -`# ` - -For more information on performing `statedump`, see ? - -**To identify locked file and clear locks** +lock using the following `clear lock` commands. -1. Perform statedump on the volume to view the files that are locked - using the following command: +1. 
**Perform statedump on the volume to view the files that are locked + using the following command:** `# gluster volume statedump inode` @@ -517,9 +469,9 @@ For more information on performing `statedump`, see ? lock-dump.domain.domain=vol-replicate-0 inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=0, len=0, pid = 714787072, owner=00ffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012 -2. Clear the lock using the following command: +2. **Clear the lock using the following command:** - `# ` + `# gluster volume clear-locks` For example, to clear the entry lock on `file1` of test-volume: @@ -527,9 +479,9 @@ For more information on performing `statedump`, see ? Volume clear-locks successful vol-locks: entry blocked locks=0 granted locks=1 -3. Clear the inode lock using the following command: +3. **Clear the inode lock using the following command:** - `# ` + `# gluster volume clear-locks` For example, to clear the inode lock on `file1` of test-volume: diff --git a/doc/admin-guide/en-US/markdown/gfs_introduction.md b/doc/admin-guide/en-US/markdown/gfs_introduction.md index fd2c53dc9..9f9c05815 100644 --- a/doc/admin-guide/en-US/markdown/gfs_introduction.md +++ b/doc/admin-guide/en-US/markdown/gfs_introduction.md @@ -13,7 +13,7 @@ managing data in a single global namespace. GlusterFS is based on a stackable user space design, delivering exceptional performance for diverse workloads. -![ Virtualized Cloud Environments ][] +![ Virtualized Cloud Environments ](../images/640px-GlusterFS_Architecture.png) GlusterFS is designed for today's high-performance, virtualized cloud environments. Unlike traditional data centers, cloud environments @@ -24,27 +24,8 @@ hybrid environments. GlusterFS is in production at thousands of enterprises spanning media, healthcare, government, education, web 2.0, and financial services. The -following table lists the commercial offerings and its documentation -location: +following table lists the commercial offerings: - ------------------------------------------------------------------------ - Product Documentation Location - ----------- ------------------------------------------------------------ - Red Hat [][] - Storage - Software - Appliance - - Red Hat [][1] - Virtual - Storage - Appliance - - Red Hat [][2] - Storage - ------------------------------------------------------------------------ - - [ Virtualized Cloud Environments ]: images/640px-GlusterFS_Architecture.png - []: http://docs.redhat.com/docs/en-US/Red_Hat_Storage_Software_Appliance/index.html - [1]: http://docs.redhat.com/docs/en-US/Red_Hat_Virtual_Storage_Appliance/index.html - [2]: http://docs.redhat.com/docs/en-US/Red_Hat_Storage/index.html +* [Red Hat Storage](https://access.redhat.com/site/documentation/Red_Hat_Storage/) +* Red Hat Storage Software Appliance +* Red Hat Virtual Storage Appliance diff --git a/doc/admin-guide/en-US/markdown/glossary.md b/doc/admin-guide/en-US/markdown/glossary.md index 0febaff8f..0203319b0 100644 --- a/doc/admin-guide/en-US/markdown/glossary.md +++ b/doc/admin-guide/en-US/markdown/glossary.md @@ -1,10 +1,10 @@ Glossary ======== -Brick -: A Brick is the GlusterFS basic unit of storage, represented by an +**Brick** +: A Brick is the basic unit of storage in GlusterFS, represented by an export directory on a server in the trusted storage pool. 
A Brick is - expressed by combining a server with an export directory in the + represented by combining a server name with an export directory in the following format: `SERVER:EXPORT` @@ -13,15 +13,22 @@ Brick `myhostname:/exports/myexportdir/` -Cluster +**Client** +: Any machine that mounts a GlusterFS volume. + +**Cluster** : A cluster is a group of linked computers, working together closely thus in many respects forming a single computer. -Distributed File System +**Distributed File System** : A file system that allows multiple clients to concurrently access data over a computer network. -Filesystem +**Extended Attributes** +: Extended file attributes (abbreviated xattr) is a file system feature + that enables users/programs to associate files/dirs with metadata. + +**Filesystem** : A method of storing and organizing computer files and their data. Essentially, it organizes these files into a database for the storage, organization, manipulation, and retrieval by the computer's @@ -29,7 +36,7 @@ Filesystem Source: [Wikipedia][] -FUSE +**FUSE** : Filesystem in Userspace (FUSE) is a loadable kernel module for Unix-like computer operating systems that lets non-privileged users create their own file systems without editing kernel code. This is @@ -38,26 +45,38 @@ FUSE Source: [Wikipedia][1] -Geo-Replication +**Geo-Replication** : Geo-replication provides a continuous, asynchronous, and incremental replication service from site to another over Local Area Networks (LAN), Wide Area Network (WAN), and across the Internet. -glusterd +**GFID** +: Each file/directory on a GlusterFS volume has a unique 128-bit number + associated with it called the GFID. This is analogous to inode in a + regular filesystem. + +**glusterd** : The Gluster management daemon that needs to run on all servers in the trusted storage pool. -Metadata +**Infiniband** + InfiniBand is a switched fabric computer network communications link + used in high-performance computing and enterprise data centers. + +**Metadata** : Metadata is data providing information about one or more other pieces of data. -Namespace +**Namespace** : Namespace is an abstract container or environment created to hold a logical grouping of unique identifiers or symbols. Each Gluster volume exposes a single namespace as a POSIX mount point that contains every file in the cluster. -Open Source +**Node** +: A server or computer that hosts one or more bricks. + +**Open Source** : Open source describes practices in production and development that promote access to the end product's source materials. Some consider open source a philosophy, others consider it a pragmatic @@ -76,7 +95,7 @@ Open Source Source: [Wikipedia][2] -Petabyte +**Petabyte** : A petabyte (derived from the SI prefix peta- ) is a unit of information equal to one quadrillion (short scale) bytes, or 1000 terabytes. The unit symbol for the petabyte is PB. The prefix peta- @@ -89,7 +108,7 @@ Petabyte Source: [Wikipedia][3] -POSIX +**POSIX** : Portable Operating System Interface (for Unix) is the name of a family of related standards specified by the IEEE to define the application programming interface (API), along with shell and @@ -97,34 +116,79 @@ POSIX Unix operating system. Gluster exports a fully POSIX compliant file system. -RAID +**Quorum** +: The configuration of quorum in a trusted storage pool determines the + number of server failures that the trusted storage pool can sustain. + If an additional failure occurs, the trusted storage pool becomes + unavailable. 
+
+**Quota**
+:   Quotas allow you to set limits on usage of disk space by directories or
+    by volumes.
+
+**RAID**
 :   Redundant Array of Inexpensive Disks (RAID) is a technology that
     provides increased storage reliability through redundancy, combining
     multiple low-cost, less-reliable disk drives components into a
     logical unit where all drives in the array are interdependent.
 
-RRDNS
+**RDMA**
+:   Remote direct memory access (RDMA) is a direct memory access from the
+    memory of one computer into that of another without involving either
+    one's operating system. This permits high-throughput, low-latency
+    networking, which is especially useful in massively parallel computer
+    clusters.
+
+**Rebalance**
+:   A process of fixing layout and redistributing data in a volume when a
+    brick is added or removed.
+
+**RRDNS**
 :   Round Robin Domain Name Service (RRDNS) is a method to distribute
     load across application servers. RRDNS is implemented by creating
     multiple A records with the same name and different IP addresses in
     the zone file of a DNS server.
 
-Trusted Storage Pool
+**Samba**
+:   Samba allows file and print sharing between computers running Windows and
+    computers running Linux. It is an implementation of several services and
+    protocols including SMB and CIFS.
+
+**Self-Heal**
+:   The self-heal daemon runs in the background, identifies
+    inconsistencies in files/dirs in a replicated volume and then resolves
+    or heals them. This healing process is usually required when one or more
+    bricks of a volume go down and then come up later.
+
+**Split-brain**
+:   This is a situation where data on two or more bricks in a replicated
+    volume starts to diverge in terms of content or metadata. In this state,
+    one cannot determine programmatically which set of data is "right" and
+    which is "wrong".
+
+**Translator**
+:   Translators (also called xlators) are stackable modules where each
+    module has a very specific purpose. Translators are stacked in a
+    hierarchical structure called a graph. A translator receives data
+    from its parent translator, performs necessary operations and then
+    passes the data down to its child translator in the hierarchy.
+
+**Trusted Storage Pool**
 :   A storage pool is a trusted network of storage servers. When you
     start the first server, the storage pool consists of that server
     alone.
 
-Userspace
+**Userspace**
 :   Applications running in user space don’t directly interact with
     hardware, instead using the kernel to moderate access. Userspace
     applications are generally more portable than applications in kernel
     space. Gluster is a user space application.
 
-Volfile
+**Volfile**
 :   Volfile is a configuration file used by glusterfs process. Volfile
     will be usually located at `/var/lib/glusterd/vols/VOLNAME`.
 
-Volume
+**Volume**
 :   A volume is a logical collection of bricks. Most of the gluster
     management operations happen on the volume.
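+
+    For illustration only (the server names and export paths here are
+    hypothetical, not defaults), a two-brick replicated volume could be
+    created from bricks written in the `SERVER:EXPORT` notation like this:
+
+        # gluster volume create test-volume replica 2 \
+                server1:/exports/brick1 server2:/exports/brick2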
-- cgit From 9031a90613c1cadcab32c418e0e2cc5b14afbba1 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Mon, 9 Dec 2013 16:46:06 +0530 Subject: cluster/afr: Add foreground self-heal launch capability through lookup Also renamed allow-sh-for-running-transaction -> attempt-self-heal Change-Id: I134cc79e663b532e625ffc342c59e49e71644ab3 BUG: 1039544 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/6463 Tested-by: Gluster Build System Reviewed-by: venkatesh somyajulu Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/afr-common.c | 35 +++++++++++++++++--------------- xlators/cluster/afr/src/afr-self-heald.c | 2 +- xlators/cluster/afr/src/afr.h | 3 ++- 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index af01f2ef2..a4f97e950 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1833,12 +1833,13 @@ afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this, afr_lookup_set_self_heal_params (local, this); if (afr_can_self_heal_proceed (&local->self_heal, priv)) { if (afr_is_transaction_running (local) && - (!local->allow_sh_for_running_transaction)) + (!local->attempt_self_heal)) goto out; reason = "lookup detected pending operations"; afr_launch_self_heal (frame, this, local->cont.lookup.inode, - _gf_true, local->cont.lookup.buf.ia_type, + !local->foreground_self_heal, + local->cont.lookup.buf.ia_type, reason, afr_post_gfid_sh_success, afr_self_heal_lookup_unwind); *sh_launched = _gf_true; @@ -2420,16 +2421,15 @@ int afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - void *gfid_req = NULL; - int ret = -1; - int i = 0; - int call_count = 0; - uint64_t ctx = 0; - int32_t op_errno = 0; - int allow_sh = 0; - priv = this->private; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + void *gfid_req = NULL; + int ret = -1; + int i = 0; + int call_count = 0; + uint64_t ctx = 0; + int32_t op_errno = 0; + priv = this->private; AFR_LOCAL_ALLOC_OR_GOTO (local, out); @@ -2500,10 +2500,13 @@ afr_lookup (call_frame_t *frame, xlator_t *this, /* By default assume ENOTCONN. On success it will be set to 0. 
        */
         local->op_errno = ENOTCONN;
 
-        ret = dict_get_int32 (xattr_req, "allow-sh-for-running-transaction",
-                              &allow_sh);
-        dict_del (xattr_req, "allow-sh-for-running-transaction");
-        local->allow_sh_for_running_transaction = allow_sh;
+        ret = dict_get_int32 (xattr_req, "attempt-self-heal",
+                              &local->attempt_self_heal);
+        dict_del (xattr_req, "attempt-self-heal");
+
+        ret = dict_get_int32 (xattr_req, "foreground-self-heal",
+                              &local->foreground_self_heal);
+        dict_del (xattr_req, "foreground-self-heal");
 
         ret = afr_lookup_xattr_req_prepare (local, this, xattr_req,
                                             &local->loc, &gfid_req);
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 8dbb9c69e..dfa91d785 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -641,7 +641,7 @@ _self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *ent
                 goto out;
         }
 
-        ret = dict_set_int32 (xattr_req, "allow-sh-for-running-transaction", 1);
+        ret = dict_set_int32 (xattr_req, "attempt-self-heal", 1);
 
         gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path);
 
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index dcf0f8d0c..dc0bc06fb 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -509,7 +509,8 @@ typedef struct _afr_local {
         */
         gf_boolean_t      append_write;
 
-        int               allow_sh_for_running_transaction;
+        int               attempt_self_heal;
+        int               foreground_self_heal;
 
         /* This struct contains the arguments for the "continuation"
--
cgit
From 329e38d4ab5af1a675b4d5651eda983f8a924418 Mon Sep 17 00:00:00 2001
From: Santosh Kumar Pradhan
Date: Tue, 17 Dec 2013 08:43:50 +0530
Subject: gNFS: Client cache invalidation with bad fsid

1. Problem:
A couple of issues are seen when NFS-ACL is turned ON, i.e.
 i)  NFS directory access is too slow, impacting customer workflows
     with ACL
 ii) dbench fails with 100 directories.

2. Root cause: Frequent cache invalidation on the client side when
ACL is turned ON with NFS, because the NFS server getacl() code returns
the wrong fsid to the client.

3. This attr-cache invalidation triggers frequent LOOKUP ops
for each file instead of relying on the readdir or readdirp data.
As a result, performance gets impacted.

4. In the case of the dbench workload, the problem is more severe. e.g.

Client side rpcdebug output:
===========================
Dec 16 10:16:53 santosh-3 kernel: NFS: nfs_update_inode(0:1b/12061953567282551806 ct=2 info=0x7e7f)
Dec 16 10:16:53 santosh-3 kernel: NFS: nfs_fhget(0:1b/12061953567282551806 ct=2)
Dec 16 10:16:53 santosh-3 kernel: <-- nfs_xdev_get_sb() = -116 [splat]
Dec 16 10:16:53 santosh-3 kernel: nfs_do_submount: done
Dec 16 10:16:53 santosh-3 kernel: <-- nfs_do_submount() = ffffffffffffff8c
Dec 16 10:16:53 santosh-3 kernel: <-- nfs_follow_mountpoint() = ffffffffffffff8c
Dec 16 10:16:53 santosh-3 kernel: NFS: dentry_delete(clients/client77, 20008)

As per Jeff Layton,
This occurs when the client detects that the fsid on a filehandle is
different from its parent. At that point, it tries to do a new
submount of the new filesystem onto the correct point. It means the
client got a superblock reference for the new fs and is now looking
to set up the root of the mount. It calls nfs_get_root to do that,
which basically takes the superblock and a filehandle and returns a
dentry. The problem here is that the dentry->d_inode you're getting
back looks wrong. It's not a directory as expected -- it's something
else. So the client gives up and tosses back an ESTALE.
Which clearly says that, In getacl() code while it does the stat() call to get the attrs, it forgets to populate the deviceid or fsid before going ahead and does getxattr(). FIX: 1. Fill the deviceid in iatt. 2. Do bit more clean up for the confusing part of the code. NB: Many many thanks to Niels de Vos and Jeff Layton for their help to debug the issue. Change-Id: I8d3c2a844c9d1761051a883b5ebaeb84062a11c8 BUG: 1043737 Signed-off-by: Santosh Kumar Pradhan Reviewed-on: http://review.gluster.org/6523 Reviewed-by: Rajesh Joseph Reviewed-by: Niels de Vos Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/nfs/server/src/acl3.c | 35 +++++++++++++++++++++++++---------- xlators/nfs/server/src/acl3.h | 6 ++++++ xlators/nfs/server/src/nfs3-helpers.h | 3 +++ xlators/nfs/server/src/nfs3.h | 4 ++++ 4 files changed, 38 insertions(+), 10 deletions(-) diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c index 59c7637e3..5286077a8 100644 --- a/xlators/nfs/server/src/acl3.c +++ b/xlators/nfs/server/src/acl3.c @@ -66,7 +66,8 @@ nfs3_stat_to_fattr3 (struct iatt *buf); #define acl3_validate_gluster_fh(handle, status, errlabel) \ do { \ if (!nfs3_fh_validate (handle)) { \ - status = NFS3ERR_SERVERFAULT; \ + gf_log (GF_ACL, GF_LOG_ERROR, "Bad Handle"); \ + status = NFS3ERR_BADHANDLE; \ goto errlabel; \ } \ } while (0) \ @@ -321,6 +322,7 @@ acl3_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, getaclreply *getaclreply = NULL; int ret = -1; nfs_user_t nfu = {0, }; + uint64_t deviceid = 0; if (!frame->local) { gf_log (GF_ACL, GF_LOG_ERROR, "Invalid argument," @@ -336,14 +338,18 @@ acl3_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto err; } - getaclreply->attr_follows = 1; + /* Fill the attrs before xattrs */ + getaclreply->attr_follows = TRUE; + deviceid = nfs3_request_xlator_deviceid (cs->req); + nfs3_map_deviceid_to_statdev (buf, deviceid); getaclreply->attr = nfs3_stat_to_fattr3 (buf); - getaclreply->mask = 0xf; + getaclreply->mask = (NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT); + nfs_request_user_init (&nfu, cs->req); - ret = nfs_getxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc, NULL, NULL, - acl3_getacl_cbk, cs); - if (ret == -1) { - stat = nfs3_cbk_errno_status (op_ret, op_errno); + ret = nfs_getxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc, + NULL, NULL, acl3_getacl_cbk, cs); + if (ret < 0) { + stat = nfs3_errno_to_nfsstat3 (-ret); goto err; } return 0; @@ -409,6 +415,13 @@ acl3svc_getacl (rpcsvc_request_t *req) rpcsvc_request_seterr (req, GARBAGE_ARGS); goto rpcerr; } + + /* Validate ACL mask */ + if (getaclargs.mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) { + stat = NFS3ERR_INVAL; + goto acl3err; + } + fhp = &fh; acl3_validate_gluster_fh (&fh, stat, acl3err); acl3_map_fh_to_volume (nfs->nfs3state, fhp, req, @@ -470,11 +483,13 @@ acl3_setacl_resume (void *carg) nfs_request_user_init (&nfu, cs->req); xattr = dict_new(); if (cs->aclcount) - ret = dict_set_static_bin (xattr, POSIX_ACL_ACCESS_XATTR, cs->aclxattr, - cs->aclcount * 8 + 4); + ret = dict_set_static_bin (xattr, POSIX_ACL_ACCESS_XATTR, + cs->aclxattr, + posix_acl_xattr_size (cs->aclcount)); if (cs->daclcount) ret = dict_set_static_bin (xattr, POSIX_ACL_DEFAULT_XATTR, - cs->daclxattr, cs->daclcount * 8 + 4); + cs->daclxattr, + posix_acl_xattr_size (cs->daclcount)); ret = nfs_setxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc, xattr, 0, NULL, acl3_setacl_cbk, cs); diff --git a/xlators/nfs/server/src/acl3.h b/xlators/nfs/server/src/acl3.h index e0e61281a..03d626f3e 
100644 --- a/xlators/nfs/server/src/acl3.h +++ b/xlators/nfs/server/src/acl3.h @@ -16,6 +16,12 @@ #define GF_ACL3_PORT 38469 #define GF_ACL GF_NFS"-ACL" +/* Flags for the getacl/setacl mode */ +#define NFS_ACL 0x0001 +#define NFS_ACLCNT 0x0002 +#define NFS_DFACL 0x0004 +#define NFS_DFACLCNT 0x0008 + /* * NFSv3, identifies the default ACL by NFS_ACL_DEFAULT. Gluster * NFS needs to mask it OFF before sending it upto POSIX layer diff --git a/xlators/nfs/server/src/nfs3-helpers.h b/xlators/nfs/server/src/nfs3-helpers.h index 4de1d5623..eada24221 100644 --- a/xlators/nfs/server/src/nfs3-helpers.h +++ b/xlators/nfs/server/src/nfs3-helpers.h @@ -334,4 +334,7 @@ nfs3_is_parentdir_entry (char *entry); uint32_t nfs3_request_to_accessbits (int32_t accbits); +void +nfs3_map_deviceid_to_statdev (struct iatt *ia, uint64_t deviceid); + #endif diff --git a/xlators/nfs/server/src/nfs3.h b/xlators/nfs/server/src/nfs3.h index 023b394cf..e64ef9d15 100644 --- a/xlators/nfs/server/src/nfs3.h +++ b/xlators/nfs/server/src/nfs3.h @@ -280,4 +280,8 @@ nfs3svc_init (xlator_t *nfsx); extern int nfs3_reconfigure_state (xlator_t *nfsx, dict_t *options); + +extern uint64_t +nfs3_request_xlator_deviceid (rpcsvc_request_t *req); + #endif -- cgit From 99f9dedd5b0f817364deff7c2bd1a8401e067e40 Mon Sep 17 00:00:00 2001 From: James Shubin Date: Fri, 13 Dec 2013 10:44:34 -0500 Subject: Add boilerplate for easy, local, documentation testing. Change-Id: I5386b4e1f05a30b051d81eaca557c93ede198b51 Signed-off-by: James Shubin Reviewed-on: http://review.gluster.org/6506 Reviewed-by: Vijay Bellur Tested-by: Vijay Bellur --- doc/admin-guide/en-US/markdown/.gitignore | 2 ++ doc/admin-guide/en-US/markdown/pdfgen.sh | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 doc/admin-guide/en-US/markdown/.gitignore create mode 100755 doc/admin-guide/en-US/markdown/pdfgen.sh diff --git a/doc/admin-guide/en-US/markdown/.gitignore b/doc/admin-guide/en-US/markdown/.gitignore new file mode 100644 index 000000000..9eed46004 --- /dev/null +++ b/doc/admin-guide/en-US/markdown/.gitignore @@ -0,0 +1,2 @@ +output/*.pdf + diff --git a/doc/admin-guide/en-US/markdown/pdfgen.sh b/doc/admin-guide/en-US/markdown/pdfgen.sh new file mode 100755 index 000000000..68b320617 --- /dev/null +++ b/doc/admin-guide/en-US/markdown/pdfgen.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# pdfgen.sh simple pdf generation helper script. +# Copyright (C) 2012-2013 James Shubin +# Written by James Shubin + +#dir='/tmp/pdf' +dir=`pwd`'/output/' +ln -s ../images images +mkdir -p "$dir" + +for i in *.md; do + pandoc $i -o "$dir"`echo $i | sed 's/\.md$/\.pdf/'` +done + +rm images # remove symlink + -- cgit From 94915bc1b3b487ddb6d11412493877c9bd6d101b Mon Sep 17 00:00:00 2001 From: James Shubin Date: Fri, 13 Dec 2013 10:44:34 -0500 Subject: Added Puppet-Gluster documentation from Puppet-Gluster project. 
Change-Id: I196d97fb446f5938671c4cde6c460334999679cf Signed-off-by: James Shubin Reviewed-on: http://review.gluster.org/6507 Reviewed-by: Vijay Bellur Tested-by: Vijay Bellur --- doc/admin-guide/en-US/markdown/admin_puppet.md | 499 +++++++++++++++++++++++++ 1 file changed, 499 insertions(+) create mode 100644 doc/admin-guide/en-US/markdown/admin_puppet.md diff --git a/doc/admin-guide/en-US/markdown/admin_puppet.md b/doc/admin-guide/en-US/markdown/admin_puppet.md new file mode 100644 index 000000000..103449be0 --- /dev/null +++ b/doc/admin-guide/en-US/markdown/admin_puppet.md @@ -0,0 +1,499 @@ +#Puppet-Gluster + +##A GlusterFS Puppet module by [James](https://ttboj.wordpress.com/) +####Available from: +####[https://github.com/purpleidea/puppet-gluster/](https://github.com/purpleidea/puppet-gluster/) + +####Also available from: +####[https://forge.gluster.org/puppet-gluster/](https://forge.gluster.org/puppet-gluster/) + +####Table of Contents + +1. [Overview](#overview) +2. [Module description - What the module does](#module-description) +3. [Setup - Getting started with Puppet-Gluster](#setup) + * [What can Puppet-Gluster manage?](#what-can-puppet-gluster-manage) + * [Simple setup](#simple-setup) + * [Elastic setup](#elastic-setup) + * [Advanced setup](#advanced-setup) +4. [Usage/FAQ - Notes on management and frequently asked questions](#usage-and-frequently-asked-questions) +5. [Reference - Class and type reference](#reference) + * [gluster::simple](#glustersimple) + * [gluster::elastic](#glusterelastic) + * [gluster::server](#glusterserver) + * [gluster::host](#glusterhost) + * [gluster::brick](#glusterbrick) + * [gluster::volume](#glustervolume) + * [gluster::volume::property](#glustervolumeproperty) +6. [Examples - Example configurations](#examples) +7. [Limitations - Puppet versions, OS compatibility, etc...](#limitations) +8. [Development - Background on module development](#development) +9. [Author - Author and contact information](#author) + +##Overview + +The Puppet-Gluster module installs, configures, and manages a GlusterFS cluster. + +##Module Description + +This Puppet-Gluster module handles installation, configuration, and management +of GlusterFS across all of the hosts in the cluster. + +##Setup + +###What can Puppet-Gluster manage? + +Puppet-Gluster is designed to be able to manage as much or as little of your +GlusterFS cluster as you wish. All features are optional. If there is a feature +that doesn't appear to be optional, and you believe it should be, please let me +know. Having said that, it makes good sense to me to have Puppet-Gluster manage +as much of your GlusterFS infrastructure as it can. At the moment, it cannot +rack new servers, but I am accepting funding to explore this feature ;) At the +moment it can manage: + +* GlusterFS packages (rpm) +* GlusterFS configuration files (/var/lib/glusterd/) +* GlusterFS host peering (gluster peer probe) +* GlusterFS storage partitioning (fdisk) +* GlusterFS storage formatting (mkfs) +* GlusterFS brick creation (mkdir) +* GlusterFS services (glusterd) +* GlusterFS firewalling (whitelisting) +* GlusterFS volume creation (gluster volume create) +* GlusterFS volume state (started/stopped) +* GlusterFS volume properties (gluster volume set) +* And much more... + +###Simple setup + +include '::gluster::simple' is enough to get you up and running. When using the +gluster::simple class, or with any other Puppet-Gluster configuration, +identical definitions must be used on all hosts in the cluster. 
The simplest
+way to accomplish this is with a single shared puppet host definition like:
+
+```puppet
+node /^annex\d+$/ {	# annex{1,2,..N}
+	class { '::gluster::simple':
+	}
+}
+```
+
+If you wish to pass in different parameters, you can specify them in the class
+before you provision your hosts:
+
+```puppet
+class { '::gluster::simple':
+	replica => 2,
+	volume => ['volume1', 'volume2', 'volumeN'],
+}
+```
+
+###Elastic setup
+
+The gluster::elastic class is not yet available. Stay tuned!
+
+###Advanced setup
+
+Some system administrators may wish to manually itemize each of the required
+components for the Puppet-Gluster deployment. This happens automatically with
+the higher level modules, but may still be a desirable feature, particularly
+for non-elastic storage pools where the configuration isn't expected to change
+very often (if ever).
+
+To put together your cluster piece by piece, you must manually include and
+define each class and type that you wish to use. If there are certain aspects
+that you wish to manage yourself, you can omit them from your configuration.
+See the [reference](#reference) section below for the specifics. Here is one
+possible example:
+
+```puppet
+class { '::gluster::server':
+	shorewall => true,
+}
+
+gluster::host { 'annex1.example.com':
+	# use uuidgen to make these
+	uuid => '1f660ca2-2c78-4aa0-8f4d-21608218c69c',
+}
+
+# note that this is using a folder on your existing file system...
+# this can be useful for prototyping gluster using virtual machines
+# if this isn't a separate partition, remember that your root fs will
+# run out of space when your gluster volume does!
+gluster::brick { 'annex1.example.com:/data/gluster-storage1':
+	areyousure => true,
+}
+
+gluster::host { 'annex2.example.com':
+	# NOTE: specifying a host uuid is now optional!
+	# if you don't choose one, one will be assigned
+	#uuid => '2fbe6e2f-f6bc-4c2d-a301-62fa90c459f8',
+}
+
+gluster::brick { 'annex2.example.com:/data/gluster-storage2':
+	areyousure => true,
+}
+
+$brick_list = [
+	'annex1.example.com:/data/gluster-storage1',
+	'annex2.example.com:/data/gluster-storage2',
+]
+
+gluster::volume { 'examplevol':
+	replica => 2,
+	bricks => $brick_list,
+	start => undef,	# i'll start this myself
+}
+
+# namevar must be: <VOLNAME>#<KEY>
+gluster::volume::property { 'examplevol#auth.reject':
+	value => ['192.0.2.13', '198.51.100.42', '203.0.113.69'],
+}
+```
+
+##Usage and frequently asked questions
+
+All management should be done by manipulating the arguments on the appropriate
+Puppet-Gluster classes and types. Since certain manipulations are either not
+yet possible with Puppet-Gluster, or are not supported by GlusterFS, attempting
+to manipulate the Puppet configuration in an unsupported way will result in
+undefined behaviour, and possibly even data loss; however, this is unlikely.
+
+###How do I change the replica count?
+
+You must set this before volume creation. This is a limitation of GlusterFS.
+There are certain situations where you can change the replica count by adding
+a multiple of the existing brick count to get this desired effect. These cases
+are not yet supported by Puppet-Gluster. If you want to use Puppet-Gluster
+before and / or after this transition, you can do so, but you'll have to do the
+changes manually.
+
+###Do I need to use a virtual IP?
+
+Using a virtual IP (VIP) is strongly recommended as a distributed lock manager
+(DLM) and also to provide a highly-available (HA) IP address for your clients
+to connect to.
For a more detailed explanation of the reasoning please see: + +[https://ttboj.wordpress.com/2012/08/23/how-to-avoid-cluster-race-conditions-or-how-to-implement-a-distributed-lock-manager-in-puppet/](https://ttboj.wordpress.com/2012/08/23/how-to-avoid-cluster-race-conditions-or-how-to-implement-a-distributed-lock-manager-in-puppet/) + +Remember that even if you're using a hosted solution (such as AWS) that doesn't +provide an additional IP address, or you want to avoid using an additional IP, +and you're okay not having full HA client mounting, you can use an unused +private RFC1918 IP address as the DLM VIP. Remember that a layer 3 IP can +co-exist on the same layer 2 network with the layer 3 network that is used by +your cluster. + +###Is it possible to have Puppet-Gluster complete in a single run? + +No. This is a limitation of Puppet, and is related to how GlusterFS operates. +For example, it is not reliably possible to predict which ports a particular +GlusterFS volume will run on until after the volume is started. As a result, +this module will initially whitelist connections from GlusterFS host IP +addresses, and then further restrict this to only allow individual ports once +this information is known. This is possible in conjunction with the +[puppet-shorewall](https://github.com/purpleidea/puppet-shorewall) module. +You should notice that each run should complete without error. If you do see an +error, it means that either something is wrong with your system and / or +configuration, or because there is a bug in Puppet-Gluster. + +###Can you integrate this with vagrant? + +Not until vagrant properly supports libvirt/KVM. I have no desire to use +VirtualBox for fun. + +###Awesome work, but it's missing support for a feature and/or platform! + +Since this is an Open Source / Free Software project that I also give away for +free (as in beer, free as in gratis, free as in libre), I'm unable to provide +unlimited support. Please consider donating funds, hardware, virtual machines, +and other resources. For specific needs, you could perhaps sponsor a feature! + +###You didn't answer my question, or I have a question! + +Contact me through my [technical blog](https://ttboj.wordpress.com/contact/) +and I'll do my best to help. If you have a good question, please remind me to +add my answer to this documentation! + +##Reference +Please note that there are a number of undocumented options. For more +information on these options, please view the source at: +[https://github.com/purpleidea/puppet-gluster/](https://github.com/purpleidea/puppet-gluster/). +If you feel that a well used option needs documenting here, please contact me. + +###Overview of classes and types + +* [gluster::simple](#glustersimple): Simple Puppet-Gluster deployment. +* [gluster::elastic](#glusterelastic): Under construction. +* [gluster::server](#glusterserver): Base class for server hosts. +* [gluster::host](#glusterhost): Host type for each participating host. +* [gluster::brick](#glusterbrick): Brick type for each defined brick, per host. +* [gluster::volume](#glustervolume): Volume type for each defined volume. +* [gluster::volume::property](#glustervolumeproperty): Manages properties for each volume. + +###gluster::simple +This is gluster::simple. It should probably take care of 80% of all use cases. +It is particularly useful for deploying quick test clusters. It uses a +finite-state machine (FSM) to decide when the cluster has settled and volume +creation can begin. 
For more information on the FSM in Puppet-Gluster see: +[https://ttboj.wordpress.com/2013/09/28/finite-state-machines-in-puppet/](https://ttboj.wordpress.com/2013/09/28/finite-state-machines-in-puppet/) + +####`replica` +The replica count. Can't be changed automatically after initial deployment. + +####`volume` +The volume name or list of volume names to create. + +####`path` +The valid brick path for each host. Defaults to local file system. If you need +a different path per host, then Gluster::Simple will not meet your needs. + +####`vip` +The virtual IP address to be used for the cluster distributed lock manager. + +####`shorewall` +Boolean to specify whether puppet-shorewall integration should be used or not. + +###gluster::elastic +Under construction. + +###gluster::server +Main server class for the cluster. Must be included when building the GlusterFS +cluster manually. Wrapper classes such as [gluster::simple](#glustersimple) +include this automatically. + +####`vip` +The virtual IP address to be used for the cluster distributed lock manager. + +####`shorewall` +Boolean to specify whether puppet-shorewall integration should be used or not. + +###gluster::host +Main host type for the cluster. Each host participating in the GlusterFS +cluster must define this type on itself, and on every other host. As a result, +this is not a singleton like the [gluster::server](#glusterserver) class. + +####`ip` +Specify which IP address this host is using. This defaults to the +_$::ipaddress_ variable. Be sure to set this manually if you're declaring this +yourself on each host without using exported resources. If each host thinks the +other hosts should have the same IP address as itself, then Puppet-Gluster and +GlusterFS won't work correctly. + +####`uuid` +Universally unique identifier (UUID) for the host. If empty, Puppet-Gluster +will generate this automatically for the host. You can generate your own +manually with _uuidgen_, and set them yourself. I found this particularly +useful for testing, because I would pick easy to recognize UUID's like: +_aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa_, +_bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb_, and so on. If you set a UUID manually, +and Puppet-Gluster has a chance to run, then it will remember your choice, and +store it locally to be used again if you no longer specify the UUID. This is +particularly useful for upgrading an existing un-managed GlusterFS installation +to a Puppet-Gluster managed one, without changing any UUID's. + +###gluster::brick +Main brick type for the cluster. Each brick is an individual storage segment to +be used on a host. Each host must have at least one brick to participate in the +cluster, but usually a host will have multiple bricks. A brick can be as simple +as a file system folder, or it can be a separate file system. Please read the +official GlusterFS documentation, if you aren't entirely comfortable with the +concept of a brick. + +For most test clusters, and for experimentation, it is easiest to use a +directory on the root file system. You can even use a _/tmp_ sub folder if you +don't care about the persistence of your data. For more serious clusters, you +might want to create separate file systems for your data. On self-hosted iron, +it is not uncommon to create multiple RAID-6 drive pools, and to then create a +separate file system per virtual drive. Each file system can then be used as a +single brick. 
+
+So that each volume in GlusterFS has the maximum ability to grow, without
+having to partition storage separately, the bricks in Puppet-Gluster are
+actually folders (on whatever backing store you wish) which then contain
+sub folders-- one for each volume. As a result, all the volumes on a given
+GlusterFS cluster can share the total available storage space. If you wish to
+limit the storage used by each volume, you can set up quotas. Alternatively, you
+can buy more hardware, and elastically grow your GlusterFS volumes, since the
+price per GB will be significantly less than any proprietary storage system.
+The one downside to this brick sharing is that if you have chosen the brick
+per host count specifically to match your performance requirements, and
+each GlusterFS volume on the same cluster has drastically different brick per
+host performance requirements, then this won't suit your needs. I doubt that
+anyone actually has such requirements, but if you do insist on needing this
+compartmentalization, then you can probably use the Puppet-Gluster grouping
+feature to accomplish this goal. Please let me know about your use-case, and
+be warned that the grouping feature hasn't been extensively tested.
+
+To prove to you that I care about automation, this type offers the ability to
+automatically partition and format your file systems. This means you can plug
+in new iron, boot, provision and configure the entire system automatically.
+Regrettably, I don't have a lot of test hardware to routinely use this feature.
+If you'd like to donate some, I'd be happy to test this thoroughly. Having said
+that, I have used this feature, I consider it to be extremely safe, and it has
+never caused me to lose data. If you're uncertain, feel free to look at the
+code, or avoid using this feature entirely. If you think there's a way to make
+it even safer, then feel free to let me know.
+
+####`dev`
+Block device, such as _/dev/sdc_ or _/dev/disk/by-id/scsi-0123456789abcdef_. By
+default, Puppet-Gluster will assume you're using a folder to store the brick
+data, if you don't specify this parameter.
+
+####`fsuuid`
+File system UUID. This ensures we can distinctly identify a file system. You
+can set this to be used with automatic file system creation, or you can specify
+the file system UUID that you'd like to use.
+
+####`labeltype`
+Only _gpt_ is supported. Other options include _msdos_, but this has never been
+used because of its size limitations.
+
+####`fstype`
+This should be _xfs_ or _ext4_. Using _xfs_ is recommended, but _ext4_ is also
+quite common. This only affects a file system that is getting created by this
+module. If you provision a new machine, with a root file system of _ext4_, and
+the brick you create is a root file system path, then this option does nothing.
+
+####`xfs_inode64`
+Set _inode64_ mount option when using the _xfs_ fstype. Choose _true_ to set.
+
+####`xfs_nobarrier`
+Set _nobarrier_ mount option when using the _xfs_ fstype. Choose _true_ to set.
+
+####`ro`
+Whether the file system should be mounted read only. For emergencies only.
+
+####`force`
+If _true_, this will overwrite any xfs file system it sees. This is useful for
+rebuilding GlusterFS repeatedly and wiping data. There are other safeties in
+place to stop this. In general, you probably don't ever want to touch this.
+
+####`areyousure`
+Do you want to allow Puppet-Gluster to do dangerous things? You have to set
+this to _true_ to allow Puppet-Gluster to _fdisk_ and _mkfs_ your file system.
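+
+As a rough sketch only (the hostname, mount point, device path and option
+values below are made-up examples, not defaults), a brick that asks
+Puppet-Gluster to partition and format a dedicated disk might be declared as:
+
+```puppet
+gluster::brick { 'annex1.example.com:/mnt/storage1':
+	dev         => '/dev/sdc',	# whole disk to partition and format
+	fstype      => 'xfs',		# recommended file system type
+	xfs_inode64 => true,		# set the inode64 mount option
+	areyousure  => true,		# required before fdisk/mkfs will run
+}
+```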
+ +###gluster::volume +Main volume type for the cluster. This is where a lot of the magic happens. +Remember that changing some of these parameters after the volume has been +created won't work, and you'll experience undefined behaviour. There could be +FSM based error checking to verify that no changes occur, but it has been left +out so that this code base can eventually support such changes, and so that the +user can manually change a parameter if they know that it is safe to do so. + +####`bricks` +List of bricks to use for this volume. If this is left at the default value of +_true_, then this list is built automatically. The algorithm that determines +this order does not support all possible situations, and most likely can't +handle certain corner cases. It is possible to examine the FSM to view the +selected brick order before it has a chance to create the volume. The volume +creation script won't run until there is a stable brick list as seen by the FSM +running on the host that has the DLM. If you specify this list of bricks +manually, you must choose the order to match your desired volume layout. If you +aren't sure about how to order the bricks, you should review the GlusterFS +documentation first. + +####`transport` +Only _tcp_ is supported. Possible values can include _rdma_, but this won't get +any testing if I don't have access to infiniband hardware. Donations welcome. + +####`replica` +Replica count. Usually you'll want to set this to _2_. Some users choose _3_. +Other values are seldom seen. A value of _1_ can be used for simply testing a +distributed setup, when you don't care about your data or high availability. A +value greater than _4_ is probably wasteful and unnecessary. It might even +cause performance issues if a synchronous write is waiting on a slow fourth +server. + +####`stripe` +Stripe count. Thoroughly unsupported and untested option. Not recommended for +use by GlusterFS. + +####`ping` +Do we want to include ping checks with _fping_? + +####`settle` +Do we want to run settle checks? + +####`start` +Requested state for the volume. Valid values include: _true_ (start), _false_ +(stop), or _undef_ (un-managed start/stop state). + +###gluster::volume::property +Main volume property type for the cluster. This allows you to manage GlusterFS +volume specific properties. There are a wide range of properties that volumes +support. For the full list of properties, you should consult the GlusterFS +documentation, or run the _gluster volume set help_ command. To set a property +you must use the special name pattern of: _volume_#_key_. The value argument is +used to set the associated value. It is smart enough to accept values in the +most logical format for that specific property. Some properties aren't yet +supported, so please report any problems you have with this functionality. +Because this feature is an awesome way to _document as code_ the volume +specific optimizations that you've made, make sure you use this feature even if +you don't use all the others. + +####`value` +The value to be used for this volume property. + +##Examples +For example configurations, please consult the [examples/](https://github.com/purpleidea/puppet-gluster/tree/master/examples) directory in the git +source repository. 
It is available from:
+
+[https://github.com/purpleidea/puppet-gluster/tree/master/examples](https://github.com/purpleidea/puppet-gluster/tree/master/examples)
+
+It is also available from:
+
+[https://forge.gluster.org/puppet-gluster/puppet-gluster/trees/master/examples](https://forge.gluster.org/puppet-gluster/puppet-gluster/trees/master/examples/)
+
+##Limitations
+
+This module has been tested against open source Puppet 3.2.4 and higher.
+
+The module has been tested on:
+
+* CentOS 6.4
+
+It will probably work without incident or without major modification on:
+
+* CentOS 5.x/6.x
+* RHEL 5.x/6.x
+
+It will most likely work with other Puppet versions and on other platforms, but
+testing under other conditions has been light due to lack of resources. It will
+most likely not work on Debian/Ubuntu systems without modification. I would
+really love to add support for these operating systems, but I do not have any
+test resources to do so. Please sponsor this if you'd like to see it happen.
+
+##Development
+
+This is my personal project that I work on in my free time.
+Donations of funding, hardware, virtual machines, and other resources are
+appreciated. Please contact me if you'd like to sponsor a feature, invite me to
+talk/teach or for consulting.
+
+You can follow along [on my technical blog](https://ttboj.wordpress.com/).
+
+##Author
+
+Copyright (C) 2010-2013+ James Shubin
+
+* [github](https://github.com/purpleidea/)
+* [@purpleidea](https://twitter.com/#!/purpleidea)
+* [https://ttboj.wordpress.com/](https://ttboj.wordpress.com/)
+
--
cgit
From 30bdde315e01d4d71cca121f0cba55b7ae82dd1b Mon Sep 17 00:00:00 2001
From: Kaushal M
Date: Tue, 17 Dec 2013 16:09:02 +0530
Subject: glusterd: Fix stopping of stale rebalance processes

Trying to stop the rebalance process via RPC using the GD_SYNCOP macro
could lead to glusterd crashing. In case of an implicit volume update,
which happens when a peer comes back up, the stop function would be
called in the epoll thread. This would lead to glusterd crashing as the
epoll thread doesn't have synctasks for the GD_SYNCOP macro to make use
of.

Instead of using the RPC method, we now terminate the rebalance process
by kill(). The rebalance process has been designed to be resistant to
interruption, so this will not lead to any data corruption.

Also, when checking for a stale rebalance task, make sure that the old
task-id is not null.
Change-Id: I54dd93803954ee55316cc58b5877f38d1ebc40b9 BUG: 1044327 Signed-off-by: Kaushal M Reviewed-on: http://review.gluster.org/6531 Reviewed-by: Krishnan Parthasarathi Tested-by: Gluster Build System --- xlators/mgmt/glusterd/src/glusterd-utils.c | 114 +++-------------------------- 1 file changed, 12 insertions(+), 102 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index c11965d40..b05c39c3e 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -3412,10 +3412,12 @@ gd_check_and_update_rebalance_info (glusterd_volinfo_t *old_volinfo, old = &(old_volinfo->rebal); new = &(new_volinfo->rebal); - /* If the task-id's don't match, the old volinfo task is stale and - * should be cleaned up + + /* If the old task-id is not null and the task-id's don't match, the old + * volinfo task is stale and should be cleaned up */ - if (uuid_compare (old->rebalance_id, new->rebalance_id)) { + if (!uuid_is_null (old->rebalance_id) && + uuid_compare (old->rebalance_id, new->rebalance_id)) { (void)gd_stop_rebalance_process (old_volinfo); goto out; } @@ -9241,72 +9243,15 @@ glusterd_remove_auxiliary_mount (char *volname) return ret; } -/* Just a minimal callback function to which logs if the request was successfull - * or not - */ -int -_gd_stop_rebalance_process_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *call_frame) -{ - xlator_t *this = NULL; - struct syncargs *args = NULL; - gd1_mgmt_brick_op_rsp rsp = {0,}; - int ret = -1; - call_frame_t *frame = NULL; - - this = THIS; - GF_ASSERT (this); - - frame = call_frame; - args = frame->local; - frame->local = NULL; - - if (-1 == req->rpc_status) { - gf_log (this->name, GF_LOG_WARNING, "Failed to stop rebalance " - "process."); - goto out; - } - - ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp); - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, "Failed to decode stop " - "rebalance process response."); - goto out; - } - - gf_log (this->name, GF_LOG_INFO, "Stopping rebalance process was %s.", - (rsp.op_ret ? 
"unsuccessful" : "successful")); - -out: - if ((rsp.op_errstr) && (strcmp (rsp.op_errstr, "") != 0)) - free (rsp.op_errstr); - free (rsp.output.output_val); - - STACK_DESTROY (frame->root); - __wake (args); - - return 0; -} - -int -gd_stop_rebalance_process_cbk (struct rpc_req *req, struct iovec *iov, - int count, void *call_frame) -{ - return glusterd_big_locked_cbk (req, iov, count, call_frame, - _gd_stop_rebalance_process_cbk); -} -/* Stops the rebalance process of the given volume, gracefully +/* Stops the rebalance process of the given volume */ int gd_stop_rebalance_process (glusterd_volinfo_t *volinfo) { - int ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *conf = NULL; - gd1_mgmt_brick_op_req *req = NULL; - dict_t *req_dict = NULL; - char *name = NULL; - struct syncargs args = {0,}; + int ret = -1; + xlator_t *this = NULL; + glusterd_conf_t *conf = NULL; + char pidfile[PATH_MAX] = {0,}; GF_ASSERT (volinfo); @@ -9316,43 +9261,8 @@ gd_stop_rebalance_process (glusterd_volinfo_t *volinfo) conf = this->private; GF_ASSERT (conf); - req = GF_CALLOC (1, sizeof (*req), gf_gld_mt_mop_brick_req_t); - if (!req) { - ret = -1; - goto out; - } - - req->op = GLUSTERD_BRICK_XLATOR_DEFRAG; - - ret = gf_asprintf(&name, "%s-dht", volinfo->volname); - if (ret < 0) - goto out; - req->name = name; - - req_dict = dict_new(); - if (!req_dict) { - ret = -1; - goto out; - } - - ret = dict_set_int32 (req_dict, "rebalance-command", - GF_DEFRAG_CMD_STOP); - if (ret) - goto out; - - ret = dict_allocate_and_serialize (req_dict, &req->input.input_val, - &req->input.input_len); - if (ret) - goto out; - - GD_SYNCOP (volinfo->rebal.defrag->rpc, (&args), NULL, - gd_stop_rebalance_process_cbk, req, conf->gfs_mgmt, req->op, - (xdrproc_t)xdr_gd1_mgmt_brick_op_req); -out: - - GF_FREE (name); - GF_FREE (req); - dict_unref (req_dict); + GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, conf); + ret = glusterd_service_stop ("rebalance", pidfile, SIGTERM, _gf_true); return ret; } -- cgit From 709d9247bb467b801814637bd181bc7cddd36cb5 Mon Sep 17 00:00:00 2001 From: Krishnan Parthasarathi Date: Tue, 17 Dec 2013 11:43:22 +0530 Subject: glusterd: ignore failure to stop a stopped service. kill(2) returns -1 with errno set to ESRCH when the pid of the process being killed doesn't exist. Failing glusterd_brick_stop on a stopped brick could result in volume-stop failing, in commit phase. This fix prevents that from happening. 
Change-Id: I00f46fa06e489a671efbb8e4119f545f8ccea329 BUG: 1038051 Signed-off-by: Krishnan Parthasarathi Reviewed-on: http://review.gluster.org/6525 Reviewed-by: Vijaikumar Mallikarjuna Reviewed-by: Kaushal M Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/mgmt/glusterd/src/glusterd-utils.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index b05c39c3e..6614f98db 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1142,6 +1142,18 @@ glusterd_service_stop (const char *service, char *pidfile, int sig, "%d", service, pid); ret = kill (pid, sig); + if (ret) { + switch (errno) { + case ESRCH: + gf_log (this->name, GF_LOG_DEBUG, "%s is already stopped", + service); + ret = 0; + break; + default: + gf_log (this->name, GF_LOG_ERROR, "Failed to kill %s: %s", + service, strerror (errno)); + } + } if (!force_kill) goto out; -- cgit From e22c42d9154c2d9090149003038f38164701dc08 Mon Sep 17 00:00:00 2001 From: Vijaykumar M Date: Wed, 18 Dec 2013 12:38:26 +0530 Subject: glusterd: hold the lock when submitting ACC in replace-brick replace-brick handler sets the timer to perform the replace operation and unlocks the big_lock. So we need to lock inside 'glusterd_do_replace_brick' before submitting the ACC Change-Id: I5525c62533bd92d3ac46587e5353e2443ad85851 BUG: 1044337 Signed-off-by: Vijaykumar M Reviewed-on: http://review.gluster.org/6534 Reviewed-by: Krishnan Parthasarathi Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/mgmt/glusterd/src/glusterd-replace-brick.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index 94b0383fe..5c3fc2d82 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -2012,5 +2012,9 @@ out: else ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); - glusterd_op_sm (); + synclock_lock (&priv->big_lock); + { + glusterd_op_sm (); + } + synclock_unlock (&priv->big_lock); } -- cgit From 7b392ad395ae7597a3ed3a1aa7fe61faefd96368 Mon Sep 17 00:00:00 2001 From: Anuradha Talur Date: Sun, 15 Dec 2013 13:26:15 +0000 Subject: glusterd : enable/disable quota using volume set cmd deprecated. Using the command 'gluster volume set quota {on|off}' doesn't really enable/disable quota. So, a message to the user is provided. Message asks the user to use 'gluster volume quota {enable|disable}' instead. Improvement of the previous method to solve the issue. 
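
For example (the volume name below is only illustrative):

    # deprecated form, now rejected with a pointer to the new command:
    gluster volume set test-volume quota on

    # supported way to enable quota:
    gluster volume quota test-volume enable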
Change-Id: Ibeda35bb49ec7b541d53e592b48fd132380fce76 BUG: 1024626 Signed-off-by: Anuradha Talur Reviewed-on: http://review.gluster.org/6512 Reviewed-by: Pranith Kumar Karampuri Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- cli/src/cli-cmd-parser.c | 31 ------------------------ xlators/mgmt/glusterd/src/glusterd-op-sm.c | 38 ++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 31 deletions(-) diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c index 0aeaccd0c..dd9398184 100644 --- a/cli/src/cli-cmd-parser.c +++ b/cli/src/cli-cmd-parser.c @@ -38,13 +38,6 @@ str_getunamb (const char *tok, char **opwords) return (char *)cli_getunamb (tok, (void **)opwords, id_sel); } -static inline gf_boolean_t -cli_is_quota_cmd (char *key) -{ - return ((strcmp (key, "quota") == 0) || - (strcmp (key, "features.quota") == 0)); -} - int32_t cli_cmd_bricks_parse (const char **words, int wordcount, int brick_index, char **bricks, int *brick_count) @@ -865,11 +858,6 @@ cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options, char *value = NULL; int i = 0; char str[50] = {0,}; - char *true_keys[] = {"on", "enable", "yes", - "true", "1", NULL}; - char *false_keys[] = {"off", "disable", "no", - "false", "0", NULL}; - char *w1 = NULL, *w2 = NULL; GF_ASSERT (words); GF_ASSERT (options); @@ -922,25 +910,6 @@ cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options, if (ret == 0) *options = dict; goto out; - } else if (wordcount == 5 && cli_is_quota_cmd ((char *)words[3])) { - value = (char *)words[4]; - w1 = str_getunamb (value, true_keys); - w2 = str_getunamb (value, false_keys); - if (w1 != NULL) { - gf_asprintf (op_errstr,"'gluster volume set " - "%s %s' is deprecated. Use 'gluster " - "volume quota enable' instead.", - (char *)words[3], w1); - ret = -1; - goto out; - } else if (w2 != NULL) { - gf_asprintf (op_errstr,"'gluster volume set " - "%s %s' is deprecated. Use 'gluster " - "volume quota disable' instead.", - (char *)words[3], w2); - ret = -1; - goto out; - } } for (i = 3; i < wordcount; i+=2) { diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 0bf7a3352..06ee849f5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -155,6 +155,40 @@ glusterd_op_sm_inject_all_acc () return ret; } +static int +glusterd_check_quota_cmd (char *key, char *value, char *errstr, size_t size) +{ + int ret = -1; + gf_boolean_t b = _gf_false; + + if ((strcmp (key, "quota") == 0) || + (strcmp (key, "features.quota") == 0)) { + ret = gf_string2boolean (value, &b); + if (ret) + goto out; + if (b) { + snprintf (errstr, size," 'gluster " + "volume set %s %s' is " + "deprecated. Use 'gluster volume " + "quota enable' instead.", + key, value); + ret = -1; + goto out; + } else { + snprintf (errstr, size, " 'gluster " + "volume set %s %s' is " + "deprecated. 
Use 'gluster volume " + "quota disable' instead.", + key, value); + ret = -1; + goto out; + } + } + ret = 0; +out: + return ret; +} + int glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo, gd1_mgmt_brick_op_req **req, dict_t *dict) @@ -544,6 +578,10 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) } } + ret = glusterd_check_quota_cmd (key, value, errstr, sizeof (errstr)); + if (ret) + goto out; + if (is_key_glusterd_hooks_friendly (key)) continue; -- cgit From 3991b0d0e934214882c064a0b720375af5c1575a Mon Sep 17 00:00:00 2001 From: Venkatesh Somyajulu Date: Thu, 19 Dec 2013 18:11:38 +0530 Subject: cluster/afr: For entry self heal, mark all source bricks Problem: Whenever a new brick is added into a replicate volume, all source bricks are not marked as source. Only one of them is marked as source. Here marked as source refers to adding extended attribute at the backend of a file corresponding to the newly added brick. As well as source bricks should point to the newly added brick so that heal can be triggered. Fix: All source bricks will now point to newly added bricks and heal can be triggered based on the extended attributes. Change-Id: I318e1f779a380c16c448a2d05c0140d8e4647fd4 BUG: 1037501 Signed-off-by: Venkatesh Somyajulu Reviewed-on: http://review.gluster.org/6540 Reviewed-by: Pranith Kumar Karampuri Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- tests/bugs/bug-1037501.t | 220 ++++++++++++++++++++++++++ xlators/cluster/afr/src/afr-self-heal-entry.c | 62 ++++++-- 2 files changed, 269 insertions(+), 13 deletions(-) create mode 100755 tests/bugs/bug-1037501.t diff --git a/tests/bugs/bug-1037501.t b/tests/bugs/bug-1037501.t new file mode 100755 index 000000000..5470d0563 --- /dev/null +++ b/tests/bugs/bug-1037501.t @@ -0,0 +1,220 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +function write_file() +{ + path="$1"; shift + echo "$*" > "$path" +} + +cleanup; +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +## Start and create a volume +mkdir -p ${B0}/${V0}-0 +mkdir -p ${B0}/${V0}-1 +mkdir -p ${B0}/${V0}-2 +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2} + +## Verify volume is created +EXPECT "$V0" volinfo_field $V0 'Volume Name'; +EXPECT 'Created' volinfo_field $V0 'Status'; + +## Make sure io-cache and write-behind don't interfere. +TEST $CLI volume set $V0 data-self-heal off; + +## Make sure automatic self-heal doesn't perturb our results. +TEST $CLI volume set $V0 cluster.self-heal-daemon off + +TEST $CLI volume set $V0 background-self-heal-count 0 + +## Start volume and verify +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +## Mount native +TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 + +TEST `echo "TEST-FILE" > $M0/File` +TEST `mkdir $M0/Dir` +TEST `ln $M0/File $M0/Link` +TEST `mknod $M0/FIFO p` + +TEST $CLI volume add-brick $V0 replica 4 $H0:$B0/$V0-3 force +TEST $CLI volume add-brick $V0 replica 5 $H0:$B0/$V0-4 force +TEST $CLI volume add-brick $V0 replica 6 $H0:$B0/$V0-5 force + +sleep 10 + +TEST ls $M0/ + + +function compare() +{ + var=-1; + if [ $1 == $2 ]; then + var=0; + else + var=-1; + fi + + echo $var +} + + +var1=`getfattr -d -m . $B0/$V0-0/File -e hex 2>&1 | grep "client-3"` +var2="trusted.afr.$V0-client-3=0x000000020000000200000000" + +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . 
$B0/$V0-0/File -e hex 2>&1 | grep "client-4"` +var2="trusted.afr.$V0-client-4=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-0/File -e hex 2>&1 | grep "client-5"` +var2="trusted.afr.$V0-client-5=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-1/File -e hex 2>&1 | grep "client-3"` +var2="trusted.afr.$V0-client-3=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-1/File -e hex 2>&1 | grep "client-4"` +var2="trusted.afr.$V0-client-4=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-1/File -e hex 2>&1 | grep "client-5"` +var2="trusted.afr.$V0-client-5=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-2/File -e hex 2>&1 | grep "client-3"` +var2="trusted.afr.$V0-client-3=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-2/File -e hex 2>&1 | grep "client-4"` +var2="trusted.afr.$V0-client-4=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-2/File -e hex 2>&1 | grep "client-5"` +var2="trusted.afr.$V0-client-5=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-0/Dir -e hex 2>&1 | grep "client-3"` +var2="trusted.afr.$V0-client-3=0x000000000000000100000001" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-0/Dir -e hex 2>&1 | grep "client-4"` +var2="trusted.afr.$V0-client-4=0x000000000000000100000001" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-0/Dir -e hex 2>&1 | grep "client-5"` +var2="trusted.afr.$V0-client-5=0x000000000000000100000001" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-1/Dir -e hex 2>&1 | grep "client-3"` +var2="trusted.afr.$V0-client-3=0x000000000000000100000001" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-1/Dir -e hex 2>&1 | grep "client-4"` +var2="trusted.afr.$V0-client-4=0x000000000000000100000001" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-1/Dir -e hex 2>&1 | grep "client-5"` +var2="trusted.afr.$V0-client-5=0x000000000000000100000001" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-2/Dir -e hex 2>&1 | grep "client-3"` +var2="trusted.afr.$V0-client-3=0x000000000000000100000001" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-2/Dir -e hex 2>&1 | grep "client-4"` +var2="trusted.afr.$V0-client-4=0x000000000000000100000001" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-2/Dir -e hex 2>&1 | grep "client-5"` +var2="trusted.afr.$V0-client-5=0x000000000000000100000001" +EXPECT 0 compare $var1 $var2 + + +var1=`getfattr -d -m . $B0/$V0-0/Link -e hex 2>&1 | grep "client-3"` +var2="trusted.afr.$V0-client-3=0x000000020000000200000000" + +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-0/Link -e hex 2>&1 | grep "client-4"` +var2="trusted.afr.$V0-client-4=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-0/Link -e hex 2>&1 | grep "client-5"` +var2="trusted.afr.$V0-client-5=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-1/Link -e hex 2>&1 | grep "client-3"` +var2="trusted.afr.$V0-client-3=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . 
$B0/$V0-1/Link -e hex 2>&1 | grep "client-4"` +var2="trusted.afr.$V0-client-4=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-1/Link -e hex 2>&1 | grep "client-5"` +var2="trusted.afr.$V0-client-5=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-2/Link -e hex 2>&1 | grep "client-3"` +var2="trusted.afr.$V0-client-3=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-2/Link -e hex 2>&1 | grep "client-4"` +var2="trusted.afr.$V0-client-4=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-2/Link -e hex 2>&1 | grep "client-5"` +var2="trusted.afr.$V0-client-5=0x000000020000000200000000" +EXPECT 0 compare $var1 $var2 + + + + +var1=`getfattr -d -m . $B0/$V0-0/FIFO -e hex 2>&1 | grep "client-3"` +var2="trusted.afr.$V0-client-3=0x000000000000000100000000" + +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-0/FIFO -e hex 2>&1 | grep "client-4"` +var2="trusted.afr.$V0-client-4=0x000000000000000100000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-0/FIFO -e hex 2>&1 | grep "client-5"` +var2="trusted.afr.$V0-client-5=0x000000000000000100000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-1/FIFO -e hex 2>&1 | grep "client-3"` +var2="trusted.afr.$V0-client-3=0x000000000000000100000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-1/FIFO -e hex 2>&1 | grep "client-4"` +var2="trusted.afr.$V0-client-4=0x000000000000000100000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-1/FIFO -e hex 2>&1 | grep "client-5"` +var2="trusted.afr.$V0-client-5=0x000000000000000100000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-2/FIFO -e hex 2>&1 | grep "client-3"` +var2="trusted.afr.$V0-client-3=0x000000000000000100000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . $B0/$V0-2/FIFO -e hex 2>&1 | grep "client-4"` +var2="trusted.afr.$V0-client-4=0x000000000000000100000000" +EXPECT 0 compare $var1 $var2 + +var1=`getfattr -d -m . 
$B0/$V0-2/FIFO -e hex 2>&1 | grep "client-5"` +var2="trusted.afr.$V0-client-5=0x000000000000000100000000" +EXPECT 0 compare $var1 $var2 + +cleanup; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 53491a1d7..0ca06aaa3 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -1002,6 +1002,7 @@ afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie, afr_private_t *priv = NULL; afr_local_t *impunge_local = NULL; int child_index = 0; + int call_count = -1; priv = this->private; impunge_local = impunge_frame->local; @@ -1012,16 +1013,26 @@ afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie, gf_log (this->name, GF_LOG_INFO, "%s: failed to perform xattrop on %s (%s)", impunge_local->loc.path, - priv->children[child_index]->name, - strerror (op_errno)); - goto out; + priv->children[child_index]->name, strerror (op_errno)); + + LOCK (&impunge_frame->lock); + { + impunge_local->op_ret = -1; + impunge_local->op_errno = op_errno; + } + UNLOCK (&impunge_frame->lock); } - afr_sh_entry_impunge_setattr (impunge_frame, this); - return 0; -out: - afr_sh_entry_call_impunge_done (impunge_frame, this, - -1, op_errno); + call_count = afr_frame_return (impunge_frame); + + if (call_count == 0) { + if (impunge_local->op_ret == 0) { + afr_sh_entry_impunge_setattr (impunge_frame, this); + } else { + afr_sh_entry_call_impunge_done (impunge_frame, this, + -1, impunge_local->op_errno); + } + } return 0; } @@ -1035,11 +1046,15 @@ afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame, afr_local_t *impunge_local = NULL; afr_self_heal_t *impunge_sh = NULL; int32_t op_errno = 0; + int32_t call_count = 0; + int32_t i = 0; + priv = this->private; impunge_local = impunge_frame->local; impunge_sh = &impunge_local->self_heal; active_src = impunge_sh->active_source; + impunge_local->op_ret = 0; afr_prepare_new_entry_pending_matrix (impunge_local->pending, afr_is_errno_unset, @@ -1055,11 +1070,32 @@ afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame, afr_set_pending_dict (priv, xattr, impunge_local->pending, active_src, LOCAL_LAST); - STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk, - (void *) (long) active_src, - priv->children[active_src], - priv->children[active_src]->fops->xattrop, - &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr, NULL); + for (i = 0; i < priv->child_count; i++) { + if ((impunge_sh->child_errno[i] == EEXIST) && + (impunge_local->child_up[i] == 1)) + + call_count++; + } + + impunge_local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + + if ((impunge_sh->child_errno[i] == EEXIST) + && (impunge_local->child_up[i] == 1)) { + + + STACK_WIND_COOKIE (impunge_frame, + afr_sh_entry_impunge_xattrop_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &impunge_local->loc, + GF_XATTROP_ADD_ARRAY, xattr, NULL); + if (!--call_count) + break; + } + } if (xattr) dict_unref (xattr); -- cgit From 79d5a31279825bdc61ad036b30fbe7e41b76fe5e Mon Sep 17 00:00:00 2001 From: Krishnan Parthasarathi Date: Tue, 17 Dec 2013 01:12:05 +0530 Subject: glusterd: rebalance to ref volinfo before starting Change-Id: Ib316897dcbd0748bfb3bfcda186b9fe30c07f80f BUG: 1038051 Signed-off-by: Krishnan Parthasarathi Reviewed-on: http://review.gluster.org/6522 Tested-by: Gluster Build System Reviewed-by: Kaushal M --- xlators/mgmt/glusterd/src/glusterd-rebalance.c | 4 ++++ 
xlators/mgmt/glusterd/src/glusterd-utils.c | 12 +++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index 1ac9d64ce..daa8ddd1d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -164,6 +164,9 @@ __glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata, rpc->conn.trans->name); break; } + case RPC_CLNT_DESTROY: + glusterd_volinfo_unref (volinfo); + break; default: gf_log ("", GF_LOG_TRACE, "got some other RPC event %d", event); @@ -329,6 +332,7 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo) goto out; } + glusterd_volinfo_ref (volinfo); synclock_unlock (&priv->big_lock); ret = glusterd_rpc_create (&defrag->rpc, options, glusterd_defrag_notify, volinfo); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 6614f98db..26a67184f 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -1148,7 +1148,7 @@ glusterd_service_stop (const char *service, char *pidfile, int sig, gf_log (this->name, GF_LOG_DEBUG, "%s is already stopped", service); ret = 0; - break; + goto out; default: gf_log (this->name, GF_LOG_ERROR, "Failed to kill %s: %s", service, strerror (errno)); @@ -3425,9 +3425,11 @@ gd_check_and_update_rebalance_info (glusterd_volinfo_t *old_volinfo, old = &(old_volinfo->rebal); new = &(new_volinfo->rebal); - /* If the old task-id is not null and the task-id's don't match, the old - * volinfo task is stale and should be cleaned up - */ + //Disconnect from rebalance process + if (old->defrag && old->defrag->rpc) { + rpc_transport_disconnect (old->defrag->rpc->conn.trans); + } + if (!uuid_is_null (old->rebalance_id) && uuid_compare (old->rebalance_id, new->rebalance_id)) { (void)gd_stop_rebalance_process (old_volinfo); @@ -3444,11 +3446,11 @@ gd_check_and_update_rebalance_info (glusterd_volinfo_t *old_volinfo, new->skipped_files = old->skipped_files; new->rebalance_failures = old->rebalance_failures; new->rebalance_time = old->rebalance_time; - new->defrag = old->defrag; new->dict = (old->dict ? dict_ref (old->dict) : NULL); /* glusterd_rebalance_t.{op, id, defrag_cmd} are copied during volume * import + * a new defrag object should come to life with rebalance being restarted */ out: return ret; -- cgit From 6fcc8df5956501bbb3687331ea518b231611856a Mon Sep 17 00:00:00 2001 From: Krishnan Parthasarathi Date: Mon, 16 Dec 2013 10:29:19 +0530 Subject: glusterd: make volinfo a refcnt'ed object. Add glusterd_volinfo_remove(..) 
which removes @volinfo from the list of volumes in the cluster and performs an unref on @volinfo Change-Id: I5f546ca58f61bc334ab1bab4c51c4a21e1f66161 BUG: 1038051 Signed-off-by: Krishnan Parthasarathi Reviewed-on: http://review.gluster.org/6521 Tested-by: Gluster Build System Reviewed-by: Kaushal M Reviewed-by: Vijay Bellur --- xlators/mgmt/glusterd/src/glusterd-store.c | 2 +- xlators/mgmt/glusterd/src/glusterd-utils.c | 47 +++++++++++++++++++++++-- xlators/mgmt/glusterd/src/glusterd-utils.h | 6 ++++ xlators/mgmt/glusterd/src/glusterd-volgen.c | 2 +- xlators/mgmt/glusterd/src/glusterd-volume-ops.c | 2 +- xlators/mgmt/glusterd/src/glusterd.h | 2 ++ 6 files changed, 55 insertions(+), 6 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 2ba2548d2..3a4b09009 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -166,7 +166,7 @@ out: if (brickinfo) glusterd_brickinfo_delete (brickinfo); if (volinfo) - glusterd_volinfo_delete (volinfo); + glusterd_volinfo_unref (volinfo); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 26a67184f..9ad4963be 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -445,6 +445,37 @@ glusterd_check_volume_exists (char *volname) return _gf_true; } +glusterd_volinfo_t * +glusterd_volinfo_unref (glusterd_volinfo_t *volinfo) +{ + int refcnt = -1; + + pthread_mutex_lock (&volinfo->reflock); + { + refcnt = --volinfo->refcnt; + } + pthread_mutex_unlock (&volinfo->reflock); + + if (!refcnt) { + glusterd_volinfo_delete (volinfo); + return NULL; + } + + return volinfo; +} + +glusterd_volinfo_t * +glusterd_volinfo_ref (glusterd_volinfo_t *volinfo) +{ + pthread_mutex_lock (&volinfo->reflock); + { + ++volinfo->refcnt; + } + pthread_mutex_unlock (&volinfo->reflock); + + return volinfo; +} + int32_t glusterd_volinfo_new (glusterd_volinfo_t **volinfo) { @@ -478,7 +509,8 @@ glusterd_volinfo_new (glusterd_volinfo_t **volinfo) new_volinfo->xl = THIS; - *volinfo = new_volinfo; + pthread_mutex_init (&new_volinfo->reflock, NULL); + *volinfo = glusterd_volinfo_ref (new_volinfo); ret = 0; @@ -571,6 +603,14 @@ out: return ret; } +int +glusterd_volinfo_remove (glusterd_volinfo_t *volinfo) +{ + list_del_init (&volinfo->vol_list); + glusterd_volinfo_unref (volinfo); + return 0; +} + int32_t glusterd_volinfo_delete (glusterd_volinfo_t *volinfo) { @@ -595,6 +635,7 @@ glusterd_volinfo_delete (glusterd_volinfo_t *volinfo) glusterd_auth_cleanup (volinfo); + pthread_mutex_destroy (&volinfo->reflock); GF_FREE (volinfo); ret = 0; @@ -3404,7 +3445,7 @@ glusterd_delete_stale_volume (glusterd_volinfo_t *stale_volinfo, (void) gf_store_handle_destroy (stale_volinfo->shandle); stale_volinfo->shandle = NULL; } - (void) glusterd_volinfo_delete (stale_volinfo); + (void) glusterd_volinfo_remove (stale_volinfo); return 0; } @@ -6181,7 +6222,7 @@ glusterd_delete_volume (glusterd_volinfo_t *volinfo) if (ret) goto out; - ret = glusterd_volinfo_delete (volinfo); + glusterd_volinfo_remove (volinfo); out: gf_log (THIS->name, GF_LOG_DEBUG, "returning %d", ret); return ret; diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 9ef09d7b0..05d5c7172 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -137,6 +137,12 @@ glusterd_volume_stop_glusterfs (glusterd_volinfo_t 
*volinfo, glusterd_brickinfo_t *brickinfo, gf_boolean_t del_brick); +glusterd_volinfo_t * +glusterd_volinfo_ref (glusterd_volinfo_t *volinfo); + +glusterd_volinfo_t * +glusterd_volinfo_unref (glusterd_volinfo_t *volinfo); + int32_t glusterd_volinfo_delete (glusterd_volinfo_t *volinfo); diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index dcff8c305..0a6746349 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -3184,7 +3184,7 @@ out: if (brickinfo) glusterd_brickinfo_delete (brickinfo); if (volinfo) - glusterd_volinfo_delete (volinfo); + glusterd_volinfo_unref (volinfo); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 41555230e..df2562ba6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1696,7 +1696,7 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) out: GF_FREE(free_ptr); if (!vol_added && volinfo) - glusterd_volinfo_delete (volinfo); + glusterd_volinfo_unref (volinfo); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index 23b4205b0..ab383ac1c 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -318,6 +318,8 @@ struct glusterd_volinfo_ { int op_version; int client_op_version; + pthread_mutex_t reflock; + int refcnt; }; typedef enum gd_node_type_ { -- cgit From 6e1449e5201591bbfcd0effa94f221317ad46a7a Mon Sep 17 00:00:00 2001 From: Vijay Bellur Date: Sat, 21 Dec 2013 23:35:19 +0530 Subject: cli: Throw a warning during creation of rdma volumes. Change-Id: I77155a7a4ff56a10079aab206af197e9d03e8a8a BUG: 1017176 Signed-off-by: Vijay Bellur Reviewed-on: http://review.gluster.org/6557 Reviewed-by: Pranith Kumar Karampuri Tested-by: Gluster Build System --- cli/src/cli-cmd-volume.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index f3b1828a0..c3ef42071 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -349,6 +349,11 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word, int32_t sub_count = 0; int32_t type = GF_CLUSTER_TYPE_NONE; cli_local_t *local = NULL; + char *trans_type = NULL; + char *question = "RDMA transport is" + " recommended only for testing purposes" + " in this release. 
Do you want to continue?"; + gf_answer_t answer = GF_ANSWER_NO; proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CREATE_VOLUME]; @@ -395,6 +400,22 @@ cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word, } } + + ret = dict_get_str (options, "transport", &trans_type); + if (ret) { + gf_log("cli", GF_LOG_ERROR, "Unable to get transport type"); + goto out; + } + + if (strcasestr (trans_type, "rdma")) { + answer = + cli_cmd_get_confirmation (state, question); + if (GF_ANSWER_NO == answer) { + ret = 0; + goto out; + } + } + if (state->mode & GLUSTER_MODE_WIGNORE) { ret = dict_set_int32 (options, "force", _gf_true); if (ret) { -- cgit From 1cc90698094f9483ee8b9731aef96e1a777a7887 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Sun, 22 Dec 2013 18:32:11 +0530 Subject: cli: Throw a warning during replace-brick Change-Id: Ia024d055645ac2ec5cd506f2533831a159b38c20 BUG: 1039954 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/6559 Tested-by: Gluster Build System Reviewed-by: Krishnan Parthasarathi --- cli/src/cli-cmd-volume.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index c3ef42071..a7e72dabd 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -1554,6 +1554,11 @@ cli_cmd_volume_replace_brick_cbk (struct cli_state *state, int sent = 0; int parse_error = 0; cli_local_t *local = NULL; + int replace_op = 0; + char *q = "All replace-brick commands except " + "commit force are deprecated. " + "Do you want to continue?"; + gf_answer_t answer = GF_ANSWER_NO; #ifdef GF_SOLARIS_HOST_OS cli_out ("Command not supported on Solaris"); @@ -1573,6 +1578,15 @@ cli_cmd_volume_replace_brick_cbk (struct cli_state *state, goto out; } + ret = dict_get_int32 (options, "operation", &replace_op); + if (replace_op != GF_REPLACE_OP_COMMIT_FORCE) { + answer = cli_cmd_get_confirmation (state, q); + if (GF_ANSWER_NO == answer) { + ret = 0; + goto out; + } + } + if (state->mode & GLUSTER_MODE_WIGNORE) { ret = dict_set_int32 (options, "force", _gf_true); if (ret) { -- cgit From 879be836145f1d0b4bc381e7416ca8bd0811b718 Mon Sep 17 00:00:00 2001 From: James Shubin Date: Sat, 21 Dec 2013 03:35:37 -0500 Subject: Fix typos, and spacing issues. Change-Id: I459ba4e87e9bc4f1c373f7abe8701bfa8450253c BUG: 1045690 Signed-off-by: James Shubin Reviewed-on: http://review.gluster.org/6556 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/afr.c | 13 ++++++------- xlators/nfs/server/src/nfs.c | 18 +++++++++--------- xlators/protocol/server/src/server.c | 8 ++++---- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 34417a158..b43fde47d 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -584,8 +584,8 @@ struct volume_options options[] = { { .key = {"choose-local" }, .type = GF_OPTION_TYPE_BOOL, .default_value = "true", - .description = "Choose a local subvolume(i.e. Brick) to read from if " - "read-subvolume is not explicitly set.", + .description = "Choose a local subvolume (i.e. Brick) to read from" + " if read-subvolume is not explicitly set.", }, { .key = {"favorite-child"}, .type = GF_OPTION_TYPE_XLATOR, @@ -695,7 +695,7 @@ struct volume_options options[] = { .description = "Lock phase of a transaction has two sub-phases. " "First is an attempt to acquire locks in parallel by " "broadcasting non-blocking lock requests. 
If lock " - "aquistion fails on any server, then the held locks " + "acquisition fails on any server, then the held locks " "are unlocked and revert to a blocking locked mode " "sequentially on one server after another. If this " "option is enabled the initial broadcasting lock " @@ -711,16 +711,15 @@ struct volume_options options[] = { "arrives before the unlock phase of the \"optimized\" " "transaction, that in turn \"takes over\" the lock as " "well. The actual unlock now happens at the end of " - "the last \"optimzed\" transaction." + "the last \"optimized\" transaction." }, { .key = {"self-heal-daemon"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", .description = "This option applies to only self-heal-daemon. " - "Index directory crawl and automatic healing of files" - " will not be performed if this option is turned" - " off." + "Index directory crawl and automatic healing of files " + "will not be performed if this option is turned off." }, { .key = {"iam-self-heal-daemon"}, .type = GF_OPTION_TYPE_BOOL, diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c index 8c895c66d..4ab5cbc90 100644 --- a/xlators/nfs/server/src/nfs.c +++ b/xlators/nfs/server/src/nfs.c @@ -1604,8 +1604,8 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL, .default_value = "on", .description = "Disable or enable the AUTH_UNIX authentication type." - "Must always be enabled for better interoperability." - "However, can be disabled if needed. Enabled by" + "Must always be enabled for better interoperability. " + "However, can be disabled if needed. Enabled by " "default" }, { .key = {"rpc-auth.auth-null"}, @@ -1621,8 +1621,8 @@ struct volume_options options[] = { .description = "Disable or enable the AUTH_UNIX authentication type " "for a particular exported volume overriding defaults" " and general setting for AUTH_UNIX scheme. Must " - "always be enabled for better interoperability." - "However, can be disabled if needed. Enabled by" + "always be enabled for better interoperability. " + "However, can be disabled if needed. Enabled by " "default." }, { .key = {"rpc-auth.auth-unix.*.allow"}, @@ -1631,8 +1631,8 @@ struct volume_options options[] = { .description = "Disable or enable the AUTH_UNIX authentication type " "for a particular exported volume overriding defaults" " and general setting for AUTH_UNIX scheme. Must " - "always be enabled for better interoperability." - "However, can be disabled if needed. Enabled by" + "always be enabled for better interoperability. " + "However, can be disabled if needed. Enabled by " "default." }, { .key = {"rpc-auth.auth-null.*"}, @@ -1673,7 +1673,7 @@ struct volume_options options[] = { .default_value = "none", .description = "Reject a comma separated list of addresses and/or" " hostnames from connecting to the server. By default," - " all connections are allowed. This allows users to" + " all connections are allowed. This allows users to " "define a rule for a specific exported volume." }, { .key = {"rpc-auth.ports.insecure"}, @@ -1770,8 +1770,8 @@ struct volume_options options[] = { { .key = {"nfs.*.disable"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "false", - .description = "This option is used to start or stop NFS server" - "for individual volume." + .description = "This option is used to start or stop the NFS server " + "for individual volumes." 
}, { .key = {"nfs.nlm"}, diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index 702deaa45..3720372f9 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -1052,10 +1052,10 @@ struct volume_options options[] = { { .key = {"root-squash"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", - .description = "Map requests from uid/gid 0 to the anonymous " - "uid/gid. Note that this does not apply to any other" - "uids or gids that might be equally sensitive, such as" - "user bin or group staff." + .description = "Map requests from uid/gid 0 to the anonymous " + "uid/gid. Note that this does not apply to any other " + "uids or gids that might be equally sensitive, such " + "as user bin or group staff." }, { .key = {"statedump-path"}, .type = GF_OPTION_TYPE_PATH, .default_value = DEFAULT_VAR_RUN_DIRECTORY, -- cgit From f9698036fcc1ceedea19110139400d0cf4a54c9a Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Mon, 23 Dec 2013 09:32:22 +0000 Subject: cluster/afr: avoid race due to afr_is_transaction_running() Problem: ------------------------------------------ afr_lookup_perform_self_heal() { if(afr_is_transaction_running()) goto out else afr_launch_self_heal(); } ------------------------------------------ When 2 clients simultaneously access a file in split-brain, one of them acquires the inode lock and proceeds with afr_launch_self_heal (which eventually fails and sets "sh-failed" in the callback.) The second client meanwhile bails out of afr_lookup_perform_self_heal() because afr_is_transaction_running() returns true due to the lock obtained by client-1. Consequently in client-2, "sh-failed" does not get set in the dict, causing quick-read translator to *not* invalidate the inode, thereby serving data randomly from one of the bricks. Fix: If a possible split-brain is detected on lookup, forcefully traverse the afr_launch_self_heal() code path in afr_lookup_perform_self_heal(). Change-Id: I316f9f282543533fd3c958e4b63ecada42c2a14f BUG: 870565 Signed-off-by: Ravishankar N Reviewed-on: http://review.gluster.org/6578 Reviewed-by: Pranith Kumar Karampuri Tested-by: Gluster Build System Reviewed-by: Varun Shastry --- xlators/cluster/afr/src/afr-common.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index a4f97e950..250b0944e 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1833,6 +1833,11 @@ afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this, afr_lookup_set_self_heal_params (local, this); if (afr_can_self_heal_proceed (&local->self_heal, priv)) { if (afr_is_transaction_running (local) && /*Forcefully call afr_launch_self_heal (which will go on to fail) for SB files.This prevents stale data being served due to race in afr_is_transaction_running() when multiple clients access the same SB file*/ !local->cont.lookup.possible_spb && (!local->attempt_self_heal)) goto out; -- cgit From f86c618cd0943930c391e6bf55fdf977b3245f36 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Mon, 23 Dec 2013 16:27:42 +0530 Subject: bd: Check for capabilities for creating thin lv Check capabilities of the volume before trying to create thin LV. BUG: 1028672 Change-Id: I1375f6f2a7576e223fc5d7cd40315999446db86a Signed-off-by: M. 
Mohan Kumar Reviewed-on: http://review.gluster.org/6577 Reviewed-by: Vijay Bellur Tested-by: Gluster Build System --- xlators/storage/bd/src/bd.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/xlators/storage/bd/src/bd.c b/xlators/storage/bd/src/bd.c index 17a9a5f15..4c3a7e14e 100644 --- a/xlators/storage/bd/src/bd.c +++ b/xlators/storage/bd/src/bd.c @@ -1015,6 +1015,13 @@ bd_setx_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; } + if (!strcmp (type, BD_THIN) && !(priv->caps & BD_CAPS_THIN)) { + gf_log (this->name, GF_LOG_WARNING, "THIN lv not supported by " + "this volume"); + op_errno = EOPNOTSUPP; + goto out; + } + s_size = strtok_r (NULL, ":", &p); /* If size not specified get default size */ -- cgit From 67ddf10d07ea14e1b9faeabde9dfc700247a6377 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Fri, 20 Dec 2013 12:27:30 +0530 Subject: mount.glusterfs:export PATH environment variable Problem: spurious warning: mount -t glusterfs 10.70.7.228:repvol /mnt/fuse_mnt/ WARNING: getfattr not found, certain checks will be skipped.. Fix: export PATH so that getfattr search succeeds. Change-Id: Ib19e2899e43a7d763a136b9fbc90823f743597a5 BUG: 1040348 Signed-off-by: Ravishankar N Reviewed-on: http://review.gluster.org/6545 Reviewed-by: Harshavardhana Tested-by: Harshavardhana Reviewed-by: Vijay Bellur --- xlators/mount/fuse/utils/mount.glusterfs.in | 1 + 1 file changed, 1 insertion(+) diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index d5993618c..2799ec847 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -26,6 +26,7 @@ _init () cmd_line=$(echo "@sbindir@/glusterfs"); # check whether getfattr exists + export PATH getfattr=$(which getfattr 2>/dev/null); if [ $? -ne 0 ]; then echo "WARNING: getfattr not found, certain checks will be skipped.." -- cgit From 2ba42d07eb967472227eb0a93e4ca2cac7a197b5 Mon Sep 17 00:00:00 2001 From: Kaushal M Date: Mon, 23 Dec 2013 14:02:12 +0530 Subject: cli: Fix xml output for volume status The XML output for volume status was malformed when one of the nodes is down, leading to outputs like ------- NFS Server localhost 63ca3d2f-8c1f-4b84-b797-b4baddab81fb 1 2049 2130 ----- This was happening because we were starting the <node> element before determining if the node was present, and were not closing it or clearing it when not finding the node in the dict. To fix this, the <node> element is only started once a node has been found in the dict. 
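For illustration, a minimal sketch of the invariant this fix establishes, using libxml2's xmlTextWriter API (emit_node is a hypothetical helper, not code from the tree): xmlTextWriterStartElement is called only once the node is known to exist, so every opened <node> element is matched by an xmlTextWriterEndElement and nothing dangles when a node is absent.

--------------------------------------------
/* Sketch only: open <node> strictly after confirming the node exists,
 * so Start/End element calls always pair up. */
#include <libxml/xmlwriter.h>

static int
emit_node (xmlTextWriterPtr writer, const char *hostname)
{
        int ret = -1;

        if (!hostname)          /* node absent: write nothing, leave */
                return 0;       /* no half-open <node> behind        */

        ret = xmlTextWriterStartElement (writer, (xmlChar *)"node");
        if (ret < 0)
                return ret;

        ret = xmlTextWriterWriteFormatElement (writer,
                                               (xmlChar *)"hostname",
                                               "%s", hostname);
        if (ret < 0)
                return ret;

        return xmlTextWriterEndElement (writer);   /* closes <node> */
}
--------------------------------------------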
Change-Id: I6b6205f14b27a69adb95d85db7b48999aa48d400 BUG: 1046020 Signed-off-by: Kaushal M Reviewed-on: http://review.gluster.org/6571 Reviewed-by: Aravinda VK Reviewed-by: Krishnan Parthasarathi Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- cli/src/cli-xml-output.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c index fe0969a30..69fed1bc9 100644 --- a/cli/src/cli-xml-output.c +++ b/cli/src/cli-xml-output.c @@ -251,6 +251,11 @@ cli_xml_output_vol_status_common (xmlTextWriterPtr writer, dict_t *dict, } *node_present = _gf_true; + /* <node> + * will be closed in the calling function cli_xml_output_vol_status()*/ + ret = xmlTextWriterStartElement (writer, (xmlChar *)"node"); + XML_RET_CHECK_AND_GOTO (ret, out); + ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hostname", "%s", hostname); XML_RET_CHECK_AND_GOTO (ret, out); @@ -1662,11 +1667,6 @@ cli_xml_output_vol_status (cli_local_t *local, dict_t *dict) index_max = brick_index_max + other_count; for (i = 0; i <= index_max; i++) { - /* <node> */ - ret = xmlTextWriterStartElement (local->writer, - (xmlChar *)"node"); - XML_RET_CHECK_AND_GOTO (ret, out); - ret = cli_xml_output_vol_status_common (local->writer, dict, i, &online, &node_present); if (ret) { @@ -1732,7 +1732,8 @@ cli_xml_output_vol_status (cli_local_t *local, dict_t *dict) break; } - /* </node> */ + + /* </node> was opened in cli_xml_output_vol_status_common()*/ ret = xmlTextWriterEndElement (local->writer); XML_RET_CHECK_AND_GOTO (ret, out); } -- cgit From 0d7279d32d5f55c0210bdcfda2d3f83e35f524b6 Mon Sep 17 00:00:00 2001 From: Poornima G Date: Mon, 23 Dec 2013 05:11:15 +0000 Subject: gfapi: Closed the logfile fd and initialize to NULL in glfs_fini Currently if logfile is closed and other threads call gf_log after glfs_fini() is executed, it may lead to memory corruption. Adding gf_log_fini() which closes the logfile and initializes the logfile to NULL, thus any further logging happens to stderr. Also added gf_log_globals_fini() which should be filled in the future to release all the logging resources. 
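A minimal sketch of the pattern (with a hypothetical log_state struct; not the actual libglusterfs code): the FILE pointer is closed and reset to NULL under the same mutex the logging path takes, so a late logger sees NULL and falls back to stderr instead of writing through a closed FILE*.

--------------------------------------------
/* Sketch only: close-and-NULL under the logging mutex. */
#include <stdio.h>
#include <pthread.h>

struct log_state {
        pthread_mutex_t  mutex;
        FILE            *logfile;      /* NULL after fini */
};

static int
log_state_fini (struct log_state *log)
{
        int ret = 0;

        pthread_mutex_lock (&log->mutex);
        {
                if (log->logfile) {
                        if (fclose (log->logfile) != 0)
                                ret = -1;
                        log->logfile = NULL;   /* later log calls fall
                                                  back to stderr */
                }
        }
        pthread_mutex_unlock (&log->mutex);

        return ret;
}

static void
log_state_msg (struct log_state *log, const char *msg)
{
        pthread_mutex_lock (&log->mutex);
        fprintf (log->logfile ? log->logfile : stderr, "%s\n", msg);
        pthread_mutex_unlock (&log->mutex);
}
--------------------------------------------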
Change-Id: I879163e1a3636e65300d166f782517ee773cab65 BUG: 1030228 Signed-off-by: Poornima G Reviewed-on: http://review.gluster.org/6552 Reviewed-by: Pranith Kumar Karampuri Reviewed-by: Shyamsundar Ranganathan Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- api/src/glfs.c | 4 ++-- libglusterfs/src/logging.c | 35 ++++++++++++++++++++++++++++++++++- libglusterfs/src/logging.h | 2 ++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/api/src/glfs.c b/api/src/glfs.c index 29ed47c0c..1bae78d23 100644 --- a/api/src/glfs.c +++ b/api/src/glfs.c @@ -666,8 +666,8 @@ glfs_fini (struct glfs *fs) glfs_subvol_done (fs, subvol); - if (ctx->log.logfile) - fclose (ctx->log.logfile); + if (gf_log_fini(ctx) != 0) + ret = -1; return ret; } diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c index e3a4a9fde..0058233a7 100644 --- a/libglusterfs/src/logging.c +++ b/libglusterfs/src/logging.c @@ -108,11 +108,44 @@ gf_log_set_xl_loglevel (void *this, gf_loglevel_t level) } void -gf_log_fini (void) +gf_log_globals_fini (void) { pthread_mutex_destroy (&THIS->ctx->log.logfile_mutex); } +/** gf_log_fini - function to perform the cleanup of the log information + * @data - glusterfs context + * @return: success: 0 + * failure: -1 + */ +int +gf_log_fini (void *data) +{ + glusterfs_ctx_t *ctx = data; + int ret = 0; + + if (ctx == NULL) { + ret = -1; + goto out; + } + + pthread_mutex_lock (&ctx->log.logfile_mutex); + { + if (ctx->log.logfile) { + if (fclose (ctx->log.logfile) != 0) + ret = -1; + /* Logfile needs to be set to NULL, so that any + call to gf_log after calling gf_log_fini, will + log the message to stderr. + */ + ctx->log.logfile = NULL; + } + } + pthread_mutex_unlock (&ctx->log.logfile_mutex); + + out: + return ret; +} #ifdef GF_USE_SYSLOG /** diff --git a/libglusterfs/src/logging.h b/libglusterfs/src/logging.h index cc806a767..e2b7e664d 100644 --- a/libglusterfs/src/logging.h +++ b/libglusterfs/src/logging.h @@ -153,6 +153,8 @@ int gf_cmd_log_init (const char *filename); void set_sys_log_level (gf_loglevel_t level); +int gf_log_fini(void *data); + #define GF_DEBUG(xl, format, args...) \ gf_log ((xl)->name, GF_LOG_DEBUG, format, ##args) #define GF_INFO(xl, format, args...) 
\ -- cgit From d4b8825436daad701995e120f38da706e5fe97c1 Mon Sep 17 00:00:00 2001 From: Krishnan Parthasarathi Date: Tue, 24 Dec 2013 17:58:21 +0530 Subject: socket: unix socket connect path can't be greater than UNIX_PATH_MAX characters Change-Id: I74788b63dd1c14507aa6d65182ea4b87a2e1f389 BUG: 1046308 Signed-off-by: Krishnan Parthasarathi Reviewed-on: http://review.gluster.org/6589 Tested-by: Gluster Build System Reviewed-by: Vijaikumar Mallikarjuna Reviewed-by: Vijay Bellur --- rpc/rpc-transport/socket/src/name.c | 4 ++-- tests/bugs/bug-1046308.t | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 tests/bugs/bug-1046308.t diff --git a/rpc/rpc-transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c index 1647d5b6b..c6eae9739 100644 --- a/rpc/rpc-transport/socket/src/name.c +++ b/rpc/rpc-transport/socket/src/name.c @@ -285,7 +285,7 @@ af_unix_client_get_remote_sockaddr (rpc_transport_t *this, goto err; } - if (strlen (connect_path) > UNIX_PATH_MAX) { + if ((strlen (connect_path) + 1) > UNIX_PATH_MAX) { gf_log (this->name, GF_LOG_ERROR, "connect-path value length %"GF_PRI_SIZET" > %d octets", strlen (connect_path), UNIX_PATH_MAX); @@ -329,7 +329,7 @@ af_unix_server_get_local_sockaddr (rpc_transport_t *this, #define UNIX_PATH_MAX 108 #endif - if (strlen (listen_path) > UNIX_PATH_MAX) { + if ((strlen (listen_path) + 1) > UNIX_PATH_MAX) { gf_log (this->name, GF_LOG_ERROR, "option transport.unix.listen-path has value length " "%"GF_PRI_SIZET" > %d", diff --git a/tests/bugs/bug-1046308.t b/tests/bugs/bug-1046308.t new file mode 100644 index 000000000..cfec3a35d --- /dev/null +++ b/tests/bugs/bug-1046308.t @@ -0,0 +1,19 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc + +cleanup; + +volname="StartMigrationDuringRebalanceTest" +TEST glusterd +TEST pidof glusterd; + +TEST $CLI volume info; +TEST $CLI volume create $volname $H0:$B0/${volname}{1,2}; +TEST $CLI volume start $volname; +TEST $CLI volume rebalance $volname start; + +cleanup; + + + -- cgit From 412e6ab75834f4ec3ba024b702374a84b2f18eb0 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Mon, 23 Dec 2013 11:13:17 -0800 Subject: build: fix day in the %changelog This fixes `warnings` during build tests `rpm.t` Change-Id: I706ceb1a39d046fe26f70f7b1b06f059abd5d3c0 BUG: 1000019 Signed-off-by: Harshavardhana Reviewed-on: http://review.gluster.org/6584 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- glusterfs.spec.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/glusterfs.spec.in b/glusterfs.spec.in index 1bbb17f11..12f246895 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -742,7 +742,7 @@ fi %ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options # This is really ugly, but I have no idea how to mark these directories in -# any other way. They should belong to the glusterfs-server package, but +# any other way. They should belong to the glusterfs-server package, but # don't exist after installation. They are generated on the first start... %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1 @@ -877,7 +877,7 @@ fi * Tue Dec 10 2013 Kaleb S. KEITHLEY - Sync with Fedora glusterfs.spec 3.5.0-0.1.qa3 -* Wed Oct 11 2013 Harshavardhana +* Fri Oct 11 2013 Harshavardhana - Add '_sharedstatedir' macro to `/var/lib` on <= RHEL5 (#1003184) * Wed Oct 9 2013 Kaleb S. 
KEITHLEY -- cgit From 2909ef3ecb5f331630460d17c56da6291c1f254b Mon Sep 17 00:00:00 2001 From: Vijay Bellur Date: Fri, 27 Dec 2013 18:00:45 +0530 Subject: protocol/server: Change log message severity Change-Id: Ia6aaf8a106c26fa7118c86fadb6805d2877bb6ee BUG: 849630 Signed-off-by: Vijay Bellur Reviewed-on: http://review.gluster.org/6610 Reviewed-by: Pranith Kumar Karampuri Tested-by: Gluster Build System --- xlators/protocol/server/src/server-rpc-fops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c index 138e601ce..d43571e87 100644 --- a/xlators/protocol/server/src/server-rpc-fops.c +++ b/xlators/protocol/server/src/server-rpc-fops.c @@ -2265,7 +2265,7 @@ server_finodelk_resume (call_frame_t *frame, xlator_t *bound_xl) GF_UNUSED int ret = -1; server_state_t *state = NULL; - gf_log (bound_xl->name, GF_LOG_WARNING, "frame %p, xlator %p", + gf_log (bound_xl->name, GF_LOG_DEBUG, "frame %p, xlator %p", frame, bound_xl); state = CALL_STATE (frame); @@ -2298,7 +2298,7 @@ server_inodelk_resume (call_frame_t *frame, xlator_t *bound_xl) GF_UNUSED int ret = -1; server_state_t *state = NULL; - gf_log (bound_xl->name, GF_LOG_WARNING, "frame %p, xlator %p", + gf_log (bound_xl->name, GF_LOG_DEBUG, "frame %p, xlator %p", frame, bound_xl); state = CALL_STATE (frame); -- cgit From 6b41a0b7dfd9f48e1518430bacba9135bee4fcb4 Mon Sep 17 00:00:00 2001 From: Venkatesh Somyajulu Date: Fri, 27 Dec 2013 16:51:29 +0530 Subject: test: Modified bug-1037501.t script Change-Id: I3114009681d49249fe292f94a464efc419c944cb BUG: 1037501 Signed-off-by: Venkatesh Somyajulu Reviewed-on: http://review.gluster.org/6596 Reviewed-by: Pranith Kumar Karampuri Tested-by: Gluster Build System --- tests/bugs/bug-1037501.t | 185 ++++++++++++++++++++++++++++------------------- 1 file changed, 109 insertions(+), 76 deletions(-) diff --git a/tests/bugs/bug-1037501.t b/tests/bugs/bug-1037501.t index 5470d0563..d11c788a0 100755 --- a/tests/bugs/bug-1037501.t +++ b/tests/bugs/bug-1037501.t @@ -65,156 +65,189 @@ function compare() echo $var } +var2="000000000000000000000000" var1=`getfattr -d -m . $B0/$V0-0/File -e hex 2>&1 | grep "client-3"` -var2="trusted.afr.$V0-client-3=0x000000020000000200000000" - -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-0/File -e hex 2>&1 | grep "client-4"` -var2="trusted.afr.$V0-client-4=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-0/File -e hex 2>&1 | grep "client-5"` -var2="trusted.afr.$V0-client-5=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-1/File -e hex 2>&1 | grep "client-3"` -var2="trusted.afr.$V0-client-3=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1| cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-1/File -e hex 2>&1 | grep "client-4"` -var2="trusted.afr.$V0-client-4=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . 
$B0/$V0-1/File -e hex 2>&1 | grep "client-5"` -var2="trusted.afr.$V0-client-5=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-2/File -e hex 2>&1 | grep "client-3"` -var2="trusted.afr.$V0-client-3=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-2/File -e hex 2>&1 | grep "client-4"` -var2="trusted.afr.$V0-client-4=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-2/File -e hex 2>&1 | grep "client-5"` -var2="trusted.afr.$V0-client-5=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-0/Dir -e hex 2>&1 | grep "client-3"` -var2="trusted.afr.$V0-client-3=0x000000000000000100000001" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-0/Dir -e hex 2>&1 | grep "client-4"` -var2="trusted.afr.$V0-client-4=0x000000000000000100000001" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-0/Dir -e hex 2>&1 | grep "client-5"` -var2="trusted.afr.$V0-client-5=0x000000000000000100000001" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-1/Dir -e hex 2>&1 | grep "client-3"` -var2="trusted.afr.$V0-client-3=0x000000000000000100000001" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-1/Dir -e hex 2>&1 | grep "client-4"` -var2="trusted.afr.$V0-client-4=0x000000000000000100000001" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-1/Dir -e hex 2>&1 | grep "client-5"` -var2="trusted.afr.$V0-client-5=0x000000000000000100000001" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-2/Dir -e hex 2>&1 | grep "client-3"` -var2="trusted.afr.$V0-client-3=0x000000000000000100000001" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-2/Dir -e hex 2>&1 | grep "client-4"` -var2="trusted.afr.$V0-client-4=0x000000000000000100000001" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-2/Dir -e hex 2>&1 | grep "client-5"` -var2="trusted.afr.$V0-client-5=0x000000000000000100000001" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-0/Link -e hex 2>&1 | grep "client-3"` -var2="trusted.afr.$V0-client-3=0x000000020000000200000000" - -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-0/Link -e hex 2>&1 | grep "client-4"` -var2="trusted.afr.$V0-client-4=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? 
+var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-0/Link -e hex 2>&1 | grep "client-5"` -var2="trusted.afr.$V0-client-5=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-1/Link -e hex 2>&1 | grep "client-3"` -var2="trusted.afr.$V0-client-3=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-1/Link -e hex 2>&1 | grep "client-4"` -var2="trusted.afr.$V0-client-4=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-1/Link -e hex 2>&1 | grep "client-5"` -var2="trusted.afr.$V0-client-5=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-2/Link -e hex 2>&1 | grep "client-3"` -var2="trusted.afr.$V0-client-3=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-2/Link -e hex 2>&1 | grep "client-4"` -var2="trusted.afr.$V0-client-4=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-2/Link -e hex 2>&1 | grep "client-5"` -var2="trusted.afr.$V0-client-5=0x000000020000000200000000" -EXPECT 0 compare $var1 $var2 - +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-0/FIFO -e hex 2>&1 | grep "client-3"` -var2="trusted.afr.$V0-client-3=0x000000000000000100000000" - -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-0/FIFO -e hex 2>&1 | grep "client-4"` -var2="trusted.afr.$V0-client-4=0x000000000000000100000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-0/FIFO -e hex 2>&1 | grep "client-5"` -var2="trusted.afr.$V0-client-5=0x000000000000000100000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-1/FIFO -e hex 2>&1 | grep "client-3"` -var2="trusted.afr.$V0-client-3=0x000000000000000100000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-1/FIFO -e hex 2>&1 | grep "client-4"` -var2="trusted.afr.$V0-client-4=0x000000000000000100000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-1/FIFO -e hex 2>&1 | grep "client-5"` -var2="trusted.afr.$V0-client-5=0x000000000000000100000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-2/FIFO -e hex 2>&1 | grep "client-3"` -var2="trusted.afr.$V0-client-3=0x000000000000000100000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . 
$B0/$V0-2/FIFO -e hex 2>&1 | grep "client-4"` -var2="trusted.afr.$V0-client-4=0x000000000000000100000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 var1=`getfattr -d -m . $B0/$V0-2/FIFO -e hex 2>&1 | grep "client-5"` -var2="trusted.afr.$V0-client-5=0x000000000000000100000000" -EXPECT 0 compare $var1 $var2 +EXPECT "0" echo $? +var3=`echo $var1 | cut -d x -f 2` +EXPECT_NOT $var2 echo $var3 cleanup; -- cgit From a42d0938bf85a2fa23ce23ee88ea0cd24fd79d24 Mon Sep 17 00:00:00 2001 From: Vijay Bellur Date: Fri, 27 Dec 2013 14:24:29 +0530 Subject: features/index: Minor improvement in log message. Change-Id: Ic4f39785dab5ad64def4c06d7bd2f2dec09e19ab BUG: 1045690 Signed-off-by: Vijay Bellur Reviewed-on: http://review.gluster.org/6606 Reviewed-by: Pranith Kumar Karampuri Tested-by: Gluster Build System --- xlators/features/index/src/index.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index 2923e9c91..fde0893ec 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -264,7 +264,7 @@ check_delete_stale_index_file (xlator_t *this, char *filename) ret = stat (filepath_under_base_indices_holder, &base_index_st); if (ret) { gf_log (THIS->name, GF_LOG_ERROR, "Base index is not created" - "under index/base_indices_holder"); + " under index/base_indices_holder"); return; } -- cgit From efcfb60f87f6d5ce4aa8ec7f4cd3bbe51a87f538 Mon Sep 17 00:00:00 2001 From: Louis Zuckerman Date: Sat, 28 Dec 2013 00:34:47 -0500 Subject: Updating extras/Ubuntu with latest upstart configs (BUG: 1047007) Change-Id: Ia769589f6af1d7ca3577185fd4c56eb9f43b3e2e BUG: 1047007 Signed-off-by: Louis Zuckerman Reviewed-on: http://review.gluster.org/6611 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- extras/Ubuntu/README.Ubuntu | 14 ++++++++------ extras/Ubuntu/glusterd.conf | 10 ---------- extras/Ubuntu/glusterfs-server.conf | 10 ++++++++++ extras/Ubuntu/mounting-glusterfs.conf | 5 ++--- 4 files changed, 20 insertions(+), 19 deletions(-) delete mode 100644 extras/Ubuntu/glusterd.conf create mode 100644 extras/Ubuntu/glusterfs-server.conf diff --git a/extras/Ubuntu/README.Ubuntu b/extras/Ubuntu/README.Ubuntu index 0c5b7828d..890da3ca6 100644 --- a/extras/Ubuntu/README.Ubuntu +++ b/extras/Ubuntu/README.Ubuntu @@ -1,5 +1,6 @@ Bug 765014 - Mounting from localhost in fstab fails at boot on ubuntu -(https://bugzilla.redhat.com/show_bug.cgi?id=765014) +(original bug: https://bugzilla.redhat.com/show_bug.cgi?id=765014) +(updated in: https://bugzilla.redhat.com/show_bug.cgi?id=1047007) (https://bugs.launchpad.net/ubuntu/+source/glusterfs/+bug/876648) Ubuntu uses upstart instead of init to bootstrap the system and it has a unique @@ -10,15 +11,16 @@ and the volume is mounted from localhost, the mount fails at boot time. To correct this we need to launch glusterd using upstart and block the glusterfs mounting event until glusterd is started. -The glusterd.conf file contains the necessary configuration for upstart to -manage the glusterd service. It should be placed in /etc/init/glusterd.conf +The glusterfs-server.conf file contains the necessary configuration for upstart to +manage the glusterd service. It should be placed in /etc/init/glusterfs-server.conf on Ubuntu systems, and then the old initscript /etc/init.d/glusterd can be removed. 
An additional upstart job, mounting-glusterfs.conf, is also required +to block mounting glusterfs volumes until the network interfaces are available. Both of these upstart jobs need to be placed in /etc/init to resolve the issue. -Starting with Ubuntu 12.04, Precise Pangolin, these upstart jobs will be -included with the glusterfs-server package in the Ubuntu repository. +Starting with Ubuntu 14.04, Trusty Tahr, these upstart jobs will be included +with the glusterfs-server and glusterfs-client packages in the Ubuntu +universe repository. This affects all versions of glusterfs on the Ubuntu platform since at least 10.04, Lucid Lynx. diff --git a/extras/Ubuntu/glusterd.conf b/extras/Ubuntu/glusterd.conf deleted file mode 100644 index aa99502b0..000000000 --- a/extras/Ubuntu/glusterd.conf +++ /dev/null @@ -1,10 +0,0 @@ -author "Louis Zuckerman " -description "GlusterFS Management Daemon" - -start on runlevel [2345] -stop on runlevel [016] - -expect fork - -exec /usr/sbin/glusterd -p /var/run/glusterd.pid - diff --git a/extras/Ubuntu/glusterfs-server.conf b/extras/Ubuntu/glusterfs-server.conf new file mode 100644 index 000000000..aa99502b0 --- /dev/null +++ b/extras/Ubuntu/glusterfs-server.conf @@ -0,0 +1,10 @@ +author "Louis Zuckerman " +description "GlusterFS Management Daemon" + +start on runlevel [2345] +stop on runlevel [016] + +expect fork + +exec /usr/sbin/glusterd -p /var/run/glusterd.pid + diff --git a/extras/Ubuntu/mounting-glusterfs.conf b/extras/Ubuntu/mounting-glusterfs.conf index 3c59c0f63..786ef16df 100644 --- a/extras/Ubuntu/mounting-glusterfs.conf +++ b/extras/Ubuntu/mounting-glusterfs.conf @@ -1,7 +1,6 @@ author "Louis Zuckerman " -description "Block the mounting event for glusterfs filesystems until glusterd is running" +description "Block the mounting event for glusterfs filesystems until the network interfaces are running" start on mounting TYPE=glusterfs task -exec start wait-for-state WAIT_FOR=glusterd WAITER=mounting-glusterfs - +exec start wait-for-state WAIT_FOR=static-network-up WAITER=mounting-glusterfs -- cgit From d85726d19432384e2c3dd6ceff4b7b4ec3f8f57a Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Mon, 23 Dec 2013 02:52:12 -0800 Subject: crypt: On calloc failure follow goto statement At -------------------------------------------- 1423 if (local->vec.iov_base == NULL) { -------------------------------------------- This condition being true leads to a NULL pointer being passed into `memcpy` later at ---------------------------------------------------- 1432 memcpy((char *)local->vec.iov_base + copied, ---------------------------------------------------- Avoid this with a clean exit through a goto statement within the conditional. 
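A stand-alone sketch of the pattern the fix applies (illustrative names, not the crypt.c code): on allocation failure, record the error and jump to a common exit label so control never reaches the memcpy with a NULL destination.

--------------------------------------------
/* Sketch only: bail out via goto instead of falling through
 * into memcpy() with a NULL destination buffer. */
#include <stdlib.h>
#include <string.h>
#include <errno.h>

static int
copy_checked (const char *src, size_t len)
{
        int   ret = -1;
        char *buf = calloc (1, len);

        if (buf == NULL) {
                errno = ENOMEM;
                goto out;           /* skip the memcpy() entirely */
        }

        memcpy (buf, src, len);     /* reached only with a valid buffer */
        ret = 0;
        free (buf);
out:
        return ret;
}
--------------------------------------------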
Change-Id: I2546b7dd634dc251adae8ca39497c4c3ef520f62 BUG: 1030058 Signed-off-by: Harshavardhana Reviewed-on: http://review.gluster.org/6576 Reviewed-by: Shyamsundar Ranganathan Reviewed-by: Edward Shishkin Tested-by: Edward Shishkin Tested-by: Gluster Build System --- xlators/encryption/crypt/src/crypt.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c index db2e6d83c..47ff38e50 100644 --- a/xlators/encryption/crypt/src/crypt.c +++ b/xlators/encryption/crypt/src/crypt.c @@ -1421,8 +1421,11 @@ static int32_t prune_write(call_frame_t *frame, gf_crypt_mt_data); if (local->vec.iov_base == NULL) { + gf_log(this->name, GF_LOG_WARNING, + "Failed to calloc head block for prune"); local->op_ret = -1; local->op_errno = ENOMEM; + goto put_one_call; } for (i = 0; i < count; i++) { to_copy = vec[i].iov_len; -- cgit From 7e3dd526c62b3a1bb59945efdfed2c2fbbcf9cf9 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Mon, 23 Dec 2013 12:53:32 +0100 Subject: rpc/server: add anonuid and anongid options for root-squash Introduce new options to modify the behaviour of server.root-squash. With server.anonuid and server.anongid the uid/gid can be specified and the root user (uid=0 and gid=0) will be mapped to the given uid/gid instead of nfsnobody (uid=65534 and gid=65534). Many thanks to Vikhyat Umrao for writing the majority of the test-case! Change-Id: I6379a3d2ef52b9b9707f2f6f0529657580c8d779 BUG: 1043886 CC: Vikhyat Umrao Signed-off-by: Niels de Vos Reviewed-on: http://review.gluster.org/6546 Reviewed-by: Santosh Pradhan Reviewed-by: Vikhyat Umrao Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- rpc/rpc-lib/src/rpcsvc-auth.c | 17 +++++++- rpc/rpc-lib/src/rpcsvc-common.h | 2 + rpc/rpc-lib/src/rpcsvc.h | 6 +-- tests/bugs/bug-1043886.t | 55 +++++++++++++++++++++++++ xlators/mgmt/glusterd/src/glusterd-volume-set.c | 10 +++++ xlators/protocol/server/src/server.c | 16 +++++++ 6 files changed, 102 insertions(+), 4 deletions(-) create mode 100755 tests/bugs/bug-1043886.t diff --git a/rpc/rpc-lib/src/rpcsvc-auth.c b/rpc/rpc-lib/src/rpcsvc-auth.c index 4cb86a758..0ede19f74 100644 --- a/rpc/rpc-lib/src/rpcsvc-auth.c +++ b/rpc/rpc-lib/src/rpcsvc-auth.c @@ -230,6 +230,8 @@ int rpcsvc_set_root_squash (rpcsvc_t *svc, dict_t *options) { int ret = -1; + uid_t anonuid = -1; + gid_t anongid = -1; GF_ASSERT (svc); GF_ASSERT (options); @@ -240,8 +242,21 @@ rpcsvc_set_root_squash (rpcsvc_t *svc, dict_t *options) else svc->root_squash = _gf_false; + ret = dict_get_uint32 (options, "anonuid", &anonuid); + if (!ret) + svc->anonuid = anonuid; + else + svc->anonuid = RPC_NOBODY_UID; + + ret = dict_get_uint32 (options, "anongid", &anongid); + if (!ret) + svc->anongid = anongid; + else + svc->anongid = RPC_NOBODY_GID; + if (svc->root_squash) - gf_log (GF_RPCSVC, GF_LOG_DEBUG, "root squashing enabled "); + gf_log (GF_RPCSVC, GF_LOG_DEBUG, "root squashing enabled " + "(uid=%d, gid=%d)", svc->anonuid, svc->anongid); return 0; } diff --git a/rpc/rpc-lib/src/rpcsvc-common.h b/rpc/rpc-lib/src/rpcsvc-common.h index aed55e039..3c16abeb7 100644 --- a/rpc/rpc-lib/src/rpcsvc-common.h +++ b/rpc/rpc-lib/src/rpcsvc-common.h @@ -55,6 +55,8 @@ typedef struct rpcsvc_state { gf_boolean_t allow_insecure; gf_boolean_t register_portmap; gf_boolean_t root_squash; + uid_t anonuid; + gid_t anongid; glusterfs_ctx_t *ctx; /* list of connections which will listen for incoming connections */ diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h index 
cbc1f4226..28ec93e11 100644 --- a/rpc/rpc-lib/src/rpcsvc.h +++ b/rpc/rpc-lib/src/rpcsvc.h @@ -282,14 +282,14 @@ struct rpcsvc_request { int gidcount = 0; \ if (req->svc->root_squash) { \ if (req->uid == RPC_ROOT_UID) \ - req->uid = RPC_NOBODY_UID; \ + req->uid = req->svc->anonuid; \ if (req->gid == RPC_ROOT_GID) \ - req->gid = RPC_NOBODY_GID; \ + req->gid = req->svc->anongid; \ for (gidcount = 0; gidcount < req->auxgidcount; \ ++gidcount) { \ if (!req->auxgids[gidcount]) \ req->auxgids[gidcount] = \ - RPC_NOBODY_GID; \ + req->svc->anongid; \ } \ } \ } while (0); diff --git a/tests/bugs/bug-1043886.t b/tests/bugs/bug-1043886.t new file mode 100755 index 000000000..fb7ecb194 --- /dev/null +++ b/tests/bugs/bug-1043886.t @@ -0,0 +1,55 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../nfs.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}; +TEST $CLI volume start $V0 + +sleep 2; +## Mount FUSE with caching disabled +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0; + +EXPECT_WITHIN 20 "1" is_nfs_export_available; + +## Mount volume as NFS export +TEST mount -t nfs -o vers=3,nolock $H0:/$V0 $N0; + +# just a random uid/gid +uid=22162 +gid=5845 + +mkdir $N0/other; +chown $uid:$gid $N0/other; + +TEST $CLI volume set $V0 server.root-squash on; +TEST $CLI volume set $V0 server.anonuid $uid; +TEST $CLI volume set $V0 server.anongid $gid; + +sleep 2; + +EXPECT_WITHIN 20 "1" is_nfs_export_available; + +# create files and directories in the root of the glusterfs and nfs mount +# which is owned by root and hence the right behavior is getting EACCESS +# as the fops are executed as nfsnobody. +touch $M0/file 2>/dev/null; +TEST [ $? -ne 0 ] +mkdir $M0/dir 2>/dev/null; +TEST [ $? -ne 0 ] + +# Here files and directories should be getting created as other directory is owned +# by tmp_user as server.anonuid and server.anongid have the value of tmp_user uid and gid +TEST touch $M0/other/file 2>/dev/null; +TEST [ "$(stat -c %u:%g $N0/other/file)" = "$uid:$gid" ]; +TEST mkdir $M0/other/dir 2>/dev/null; +TEST [ "$(stat -c %u:%g $N0/other/dir)" = "$uid:$gid" ]; + +TEST $CLI volume stop $V0; +TEST $CLI volume delete $V0; + +cleanup; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 131f96ce6..b1989567a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -831,6 +831,16 @@ struct volopt_map_entry glusterd_volopt_map[] = { .option = "root-squash", .op_version = 2 }, + { .key = "server.anonuid", + .voltype = "protocol/server", + .option = "anonuid", + .op_version = 3 + }, + { .key = "server.anongid", + .voltype = "protocol/server", + .option = "anongid", + .op_version = 3 + }, { .key = "server.statedump-path", .voltype = "protocol/server", .option = "statedump-path", diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index 3720372f9..a797a0d6c 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -1057,6 +1057,22 @@ struct volume_options options[] = { "uids or gids that might be equally sensitive, such " "as user bin or group staff." }, + { .key = {"anonuid"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "65534", /* RPC_NOBODY_UID */ + .min = 0, + .max = (uint32_t) -1, + .description = "value of the uid used for the anonymous " + "user/nfsnobody when root-squash is enabled." 
+ }, + { .key = {"anongid"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "65534", /* RPC_NOBODY_GID */ + .min = 0, + .max = (uint32_t) -1, + .description = "value of the gid used for the anonymous " + "user/nfsnobody when root-squash is enabled." + }, { .key = {"statedump-path"}, .type = GF_OPTION_TYPE_PATH, .default_value = DEFAULT_VAR_RUN_DIRECTORY, -- cgit From 2401384fbc882c02319881166b299f8e88c7ab55 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Mon, 23 Dec 2013 18:28:36 +0530 Subject: mount/fuse: Remove duplicate GET_STATE call Change-Id: I5716cb0e9914f78489e3422a03efc689f5ebc413 BUG: 952029 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/6579 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/mount/fuse/src/fuse-bridge.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index ecfe86d5d..ee12d869c 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -3374,8 +3374,6 @@ fuse_getxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) } } - GET_STATE (this, finh, state); - fuse_resolve_inode_init (state, &state->resolve, finh->nodeid); rv = fuse_flip_xattr_ns (priv, name, &newkey); -- cgit From 2ce8918759e9676a54791848fd2ac85f48a05016 Mon Sep 17 00:00:00 2001 From: Vijay Bellur Date: Wed, 1 Jan 2014 21:36:27 +0530 Subject: libglusterfs: Add missing goto in eh_new() Change-Id: I64bbd8d5d919e78286a0521b62ca75f22ad296fa BUG: 795419 Signed-off-by: Vijay Bellur Reviewed-on: http://review.gluster.org/6623 Tested-by: Gluster Build System Reviewed-by: Pranith Kumar Karampuri --- libglusterfs/src/event-history.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libglusterfs/src/event-history.c b/libglusterfs/src/event-history.c index 82baa521a..e89df09c9 100644 --- a/libglusterfs/src/event-history.c +++ b/libglusterfs/src/event-history.c @@ -29,6 +29,7 @@ eh_new (size_t buffer_size, gf_boolean_t use_buffer_once, gf_log ("", GF_LOG_ERROR, "allocating circular buffer failed"); GF_FREE (history); history = NULL; + goto out; } history->buffer = buffer; -- cgit From d1f8b7ebc71df415f6b8ff37e9654ecee0d9064c Mon Sep 17 00:00:00 2001 From: Emmanuel Dreyfus Date: Thu, 19 Dec 2013 14:11:45 +0100 Subject: Use linkat() instead of link() for portability sake POSIX does not say whether link(2) on a symlink should link to the symlink itself or to its target. Linux uses the symlink; most other systems use the target. Using linkat(2) allows the behavior to be specified, so that the behavior is portable. Also fix configure test for NetBSD linkat(2), which ceased to work. 
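For illustration (a stand-alone sketch, not code from the tree): linkat(2) lets the caller state the intent explicitly; with flags=0 the new name refers to the symlink itself (what Linux link(2) does), while AT_SYMLINK_FOLLOW makes it refer to the symlink's target (what most BSDs do), so the behavior no longer depends on the platform.

--------------------------------------------
/* Sketch only: explicitly choose link-the-symlink vs. link-the-target.
 * The path names are illustrative. */
#include <fcntl.h>      /* AT_FDCWD, AT_SYMLINK_FOLLOW */
#include <unistd.h>

int
make_links (void)
{
        /* hard-link the symlink itself (Linux link(2) behavior) */
        if (linkat (AT_FDCWD, "sym", AT_FDCWD, "link-to-sym", 0) < 0)
                return -1;

        /* hard-link the symlink's target (BSD link(2) behavior) */
        if (linkat (AT_FDCWD, "sym", AT_FDCWD, "link-to-target",
                    AT_SYMLINK_FOLLOW) < 0)
                return -1;

        return 0;
}
--------------------------------------------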
BUG: 764655 Change-Id: Iccd27ac076b7a74e40dcbaa1c4762fd3ad59da5f Signed-off-by: Emmanuel Dreyfus Reviewed-on: http://review.gluster.org/6539 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- configure.ac | 2 +- libglusterfs/src/syscall.c | 5 +++++ xlators/features/index/src/index.c | 20 ++++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 7bfee047a..be1ec7ab6 100644 --- a/configure.ac +++ b/configure.ac @@ -598,7 +598,7 @@ dnl FreeBSD, NetBSD AC_CHECK_MEMBERS([struct stat.st_atimespec.tv_nsec]) case $host_os in *netbsd*) - CFLAGS+=" -D_INCOMPLETE_XOPEN_C063" + CFLAGS="${CFLAGS} -D_INCOMPLETE_XOPEN_C063" ;; esac AC_CHECK_FUNC([linkat], [have_linkat=yes]) diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index e8954cc23..a619f9c41 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -120,7 +120,12 @@ sys_rename (const char *oldpath, const char *newpath) int sys_link (const char *oldpath, const char *newpath) { +#ifdef HAVE_LINKAT + /* see HAVE_LINKAT in xlators/storage/posix/src/posix.c */ + return linkat (AT_FDCWD, oldpath, AT_FDCWD, newpath, 0); +#else return link (oldpath, newpath); +#endif } diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index fde0893ec..6c634dd9a 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -407,7 +407,12 @@ sync_base_indices (void *index_priv) snprintf (base_index_path, PATH_MAX, "%s/%s", base_indices_holder, entry->d_name); +#ifdef HAVE_LINKAT + /* see HAVE_LINKAT in xlators/storage/posix/src/posix.c */ + ret = linkat (AT_FDCWD, xattrop_index_path, AT_FDCWD, base_index_path, 0); +#else ret = link (xattrop_index_path, base_index_path); +#endif if (ret && errno != EEXIST) goto out; @@ -543,7 +548,12 @@ index_add (xlator_t *this, uuid_t gfid, const char *subdir) index_get_index (priv, index); make_index_path (priv->index_basepath, subdir, index, index_path, sizeof (index_path)); +#ifdef HAVE_LINKAT + /* see HAVE_LINKAT in xlators/storage/posix/src/posix.c */ + ret = linkat (AT_FDCWD, index_path, AT_FDCWD, gfid_path, 0); +#else ret = link (index_path, gfid_path); +#endif if (!ret || (errno == EEXIST)) { ret = 0; index_created = 1; @@ -576,7 +586,12 @@ index_add (xlator_t *this, uuid_t gfid, const char *subdir) if (fd >= 0) close (fd); +#ifdef HAVE_LINKAT + /* see HAVE_LINKAT in xlators/storage/posix/src/posix.c */ + ret = linkat (AT_FDCWD, index_path, AT_FDCWD, gfid_path, 0); +#else ret = link (index_path, gfid_path); +#endif if (ret && (errno != EEXIST)) { gf_log (this->name, GF_LOG_ERROR, "%s: Not able to " "add to index (%s)", uuid_utoa (gfid), @@ -590,7 +605,12 @@ index_add (xlator_t *this, uuid_t gfid, const char *subdir) make_index_path (priv->index_basepath, GF_BASE_INDICES_HOLDER_GFID, index, base_path, sizeof (base_path)); +#ifdef HAVE_LINKAT + /* see HAVE_LINKAT in xlators/storage/posix/src/posix.c */ + ret = linkat (AT_FDCWD, index_path, AT_FDCWD, base_path, 0); +#else ret = link (index_path, base_path); +#endif if (ret) goto out; } -- cgit From f54e9ca3897177ee41b5f5299b94b719448c46cd Mon Sep 17 00:00:00 2001 From: Emmanuel Dreyfus Date: Sat, 28 Dec 2013 08:31:55 +0100 Subject: Use linkat() instead of link() for portability sake POSIX does not says wether link(2) on symlink should link on symlink itself or on target. Linux use symlink, most other systems use target. Using linkat(2) allows the behavior to be specified, so that the behavior is portable. 
Also fix configure test for NetBSD linkat(2), which ceased to work. BUG: 764655 Change-Id: I2633fde3b0828ca8c199e11c827720c513e15852 Signed-off-by: Emmanuel Dreyfus Reviewed-on: http://review.gluster.org/6613 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- libglusterfs/src/syscall.c | 2 +- xlators/features/index/src/index.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index a619f9c41..117fa209e 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -120,7 +120,7 @@ sys_rename (const char *oldpath, const char *newpath) int sys_link (const char *oldpath, const char *newpath) { -#ifdef HAVE_LINKAT +#ifdef HAVE_LINKAT /* see HAVE_LINKAT in xlators/storage/posix/src/posix.c */ return linkat (AT_FDCWD, oldpath, AT_FDCWD, newpath, 0); #else diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index 6c634dd9a..4ba72c022 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -409,7 +409,8 @@ sync_base_indices (void *index_priv) #ifdef HAVE_LINKAT /* see HAVE_LINKAT in xlators/storage/posix/src/posix.c */ - ret = linkat (AT_FDCWD, xattrop_index_path, AT_FDCWD, base_index_path, 0); + ret = linkat (AT_FDCWD, xattrop_index_path, + AT_FDCWD, base_index_path, 0); #else ret = link (xattrop_index_path, base_index_path); #endif -- cgit From d25d912835e01c40cb00a0ae93d49f68f9b3542b Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Thu, 2 Jan 2014 23:06:39 +0530 Subject: glusterd: update volinfo->subvol_count in newly added peers Update the subvol_count when a peer imports information about the friend volumes. Change-Id: Id3884bd5727ff22be7ed87f43a1ec1b5fe34813c BUG: 1047955 Signed-off-by: Ravishankar N Reviewed-on: http://review.gluster.org/6629 Reviewed-by: Krishnan Parthasarathi Tested-by: Gluster Build System --- tests/bugs/bug-1047955.t | 23 +++++++++++++++++++++++ xlators/mgmt/glusterd/src/glusterd-utils.c | 3 ++- 2 files changed, 25 insertions(+), 1 deletion(-) create mode 100644 tests/bugs/bug-1047955.t diff --git a/tests/bugs/bug-1047955.t b/tests/bugs/bug-1047955.t new file mode 100644 index 000000000..e15f3ceef --- /dev/null +++ b/tests/bugs/bug-1047955.t @@ -0,0 +1,23 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../cluster.rc + +function check_peers { + $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l +} + +cleanup; + +# Create a 2x2 dist-rep volume; peer probe a new node. 
+# Performing remove-brick from this new node must succeed +# without crashing its glusterd + +TEST launch_cluster 2; +TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/${V0}{1,2,3,4} +TEST $CLI_1 volume start $V0; +TEST $CLI_1 peer probe $H2; +EXPECT_WITHIN 20 1 check_peers; +TEST $CLI_2 volume remove-brick $V0 $H1:$B1/${V0}{3,4} start; +TEST $CLI_2 volume info +cleanup; diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 9ad4963be..a0c969124 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -3135,7 +3135,8 @@ glusterd_import_volinfo (dict_t *vols, int count, if (ret) gf_log (THIS->name, GF_LOG_INFO, "peer is possibly old version"); - + new_volinfo->subvol_count = new_volinfo->brick_count/ + glusterd_get_dist_leaf_count (new_volinfo); memset (key, 0, sizeof (key)); snprintf (key, sizeof (key), "volume%d.ckusm", count); ret = dict_get_uint32 (vols, key, &new_volinfo->cksum); -- cgit From 8ee2420266a0a1c47fcfee0796ef08d93d0797ab Mon Sep 17 00:00:00 2001 From: Raghavendra G Date: Fri, 3 Jan 2014 11:23:02 +0530 Subject: performance/io-cache: reduce the severity of log-message During a genuine error condition like a network outage, the log grows with redundant information. Change-Id: I5a4f2f62da10ef656f14200c4c84a6917b1f0ddd Signed-off-by: Raghavendra G BUG: 1048084 Reviewed-on: http://review.gluster.org/6635 Reviewed-by: Pranith Kumar Karampuri Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/performance/io-cache/src/page.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c index 64c5f6b66..94b8f229b 100644 --- a/xlators/performance/io-cache/src/page.c +++ b/xlators/performance/io-cache/src/page.c @@ -990,7 +990,7 @@ __ioc_page_error (ioc_page_t *page, int32_t op_ret, int32_t op_errno) waitq = page->waitq; page->waitq = NULL; - gf_log (page->inode->table->xl->name, GF_LOG_WARNING, + gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, "page error for page = %p & waitq = %p", page, waitq); for (trav = waitq; trav; trav = trav->next) { -- cgit From 311e3868bfdb3f2c1535d5a7cb8f759195457612 Mon Sep 17 00:00:00 2001 From: Santosh Kumar Pradhan Date: Fri, 3 Jan 2014 10:32:16 +0530 Subject: gNFS: Possible SEGV crash in NFS while DRC is OFF In rpcsvc_submit_generic(), FILE: rpc/rpc-lib/src/rpcsvc.c, while caching the reply (DRC), the code does not check if DRC is ON; it goes ahead assuming DRC is on and tries to take a LOCK on drc. FIX: Put a check on svc->drc by rpcsvc_need_drc().
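A minimal sketch of the fixed path, using the names from the patch below; rpcsvc_need_drc() is true only when DRC is enabled for the request, so svc->drc is only dereferenced when it is actually valid:

    if ((req->reply) && (rpcsvc_need_drc (req))) {
            drc = req->svc->drc;    /* non-NULL only when DRC is ON */

            LOCK (&drc->lock);
            /* ... cache the reply in the DRC ... */
            UNLOCK (&drc->lock);
    }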
Change-Id: I52c57280487e6061c68fd0b784e1cafceb2f3690 BUG: 1048072 Signed-off-by: Santosh Kumar Pradhan Reviewed-on: http://review.gluster.org/6632 Reviewed-by: Niels de Vos Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- rpc/rpc-lib/src/rpcsvc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c index 037c157f2..d19a3ca0c 100644 --- a/rpc/rpc-lib/src/rpcsvc.c +++ b/rpc/rpc-lib/src/rpcsvc.c @@ -1183,7 +1183,7 @@ rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec *proghdr, iobref_add (iobref, replyiob); /* cache the request in the duplicate request cache for appropriate ops */ - if (req->reply) { + if ((req->reply) && (rpcsvc_need_drc (req))) { drc = req->svc->drc; LOCK (&drc->lock); -- cgit From 825b976ee30a53e89fe747b4a3ba8f2eb862047c Mon Sep 17 00:00:00 2001 From: Santosh Kumar Pradhan Date: Thu, 2 Jan 2014 20:55:59 +0530 Subject: gNFS: Small memory leak in rpcsvc_drc_init() 1. The routine rpcsvc_drc_init() is only used while initialization of NFS xlator. It should just check for nfs.drc option and init DRC feature accordingly. If it's set to OFF, then rpcsvc_drc_init() allocates memory for svc.drc and set ret value to 0 and goes to out: block where drc is leaked. 2. rpcsvc_drc_init() should just allocate svc.drc and init it. Here svc.drc can never be valid. 3. If svc.drc gets init'd here, no point of checking for drc type here. Change-Id: I4085771cdb8c9c15d1b9c548b77929a37f27c124 BUG: 1047902 Signed-off-by: Santosh Kumar Pradhan Reviewed-on: http://review.gluster.org/6628 Reviewed-by: Niels de Vos Reviewed-by: Rajesh Joseph Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- rpc/rpc-lib/src/rpc-drc.c | 42 +++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/rpc/rpc-lib/src/rpc-drc.c b/rpc/rpc-lib/src/rpc-drc.c index e7ba114dd..7e77e038e 100644 --- a/rpc/rpc-lib/src/rpc-drc.c +++ b/rpc/rpc-lib/src/rpc-drc.c @@ -712,38 +712,30 @@ rpcsvc_drc_init (rpcsvc_t *svc, dict_t *options) GF_ASSERT (svc); GF_ASSERT (options); - if (!svc->drc) { - drc = GF_CALLOC (1, sizeof (rpcsvc_drc_globals_t), - gf_common_mt_drc_globals_t); - if (!drc) - return -1; - - svc->drc = drc; - LOCK_INIT (&drc->lock); - } else { - drc = svc->drc; - } - - LOCK (&drc->lock); - if (drc->type != DRC_TYPE_NONE) { - ret = 0; - goto out; - } - /* Toggle DRC on/off, when more drc types(persistent/cluster) are added, we shouldn't treat this as boolean */ ret = dict_get_str_boolean (options, "nfs.drc", _gf_true); if (ret == -1) { - gf_log (GF_RPCSVC, GF_LOG_INFO, "drc user options need second look"); + gf_log (GF_RPCSVC, GF_LOG_INFO, + "drc user options need second look"); ret = _gf_true; } - if (ret == _gf_false) { - /* drc off */ - gf_log (GF_RPCSVC, GF_LOG_INFO, "DRC is manually turned OFF"); - ret = 0; - goto out; - } + gf_log (GF_RPCSVC, GF_LOG_INFO, "DRC is turned %s", (ret?"ON":"OFF")); + + /*DRC off, nothing to do */ + if (ret == _gf_false) + return (0); + + drc = GF_CALLOC (1, sizeof (rpcsvc_drc_globals_t), + gf_common_mt_drc_globals_t); + if (!drc) + return (-1); + + LOCK_INIT (&drc->lock); + svc->drc = drc; + + LOCK (&drc->lock); /* Specify type of DRC to be used */ ret = dict_get_uint32 (options, "nfs.drc-type", &drc_type); -- cgit From d062e09e0c7925f37cbfc42ef42c7fe6804823fe Mon Sep 17 00:00:00 2001 From: Vijaykumar M Date: Wed, 18 Dec 2013 14:32:25 +0530 Subject: pathinfo: Provide user namespace access. 
Do not allow setxattr for pathinfo. This change was missed out when patch http://review.gluster.org/5101/ was submitted. Change-Id: Ifd32d95089b9bacc5dee80a8b924bb8713dca8a1 Signed-off-by: Vijaykumar M Reviewed-on: http://review.gluster.org/6535 Reviewed-by: Raghavendra G Tested-by: Gluster Build System --- xlators/storage/posix/src/posix-helpers.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 86ce08cb3..d2c991900 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -838,6 +838,11 @@ posix_fhandle_pair (xlator_t *this, int fd, int sys_ret = -1; int ret = 0; + if (XATTR_IS_PATHINFO (key)) { + ret = -EACCES; + goto out; + } + sys_ret = sys_fsetxattr (fd, key, value->data, value->len, flags); -- cgit From 79cbf27b9b98d1feebcc2f1db5fc1c976d2c24cf Mon Sep 17 00:00:00 2001 From: Atin Mukherjee Date: Fri, 3 Jan 2014 13:36:40 +0530 Subject: Glusterd : glusterd process generates core due to NULL store handle Problem: glusterd crashed as the backtrace revealed that the store handle was set to NULL. Solution: In the out block of the glusterd_store_global_info() function, the handle is dereferenced without any NULL check, which caused this segmentation fault. A NULL check is introduced to avoid this. While testing this fix, another issue was noticed: the GF_ASSERT macro likewise does not guarantee a NULL dereference check, and hence that macro call has been replaced by a GF_VALIDATE_OR_GOTO macro call in places where there is a danger of a crash due to a NULL dereference. Change-Id: Ic301aa45ce4bbdc2da751d2386439df7bb24c016 BUG: 1040844 Signed-off-by: Atin Mukherjee Reviewed-on: http://review.gluster.org/6619 Reviewed-by: Niels de Vos Reviewed-by: Krishnan Parthasarathi Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- libglusterfs/src/store.c | 16 ++++++++-------- xlators/mgmt/glusterd/src/glusterd-store.c | 12 +++++++----- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c index 48c79ee02..5af23592b 100644 --- a/libglusterfs/src/store.c +++ b/libglusterfs/src/store.c @@ -62,8 +62,8 @@ gf_store_mkstemp (gf_store_handle_t *shandle) int fd = -1; char tmppath[PATH_MAX] = {0,}; - GF_ASSERT (shandle); - GF_ASSERT (shandle->path); + GF_VALIDATE_OR_GOTO ("store", shandle, out); + GF_VALIDATE_OR_GOTO ("store", shandle->path, out); snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path); fd = open (tmppath, O_RDWR | O_CREAT | O_TRUNC | O_SYNC, 0600); @@ -71,7 +71,7 @@ gf_store_mkstemp (gf_store_handle_t *shandle) gf_log ("", GF_LOG_ERROR, "Failed to open %s, error: %s", tmppath, strerror (errno)); } - +out: return fd; } @@ -127,8 +127,8 @@ gf_store_rename_tmppath (gf_store_handle_t *shandle) int32_t ret = -1; char tmppath[PATH_MAX] = {0,}; - GF_ASSERT (shandle); - GF_ASSERT (shandle->path); + GF_VALIDATE_OR_GOTO ("store", shandle, out); + GF_VALIDATE_OR_GOTO ("store", shandle->path, out); snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path); ret = rename (tmppath, shandle->path); @@ -149,8 +149,8 @@ gf_store_unlink_tmppath (gf_store_handle_t *shandle) int32_t ret = -1; char tmppath[PATH_MAX] = {0,}; - GF_ASSERT (shandle); - GF_ASSERT (shandle->path); + GF_VALIDATE_OR_GOTO ("store", shandle, out); + GF_VALIDATE_OR_GOTO ("store", shandle->path, out); snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path); ret = unlink (tmppath); @@ -160,7 +160,7 @@
gf_store_unlink_tmppath (gf_store_handle_t *shandle) } else { ret = 0; } - +out: return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 3a4b09009..d0ad7dcdb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -1329,12 +1329,14 @@ glusterd_store_global_info (xlator_t *this) ret = gf_store_rename_tmppath (handle); out: - if (ret && (handle->fd > 0)) - gf_store_unlink_tmppath (handle); + if (handle) { + if (ret && (handle->fd > 0)) + gf_store_unlink_tmppath (handle); - if (handle->fd > 0) { - close (handle->fd); - handle->fd = 0; + if (handle->fd > 0) { + close (handle->fd); + handle->fd = 0; + } } if (uuid_str) -- cgit From 0fbb9c7baf13331d72fc42243cd031c2d7acdbe7 Mon Sep 17 00:00:00 2001 From: Lalatendu Mohanty Date: Thu, 2 Jan 2014 20:04:46 +0530 Subject: Crypt: Fixing incorrect placement of GF_FREE i.e. we are trying access the memory after freeing it using GF_FREE Change-Id: I2b22fd421131c618bd0d7c246ebf09f14751ad30 Signed-off-by: Lalatendu Mohanty Reviewed-on: http://review.gluster.org/6626 Reviewed-by: Xavier Hernandez Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/encryption/crypt/src/crypt.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/xlators/encryption/crypt/src/crypt.c b/xlators/encryption/crypt/src/crypt.c index 47ff38e50..becff3e47 100644 --- a/xlators/encryption/crypt/src/crypt.c +++ b/xlators/encryption/crypt/src/crypt.c @@ -3277,15 +3277,15 @@ static int32_t linkop_grab_local(call_frame_t *frame, if (newloc) { local->newloc = GF_CALLOC(1, sizeof(*newloc), gf_crypt_mt_loc); if (!local->newloc) { - GF_FREE(local->loc); loc_wipe(local->loc); + GF_FREE(local->loc); goto error; } memset(local->newloc, 0, sizeof(*local->newloc)); ret = loc_copy(local->newloc, newloc); if (ret) { - GF_FREE(local->loc); loc_wipe(local->loc); + GF_FREE(local->loc); GF_FREE(local->newloc); goto error; } @@ -3297,19 +3297,21 @@ static int32_t linkop_grab_local(call_frame_t *frame, goto error; } return 0; - error: - if (local->xdata) - dict_unref(local->xdata); - if (local->fd) - fd_unref(local->fd); - local->fd = 0; - local->loc = NULL; - local->newloc = NULL; - local->op_ret = -1; - local->op_errno = ret; +error: + if (local) { + if (local->xdata) + dict_unref(local->xdata); + if (local->fd) + fd_unref(local->fd); + local->fd = 0; + local->loc = NULL; + local->newloc = NULL; + local->op_ret = -1; + local->op_errno = ret; + } - return ret; + return ret; } /* -- cgit From b159d0a4c3ed128dc9f018b76c5d4b54a13060b3 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Wed, 11 Dec 2013 10:17:46 +0100 Subject: Add the MAINTAINERS file Different projects (Linux kernel, QEMU, ...) have a MAINTAINERS file in the root of the git repository. This file mostly lists the maintainers or main committers for certain files and directories. On occasion it would be very helpful to know who to add as a reviewer for proposed patches. 
Change-Id: Ide68779c7477529808ccb0e7d421db160e3222ea BUG: 1040351 URL: http://lists.nongnu.org/archive/html/gluster-devel/2013-11/msg00045.html Signed-off-by: Niels de Vos Reviewed-on: http://review.gluster.org/6480 Reviewed-by: Amar Tumballi Tested-by: Gluster Build System Reviewed-by: Lalatendu Mohanty Reviewed-by: Kaleb KEITHLEY --- MAINTAINERS | 199 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 MAINTAINERS diff --git a/MAINTAINERS b/MAINTAINERS new file mode 100644 index 000000000..0e3992523 --- /dev/null +++ b/MAINTAINERS @@ -0,0 +1,199 @@ +GlusterFS Maintainers +===================== + +The intention of this file is not to establish who owns what portions of the +code base, but to provide a set of names that developers can consult when they +have a question about a particular subset and also to provide a set of names +to be CC'd when submitting a patch to obtain appropriate review. + +In general, if you have a question about inclusion of a patch, you should +consult gluster-devel@nongnu.org and not any specific individual privately. + +Descriptions of section entries: + + M: Mail patches to: FullName + L: Mailing list that is relevant to this area + W: Web-page with status/info + Q: Patchwork web based patch tracking system site + T: SCM tree type and location. Type is one of: git, hg, quilt, stgit. + S: Status, one of the following: + Supported: Someone is actually paid to look after this. + Maintained: Someone actually looks after it. + Odd Fixes: It has a maintainer but they don't have time to do + much other than throw the odd patch in. See below. + Orphan: No current maintainer [but maybe you could take the + role as you write your new code]. + Obsolete: Old code. Something tagged obsolete generally means + it has been replaced by a better system and you + should be using that. + F: Files and directories with wildcard patterns. + A trailing slash includes all files and subdirectory files. + F: drivers/net/ all files in and below drivers/net + F: drivers/net/* all files in drivers/net, but not below + F: */net/* all files in "any top level directory"/net + One pattern per line. Multiple F: lines acceptable. + X: Files and directories that are NOT maintained, same rules as F: + Files exclusions are tested before file matches. + Can be useful for excluding a specific subdirectory, for instance: + F: net/ + X: net/ipv6/ + matches all files in and below net excluding net/ipv6/ + K: Keyword perl extended regex pattern to match content in a + patch or file. For instance: + K: of_get_profile + matches patches or files that contain "of_get_profile" + K: \b(printk|pr_(info|err))\b + matches patches or files that contain one or more of the words + printk, pr_info or pr_err + One regex pattern per line. Multiple K: lines acceptable. + + +General Project Architects +-------------------------- +M: Amar Tumballi +M: Anand Avati +M: Jeff Darcy +M: Kaleb S. 
Keithley +M: Vijay Bellur + +xlators: +-------- +Automatic File Replication (AFR) +M: Pranith Karampuri +S: Maintained +F: xlators/cluster/afr/ + +Distributed Hashing Table (DHT) +M: Shishir Gowda +S: Maintained +F: xlators/cluster/dht/ + +Performance +M: Raghavendra Gowdappa +S: Maintained +F: xlators/performance/ + +Index +M: Pranith Karampuri +S: Maintained +F: xlators/features/index/ + +Quota +M: Krishnan Parthasarathi +M: Raghavendra Gowdappa +S: Maintained +F: xlators/features/quota/ + +Marker +M: Raghavendra Gowdappa +S: Maintained +F: xlators/features/marker/ + +NFS Server +M: Santosh Pradhan +S: Maintained +F: xlators/nfs/ + +Changelog +M: Venky Shankar +S: Maintained +F: xlators/features/changelog/ + +Block Device +M: M. Mohan Kumar +F: xlators/storage/bd/ + +FUSE Bridge +M: Anand Avati +M: Brian Foster +M: Csaba Henk +S: Maintained +F: xlators/mount/ + + +Other bits of code: +------------------- +Geo Replication +M: Venky Shankar +S: Maintained +F: geo-replication/ + +libgfapi +M: Anand Avati +S: Maintained +F: api/ + +Management Daemon +M: Krishnan Parthasarathi +S: Maintained +F: cli/ +F: xlators/mgmt/ + +Remote Procedure Call subsystem +M: Amar Tumballi +M: Anand Avati +S: Maintained +F: rpc/ + + +Distribution Specific: +---------------------- +Debian Packaging +M: Patrick Matthäi +M: Louis Zuckerman +W: http://packages.qa.debian.org/g/glusterfs.html + +Fedora Packaging +M: glusterfs-owner@fedoraproject.org +M: Kaleb Keithley +M: Niels de Vos +W: https://apps.fedoraproject.org/packages/glusterfs +T: http://pkgs.fedoraproject.org/git/glusterfs.git + +NetBSD port +M: Emmanuel Dreyfus +W: http://pkgsrc.se/filesystems/glusterfs + +Ubuntu Packaging +M: Louis Zuckerman +W: http://download.gluster.org/pub/gluster/glusterfs/LATEST/Ubuntu/Ubuntu.README + + +Related projects +---------------- +Gluster Openstack Swift +M: Luis Pabon +S: Maintained +T: https://github.com/gluster/gluster-swift.git + +GlusterFS Hadoop HCFS plugin +M: Jay Vyas +T: https://github.com/gluster/glusterfs-hadoop.git + +NFS-Ganesha FSAL plugin +M: Anand Subramanian +S: Maintained +T: git://github.com/nfs-ganesha/nfs-ganesha.git +F: src/nfs-ganesha~/src/FSAL/FSAL_GLUSTER/ + +QEMU integration +M: Bharata B Rao +S: Maintained +T: git://git.qemu.org/qemu.git +F: block/gluster.c + +Samba VFS plugin +M: Raghavendra Talur +M: Chris Hertel +M: Jose Rivera +M: Ira Cooper +S: Maintained +T: git://git.samba.org/samba.git +F: source3/modules/vfs_glusterfs.c + +Wireshark dissectors +M: Niels de Vos +S: Maintained +T: http://code.wireshark.org/git/wireshark +F: epan/dissectors/packet-gluster* + -- cgit From 1e4f2d08f102e9fc1e27ffb5608d4b45c11e0b55 Mon Sep 17 00:00:00 2001 From: Niels de Vos Date: Sat, 4 Jan 2014 17:29:26 +0100 Subject: build: the main glusterfs package should not provide glusterfs-libs Because of this incorrect provides, there is no requirement to update glusterfs-libs. Most users will get a newer glusterfs-libs when updating anyway, but users that manually select RPMs for updating my skip the package, which will break their system. 
Change-Id: I636dfac3f0a521924f1c2e82948bb513b0a5c9f9 BUG: 950083 Fedora-BUG: 1048489 Signed-off-by: Niels de Vos Reviewed-on: http://review.gluster.org/6642 Tested-by: Gluster Build System Reviewed-by: Kaleb KEITHLEY --- glusterfs.spec.in | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/glusterfs.spec.in b/glusterfs.spec.in index 12f246895..516ca513f 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -156,11 +156,9 @@ BuildRequires: glib2-devel %endif Obsoletes: hekafs -Obsoletes: %{name}-libs <= 2.0.0 Obsoletes: %{name}-common < %{version}-%{release} Obsoletes: %{name}-core < %{version}-%{release} Obsoletes: %{name}-ufo -Provides: %{name}-libs = %{version}-%{release} Provides: %{name}-common = %{version}-%{release} Provides: %{name}-core = %{version}-%{release} @@ -874,6 +872,9 @@ if [ $1 -ge 1 ]; then fi %changelog +* Sat Jan 4 2014 Niels de Vos +- The main glusterfs package should not provide glusterfs-libs (#1048489) + * Tue Dec 10 2013 Kaleb S. KEITHLEY - Sync with Fedora glusterfs.spec 3.5.0-0.1.qa3 -- cgit From a1e26be05c7b344dc8e7f980d0a6ab00695f2f42 Mon Sep 17 00:00:00 2001 From: Santosh Kumar Pradhan Date: Tue, 7 Jan 2014 14:22:03 +0530 Subject: gNFS: mnt3_find_export() by pass AUTH check NFS subdir mount does not respect nfs.rpc-auth-reject option in the volume. If the volume is being mounted, then it would validate the AUTH by mnt3_check_client_net() but if the client is mounting a subdir, the control takes a different code path i.e. mnt3_find_export() which does not bother about the AUTH. FIX: Enforce the AUTH check in mnt3_parse_dir_exports() which is invoked by mnt3_find_export() for subdir mount. Change-Id: I6fdd3e6bd6cbd32b0d9ca620cc4c30fdaff9ca30 BUG: 1049225 Signed-off-by: Santosh Kumar Pradhan Reviewed-on: http://review.gluster.org/6655 Reviewed-by: Niels de Vos Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- tests/bugs/bug-921072.t | 9 ++++++-- xlators/nfs/server/src/mount3.c | 50 +++++++++++++++++++++++++++++------------ 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/tests/bugs/bug-921072.t b/tests/bugs/bug-921072.t index e101d5b46..37f8fde52 100755 --- a/tests/bugs/bug-921072.t +++ b/tests/bugs/bug-921072.t @@ -89,12 +89,13 @@ TEST $CLI volume set $V0 nfs.rpc-auth-allow 127.0.0.1 EXPECT_WITHIN 20 1 is_nfs_export_available TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0 +TEST mkdir -p $N0/subdir TEST umount $N0 # case 10: allow a non-localhost ip TEST $CLI volume set $V0 nfs.rpc-auth-allow 192.168.1.1 EXPECT_WITHIN 20 1 is_nfs_export_available -#40 +#41 TEST ! mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0 # case 11: reject only localhost ip @@ -104,6 +105,7 @@ TEST $CLI volume set $V0 nfs.rpc-auth-reject 127.0.0.1 EXPECT_WITHIN 20 1 is_nfs_export_available TEST ! mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0 +TEST ! 
mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0/subdir $N0 # case 12: reject only non-localhost ip TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.168.1.1 @@ -112,7 +114,10 @@ EXPECT_WITHIN 20 1 is_nfs_export_available TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0 $N0 TEST umount $N0 +TEST mount -t nfs -o vers=3,nolock,soft,intr localhost:/$V0/subdir $N0 +TEST umount $N0 + TEST $CLI volume stop --mode=script $V0 -#49 +#52 TEST $CLI volume delete --mode=script $V0 cleanup diff --git a/xlators/nfs/server/src/mount3.c b/xlators/nfs/server/src/mount3.c index b0824bf10..e86235522 100644 --- a/xlators/nfs/server/src/mount3.c +++ b/xlators/nfs/server/src/mount3.c @@ -1318,7 +1318,8 @@ mnt3_parse_dir_exports (rpcsvc_request_t *req, struct mount3_state *ms, char volname[1024]; struct mnt3_export *exp = NULL; char *volname_ptr = NULL; - int ret = -1; + int ret = -ENOENT; + struct nfs_state *nfs = NULL; if ((!ms) || (!subdir)) return -1; @@ -1332,10 +1333,26 @@ mnt3_parse_dir_exports (rpcsvc_request_t *req, struct mount3_state *ms, if (!exp) goto err; + nfs = (struct nfs_state *)ms->nfsx->private; + if (!nfs) + goto err; + + if (!nfs_subvolume_started (nfs, exp->vol)) { + gf_log (GF_MNT, GF_LOG_DEBUG, + "Volume %s not started", exp->vol->name); + goto err; + } + + if (mnt3_check_client_net (ms, req, exp->vol) == RPCSVC_AUTH_REJECT) { + gf_log (GF_MNT, GF_LOG_DEBUG, "Client mount not allowed"); + ret = -EACCES; + goto err; + } + ret = mnt3_resolve_subdir (req, ms, exp, subdir); if (ret < 0) { - gf_log (GF_MNT, GF_LOG_ERROR, "Failed to resolve export dir: %s" - , subdir); + gf_log (GF_MNT, GF_LOG_ERROR, + "Failed to resolve export dir: %s", subdir); goto err; } @@ -1375,10 +1392,6 @@ mnt3_find_export (rpcsvc_request_t *req, char *path, struct mnt3_export **e) } ret = mnt3_parse_dir_exports (req, ms, path); - if (ret == 0) { - ret = -2; - goto err; - } err: return ret; @@ -1416,17 +1429,26 @@ mnt3svc_mnt (rpcsvc_request_t *req) goto rpcerr; } - ret = 0; nfs = (struct nfs_state *)ms->nfsx->private; gf_log (GF_MNT, GF_LOG_DEBUG, "dirpath: %s", path); ret = mnt3_find_export (req, path, &exp); - if (ret == -2) { - ret = 0; - goto rpcerr; - } else if (ret < 0) { - ret = -1; - mntstat = MNT3ERR_NOENT; + if (ret < 0) { + mntstat = mnt3svc_errno_to_mnterr (-ret); goto mnterr; + } else if (!exp) { + /* + * SPECIAL CASE: exp is NULL if "path" is subdir in + * call to mnt3_find_export(). + * + * This is subdir mount, we are already DONE! + * nfs_subvolume_started() and mnt3_check_client_net() + * validation are done in mnt3_parse_dir_exports() + * which is invoked through mnt3_find_export(). + * + * TODO: All mount should happen thorugh mnt3svc_mount() + * It needs more clean up. 
+ */ + return (0); } if (!nfs_subvolume_started (nfs, exp->vol)) { -- cgit From c0767852b3dfad94546a5458bcf4460797cdbf0b Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Tue, 7 Jan 2014 15:23:19 +0530 Subject: cluster/afr: Don't accept heal commands until graph is up Change-Id: Icca6c23b6a5965f462db8b65af3eb2e141c7cd39 BUG: 1049355 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/6658 Tested-by: Gluster Build System Reviewed-by: Ravishankar N Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/afr-common.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 250b0944e..224d30546 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -3985,6 +3985,10 @@ afr_notify (xlator_t *this, int32_t event, case GF_EVENT_TRANSLATOR_OP: input = data; output = data2; + if (!had_heard_from_all) { + ret = -1; + goto out; + } ret = afr_xl_op (this, input, output); goto out; break; -- cgit From 88816bf4b2933da8fa2717cb0e25c521895da4e1 Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Tue, 7 Jan 2014 15:54:14 +0530 Subject: cluster/afr: Stop index crawl on pending full crawl Full crawl is executed when index self-heal is useless, like disk replacement. So if there are on-going index crawls, they should be stopped inorder to start full self-heals. Change-Id: I9a1545f1ec4ad9999dc08523ce859e4fa152e214 BUG: 1049355 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/6659 Tested-by: Gluster Build System Reviewed-by: Ravishankar N Reviewed-by: Vijay Bellur --- xlators/cluster/afr/src/afr-self-heald.c | 40 ++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 5 deletions(-) diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index dfa91d785..5f85c3047 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -20,7 +20,8 @@ #include "event-history.h" typedef enum { - STOP_CRAWL_ON_SINGLE_SUBVOL = 1 + STOP_CRAWL_ON_SINGLE_SUBVOL = 1, + STOP_INDEX_CRAWL_ON_PENDING_FULL_CRAWL = 2 } afr_crawl_flags_t; typedef enum { @@ -668,11 +669,26 @@ afr_crawl_done (int ret, call_frame_t *sync_frame, void *data) return 0; } +int +_get_heal_op_flags (shd_crawl_op op, afr_crawl_type_t crawl) +{ + int crawl_flags = 0; + + if (HEAL == op) { + crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL; + + if (crawl == INDEX) + crawl_flags |= STOP_INDEX_CRAWL_ON_PENDING_FULL_CRAWL; + } + + return crawl_flags; +} + void _do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl) { afr_start_crawl (this, child, crawl, _self_heal_entry, - NULL, _gf_true, STOP_CRAWL_ON_SINGLE_SUBVOL, + NULL, _gf_true, _get_heal_op_flags (HEAL, crawl), afr_crawl_done); } @@ -691,6 +707,7 @@ _crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason) gf_log (this->name, GF_LOG_DEBUG, "%s", msg); goto out; } + if (!priv->child_up[child]) { gf_log (this->name, GF_LOG_DEBUG, "Stopping crawl for %s , " "subvol went down", priv->children[child]->name); @@ -707,6 +724,17 @@ _crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason) goto out; } } + + if (crawl_flags & STOP_INDEX_CRAWL_ON_PENDING_FULL_CRAWL) { + if (shd->pending[child] == FULL) { + gf_log (this->name, GF_LOG_INFO, "Stopping index " + "self-heal as Full self-heal is pending on %s", + priv->children[child]->name); + msg = "Full crawl is pending"; + goto out; + } + } + proceed = _gf_true; out: if 
(reason) @@ -730,8 +758,7 @@ _do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl, int crawl_flags = 0; priv = this->private; - if (op == HEAL) - crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL; + crawl_flags = _get_heal_op_flags (op, crawl); if (output) { ret = dict_get_int32 (output, this->name, &xl_id); @@ -1684,7 +1711,10 @@ afr_dir_exclusive_crawl (void *data) if (!crawl) { gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress " - "for %s", priv->children[child]->name); + "for %s while attempting %s heal on %s", + priv->children[child]->name, + get_crawl_type_in_string (crawl_data->crawl), + priv->children[child]->name); goto out; } -- cgit From 2b05c1588ac60af26e1b16f9f27ef8d5e4e50a5f Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Tue, 24 Dec 2013 08:23:13 -0800 Subject: rpc/auth: Avoid NULL dereference in rpcsvc_auth_request_init() Code section is bogus! ------------------------------------------ 370: if (!auth->authops->request_init) 371: ret = auth->authops->request_init (req, auth->authprivate); ------------------------------------------ Seems to have been never been used historically since logically above code has never been true to actually execute "authops->request_init() --> auth_glusterfs_{v2,}_request_init()" On top of that under "rpcsvc_request_init()" verf.flavour and verf.datalen are initialized from what is provided through 'callmsg'. ------------------------------------------ req->verf.flavour = rpc_call_verf_flavour (callmsg); req->verf.datalen = rpc_call_verf_len (callmsg); /* AUTH */ rpcsvc_auth_request_init (req); return req; ------------------------------------------ So the code in 'auth_glusterfs_{v2,}_request_init()' performing this operation will over-write the original flavour and datalen. ------------------------------------------ if (!req) return -1; memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES); req->verf.datalen = 0; req->verf.flavour = AUTH_NULL; ------------------------------------------ Refactoring the whole code into a more understandable version and also avoiding a potential NULL dereference Change-Id: I1a430fcb4d26de8de219bd0cb3c46c141649d47d BUG: 1049735 Signed-off-by: Harshavardhana Reviewed-on: http://review.gluster.org/6591 Reviewed-by: Santosh Pradhan Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- rpc/rpc-lib/src/auth-glusterfs.c | 12 ------------ rpc/rpc-lib/src/auth-null.c | 9 --------- rpc/rpc-lib/src/auth-unix.c | 6 ------ rpc/rpc-lib/src/rpcsvc-auth.c | 33 ++++++++++++++++++++++----------- rpc/rpc-lib/src/rpcsvc.c | 8 +------- rpc/rpc-lib/src/rpcsvc.h | 2 +- 6 files changed, 24 insertions(+), 46 deletions(-) diff --git a/rpc/rpc-lib/src/auth-glusterfs.c b/rpc/rpc-lib/src/auth-glusterfs.c index db488434c..48871ffb3 100644 --- a/rpc/rpc-lib/src/auth-glusterfs.c +++ b/rpc/rpc-lib/src/auth-glusterfs.c @@ -50,12 +50,6 @@ ret: int auth_glusterfs_request_init (rpcsvc_request_t *req, void *priv) { - if (!req) - return -1; - memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES); - req->verf.datalen = 0; - req->verf.flavour = AUTH_NULL; - return 0; } @@ -172,12 +166,6 @@ ret: int auth_glusterfs_v2_request_init (rpcsvc_request_t *req, void *priv) { - if (!req) - return -1; - memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES); - req->verf.datalen = 0; - req->verf.flavour = AUTH_NULL; - return 0; } diff --git a/rpc/rpc-lib/src/auth-null.c b/rpc/rpc-lib/src/auth-null.c index ebdcc8ff8..b030341ab 100644 --- a/rpc/rpc-lib/src/auth-null.c +++ b/rpc/rpc-lib/src/auth-null.c @@ -22,15 +22,6 @@ int auth_null_request_init 
(rpcsvc_request_t *req, void *priv) { - if (!req) - return -1; - - memset (req->cred.authdata, 0, GF_MAX_AUTH_BYTES); - req->cred.datalen = 0; - - memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES); - req->verf.datalen = 0; - return 0; } diff --git a/rpc/rpc-lib/src/auth-unix.c b/rpc/rpc-lib/src/auth-unix.c index fa5f0576e..27351f669 100644 --- a/rpc/rpc-lib/src/auth-unix.c +++ b/rpc/rpc-lib/src/auth-unix.c @@ -24,12 +24,6 @@ int auth_unix_request_init (rpcsvc_request_t *req, void *priv) { - if (!req) - return -1; - memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES); - req->verf.datalen = 0; - req->verf.flavour = AUTH_NULL; - return 0; } diff --git a/rpc/rpc-lib/src/rpcsvc-auth.c b/rpc/rpc-lib/src/rpcsvc-auth.c index 0ede19f74..384e4a75d 100644 --- a/rpc/rpc-lib/src/rpcsvc-auth.c +++ b/rpc/rpc-lib/src/rpcsvc-auth.c @@ -369,25 +369,36 @@ ret: int -rpcsvc_auth_request_init (rpcsvc_request_t *req) +rpcsvc_auth_request_init (rpcsvc_request_t *req, struct rpc_msg *callmsg) { - int ret = -1; + int32_t ret = 0; rpcsvc_auth_t *auth = NULL; - if (!req) - return -1; + if (!req || !callmsg) { + ret = -1; + goto err; + } + + req->cred.flavour = rpc_call_cred_flavour (callmsg); + req->cred.datalen = rpc_call_cred_len (callmsg); + req->verf.flavour = rpc_call_verf_flavour (callmsg); + req->verf.datalen = rpc_call_verf_len (callmsg); auth = rpcsvc_auth_get_handler (req); - if (!auth) + if (!auth) { + ret = -1; goto err; - ret = 0; + } + gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth handler: %s", auth->authname); - if (!auth->authops->request_init) - ret = auth->authops->request_init (req, auth->authprivate); - req->auxgids = req->auxgidsmall; /* reset to auxgidlarge during - unsersialize if necessary */ - req->auxgidlarge = NULL; + if (auth->authops->request_init) + ret = auth->authops->request_init (req, auth->authprivate); + + /* reset to auxgidlarge during + unsersialize if necessary */ + req->auxgids = req->auxgidsmall; + req->auxgidlarge = NULL; err: return ret; } diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c index d19a3ca0c..69db8b70b 100644 --- a/rpc/rpc-lib/src/rpcsvc.c +++ b/rpc/rpc-lib/src/rpcsvc.c @@ -367,13 +367,7 @@ rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans, * been copied into the required sections of the req structure, * we just need to fill in the meta-data about it now. */ - req->cred.flavour = rpc_call_cred_flavour (callmsg); - req->cred.datalen = rpc_call_cred_len (callmsg); - req->verf.flavour = rpc_call_verf_flavour (callmsg); - req->verf.datalen = rpc_call_verf_len (callmsg); - - /* AUTH */ - rpcsvc_auth_request_init (req); + rpcsvc_auth_request_init (req, callmsg); return req; } diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h index 28ec93e11..30a969b11 100644 --- a/rpc/rpc-lib/src/rpcsvc.h +++ b/rpc/rpc-lib/src/rpcsvc.h @@ -553,7 +553,7 @@ struct rpcsvc_auth_list { }; extern int -rpcsvc_auth_request_init (rpcsvc_request_t *req); +rpcsvc_auth_request_init (rpcsvc_request_t *req, struct rpc_msg *callmsg); extern int rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options); -- cgit From 21ffbf9b92b2478c17f816a8d0418a7cb080998a Mon Sep 17 00:00:00 2001 From: Lalatendu Mohanty Date: Wed, 8 Jan 2014 14:25:08 +0530 Subject: geo-rep: Fixing an incorrect memcpy operation. Currently we are copying a higher size variable to lower size variable i.e. 
copying a PATH_MAX-sized variable into a NAME_MAX-sized one in "memcpy (sts_val->worker_status, monitor_status, strlen(monitor_status));" Change-Id: I81dca8e81a4aea5545d5982aed20e05a5e08641c Signed-off-by: Lalatendu Mohanty Reviewed-on: http://review.gluster.org/6667 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/mgmt/glusterd/src/glusterd-geo-rep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 5d724cc2a..c5c76e11a 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -2743,7 +2743,7 @@ glusterd_read_status_file (glusterd_volinfo_t *volinfo, char *slave, char *master = NULL; char tmp[1024] = ""; char sts_val_name[1024] = ""; - char monitor_status[PATH_MAX] = ""; + char monitor_status[NAME_MAX] = ""; char *statefile = NULL; char *socketfile = NULL; dict_t *confd = NULL; -- cgit From f9162911e85f95fa2a10d4b70d3f09876bd15adb Mon Sep 17 00:00:00 2001 From: Lalatendu Mohanty Date: Mon, 6 Jan 2014 13:37:24 +0530 Subject: libgfapi : Fixing some NULL pointer dereferencing issues. 1. Fixing dereferencing null pointer "gio" 2. Checking "frame" before calling STACK_DESTROY (frame->root) Change-Id: If2ce8804a8e9f3f99f71a8c0d13900afb5635072 Signed-off-by: Lalatendu Mohanty Reviewed-on: http://review.gluster.org/6313 Reviewed-by: Santosh Pradhan Tested-by: Gluster Build System Reviewed-by: Shyamsundar Ranganathan Reviewed-by: Raghavendra Talur Reviewed-by: Vijay Bellur --- api/src/glfs-fops.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c index f3ac335fb..b09dd90f7 100644 --- a/api/src/glfs-fops.c +++ b/api/src/glfs-fops.c @@ -710,10 +710,14 @@ glfs_preadv_async (struct glfs_fd *glfd, const struct iovec *iovec, int count, offset, flags, NULL); out: - if (ret) { - GF_FREE (gio->iov); - GF_FREE (gio); - STACK_DESTROY (frame->root); + if (ret) { + if (gio) { + GF_FREE (gio->iov); + GF_FREE (gio); + } + if (frame) { + STACK_DESTROY (frame->root); + } glfs_subvol_done (fs, subvol); } -- cgit From a2f772de44cc09d595005f4d3316fbd0f37c46b8 Mon Sep 17 00:00:00 2001 From: "M. Mohan Kumar" Date: Fri, 10 Jan 2014 12:26:29 +0530 Subject: MAINTAINERS: Update BD BUG: 1040351 Change-Id: I2cbbae22e84fe7f8593db22a32ceb87ca97f46da Signed-off-by: M. Mohan Kumar Reviewed-on: http://review.gluster.org/6679 Reviewed-by: Vijay Bellur Tested-by: Vijay Bellur --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0e3992523..39c042b9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -101,6 +101,7 @@ F: xlators/features/changelog/ Block Device M: M. Mohan Kumar +S: Maintained F: xlators/storage/bd/ FUSE Bridge -- cgit From 2edf1ec797e6f56515d0208be152d18ca6e71456 Mon Sep 17 00:00:00 2001 From: Kaushal M Date: Mon, 30 Dec 2013 09:59:18 +0530 Subject: glusterd: Relocate rebalance sockfile The defrag sockfile was moved from priv->workdir to DEFAULT_VAR_RUN_DIRECTORY. The format for the new path of the defrag sockfile is 'DEFAULT_VAR_RUN_DIRECTORY/gluster-rebalance-<volume-id>.sock'. This was needed because the earlier location didn't have a fixed length and could exceed UNIX_PATH_MAX characters. This could lead to the rebalance process failing to start as the socket file could not be created. Also, for keeping backward compatibility, glusterd_rebalance_rpc_create will try both the new and old sockfile locations when attempting reconnection.
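The effect can be sketched as below (macro internals as in the patch that follows); a canonical UUID string is a fixed 36 characters, so the full path length is bounded and always fits in sun_path (UNIX_PATH_MAX, typically 108 bytes), unlike a path rooted in an arbitrarily long priv->workdir:

    /* new location: fixed-length socket path under /var/run */
    snprintf (sockfile, UNIX_PATH_MAX,
              DEFAULT_VAR_RUN_DIRECTORY "/gluster-rebalance-%s.sock",
              uuid_utoa (volinfo->volume_id));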
Change-Id: I6740ea665de84ebce1ef7199c412f426de54e3d0 BUG: 1049726 Signed-off-by: Kaushal M Reviewed-on: http://review.gluster.org/6616 Reviewed-by: Krishnan Parthasarathi Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/mgmt/glusterd/src/glusterd-rebalance.c | 41 ++++++++++++++++++++++---- xlators/mgmt/glusterd/src/glusterd-utils.c | 2 +- xlators/mgmt/glusterd/src/glusterd.h | 11 +++++-- 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index daa8ddd1d..b28056135 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -237,7 +237,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, goto out; } - GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv); + GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo); GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, priv); snprintf (logfile, PATH_MAX, "%s/%s-rebalance.log", DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname); @@ -288,7 +288,7 @@ glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, sleep (5); - ret = glusterd_rebalance_rpc_create (volinfo); + ret = glusterd_rebalance_rpc_create (volinfo, _gf_false); //FIXME: this cbk is passed as NULL in all occurrences. May be //we never needed it. @@ -302,13 +302,21 @@ out: int -glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo) +glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, + gf_boolean_t reconnect) { dict_t *options = NULL; char sockfile[PATH_MAX] = {0,}; int ret = -1; glusterd_defrag_info_t *defrag = volinfo->rebal.defrag; - glusterd_conf_t *priv = THIS->private; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + struct stat buf = {0,}; + + this = THIS; + GF_ASSERT (this); + priv = this->private; + GF_ASSERT (priv); //rebalance process is not started if (!defrag) @@ -319,7 +327,30 @@ glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo) ret = 0; goto out; } - GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv); + GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo); + /* If reconnecting check if defrag sockfile exists in the new location + * in /var/run/ , if it does not try the old location + */ + if (reconnect) { + ret = sys_stat (sockfile, &buf); + /* TODO: Remove this once we don't need backward compatability + * with the older path + */ + if (ret && (errno == ENOENT)) { + gf_log (this->name, GF_LOG_WARNING, "Rebalance sockfile " + "%s does not exist. Trying old path.", + sockfile); + GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD (sockfile, volinfo, + priv); + ret =sys_stat (sockfile, &buf); + if (ret && (ENOENT == errno)) { + gf_log (this->name, GF_LOG_ERROR, "Rebalance " + "sockfile %s does not exist.", + sockfile); + goto out; + } + } + } /* Setting frame-timeout to 10mins (600seconds). * Unix domain sockets ensures that the connection is reliable. 
The diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index a0c969124..4a88b5b35 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -6949,7 +6949,7 @@ glusterd_volume_defrag_restart (glusterd_volinfo_t *volinfo, char *op_errstr, case GF_DEFRAG_STATUS_STARTED: GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv); if (gf_is_service_running (pidfile, &pid)) { - glusterd_rebalance_rpc_create (volinfo); + glusterd_rebalance_rpc_create (volinfo, _gf_true); break; } case GF_DEFRAG_STATUS_NOT_STARTED: diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index ab383ac1c..9b6e2fb33 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -439,13 +439,19 @@ typedef ssize_t (*gd_serialize_t) (struct iovec outmsg, void *args); snprintf (path, PATH_MAX, "%s/rebalance",vol_path); \ } while (0) -#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo, priv) do { \ +#define GLUSTERD_GET_DEFRAG_SOCK_FILE_OLD(path, volinfo, priv) do { \ char defrag_path[PATH_MAX]; \ GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \ snprintf (path, PATH_MAX, "%s/%s.sock", defrag_path, \ uuid_utoa(MY_UUID)); \ } while (0) +#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo) do { \ + snprintf (path, UNIX_PATH_MAX, DEFAULT_VAR_RUN_DIRECTORY \ + "/gluster-rebalance-%s.sock", \ + uuid_utoa(volinfo->volume_id)); \ + } while (0) + #define GLUSTERD_GET_DEFRAG_PID_FILE(path, volinfo, priv) do { \ char defrag_path[PATH_MAX]; \ GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv); \ @@ -727,7 +733,8 @@ int glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr, size_t len, int cmd, defrag_cbk_fn_t cbk, glusterd_op_t op); int -glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo); +glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo, + gf_boolean_t reconnect); int glusterd_handle_cli_heal_volume (rpcsvc_request_t *req); -- cgit From 6992e4e37df9b12aa3582bb71341e9591d9f6e20 Mon Sep 17 00:00:00 2001 From: Lalatendu Mohanty Date: Sun, 5 Jan 2014 01:01:39 +0530 Subject: glusterd: Comments for missing break in switchcase of glusterd_op_stage_quota Change-Id: I3ac5b73b9703a390f1de9cd6a999dfd8ec27d379 Signed-off-by: Lalatendu Mohanty Reviewed-on: http://review.gluster.org/6645 Reviewed-by: Vijay Bellur Tested-by: Vijay Bellur --- xlators/mgmt/glusterd/src/glusterd-quota.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c index f46f08787..3c8dcf8dd 100644 --- a/xlators/mgmt/glusterd/src/glusterd-quota.c +++ b/xlators/mgmt/glusterd/src/glusterd-quota.c @@ -1403,6 +1403,9 @@ glusterd_op_stage_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict) "greater than INT64_MAX", hard_limit_str); goto out; } + /*The break statement is missing here to allow intentional fall + * through of code execution to the next switch case + */ case GF_QUOTA_OPTION_TYPE_REMOVE: ret = glusterd_get_gfid_from_brick (dict, volinfo, rsp_dict, -- cgit From f7fcbc0ffe0e56f81f73f99e0d4825cd95ca0014 Mon Sep 17 00:00:00 2001 From: Varun Shastry Date: Mon, 9 Dec 2013 17:51:40 +0530 Subject: mgmt/glusterd: Improve the description in volume set help output Change-Id: I785648970f53033a69922c23110b5eea9e47feb3 BUG: 1046030 Signed-off-by: Varun Shastry Reviewed-on: http://review.gluster.org/6573 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- 
xlators/cluster/dht/src/dht-shared.c | 3 ++- xlators/mgmt/glusterd/src/glusterd.c | 9 +++++++-- xlators/protocol/client/src/client.c | 10 ++++++++-- xlators/protocol/server/src/server.c | 10 ++++++---- 4 files changed, 23 insertions(+), 9 deletions(-) diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 674297c71..324b30626 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -690,7 +690,8 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_INT, .min = 1, .validate = GF_OPT_VALIDATE_MIN, - .description = "Specifies the directory layout spread." + .description = "Specifies the directory layout spread. Takes number " + "of subvolumes as default value." }, { .key = {"decommissioned-bricks"}, .type = GF_OPTION_TYPE_ANY, diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 58833869a..d59aaa44a 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -1516,8 +1516,13 @@ struct volume_options options[] = { { .key = {"server-quorum-type"}, .type = GF_OPTION_TYPE_STR, .value = { "none", "server"}, - .description = "If set to server, enables the specified " - "volume to participate in quorum." + .description = "This feature is on the server-side i.e. in glusterd." + " Whenever the glusterd on a machine observes that " + "the quorum is not met, it brings down the bricks to " + "prevent data split-brains. When the network " + "connections are brought back up and the quorum is " + "restored the bricks in the volume are brought back " + "up." }, { .key = {"server-quorum-ratio"}, .type = GF_OPTION_TYPE_PERCENT, diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index ac3fcb117..d8c5f4262 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -2841,13 +2841,19 @@ struct volume_options options[] = { { .key = {"lk-heal"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", - .description = "Enables or disables the lock heal." + .description = "When the connection to client is lost, server " + "cleans up all the locks held by the client. After " + "the connection is restored, the client reacquires " + "(heals) the fcntl locks released by the server." }, { .key = {"grace-timeout"}, .type = GF_OPTION_TYPE_INT, .min = 10, .max = 1800, - .description = "Sets the grace-timeout value. Valid range 10-1800." + .default_value = "10", + .description = "Specifies the duration for the lock state to be " + "maintained on the client after a network " + "disconnection. Range 10-1800 seconds." }, {.key = {"tcp-window-size"}, .type = GF_OPTION_TYPE_SIZET, diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c index a797a0d6c..56b83cb9a 100644 --- a/xlators/protocol/server/src/server.c +++ b/xlators/protocol/server/src/server.c @@ -770,7 +770,7 @@ client_destroy_cbk (xlator_t *this, client_t *client) server_ctx_t *ctx = NULL; client_ctx_del (client, this, &tmp); - + ctx = tmp; if (ctx == NULL) @@ -1101,13 +1101,15 @@ struct volume_options options[] = { { .key = {"auth.addr.*.allow"}, .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST, .description = "Allow a comma separated list of addresses and/or " - "hostnames to connect to the server. By default, all" - " connections are allowed." + "hostnames to connect to the server. Option " + "auth.reject overrides this option. By default, all " + "connections are allowed." 
}, { .key = {"auth.addr.*.reject"}, .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST, .description = "Reject a comma separated list of addresses and/or " - "hostnames to connect to the server. By default, all" + "hostnames to connect to the server. This option " + "overrides the auth.allow option. By default, all" " connections are allowed." }, -- cgit From 5a26daaea96aa3f7855b8d75b6568449f5466cc2 Mon Sep 17 00:00:00 2001 From: Santosh Kumar Pradhan Date: Fri, 10 Jan 2014 11:57:05 +0530 Subject: gNFS: Server sets ACL mask wrongly in GETACL reply FIX: 1. Set the ACL mask what was requested by client 2. Validate the ACL mask in SETACL routine Change-Id: Icb8576a8fe2684e0beaf94e8db6a92bc70bbfe7f BUG: 1051865 Signed-off-by: Santosh Kumar Pradhan Reviewed-on: http://review.gluster.org/6683 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- xlators/nfs/server/src/acl3.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c index 5286077a8..25476ebbe 100644 --- a/xlators/nfs/server/src/acl3.c +++ b/xlators/nfs/server/src/acl3.c @@ -256,7 +256,7 @@ acl3_getacl_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } cs = frame->local; getaclreply = &cs->args.getaclreply; - if (op_ret == -1) { + if (op_ret < 0) { stat = nfs3_cbk_errno_status (op_ret, op_errno); goto err; } @@ -343,7 +343,6 @@ acl3_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, deviceid = nfs3_request_xlator_deviceid (cs->req); nfs3_map_deviceid_to_statdev (buf, deviceid); getaclreply->attr = nfs3_stat_to_fattr3 (buf); - getaclreply->mask = (NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT); nfs_request_user_init (&nfu, cs->req); ret = nfs_getxattr (cs->nfsx, cs->vol, &nfu, &cs->resolvedloc, @@ -430,6 +429,7 @@ acl3svc_getacl (rpcsvc_request_t *req) vol, stat, rpcerr); cs->vol = vol; + cs->args.getaclreply.mask = getaclargs.mask; acl3_volume_started_check (nfs3, vol, ret, acl3err); ret = nfs3_fh_resolve_and_resume (cs, fhp, @@ -551,6 +551,13 @@ acl3svc_setacl (rpcsvc_request_t *req) rpcsvc_request_seterr (req, GARBAGE_ARGS); goto rpcerr; } + + /* Validate ACL mask */ + if (setaclargs.mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) { + stat = NFS3ERR_INVAL; + goto acl3err; + } + fhp = &fh; acl3_validate_gluster_fh (fhp, stat, acl3err); acl3_map_fh_to_volume (nfs->nfs3state, fhp, req, -- cgit From ce86c132347f0a788ab50ffbd6795a2eb982074e Mon Sep 17 00:00:00 2001 From: Venkatesh Somyajulu Date: Fri, 3 Jan 2014 13:54:25 +0530 Subject: cluster/afr: Unable to self heal symbolic links Problem: Under the entry self heal, readlink is done at the source and sink. When readlink is done at the sink, because link is not present at the sink, afr expects ENOENT. AFR translator takes decisions for new link creation based on ENOENT but server translator is modified to return ESTALE because of which afr xlator is not able to heal. Fix: The check for inode absence at server includes ESTALE as well. 
Change-Id: I319e4cb4156a243afee79365b7b7a5a7823e9a24 BUG: 1046624 Signed-off-by: Venkatesh Somyajulu Reviewed-on: http://review.gluster.org/6599 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- tests/bugs/bug-1046624.t | 49 +++++++++++++++++++++++++++ xlators/cluster/afr/src/afr-self-heal-entry.c | 4 +-- xlators/cluster/afr/src/afr.h | 2 ++ 3 files changed, 53 insertions(+), 2 deletions(-) create mode 100755 tests/bugs/bug-1046624.t diff --git a/tests/bugs/bug-1046624.t b/tests/bugs/bug-1046624.t new file mode 100755 index 000000000..bd46b5eaf --- /dev/null +++ b/tests/bugs/bug-1046624.t @@ -0,0 +1,49 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +## Start and create a volume +mkdir -p ${B0}/${V0}-0 +mkdir -p ${B0}/${V0}-1 +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1} + +## Verify volume is created +EXPECT "$V0" volinfo_field $V0 'Volume Name'; +EXPECT 'Created' volinfo_field $V0 'Status'; + + +## Make sure automatic self-heal doesn't perturb our results. +TEST $CLI volume set $V0 cluster.self-heal-daemon off + +TEST $CLI volume set $V0 background-self-heal-count 0 + +## Start volume and verify +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +## Mount native +TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 + +TEST `echo "TEST-FILE" > $M0/File` +TEST `mkdir $M0/Dir` +TEST kill_brick $V0 $H0 $B0/${V0}-0 + +TEST `ln -s $M0/File $M0/Link1` +TEST `ln -s $M0/Dir $M0/Link2` + +TEST $CLI volume start $V0 force + +TEST `find $M0/ | xargs stat 2>/dev/null 1>/dev/null` + +sleep 60 + +TEST stat $B0/${V0}-0/Link1 +TEST stat $B0/${V0}-0/Link2 + +cleanup; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 0ca06aaa3..00f1a9cb9 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -1530,7 +1530,7 @@ afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cooki child_index = (long) cookie; - if ((op_ret == -1) && (op_errno != ENOENT)) { + if ((op_ret == -1) && (!afr_inode_missing(op_errno))) { gf_log (this->name, GF_LOG_INFO, "readlink of %s on %s failed (%s)", impunge_local->loc.path, @@ -1541,7 +1541,7 @@ afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cooki /* symlink doesn't exist on the sink */ - if ((op_ret == -1) && (op_errno == ENOENT)) { + if ((op_ret == -1) && (afr_inode_missing(op_errno))) { afr_sh_entry_impunge_symlink (impunge_frame, this, child_index, impunge_sh->linkname); return 0; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index dc0bc06fb..9196a1f27 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -33,6 +33,8 @@ #define AFR_LOCKEE_COUNT_MAX 3 #define AFR_DOM_COUNT_MAX 3 +#define afr_inode_missing(op_errno) (op_errno == ENOENT || op_errno == ESTALE) + struct _pump_private; typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this, -- cgit From aa3b01533efcd85fc1e654ac14a03ab8e1d5bbab Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Tue, 3 Dec 2013 16:30:45 -0800 Subject: locks: various fixes - implement ref/unref of entry locks (and fix bad pointer deref crashes) - code cleanup and deleted various data types - fix improper read/write lock conflict detection in entrylk - fix indefinite hang of blocked locks on disconnect - register locks in client_t synchronously, fix crashes in 
disconnect path Change-Id: Id273690c9111b8052139d1847060d1fb5a711924 BUG: 849630 Signed-off-by: Anand Avati Reviewed-on: http://review.gluster.org/6638 Tested-by: Gluster Build System Reviewed-by: Kaleb KEITHLEY Reviewed-by: Vijay Bellur --- libglusterfs/src/client_t.h | 1 + libglusterfs/src/glusterfs.h | 1 - tests/bugs/bug-765564.t | 2 + xlators/features/locks/src/clear.c | 11 +- xlators/features/locks/src/common.c | 120 --------- xlators/features/locks/src/common.h | 35 +-- xlators/features/locks/src/entrylk.c | 490 ++++++++++++++++------------------- xlators/features/locks/src/inodelk.c | 208 +++++++-------- xlators/features/locks/src/locks.h | 36 ++- xlators/features/locks/src/posix.c | 109 +++----- 10 files changed, 386 insertions(+), 627 deletions(-) diff --git a/libglusterfs/src/client_t.h b/libglusterfs/src/client_t.h index f7812f8f0..548081896 100644 --- a/libglusterfs/src/client_t.h +++ b/libglusterfs/src/client_t.h @@ -60,6 +60,7 @@ struct clienttable { gf_lock_t lock; cliententry_t *cliententries; int first_free; + client_t *local; }; typedef struct clienttable clienttable_t; diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index ed483d19c..6dc2fe6df 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -455,7 +455,6 @@ struct _glusterfs_ctx { int daemon_pipe[2]; - struct client_disconnect *client_disconnect; struct clienttable *clienttable; }; typedef struct _glusterfs_ctx glusterfs_ctx_t; diff --git a/tests/bugs/bug-765564.t b/tests/bugs/bug-765564.t index 0b8b8cd4f..6e4087f80 100644 --- a/tests/bugs/bug-765564.t +++ b/tests/bugs/bug-765564.t @@ -3,6 +3,8 @@ . $(dirname $0)/../include.rc . $(dirname $0)/../volume.rc +cleanup; + TEST glusterd TEST pidof glusterd diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c index 124b9ad0f..75593b898 100644 --- a/xlators/features/locks/src/clear.c +++ b/xlators/features/locks/src/clear.c @@ -338,9 +338,8 @@ blkd: elock->basename, ENTRYLK_LOCK, elock->type, -1, EAGAIN); STACK_UNWIND_STRICT (entrylk, elock->frame, -1, EAGAIN, NULL); - GF_FREE ((char *) elock->basename); - GF_FREE (elock->connection_id); - GF_FREE (elock); + + __pl_entrylk_unref (elock); } if (!(args->kind & CLRLK_GRANTED)) { @@ -363,13 +362,13 @@ granted: gcount++; list_del_init (&elock->domain_list); list_add_tail (&elock->domain_list, &removed); + + __pl_entrylk_unref (elock); } } pthread_mutex_unlock (&pl_inode->mutex); - list_for_each_entry_safe (elock, tmp, &removed, domain_list) { - grant_blocked_entry_locks (this, pl_inode, elock, dom); - } + grant_blocked_entry_locks (this, pl_inode, dom); ret = 0; out: diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c index b3309580d..f6c71c1cf 100644 --- a/xlators/features/locks/src/common.c +++ b/xlators/features/locks/src/common.c @@ -1099,123 +1099,3 @@ pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock) return conf; } - -struct _lock_table * -pl_lock_table_new (void) -{ - struct _lock_table *new = NULL; - - new = GF_CALLOC (1, sizeof (struct _lock_table), gf_common_mt_lock_table); - if (new == NULL) { - goto out; - } - INIT_LIST_HEAD (&new->entrylk_lockers); - INIT_LIST_HEAD (&new->inodelk_lockers); - LOCK_INIT (&new->lock); -out: - return new; -} - - -int -pl_add_locker (struct _lock_table *table, const char *volume, - loc_t *loc, fd_t *fd, pid_t pid, gf_lkowner_t *owner, - glusterfs_fop_t type) -{ - int32_t ret = -1; - struct _locker *new = NULL; - - GF_VALIDATE_OR_GOTO ("lock-table", table, out); - 
GF_VALIDATE_OR_GOTO ("lock-table", volume, out); - - new = GF_CALLOC (1, sizeof (struct _locker), gf_common_mt_locker); - if (new == NULL) { - goto out; - } - INIT_LIST_HEAD (&new->lockers); - - new->volume = gf_strdup (volume); - - if (fd == NULL) { - loc_copy (&new->loc, loc); - } else { - new->fd = fd_ref (fd); - } - - new->pid = pid; - new->owner = *owner; - - LOCK (&table->lock); - { - if (type == GF_FOP_ENTRYLK) - list_add_tail (&new->lockers, &table->entrylk_lockers); - else - list_add_tail (&new->lockers, &table->inodelk_lockers); - } - UNLOCK (&table->lock); -out: - return ret; -} - -int -pl_del_locker (struct _lock_table *table, const char *volume, - loc_t *loc, fd_t *fd, gf_lkowner_t *owner, glusterfs_fop_t type) -{ - struct _locker *locker = NULL; - struct _locker *tmp = NULL; - int32_t ret = -1; - struct list_head *head = NULL; - struct list_head del; - - GF_VALIDATE_OR_GOTO ("lock-table", table, out); - GF_VALIDATE_OR_GOTO ("lock-table", volume, out); - - INIT_LIST_HEAD (&del); - - LOCK (&table->lock); - { - if (type == GF_FOP_ENTRYLK) { - head = &table->entrylk_lockers; - } else { - head = &table->inodelk_lockers; - } - - list_for_each_entry_safe (locker, tmp, head, lockers) { - if (!is_same_lkowner (&locker->owner, owner) || - strcmp (locker->volume, volume)) - continue; - - /* - * It is possible for inodelk lock to come on anon-fd - * and inodelk unlock to come on normal fd in case of - * client re-opens. So don't check for fds to be equal. - */ - if (locker->fd && fd) - list_move_tail (&locker->lockers, &del); - else if (locker->loc.inode && loc && - (locker->loc.inode == loc->inode)) - list_move_tail (&locker->lockers, &del); - } - } - UNLOCK (&table->lock); - - tmp = NULL; - locker = NULL; - - list_for_each_entry_safe (locker, tmp, &del, lockers) { - list_del_init (&locker->lockers); - if (locker->fd) - fd_unref (locker->fd); - else - loc_wipe (&locker->loc); - - GF_FREE (locker->volume); - GF_FREE (locker); - } - - ret = 0; -out: - return ret; - -} - diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h index db19ec978..5ec630ee8 100644 --- a/xlators/features/locks/src/common.h +++ b/xlators/features/locks/src/common.h @@ -32,20 +32,6 @@ #define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid) -struct _locker { - struct list_head lockers; - char *volume; - loc_t loc; - fd_t *fd; - gf_lkowner_t owner; - pid_t pid; -}; - -struct _lock_table { - struct list_head inodelk_lockers; - struct list_head entrylk_lockers; - gf_lock_t lock; -}; posix_lock_t * new_posix_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, @@ -92,7 +78,7 @@ __pl_inodelk_unref (pl_inode_lock_t *lock); void grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_entry_lock_t *unlocked, pl_dom_list_t *dom); + pl_dom_list_t *dom); void pl_update_refkeeper (xlator_t *this, inode_t *inode); @@ -166,22 +152,7 @@ pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock); uint32_t check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename); -int32_t -pl_add_locker (struct _lock_table *table, const char *volume, - loc_t *loc, - fd_t *fd, - pid_t pid, - gf_lkowner_t *owner, - glusterfs_fop_t type); - -int32_t -pl_del_locker (struct _lock_table *table, const char *volume, - loc_t *loc, - fd_t *fd, - gf_lkowner_t *owner, - glusterfs_fop_t type); - -struct _lock_table * -pl_lock_table_new (void); +void __pl_inodelk_unref (pl_inode_lock_t *lock); +void __pl_entrylk_unref (pl_entry_lock_t *lock); #endif 
/* __COMMON_H__ */ diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c index 0785dc547..208bc140e 100644 --- a/xlators/features/locks/src/entrylk.c +++ b/xlators/features/locks/src/entrylk.c @@ -23,11 +23,29 @@ #include "locks.h" #include "common.h" + +void +__pl_entrylk_unref (pl_entry_lock_t *lock) +{ + lock->ref--; + if (!lock->ref) { + GF_FREE ((char *)lock->basename); + GF_FREE (lock->connection_id); + GF_FREE (lock); + } +} + + +static void +__pl_entrylk_ref (pl_entry_lock_t *lock) +{ + lock->ref++; +} + + static pl_entry_lock_t * new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type, - client_t *client, pid_t client_pid, gf_lkowner_t *owner, - const char *volume) - + const char *domain, call_frame_t *frame, char *conn_id) { pl_entry_lock_t *newlock = NULL; @@ -39,14 +57,21 @@ new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type, newlock->basename = basename ? gf_strdup (basename) : NULL; newlock->type = type; - newlock->trans = client; - newlock->volume = volume; - newlock->client_pid = client_pid; - newlock->owner = *owner; + newlock->client = frame->root->client; + newlock->client_pid = frame->root->pid; + newlock->volume = domain; + newlock->owner = frame->root->lk_owner; + newlock->frame = frame; + + if (conn_id) { + newlock->connection_id = gf_strdup (conn_id); + } INIT_LIST_HEAD (&newlock->domain_list); INIT_LIST_HEAD (&newlock->blocked_locks); + INIT_LIST_HEAD (&newlock->client_list); + __pl_entrylk_ref (newlock); out: return newlock; } @@ -77,42 +102,42 @@ __same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2) { return (is_same_lkowner (&l1->owner, &l2->owner) && - (l1->trans == l2->trans)); + (l1->client == l2->client)); } /** - * lock_grantable - is this lock grantable? + * entrylk_grantable - is this lock grantable? 
* @inode: inode in which to look * @basename: name we're trying to lock * @type: type of lock */ static pl_entry_lock_t * -__lock_grantable (pl_dom_list_t *dom, const char *basename, entrylk_type type) +__entrylk_grantable (pl_dom_list_t *dom, pl_entry_lock_t *lock) { - pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *tmp = NULL; if (list_empty (&dom->entrylk_list)) return NULL; - list_for_each_entry (lock, &dom->entrylk_list, domain_list) { - if (names_conflict (lock->basename, basename)) - return lock; + list_for_each_entry (tmp, &dom->entrylk_list, domain_list) { + if (names_conflict (tmp->basename, lock->basename)) + return tmp; } return NULL; } static pl_entry_lock_t * -__blocked_lock_conflict (pl_dom_list_t *dom, const char *basename, entrylk_type type) +__blocked_entrylk_conflict (pl_dom_list_t *dom, pl_entry_lock_t *lock) { - pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *tmp = NULL; if (list_empty (&dom->blocked_entrylks)) return NULL; - list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) { - if (names_conflict (lock->basename, basename)) + list_for_each_entry (tmp, &dom->blocked_entrylks, blocked_locks) { + if (names_conflict (tmp->basename, lock->basename)) return lock; } @@ -293,7 +318,7 @@ __find_most_matching_lock (pl_dom_list_t *dom, const char *basename) } /** - * __lock_name - lock a name in a directory + * __lock_entrylk - lock a name in a directory * @inode: inode for the directory in which to lock * @basename: name of the entry to lock * if null, lock the entire directory @@ -304,89 +329,49 @@ __find_most_matching_lock (pl_dom_list_t *dom, const char *basename) */ int -__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type, - call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this, - int nonblock, char *conn_id) +__lock_entrylk (xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock, + int nonblock, pl_dom_list_t *dom) { - pl_entry_lock_t *lock = NULL; - pl_entry_lock_t *conf = NULL; - int ret = -EINVAL; - - lock = new_entrylk_lock (pinode, basename, type, - frame->root->client, frame->root->pid, - &frame->root->lk_owner, dom->domain); - if (!lock) { - ret = -ENOMEM; - goto out; - } - - lock->frame = frame; - lock->this = this; - lock->trans = frame->root->client; + pl_entry_lock_t *conf = NULL; + int ret = -EAGAIN; - if (conn_id) { - lock->connection_id = gf_strdup (conn_id); - } - - conf = __lock_grantable (dom, basename, type); + conf = __entrylk_grantable (dom, lock); if (conf) { ret = -EAGAIN; - if (nonblock){ - GF_FREE (lock->connection_id); - GF_FREE ((char *)lock->basename); - GF_FREE (lock); + if (nonblock) goto out; - } - gettimeofday (&lock->blkd_time, NULL); list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); gf_log (this->name, GF_LOG_TRACE, "Blocking lock: {pinode=%p, basename=%s}", - pinode, basename); + pinode, lock->basename); goto out; } - if ( __blocked_lock_conflict (dom, basename, type) && !(__owner_has_lock (dom, lock))) { + if (__blocked_entrylk_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) { ret = -EAGAIN; - if (nonblock) { - GF_FREE (lock->connection_id); - GF_FREE ((char *) lock->basename); - GF_FREE (lock); + if (nonblock) goto out; - } - lock->frame = frame; - lock->this = this; - gettimeofday (&lock->blkd_time, NULL); list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks); - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_DEBUG, "Lock is grantable, but blocking to prevent starvation"); gf_log (this->name, GF_LOG_TRACE, "Blocking lock: {pinode=%p, basename=%s}", - 
pinode, basename); + pinode, lock->basename); - ret = -EAGAIN; goto out; } - switch (type) { - case ENTRYLK_WRLCK: - gettimeofday (&lock->granted_time, NULL); - list_add_tail (&lock->domain_list, &dom->entrylk_list); - break; - - default: - - gf_log (this->name, GF_LOG_DEBUG, - "Invalid type for entrylk specified: %d", type); - ret = -EINVAL; - goto out; - } + __pl_entrylk_ref (lock); + gettimeofday (&lock->granted_time, NULL); + list_add (&lock->domain_list, &dom->entrylk_list); + lock->frame = NULL; ret = 0; out: @@ -394,37 +379,36 @@ out: } /** - * __unlock_name - unlock a name in a directory + * __unlock_entrylk - unlock a name in a directory * @inode: inode for the directory to unlock in * @basename: name of the entry to unlock * if null, unlock the entire directory */ pl_entry_lock_t * -__unlock_name (pl_dom_list_t *dom, const char *basename, entrylk_type type) +__unlock_entrylk (pl_dom_list_t *dom, pl_entry_lock_t *lock) { - pl_entry_lock_t *lock = NULL; + pl_entry_lock_t *tmp = NULL; pl_entry_lock_t *ret_lock = NULL; - lock = __find_most_matching_lock (dom, basename); + tmp = __find_most_matching_lock (dom, lock->basename); - if (!lock) { - gf_log ("locks", GF_LOG_DEBUG, + if (!tmp) { + gf_log ("locks", GF_LOG_ERROR, "unlock on %s (type=ENTRYLK_WRLCK) attempted but no matching lock found", - basename); + lock->basename); goto out; } - if (names_equal (lock->basename, basename) - && lock->type == type) { + if (names_equal (tmp->basename, lock->basename) + && tmp->type == lock->type) { + + list_del_init (&tmp->domain_list); + ret_lock = tmp; - if (type == ENTRYLK_WRLCK) { - list_del_init (&lock->domain_list); - ret_lock = lock; - } } else { - gf_log ("locks", GF_LOG_DEBUG, - "Unlock for a non-existing lock!"); + gf_log ("locks", GF_LOG_ERROR, + "Unlock on %s for a non-existing lock!", lock->basename); goto out; } @@ -446,7 +430,7 @@ check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename) pthread_mutex_lock (&pinode->mutex); { list_for_each_entry (dom, &pinode->dom_list, inode_list) { - conf = __lock_grantable (dom, basename, ENTRYLK_WRLCK); + conf = __find_most_matching_lock (dom, basename); if (conf && conf->basename) { entrylk = 1; break; @@ -472,28 +456,14 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, INIT_LIST_HEAD (&blocked_list); list_splice_init (&dom->blocked_entrylks, &blocked_list); - list_for_each_entry_safe (bl, tmp, &blocked_list, - blocked_locks) { + list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) { list_del_init (&bl->blocked_locks); - - gf_log ("locks", GF_LOG_TRACE, - "Trying to unblock: {pinode=%p, basename=%s}", - pl_inode, bl->basename); - - bl_ret = __lock_name (pl_inode, bl->basename, bl->type, - bl->frame, dom, bl->this, 0, - bl->connection_id); + bl_ret = __lock_entrylk (bl->this, pl_inode, bl, 0, dom); if (bl_ret == 0) { list_add (&bl->blocked_locks, granted); - } else { - gf_log (this->name, GF_LOG_DEBUG, - "should never happen"); - GF_FREE (bl->connection_id); - GF_FREE ((char *)bl->basename); - GF_FREE (bl); } } return; @@ -502,7 +472,7 @@ __grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, /* Grants locks if possible which are blocked on a lock */ void grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, - pl_entry_lock_t *unlocked, pl_dom_list_t *dom) + pl_dom_list_t *dom) { struct list_head granted_list; pl_entry_lock_t *tmp = NULL; @@ -518,105 +488,26 @@ grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode, pthread_mutex_unlock (&pl_inode->mutex); 
list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { - list_del_init (&lock->blocked_locks); - entrylk_trace_out (this, lock->frame, NULL, NULL, NULL, lock->basename, ENTRYLK_LOCK, lock->type, 0, 0); STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL); + lock->frame = NULL; + } - GF_FREE (lock->connection_id); - GF_FREE ((char *)lock->basename); - GF_FREE (lock); - } - - GF_FREE ((char *)unlocked->basename); - GF_FREE (unlocked->connection_id); - GF_FREE (unlocked); + pthread_mutex_lock (&pl_inode->mutex); + { + list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) { + list_del_init (&lock->blocked_locks); + __pl_entrylk_unref (lock); + } + } + pthread_mutex_unlock (&pl_inode->mutex); return; } -/** - * release_entry_locks_for_client: release all entry locks from this - * client for this loc_t - */ - -static int -release_entry_locks_for_client (xlator_t *this, pl_inode_t *pinode, - pl_dom_list_t *dom, client_t *client) -{ - pl_entry_lock_t *lock = NULL; - pl_entry_lock_t *tmp = NULL; - struct list_head granted; - struct list_head released; - - INIT_LIST_HEAD (&granted); - INIT_LIST_HEAD (&released); - - pthread_mutex_lock (&pinode->mutex); - { - list_for_each_entry_safe (lock, tmp, &dom->blocked_entrylks, - blocked_locks) { - if (lock->trans != client) - continue; - - list_del_init (&lock->blocked_locks); - - gf_log (this->name, GF_LOG_TRACE, - "releasing lock on held by " - "{client=%p}", client); - - list_add (&lock->blocked_locks, &released); - - } - - list_for_each_entry_safe (lock, tmp, &dom->entrylk_list, - domain_list) { - if (lock->trans != client) - continue; - - list_del_init (&lock->domain_list); - - gf_log (this->name, GF_LOG_TRACE, - "releasing lock on held by " - "{client=%p}", client); - - GF_FREE ((char *)lock->basename); - GF_FREE (lock->connection_id); - GF_FREE (lock); - } - - __grant_blocked_entry_locks (this, pinode, dom, &granted); - - } - - pthread_mutex_unlock (&pinode->mutex); - - list_for_each_entry_safe (lock, tmp, &released, blocked_locks) { - list_del_init (&lock->blocked_locks); - - STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN, NULL); - - GF_FREE ((char *)lock->basename); - GF_FREE (lock->connection_id); - GF_FREE (lock); - - } - - list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) { - list_del_init (&lock->blocked_locks); - - STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL); - - GF_FREE ((char *)lock->basename); - GF_FREE (lock->connection_id); - GF_FREE (lock); - } - - return 0; -} /* Common entrylk code called by pl_entrylk and pl_fentrylk */ int @@ -632,10 +523,12 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, char unwind = 1; GF_UNUSED int dict_ret = -1; pl_inode_t *pinode = NULL; + pl_entry_lock_t *reqlock = NULL; pl_entry_lock_t *unlocked = NULL; pl_dom_list_t *dom = NULL; char *conn_id = NULL; pl_ctx_t *ctx = NULL; + int nonblock = 0; if (xdata) dict_ret = dict_get_str (xdata, "connection-id", &conn_id); @@ -646,6 +539,15 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, goto out; } + if (frame->root->client) { + ctx = pl_ctx_get (frame->root->client, this); + if (!ctx) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); + goto unwind; + } + } + dom = get_domain (pinode, volume); if (!dom){ op_errno = ENOMEM; @@ -654,72 +556,64 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type); - if (frame->root->lk_owner.len == 0) { - /* - this is a special case that means release - 
all locks from this client - */ - - gf_log (this->name, GF_LOG_TRACE, - "Releasing locks for client %p", frame->root->client); - - release_entry_locks_for_client (this, pinode, dom, - frame->root->client); - op_ret = 0; - - goto out; + reqlock = new_entrylk_lock (pinode, basename, type, dom->domain, frame, + conn_id); + if (!reqlock) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; } switch (cmd) { - case ENTRYLK_LOCK: - pthread_mutex_lock (&pinode->mutex); - { - ret = __lock_name (pinode, basename, type, - frame, dom, this, 0, conn_id); - } - pthread_mutex_unlock (&pinode->mutex); - - op_errno = -ret; - if (ret < 0) { - if (ret == -EAGAIN) - unwind = 0; - else - unwind = 1; - goto out; - } else { - op_ret = 0; - op_errno = 0; - unwind = 1; - goto out; - } - - break; - case ENTRYLK_LOCK_NB: - unwind = 1; + nonblock = 1; + /* fall through */ + case ENTRYLK_LOCK: + if (ctx) + pthread_mutex_lock (&ctx->lock); pthread_mutex_lock (&pinode->mutex); { - ret = __lock_name (pinode, basename, type, - frame, dom, this, 1, conn_id); + reqlock->pinode = pinode; + + ret = __lock_entrylk (this, pinode, reqlock, nonblock, dom); + if (ret == 0) + op_ret = 0; + else + op_errno = -ret; + + if (ctx && (!ret || !nonblock)) + list_add (&reqlock->client_list, + &ctx->entrylk_lockers); + + if (ret == -EAGAIN && !nonblock) { + /* blocked */ + unwind = 0; + } else { + __pl_entrylk_unref (reqlock); + } } pthread_mutex_unlock (&pinode->mutex); - - if (ret < 0) { - op_errno = -ret; - goto out; - } - - break; + if (ctx) + pthread_mutex_unlock (&ctx->lock); + break; case ENTRYLK_UNLOCK: + if (ctx) + pthread_mutex_lock (&ctx->lock); pthread_mutex_lock (&pinode->mutex); { - unlocked = __unlock_name (dom, basename, type); + unlocked = __unlock_entrylk (dom, reqlock); + if (unlocked) { + list_del_init (&unlocked->client_list); + __pl_entrylk_unref (unlocked); + } + __pl_entrylk_unref (reqlock); } pthread_mutex_unlock (&pinode->mutex); + if (ctx) + pthread_mutex_unlock (&ctx->lock); - if (unlocked) - grant_blocked_entry_locks (this, pinode, unlocked, dom); + grant_blocked_entry_locks (this, pinode, dom); break; @@ -733,27 +627,10 @@ pl_common_entrylk (call_frame_t *frame, xlator_t *this, op_ret = 0; out: pl_update_refkeeper (this, inode); + if (unwind) { entrylk_trace_out (this, frame, volume, fd, loc, basename, cmd, type, op_ret, op_errno); - - ctx = pl_ctx_get (frame->root->client, this); - - if (ctx == NULL) { - gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); - goto unwind; - } - - if (cmd == ENTRYLK_UNLOCK) - pl_del_locker (ctx->ltable, volume, loc, fd, - &frame->root->lk_owner, - GF_FOP_ENTRYLK); - else - pl_add_locker (ctx->ltable, volume, loc, fd, - frame->root->pid, - &frame->root->lk_owner, - GF_FOP_ENTRYLK); - unwind: STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, NULL); } else { @@ -761,7 +638,6 @@ unwind: cmd, type); } - return 0; } @@ -801,6 +677,88 @@ pl_fentrylk (call_frame_t *frame, xlator_t *this, } +static void +pl_entrylk_log_cleanup (pl_entry_lock_t *lock) +{ + pl_inode_t *pinode = NULL; + char *path = NULL; + char *file = NULL; + + pinode = lock->pinode; + + inode_path (pinode->refkeeper, NULL, &path); + + if (path) + file = path; + else + file = uuid_utoa (pinode->refkeeper->gfid); + + gf_log (THIS->name, GF_LOG_WARNING, + "releasing lock on %s held by " + "{client=%p, pid=%"PRId64" lk-owner=%s}", + file, lock->client, (uint64_t) lock->client_pid, + lkowner_utoa (&lock->owner)); + GF_FREE (path); +} + + +/* Release all entrylks from this client */ +int +pl_entrylk_client_cleanup 
(xlator_t *this, pl_ctx_t *ctx) +{ + pl_entry_lock_t *tmp = NULL; + pl_entry_lock_t *l = NULL; + pl_dom_list_t *dom = NULL; + pl_inode_t *pinode = NULL; + + struct list_head released; + + INIT_LIST_HEAD (&released); + + pthread_mutex_lock (&ctx->lock); + { + list_for_each_entry_safe (l, tmp, &ctx->entrylk_lockers, + client_list) { + list_del_init (&l->client_list); + list_add_tail (&l->client_list, &released); + + pl_entrylk_log_cleanup (l); + + pinode = l->pinode; + + pthread_mutex_lock (&pinode->mutex); + { + list_del_init (&l->domain_list); + } + pthread_mutex_unlock (&pinode->mutex); + } + } + pthread_mutex_unlock (&ctx->lock); + + list_for_each_entry_safe (l, tmp, &released, client_list) { + list_del_init (&l->client_list); + + if (l->frame) + STACK_UNWIND_STRICT (entrylk, l->frame, -1, EAGAIN, + NULL); + + pinode = l->pinode; + + dom = get_domain (pinode, l->volume); + + grant_blocked_inode_locks (this, pinode, dom); + + pthread_mutex_lock (&pinode->mutex); + { + __pl_entrylk_unref (l); + } + pthread_mutex_unlock (&pinode->mutex); + } + + return 0; +} + + int32_t __get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode) { diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c index 508523e11..969b67a61 100644 --- a/xlators/features/locks/src/inodelk.c +++ b/xlators/features/locks/src/inodelk.c @@ -35,7 +35,7 @@ __pl_inodelk_ref (pl_inode_lock_t *lock) lock->ref++; } -inline void +void __pl_inodelk_unref (pl_inode_lock_t *lock) { lock->ref--; @@ -204,7 +204,7 @@ __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, int ret = -EINVAL; conf = __inodelk_grantable (dom, lock); - if (conf){ + if (conf) { ret = -EAGAIN; if (can_block == 0) goto out; @@ -232,7 +232,7 @@ __lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, gettimeofday (&lock->blkd_time, NULL); list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks); - gf_log (this->name, GF_LOG_TRACE, + gf_log (this->name, GF_LOG_DEBUG, "Lock is grantable, but blocking to prevent starvation"); gf_log (this->name, GF_LOG_TRACE, "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked", @@ -307,6 +307,8 @@ __inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom) out: return conf; } + + static void __grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted, pl_dom_list_t *dom) @@ -363,6 +365,7 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, &lock->user_flock, 0, 0, lock->volume); STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL); + lock->frame = NULL; } pthread_mutex_lock (&pl_inode->mutex); @@ -375,103 +378,101 @@ grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pthread_mutex_unlock (&pl_inode->mutex); } -/* Release all inodelks from this client */ -static int -release_inode_locks_of_client (xlator_t *this, pl_dom_list_t *dom, - inode_t *inode, client_t *client) + +static void +pl_inodelk_log_cleanup (pl_inode_lock_t *lock) { - pl_inode_lock_t *tmp = NULL; - pl_inode_lock_t *l = NULL; + pl_inode_t *pl_inode = NULL; + char *path = NULL; + char *file = NULL; - pl_inode_t * pinode = NULL; + pl_inode = lock->pl_inode; - struct list_head released; + inode_path (pl_inode->refkeeper, NULL, &path); - char *path = NULL; - char *file = NULL; + if (path) + file = path; + else + file = uuid_utoa (pl_inode->refkeeper->gfid); - INIT_LIST_HEAD (&released); + gf_log (THIS->name, GF_LOG_WARNING, + "releasing lock on %s held by " + "{client=%p, pid=%"PRId64" lk-owner=%s}", + 
file, lock->client, (uint64_t) lock->client_pid, + lkowner_utoa (&lock->owner)); + GF_FREE (path); +} - pinode = pl_inode_get (this, inode); - pthread_mutex_lock (&pinode->mutex); - { +/* Release all entrylks from this client */ +int +pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx) +{ + pl_inode_lock_t *tmp = NULL; + pl_inode_lock_t *l = NULL; + pl_dom_list_t *dom = NULL; + pl_inode_t *pl_inode = NULL; + + struct list_head released; - list_for_each_entry_safe (l, tmp, &dom->blocked_inodelks, blocked_locks) { - if (l->client != client) - continue; + INIT_LIST_HEAD (&released); - list_del_init (&l->blocked_locks); + pthread_mutex_lock (&ctx->lock); + { + list_for_each_entry_safe (l, tmp, &ctx->inodelk_lockers, + client_list) { + list_del_init (&l->client_list); + list_add_tail (&l->client_list, &released); - inode_path (inode, NULL, &path); - if (path) - file = path; - else - file = uuid_utoa (inode->gfid); + pl_inodelk_log_cleanup (l); - gf_log (this->name, GF_LOG_DEBUG, - "releasing blocking lock on %s held by " - "{client=%p, pid=%"PRId64" lk-owner=%s}", - file, client, (uint64_t) l->client_pid, - lkowner_utoa (&l->owner)); + pl_inode = l->pl_inode; - list_add (&l->blocked_locks, &released); - if (path) { - GF_FREE (path); - path = NULL; + pthread_mutex_lock (&pl_inode->mutex); + { + __delete_inode_lock (l); } + pthread_mutex_unlock (&pl_inode->mutex); } + } + pthread_mutex_unlock (&ctx->lock); - list_for_each_entry_safe (l, tmp, &dom->inodelk_list, list) { - if (l->client != client) - continue; - - inode_path (inode, NULL, &path); - if (path) - file = path; - else - file = uuid_utoa (inode->gfid); - - gf_log (this->name, GF_LOG_DEBUG, - "releasing granted lock on %s held by " - "{client=%p, pid=%"PRId64" lk-owner=%s}", - file, client, (uint64_t) l->client_pid, - lkowner_utoa (&l->owner)); - - if (path) { - GF_FREE (path); - path = NULL; - } + list_for_each_entry_safe (l, tmp, &released, client_list) { + list_del_init (&l->client_list); - __delete_inode_lock (l); - __pl_inodelk_unref (l); - } - } - GF_FREE (path); + if (l->frame) + STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN, + NULL); + + pl_inode = l->pl_inode; - pthread_mutex_unlock (&pinode->mutex); + dom = get_domain (pl_inode, l->volume); - list_for_each_entry_safe (l, tmp, &released, blocked_locks) { - list_del_init (&l->blocked_locks); + grant_blocked_inode_locks (this, pl_inode, dom); - STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN, NULL); - //No need to take lock as the locks are only in one list - __pl_inodelk_unref (l); + pthread_mutex_lock (&pl_inode->mutex); + { + __pl_inodelk_unref (l); + } + pthread_mutex_unlock (&pl_inode->mutex); } - grant_blocked_inode_locks (this, pinode, dom); return 0; } static int -pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, - int can_block, pl_dom_list_t *dom) +pl_inode_setlk (xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode, + pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom) { int ret = -EINVAL; pl_inode_lock_t *retlock = NULL; gf_boolean_t unref = _gf_true; + lock->pl_inode = pl_inode; + + if (ctx) + pthread_mutex_lock (&ctx->lock); pthread_mutex_lock (&pl_inode->mutex); { if (lock->fl_type != F_UNLCK) { @@ -495,6 +496,10 @@ pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, if (can_block) unref = _gf_false; } + + if (ctx && (!ret || can_block)) + list_add_tail (&lock->client_list, + &ctx->inodelk_lockers); } else { retlock = __inode_unlock_lock (this, lock, dom); if (!retlock) { @@ -503,16 +508,21 @@ 
pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock, ret = -EINVAL; goto out; } - __pl_inodelk_unref (retlock); + list_del_init (&retlock->client_list); + __pl_inodelk_unref (retlock); ret = 0; } - } out: - if (unref) - __pl_inodelk_unref (lock); + if (unref) + __pl_inodelk_unref (lock); + } pthread_mutex_unlock (&pl_inode->mutex); + if (ctx) + pthread_mutex_unlock (&ctx->lock); + grant_blocked_inode_locks (this, pl_inode, dom); + return ret; } @@ -552,6 +562,7 @@ new_inode_lock (struct gf_flock *flock, client_t *client, pid_t client_pid, INIT_LIST_HEAD (&lock->list); INIT_LIST_HEAD (&lock->blocked_locks); + INIT_LIST_HEAD (&lock->client_list); __pl_inodelk_ref (lock); return lock; @@ -627,6 +638,15 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this, pl_trace_in (this, frame, fd, loc, cmd, flock, volume); + if (frame->root->client) { + ctx = pl_ctx_get (frame->root->client, this); + if (!ctx) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); + goto unwind; + } + } + pinode = pl_inode_get (this, inode); if (!pinode) { op_errno = ENOMEM; @@ -639,27 +659,6 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this, goto unwind; } - if (frame->root->lk_owner.len == 0) { - /* - special case: this means release all locks - from this client - */ - gf_log (this->name, GF_LOG_TRACE, - "Releasing all locks from client %p", frame->root->client); - - release_inode_locks_of_client (this, dom, inode, frame->root->client); - _pl_convert_volume (volume, &res1); - if (res1) { - dom = get_domain (pinode, res1); - if (dom) - release_inode_locks_of_client (this, dom, - inode, frame->root->client); - } - - op_ret = 0; - goto unwind; - } - reqlock = new_inode_lock (flock, frame->root->client, frame->root->pid, frame, this, volume, conn_id); @@ -678,8 +677,8 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this, case F_SETLK: memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock)); - ret = pl_inode_setlk (this, pinode, reqlock, - can_block, dom); + ret = pl_inode_setlk (this, ctx, pinode, reqlock, can_block, + dom); if (ret < 0) { if ((can_block) && (F_UNLCK != flock->l_type)) { @@ -704,23 +703,6 @@ pl_common_inodelk (call_frame_t *frame, xlator_t *this, op_ret = 0; - ctx = pl_ctx_get (frame->root->client, this); - - if (ctx == NULL) { - gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); - goto unwind; - } - - if (flock->l_type == F_UNLCK) - pl_del_locker (ctx->ltable, volume, loc, fd, - &frame->root->lk_owner, - GF_FOP_INODELK); - else - pl_add_locker (ctx->ltable, volume, loc, fd, - frame->root->pid, - &frame->root->lk_owner, - GF_FOP_INODELK); - unwind: if ((inode != NULL) && (flock !=NULL)) { pl_update_refkeeper (this, inode); diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h index 76fc941d7..8c2a6f867 100644 --- a/xlators/features/locks/src/locks.h +++ b/xlators/features/locks/src/locks.h @@ -65,7 +65,7 @@ struct __pl_inode_lock { struct gf_flock user_flock; /* the flock supplied by the user */ xlator_t *this; /* required for blocked locks */ - fd_t *fd; + struct __pl_inode *pl_inode; call_frame_t *frame; @@ -80,6 +80,8 @@ struct __pl_inode_lock { pid_t client_pid; /* pid of client process */ char *connection_id; /* stores the client connection id */ + + struct list_head client_list; /* list of all locks from a client */ }; typedef struct __pl_inode_lock pl_inode_lock_t; @@ -103,9 +105,11 @@ typedef struct __pl_dom_list_t pl_dom_list_t; struct __entry_lock { struct list_head domain_list; /* 
list_head back to pl_dom_list_t */ struct list_head blocked_locks; /* list_head back to blocked_entrylks */ + int ref; call_frame_t *frame; xlator_t *this; + struct __pl_inode *pinode; const char *volume; @@ -115,11 +119,13 @@ struct __entry_lock { struct timeval blkd_time; /*time at which lock was queued into blkd list*/ struct timeval granted_time; /*time at which lock was queued into active list*/ - void *trans; + void *client; gf_lkowner_t owner; pid_t client_pid; /* pid of client process */ char *connection_id; /* stores the client connection id */ + + struct list_head client_list; /* list of all locks from a client */ }; typedef struct __entry_lock pl_entry_lock_t; @@ -144,12 +150,6 @@ struct __pl_inode { typedef struct __pl_inode pl_inode_t; -struct __pl_fd { - gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */ -}; -typedef struct __pl_fd pl_fd_t; - - typedef struct { gf_boolean_t mandatory; /* if mandatory locking is enabled */ gf_boolean_t trace; /* trace lock requests in and out */ @@ -178,15 +178,27 @@ typedef struct { } pl_fdctx_t; +struct _locker { + struct list_head lockers; + char *volume; + inode_t *inode; + gf_lkowner_t owner; +}; + typedef struct _locks_ctx { - gf_lock_t ltable_lock; /* only for replace, - ltable has its own internal - lock for operations */ - struct _lock_table *ltable; + pthread_mutex_t lock; + struct list_head inodelk_lockers; + struct list_head entrylk_lockers; } pl_ctx_t; pl_ctx_t * pl_ctx_get (client_t *client, xlator_t *xlator); +int +pl_inodelk_client_cleanup (xlator_t *this, pl_ctx_t *ctx); + +int +pl_entrylk_client_cleanup (xlator_t *this, pl_ctx_t *ctx); + #endif /* __POSIX_LOCKS_H__ */ diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c index 7bfb38a51..fce0d509f 100644 --- a/xlators/features/locks/src/posix.c +++ b/xlators/features/locks/src/posix.c @@ -2243,7 +2243,7 @@ __dump_entrylks (pl_inode_t *pl_inode) lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK", lock->basename, (unsigned long long) lock->client_pid, - lkowner_utoa (&lock->owner), lock->trans, + lkowner_utoa (&lock->owner), lock->client, lock->connection_id, ctime_r (&lock->granted_time.tv_sec, granted)); } else { @@ -2251,7 +2251,7 @@ __dump_entrylks (pl_inode_t *pl_inode) lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK", lock->basename, (unsigned long long) lock->client_pid, - lkowner_utoa (&lock->owner), lock->trans, + lkowner_utoa (&lock->owner), lock->client, lock->connection_id, ctime_r (&lock->blkd_time.tv_sec, blocked), ctime_r (&lock->granted_time.tv_sec, granted)); @@ -2271,7 +2271,7 @@ __dump_entrylks (pl_inode_t *pl_inode) lock->type == ENTRYLK_RDLCK ? 
"ENTRYLK_RDLCK" : "ENTRYLK_WRLCK", lock->basename, (unsigned long long) lock->client_pid, - lkowner_utoa (&lock->owner), lock->trans, + lkowner_utoa (&lock->owner), lock->client, lock->connection_id, ctime_r (&lock->blkd_time.tv_sec, blocked)); @@ -2524,19 +2524,12 @@ pl_ctx_get (client_t *client, xlator_t *xlator) if (ctx == NULL) goto out; - ctx->ltable = pl_lock_table_new(); - - if (ctx->ltable == NULL) { - GF_FREE (ctx); - ctx = NULL; - goto out; - } - - LOCK_INIT (&ctx->ltable_lock); + pthread_mutex_init (&ctx->lock, NULL); + INIT_LIST_HEAD (&ctx->inodelk_lockers); + INIT_LIST_HEAD (&ctx->entrylk_lockers); if (client_ctx_set (client, xlator, ctx) != 0) { - LOCK_DESTROY (&ctx->ltable_lock); - GF_FREE (ctx->ltable); + pthread_mutex_destroy (&ctx->lock); GF_FREE (ctx); ctx = NULL; } @@ -2544,82 +2537,44 @@ out: return ctx; } -static void -ltable_delete_locks (struct _lock_table *ltable) + +static int +pl_client_disconnect_cbk (xlator_t *this, client_t *client) { - struct _locker *locker = NULL; - struct _locker *tmp = NULL; + pl_ctx_t *pl_ctx = NULL; - list_for_each_entry_safe (locker, tmp, <able->inodelk_lockers, lockers) { - if (locker->fd) - pl_del_locker (ltable, locker->volume, &locker->loc, - locker->fd, &locker->owner, - GF_FOP_INODELK); - GF_FREE (locker->volume); - GF_FREE (locker); - } + pl_ctx = pl_ctx_get (client, this); - list_for_each_entry_safe (locker, tmp, <able->entrylk_lockers, lockers) { - if (locker->fd) - pl_del_locker (ltable, locker->volume, &locker->loc, - locker->fd, &locker->owner, - GF_FOP_ENTRYLK); - GF_FREE (locker->volume); - GF_FREE (locker); - } - GF_FREE (ltable); + pl_inodelk_client_cleanup (this, pl_ctx); + + pl_entrylk_client_cleanup (this, pl_ctx); + + return 0; } -static int32_t -destroy_cbk (xlator_t *this, client_t *client) +static int +pl_client_destroy_cbk (xlator_t *this, client_t *client) { - void *tmp = NULL; - pl_ctx_t *locks_ctx = NULL; + void *tmp = NULL; + pl_ctx_t *pl_ctx = NULL; + + pl_client_disconnect_cbk (this, client); client_ctx_del (client, this, &tmp); if (tmp == NULL) - return 0 -; - locks_ctx = tmp; - if (locks_ctx->ltable) - ltable_delete_locks (locks_ctx->ltable); - - LOCK_DESTROY (&locks_ctx->ltable_lock); - GF_FREE (locks_ctx); - - return 0; -} + return 0; + pl_ctx = tmp; -static int32_t -disconnect_cbk (xlator_t *this, client_t *client) -{ - int32_t ret = 0; - pl_ctx_t *locks_ctx = NULL; - struct _lock_table *ltable = NULL; + GF_ASSERT (list_empty(&pl_ctx->inodelk_lockers)); + GF_ASSERT (list_empty(&pl_ctx->entrylk_lockers)); - locks_ctx = pl_ctx_get (client, this); - if (locks_ctx == NULL) { - gf_log (this->name, GF_LOG_INFO, "pl_ctx_get() failed"); - goto out; - } + pthread_mutex_destroy (&pl_ctx->lock); + GF_FREE (pl_ctx); - LOCK (&locks_ctx->ltable_lock); - { - if (locks_ctx->ltable) { - ltable = locks_ctx->ltable; - locks_ctx->ltable = pl_lock_table_new (); - } - } - UNLOCK (&locks_ctx->ltable_lock); - - if (ltable) - ltable_delete_locks (ltable); - -out: - return ret; + return 0; } @@ -2756,8 +2711,8 @@ struct xlator_cbks cbks = { .forget = pl_forget, .release = pl_release, .releasedir = pl_releasedir, - .client_destroy = destroy_cbk, - .client_disconnect = disconnect_cbk, + .client_destroy = pl_client_destroy_cbk, + .client_disconnect = pl_client_disconnect_cbk, }; -- cgit From 3af42583dd804371952d61e9d7ff4c640e67ba0d Mon Sep 17 00:00:00 2001 From: Vijay Bellur Date: Sun, 12 Jan 2014 22:39:14 +0530 Subject: storage/posix: UNWIND right op_error and op_errno in *setxattr() 1. 
errno was being set after gf_log() in posix_{f}handle_pair, which
   caused errno to be overwritten before it was used.
2. dht expects -1 to indicate failure in the setxattr callback
   (dht_err_cbk()); posix_{f}setxattr has been changed to set op_ret
   to -1 instead of -op_errno.
3. dict_foreach() has been changed to return an error if the invoked
   fn() returns < 0.

Bug report and test case credit to Zorro Lang.

Change-Id: I96c15f12a5d7717b7584ba392f390a0b4f704a98
BUG: 1051896
Signed-off-by: Vijay Bellur
Reviewed-on: http://review.gluster.org/6684
Tested-by: Gluster Build System
Reviewed-by: Niels de Vos
Reviewed-by: Anand Avati
---
 libglusterfs/src/dict.c                   |  4 +-
 tests/bugs/bug-1051896.c                  | 94 +++++++++++++++++++++++++++++++
 tests/bugs/bug-1051896.t                  | 24 ++++++++
 xlators/storage/posix/src/posix-helpers.c |  4 +-
 xlators/storage/posix/src/posix.c         |  8 ++-
 5 files changed, 128 insertions(+), 6 deletions(-)
 create mode 100644 tests/bugs/bug-1051896.c
 create mode 100644 tests/bugs/bug-1051896.t

diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
index f2df5a6d4..e9fc1222d 100644
--- a/libglusterfs/src/dict.c
+++ b/libglusterfs/src/dict.c
@@ -1121,8 +1121,8 @@ dict_foreach (dict_t *dict,
         while (pairs) {
                 next = pairs->next;
                 ret = fn (dict, pairs->key, pairs->value, data);
-                if (ret == -1)
-                        return -1;
+                if (ret < 0)
+                        return ret;
                 pairs = next;
         }

diff --git a/tests/bugs/bug-1051896.c b/tests/bugs/bug-1051896.c
new file mode 100644
index 000000000..0ffd81986
--- /dev/null
+++ b/tests/bugs/bug-1051896.c
@@ -0,0 +1,94 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+int do_setfacl(const char *path, const char *options, const char *textacl)
+{
+        int r;
+        int type;
+        acl_t acl;
+        int dob;
+        int dok;
+        int dom;
+        struct stat st;
+        char textmode[30];
+
+        r = 0;
+        dob = strchr(options,'b') != (char*)NULL;
+        dok = strchr(options,'k') != (char*)NULL;
+        dom = strchr(options,'m') != (char*)NULL;
+        if ((dom && !textacl)
+            || (!dom && (textacl || (!dok && !dob) ||
+                strchr(options,'d')))) {
+                errno = EBADRQC; /* "bad request" */
+                r = -1;
+        } else {
+                if (dob || dok) {
+                        r = acl_delete_def_file(path);
+                }
+                if (dob && !r) {
+                        if (!stat(path,&st)) {
+                                sprintf(textmode,
+                                        "u::%c%c%c,g::%c%c%c,o::%c%c%c",
+                                        (st.st_mode & 0400 ? 'r' : '-'),
+                                        (st.st_mode & 0200 ? 'w' : '-'),
+                                        (st.st_mode & 0100 ? 'x' : '-'),
+                                        (st.st_mode & 0040 ? 'r' : '-'),
+                                        (st.st_mode & 0020 ? 'w' : '-'),
+                                        (st.st_mode & 0010 ? 'x' : '-'),
+                                        (st.st_mode & 004 ? 'r' : '-'),
+                                        (st.st_mode & 002 ? 'w' : '-'),
+                                        (st.st_mode & 001 ? 'x' : '-'));
+                                acl = acl_from_text(textmode);
+                                if (acl) {
+                                        r = acl_set_file(path,
+                                                ACL_TYPE_ACCESS,acl);
+                                        acl_free(acl);
+                                } else
+                                        r = -1;
+                        } else
+                                r = -1;
+                }
+                if (!r && dom) {
+                        if (strchr(options,'d'))
+                                type = ACL_TYPE_DEFAULT;
+                        else
+                                type = ACL_TYPE_ACCESS;
+                        acl = acl_from_text(textacl);
+                        if (acl) {
+                                r = acl_set_file(path,type,acl);
+                                acl_free(acl);
+                        } else
+                                r = -1;
+                }
+        }
+        if (r)
+                r = -errno;
+        return (r);
+}
+
+
+int main(int argc, char *argv[]){
+        int rc = 0;
+        if (argc != 4) {
+                fprintf(stderr,
+                        "usage: ./setfacl_test \n");
+                return 0;
+        }
+        if ((rc = do_setfacl(argv[1], argv[2], argv[3])) != 0){
+                fprintf(stderr, "do_setfacl failed: %s\n", strerror(errno));
+                return rc;
+        }
+        return 0;
+}

diff --git a/tests/bugs/bug-1051896.t b/tests/bugs/bug-1051896.t
new file mode 100644
index 000000000..75859cbef
--- /dev/null
+++ b/tests/bugs/bug-1051896.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+.
$(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +## Start and create a volume +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume info; + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}; +TEST $CLI volume start $V0; + +TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --acl -s $H0 --volfile-id $V0 $M0; + +TEST touch $M0/file1; + +gcc -lacl $(dirname $0)/bug-1051896.c -o $(dirname $0)/bug-1051896 +TEST ! $(dirname $0)/bug-1051896 $M0/file1 m 'u::r,u::w,g::r--,o::r--' +rm -f $(dirname $0)/bug-1051896 + +cleanup diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index d2c991900..3a66ecfc2 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -792,6 +792,7 @@ posix_handle_pair (xlator_t *this, const char *real_path, value->len, flags); if (sys_ret < 0) { + ret = -errno; if (errno == ENOTSUP) { GF_LOG_OCCASIONALLY(gf_xattr_enotsup_log, this->name,GF_LOG_WARNING, @@ -823,7 +824,6 @@ posix_handle_pair (xlator_t *this, const char *real_path, #endif /* DARWIN */ } - ret = -errno; goto out; } } @@ -847,6 +847,7 @@ posix_fhandle_pair (xlator_t *this, int fd, value->len, flags); if (sys_ret < 0) { + ret = -errno; if (errno == ENOTSUP) { GF_LOG_OCCASIONALLY(gf_xattr_enotsup_log, this->name,GF_LOG_WARNING, @@ -873,7 +874,6 @@ posix_fhandle_pair (xlator_t *this, int fd, #endif /* DARWIN */ } - ret = -errno; goto out; } diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 7695289fa..dc4af1b92 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -2950,8 +2950,10 @@ posix_setxattr (call_frame_t *frame, xlator_t *this, filler.flags = flags; op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair, &filler); - if (op_ret < 0) + if (op_ret < 0) { op_errno = -op_ret; + op_ret = -1; + } out: SET_TO_OLD_FS_ID (); @@ -3916,8 +3918,10 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this, filler.flags = flags; op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair, &filler); - if (op_ret < 0) + if (op_ret < 0) { op_errno = -op_ret; + op_ret = -1; + } out: SET_TO_OLD_FS_ID (); -- cgit From 54bf0ba4698a2d46db3485cc12ae04dd90349570 Mon Sep 17 00:00:00 2001 From: Vijay Bellur Date: Fri, 3 Jan 2014 11:59:43 +0530 Subject: consolidate code for #ifdef HAVE_LINKAT usage sys_link() now does ifdef HAVE_LINKAT linkat (...) else link (...) endif Use sys_link() in all places where we previously had the conditional behavior. Change-Id: I8bce5ac1175efd2ba7ab4bb5b372f6d1e0365d28 BUG: 764655 Signed-off-by: Vijay Bellur Reviewed-on: http://review.gluster.org/6633 Tested-by: Gluster Build System Reviewed-by: Xavier Hernandez Reviewed-by: Anand Avati --- libglusterfs/src/syscall.c | 8 +++++++- xlators/features/index/src/index.c | 32 +++++++------------------------- xlators/storage/posix/src/posix-handle.c | 23 +++-------------------- xlators/storage/posix/src/posix.c | 15 +++------------ 4 files changed, 20 insertions(+), 58 deletions(-) diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index 117fa209e..d1b9ef84c 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -121,7 +121,13 @@ int sys_link (const char *oldpath, const char *newpath) { #ifdef HAVE_LINKAT - /* see HAVE_LINKAT in xlators/storage/posix/src/posix.c */ + /* + * On most systems (Linux being the notable exception), link(2) + * first resolves symlinks. 
If the target is a directory or + * is nonexistent, it will fail. linkat(2) operates on the + * symlink instead of its target when the AT_SYMLINK_FOLLOW + * flag is not supplied. + */ return linkat (AT_FDCWD, oldpath, AT_FDCWD, newpath, 0); #else return link (oldpath, newpath); diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index 4ba72c022..db592719b 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -16,6 +16,7 @@ #include "options.h" #include "glusterfs3-xdr.h" #include "syncop.h" +#include "syscall.h" #define XATTROP_SUBDIR "xattrop" #define BASE_INDICES_HOLDER_SUBDIR "base_indices_holder" @@ -407,13 +408,8 @@ sync_base_indices (void *index_priv) snprintf (base_index_path, PATH_MAX, "%s/%s", base_indices_holder, entry->d_name); -#ifdef HAVE_LINKAT - /* see HAVE_LINKAT in xlators/storage/posix/src/posix.c */ - ret = linkat (AT_FDCWD, xattrop_index_path, - AT_FDCWD, base_index_path, 0); -#else - ret = link (xattrop_index_path, base_index_path); -#endif + ret = sys_link (xattrop_index_path, base_index_path); + if (ret && errno != EEXIST) goto out; @@ -549,12 +545,8 @@ index_add (xlator_t *this, uuid_t gfid, const char *subdir) index_get_index (priv, index); make_index_path (priv->index_basepath, subdir, index, index_path, sizeof (index_path)); -#ifdef HAVE_LINKAT - /* see HAVE_LINKAT in xlators/storage/posix/src/posix.c */ - ret = linkat (AT_FDCWD, index_path, AT_FDCWD, gfid_path, 0); -#else - ret = link (index_path, gfid_path); -#endif + + ret = sys_link (index_path, gfid_path); if (!ret || (errno == EEXIST)) { ret = 0; index_created = 1; @@ -587,12 +579,7 @@ index_add (xlator_t *this, uuid_t gfid, const char *subdir) if (fd >= 0) close (fd); -#ifdef HAVE_LINKAT - /* see HAVE_LINKAT in xlators/storage/posix/src/posix.c */ - ret = linkat (AT_FDCWD, index_path, AT_FDCWD, gfid_path, 0); -#else - ret = link (index_path, gfid_path); -#endif + ret = sys_link (index_path, gfid_path); if (ret && (errno != EEXIST)) { gf_log (this->name, GF_LOG_ERROR, "%s: Not able to " "add to index (%s)", uuid_utoa (gfid), @@ -606,12 +593,7 @@ index_add (xlator_t *this, uuid_t gfid, const char *subdir) make_index_path (priv->index_basepath, GF_BASE_INDICES_HOLDER_GFID, index, base_path, sizeof (base_path)); -#ifdef HAVE_LINKAT - /* see HAVE_LINKAT in xlators/storage/posix/src/posix.c */ - ret = linkat (AT_FDCWD, index_path, AT_FDCWD, base_path, 0); -#else - ret = link (index_path, base_path); -#endif + ret = sys_link (index_path, base_path); if (ret) goto out; } diff --git a/xlators/storage/posix/src/posix-handle.c b/xlators/storage/posix/src/posix-handle.c index 1d8e98631..613709fc8 100644 --- a/xlators/storage/posix/src/posix-handle.c +++ b/xlators/storage/posix/src/posix-handle.c @@ -701,16 +701,8 @@ posix_handle_hard (xlator_t *this, const char *oldpath, uuid_t gfid, struct stat return -1; } -#ifdef HAVE_LINKAT - /* - * Use linkat if the target may be a symlink to a directory - * or without an existing target. 
See comment about linkat() - * usage in posix_link() in posix.c for details - */ - ret = linkat (AT_FDCWD, oldpath, AT_FDCWD, newpath, 0); -#else - ret = link (oldpath, newpath); -#endif + ret = sys_link (oldpath, newpath); + if (ret) { gf_log (this->name, GF_LOG_WARNING, "link %s -> %s failed (%s)", @@ -882,16 +874,7 @@ posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid, MAKE_HANDLE_PATH (newpath, this, gfid, NULL); ret = lstat (newpath, &stbuf); if (!ret) { -#ifdef HAVE_LINKAT - /* - * Use linkat if the target may be a symlink to a directory - * or without an existing target. See comment about linkat() - * usage in posix_link() in posix.c for details - */ - ret = linkat (AT_FDCWD, newpath, AT_FDCWD, real_path, 0); -#else - ret = link (newpath, real_path); -#endif + ret = sys_link (newpath, real_path); } return ret; diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index dc4af1b92..83b689d06 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1930,18 +1930,9 @@ posix_link (call_frame_t *frame, xlator_t *this, goto out; } -#ifdef HAVE_LINKAT - /* - * On most systems (Linux being the notable exception), link(2) - * first resolves symlinks. If the target is a directory or - * is nonexistent, it will fail. linkat(2) operates on the - * symlink instead of its target when the AT_SYMLINK_FOLLOW - * flag is not supplied. - */ - op_ret = linkat (AT_FDCWD, real_oldpath, AT_FDCWD, real_newpath, 0); -#else - op_ret = link (real_oldpath, real_newpath); -#endif + + op_ret = sys_link (real_oldpath, real_newpath); + if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, -- cgit From 7d89ec77763dc5076379753c736f7fce2bedd9ec Mon Sep 17 00:00:00 2001 From: Krishnan Parthasarathi Date: Thu, 2 Jan 2014 20:11:19 +0530 Subject: socket: propogate connect failure in socket_event_handler This patch prevents spurious handling of pollin/pollout events on an 'un-connected' socket, when outgoing packets to its remote endpoint are 'dropped' using iptables(8) rules. For eg, iptables -I OUTPUT -p tcp --dport 24007 -j DROP Change-Id: I1d3f3259dc536adca32330bfb7566e0b9a521e3c BUG: 1048188 Signed-off-by: Krishnan Parthasarathi Reviewed-on: http://review.gluster.org/6627 Reviewed-by: Anand Avati Tested-by: Anand Avati --- rpc/rpc-transport/socket/src/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c index f9df4ac1d..c6b293be4 100644 --- a/rpc/rpc-transport/socket/src/socket.c +++ b/rpc/rpc-transport/socket/src/socket.c @@ -2195,7 +2195,7 @@ unlock: rpc_transport_notify (this, event, this); } out: - return 0; + return ret; } -- cgit