diff options
author | Jeff Darcy <jdarcy@redhat.com> | 2014-02-11 14:02:43 +0000 |
---|---|---|
committer | Jeff Darcy <jdarcy@redhat.com> | 2014-02-11 14:02:43 +0000 |
commit | 0d92798e88c5444fb2cc23663b4ea9a345887756 (patch) | |
tree | a71f336115753242e8690547f403405a3373ddf1 | |
parent | 33939dcde38389373e7ed8b12c6e9916b39411d0 (diff) | |
parent | 408d50a64b7b3a9d6a4899060baa423ff126cc5f (diff) |
Merge remote-tracking branch 'upstream/master'
Conflicts:
xlators/mgmt/glusterd/src/Makefile.am
Change-Id: Ida5ec4aecc358cb2268bdfdb1a8c9bab750f9575
87 files changed, 3696 insertions, 544 deletions
diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c index c88330aa4..0ad9ce723 100644 --- a/api/src/glfs-resolve.c +++ b/api/src/glfs-resolve.c @@ -450,7 +450,7 @@ glfs_resolve_at (struct glfs *fs, xlator_t *subvol, inode_t *at, ret = 0; } - glfs_loc_touchup (loc); + ret = glfs_loc_touchup (loc); out: GF_FREE (path); diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 9c29b7f3b..98e384f2f 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -1115,9 +1115,11 @@ print_quota_list_header (void) { //Header cli_out (" Path Hard-limit " - "Soft-limit Used Available"); + "Soft-limit Used Available Soft-limit exceeded?" + " Hard-limit exceeded?"); cli_out ("-----------------------------------------------------" - "---------------------------"); + "-----------------------------------------------------" + "-----------------"); } int diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c index 0b6bcb54b..bef70357a 100644 --- a/cli/src/cli-rpc-ops.c +++ b/cli/src/cli-rpc-ops.c @@ -42,6 +42,11 @@ #include "cli-quotad-client.h" #include "run.h" +enum gf_task_types { + GF_TASK_TYPE_REBALANCE, + GF_TASK_TYPE_REMOVE_BRICK +}; + extern struct rpc_clnt *global_quotad_rpc; extern rpc_clnt_prog_t cli_quotad_clnt; extern rpc_clnt_prog_t *cli_rpc_prog; @@ -1225,7 +1230,7 @@ out: } int -gf_cli_print_rebalance_status (dict_t *dict) +gf_cli_print_rebalance_status (dict_t *dict, enum gf_task_types task_type) { int ret = -1; int count = 0; @@ -1316,6 +1321,13 @@ gf_cli_print_rebalance_status (dict_t *dict) if (ret) gf_log ("cli", GF_LOG_TRACE, "failed to get skipped count"); + + /* For remove-brick include skipped count into failure count*/ + if (task_type != GF_TASK_TYPE_REBALANCE) { + failures += skipped; + skipped = 0; + } + memset (key, 0, 256); snprintf (key, 256, "run-time-%d", i); ret = dict_get_double (dict, key, &elapsed); @@ -1470,7 +1482,7 @@ gf_cli_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov, goto out; } - ret = gf_cli_print_rebalance_status (dict); + ret = gf_cli_print_rebalance_status (dict, GF_TASK_TYPE_REBALANCE); if (ret) gf_log ("cli", GF_LOG_ERROR, "Failed to print rebalance status"); @@ -1873,7 +1885,7 @@ xml_output: goto out; } - ret = gf_cli_print_rebalance_status (dict); + ret = gf_cli_print_rebalance_status (dict, GF_TASK_TYPE_REMOVE_BRICK); if (ret) { gf_log ("cli", GF_LOG_ERROR, "Failed to print remove-brick " "rebalance status"); @@ -2306,8 +2318,11 @@ print_quota_list_output (char *mountdir, char *default_sl, char *path) char *avail_str = NULL; int ret = -1; char *sl_final = NULL; - char percent_str[20] = {0,}; char *hl_str = NULL; + double sl_num = 0; + gf_boolean_t sl = _gf_false; + gf_boolean_t hl = _gf_false; + char percent_str[20] = {0}; struct quota_limit { int64_t hl; @@ -2344,8 +2359,11 @@ print_quota_list_output (char *mountdir, char *default_sl, char *path) hl_str = gf_uint64_2human_readable (existing_limits.hl); if (existing_limits.sl < 0) { + ret = gf_string2percent (default_sl, &sl_num); + sl_num = (sl_num * existing_limits.hl) / 100; sl_final = default_sl; } else { + sl_num = (existing_limits.sl * existing_limits.hl) / 100; snprintf (percent_str, sizeof (percent_str), "%"PRIu64"%%", existing_limits.sl); sl_final = percent_str; @@ -2355,26 +2373,37 @@ print_quota_list_output (char *mountdir, char *default_sl, char *path) &used_space, sizeof (used_space)); if (ret < 0) { - cli_out ("%-40s %7s %9s %11s %7s", path, hl_str, sl_final, - "N/A", "N/A"); + cli_out ("%-40s %7s %9s %11s %7s %15s %20s", + path, hl_str, sl_final, + "N/A", "N/A", "N/A", "N/A"); } else { used_space = ntoh64 (used_space); used_str = gf_uint64_2human_readable (used_space); - if (existing_limits.hl > used_space) + if (existing_limits.hl > used_space) { avail = existing_limits.hl - used_space; - else + hl = _gf_false; + if (used_space > sl_num) + sl = _gf_true; + else + sl = _gf_false; + } else { avail = 0; + hl = sl = _gf_true; + } avail_str = gf_uint64_2human_readable (avail); - if (used_str == NULL) + if (used_str == NULL) { cli_out ("%-40s %7s %9s %11"PRIu64 - "%9"PRIu64, path, hl_str, - sl_final, used_space, avail); - else - cli_out ("%-40s %7s %9s %11s %7s", path, hl_str, - sl_final, used_str, avail_str); + "%9"PRIu64" %15s %18s", path, hl_str, + sl_final, used_space, avail, sl? "Yes" : "No", + hl? "Yes" : "No"); + } else { + cli_out ("%-40s %7s %9s %11s %7s %15s %20s", path, hl_str, + sl_final, used_str, avail_str, sl? "Yes" : "No", + hl? "Yes" : "No"); + } } out: @@ -2413,9 +2442,11 @@ gf_cli_print_limit_list_from_dict (char *volname, dict_t *dict, } cli_out (" Path Hard-limit " - "Soft-limit Used Available"); - cli_out ("-----------------------------------------------------" - "---------------------------"); + "Soft-limit Used Available Soft-limit exceeded?" + " Hard-limit exceeded?"); + cli_out ("--------------------------------------------------------" + "--------------------------------------------------------" + "-----------"); while (count--) { snprintf (key, sizeof (key), "path%d", i++); @@ -2446,14 +2477,17 @@ print_quota_list_from_quotad (call_frame_t *frame, dict_t *rsp_dict) int64_t *limit = NULL; char *used_str = NULL; char *avail_str = NULL; - char percent_str[20]= {0}; char *hl_str = NULL; char *sl_final = NULL; char *path = NULL; - char *default_sl = NULL; + char *default_sl = NULL; int ret = -1; cli_local_t *local = NULL; dict_t *gd_rsp_dict = NULL; + double sl_num = 0; + gf_boolean_t sl = _gf_false; + gf_boolean_t hl = _gf_false; + char percent_str[20] = {0,}; local = frame->local; gd_rsp_dict = local->dict; @@ -2490,37 +2524,48 @@ print_quota_list_from_quotad (call_frame_t *frame, dict_t *rsp_dict) hl_str = gf_uint64_2human_readable (existing_limits->hl); if (existing_limits->sl < 0) { + ret = gf_string2percent (default_sl, &sl_num); + sl_num = (sl_num * existing_limits->hl) / 100; sl_final = default_sl; } else { + sl_num = (existing_limits->sl * existing_limits->hl) / 100; snprintf (percent_str, sizeof (percent_str), "%"PRIu64"%%", existing_limits->sl); - sl_final = percent_str; + sl_final = percent_str; } ret = dict_get_bin (rsp_dict, QUOTA_SIZE_KEY, (void**)&limit); if (ret < 0) { gf_log ("cli", GF_LOG_WARNING, "size key not present in dict"); - cli_out ("%-40s %7s %9s %11s %7s", path, hl_str, sl_final, - "N/A", "N/A"); + cli_out ("%-40s %7s %9s %11s %7s %15s %20s", path, hl_str, + sl_final, "N/A", "N/A", "N/A", "N/A"); } else { used_space = *limit; used_space = ntoh64 (used_space); used_str = gf_uint64_2human_readable (used_space); - if (existing_limits->hl > used_space) + if (existing_limits->hl > used_space) { avail = existing_limits->hl - used_space; - else + hl = _gf_false; + if (used_space > sl_num) + sl = _gf_true; + else + sl = _gf_false; + } else { avail = 0; - + hl = sl = _gf_true; + } avail_str = gf_uint64_2human_readable (avail); if (used_str == NULL) cli_out ("%-40s %7s %9s %11"PRIu64 - "%9"PRIu64, path, hl_str, - sl_final, used_space, avail); + "%9"PRIu64" %15s %20s", path, hl_str, + sl_final, used_space, avail, sl? "Yes" : "No", + hl? "Yes" : "No"); else - cli_out ("%-40s %7s %9s %11s %7s", path, hl_str, - sl_final, used_str, avail_str); + cli_out ("%-40s %7s %9s %11s %7s %15s %20s", path, + hl_str, sl_final, used_str, avail_str, + sl? "Yes" : "No", hl? "Yes" : "No"); } ret = 0; diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c index 822b98df5..ae6b051ed 100644 --- a/cli/src/cli-xml-output.c +++ b/cli/src/cli-xml-output.c @@ -3228,31 +3228,30 @@ cli_xml_output_vol_rebalance_status (xmlTextWriterPtr writer, dict_t *dict, ret = dict_get_uint64 (dict, key, &failures); if (ret) goto out; - total_failures += failures; - ret = xmlTextWriterWriteFormatElement (writer, - (xmlChar *)"failures", - "%"PRIu64, failures); - XML_RET_CHECK_AND_GOTO (ret, out); - /* skipped-%d is not available for remove brick in dict, - so using failures as skipped count in case of remove-brick - similar to logic used in CLI(non xml output) */ - if (task_type == GF_TASK_TYPE_REBALANCE) { - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "skipped-%d", i); - } - else { - memset (key, 0, sizeof (key)); - snprintf (key, sizeof (key), "failures-%d", i); - } + memset (key, 0, sizeof (key)); + snprintf (key, sizeof (key), "skipped-%d", i); ret = dict_get_uint64 (dict, key, &skipped); if (ret) goto out; + + if (task_type == GF_TASK_TYPE_REMOVE_BRICK) { + failures += skipped; + skipped = 0; + } + + total_failures += failures; + ret = xmlTextWriterWriteFormatElement (writer, + (xmlChar *)"failures", + "%"PRIu64, failures); + XML_RET_CHECK_AND_GOTO (ret, out); + total_skipped += skipped; + ret = xmlTextWriterWriteFormatElement (writer, - (xmlChar *)"skipped", - "%"PRIu64, skipped); + (xmlChar *)"skipped", + "%"PRIu64, skipped); XML_RET_CHECK_AND_GOTO (ret, out); ret = xmlTextWriterWriteFormatElement (writer, diff --git a/configure.ac b/configure.ac index 0d86b3373..604b53539 100644 --- a/configure.ac +++ b/configure.ac @@ -908,7 +908,7 @@ AM_CONDITIONAL([GF_DARWIN_HOST_OS], test "${GF_HOST_OS}" = "GF_DARWIN_HOST_OS") AM_CONDITIONAL([GF_INSTALL_VAR_LIB_GLUSTERD], test ! -d ${localstatedir}/lib/glusterd && test -d ${sysconfdir}/glusterd ) dnl pkg-config versioning -GFAPI_VERSION="0.0.6" +GFAPI_VERSION="6.0.0" LIBGFCHANGELOG_VERSION="0.0.1" AC_SUBST(GFAPI_VERSION) AC_SUBST(LIBGFCHANGELOG_VERSION) @@ -918,7 +918,7 @@ LIBGFXDR_LT_VERSION="0:1:0" LIBGFRPC_LT_VERSION="0:1:0" LIBGLUSTERFS_LT_VERSION="0:1:0" LIBGFCHANGELOG_LT_VERSION="0:1:0" -GFAPI_LT_VERSION="0:6:0" +GFAPI_LT_VERSION="6:0:0" AC_SUBST(LIBGFXDR_LT_VERSION) AC_SUBST(LIBGFRPC_LT_VERSION) AC_SUBST(LIBGLUSTERFS_LT_VERSION) diff --git a/extras/geo-rep/Makefile.am b/extras/geo-rep/Makefile.am index fc5f56d54..9435861cf 100644 --- a/extras/geo-rep/Makefile.am +++ b/extras/geo-rep/Makefile.am @@ -1,2 +1,11 @@ +scriptsdir = $(datadir)/glusterfs/scripts +scripts_DATA = gsync-upgrade.sh generate-gfid-file.sh get-gfid.sh \ + slave-upgrade.sh + +scripts_PROGRAMS = gsync-sync-gfid +gsync_sync_gfid_CFLAGS = $(GF_CFLAGS) -Wall -I$(top_srcdir)/libglusterfs/src +gsync_sync_gfid_LDADD = $(GF_LIBS) $(top_builddir)/libglusterfs/src/libglusterfs.la +gsync_sync_gfid_SOURCES = gsync-sync-gfid.c + EXTRA_DIST = gsync-sync-gfid.c gsync-upgrade.sh generate-gfid-file.sh \ get-gfid.sh slave-upgrade.sh diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh index 24580be44..ef31f527a 100755 --- a/geo-replication/src/gverify.sh +++ b/geo-replication/src/gverify.sh @@ -32,11 +32,12 @@ echo 0:0; exit 1; fi; cd \$d; -available_size=\$(df -B1 \$d | tail -1 | awk "{print \\\$2}"); +disk_size=\$(df -B1 \$d | tail -1 | awk "{print \\\$2}"); +used_size=\$(df -B1 \$d | tail -1 | awk "{print \\\$3}"); umount -l \$d; rmdir \$d; ver=\$(gluster --version | head -1 | cut -f2 -d " "); -echo \$available_size:\$ver; +echo \$disk_size:\$used_size:\$ver; }; cd /tmp; [ x$VOL != x ] && do_verify $VOL; @@ -61,12 +62,13 @@ echo 0:0; exit 1; fi; cd \$d; -available_size=\$(df -B1 \$d | tail -1 | awk "{print \\\$4}"); +disk_size=\$(df -B1 \$d | tail -1 | awk "{print \\\$2}"); +available_size=\$(df -B1 \$d | tail -1 | awk "{print \\\$3}"); no_of_files=\$(find \$d -maxdepth 0 -empty); umount -l \$d; rmdir \$d; ver=\$(gluster --version | head -1 | cut -f2 -d " "); -echo \$available_size:\$ver:\$no_of_files:; +echo \$disk_size:\$used_size:\$ver:\$no_of_files; }; cd /tmp; [ x$VOL != x ] && do_verify $VOL; @@ -118,18 +120,20 @@ function main() ERRORS=0; master_data=$(master_stats $1); slave_data=$(slave_stats $2 $3); - master_size=$(echo $master_data | cut -f1 -d':'); - slave_size=$(echo $slave_data | cut -f1 -d':'); - master_version=$(echo $master_data | cut -f2 -d':'); - slave_version=$(echo $slave_data | cut -f2 -d':'); - slave_no_of_files=$(echo $slave_data | cut -f3 -d':'); - - if [[ "x$master_size" = "x" || "x$master_version" = "x" || "$master_size" -eq "0" ]]; then + master_disk_size=$(echo $master_data | cut -f1 -d':'); + slave_disk_size=$(echo $slave_data | cut -f1 -d':'); + master_used_size=$(echo $master_data | cut -f2 -d':'); + slave_used_size=$(echo $slave_data | cut -f2 -d':'); + master_version=$(echo $master_data | cut -f3 -d':'); + slave_version=$(echo $slave_data | cut -f3 -d':'); + slave_no_of_files=$(echo $slave_data | cut -f4 -d':'); + + if [[ "x$master_disk_size" = "x" || "x$master_version" = "x" || "$master_disk_size" -eq "0" ]]; then echo "FORCE_BLOCKER|Unable to fetch master volume details. Please check the master cluster and master volume." > $log_file; exit 1; fi; - if [[ "x$slave_size" = "x" || "x$slave_version" = "x" || "$slave_size" -eq "0" ]]; then + if [[ "x$slave_disk_size" = "x" || "x$slave_version" = "x" || "$slave_disk_size" -eq "0" ]]; then echo "FORCE_BLOCKER|Unable to fetch slave volume details. Please check the slave cluster and slave volume." > $log_file; exit 1; fi; @@ -138,16 +142,25 @@ function main() # if they fail. The checks below can be bypassed if force option is # provided hence no FORCE_BLOCKER flag. - if [ ! $slave_size -ge $(($master_size - $BUFFER_SIZE )) ]; then - echo "Total size of master is greater than available size of slave." >> $log_file; - ERRORS=$(($ERRORS + 1)); - fi; + if [ "$slave_disk_size" -lt "$master_disk_size" ]; then + echo "Total disk size of master is greater than disk size of slave." >> $log_file; + ERRORS=$(($ERRORS + 1)); + fi + + effective_master_used_size=$(( $master_used_size + $BUFFER_SIZE )) + slave_available_size=$(( $slave_disk_size - $slave_used_size )) + master_available_size=$(( $master_disk_size - $effective_master_used_size )); + + if [ "$slave_available_size" -lt "$master_available_size" ]; then + echo "Total available size of master is greater than available size of slave" >> $log_file; + ERRORS=$(($ERRORS + 1)); + fi if [ -z $slave_no_of_files ]; then echo "$2::$3 is not empty. Please delete existing files in $2::$3 and retry, or use force to continue without deleting the existing files." >> $log_file; ERRORS=$(($ERRORS + 1)); fi; - + if [[ $master_version > $slave_version ]]; then echo "Gluster version mismatch between master and slave." >> $log_file; ERRORS=$(($ERRORS + 1)); diff --git a/geo-replication/syncdaemon/configinterface.py b/geo-replication/syncdaemon/configinterface.py index 8353c1161..35f754c98 100644 --- a/geo-replication/syncdaemon/configinterface.py +++ b/geo-replication/syncdaemon/configinterface.py @@ -9,6 +9,8 @@ import os import errno import sys from stat import ST_DEV, ST_INO, ST_MTIME +import tempfile +import shutil from syncdutils import escape, unescape, norm, update_file, GsyncdError @@ -19,6 +21,56 @@ config_version = 2.0 re_type = type(re.compile('')) + +# (SECTION, OPTION, OLD VALUE, NEW VALUE) +CONFIGS = ( + ("peersrx . .", "georep_session_working_dir", "", "/var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/"), + ("peersrx .", "gluster_params", "aux-gfid-mount xlator-option=\*-dht.assert-no-child-down=true", "aux-gfid-mount"), + ("peersrx . .", "ssh_command_tar", "", "ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/tar_ssh.pem"), +) + +def upgrade_config_file(path): + config_change = False + config = ConfigParser.RawConfigParser() + config.read(path) + + for sec, opt, oldval, newval in CONFIGS: + try: + val = config.get(sec, opt) + except ConfigParser.NoOptionError: + # if new config opt not exists + config_change = True + config.set(sec, opt, newval) + continue + except ConfigParser.Error: + """ + When gsyncd invoked at the time of create, config file + will not be their. Ignore any ConfigParser errors + """ + continue + + if val == newval: + # value is same as new val + continue + + if val == oldval: + # config value needs update + config_change = True + config.set(sec, opt, newval) + + if config_change: + tempConfigFile = tempfile.NamedTemporaryFile(mode="wb", delete=False) + with open(tempConfigFile.name, 'wb') as configFile: + config.write(configFile) + + # If src and dst are two different file system, then os.rename + # fails, In this case if temp file created in /tmp and if /tmp is + # seperate fs then os.rename gives following error, so use shutil + # OSError: [Errno 18] Invalid cross-device link + # mail.python.org/pipermail/python-list/2005-February/342893.html + shutil.move(tempConfigFile.name, path) + + class MultiDict(object): """a virtual dict-like class which functions as the union of underlying dicts""" diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py index 64c26a5d2..6eb62c6b0 100644 --- a/geo-replication/syncdaemon/gsyncd.py +++ b/geo-replication/syncdaemon/gsyncd.py @@ -20,7 +20,7 @@ from ipaddr import IPAddress, IPNetwork from gconf import gconf from syncdutils import FreeObject, norm, grabpidfile, finalize, log_raise_exception from syncdutils import GsyncdError, select, set_term_handler, privileged, update_file -from configinterface import GConffile +from configinterface import GConffile, upgrade_config_file import resource from monitor import monitor @@ -371,6 +371,8 @@ def main_i(): namedict['remotehost'] = x.remotehost if not 'config_file' in rconf: rconf['config_file'] = os.path.join(os.path.dirname(sys.argv[0]), "conf/gsyncd_template.conf") + + upgrade_config_file(rconf['config_file']) gcnf = GConffile(rconf['config_file'], canon_peers, defaults.__dict__, opts.__dict__, namedict) checkpoint_change = False diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py index 662d2ba74..0817d7fe7 100644 --- a/geo-replication/syncdaemon/master.py +++ b/geo-replication/syncdaemon/master.py @@ -108,6 +108,10 @@ class NormalMixin(object): xt = rsc.server.xtime(path, self.uuid) else: xt = rsc.server.stime(path, self.uuid) + if isinstance(xt, int) and xt == ENODATA: + xt = rsc.server.xtime(path, self.uuid) + if not isinstance(xt, int): + self.slave.server.set_stime(path, self.uuid, xt) if isinstance(xt, int) and xt != ENODATA: return xt if xt == ENODATA or xt < self.volmark: @@ -1059,7 +1063,8 @@ class GMasterChangelogMixin(GMasterCommon): self.master.server.changelog_done(pr) changes.remove(pr) logging.debug('processing changes %s' % repr(changes)) - self.process(changes) + if changes: + self.process(changes) def register(self): (workdir, logfile) = self.setup_working_dir() @@ -1180,6 +1185,13 @@ class GMasterXsyncMixin(GMasterChangelogMixin): if stime and stime[-1]: self.sync_stime(stime[-1], last) + def is_sticky(self, path, mo): + """check for DHTs linkto sticky bit file""" + sticky = False + if mo & 01000: + sticky = self.master.server.linkto_check(path) + return sticky + def Xcrawl(self, path='.', xtr_root=None): """ generate a CHANGELOG file consumable by process_change. @@ -1231,6 +1243,9 @@ class GMasterXsyncMixin(GMasterChangelogMixin): if isinstance(st, int): logging.warn('%s got purged in the interim ...' % e) continue + if self.is_sticky(e, st.st_mode): + logging.debug('ignoring sticky bit file %s' % e) + continue gfid = self.master.server.gfid(e) if isinstance(gfid, int): logging.warn('skipping entry %s..' % e) diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py index 8deb5114b..41add6fb2 100644 --- a/geo-replication/syncdaemon/resource.py +++ b/geo-replication/syncdaemon/resource.py @@ -321,6 +321,18 @@ class Server(object): else: raise + @classmethod + @_pathguard + def linkto_check(cls, path): + try: + return not (Xattr.lgetxattr_buf(path, 'trusted.glusterfs.dht.linkto') == '') + except (IOError, OSError): + ex = sys.exc_info()[1] + if ex.errno in (ENOENT, ENODATA): + return False + else: + raise + @classmethod @_pathguard @@ -779,7 +791,11 @@ class SlaveRemote(object): p0.stdin.write(f) p0.stdin.write('\n') p0.stdin.close() + + # wait() for tar to terminate, collecting any errors, further + # waiting for transfer to complete p0.wait() + p0.terminate_geterr(fail_on_err = False) p1.wait() p1.terminate_geterr(fail_on_err = False) @@ -1135,10 +1151,14 @@ class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote): def gfid(cls, e): """ path based backend gfid fetch """ return super(brickserver, cls).gfid(e) + @classmethod + def linkto_check(cls, e): + return super(brickserver, cls).linkto_check(e) if gconf.slave_id: # define {,set_}xtime in slave, thus preempting # the call to remote, so that it takes data from # the local brick + slave.server.xtime = types.MethodType(lambda _self, path, uuid: brickserver.xtime(path, uuid + '.' + gconf.slave_id), slave.server) slave.server.stime = types.MethodType(lambda _self, path, uuid: brickserver.stime(path, uuid + '.' + gconf.slave_id), slave.server) slave.server.set_stime = types.MethodType(lambda _self, path, uuid, mark: brickserver.set_stime(path, uuid + '.' + gconf.slave_id, mark), slave.server) (g1, g2) = self.gmaster_instantiate_tuple(slave) diff --git a/glusterfs.spec.in b/glusterfs.spec.in index a409142ac..b34c8648d 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -154,6 +154,9 @@ BuildRequires: lvm2-devel %if ( 0%{!?_without_qemu_block:1} ) BuildRequires: glib2-devel %endif +%if ( 0%{!?_without_georeplication:1} ) +BuildRequires: libattr-devel +%endif Obsoletes: hekafs Obsoletes: %{name}-common < %{version}-%{release} @@ -609,6 +612,9 @@ mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/nfs/run touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/nfs-server.vol touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/run/nfs.pid +install -p -m 0744 extras/hook-scripts/S56glusterd-geo-rep-create-post.sh \ + %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/gsync-create/post + find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glusterfs %clean @@ -691,15 +697,27 @@ if [ $1 -ge 1 ]; then fi %files geo-replication +%{_sysconfdir}/logrotate.d/glusterfs-georep %{_libexecdir}/glusterfs/gsyncd %{_libexecdir}/glusterfs/python/syncdaemon/* %{_libexecdir}/glusterfs/gverify.sh %{_libexecdir}/glusterfs/peer_add_secret_pub %{_libexecdir}/glusterfs/peer_gsec_create %ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/geo-replication +%dir %{_sharedstatedir}/glusterd/hooks +%dir %{_sharedstatedir}/glusterd/hooks/1 +%dir %{_sharedstatedir}/glusterd/hooks/1/gsync-create +%dir %{_sharedstatedir}/glusterd/hooks/1/gsync-create/post +%{_sharedstatedir}/glusterd/hooks/1/gsync-create/post/S56glusterd-geo-rep-create-post.sh +%{_datadir}/glusterfs/scripts/get-gfid.sh +%{_datadir}/glusterfs/scripts/slave-upgrade.sh +%{_datadir}/glusterfs/scripts/gsync-upgrade.sh +%{_datadir}/glusterfs/scripts/generate-gfid-file.sh +%{_datadir}/glusterfs/scripts/gsync-sync-gfid %ghost %attr(0644,-,-) %{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf %endif + %files fuse %if ( 0%{_for_fedora_koji_builds} ) %config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-fuse @@ -876,6 +894,9 @@ if [ $1 -ge 1 ]; then fi %changelog +* Thu Feb 06 2014 Aravinda VK <avishwan@redhat.com> +- Include geo-replication upgrade scripts and hook scripts. + * Wed Jan 15 2014 Niels de Vos <ndevos@redhat.com> - Install /var/lib/glusterd/groups/virt by default diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index d3234e8bc..c47d2ca3f 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -142,6 +142,9 @@ static struct argp_option gf_options[] = { "Mount the filesystem with POSIX ACL support"}, {"selinux", ARGP_SELINUX_KEY, 0, 0, "Enable SELinux label (extened attributes) support on inodes"}, + {"volfile-max-fetch-attempts", ARGP_VOLFILE_MAX_FETCH_ATTEMPTS, "0", + OPTION_HIDDEN, "Maximum number of attempts to fetch the volfile"}, + #ifdef GF_LINUX_HOST_OS {"aux-gfid-mount", ARGP_AUX_GFID_MOUNT_KEY, 0, 0, "Enable access to filesystem through gfid directly"}, @@ -189,6 +192,9 @@ static struct argp_option gf_options[] = { "[default: 48]"}, {"client-pid", ARGP_CLIENT_PID_KEY, "PID", OPTION_HIDDEN, "client will authenticate itself with process id PID to server"}, + {"no-root-squash", ARGP_FUSE_NO_ROOT_SQUASH_KEY, "BOOL", + OPTION_ARG_OPTIONAL, "disable/enable root squashing for the trusted " + "client"}, {"user-map-root", ARGP_USER_MAP_ROOT_KEY, "USER", OPTION_HIDDEN, "replace USER with root in messages"}, {"dump-fuse", ARGP_DUMP_FUSE_KEY, "PATH", 0, @@ -464,6 +470,32 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options) break; } + switch (cmd_args->no_root_squash) { + case GF_OPTION_ENABLE: /* enable */ + ret = dict_set_static_ptr (options, "no-root-squash", + "enable"); + if (ret < 0) { + gf_log ("glusterfsd", GF_LOG_ERROR, + "failed to set 'enable' for key " + "no-root-squash"); + goto err; + } + break; + case GF_OPTION_DISABLE: /* disable/default */ + default: + ret = dict_set_static_ptr (options, "no-root-squash", + "disable"); + if (ret < 0) { + gf_log ("glusterfsd", GF_LOG_ERROR, + "failed to set 'disable' for key " + "no-root-squash"); + goto err; + } + gf_log ("", GF_LOG_DEBUG, "fuse no-root-squash mode %d", + cmd_args->no_root_squash); + break; + } + if (!cmd_args->no_daemon_mode) { ret = dict_set_static_ptr (options, "sync-to-mount", "enable"); @@ -733,6 +765,7 @@ parse_opts (int key, char *arg, struct argp_state *state) char *tmp_str = NULL; char *port_str = NULL; struct passwd *pw = NULL; + int ret = 0; cmd_args = state->input; @@ -878,6 +911,9 @@ parse_opts (int key, char *arg, struct argp_state *state) case ARGP_DEBUG_KEY: cmd_args->debug_mode = ENABLE_DEBUG_MODE; break; + case ARGP_VOLFILE_MAX_FETCH_ATTEMPTS: + cmd_args->max_connect_attempts = 1; + break; case ARGP_DIRECT_IO_MODE_KEY: if (!arg) @@ -893,6 +929,10 @@ parse_opts (int key, char *arg, struct argp_state *state) "unknown direct I/O mode setting \"%s\"", arg); break; + case ARGP_FUSE_NO_ROOT_SQUASH_KEY: + cmd_args->no_root_squash = _gf_true; + break; + case ARGP_ENTRY_TIMEOUT_KEY: d = 0.0; @@ -908,8 +948,8 @@ parse_opts (int key, char *arg, struct argp_state *state) case ARGP_NEGATIVE_TIMEOUT_KEY: d = 0.0; - gf_string2double (arg, &d); - if (!(d < 0.0)) { + ret = gf_string2double (arg, &d); + if ((ret == 0) && !(d < 0.0)) { cmd_args->fuse_negative_timeout = d; break; } @@ -1476,6 +1516,16 @@ parse_cmdline (int argc, char *argv[], glusterfs_ctx_t *ctx) } } + /* + This option was made obsolete but parsing it for backward + compatibility with third party applications + */ + if (cmd_args->max_connect_attempts) { + gf_log ("glusterfs", GF_LOG_WARNING, + "obsolete option '--volfile-max-fetch-attempts" + " or fetch-attempts' was provided"); + } + #ifdef GF_DARWIN_HOST_OS if (cmd_args->mount_point) cmd_args->mac_compat = GF_OPTION_DEFERRED; @@ -1507,26 +1557,6 @@ glusterfs_pidfile_setup (glusterfs_ctx_t *ctx) goto out; } - ret = lockf (fileno (pidfp), F_TLOCK, 0); - if (ret) { - gf_log ("glusterfsd", GF_LOG_ERROR, - "pidfile %s lock error (%s)", - cmd_args->pid_file, strerror (errno)); - goto out; - } - - gf_log ("glusterfsd", GF_LOG_TRACE, - "pidfile %s lock acquired", - cmd_args->pid_file); - - ret = lockf (fileno (pidfp), F_ULOCK, 0); - if (ret) { - gf_log ("glusterfsd", GF_LOG_ERROR, - "pidfile %s unlock error (%s)", - cmd_args->pid_file, strerror (errno)); - goto out; - } - ctx->pidfp = pidfp; ret = 0; diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h index 9e2a0e56e..ad4c3699b 100644 --- a/glusterfsd/src/glusterfsd.h +++ b/glusterfsd/src/glusterfsd.h @@ -84,6 +84,7 @@ enum argp_option_keys { ARGP_FUSE_MOUNTOPTS_KEY = 164, ARGP_FUSE_USE_READDIRP_KEY = 165, ARGP_AUX_GFID_MOUNT_KEY = 166, + ARGP_FUSE_NO_ROOT_SQUASH_KEY = 167, }; struct _gfd_vol_top_priv_t { diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c index 06447dc5d..1bf3de3f5 100644 --- a/libglusterfs/src/client_t.c +++ b/libglusterfs/src/client_t.c @@ -21,7 +21,6 @@ #include "config.h" #endif - static int gf_client_chain_client_entries (cliententry_t *entries, uint32_t startidx, uint32_t endcount) @@ -82,8 +81,9 @@ gf_client_clienttable_expand (clienttable_t *clienttable, uint32_t nr) memcpy (clienttable->cliententries, oldclients, cpy); } - gf_client_chain_client_entries (clienttable->cliententries, oldmax_clients, - clienttable->max_clients); + gf_client_chain_client_entries (clienttable->cliententries, + oldmax_clients, + clienttable->max_clients); /* Now that expansion is done, we must update the client list * head pointer so that the client allocation functions can continue @@ -149,6 +149,24 @@ gf_client_clienttable_destroy (clienttable_t *clienttable) } } + +/* + * a more comprehensive feature test is shown at + * http://lists.iptel.org/pipermail/semsdev/2010-October/005075.html + * this is sufficient for RHEL5 i386 builds + */ +#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && !defined(__i386__) +# define INCREMENT_ATOMIC(lk,op) __sync_add_and_fetch(&op, 1) +# define DECREMENT_ATOMIC(lk,op) __sync_sub_and_fetch(&op, 1) +#else +/* These are only here for old gcc, e.g. on RHEL5 i386. + * We're not ever going to use this in an if stmt, + * but let's be pedantically correct for style points */ +# define INCREMENT_ATOMIC(lk,op) do { LOCK (&lk); ++op; UNLOCK (&lk); } while (0) +/* this is a gcc 'statement expression', it works with llvm/clang too */ +# define DECREMENT_ATOMIC(lk,op) ({ LOCK (&lk); --op; UNLOCK (&lk); op; }) +#endif + client_t * gf_client_get (xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid) { @@ -184,15 +202,8 @@ gf_client_get (xlator_t *this, struct rpcsvc_auth_data *cred, char *client_uid) memcmp (cred->authdata, client->auth.data, client->auth.len) == 0))) { -#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) - __sync_add_and_fetch(&client->ref.bind, 1); -#else - LOCK (&client->ref.lock); - { - ++client->ref.bind; - } - UNLOCK (&client->ref.lock); -#endif + INCREMENT_ATOMIC (client->ref.lock, + client->ref.bind); break; } } @@ -274,15 +285,7 @@ gf_client_put (client_t *client, gf_boolean_t *detached) if (detached) *detached = _gf_false; -#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) - bind_ref = __sync_sub_and_fetch(&client->ref.bind, 1); -#else - LOCK (&client->ref.lock); - { - bind_ref = --client->ref.bind; - } - UNLOCK (&client->ref.lock); -#endif + bind_ref = DECREMENT_ATOMIC (client->ref.lock, client->ref.bind); if (bind_ref == 0) unref = _gf_true; @@ -295,7 +298,6 @@ gf_client_put (client_t *client, gf_boolean_t *detached) } } - client_t * gf_client_ref (client_t *client) { @@ -304,15 +306,7 @@ gf_client_ref (client_t *client) return NULL; } -#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) - __sync_add_and_fetch(&client->ref.count, 1); -#else - LOCK (&client->ref.lock); - { - ++client->ref.count; - } - UNLOCK (&client->ref.lock); -#endif + INCREMENT_ATOMIC (client->ref.lock, client->ref.count); return client; } @@ -391,15 +385,7 @@ gf_client_unref (client_t *client) return; } -#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1) - refcount = __sync_sub_and_fetch(&client->ref.count, 1); -#else - LOCK (&client->ref.lock); - { - refcount = --client->ref.count; - } - UNLOCK (&client->ref.lock); -#endif + refcount = DECREMENT_ATOMIC (client->ref.lock, client->ref.count); if (refcount == 0) { client_destroy (client); } diff --git a/libglusterfs/src/client_t.h b/libglusterfs/src/client_t.h index 548081896..4113b9da9 100644 --- a/libglusterfs/src/client_t.h +++ b/libglusterfs/src/client_t.h @@ -44,6 +44,8 @@ typedef struct _client_t { int flavour; size_t len; char *data; + char *username; + char *passwd; } auth; } client_t; diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h index 6f8436fcb..e3b019b9e 100644 --- a/libglusterfs/src/common-utils.h +++ b/libglusterfs/src/common-utils.h @@ -106,6 +106,7 @@ enum _gf_client_pid GF_CLIENT_PID_GSYNCD = -1, GF_CLIENT_PID_HADOOP = -2, GF_CLIENT_PID_DEFRAG = -3, + GF_CLIENT_PID_NO_ROOT_SQUASH = -4, }; typedef enum _gf_boolean gf_boolean_t; diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c index e9fc1222d..7bc5d57b0 100644 --- a/libglusterfs/src/dict.c +++ b/libglusterfs/src/dict.c @@ -801,6 +801,8 @@ data_from_dynstr (char *value) data_t *data = get_new_data (); + if (!data) + return NULL; data->len = strlen (value) + 1; data->data = value; @@ -817,6 +819,8 @@ data_from_dynmstr (char *value) data_t *data = get_new_data (); + if (!data) + return NULL; data->len = strlen (value) + 1; data->data = value; data->is_stdalloc = 1; diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h index 3085db21c..16ab96268 100644 --- a/libglusterfs/src/globals.h +++ b/libglusterfs/src/globals.h @@ -21,16 +21,18 @@ * * 3.3.0 - 1 * 3.4.0 - 2 - * 3.next (3.5?) - 3 + * 3.5.0 - 3 + * 3.next (3.6?) - 4 * * TODO: Change above comment once gluster version is finalised * TODO: Finalize the op-version ranges */ #define GD_OP_VERSION_MIN 1 /* MIN is the fresh start op-version, mostly should not change */ -#define GD_OP_VERSION_MAX 3 /* MAX VERSION is the maximum count in VME table, +#define GD_OP_VERSION_MAX 4 /* MAX VERSION is the maximum count in VME table, should keep changing with introduction of newer versions */ +#define GD_OP_VERSION_4 4 /* Op-Version 4 */ #include "xlator.h" diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 517e4270a..f537110d7 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -364,9 +364,10 @@ struct _cmd_args { int aux_gfid_mount; struct list_head xlator_options; /* list of xlator_option_t */ - /* fuse options */ - int fuse_direct_io_mode; - char *use_readdirp; + /* fuse options */ + int fuse_direct_io_mode; + char *use_readdirp; + int no_root_squash; int volfile_check; double fuse_entry_timeout; double fuse_negative_timeout; diff --git a/libglusterfs/src/list.h b/libglusterfs/src/list.h index 6fcf17f35..c9a2fb070 100644 --- a/libglusterfs/src/list.h +++ b/libglusterfs/src/list.h @@ -46,6 +46,21 @@ list_add_tail (struct list_head *new, struct list_head *head) static inline void +list_add_order (struct list_head *new, struct list_head *head, + int (*compare)(struct list_head *, struct list_head *)) +{ + struct list_head *pos = head->next; + + while ( pos != head ) { + if (compare(new, pos) <= 0) + break; + pos = pos->next; + } + + list_add_tail(new, pos); +} + +static inline void list_del (struct list_head *old) { old->prev->next = old->next; diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h index 726d38eb6..26237fecb 100644 --- a/libglusterfs/src/mem-types.h +++ b/libglusterfs/src/mem-types.h @@ -119,6 +119,9 @@ enum gf_common_mem_types_ { gf_common_mt_syncopctx = 103, gf_common_mt_iobrefs = 104, gf_common_mt_gsync_status_t = 105, - gf_common_mt_end = 106 + gf_common_mt_uuid_t = 106, + gf_common_mt_vol_lock_obj_t = 107, + gf_common_mt_txn_opinfo_obj_t = 108, + gf_common_mt_end = 109 }; #endif diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c index 5af23592b..66a5906a3 100644 --- a/libglusterfs/src/store.c +++ b/libglusterfs/src/store.c @@ -362,12 +362,11 @@ gf_store_handle_new (char *path, gf_store_handle_t **handle) goto out; spath = gf_strdup (path); - if (!spath) goto out; fd = open (path, O_RDWR | O_CREAT | O_APPEND, 0600); - if (fd <= 0) { + if (fd < 0) { gf_log ("", GF_LOG_ERROR, "Failed to open file: %s, error: %s", path, strerror (errno)); goto out; @@ -383,7 +382,7 @@ gf_store_handle_new (char *path, gf_store_handle_t **handle) ret = 0; out: - if (fd > 0) + if (fd >= 0) close (fd); if (ret == -1) { diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index 9ce52407f..f3df8e2ae 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -149,10 +149,13 @@ xlator_volopt_dynload (char *xlator_type, void **dl_handle, } *dl_handle = handle; + handle = NULL; ret = 0; out: GF_FREE (name); + if (handle) + dlclose (handle); gf_log ("xlator", GF_LOG_DEBUG, "Returning %d", ret); return ret; diff --git a/rpc/rpc-lib/src/auth-glusterfs.c b/rpc/rpc-lib/src/auth-glusterfs.c index 48871ffb3..7bafa82fb 100644 --- a/rpc/rpc-lib/src/auth-glusterfs.c +++ b/rpc/rpc-lib/src/auth-glusterfs.c @@ -109,7 +109,6 @@ int auth_glusterfs_authenticate (rpcsvc_request_t *req, void *priv) for (gidcount = 0; gidcount < au.ngrps; ++gidcount) req->auxgids[gidcount] = au.groups[gidcount]; - RPC_AUTH_ROOT_SQUASH(req); gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d" ", gid: %d, owner: %s", @@ -229,7 +228,6 @@ int auth_glusterfs_v2_authenticate (rpcsvc_request_t *req, void *priv) for (i = 0; i < au.lk_owner.lk_owner_len; ++i) req->lk_owner.data[i] = au.lk_owner.lk_owner_val[i]; - RPC_AUTH_ROOT_SQUASH(req); gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d" ", gid: %d, owner: %s", diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 5876a500b..6d28ed90e 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -236,6 +236,13 @@ struct gf_gsync_detailed_status_ { char total_files_skipped[NAME_MAX]; }; +enum glusterd_mgmt_v3_procnum { + GLUSTERD_MGMT_V3_NULL, /* 0 */ + GLUSTERD_MGMT_V3_VOLUME_LOCK, + GLUSTERD_MGMT_V3_VOLUME_UNLOCK, + GLUSTERD_MGMT_V3_MAXVALUE, +}; + typedef struct gf_gsync_detailed_status_ gf_gsync_status_t; #define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */ @@ -268,6 +275,9 @@ typedef struct gf_gsync_detailed_status_ gf_gsync_status_t; #define GD_BRICK_PROGRAM 4867634 /*Completely random*/ #define GD_BRICK_VERSION 2 +/* Third version */ +#define GD_MGMT_V3_VERSION 3 + /* OP-VERSION handshake */ #define GD_MGMT_HNDSK_PROGRAM 1239873 /* Completely random */ #define GD_MGMT_HNDSK_VERSION 1 diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h index 5a6f930d3..5a991d499 100644 --- a/rpc/rpc-lib/src/rpcsvc.h +++ b/rpc/rpc-lib/src/rpcsvc.h @@ -286,6 +286,7 @@ struct rpcsvc_request { req->uid = req->svc->anonuid; \ if (req->gid == RPC_ROOT_GID) \ req->gid = req->svc->anongid; \ + \ for (gidcount = 0; gidcount < req->auxgidcount; \ ++gidcount) { \ if (!req->auxgids[gidcount]) \ diff --git a/rpc/xdr/src/glusterd1-xdr.c b/rpc/xdr/src/glusterd1-xdr.c index 213b48bc6..6c6514c90 100644 --- a/rpc/xdr/src/glusterd1-xdr.c +++ b/rpc/xdr/src/glusterd1-xdr.c @@ -491,3 +491,81 @@ xdr_gd1_mgmt_brick_op_rsp (XDR *xdrs, gd1_mgmt_brick_op_rsp *objp) return FALSE; return TRUE; } + +bool_t +xdr_gd1_mgmt_volume_lock_req (XDR *xdrs, gd1_mgmt_volume_lock_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_vector (xdrs, (char *)objp->uuid, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_vector (xdrs, (char *)objp->txn_id, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_int (xdrs, &objp->op)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gd1_mgmt_volume_lock_rsp (XDR *xdrs, gd1_mgmt_volume_lock_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_vector (xdrs, (char *)objp->uuid, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_vector (xdrs, (char *)objp->txn_id, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gd1_mgmt_volume_unlock_req (XDR *xdrs, gd1_mgmt_volume_unlock_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_vector (xdrs, (char *)objp->uuid, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_vector (xdrs, (char *)objp->txn_id, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gd1_mgmt_volume_unlock_rsp (XDR *xdrs, gd1_mgmt_volume_unlock_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_vector (xdrs, (char *)objp->uuid, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_vector (xdrs, (char *)objp->txn_id, 16, + sizeof (u_char), (xdrproc_t) xdr_u_char)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + return TRUE; +} diff --git a/rpc/xdr/src/glusterd1-xdr.h b/rpc/xdr/src/glusterd1-xdr.h index c35930cad..4115ff7a8 100644 --- a/rpc/xdr/src/glusterd1-xdr.h +++ b/rpc/xdr/src/glusterd1-xdr.h @@ -202,6 +202,51 @@ struct gd1_mgmt_brick_op_rsp { }; typedef struct gd1_mgmt_brick_op_rsp gd1_mgmt_brick_op_rsp; +struct gd1_mgmt_volume_lock_req { + u_char uuid[16]; + u_char txn_id[16]; + int op; + struct { + u_int dict_len; + char *dict_val; + } dict; +}; +typedef struct gd1_mgmt_volume_lock_req gd1_mgmt_volume_lock_req; + +struct gd1_mgmt_volume_lock_rsp { + u_char uuid[16]; + u_char txn_id[16]; + struct { + u_int dict_len; + char *dict_val; + } dict; + int op_ret; + int op_errno; +}; +typedef struct gd1_mgmt_volume_lock_rsp gd1_mgmt_volume_lock_rsp; + +struct gd1_mgmt_volume_unlock_req { + u_char uuid[16]; + u_char txn_id[16]; + struct { + u_int dict_len; + char *dict_val; + } dict; +}; +typedef struct gd1_mgmt_volume_unlock_req gd1_mgmt_volume_unlock_req; + +struct gd1_mgmt_volume_unlock_rsp { + u_char uuid[16]; + u_char txn_id[16]; + struct { + u_int dict_len; + char *dict_val; + } dict; + int op_ret; + int op_errno; +}; +typedef struct gd1_mgmt_volume_unlock_rsp gd1_mgmt_volume_unlock_rsp; + /* the xdr functions */ #if defined(__STDC__) || defined(__cplusplus) @@ -224,6 +269,10 @@ extern bool_t xdr_gd1_mgmt_friend_update (XDR *, gd1_mgmt_friend_update*); extern bool_t xdr_gd1_mgmt_friend_update_rsp (XDR *, gd1_mgmt_friend_update_rsp*); extern bool_t xdr_gd1_mgmt_brick_op_req (XDR *, gd1_mgmt_brick_op_req*); extern bool_t xdr_gd1_mgmt_brick_op_rsp (XDR *, gd1_mgmt_brick_op_rsp*); +extern bool_t xdr_gd1_mgmt_volume_lock_req (XDR *, gd1_mgmt_volume_lock_req*); +extern bool_t xdr_gd1_mgmt_volume_lock_rsp (XDR *, gd1_mgmt_volume_lock_rsp*); +extern bool_t xdr_gd1_mgmt_volume_unlock_req (XDR *, gd1_mgmt_volume_unlock_req*); +extern bool_t xdr_gd1_mgmt_volume_unlock_rsp (XDR *, gd1_mgmt_volume_unlock_rsp*); #else /* K&R C */ extern bool_t xdr_glusterd_volume_status (); @@ -245,6 +294,10 @@ extern bool_t xdr_gd1_mgmt_friend_update (); extern bool_t xdr_gd1_mgmt_friend_update_rsp (); extern bool_t xdr_gd1_mgmt_brick_op_req (); extern bool_t xdr_gd1_mgmt_brick_op_rsp (); +extern bool_t xdr_gd1_mgmt_volume_lock_req (); +extern bool_t xdr_gd1_mgmt_volume_lock_rsp (); +extern bool_t xdr_gd1_mgmt_volume_unlock_req (); +extern bool_t xdr_gd1_mgmt_volume_unlock_rsp (); #endif /* K&R C */ diff --git a/rpc/xdr/src/glusterd1-xdr.x b/rpc/xdr/src/glusterd1-xdr.x index fc1bb58b4..f29a9d214 100644 --- a/rpc/xdr/src/glusterd1-xdr.x +++ b/rpc/xdr/src/glusterd1-xdr.x @@ -125,3 +125,32 @@ struct gd1_mgmt_brick_op_rsp { opaque output<>; string op_errstr<>; } ; + +struct gd1_mgmt_volume_lock_req { + unsigned char uuid[16]; + unsigned char txn_id[16]; + int op; + opaque dict<>; +} ; + +struct gd1_mgmt_volume_lock_rsp { + unsigned char uuid[16]; + unsigned char txn_id[16]; + opaque dict<>; + int op_ret; + int op_errno; +} ; + +struct gd1_mgmt_volume_unlock_req { + unsigned char uuid[16]; + unsigned char txn_id[16]; + opaque dict<>; +} ; + +struct gd1_mgmt_volume_unlock_rsp { + unsigned char uuid[16]; + unsigned char txn_id[16]; + opaque dict<>; + int op_ret; + int op_errno; +} ; diff --git a/tests/basic/mount-options.disabled b/tests/basic/mount-options.disabled new file mode 100644 index 000000000..86d945ac5 --- /dev/null +++ b/tests/basic/mount-options.disabled @@ -0,0 +1,140 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +TEST glusterd --xlator-option=*.rpc-auth-allow-insecure=on +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5} +TEST $CLI volume set $V0 server.allow-insecure on +TEST $CLI volume start $V0 + +#test all the options available to see if the mount succeeds with those options +#or not. This does not test functionality. This is added to prevent options +#being removed in future breaking backward-compatibility. + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --entry-timeout=0 +TEST umount -l $M0 + +TEST glusterfs --volfile=/var/lib/glusterd/vols/$V0/${V0}-fuse.vol $M0 +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 --log-file=/tmp/a.txt --log-level=DEBUG $M0 +EXPECT_NOT "0" wc -l /tmp/a.txt +TEST grep " D " /tmp/a.txt +TEST rm -f /tmp/a.txt +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --acl +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --aux-gfid-mount +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --enable-ino32 +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --fopen-keep-cache=yes +TEST umount -l $M0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --fopen-keep-cache=no +TEST umount -l $M0 +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --fopen-keep-cache=fail + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --mac-compat=yes +TEST umount -l $M0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --mac-compat=no +TEST umount -l $M0 +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --mac-compat=fail + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --use-readdirp=yes +TEST umount -l $M0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --use-readdirp=no +TEST umount -l $M0 +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --use-readdirp=fail + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=yes +TEST umount -l $M0 +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=no +TEST umount -l $M0 +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=fail + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --read-only +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --selinux +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --worm +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-check +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --dump-fuse=/tmp/a.txt +EXPECT "0" stat /tmp/a.txt +TEST rm -f /tmp/a.txt +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --gid-timeout=0 +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --gid-timeout=-1 +TEST umount -l $M0 + +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --gid-timeout=abc + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --background-qlen=16 +TEST umount -l $M0 + +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --background-qlen=abc + +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --background-qlen=-1 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --congestion-threshold=12 +TEST umount -l $M0 + +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --congestion-threshold=abc + +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --congestion=threshold=-1 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --negative-timeout=10 +TEST umount -l $M0 + +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --negative-timeout=abc +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --negative-timeout=-1 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --pid-file=/tmp/a.txt +EXPECT_NOT "0" wc -l /tmp/a.txt +TEST rm -f /tmp/a.txt +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-port=24007 +TEST umount -l $M0 + +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-port=2400 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-transport=tcp +TEST umount -l $M0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-transport=ib-verbs +TEST umount -l $M0 + +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-port=socket + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volume-name=$V0 +TEST umount -l $M0 + +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volume-name=abcd + +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --invalid-option + +TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --invalid-option=abc +cleanup; diff --git a/tests/basic/mount.t b/tests/basic/mount.t index 90e522c5e..8163975d6 100755 --- a/tests/basic/mount.t +++ b/tests/basic/mount.t @@ -1,6 +1,7 @@ #!/bin/bash . $(dirname $0)/../include.rc +. $(dirname $0)/../nfs.rc cleanup; @@ -49,7 +50,7 @@ TEST 'mount -t fuse.glusterfs | grep -E "^$H0:$V0 .+ \(ro,"'; TEST 'grep -E "^$H0:$V0 .+ ,?ro,.+" /proc/mounts'; ## Wait for volume to register with rpc.mountd -sleep 5; +EXPECT_WITHIN 20 "1" is_nfs_export_available; ## Mount NFS TEST mount -t nfs -o nolock,soft,intr $H0:/$V0 $N0; diff --git a/tests/basic/volume-locks.t b/tests/basic/volume-locks.t new file mode 100755 index 000000000..b9e94b7e1 --- /dev/null +++ b/tests/basic/volume-locks.t @@ -0,0 +1,106 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../cluster.rc + +function check_peers { + $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l +} + +function volume_count { + local cli=$1; + if [ $cli -eq '1' ] ; then + $CLI_1 volume info | grep 'Volume Name' | wc -l; + else + $CLI_2 volume info | grep 'Volume Name' | wc -l; + fi +} + +function volinfo_field() +{ + local vol=$1; + local field=$2; + + $CLI_1 volume info $vol | grep "^$field: " | sed 's/.*: //'; +} + +function two_diff_vols_create { + # Both volume creates should be successful + $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0 & + $CLI_2 volume create $V1 $H1:$B1/$V1 $H2:$B2/$V1 $H3:$B3/$V1 +} + +function two_diff_vols_start { + # Both volume starts should be successful + $CLI_1 volume start $V0 & + $CLI_2 volume start $V1 +} + +function two_diff_vols_stop_force { + # Force stop, so that if rebalance from the + # remove bricks is in progress, stop can + # still go ahead. Both volume stops should + # be successful + $CLI_1 volume stop $V0 force & + $CLI_2 volume stop $V1 force +} + +function same_vol_remove_brick { + + # Running two same vol commands at the same time can result in + # two success', two failures, or one success and one failure, all + # of which are valid. The only thing that shouldn't happen is a + # glusterd crash. + + local vol=$1 + local brick=$2 + $CLI_1 volume remove-brick $1 $2 start & + $CLI_2 volume remove-brick $1 $2 start +} + +cleanup; + +TEST launch_cluster 3; +TEST $CLI_1 peer probe $H2; +TEST $CLI_1 peer probe $H3; + +EXPECT_WITHIN 20 2 check_peers + +two_diff_vols_create +EXPECT 'Created' volinfo_field $V0 'Status'; +EXPECT 'Created' volinfo_field $V1 'Status'; + +two_diff_vols_start +EXPECT 'Started' volinfo_field $V0 'Status'; +EXPECT 'Started' volinfo_field $V1 'Status'; + +same_vol_remove_brick $V0 $H2:$B2/$V0 +# Checking glusterd crashed or not after same volume remove brick +# on both nodes. +EXPECT_WITHIN 20 2 check_peers + +same_vol_remove_brick $V1 $H2:$B2/$V1 +# Checking glusterd crashed or not after same volume remove brick +# on both nodes. +EXPECT_WITHIN 20 2 check_peers + +$CLI_1 volume set $V0 diagnostics.client-log-level DEBUG & +$CLI_1 volume set $V1 diagnostics.client-log-level DEBUG +kill_glusterd 3 +$CLI_1 volume status $V0 +$CLI_2 volume status $V1 +$CLI_1 peer status +EXPECT_WITHIN 20 1 check_peers +EXPECT 'Started' volinfo_field $V0 'Status'; +EXPECT 'Started' volinfo_field $V1 'Status'; + +TEST $glusterd_3 +$CLI_1 volume status $V0 +$CLI_2 volume status $V1 +$CLI_1 peer status +#EXPECT_WITHIN 20 2 check_peers +#EXPECT 'Started' volinfo_field $V0 'Status'; +#EXPECT 'Started' volinfo_field $V1 'Status'; +#two_diff_vols_stop_force +#EXPECT_WITHIN 20 2 check_peers +cleanup; diff --git a/tests/bugs/bug-1004744.t b/tests/bugs/bug-1004744.t index 0290119ef..1211002e2 100644 --- a/tests/bugs/bug-1004744.t +++ b/tests/bugs/bug-1004744.t @@ -38,7 +38,7 @@ TEST $CLI volume rebalance $V0 fix-layout start EXPECT_WITHIN 1 "fix-layout in progress" rebalance_status_field $V0; -EXPECT_WITHIN 20 "fix-layout completed" rebalance_status_field $V0; +EXPECT_WITHIN 30 "fix-layout completed" rebalance_status_field $V0; TEST umount $M0 TEST $CLI volume stop $V0 diff --git a/tests/bugs/bug-1038598.t b/tests/bugs/bug-1038598.t new file mode 100644 index 000000000..d4b0b3e63 --- /dev/null +++ b/tests/bugs/bug-1038598.t @@ -0,0 +1,80 @@ +#!/bin/bash +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}; + +function hard_limit() +{ + local QUOTA_PATH=$1; + $CLI volume quota $V0 list $QUOTA_PATH | grep "$QUOTA_PATH" | awk '{print $2}' +} + +function soft_limit() +{ + local QUOTA_PATH=$1; + $CLI volume quota $V0 list $QUOTA_PATH | grep "$QUOTA_PATH" | awk '{print $3}' +} + +function usage() +{ + local QUOTA_PATH=$1; + $CLI volume quota $V0 list $QUOTA_PATH | grep "$QUOTA_PATH" | awk '{print $4}' +} + +function sl_exceeded() +{ + local QUOTA_PATH=$1; + $CLI volume quota $V0 list $QUOTA_PATH | grep "$QUOTA_PATH" | awk '{print $6}' +} + +function hl_exceeded() +{ + local QUOTA_PATH=$1; + $CLI volume quota $V0 list $QUOTA_PATH | grep "$QUOTA_PATH" | awk '{print $7}' + +} + +EXPECT "$V0" volinfo_field $V0 'Volume Name'; +EXPECT 'Created' volinfo_field $V0 'Status'; +EXPECT '2' brick_count $V0 + +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +TEST $CLI volume quota $V0 enable +sleep 5 + +TEST glusterfs -s $H0 --volfile-id $V0 $M0; + +TEST mkdir -p $M0/test_dir +TEST $CLI volume quota $V0 limit-usage /test_dir 10MB 50 + +EXPECT "10.0MB" hard_limit "/test_dir"; +EXPECT "50%" soft_limit "/test_dir"; + +TEST dd if=/dev/zero of=$M0/test_dir/file1.txt bs=1M count=4 +EXPECT "4.0MB" usage "/test_dir"; +EXPECT 'No' sl_exceeded "/test_dir"; +EXPECT 'No' hl_exceeded "/test_dir"; + +TEST dd if=/dev/zero of=$M0/test_dir/file1.txt bs=1M count=6 +EXPECT "6.0MB" usage "/test_dir"; +EXPECT 'Yes' sl_exceeded "/test_dir"; +EXPECT 'No' hl_exceeded "/test_dir"; + +#set timeout to 0 so that quota gets enforced without any lag +TEST $CLI volume set $V0 features.hard-timeout 0 +TEST $CLI volume set $V0 features.soft-timeout 0 + +TEST ! dd if=/dev/zero of=$M0/test_dir/file1.txt bs=1M count=15 +EXPECT 'Yes' sl_exceeded "/test_dir"; +EXPECT 'Yes' hl_exceeded "/test_dir"; + +cleanup; diff --git a/tests/bugs/bug-1042725.t b/tests/bugs/bug-1042725.t new file mode 100644 index 000000000..f3154e06d --- /dev/null +++ b/tests/bugs/bug-1042725.t @@ -0,0 +1,49 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +#Basic checks +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info + +#Create a distributed volume +TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2}; +TEST $CLI volume start $V0 + +# Mount FUSE +TEST glusterfs -s $H0 --volfile-id $V0 $M0 + +#Create files +TEST mkdir $M0/foo +TEST touch $M0/foo/{1..20} +for file in {1..20}; do + ln $M0/foo/$file $M0/foo/${file}_linkfile; +done + +#Stop one of the brick +TEST kill_brick ${V0} ${H0} ${B0}/${V0}1 + +rm -rf $M0/foo 2>/dev/null +TEST stat $M0/foo + +touch $M0/foo/{1..20} 2>/dev/null +touch $M0/foo/{1..20}_linkfile 2>/dev/null + +TEST $CLI volume start $V0 force; +sleep 5 +function verify_duplicate { + count=`ls $M0/foo | sort | uniq --repeated | grep [0-9] -c` + echo $count +} +EXPECT 0 verify_duplicate + +TEST umount $M0 +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0; +TEST ! $CLI volume info $V0; + +cleanup; diff --git a/tests/bugs/bug-954057.t b/tests/bugs/bug-954057.t new file mode 100755 index 000000000..30bc1d77e --- /dev/null +++ b/tests/bugs/bug-954057.t @@ -0,0 +1,44 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +#This script checks if use-readdirp option works as accepted in mount options + + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 $H0:$B0/${V0} +TEST $CLI volume start $V0 + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 + +TEST mkdir $M0/dir +TEST mkdir $M0/nobody +TEST chown nfsnobody:nfsnobody $M0/nobody +TEST `echo "file" >> $M0/file` +TEST cp $M0/file $M0/new +TEST chmod 700 $M0/new +TEST cat $M0/new + +TEST $CLI volume set $V0 server.root-squash enable +TEST `echo 3 > /proc/sys/vm/drop_caches` +TEST ! mkdir $M0/other +TEST mkdir $M0/nobody/other +TEST cat $M0/file +TEST ! cat $M0/new +TEST `echo "nobody" >> $M0/nobody/file` + +#mount the client without root-squashing +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 --no-root-squash=yes $M1 +TEST mkdir $M1/m1_dir +TEST `echo "file" >> $M1/m1_file` +TEST cp $M0/file $M1/new +TEST chmod 700 $M1/new +TEST cat $M1/new + +TEST $CLI volume set $V0 server.root-squash disable +TEST mkdir $M0/other +TEST cat $M0/new + +cleanup diff --git a/tests/include.rc b/tests/include.rc index 44259872a..250220efa 100644 --- a/tests/include.rc +++ b/tests/include.rc @@ -3,6 +3,7 @@ M1=${M1:=/mnt/glusterfs/1}; # 1st mount point for FUSE N0=${N0:=/mnt/nfs/0}; # 0th mount point for NFS N1=${N1:=/mnt/nfs/1}; # 1st mount point for NFS V0=${V0:=patchy}; # volume name to use in tests +V1=${V1:=patchy1}; # volume name to use in tests B0=${B0:=/d/backends}; # top level of brick directories H0=${H0:=`hostname --fqdn`}; # hostname DEBUG=${DEBUG:=0} # turn on debugging? diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index 9027e2a33..a8e0c21ab 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -1678,6 +1678,7 @@ pump_setxattr (call_frame_t *frame, xlator_t *this, ret = afr_local_init (local, priv, &op_errno); if (ret < 0) { afr_local_cleanup (local, this); + mem_put (local); goto out; } @@ -2389,7 +2390,7 @@ int32_t init (xlator_t *this) { afr_private_t * priv = NULL; - pump_private_t *pump_priv = NULL; + pump_private_t *pump_priv = NULL; int child_count = 0; xlator_list_t * trav = NULL; int i = 0; @@ -2410,12 +2411,10 @@ init (xlator_t *this) "Volume is dangling."); } - this->private = GF_CALLOC (1, sizeof (afr_private_t), - gf_afr_mt_afr_private_t); - if (!this->private) + priv = GF_CALLOC (1, sizeof (afr_private_t), gf_afr_mt_afr_private_t); + if (!priv) goto out; - priv = this->private; LOCK_INIT (&priv->lock); LOCK_INIT (&priv->read_child_lock); //lock recovery is not done in afr @@ -2455,7 +2454,6 @@ init (xlator_t *this) */ priv->strict_readdir = _gf_false; - priv->wait_count = 1; priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count, gf_afr_mt_char); @@ -2490,7 +2488,8 @@ init (xlator_t *this) while (i < child_count) { priv->children[i] = trav->xlator; - ret = gf_asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX, + ret = gf_asprintf (&priv->pending_key[i], "%s.%s", + AFR_XATTR_PREFIX, trav->xlator->name); if (-1 == ret) { gf_log (this->name, GF_LOG_ERROR, @@ -2534,8 +2533,7 @@ init (xlator_t *this) pump_priv->resume_path = GF_CALLOC (1, PATH_MAX, gf_afr_mt_char); if (!pump_priv->resume_path) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); + gf_log (this->name, GF_LOG_ERROR, "Out of memory"); ret = -1; goto out; } @@ -2558,11 +2556,33 @@ init (xlator_t *this) } priv->pump_private = pump_priv; + pump_priv = NULL; + + this->private = priv; + priv = NULL; pump_change_state (this, PUMP_STATE_ABORT); ret = 0; out: + + if (pump_priv) { + GF_FREE (pump_priv->resume_path); + LOCK_DESTROY (&pump_priv->resume_path_lock); + LOCK_DESTROY (&pump_priv->pump_state_lock); + GF_FREE (pump_priv); + } + + if (priv) { + GF_FREE (priv->child_up); + GF_FREE (priv->children); + GF_FREE (priv->pending_key); + GF_FREE (priv->last_event); + LOCK_DESTROY (&priv->lock); + LOCK_DESTROY (&priv->read_child_lock); + GF_FREE (priv); + } + return ret; } diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index cf4ec258d..3055f4615 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -1843,10 +1843,13 @@ dht_vgetxattr_fill_and_set (dht_local_t *local, dict_t **dict, xlator_t *this, } else { gf_log (this->name, GF_LOG_WARNING, "Unknown local->xsel (%s)", local->xsel); + GF_FREE (xattr_buf); goto out; } ret = dict_set_dynstr (*dict, local->xsel, xattr_buf); + if (ret) + GF_FREE (xattr_buf); GF_FREE (local->xattr_val); out: @@ -3714,12 +3717,12 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, } hashed_subvol = dht_subvol_get_hashed (this, loc); + /* Dont fail unlink if hashed_subvol is NULL which can be the result + * of layout anomaly */ if (!hashed_subvol) { gf_log (this->name, GF_LOG_DEBUG, "no subvolume in layout for path=%s", loc->path); - op_errno = EINVAL; - goto err; } cached_subvol = local->cached_subvol; @@ -3731,7 +3734,7 @@ dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, } local->flags = xflag; - if (hashed_subvol != cached_subvol) { + if (hashed_subvol && hashed_subvol != cached_subvol) { STACK_WIND (frame, dht_unlink_linkfile_cbk, hashed_subvol, hashed_subvol->fops->unlink, loc, xflag, xdata); @@ -4611,17 +4614,85 @@ err: int +dht_rmdir_cached_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xattr, + struct iatt *parent) +{ + dht_local_t *local = NULL; + xlator_t *src = NULL; + call_frame_t *main_frame = NULL; + dht_local_t *main_local = NULL; + int this_call_cnt = 0; + dht_conf_t *conf = this->private; + dict_t *xattrs = NULL; + int ret = 0; + + local = frame->local; + src = local->hashed_subvol; + + main_frame = local->main_frame; + main_local = main_frame->local; + + if (op_ret == 0) { + main_local->op_ret = -1; + main_local->op_errno = ENOTEMPTY; + + gf_log (this->name, GF_LOG_WARNING, + "%s found on cached subvol %s", + local->loc.path, src->name); + goto err; + } else if (op_errno != ENOENT) { + main_local->op_ret = -1; + main_local->op_errno = op_errno; + goto err; + } + + xattrs = dict_new (); + if (!xattrs) { + gf_log (this->name, GF_LOG_ERROR, "dict_new failed"); + goto err; + } + + ret = dict_set_uint32 (xattrs, conf->link_xattr_name, 256); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key" + " in dict"); + if (xattrs) + dict_unref (xattrs); + goto err; + } + + STACK_WIND (frame, dht_rmdir_lookup_cbk, + src, src->fops->lookup, &local->loc, xattrs); + if (xattrs) + dict_unref (xattrs); + + return 0; +err: + + this_call_cnt = dht_frame_return (main_frame); + if (is_last_call (this_call_cnt)) + dht_rmdir_do (main_frame, this); + + DHT_STACK_DESTROY (frame); + return 0; +} + + +int dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, gf_dirent_t *entries, xlator_t *src) { - int ret = 0; - int build_ret = 0; - gf_dirent_t *trav = NULL; + int ret = 0; + int build_ret = 0; + gf_dirent_t *trav = NULL; call_frame_t *lookup_frame = NULL; dht_local_t *lookup_local = NULL; - dht_local_t *local = NULL; - dict_t *xattrs = NULL; - dht_conf_t *conf = this->private; + dht_local_t *local = NULL; + dict_t *xattrs = NULL; + dht_conf_t *conf = this->private; + xlator_t *subvol = NULL; local = frame->local; @@ -4681,6 +4752,7 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, lookup_frame->local = lookup_local; lookup_local->main_frame = frame; + lookup_local->hashed_subvol = src; build_ret = dht_build_child_loc (this, &lookup_local->loc, &local->loc, trav->d_name); @@ -4699,9 +4771,20 @@ dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this, } UNLOCK (&frame->lock); - STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk, - src, src->fops->lookup, - &lookup_local->loc, xattrs); + subvol = dht_linkfile_subvol (this, NULL, &trav->d_stat, + trav->dict); + if (!subvol) { + gf_log (this->name, GF_LOG_INFO, + "linkfile not having link subvolume. path=%s", + lookup_local->loc.path); + STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk, + src, src->fops->lookup, + &lookup_local->loc, xattrs); + } else { + STACK_WIND (lookup_frame, dht_rmdir_cached_lookup_cbk, + subvol, subvol->fops->lookup, + &lookup_local->loc, xattrs); + } ret++; } @@ -4767,16 +4850,18 @@ int dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, fd_t *fd, dict_t *xdata) { - dht_local_t *local = NULL; + dht_local_t *local = NULL; int this_call_cnt = -1; - call_frame_t *prev = NULL; - dict_t *dict = NULL; - int ret = 0; - dht_conf_t *conf = this->private; + call_frame_t *prev = NULL; + dict_t *dict = NULL; + int ret = 0; + dht_conf_t *conf = this->private; + int i = 0; local = frame->local; prev = cookie; + this_call_cnt = dht_frame_return (frame); if (op_ret == -1) { gf_log (this->name, GF_LOG_DEBUG, "opendir on %s for %s failed (%s)", @@ -4789,6 +4874,12 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto err; } + if (!is_last_call (this_call_cnt)) + return 0; + + if (local->op_ret == -1) + goto err; + dict = dict_new (); if (!dict) { local->op_ret = -1; @@ -4802,9 +4893,13 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, "%s: failed to set '%s' key", local->loc.path, conf->link_xattr_name); - STACK_WIND (frame, dht_rmdir_readdirp_cbk, - prev->this, prev->this->fops->readdirp, - local->fd, 4096, 0, dict); + local->call_cnt = conf->subvolume_cnt; + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (frame, dht_rmdir_readdirp_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->readdirp, + local->fd, 4096, 0, dict); + } if (dict) dict_unref (dict); @@ -4812,8 +4907,6 @@ dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; err: - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) { dht_rmdir_do (frame, this); } diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 381643b22..18a501f04 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -884,8 +884,12 @@ dht_migration_complete_check_task (void *data) if (fd_is_anonymous (iter_fd)) continue; + /* flags for open are stripped down to allow following the + * new location of the file, otherwise we can get EEXIST or + * truncate the file again as rebalance is moving the data */ ret = syncop_open (dst_node, &tmp_loc, - iter_fd->flags, iter_fd); + (iter_fd->flags & + ~(O_CREAT | O_EXCL | O_TRUNC)), iter_fd); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "failed to open " "the fd (%p, flags=0%o) on file %s @ %s", @@ -1034,8 +1038,12 @@ dht_rebalance_inprogress_task (void *data) if (fd_is_anonymous (iter_fd)) continue; + /* flags for open are stripped down to allow following the + * new location of the file, otherwise we can get EEXIST or + * truncate the file again as rebalance is moving the data */ ret = syncop_open (dst_node, &tmp_loc, - iter_fd->flags, iter_fd); + (iter_fd->flags & + ~(O_CREAT | O_EXCL | O_TRUNC)), iter_fd); if (ret < 0) { gf_log (this->name, GF_LOG_ERROR, "failed to send open " "the fd (%p, flags=0%o) on file %s @ %s", diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 1e38d6be1..31d85a506 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -465,11 +465,8 @@ dht_layout_entry_cmp (dht_layout_t *layout, int i, int j) - (int64_t) layout->list[j].stop; goto out; } - if (layout->list[i].err || layout->list[j].err) - diff = layout->list[i].err - layout->list[j].err; - else - diff = (int64_t) layout->list[i].start - - (int64_t) layout->list[j].start; + diff = (int64_t) layout->list[i].start + - (int64_t) layout->list[j].start; out: return diff; @@ -536,8 +533,20 @@ dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout, char is_virgin = 1; uint32_t no_space = 0; - /* TODO: explain what is happening */ - + /* This funtion scans through the layout spread of a directory to + check if there are any anomalies. Prior to calling this function + the layout entries should be sorted in the ascending order. + + If the layout entry has err != 0 + then increment the corresponding anomaly. + else + if (start of the current layout entry > stop + 1 of previous + non erroneous layout entry) + then it indicates a hole in the layout + if (start of the current layout entry < stop + 1 of previous + non erroneous layout entry) + then it indicates an overlap in the layout + */ last_stop = layout->list[0].start - 1; prev_stop = last_stop; diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 324b30626..36c073973 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -103,20 +103,20 @@ dht_priv_dump (xlator_t *this) this->name); gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt); for (i = 0; i < conf->subvolume_cnt; i++) { - sprintf (key, "subvolumes[%d]", i); + snprintf (key, sizeof (key), "subvolumes[%d]", i); gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type, conf->subvolumes[i]->name); if (conf->file_layouts && conf->file_layouts[i]){ - sprintf (key, "file_layouts[%d]", i); + snprintf (key, sizeof (key), "file_layouts[%d]", i); dht_layout_dump(conf->file_layouts[i], key); } if (conf->dir_layouts && conf->dir_layouts[i]) { - sprintf (key, "dir_layouts[%d]", i); + snprintf (key, sizeof (key), "dir_layouts[%d]", i); dht_layout_dump(conf->dir_layouts[i], key); } if (conf->subvolume_status) { - sprintf (key, "subvolume_status[%d]", i); + snprintf (key, sizeof (key), "subvolume_status[%d]", i); gf_proc_dump_write(key, "%d", (int)conf->subvolume_status[i]); } @@ -130,14 +130,35 @@ dht_priv_dump (xlator_t *this) gf_proc_dump_write("disk_unit", "%c", conf->disk_unit); gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval); gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit); - if (conf ->du_stats) { - gf_proc_dump_write("du_stats.avail_percent", "%lf", - conf->du_stats->avail_percent); - gf_proc_dump_write("du_stats.avail_space", "%lu", - conf->du_stats->avail_space); - gf_proc_dump_write("du_stats.avail_inodes", "%lf", - conf->du_stats->avail_inodes); - gf_proc_dump_write("du_stats.log", "%lu", conf->du_stats->log); + + if (conf->du_stats) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (!conf->subvolume_status[i]) + continue; + + snprintf (key, sizeof (key), "subvolumes[%d]", i); + gf_proc_dump_write (key, "%s", + conf->subvolumes[i]->name); + + snprintf (key, sizeof (key), + "du_stats[%d].avail_percent", i); + gf_proc_dump_write (key, "%lf", + conf->du_stats[i].avail_percent); + + snprintf (key, sizeof (key), "du_stats[%d].avail_space", + i); + gf_proc_dump_write (key, "%lu", + conf->du_stats[i].avail_space); + + snprintf (key, sizeof (key), + "du_stats[%d].avail_inodes", i); + gf_proc_dump_write (key, "%lf", + conf->du_stats[i].avail_inodes); + + snprintf (key, sizeof (key), "du_stats[%d].log", i); + gf_proc_dump_write (key, "%lu", + conf->du_stats[i].log); + } } if (conf->last_stat_fetch.tv_sec) diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h index f35220ccb..e4cf506c4 100644 --- a/xlators/features/changelog/lib/src/gf-changelog-helpers.h +++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h @@ -51,7 +51,7 @@ typedef struct gf_changelog { /* connection retries */ int gfc_connretries; - char gfc_sockpath[PATH_MAX]; + char gfc_sockpath[UNIX_PATH_MAX]; char gfc_brickpath[PATH_MAX]; diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c index ca8e373e7..4b2b25ad5 100644 --- a/xlators/features/changelog/lib/src/gf-changelog.c +++ b/xlators/features/changelog/lib/src/gf-changelog.c @@ -173,7 +173,7 @@ gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc) } CHANGELOG_MAKE_SOCKET_PATH (gfc->gfc_brickpath, - gfc->gfc_sockpath, PATH_MAX); + gfc->gfc_sockpath, UNIX_PATH_MAX); gf_log (this->name, GF_LOG_INFO, "connecting to changelog socket: %s (brick: %s)", gfc->gfc_sockpath, gfc->gfc_brickpath); @@ -202,6 +202,7 @@ gf_changelog_notification_init (xlator_t *this, gf_changelog_t *gfc) gf_log (this->name, GF_LOG_ERROR, "could not connect to changelog socket!" " bailing out..."); + close (sockfd); ret = -1; } else gf_log (this->name, GF_LOG_INFO, diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h index 2412d810e..4c3d5405b 100644 --- a/xlators/features/changelog/src/changelog-helpers.h +++ b/xlators/features/changelog/src/changelog-helpers.h @@ -246,7 +246,7 @@ typedef struct changelog_notify { pthread_t notify_th; /* unique socket path */ - char sockpath[PATH_MAX]; + char sockpath[UNIX_PATH_MAX]; int socket_fd; diff --git a/xlators/features/changelog/src/changelog-notifier.c b/xlators/features/changelog/src/changelog-notifier.c index 1f8b31253..5f3d063a8 100644 --- a/xlators/features/changelog/src/changelog-notifier.c +++ b/xlators/features/changelog/src/changelog-notifier.c @@ -178,7 +178,7 @@ changelog_notifier (void *data) } CHANGELOG_MAKE_SOCKET_PATH (priv->changelog_brick, - cn->sockpath, PATH_MAX); + cn->sockpath, UNIX_PATH_MAX); if (unlink (cn->sockpath) < 0) { if (errno != ENOENT) { gf_log (this->name, GF_LOG_ERROR, diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c index db592719b..5edfeda8f 100644 --- a/xlators/features/index/src/index.c +++ b/xlators/features/index/src/index.c @@ -371,6 +371,7 @@ sync_base_indices (void *index_priv) char xattrop_directory[PATH_MAX] = {0}; char base_index_path[PATH_MAX] = {0}; char xattrop_index_path[PATH_MAX] = {0}; + int32_t op_errno = 0; int ret = 0; priv = index_priv; @@ -381,11 +382,14 @@ sync_base_indices (void *index_priv) XATTROP_SUBDIR); if ((dir_base_holder = opendir(base_indices_holder)) == NULL) { + op_errno = errno; ret = -1; goto out; } if ((xattrop_dir = opendir (xattrop_directory)) == NULL) { + op_errno = errno; ret = -1; + (void) closedir (dir_base_holder); goto out; } @@ -410,20 +414,30 @@ sync_base_indices (void *index_priv) ret = sys_link (xattrop_index_path, base_index_path); - if (ret && errno != EEXIST) + if (ret && errno != EEXIST) { + op_errno = errno; + (void) closedir (dir_base_holder); + (void) closedir (xattrop_dir); goto out; + } } } ret = closedir (xattrop_dir); - if (ret) + if (ret) { + op_errno = errno; + (void) closedir (dir_base_holder); goto out; + } ret = closedir (dir_base_holder); - if (ret) + if (ret) { + op_errno = errno; goto out; + } ret = 0; out: + errno = op_errno; return ret; } @@ -490,8 +504,10 @@ sync_base_indices_from_xattrop (xlator_t *this) continue; } ret = unlink (entry->d_name); - if (ret) + if (ret) { + closedir (dir); goto out; + } } closedir (dir); } @@ -655,22 +671,16 @@ _check_key_is_zero_filled (dict_t *d, char *k, data_t *v, return 0; } - void -_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr) +_index_action (xlator_t *this, inode_t *inode, gf_boolean_t zero_xattr) { - gf_boolean_t zero_xattr = _gf_true; + int ret = 0; index_inode_ctx_t *ctx = NULL; - int ret = 0; - - ret = dict_foreach (xattr, _check_key_is_zero_filled, NULL); - if (ret == -1) - zero_xattr = _gf_false; ret = index_inode_ctx_get (inode, this, &ctx); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Not able to %s %s -> index", - zero_xattr?"add":"del", uuid_utoa (inode->gfid)); + zero_xattr?"del":"add", uuid_utoa (inode->gfid)); goto out; } if (zero_xattr) { @@ -691,6 +701,19 @@ out: } void +_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr) +{ + gf_boolean_t zero_xattr = _gf_true; + int ret = 0; + + ret = dict_foreach (xattr, _check_key_is_zero_filled, NULL); + if (ret == -1) + zero_xattr = _gf_false; + _index_action (this, inode, zero_xattr); + return; +} + +void fop_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr) { _xattrop_index_action (this, inode, xattr); @@ -868,6 +891,11 @@ int index_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { + //In wind phase bring the gfid into index. This way if the brick crashes + //just after posix performs xattrop before _cbk reaches index xlator + //we will still have the gfid in index. + _index_action (this, frame->local, _gf_false); + STACK_WIND (frame, index_xattrop_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr, xdata); @@ -878,6 +906,10 @@ int index_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { + //In wind phase bring the gfid into index. This way if the brick crashes + //just after posix performs xattrop before _cbk reaches index xlator + //we will still have the gfid in index. + _index_action (this, frame->local, _gf_false); STACK_WIND (frame, index_fxattrop_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr, xdata); diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c index d6b6dd358..45058652b 100644 --- a/xlators/features/marker/src/marker.c +++ b/xlators/features/marker/src/marker.c @@ -1021,7 +1021,7 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret < 0) { if (local->err == 0) { - local->err = op_errno; + local->err = op_errno ? op_errno : EINVAL; } gf_log (this->name, GF_LOG_WARNING, @@ -1037,6 +1037,11 @@ marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this, } else if (local->err != 0) { STACK_UNWIND_STRICT (rename, frame, -1, local->err, NULL, NULL, NULL, NULL, NULL, NULL); + } else { + gf_log (this->name, GF_LOG_CRITICAL, + "continuation stub to unwind the call is absent, hence " + "call will be hung (call-stack id = %"PRIu64")", + frame->root->unique); } mq_reduce_parent_size (this, &oplocal->loc, oplocal->contribution); @@ -1082,7 +1087,7 @@ marker_rename_release_newp_lock (call_frame_t *frame, void *cookie, if (op_ret < 0) { if (local->err == 0) { - local->err = op_errno; + local->err = op_errno ? op_errno : EINVAL; } gf_log (this->name, GF_LOG_WARNING, @@ -1271,7 +1276,7 @@ marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this, MARKER_RESET_UID_GID (frame, frame->root, local); if ((op_ret < 0) && (op_errno != ENOATTR)) { - local->err = op_errno; + local->err = op_errno ? op_errno : EINVAL; gf_log (this->name, GF_LOG_WARNING, "fetching contribution values from %s (gfid:%s) " "failed (%s)", local->loc.path, @@ -1283,7 +1288,7 @@ marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this, if (local->loc.inode != NULL) { GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret); if (ret < 0) { - local->err = errno; + local->err = errno ? errno : ENOMEM; goto err; } @@ -1323,7 +1328,7 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie, MARKER_RESET_UID_GID (frame, frame->root, local); if ((op_ret < 0) && (op_errno != ENOATTR)) { - local->err = op_errno; + local->err = op_errno ? op_errno : EINVAL; gf_log (this->name, GF_LOG_WARNING, "fetching contribution values from %s (gfid:%s) " "failed (%s)", oplocal->loc.path, @@ -1334,7 +1339,7 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie, GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret); if (ret < 0) { - local->err = errno; + local->err = errno ? errno : ENOMEM; goto err; } @@ -1344,7 +1349,7 @@ marker_get_newpath_contribution (call_frame_t *frame, void *cookie, if (local->loc.inode != NULL) { GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret); if (ret < 0) { - local->err = errno; + local->err = errno ? errno : ENOMEM; goto err; } @@ -1385,7 +1390,7 @@ marker_get_oldpath_contribution (call_frame_t *frame, void *cookie, oplocal = local->oplocal; if (op_ret < 0) { - local->err = op_errno; + local->err = op_errno ? op_errno : EINVAL; gf_log (this->name, GF_LOG_WARNING, "cannot hold inodelk on %s (gfid:%s) (%s)", local->next_lock_on->path, @@ -1396,7 +1401,7 @@ marker_get_oldpath_contribution (call_frame_t *frame, void *cookie, GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret); if (ret < 0) { - local->err = errno; + local->err = errno ? errno : ENOMEM; goto quota_err; } @@ -1452,7 +1457,7 @@ marker_rename_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, loc = &local->parent_loc; } - local->err = op_errno; + local->err = op_errno ? op_errno : EINVAL; gf_log (this->name, GF_LOG_WARNING, "cannot hold inodelk on %s (gfid:%s) (%s)", loc->path, uuid_utoa (loc->inode->gfid), diff --git a/xlators/features/qemu-block/src/bdrv-xlator.c b/xlators/features/qemu-block/src/bdrv-xlator.c index aaf028cfe..1e55b5fb7 100644 --- a/xlators/features/qemu-block/src/bdrv-xlator.c +++ b/xlators/features/qemu-block/src/bdrv-xlator.c @@ -235,7 +235,6 @@ qemu_gluster_co_writev (BlockDriverState *bs, int64_t sector_num, int nb_sectors iobref = iobref_new (); if (!iobref) { - iobuf_unref (iobuf); goto out; } diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 2812a2b13..bb5dc7aba 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -721,7 +721,7 @@ quota_check_limit_continuation (struct list_head *parents, inode_t *inode, this = THIS; if ((op_ret < 0) || list_empty (parents)) { - if (list_empty (parents)) { + if (op_ret >= 0) { gf_log (this->name, GF_LOG_WARNING, "Couldn't build ancestry for inode (gfid:%s). " "Without knowing ancestors till root, quota " @@ -3387,6 +3387,10 @@ quota_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, if (hard_lim > 0) { local = quota_local_new (); + if (local == NULL) { + op_errno = ENOMEM; + goto err; + } frame->local = local; local->loc.inode = inode_ref (fd->inode); @@ -3561,11 +3565,9 @@ quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, buf->f_blocks = blocks; avail = buf->f_blocks - usage; - avail = (avail >= 0) ? avail : 0; + avail = max (avail, 0); - if (buf->f_bfree > avail) { - buf->f_bfree = avail; - } + buf->f_bfree = avail; /* * We have to assume that the total assigned quota * won't cause us to dip into the reserved space, @@ -4029,6 +4031,7 @@ quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, char *path = NULL; int64_t cur_size = 0; quota_priv_t *priv = NULL; + gf_boolean_t dyn_mem = _gf_true; priv = this->private; if ((ctx->soft_lim <= 0) || (timerisset (&ctx->prev_log) && @@ -4043,6 +4046,7 @@ quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, if (!usage_str) { snprintf (size_str, sizeof (size_str), "%"PRId64, cur_size); usage_str = (char*) size_str; + dyn_mem = _gf_false; } inode_path (inode, NULL, &path); if (!path) @@ -4063,7 +4067,8 @@ quota_log_usage (xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode, ctx->prev_log = cur_time; } - GF_FREE (usage_str); + if (dyn_mem) + GF_FREE (usage_str); } int32_t diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c index 63e9bcf9f..4e680c510 100644 --- a/xlators/lib/src/libxlator.c +++ b/xlators/lib/src/libxlator.c @@ -233,6 +233,7 @@ out: frame->local = local->xl_local; local->xl_specf_unwind (frame, op_ret, op_errno, dict, xdata); + GF_FREE (local); } else if (need_unwind) { STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); @@ -344,6 +345,7 @@ unlock: frame->local = local->xl_local; local->xl_specf_unwind (frame, op_ret, op_errno, dict, xdata); + GF_FREE (local); return 0; } else if (need_unwind){ STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am index cbb6353f8..25f2ed051 100644 --- a/xlators/mgmt/glusterd/src/Makefile.am +++ b/xlators/mgmt/glusterd/src/Makefile.am @@ -12,7 +12,7 @@ glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \ glusterd-geo-rep.c glusterd-replace-brick.c glusterd-log-ops.c \ glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \ glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \ - glusterd-etcd.c + glusterd-locks.c glusterd-etcd.c glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ $(top_builddir)/rpc/xdr/src/libgfxdr.la \ @@ -22,7 +22,7 @@ glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \ glusterd-sm.h glusterd-store.h glusterd-mem-types.h \ glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \ - glusterd-syncop.h glusterd-hooks.h glusterd-etcd.h + glusterd-syncop.h glusterd-hooks.h glusterd-locks.h glusterd-etcd.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(rpclibdir) -I$(CONTRIBDIR)/rbtree \ diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c index 26d608a2f..ced916ea1 100644 --- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c @@ -1467,7 +1467,7 @@ glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr) goto out; } - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ret = glusterd_generate_and_set_task_id (dict, GF_REMOVE_BRICK_TID_KEY); if (ret) { @@ -1733,7 +1733,7 @@ glusterd_op_remove_brick (dict_t *dict, char **op_errstr) /* Set task-id, if available, in ctx dict for operations other than * start */ - if (is_origin_glusterd () && (cmd != GF_OP_CMD_START)) { + if (is_origin_glusterd (dict) && (cmd != GF_OP_CMD_START)) { if (!uuid_is_null (volinfo->rebal.rebalance_id)) { ret = glusterd_copy_uuid_to_dict (volinfo->rebal.rebalance_id, dict, diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c index 3969db17c..a0bc9f737 100644 --- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c +++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c @@ -987,6 +987,8 @@ _get_status_mst_slv (dict_t *this, char *key, data_t *value, void *data) param->node); out: + GF_FREE (errmsg); + if (slave_buf) GF_FREE(slave_buf); @@ -1444,6 +1446,7 @@ _get_slave_status (dict_t *dict, char *key, data_t *value, void *data) ret = is_geo_rep_active (param->volinfo,slave, conf_path, ¶m->is_active); out: + GF_FREE(errmsg); return ret; } diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c index e296509d8..5a140bb22 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handler.c +++ b/xlators/mgmt/glusterd/src/glusterd-handler.c @@ -33,6 +33,7 @@ #include "glusterd-op-sm.h" #include "glusterd-utils.h" #include "glusterd-store.h" +#include "glusterd-locks.h" #include "glusterd1-xdr.h" #include "cli1-xdr.h" @@ -56,6 +57,7 @@ #endif extern glusterd_op_info_t opinfo; +extern uuid_t global_txn_id; int glusterd_big_locked_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, @@ -591,10 +593,16 @@ int32_t glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, char *err_str, size_t err_len) { - int32_t ret = -1; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; - int32_t locked = 0; + int32_t ret = -1; + dict_t *dict = NULL; + xlator_t *this = NULL; + glusterd_conf_t *priv = NULL; + int32_t locked = 0; + char *tmp = NULL; + char *volname = NULL; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; GF_ASSERT (req); GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX)); @@ -605,33 +613,122 @@ glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx, priv = this->private; GF_ASSERT (priv); - ret = glusterd_lock (MY_UUID); + dict = ctx; + + /* Generate a transaction-id for this operation and + * save it in the dict. This transaction id distinguishes + * each transaction, and helps separate opinfos in the + * op state machine. */ + ret = glusterd_generate_txn_id (dict, &txn_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate transaction id"); + goto out; + + } + + /* Save the MY_UUID as the originator_uuid. This originator_uuid + * will be used by is_origin_glusterd() to determine if a node + * is the originator node for a command. */ + ret = glusterd_set_originator_uuid (dict); if (ret) { gf_log (this->name, GF_LOG_ERROR, - "Unable to acquire lock on localhost, ret: %d", ret); - snprintf (err_str, err_len, "Another transaction is in progress. " - "Please try again after sometime."); + "Failed to set originator_uuid."); goto out; } + /* Based on the op_version, acquire a cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) { + ret = glusterd_lock (MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock on localhost, ret: %d", + ret); + snprintf (err_str, err_len, + "Another transaction is in progress. " + "Please try again after sometime."); + goto out; + } + } else { + /* If no volname is given as a part of the command, locks will + * not be held */ + ret = dict_get_str (dict, "volname", &tmp); + if (ret) { + gf_log ("", GF_LOG_INFO, + "No Volume name present. " + "Locks not being held."); + goto local_locking_done; + } else { + /* Use a copy of volname, as cli response will be + * sent before the unlock, and the volname in the + * dict, might be removed */ + volname = gf_strdup (tmp); + if (!volname) + goto out; + } + + ret = glusterd_volume_lock (volname, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", volname); + snprintf (err_str, err_len, + "Another transaction is in progress for %s. " + "Please try again after sometime.", volname); + goto out; + } + } + locked = 1; gf_log (this->name, GF_LOG_DEBUG, "Acquired lock on localhost"); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_START_LOCK, NULL); +local_locking_done: + + /* If no volname is given as a part of the command, locks will + * not be held, hence sending stage event. */ + if (volname) + event_type = GD_OP_EVENT_START_LOCK; + else { + txn_op_info.state.state = GD_OP_STATE_LOCK_SENT; + event_type = GD_OP_EVENT_ALL_ACC; + } + + /* Save opinfo for this transaction with the transaction id */ + glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, ctx, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + if (ctx) + dict_unref (ctx); + goto out; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, ctx); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to acquire cluster" " lock."); goto out; } - glusterd_op_set_op (op); - glusterd_op_set_ctx (ctx); - glusterd_op_set_req (req); - - out: - if (locked && ret) - glusterd_unlock (MY_UUID); + if (locked && ret) { + /* Based on the op-version, we release the + * cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) + glusterd_unlock (MY_UUID); + else { + ret = glusterd_volume_unlock (volname, MY_UUID); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + ret = -1; + } + } + + if (volname) + GF_FREE (volname); gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; @@ -640,11 +737,15 @@ out: int __glusterd_handle_cluster_lock (rpcsvc_request_t *req) { - gd1_mgmt_cluster_lock_req lock_req = {{0},}; - int32_t ret = -1; - glusterd_op_lock_ctx_t *ctx = NULL; - glusterd_peerinfo_t *peerinfo = NULL; - xlator_t *this = NULL; + dict_t *op_ctx = NULL; + int32_t ret = -1; + gd1_mgmt_cluster_lock_req lock_req = {{0},}; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_op_t op = GD_OP_EVENT_LOCK; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + uuid_t *txn_id = &global_txn_id; + xlator_t *this = NULL; this = THIS; GF_ASSERT (this); @@ -679,8 +780,29 @@ __glusterd_handle_cluster_lock (rpcsvc_request_t *req) uuid_copy (ctx->uuid, lock_req.uuid); ctx->req = req; + ctx->dict = NULL; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, ctx); + op_ctx = dict_new (); + if (!op_ctx) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set new dict"); + goto out; + } + + glusterd_txn_opinfo_init (&txn_op_info, NULL, &op, op_ctx, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + dict_unref (txn_op_info.op_ctx); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_LOCK"); out: gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -698,6 +820,186 @@ glusterd_handle_cluster_lock (rpcsvc_request_t *req) __glusterd_handle_cluster_lock); } +static int +glusterd_handle_volume_lock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_volume_lock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + glusterd_op_info_t txn_op_info = {{0},}; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode lock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received volume lock req " + "from uuid: %s txn_id: %s", uuid_utoa (lock_req.uuid), + uuid_utoa (lock_req.txn_id)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + glusterd_txn_opinfo_init (&txn_op_info, NULL, &lock_req.op, + ctx->dict, req); + + ret = glusterd_set_txn_opinfo (&lock_req.txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, + &lock_req.txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_LOCK"); + +out: + if (ret) { + if (ctx->dict) + dict_destroy (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_handle_volume_lock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_volume_lock_fn); +} + +static int +glusterd_handle_volume_unlock_fn (rpcsvc_request_t *req) +{ + gd1_mgmt_volume_unlock_req lock_req = {{0},}; + int32_t ret = -1; + glusterd_op_lock_ctx_t *ctx = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + ret = xdr_to_generic (req->msg[0], &lock_req, + (xdrproc_t)xdr_gd1_mgmt_volume_unlock_req); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "Failed to decode unlock " + "request received from peer"); + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + gf_log (this->name, GF_LOG_DEBUG, "Received volume unlock req " + "from uuid: %s", uuid_utoa (lock_req.uuid)); + + if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) { + gf_log (this->name, GF_LOG_WARNING, "%s doesn't " + "belong to the cluster. Ignoring request.", + uuid_utoa (lock_req.uuid)); + ret = -1; + goto out; + } + + ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t); + if (!ctx) { + ret = -1; + goto out; + } + + uuid_copy (ctx->uuid, lock_req.uuid); + ctx->req = req; + + ctx->dict = dict_new (); + if (!ctx->dict) { + ret = -1; + goto out; + } + + ret = dict_unserialize (lock_req.dict.dict_val, + lock_req.dict.dict_len, &ctx->dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "failed to unserialize the dictionary"); + goto out; + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, + &lock_req.txn_id, ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_UNLOCK"); + +out: + if (ret) { + if (ctx->dict) + dict_destroy (ctx->dict); + if (ctx) + GF_FREE (ctx); + } + + glusterd_friend_sm (); + glusterd_op_sm (); + + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int +glusterd_handle_volume_unlock (rpcsvc_request_t *req) +{ + return glusterd_big_locked_handler (req, + glusterd_handle_volume_unlock_fn); +} + int glusterd_req_ctx_create (rpcsvc_request_t *rpc_req, glusterd_op_t op, uuid_t uuid, @@ -756,6 +1058,9 @@ __glusterd_handle_stage_op (rpcsvc_request_t *req) gd1_mgmt_stage_op_req op_req = {{0},}; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; + glusterd_op_info_t txn_op_info = {{0},}; + glusterd_op_sm_state_info_t state = {0,}; this = THIS; GF_ASSERT (this); @@ -784,7 +1089,36 @@ __glusterd_handle_stage_op (rpcsvc_request_t *req) if (ret) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP, req_ctx); + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + /* In cases where there is no volname, the receivers won't have a + * transaction opinfo created, as for those operations, the locking + * phase where the transaction opinfos are created, won't be called. */ + ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_DEBUG, + "No transaction's opinfo set"); + + state.state = GD_OP_STATE_LOCKED; + glusterd_txn_opinfo_init (&txn_op_info, &state, + &op_req.op, req_ctx->dict, req); + + ret = glusterd_set_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + dict_unref (req_ctx->dict); + goto out; + } + } + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP, + txn_id, req_ctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to inject event GD_OP_EVENT_STAGE_OP"); out: free (op_req.buf.buf_val);//malloced by xdr @@ -808,6 +1142,7 @@ __glusterd_handle_commit_op (rpcsvc_request_t *req) gd1_mgmt_commit_op_req op_req = {{0},}; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -838,11 +1173,12 @@ __glusterd_handle_commit_op (rpcsvc_request_t *req) if (ret) goto out; - ret = glusterd_op_init_ctx (op_req.op); - if (ret) - goto out; + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP, req_ctx); + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP, + txn_id, req_ctx); out: free (op_req.buf.buf_val);//malloced by xdr @@ -1893,6 +2229,56 @@ glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status) } int +glusterd_op_volume_lock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, + int32_t status) +{ + + gd1_mgmt_volume_lock_rsp rsp = {{0},}; + int ret = -1; + + GF_ASSERT (req); + GF_ASSERT (txn_id); + glusterd_get_uuid (&rsp.uuid); + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + uuid_copy (rsp.txn_id, *txn_id); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to volume lock, ret: %d", + ret); + + return ret; +} + +int +glusterd_op_volume_unlock_send_resp (rpcsvc_request_t *req, uuid_t *txn_id, + int32_t status) +{ + + gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + int ret = -1; + + GF_ASSERT (req); + GF_ASSERT (txn_id); + rsp.op_ret = status; + if (rsp.op_ret) + rsp.op_errno = errno; + glusterd_get_uuid (&rsp.uuid); + uuid_copy (rsp.txn_id, *txn_id); + + ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + + gf_log (THIS->name, GF_LOG_DEBUG, "Responded to volume unlock, ret: %d", + ret); + + return ret; +} + +int __glusterd_handle_cluster_unlock (rpcsvc_request_t *req) { gd1_mgmt_cluster_unlock_req unlock_req = {{0}, }; @@ -1900,6 +2286,7 @@ __glusterd_handle_cluster_unlock (rpcsvc_request_t *req) glusterd_op_lock_ctx_t *ctx = NULL; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -1934,8 +2321,9 @@ __glusterd_handle_cluster_unlock (rpcsvc_request_t *req) } uuid_copy (ctx->uuid, unlock_req.uuid); ctx->req = req; + ctx->dict = NULL; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, txn_id, ctx); out: glusterd_friend_sm (); @@ -3666,11 +4054,11 @@ get_brickinfo_from_brickid (char *brickid, glusterd_brickinfo_t **brickinfo) volid_str = brickid_dup; brick = strchr (brickid_dup, ':'); - *brick = '\0'; - brick++; if (!volid_str || !brick) goto out; + *brick = '\0'; + brick++; uuid_parse (volid_str, volid); ret = glusterd_volinfo_find_by_volume_id (volid, &volinfo); if (ret) @@ -3850,6 +4238,7 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, glusterd_peerinfo_t *peerinfo = NULL; glusterd_peerctx_t *peerctx = NULL; gf_boolean_t quorum_action = _gf_false; + glusterd_volinfo_t *volinfo = NULL; uuid_t uuid; peerctx = mydata; @@ -3878,6 +4267,30 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d", peerinfo->state.state); + if (peerinfo->connected) { + if (conf->op_version < GD_OP_VERSION_4) { + glusterd_get_lock_owner (&uuid); + if (!uuid_is_null (uuid) && + !uuid_compare (peerinfo->uuid, uuid)) + glusterd_unlock (peerinfo->uuid); + } else { + list_for_each_entry (volinfo, &conf->volumes, + vol_list) { + ret = glusterd_volume_unlock + (volinfo->volname, + peerinfo->uuid); + if (ret) + gf_log (this->name, + GF_LOG_TRACE, + "Lock not released " + "for %s", + volinfo->volname); + } + } + + ret = 0; + } + if ((peerinfo->quorum_contrib != QUORUM_DOWN) && (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) { peerinfo->quorum_contrib = QUORUM_DOWN; @@ -3892,13 +4305,6 @@ __glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata, glusterd_friend_remove_notify (peerctx); goto out; } - glusterd_get_lock_owner (&uuid); - if (!uuid_is_null (uuid) && - !uuid_compare (peerinfo->uuid, uuid)) { - glusterd_unlock (peerinfo->uuid); - if (opinfo.state.state != GD_OP_STATE_DEFAULT) - opinfo.state.state = GD_OP_STATE_DEFAULT; - } peerinfo->connected = 0; break; @@ -4033,3 +4439,18 @@ struct rpcsvc_program gd_svc_cli_prog_ro = { .actors = gd_svc_cli_actors_ro, .synctask = _gf_true, }; + +rpcsvc_actor_t gd_svc_mgmt_v3_actors[] = { + [GLUSTERD_MGMT_V3_NULL] = { "NULL", GLUSTERD_MGMT_V3_NULL, glusterd_null, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_VOLUME_LOCK] = { "VOL_LOCK", GLUSTERD_MGMT_V3_VOLUME_LOCK, glusterd_handle_volume_lock, NULL, 0, DRC_NA}, + [GLUSTERD_MGMT_V3_VOLUME_UNLOCK] = { "VOL_UNLOCK", GLUSTERD_MGMT_V3_VOLUME_UNLOCK, glusterd_handle_volume_unlock, NULL, 0, DRC_NA}, +}; + +struct rpcsvc_program gd_svc_mgmt_v3_prog = { + .progname = "GlusterD svc mgmt v3", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .numactors = GLUSTERD_MGMT_V3_MAXVALUE, + .actors = gd_svc_mgmt_v3_actors, + .synctask = _gf_true, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c index 1420eb692..e0508faf6 100644 --- a/xlators/mgmt/glusterd/src/glusterd-handshake.c +++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c @@ -30,6 +30,7 @@ extern struct rpc_clnt_program gd_peer_prog; extern struct rpc_clnt_program gd_mgmt_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; #define TRUSTED_PREFIX "trusted-" @@ -812,6 +813,7 @@ __glusterd_mgmt_hndsk_version_ack_cbk (struct rpc_req *req, struct iovec *iov, */ peerinfo->mgmt = &gd_mgmt_prog; peerinfo->peer = &gd_peer_prog; + peerinfo->mgmt_v3 = &gd_mgmt_v3_prog; ret = default_notify (this, GF_EVENT_CHILD_UP, NULL); @@ -1038,6 +1040,15 @@ glusterd_set_clnt_mgmt_program (glusterd_peerinfo_t *peerinfo, peerinfo->peer->progname, peerinfo->peer->prognum, peerinfo->peer->progver); } + + if (peerinfo->mgmt_v3) { + gf_log ("", GF_LOG_INFO, + "Using Program %s, Num (%d), Version (%d)", + peerinfo->mgmt_v3->progname, + peerinfo->mgmt_v3->prognum, + peerinfo->mgmt_v3->progver); + } + ret = 0; out: return ret; diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c new file mode 100644 index 000000000..68c6d7426 --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-locks.c @@ -0,0 +1,177 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "common-utils.h" +#include "cli1-xdr.h" +#include "xdr-generic.h" +#include "glusterd.h" +#include "glusterd-op-sm.h" +#include "glusterd-store.h" +#include "glusterd-utils.h" +#include "glusterd-volgen.h" +#include "glusterd-locks.h" +#include "run.h" +#include "syscall.h" + +#include <signal.h> + +static dict_t *vol_lock; + +/* Initialize the global vol-lock list(dict) when + * glusterd is spawned */ +int32_t +glusterd_vol_lock_init () +{ + int32_t ret = -1; + + vol_lock = dict_new (); + if (!vol_lock) + goto out; + + ret = 0; +out: + return ret; +} + +/* Destroy the global vol-lock list(dict) when + * glusterd cleanup is performed */ +void +glusterd_vol_lock_fini () +{ + if (vol_lock) + dict_unref (vol_lock); +} + +int32_t +glusterd_get_vol_lock_owner (char *volname, uuid_t *uuid) +{ + int32_t ret = -1; + vol_lock_obj *lock_obj = NULL; + uuid_t no_owner = {0,}; + + if (!volname || !uuid) { + gf_log ("", GF_LOG_ERROR, "volname or uuid is null."); + ret = -1; + goto out; + } + + ret = dict_get_bin (vol_lock, volname, (void **) &lock_obj); + if (!ret) + uuid_copy (*uuid, lock_obj->lock_owner); + else + uuid_copy (*uuid, no_owner); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_volume_lock (char *volname, uuid_t uuid) +{ + int32_t ret = -1; + vol_lock_obj *lock_obj = NULL; + uuid_t owner = {0}; + + if (!volname) { + gf_log ("", GF_LOG_ERROR, "volname is null."); + ret = -1; + goto out; + } + + ret = glusterd_get_vol_lock_owner (volname, &owner); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Unable to get volume lock owner"); + goto out; + } + + /* If the lock has already been held for the given volume + * we fail */ + if (!uuid_is_null (owner)) { + gf_log ("", GF_LOG_ERROR, "Unable to acquire lock. " + "Lock for %s held by %s", volname, + uuid_utoa (owner)); + ret = -1; + goto out; + } + + lock_obj = GF_CALLOC (1, sizeof(vol_lock_obj), + gf_common_mt_vol_lock_obj_t); + if (!lock_obj) { + ret = -1; + goto out; + } + + uuid_copy (lock_obj->lock_owner, uuid); + + ret = dict_set_bin (vol_lock, volname, lock_obj, sizeof(vol_lock_obj)); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Unable to set lock owner " + "in volume lock"); + if (lock_obj) + GF_FREE (lock_obj); + goto out; + } + + gf_log ("", GF_LOG_DEBUG, "Lock for %s successfully held by %s", + volname, uuid_utoa (uuid)); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_volume_unlock (char *volname, uuid_t uuid) +{ + int32_t ret = -1; + uuid_t owner = {0}; + + if (!volname) { + gf_log ("", GF_LOG_ERROR, "volname is null."); + ret = -1; + goto out; + } + + ret = glusterd_get_vol_lock_owner (volname, &owner); + if (ret) + goto out; + + if (uuid_is_null (owner)) { + gf_log ("", GF_LOG_ERROR, "Lock for %s not held", volname); + ret = -1; + goto out; + } + + ret = uuid_compare (uuid, owner); + if (ret) { + gf_log (THIS->name, GF_LOG_ERROR, "Lock owner mismatch. " + "Lock for %s held by %s", + volname, uuid_utoa (owner)); + goto out; + } + + /* Removing the volume lock from the global list */ + dict_del (vol_lock, volname); + + gf_log ("", GF_LOG_DEBUG, "Lock for %s successfully released", + volname); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.h b/xlators/mgmt/glusterd/src/glusterd-locks.h new file mode 100644 index 000000000..2a8cc20ed --- /dev/null +++ b/xlators/mgmt/glusterd/src/glusterd-locks.h @@ -0,0 +1,38 @@ +/* + Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _GLUSTERD_LOCKS_H_ +#define _GLUSTERD_LOCKS_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +struct volume_lock_object_ { + uuid_t lock_owner; +}; +typedef struct volume_lock_object_ vol_lock_obj; + +int32_t +glusterd_vol_lock_init (); + +void +glusterd_vol_lock_fini (); + +int32_t +glusterd_get_vol_lock_owner (char *volname, uuid_t *uuid); + +int32_t +glusterd_volume_lock (char *volname, uuid_t uuid); + +int32_t +glusterd_volume_unlock (char *volname, uuid_t uuid); + +#endif diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c index 963aa0762..83c91a52d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c @@ -37,6 +37,7 @@ #include "glusterd-store.h" #include "glusterd-hooks.h" #include "glusterd-volgen.h" +#include "glusterd-locks.h" #include "syscall.h" #include "cli1-xdr.h" #include "common-utils.h" @@ -67,6 +68,195 @@ static struct list_head gd_op_sm_queue; pthread_mutex_t gd_op_sm_lock; glusterd_op_info_t opinfo = {{0},}; +uuid_t global_txn_id = {0}; /* To be used in + * heterogeneous + * cluster with no + * transaction ids */ + +static dict_t *txn_opinfo; + +struct glusterd_txn_opinfo_object_ { + glusterd_op_info_t opinfo; +}; +typedef struct glusterd_txn_opinfo_object_ glusterd_txn_opinfo_obj; + +int32_t +glusterd_txn_opinfo_dict_init () +{ + int32_t ret = -1; + + txn_opinfo = dict_new (); + if (!txn_opinfo) { + ret = -1; + goto out; + } + + ret = 0; +out: + return ret; +} + +void +glusterd_txn_opinfo_dict_fini () +{ + if (txn_opinfo) + dict_destroy (txn_opinfo); +} + +void +glusterd_txn_opinfo_init (glusterd_op_info_t *opinfo, + glusterd_op_sm_state_info_t *state, + glusterd_op_t *op, + dict_t *op_ctx, + rpcsvc_request_t *req) +{ + GF_ASSERT (opinfo); + + if (state) + opinfo->state = *state; + + if (op) + opinfo->op = *op; + + opinfo->op_ctx = dict_ref(op_ctx); + + if (req) + opinfo->req = req; + + return; +} + +int32_t +glusterd_generate_txn_id (dict_t *dict, uuid_t **txn_id) +{ + int32_t ret = -1; + + GF_ASSERT (dict); + + *txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + if (!*txn_id) + goto out; + + uuid_generate (**txn_id); + + ret = dict_set_bin (dict, "transaction_id", + *txn_id, sizeof (uuid_t)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } + + gf_log ("", GF_LOG_DEBUG, + "Transaction_id = %s", uuid_utoa (**txn_id)); +out: + if (ret && *txn_id) + GF_FREE (*txn_id); + + return ret; +} + +int32_t +glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo) +{ + int32_t ret = -1; + glusterd_txn_opinfo_obj *opinfo_obj = NULL; + + if (!txn_id || !opinfo) { + gf_log ("", GF_LOG_ERROR, + "Empty transaction id or opinfo received."); + ret = -1; + goto out; + } + + ret = dict_get_bin(txn_opinfo, uuid_utoa (*txn_id), + (void **) &opinfo_obj); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to get transaction opinfo"); + goto out; + } + + (*opinfo) = opinfo_obj->opinfo; + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo) +{ + int32_t ret = -1; + glusterd_txn_opinfo_obj *opinfo_obj = NULL; + + if (!txn_id) { + gf_log ("", GF_LOG_ERROR, "Empty transaction id received."); + ret = -1; + goto out; + } + + ret = dict_get_bin(txn_opinfo, uuid_utoa (*txn_id), + (void **) &opinfo_obj); + if (ret) { + opinfo_obj = GF_CALLOC (1, sizeof(glusterd_txn_opinfo_obj), + gf_common_mt_txn_opinfo_obj_t); + if (!opinfo_obj) { + ret = -1; + goto out; + } + + ret = dict_set_bin(txn_opinfo, uuid_utoa (*txn_id), opinfo_obj, + sizeof(glusterd_txn_opinfo_obj)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Unable to set opinfo for transaction ID : %s", + uuid_utoa (*txn_id)); + goto out; + } + } + + opinfo_obj->opinfo = (*opinfo); + + ret = 0; +out: + if (ret) + if (opinfo_obj) + GF_FREE (opinfo_obj); + + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_clear_txn_opinfo (uuid_t *txn_id) +{ + int32_t ret = -1; + glusterd_op_info_t txn_op_info = {{0},}; + + if (!txn_id) { + gf_log ("", GF_LOG_ERROR, "Empty transaction id received."); + ret = -1; + goto out; + } + + ret = glusterd_get_txn_opinfo (txn_id, &txn_op_info); + if (ret) { + gf_log ("", GF_LOG_ERROR, "Transaction opinfo not found"); + goto out; + } + + dict_unref (txn_op_info.op_ctx); + + dict_del(txn_opinfo, uuid_utoa (*txn_id)); + + ret = 0; +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + static int glusterfs_port = GLUSTERD_DEFAULT_PORT; static char *glusterd_op_sm_state_names[] = { "Default", @@ -147,10 +337,10 @@ glusterd_is_volume_started (glusterd_volinfo_t *volinfo) } static int -glusterd_op_sm_inject_all_acc () +glusterd_op_sm_inject_all_acc (uuid_t *txn_id) { int32_t ret = -1; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, txn_id, NULL); gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -354,6 +544,10 @@ glusterd_validate_quorum_options (xlator_t *this, char *fullkey, char *value, if (!glusterd_is_quorum_option (fullkey)) goto out; key = strchr (fullkey, '.'); + if (key == NULL) { + ret = -1; + goto out; + } key++; opt = xlator_volume_option_get (this, key); ret = xlator_option_validate (this, key, value, opt, op_errstr); @@ -441,7 +635,7 @@ glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr) * This check is not done on the originator glusterd. The originator * glusterd sets this value. */ - origin_glusterd = is_origin_glusterd (); + origin_glusterd = is_origin_glusterd (dict); if (!origin_glusterd) { /* Check for v3.3.x origin glusterd */ @@ -2211,7 +2405,7 @@ glusterd_op_status_volume (dict_t *dict, char **op_errstr, GF_ASSERT (dict); - origin_glusterd = is_origin_glusterd (); + origin_glusterd = is_origin_glusterd (dict); ret = dict_get_uint32 (dict, "cmd", &cmd); if (ret) @@ -2416,6 +2610,7 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; uint32_t pending_count = 0; + dict_t *dict = NULL; this = THIS; priv = this->private; @@ -2430,27 +2625,61 @@ glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx) (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) continue; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK]; - if (proc->fn) { - ret = proc->fn (NULL, this, peerinfo); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Failed to " - "send lock request for operation " - "'Volume %s' to peer %s", - gd_op_list[opinfo.op], - peerinfo->hostname); - continue; + /* Based on the op_version, acquire a cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) { + proc = &peerinfo->mgmt->proctable + [GLUSTERD_MGMT_CLUSTER_LOCK]; + if (proc->fn) { + ret = proc->fn (NULL, this, peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send lock request " + "for operation 'Volume %s' to " + "peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + continue; + } + pending_count++; + } + } else { + dict = glusterd_op_get_ctx (); + dict_ref (dict); + + proc = &peerinfo->mgmt_v3->proctable + [GLUSTERD_MGMT_V3_VOLUME_LOCK]; + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", + peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set peerinfo"); + dict_unref (dict); + goto out; + } + + ret = proc->fn (NULL, this, dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send volume lock " + "request for operation " + "'Volume %s' to peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + dict_unref (dict); + continue; + } + pending_count++; } - pending_count++; } } opinfo.pending_count = pending_count; if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); +out: gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); - return ret; } @@ -2463,17 +2692,12 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; uint32_t pending_count = 0; + dict_t *dict = NULL; this = THIS; priv = this->private; GF_ASSERT (priv); - /*ret = glusterd_unlock (MY_UUID); - - if (ret) - goto out; - */ - list_for_each_entry (peerinfo, &priv->peers, uuid_list) { GF_ASSERT (peerinfo); @@ -2483,29 +2707,63 @@ glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx) (glusterd_op_get_op() != GD_OP_SYNC_VOLUME)) continue; - proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK]; - if (proc->fn) { - ret = proc->fn (NULL, this, peerinfo); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Failed to " - "send unlock request for operation " - "'Volume %s' to peer %s", - gd_op_list[opinfo.op], - peerinfo->hostname); - continue; + /* Based on the op_version, + * release the cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) { + proc = &peerinfo->mgmt->proctable + [GLUSTERD_MGMT_CLUSTER_UNLOCK]; + if (proc->fn) { + ret = proc->fn (NULL, this, peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send unlock request " + "for operation 'Volume %s' to " + "peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + continue; + } + pending_count++; + } + } else { + dict = glusterd_op_get_ctx (); + dict_ref (dict); + + proc = &peerinfo->mgmt_v3->proctable + [GLUSTERD_MGMT_V3_VOLUME_UNLOCK]; + if (proc->fn) { + ret = dict_set_static_ptr (dict, "peerinfo", + peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "failed to set peerinfo"); + dict_unref (dict); + goto out; + } + + ret = proc->fn (NULL, this, dict); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "Failed to send volume unlock " + "request for operation " + "'Volume %s' to peer %s", + gd_op_list[opinfo.op], + peerinfo->hostname); + dict_unref (dict); + continue; + } + pending_count++; } - pending_count++; } } opinfo.pending_count = pending_count; if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); +out: gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); - return ret; - } static int @@ -2517,7 +2775,8 @@ glusterd_op_ac_ack_drain (glusterd_op_sm_event_t *event, void *ctx) opinfo.pending_count--; if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret); @@ -2533,43 +2792,93 @@ glusterd_op_ac_send_unlock_drain (glusterd_op_sm_event_t *event, void *ctx) static int glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx) { - glusterd_op_lock_ctx_t *lock_ctx = NULL; - int32_t ret = 0; + int32_t ret = 0; + char *volname = NULL; + glusterd_op_lock_ctx_t *lock_ctx = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; GF_ASSERT (event); GF_ASSERT (ctx); + this = THIS; + priv = this->private; + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; - ret = glusterd_lock (lock_ctx->uuid); + /* If the req came from a node running on older op_version + * the dict won't be present. Based on it acquiring a cluster + * or volume lock */ + if (lock_ctx->dict == NULL) { + ret = glusterd_lock (lock_ctx->uuid); + glusterd_op_lock_send_resp (lock_ctx->req, ret); + } else { + ret = dict_get_str (lock_ctx->dict, "volname", &volname); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire volname"); + else { + ret = glusterd_volume_lock (volname, lock_ctx->uuid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", + volname); + } - gf_log (THIS->name, GF_LOG_DEBUG, "Lock Returned %d", ret); + glusterd_op_volume_lock_send_resp (lock_ctx->req, + &event->txn_id, ret); - glusterd_op_lock_send_resp (lock_ctx->req, ret); + dict_unref (lock_ctx->dict); + } + gf_log (THIS->name, GF_LOG_DEBUG, "Lock Returned %d", ret); return ret; } static int glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx) { - int ret = 0; - glusterd_op_lock_ctx_t *lock_ctx = NULL; - xlator_t *this = NULL; - glusterd_conf_t *priv = NULL; + int32_t ret = 0; + char *volname = NULL; + glusterd_op_lock_ctx_t *lock_ctx = NULL; + glusterd_conf_t *priv = NULL; + xlator_t *this = NULL; + GF_ASSERT (event); GF_ASSERT (ctx); this = THIS; priv = this->private; + lock_ctx = (glusterd_op_lock_ctx_t *)ctx; - ret = glusterd_unlock (lock_ctx->uuid); + /* If the req came from a node running on older op_version + * the dict won't be present. Based on it releasing the cluster + * or volume lock */ + if (lock_ctx->dict == NULL) { + ret = glusterd_unlock (lock_ctx->uuid); + glusterd_op_unlock_send_resp (lock_ctx->req, ret); + } else { + ret = dict_get_str (lock_ctx->dict, "volname", &volname); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire volname"); + else { + ret = glusterd_volume_unlock (volname, lock_ctx->uuid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + } - gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret); + glusterd_op_volume_unlock_send_resp (lock_ctx->req, + &event->txn_id, ret); + + dict_unref (lock_ctx->dict); + } - glusterd_op_unlock_send_resp (lock_ctx->req, ret); + gf_log (this->name, GF_LOG_DEBUG, "Unlock Returned %d", ret); if (priv->pending_quorum_action) glusterd_do_quorum_action (); @@ -2607,7 +2916,8 @@ glusterd_op_ac_rcvd_lock_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, + &event->txn_id, NULL); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3039,7 +3349,8 @@ out: if (dict) dict_unref (dict); if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, NULL); opinfo.op_ret = ret; } @@ -3048,7 +3359,7 @@ out: opinfo.pending_count); if (!opinfo.pending_count) - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret); @@ -3057,7 +3368,7 @@ out: } static int32_t -glusterd_op_start_rb_timer (dict_t *dict) +glusterd_op_start_rb_timer (dict_t *dict, uuid_t *txn_id) { int32_t op = 0; struct timespec timeout = {0, }; @@ -3076,7 +3387,7 @@ glusterd_op_start_rb_timer (dict_t *dict) } if (op != GF_REPLACE_OP_START) { - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (txn_id); goto out; } @@ -3091,6 +3402,17 @@ glusterd_op_start_rb_timer (dict_t *dict) ret = -1; goto out; } + + ret = dict_set_bin (rb_ctx, "transaction_id", + txn_id, sizeof (uuid_t)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } else + gf_log ("", GF_LOG_DEBUG, + "transaction_id = %s", uuid_utoa (*txn_id)); + priv->timer = gf_timer_call_after (THIS->ctx, timeout, glusterd_do_replace_brick, (void *) rb_ctx); @@ -3580,17 +3902,19 @@ out: if (dict) dict_unref (dict); if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, NULL); opinfo.op_ret = ret; } if (!opinfo.pending_count) { if (op == GD_OP_REPLACE_BRICK) { - ret = glusterd_op_start_rb_timer (op_dict); + ret = glusterd_op_start_rb_timer (op_dict, + &event->txn_id); } else { glusterd_op_modify_op_ctx (op, NULL); - ret = glusterd_op_sm_inject_all_acc (); + ret = glusterd_op_sm_inject_all_acc (&event->txn_id); } goto err; } @@ -3615,7 +3939,8 @@ glusterd_op_ac_rcvd_stage_op_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC, + &event->txn_id, NULL); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3636,7 +3961,8 @@ glusterd_op_ac_stage_op_failed (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3657,7 +3983,8 @@ glusterd_op_ac_commit_op_failed (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, NULL); out: gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3700,7 +4027,8 @@ glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.brick_pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, ev_ctx->commit_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, ev_ctx->commit_ctx); out: if (ev_ctx->rsp_dict) @@ -3742,7 +4070,7 @@ glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx) goto out; } - ret = glusterd_op_start_rb_timer (op_ctx); + ret = glusterd_op_start_rb_timer (op_ctx, &event->txn_id); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Couldn't start " "replace-brick operation."); @@ -3757,10 +4085,14 @@ glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx) out: if (commit_ack_inject) { if (ret) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + &event->txn_id, + NULL); else if (!opinfo.pending_count) { glusterd_op_modify_op_ctx (op, NULL); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); + ret = glusterd_op_sm_inject_event + (GD_OP_EVENT_COMMIT_ACC, + &event->txn_id, NULL); } /*else do nothing*/ } @@ -3781,7 +4113,8 @@ glusterd_op_ac_rcvd_unlock_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, + &event->txn_id, NULL); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3815,7 +4148,7 @@ glusterd_op_reset_ctx () } int32_t -glusterd_op_txn_complete () +glusterd_op_txn_complete (uuid_t *txn_id) { int32_t ret = -1; glusterd_conf_t *priv = NULL; @@ -3825,6 +4158,7 @@ glusterd_op_txn_complete () rpcsvc_request_t *req = NULL; void *ctx = NULL; char *op_errstr = NULL; + char *volname = NULL; xlator_t *this = NULL; this = THIS; @@ -3847,14 +4181,29 @@ glusterd_op_txn_complete () glusterd_op_reset_ctx (); glusterd_op_clear_errstr (); - ret = glusterd_unlock (MY_UUID); - - /* unlock cant/shouldnt fail here!! */ - if (ret) { - gf_log (this->name, GF_LOG_CRITICAL, - "Unable to clear local lock, ret: %d", ret); + /* Based on the op-version, we release the cluster or volume lock */ + if (priv->op_version < GD_OP_VERSION_4) { + ret = glusterd_unlock (MY_UUID); + /* unlock cant/shouldnt fail here!! */ + if (ret) + gf_log (this->name, GF_LOG_CRITICAL, + "Unable to clear local lock, ret: %d", ret); + else + gf_log (this->name, GF_LOG_DEBUG, "Cleared local lock"); } else { - gf_log (this->name, GF_LOG_DEBUG, "Cleared local lock"); + ret = dict_get_str (ctx, "volname", &volname); + if (ret) + gf_log ("", GF_LOG_INFO, + "No Volume name present. " + "Locks have not been held."); + + if (volname) { + ret = glusterd_volume_unlock (volname, MY_UUID); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + } } ret = glusterd_op_send_cli_response (op, op_ret, @@ -3873,6 +4222,13 @@ glusterd_op_txn_complete () if (priv->pending_quorum_action) glusterd_do_quorum_action (); + + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo (txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear transaction's opinfo"); + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); return ret; } @@ -3884,7 +4240,7 @@ glusterd_op_ac_unlocked_all (glusterd_op_sm_event_t *event, void *ctx) GF_ASSERT (event); - ret = glusterd_op_txn_complete (); + ret = glusterd_op_txn_complete (&event->txn_id); gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -3901,6 +4257,7 @@ glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx) char *op_errstr = NULL; dict_t *dict = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; GF_ASSERT (this); @@ -3926,9 +4283,27 @@ glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx) status); } + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + + if (txn_id) + uuid_copy (*txn_id, event->txn_id); + else { + ret = -1; + goto out; + } + + ret = dict_set_bin (rsp_dict, "transaction_id", + txn_id, sizeof(uuid_t *)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } + ret = glusterd_op_stage_send_resp (req_ctx->req, req_ctx->op, status, op_errstr, rsp_dict); +out: if (op_errstr && (strcmp (op_errstr, ""))) GF_FREE (op_errstr); @@ -3990,6 +4365,7 @@ glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) dict_t *dict = NULL; dict_t *rsp_dict = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; GF_ASSERT (this); @@ -4019,10 +4395,27 @@ glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx) "'Volume %s' failed: %d", gd_op_list[req_ctx->op], status); + txn_id = GF_CALLOC (1, sizeof(uuid_t), gf_common_mt_uuid_t); + + if (txn_id) + uuid_copy (*txn_id, event->txn_id); + else { + ret = -1; + goto out; + } + + ret = dict_set_bin (rsp_dict, "transaction_id", + txn_id, sizeof(uuid_t)); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set transaction id."); + goto out; + } + ret = glusterd_op_commit_send_resp (req_ctx->req, req_ctx->op, status, op_errstr, rsp_dict); - glusterd_op_fini_ctx (); +out: if (op_errstr && (strcmp (op_errstr, ""))) GF_FREE (op_errstr); @@ -4051,7 +4444,6 @@ glusterd_op_ac_send_commit_failed (glusterd_op_sm_event_t *event, void *ctx) opinfo.op_ret, opinfo.op_errstr, op_ctx); - glusterd_op_fini_ctx (); if (opinfo.op_errstr && (strcmp (opinfo.op_errstr, ""))) { GF_FREE (opinfo.op_errstr); opinfo.op_errstr = NULL; @@ -5233,7 +5625,8 @@ glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx) if (!opinfo.pending_count && !opinfo.brick_pending_count) { glusterd_clear_pending_nodes (&opinfo.pending_bricks); - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, req_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, + &event->txn_id, req_ctx); } out: @@ -5287,7 +5680,8 @@ glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx) if (opinfo.brick_pending_count > 0) goto out; - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, ev_ctx->commit_ctx); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, &event->txn_id, + ev_ctx->commit_ctx); out: if (ev_ctx->rsp_dict) @@ -5661,7 +6055,7 @@ glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type, int glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, - void *ctx) + uuid_t *txn_id, void *ctx) { int32_t ret = -1; glusterd_op_sm_event_t *event = NULL; @@ -5676,6 +6070,9 @@ glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, event->ctx = ctx; + if (txn_id) + uuid_copy (event->txn_id, *txn_id); + gf_log (THIS->name, GF_LOG_DEBUG, "Enqueue event: '%s'", glusterd_op_sm_event_name_get (event->event)); list_add_tail (&event->list, &gd_op_sm_queue); @@ -5736,6 +6133,7 @@ glusterd_op_sm () glusterd_op_sm_t *state = NULL; glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; xlator_t *this = NULL; + glusterd_op_info_t txn_op_info; this = THIS; GF_ASSERT (this); @@ -5756,6 +6154,20 @@ glusterd_op_sm () "type: '%s'", glusterd_op_sm_event_name_get(event_type)); + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", + uuid_utoa (event->txn_id)); + + ret = glusterd_get_txn_opinfo (&event->txn_id, + &txn_op_info); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to get transaction's opinfo"); + glusterd_destroy_op_event_ctx (event); + GF_FREE (event); + continue; + } else + opinfo = txn_op_info; + state = glusterd_op_state_table[opinfo.state.state]; GF_ASSERT (state); @@ -5786,8 +6198,27 @@ glusterd_op_sm () return ret; } + if ((state[event_type].next_state == + GD_OP_STATE_DEFAULT) && + (event_type == GD_OP_EVENT_UNLOCK)) { + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo(&event->txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear " + "transaction's opinfo"); + } else { + ret = glusterd_set_txn_opinfo (&event->txn_id, + &opinfo); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to set " + "transaction's opinfo"); + } + glusterd_destroy_op_event_ctx (event); GF_FREE (event); + } } @@ -5841,52 +6272,6 @@ glusterd_op_clear_op (glusterd_op_t op) } int32_t -glusterd_op_init_ctx (glusterd_op_t op) -{ - int ret = 0; - dict_t *dict = NULL; - xlator_t *this = NULL; - - this = THIS; - GF_ASSERT (this); - GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX); - - if (_gf_false == glusterd_need_brick_op (op)) { - gf_log (this->name, GF_LOG_DEBUG, "Received op: %s, returning", - gd_op_list[op]); - goto out; - } - dict = dict_new (); - if (dict == NULL) { - ret = -1; - goto out; - } - ret = glusterd_op_set_ctx (dict); - if (ret) - goto out; -out: - gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); - return ret; -} - - - -int32_t -glusterd_op_fini_ctx () -{ - dict_t *dict = NULL; - - dict = glusterd_op_get_ctx (); - if (dict) - dict_unref (dict); - - glusterd_op_reset_ctx (); - return 0; -} - - - -int32_t glusterd_op_free_ctx (glusterd_op_t op, void *ctx) { diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h index 1125368ce..53d4e2ff4 100644 --- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h @@ -77,6 +77,7 @@ struct glusterd_op_sm_event_ { struct list_head list; void *ctx; glusterd_op_sm_event_type_t event; + uuid_t txn_id; }; typedef struct glusterd_op_sm_event_ glusterd_op_sm_event_t; @@ -119,6 +120,7 @@ typedef struct glusterd_op_log_filename_ctx_ glusterd_op_log_filename_ctx_t; struct glusterd_op_lock_ctx_ { uuid_t uuid; + dict_t *dict; rpcsvc_request_t *req; }; @@ -180,7 +182,7 @@ glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type, glusterd_op_sm_event_t **new_event); int glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type, - void *ctx); + uuid_t *txn_id, void *ctx); int glusterd_op_sm_init (); @@ -264,10 +266,7 @@ glusterd_op_init_commit_rsp_dict (glusterd_op_t op); void glusterd_op_modify_op_ctx (glusterd_op_t op, void *op_ctx); -int32_t -glusterd_op_init_ctx (glusterd_op_t op); -int32_t -glusterd_op_fini_ctx (); + int32_t glusterd_volume_stats_read_perf (char *brick_path, int32_t blk_size, int32_t blk_count, double *throughput, double *time); @@ -296,4 +295,16 @@ int glusterd_is_valid_vg (glusterd_brickinfo_t *brick, int check_tag, char *msg); #endif +int32_t +glusterd_get_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo); + +int32_t +glusterd_set_txn_opinfo (uuid_t *txn_id, glusterd_op_info_t *opinfo); + +int32_t +glusterd_clear_txn_opinfo (uuid_t *txn_id); + +int32_t +glusterd_generate_txn_id (dict_t *dict, uuid_t **txn_id); + #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c index d13533aa6..cf23b6404 100644 --- a/xlators/mgmt/glusterd/src/glusterd-quota.c +++ b/xlators/mgmt/glusterd/src/glusterd-quota.c @@ -824,7 +824,7 @@ glusterd_quota_limit_usage (glusterd_volinfo_t *volinfo, dict_t *dict, } } - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ret = glusterd_set_quota_limit (volinfo->volname, path, hard_limit, soft_limit, op_errstr); @@ -919,7 +919,7 @@ glusterd_quota_remove_limits (glusterd_volinfo_t *volinfo, dict_t *dict, if (ret) goto out; - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ret = glusterd_remove_quota_limit (volinfo->volname, path, op_errstr); if (ret) @@ -1385,7 +1385,7 @@ glusterd_op_stage_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict) case GF_QUOTA_OPTION_TYPE_ENABLE: case GF_QUOTA_OPTION_TYPE_LIST: /* Fuse mount req. only for enable & list-usage options*/ - if (is_origin_glusterd () && + if (is_origin_glusterd (dict) && !glusterd_is_fuse_available ()) { *op_errstr = gf_strdup ("Fuse unavailable"); ret = -1; diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c index b28056135..b274e3367 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c +++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c @@ -545,7 +545,7 @@ glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr) case GF_DEFRAG_CMD_START: case GF_DEFRAG_CMD_START_LAYOUT_FIX: case GF_DEFRAG_CMD_START_FORCE: - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { op_ctx = glusterd_op_get_ctx (); if (!op_ctx) { ret = -1; diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c index 5c3fc2d82..f26108f89 100644 --- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c +++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c @@ -31,6 +31,7 @@ DEFAULT_VAR_RUN_DIRECTORY"/%s-"RB_CLIENT_MOUNTPOINT, \ volinfo->volname); +extern uuid_t global_txn_id; int glusterd_get_replace_op_str (gf1_cli_replace_op op, char *op_str) @@ -325,7 +326,7 @@ glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr, ret = -1; goto out; } - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { if (!ctx) { ret = -1; gf_log (this->name, GF_LOG_ERROR, @@ -1631,7 +1632,7 @@ glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict) /* Set task-id, if available, in op_ctx dict for operations * other than start */ - if (is_origin_glusterd ()) { + if (is_origin_glusterd (dict)) { ctx = glusterd_op_get_ctx(); if (!ctx) { gf_log (this->name, GF_LOG_ERROR, "Failed to " @@ -1894,6 +1895,7 @@ glusterd_do_replace_brick (void *data) glusterd_brickinfo_t *src_brickinfo = NULL; glusterd_brickinfo_t *dst_brickinfo = NULL; glusterd_conf_t *priv = NULL; + uuid_t *txn_id = &global_txn_id; int ret = 0; @@ -1913,6 +1915,10 @@ glusterd_do_replace_brick (void *data) gf_log ("", GF_LOG_DEBUG, "Replace brick operation detected"); + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ret = dict_get_int32 (dict, "operation", &op); if (ret) { gf_log ("", GF_LOG_DEBUG, @@ -2008,9 +2014,11 @@ glusterd_do_replace_brick (void *data) out: if (ret) - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + txn_id, NULL); else - ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL); + ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, + txn_id, NULL); synclock_lock (&priv->big_lock); { diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c index cd81383e9..9af26cfab 100644 --- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c @@ -33,6 +33,7 @@ extern glusterd_op_info_t opinfo; +extern uuid_t global_txn_id; int32_t glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret, @@ -574,6 +575,7 @@ __glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -618,7 +620,7 @@ out: event_type = GD_OP_EVENT_RCVD_ACC; } - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -637,6 +639,168 @@ glusterd_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov, __glusterd_cluster_lock_cbk); } +static int32_t +glusterd_vol_lock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_volume_lock_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode volume lock " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + op_ret = rsp.op_ret; + + txn_id = &rsp.txn_id; + + gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, + "Received volume lock %s from uuid: %s", + (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Volume lock response received " + "from unknown peer: %s. Ignoring response", + uuid_utoa (rsp.uuid)); + goto out; + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup ("Another transaction could be in " + "progress. Please try again after" + " sometime."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + +out: + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_vol_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + glusterd_vol_lock_cbk_fn); +} + +static int32_t +glusterd_vol_unlock_cbk_fn (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + int ret = -1; + int32_t op_ret = -1; + glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; + glusterd_peerinfo_t *peerinfo = NULL; + xlator_t *this = NULL; + uuid_t *txn_id = NULL; + + this = THIS; + GF_ASSERT (this); + GF_ASSERT (req); + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to decode volume unlock " + "response received from peer"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + op_ret = rsp.op_ret; + + txn_id = &rsp.txn_id; + + gf_log (this->name, (op_ret) ? GF_LOG_ERROR : GF_LOG_DEBUG, + "Received volume unlock %s from uuid: %s", + (op_ret) ? "RJT" : "ACC", + uuid_utoa (rsp.uuid)); + + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); + + if (ret) { + gf_log (this->name, GF_LOG_CRITICAL, + "Volume unlock response received " + "from unknown peer: %s. Ignoring response", + uuid_utoa (rsp.uuid)); + goto out; + } + + if (op_ret) { + event_type = GD_OP_EVENT_RCVD_RJT; + opinfo.op_ret = op_ret; + opinfo.op_errstr = gf_strdup ("Another transaction could be in " + "progress. Please try again after" + " sometime."); + } else { + event_type = GD_OP_EVENT_RCVD_ACC; + } + + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); + + if (!ret) { + glusterd_friend_sm (); + glusterd_op_sm (); + } + +out: + GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe)); + return ret; +} + +int32_t +glusterd_vol_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + glusterd_vol_unlock_cbk_fn); +} + int32_t __glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) @@ -647,6 +811,7 @@ __glusterd_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov, glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE; glusterd_peerinfo_t *peerinfo = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -688,7 +853,7 @@ out: event_type = GD_OP_EVENT_RCVD_ACC; } - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -720,6 +885,7 @@ __glusterd_stage_op_cbk (struct rpc_req *req, struct iovec *iov, char err_str[2048] = {0}; char *peer_str = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; this = THIS; GF_ASSERT (this); @@ -772,11 +938,17 @@ out: "Received stage %s from uuid: %s", (op_ret) ? "RJT" : "ACC", uuid_utoa (rsp.uuid)); + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); if (ret) { gf_log (this->name, GF_LOG_CRITICAL, "Stage response received " - "from unknown peer: %s", uuid_utoa (rsp.uuid)); + "from unknown peer: %s. Ignoring response.", + uuid_utoa (rsp.uuid)); + goto out; } if (op_ret) { @@ -807,7 +979,7 @@ out: break; } - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -847,6 +1019,8 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, char err_str[2048] = {0}; char *peer_str = NULL; xlator_t *this = NULL; + uuid_t *txn_id = NULL; + this = THIS; GF_ASSERT (this); @@ -900,6 +1074,10 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, "Received commit %s from uuid: %s", (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid)); + ret = dict_get_bin (dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo); if (ret) { @@ -979,7 +1157,7 @@ __glusterd_commit_op_cbk (struct rpc_req *req, struct iovec *iov, } out: - ret = glusterd_op_sm_inject_event (event_type, NULL); + ret = glusterd_op_sm_inject_event (event_type, txn_id, NULL); if (!ret) { glusterd_friend_sm (); @@ -1216,6 +1394,107 @@ out: } int32_t +glusterd_vol_lock (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_volume_lock_req req = {{0},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + dict_t *dict = NULL; + + if (!this) + goto out; + + dict = data; + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + //peerinfo should not be in payload + dict_del (dict, "peerinfo"); + + glusterd_get_uuid (&req.uuid); + + ret = dict_allocate_and_serialize (dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict " + "to request buffer"); + goto out; + } + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt_v3, + GLUSTERD_MGMT_V3_VOLUME_LOCK, NULL, + this, glusterd_vol_lock_cbk, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_req); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +glusterd_vol_unlock (call_frame_t *frame, xlator_t *this, + void *data) +{ + gd1_mgmt_volume_unlock_req req = {{0},}; + int ret = -1; + glusterd_peerinfo_t *peerinfo = NULL; + glusterd_conf_t *priv = NULL; + call_frame_t *dummy_frame = NULL; + dict_t *dict = NULL; + + if (!this) + goto out; + + dict = data; + + priv = this->private; + GF_ASSERT (priv); + + ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo)); + if (ret) + goto out; + + //peerinfo should not be in payload + dict_del (dict, "peerinfo"); + + glusterd_get_uuid (&req.uuid); + + ret = dict_allocate_and_serialize (dict, &req.dict.dict_val, + &req.dict.dict_len); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "Failed to serialize dict " + "to request buffer"); + goto out; + } + + dummy_frame = create_frame (this, this->ctx->pool); + if (!dummy_frame) + goto out; + + ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame, + peerinfo->mgmt_v3, + GLUSTERD_MGMT_V3_VOLUME_UNLOCK, NULL, + this, glusterd_vol_unlock_cbk, + (xdrproc_t) + xdr_gd1_mgmt_volume_unlock_req); +out: + gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t glusterd_cluster_unlock (call_frame_t *frame, xlator_t *this, void *data) { @@ -1378,6 +1657,7 @@ __glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, glusterd_req_ctx_t *req_ctx = NULL; glusterd_pending_node_t *node = NULL; xlator_t *this = NULL; + uuid_t *txn_id = &global_txn_id; this = THIS; GF_ASSERT (this); @@ -1440,6 +1720,11 @@ __glusterd_brick_op_cbk (struct rpc_req *req, struct iovec *iov, } } out: + + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), gf_gld_mt_brick_rsp_ctx_t); GF_ASSERT (ev_ctx); if (op_ret) { @@ -1452,7 +1737,7 @@ out: ev_ctx->pending_node = frame->cookie; ev_ctx->rsp_dict = dict; ev_ctx->commit_ctx = frame->local; - ret = glusterd_op_sm_inject_event (event_type, ev_ctx); + ret = glusterd_op_sm_inject_event (event_type, txn_id, ev_ctx); if (!ret) { glusterd_friend_sm (); glusterd_op_sm (); @@ -1477,6 +1762,7 @@ int32_t glusterd_brick_op (call_frame_t *frame, xlator_t *this, void *data) { + gd1_mgmt_brick_op_req *req = NULL; int ret = 0; glusterd_conf_t *priv = NULL; @@ -1487,6 +1773,7 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, glusterd_req_ctx_t *req_ctx = NULL; struct rpc_clnt *rpc = NULL; dict_t *op_ctx = NULL; + uuid_t *txn_id = &global_txn_id; if (!this) { ret = -1; @@ -1509,6 +1796,11 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, goto out; } + + ret = dict_get_bin (req_ctx->dict, "transaction_id", (void **)&txn_id); + + gf_log ("", GF_LOG_DEBUG, "transaction ID = %s", uuid_utoa (*txn_id)); + list_for_each_entry (pending_node, &opinfo.pending_bricks, list) { dummy_frame = create_frame (this, this->ctx->pool); if (!dummy_frame) @@ -1587,7 +1879,8 @@ glusterd_brick_op (call_frame_t *frame, xlator_t *this, out: if (ret) { - glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, data); + glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, + txn_id, data); opinfo.op_ret = ret; } gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret); @@ -1615,6 +1908,12 @@ struct rpc_clnt_procedure gd_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = { [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd_commit_op}, }; +struct rpc_clnt_procedure gd_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = { + [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL }, + [GLUSTERD_MGMT_V3_VOLUME_LOCK] = {"VOLUME_LOCK", glusterd_vol_lock}, + [GLUSTERD_MGMT_V3_VOLUME_UNLOCK] = {"VOLUME_UNLOCK", glusterd_vol_unlock}, +}; + struct rpc_clnt_program gd_mgmt_prog = { .progname = "glusterd mgmt", .prognum = GD_MGMT_PROGRAM, @@ -1639,4 +1938,10 @@ struct rpc_clnt_program gd_peer_prog = { .numproc = GLUSTERD_FRIEND_MAXVALUE, }; - +struct rpc_clnt_program gd_mgmt_v3_prog = { + .progname = "glusterd mgmt v3", + .prognum = GD_MGMT_PROGRAM, + .progver = GD_MGMT_V3_VERSION, + .proctable = gd_mgmt_v3_actors, + .numproc = GLUSTERD_MGMT_V3_MAXVALUE, +}; diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h index e29bb7277..b9bedbe69 100644 --- a/xlators/mgmt/glusterd/src/glusterd-sm.h +++ b/xlators/mgmt/glusterd/src/glusterd-sm.h @@ -97,6 +97,7 @@ struct glusterd_peerinfo_ { struct rpc_clnt *rpc; rpc_clnt_prog_t *mgmt; rpc_clnt_prog_t *peer; + rpc_clnt_prog_t *mgmt_v3; int connected; gf_store_handle_t *shandle; glusterd_sm_tr_log_t sm_log; diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c index 2c2fc6fb4..0ee430969 100644 --- a/xlators/mgmt/glusterd/src/glusterd-store.c +++ b/xlators/mgmt/glusterd/src/glusterd-store.c @@ -2085,7 +2085,8 @@ glusterd_store_retrieve_volume (char *volname) goto out; - list_add_tail (&volinfo->vol_list, &priv->volumes); + list_add_order (&volinfo->vol_list, &priv->volumes, + glusterd_compare_volume_name); out: gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret); diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c index c975d01f1..5eb5e9f38 100644 --- a/xlators/mgmt/glusterd/src/glusterd-syncop.c +++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c @@ -17,6 +17,9 @@ #include "glusterd.h" #include "glusterd-op-sm.h" #include "glusterd-utils.h" +#include "glusterd-locks.h" + +extern glusterd_op_info_t opinfo; static inline void gd_synctask_barrier_wait (struct syncargs *args, int count) @@ -31,6 +34,62 @@ gd_synctask_barrier_wait (struct syncargs *args, int count) } static void +gd_mgmt_v3_collate_errors (struct syncargs *args, int op_ret, int op_errno, + char *op_errstr, int op_code, + glusterd_peerinfo_t *peerinfo, u_char *uuid) +{ + char err_str[PATH_MAX] = "Please check log file for details."; + char op_err[PATH_MAX] = ""; + char *peer_str = NULL; + + if (op_ret) { + args->op_ret = op_ret; + args->op_errno = op_errno; + + if (peerinfo) + peer_str = peerinfo->hostname; + else + peer_str = uuid_utoa (uuid); + + if (op_errstr && strcmp (op_errstr, "")) + snprintf (err_str, sizeof(err_str) - 1, + "Error: %s", op_errstr); + + switch (op_code) { + case GLUSTERD_MGMT_V3_VOLUME_LOCK: + { + snprintf (op_err, sizeof(op_err) - 1, + "Locking volume failed " + "on %s. %s", peer_str, err_str); + break; + } + case GLUSTERD_MGMT_V3_VOLUME_UNLOCK: + { + snprintf (op_err, sizeof(op_err) - 1, + "Unlocking volume failed " + "on %s. %s", peer_str, err_str); + break; + } + } + + if (args->errstr) { + snprintf (err_str, sizeof(err_str) - 1, + "%s\n%s", args->errstr, + op_err); + GF_FREE (args->errstr); + args->errstr = NULL; + } else + snprintf (err_str, sizeof(err_str) - 1, + "%s", op_err); + + gf_log ("", GF_LOG_ERROR, "%s", op_err); + args->errstr = gf_strdup (err_str); + } + + return; +} + +static void gd_collate_errors (struct syncargs *args, int op_ret, int op_errno, char *op_errstr, int op_code, glusterd_peerinfo_t *peerinfo, u_char *uuid) @@ -207,6 +266,7 @@ out: /* Defined in glusterd-rpc-ops.c */ extern struct rpc_clnt_program gd_mgmt_prog; extern struct rpc_clnt_program gd_brick_prog; +extern struct rpc_clnt_program gd_mgmt_v3_prog; static int glusterd_syncop_aggr_rsp_dict (glusterd_op_t op, dict_t *aggr, dict_t *rsp) @@ -287,6 +347,185 @@ out: } int32_t +_gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_volume_lock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + + GF_ASSERT(req); + GF_ASSERT(iov); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_volume_lock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_VOLUME_LOCK, + peerinfo, rsp.uuid); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_volume_lock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_mgmt_volume_lock_cbk); +} + +int +gd_syncop_mgmt_volume_lock (glusterd_op_t op, dict_t *op_ctx, + glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) +{ + int ret = -1; + gd1_mgmt_volume_lock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + uuid_copy (req.txn_id, txn_id); + req.op = op; + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_VOLUME_LOCK, + gd_syncop_mgmt_volume_lock_cbk, + (xdrproc_t) + xdr_gd1_mgmt_volume_lock_req); + synclock_lock (&conf->big_lock); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t +_gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + int ret = -1; + struct syncargs *args = NULL; + glusterd_peerinfo_t *peerinfo = NULL; + gd1_mgmt_volume_unlock_rsp rsp = {{0},}; + call_frame_t *frame = NULL; + int op_ret = -1; + int op_errno = -1; + + GF_ASSERT(req); + GF_ASSERT(iov); + GF_ASSERT(myframe); + + frame = myframe; + args = frame->local; + peerinfo = frame->cookie; + frame->local = NULL; + frame->cookie = NULL; + + if (-1 == req->rpc_status) { + op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic (*iov, &rsp, + (xdrproc_t)xdr_gd1_mgmt_volume_unlock_rsp); + if (ret < 0) + goto out; + + uuid_copy (args->uuid, rsp.uuid); + + /* Set peer as locked, so we unlock only the locked peers */ + if (rsp.op_ret == 0) + peerinfo->locked = _gf_true; + op_ret = rsp.op_ret; + op_errno = rsp.op_errno; +out: + gd_mgmt_v3_collate_errors (args, op_ret, op_errno, NULL, + GLUSTERD_MGMT_V3_VOLUME_UNLOCK, + peerinfo, rsp.uuid); + STACK_DESTROY (frame->root); + synctask_barrier_wake(args); + return 0; +} + +int32_t +gd_syncop_mgmt_volume_unlock_cbk (struct rpc_req *req, struct iovec *iov, + int count, void *myframe) +{ + return glusterd_big_locked_cbk (req, iov, count, myframe, + _gd_syncop_mgmt_volume_unlock_cbk); +} + +int +gd_syncop_mgmt_volume_unlock (dict_t *op_ctx, glusterd_peerinfo_t *peerinfo, + struct syncargs *args, uuid_t my_uuid, + uuid_t recv_uuid, uuid_t txn_id) +{ + int ret = -1; + gd1_mgmt_volume_unlock_req req = {{0},}; + glusterd_conf_t *conf = THIS->private; + + GF_ASSERT(op_ctx); + GF_ASSERT(peerinfo); + GF_ASSERT(args); + + ret = dict_allocate_and_serialize (op_ctx, + &req.dict.dict_val, + &req.dict.dict_len); + if (ret) + goto out; + + uuid_copy (req.uuid, my_uuid); + uuid_copy (req.txn_id, txn_id); + synclock_unlock (&conf->big_lock); + ret = gd_syncop_submit_request (peerinfo->rpc, &req, args, peerinfo, + &gd_mgmt_v3_prog, + GLUSTERD_MGMT_V3_VOLUME_UNLOCK, + gd_syncop_mgmt_volume_unlock_cbk, + (xdrproc_t) + xdr_gd1_mgmt_volume_unlock_req); + synclock_lock (&conf->big_lock); +out: + gf_log ("", GF_LOG_DEBUG, "Returning %d", ret); + return ret; +} + +int32_t _gd_syncop_mgmt_lock_cbk (struct rpc_req *req, struct iovec *iov, int count, void *myframe) { @@ -830,8 +1069,8 @@ gd_build_peers_list (struct list_head *peers, struct list_head *xact_peers, } int -gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, - char **op_errstr, int npeers) +gd_lock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, dict_t *op_ctx, + char **op_errstr, int npeers, uuid_t txn_id) { int ret = -1; int peer_cnt = 0; @@ -839,6 +1078,9 @@ gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, xlator_t *this = NULL; glusterd_peerinfo_t *peerinfo = NULL; struct syncargs args = {0}; + struct list_head *peers = NULL; + + peers = &conf->xaction_peers; if (!npeers) { ret = 0; @@ -849,22 +1091,38 @@ gd_lock_op_phase (struct list_head *peers, glusterd_op_t op, dict_t *op_ctx, synctask_barrier_init((&args)); peer_cnt = 0; list_for_each_entry (peerinfo, peers, op_peers_list) { - /* Reset lock status */ - peerinfo->locked = _gf_false; - gd_syncop_mgmt_lock (peerinfo, &args, MY_UUID, peer_uuid); + if (conf->op_version < GD_OP_VERSION_4) { + /* Reset lock status */ + peerinfo->locked = _gf_false; + gd_syncop_mgmt_lock (peerinfo, &args, + MY_UUID, peer_uuid); + } else + gd_syncop_mgmt_volume_lock (op, op_ctx, peerinfo, &args, + MY_UUID, peer_uuid, txn_id); peer_cnt++; } gd_synctask_barrier_wait((&args), peer_cnt); - ret = args.op_ret; - if (ret) { - gf_asprintf (op_errstr, "Another transaction could be " - "in progress. Please try again after " - "sometime."); - gf_log (this->name, GF_LOG_ERROR, "Failed to acquire lock"); - goto out; + + if (args.op_ret) { + if (args.errstr) + *op_errstr = gf_strdup (args.errstr); + else { + ret = gf_asprintf (op_errstr, "Another transaction " + "could be in progress. Please try " + "again after sometime."); + if (ret == -1) + *op_errstr = NULL; + + gf_log (this->name, GF_LOG_ERROR, + "Failed to acquire lock"); + + } } - ret = 0; + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent lock op req for 'Volume %s' " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); out: return ret; } @@ -1055,9 +1313,10 @@ out: } int -gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, +gd_unlock_op_phase (glusterd_conf_t *conf, glusterd_op_t op, int op_ret, rpcsvc_request_t *req, dict_t *op_ctx, char *op_errstr, - int npeers, gf_boolean_t is_locked) + int npeers, char *volname, gf_boolean_t is_acquired, + uuid_t txn_id) { glusterd_peerinfo_t *peerinfo = NULL; glusterd_peerinfo_t *tmp = NULL; @@ -1066,6 +1325,9 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, int ret = -1; xlator_t *this = NULL; struct syncargs args = {0}; + struct list_head *peers = NULL; + + peers = &conf->xaction_peers; if (!npeers) { ret = 0; @@ -1074,24 +1336,40 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, /* If the lock has not been held during this * transaction, do not send unlock requests */ - if (!is_locked) + if (!is_acquired) goto out; this = THIS; synctask_barrier_init((&args)); peer_cnt = 0; - list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) { - /* Only unlock peers that were locked */ - if (peerinfo->locked) { - gd_syncop_mgmt_unlock (peerinfo, &args, MY_UUID, - tmp_uuid); - peer_cnt++; + if (conf->op_version < GD_OP_VERSION_4) { + list_for_each_entry_safe (peerinfo, tmp, peers, op_peers_list) { + /* Only unlock peers that were locked */ + if (peerinfo->locked) { + gd_syncop_mgmt_unlock (peerinfo, &args, + MY_UUID, tmp_uuid); + peer_cnt++; + list_del_init (&peerinfo->op_peers_list); + } + } + } else { + if (volname) { + list_for_each_entry_safe (peerinfo, tmp, + peers, op_peers_list) { + gd_syncop_mgmt_volume_unlock (op_ctx, peerinfo, + &args, MY_UUID, + tmp_uuid, txn_id); + peer_cnt++; + list_del_init (&peerinfo->op_peers_list); + } } - - list_del_init (&peerinfo->op_peers_list); } gd_synctask_barrier_wait((&args), peer_cnt); + ret = args.op_ret; + + gf_log (this->name, GF_LOG_DEBUG, "Sent unlock op req for 'Volume %s' " + "to %d peers. Returning %d", gd_op_list[op], peer_cnt, ret); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to unlock " "on some peer(s)"); @@ -1099,9 +1377,25 @@ gd_unlock_op_phase (struct list_head *peers, glusterd_op_t op, int op_ret, out: glusterd_op_send_cli_response (op, op_ret, 0, req, op_ctx, op_errstr); - glusterd_op_clear_op (op); - if (is_locked) - glusterd_unlock (MY_UUID); + + if (is_acquired) { + /* Based on the op-version, + * we release the cluster or volume lock + * and clear the op */ + + glusterd_op_clear_op (op); + if (conf->op_version < GD_OP_VERSION_4) + glusterd_unlock (MY_UUID); + else { + if (volname) { + ret = glusterd_volume_unlock (volname, MY_UUID); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to release lock for %s", + volname); + } + } + } return 0; } @@ -1118,7 +1412,8 @@ gd_get_brick_count (struct list_head *bricks) } int -gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, char **op_errstr) +gd_brick_op_phase (glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict, + char **op_errstr) { glusterd_pending_node_t *pending_node = NULL; struct list_head selected = {0,}; @@ -1190,21 +1485,43 @@ out: void gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) { - int ret = -1; - int npeers = 0; - dict_t *req_dict = NULL; - glusterd_conf_t *conf = NULL; - glusterd_op_t op = 0; - int32_t tmp_op = 0; - char *op_errstr = NULL; - xlator_t *this = NULL; - gf_boolean_t is_locked = _gf_false; + int ret = -1; + int npeers = 0; + dict_t *req_dict = NULL; + glusterd_conf_t *conf = NULL; + glusterd_op_t op = 0; + int32_t tmp_op = 0; + char *op_errstr = NULL; + char *tmp = NULL; + char *volname = NULL; + xlator_t *this = NULL; + gf_boolean_t is_acquired = _gf_false; + uuid_t *txn_id = NULL; + glusterd_op_info_t txn_opinfo; this = THIS; GF_ASSERT (this); conf = this->private; GF_ASSERT (conf); + /* Generate a transaction-id for this operation and + * save it in the dict */ + ret = glusterd_generate_txn_id (op_ctx, &txn_id); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to generate transaction id"); + goto out; + + } + + /* Save the MY_UUID as the originator_uuid */ + ret = glusterd_set_originator_uuid (op_ctx); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + ret = dict_get_int32 (op_ctx, GD_SYNC_OPCODE_KEY, &tmp_op); if (ret) { gf_log (this->name, GF_LOG_ERROR, "Failed to get volume " @@ -1213,28 +1530,73 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) } op = tmp_op; - ret = glusterd_lock (MY_UUID); - if (ret) { - gf_log (this->name, GF_LOG_ERROR, "Unable to acquire lock"); - gf_asprintf (&op_errstr, "Another transaction is in progress. " - "Please try again after sometime."); - goto out; + + /* Based on the op_version, acquire a cluster or volume lock */ + if (conf->op_version < GD_OP_VERSION_4) { + ret = glusterd_lock (MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock"); + gf_asprintf (&op_errstr, + "Another transaction is in progress. " + "Please try again after sometime."); + goto out; + } + } else { + + /* If no volname is given as a part of the command, locks will + * not be held */ + ret = dict_get_str (op_ctx, "volname", &tmp); + if (ret) { + gf_log ("", GF_LOG_DEBUG, "Failed to get volume " + "name"); + goto local_locking_done; + } else { + /* Use a copy of volname, as cli response will be + * sent before the unlock, and the volname in the + * dict, might be removed */ + volname = gf_strdup (tmp); + if (!volname) + goto out; + } + + ret = glusterd_volume_lock (volname, MY_UUID); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to acquire lock for %s", volname); + gf_asprintf (&op_errstr, + "Another transaction is in progress " + "for %s. Please try again after sometime.", + volname); + goto out; + } } - is_locked = _gf_true; + is_acquired = _gf_true; + +local_locking_done: + + /* Save opinfo for this transaction with the transaction id */ + glusterd_txn_opinfo_init (&txn_opinfo, NULL, &op, NULL, NULL); + ret = glusterd_set_txn_opinfo (txn_id, &txn_opinfo); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to set transaction's opinfo"); + + opinfo = txn_opinfo; - /* storing op globally to access in synctask code paths - * This is still acceptable, as we are performing this under - * the 'cluster' lock*/ - glusterd_op_set_op (op); INIT_LIST_HEAD (&conf->xaction_peers); npeers = gd_build_peers_list (&conf->peers, &conf->xaction_peers, op); - ret = gd_lock_op_phase (&conf->xaction_peers, op, op_ctx, &op_errstr, - npeers); - if (ret) - goto out; + /* If no volname is given as a part of the command, locks will + * not be held */ + if (volname) { + ret = gd_lock_op_phase (conf, op, op_ctx, &op_errstr, + npeers, *txn_id); + if (ret) + goto out; + } ret = glusterd_op_build_payload (&req_dict, &op_errstr, op_ctx); if (ret) { @@ -1261,8 +1623,17 @@ gd_sync_task_begin (dict_t *op_ctx, rpcsvc_request_t * req) ret = 0; out: - (void) gd_unlock_op_phase (&conf->xaction_peers, op, ret, req, - op_ctx, op_errstr, npeers, is_locked); + (void) gd_unlock_op_phase (conf, op, ret, req, op_ctx, op_errstr, + npeers, volname, is_acquired, *txn_id); + + /* Clearing the transaction opinfo */ + ret = glusterd_clear_txn_opinfo (txn_id); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Unable to clear transaction's opinfo"); + + if (volname) + GF_FREE (volname); if (req_dict) dict_unref (req_dict); diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c index 21a973086..1eb03f1de 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.c +++ b/xlators/mgmt/glusterd/src/glusterd-utils.c @@ -37,6 +37,7 @@ #include "glusterd-pmap.h" #include "glusterfs-acl.h" #include "glusterd-syncop.h" +#include "glusterd-locks.h" #include "xdr-generic.h" #include <sys/resource.h> @@ -1307,16 +1308,6 @@ out: return ret; } -/* Caller should ensure that brick process is not running*/ -static void -_reap_brick_process (char *pidfile, char *brickpath) -{ - unlink (pidfile); - /* Brick process is not running and pmap may have an entry for it.*/ - pmap_registry_remove (THIS, 0, brickpath, - GF_PMAP_PORT_BRICKSERVER, NULL); -} - static int _mk_rundir_p (glusterd_volinfo_t *volinfo) { @@ -1371,8 +1362,6 @@ glusterd_volume_start_glusterfs (glusterd_volinfo_t *volinfo, if (gf_is_service_running (pidfile, NULL)) goto connect; - _reap_brick_process (pidfile, brickinfo->path); - port = brickinfo->port; if (!port) port = pmap_registry_alloc (THIS); @@ -3544,7 +3533,8 @@ glusterd_import_friend_volume (dict_t *vols, size_t count) if (ret) goto out; - list_add_tail (&new_volinfo->vol_list, &priv->volumes); + list_add_order (&new_volinfo->vol_list, &priv->volumes, + glusterd_compare_volume_name); out: gf_log ("", GF_LOG_DEBUG, "Returning with ret: %d", ret); return ret; @@ -7911,7 +7901,7 @@ glusterd_volume_status_copy_to_op_ctx_dict (dict_t *aggr, dict_t *rsp_dict) if (ret) goto out; - if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd ()) { + if (cmd & GF_CLI_STATUS_ALL && is_origin_glusterd (ctx_dict)) { ret = dict_get_int32 (rsp_dict, "vol_count", &vol_count); if (ret == 0) { ret = dict_set_int32 (ctx_dict, "vol_count", @@ -8802,20 +8792,66 @@ glusterd_handle_node_rsp (dict_t *req_dict, void *pending_entry, return ret; } +int32_t +glusterd_set_originator_uuid (dict_t *dict) +{ + int ret = -1; + uuid_t *originator_uuid = NULL; + + GF_ASSERT (dict); + + originator_uuid = GF_CALLOC (1, sizeof(uuid_t), + gf_common_mt_uuid_t); + if (!originator_uuid) { + ret = -1; + goto out; + } + + uuid_copy (*originator_uuid, MY_UUID); + ret = dict_set_bin (dict, "originator_uuid", + originator_uuid, sizeof (uuid_t)); + if (ret) { + gf_log ("", GF_LOG_ERROR, + "Failed to set originator_uuid."); + goto out; + } + +out: + if (ret && originator_uuid) + GF_FREE (originator_uuid); + + return ret; +} + /* Should be used only when an operation is in progress, as that is the only * time a lock_owner is set */ gf_boolean_t -is_origin_glusterd () +is_origin_glusterd (dict_t *dict) { - int ret = 0; - uuid_t lock_owner = {0,}; + gf_boolean_t ret = _gf_false; + uuid_t lock_owner = {0,}; + uuid_t *originator_uuid = NULL; - ret = glusterd_get_lock_owner (&lock_owner); - if (ret) - return _gf_false; + GF_ASSERT (dict); + + ret = dict_get_bin (dict, "originator_uuid", + (void **) &originator_uuid); + if (ret) { + /* If not originator_uuid has been set, then the command + * has been originated from a glusterd running on older version + * Hence fetching the lock owner */ + ret = glusterd_get_lock_owner (&lock_owner); + if (ret) { + ret = _gf_false; + goto out; + } + ret = !uuid_compare (MY_UUID, lock_owner); + } else + ret = !uuid_compare (MY_UUID, *originator_uuid); - return (uuid_compare (MY_UUID, lock_owner) == 0); +out: + return ret; } int @@ -9352,3 +9388,13 @@ glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc) return ret; } +int32_t +glusterd_compare_volume_name(struct list_head *list1, struct list_head *list2) +{ + glusterd_volinfo_t *volinfo1 = NULL; + glusterd_volinfo_t *volinfo2 = NULL; + + volinfo1 = list_entry(list1, glusterd_volinfo_t, vol_list); + volinfo2 = list_entry(list2, glusterd_volinfo_t, vol_list); + return strcmp(volinfo1->volname, volinfo2->volname); +} diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.h b/xlators/mgmt/glusterd/src/glusterd-utils.h index 05d5c7172..cd22b2960 100644 --- a/xlators/mgmt/glusterd/src/glusterd-utils.h +++ b/xlators/mgmt/glusterd/src/glusterd-utils.h @@ -529,11 +529,14 @@ glusterd_are_vol_all_peers_up (glusterd_volinfo_t *volinfo, struct list_head *peers, char **down_peerstr); +int32_t +glusterd_set_originator_uuid (dict_t *dict); + /* Should be used only when an operation is in progress, as that is the only * time a lock_owner is set */ gf_boolean_t -is_origin_glusterd (); +is_origin_glusterd (dict_t *dict); gf_boolean_t glusterd_is_quorum_changed (dict_t *options, char *option, char *value); @@ -644,4 +647,7 @@ gd_stop_rebalance_process (glusterd_volinfo_t *volinfo); rpc_clnt_t * glusterd_rpc_clnt_unref (glusterd_conf_t *conf, rpc_clnt_t *rpc); + +int32_t +glusterd_compare_volume_name(struct list_head *, struct list_head *); #endif diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 303abd1bc..102bd9d8b 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -361,10 +361,9 @@ volopt_trie_section (int lvl, char **patt, char *word, char **hint, int hints) GF_ASSERT (hints <= 2); nodevec.cnt = hints; ret = trie_measure_vec (trie, word, &nodevec); - if (ret || !nodevec.nodes[0]) - trie_destroy (trie); + if (!ret && nodevec.nodes[0]) + ret = process_nodevec (&nodevec, hint); - ret = process_nodevec (&nodevec, hint); trie_destroy (trie); return ret; @@ -2839,6 +2838,9 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, xlator_t *xl = NULL; char *volname = NULL; glusterd_conf_t *conf = THIS->private; + char *tmp = NULL; + gf_boolean_t var = _gf_false; + gf_boolean_t ob = _gf_false; GF_ASSERT (conf); @@ -2904,7 +2906,93 @@ client_graph_builder (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, } } - ret = client_graph_set_perf_options(graph, volinfo, set_dict); + /* Do not allow changing read-after-open option if root-squash is + enabled. + */ + ret = dict_get_str (set_dict, "performance.read-after-open", &tmp); + if (!ret) { + ret = dict_get_str (volinfo->dict, "server.root-squash", &tmp); + if (!ret) { + ob = _gf_false; + ret = gf_string2boolean (tmp, &ob); + if (!ret && ob) { + gf_log (THIS->name, GF_LOG_WARNING, + "root-squash is enabled. Please turn it" + " off to change read-after-open " + "option"); + ret = -1; + goto out; + } + } + } + + /* open behind causes problems when root-squash is enabled + (by allowing reads to happen even though the squashed user + does not have permissions to do so) as it fakes open to be + successful and later sends reads on anonymous fds. So when + root-squash is enabled, open-behind's option to read after + open is done is also enabled. + */ + ret = dict_get_str (set_dict, "server.root-squash", &tmp); + if (!ret) { + ret = gf_string2boolean (tmp, &var); + if (ret) + goto out; + + if (var) { + ret = dict_get_str (volinfo->dict, + "performance.read-after-open", + &tmp); + if (!ret) { + ret = gf_string2boolean (tmp, &ob); + /* go ahead with turning read-after-open on + even if string2boolean conversion fails, + OR if read-after-open option is turned off + */ + if (ret || !ob) + ret = dict_set_str (set_dict, + "performance.read-after-open", + "yes"); + } else { + ret = dict_set_str (set_dict, + "performance.read-after-open", + "yes"); + } + } else { + /* When root-squash has to be turned off, open-behind's + read-after-open option should be reset to what was + there before root-squash was turned on. If the option + cannot be found in volinfo's dict, it means that + option was not set before turning on root-squash. + */ + ob = _gf_false; + ret = dict_get_str (volinfo->dict, + "performance.read-after-open", + &tmp); + if (!ret) { + ret = gf_string2boolean (tmp, &ob); + + if (!ret && ob) { + ret = dict_set_str (set_dict, + "performance.read-after-open", + "yes"); + } + } + /* consider operation is failure only if read-after-open + option is enabled and could not set into set_dict + */ + if (!ob) + ret = 0; + } + if (ret) { + gf_log (THIS->name, GF_LOG_WARNING, "setting " + "open behind option as part of root " + "squash failed"); + goto out; + } + } + + ret = client_graph_set_perf_options(graph, volinfo, set_dict); if (ret) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c index 051e7d756..4acea7686 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-ops.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-ops.c @@ -1708,7 +1708,8 @@ glusterd_op_create_volume (dict_t *dict, char **op_errstr) } volinfo->rebal.defrag_status = 0; - list_add_tail (&volinfo->vol_list, &priv->volumes); + list_add_order (&volinfo->vol_list, &priv->volumes, + glusterd_compare_volume_name); vol_added = _gf_true; out: diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 8d29e8edc..4e3162f01 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -711,6 +711,12 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = 3, .flags = OPT_FLAG_CLIENT_OPT }, + { .key = "performance.read-after-open", + .voltype = "performance/open-behind", + .option = "read-after-open", + .op_version = 3, + .flags = OPT_FLAG_CLIENT_OPT + }, { .key = "performance.read-ahead-page-count", .voltype = "performance/read-ahead", .option = "page-count", diff --git a/xlators/mgmt/glusterd/src/glusterd.c b/xlators/mgmt/glusterd/src/glusterd.c index 834a39968..1b04143be 100644 --- a/xlators/mgmt/glusterd/src/glusterd.c +++ b/xlators/mgmt/glusterd/src/glusterd.c @@ -36,6 +36,7 @@ #include "glusterd-store.h" #include "glusterd-hooks.h" #include "glusterd-utils.h" +#include "glusterd-locks.h" #include "common-utils.h" #include "run.h" @@ -48,6 +49,7 @@ extern struct rpcsvc_program gluster_cli_getspec_prog; extern struct rpcsvc_program gluster_pmap_prog; extern glusterd_op_info_t opinfo; extern struct rpcsvc_program gd_svc_mgmt_prog; +extern struct rpcsvc_program gd_svc_mgmt_v3_prog; extern struct rpcsvc_program gd_svc_peer_prog; extern struct rpcsvc_program gd_svc_cli_prog; extern struct rpcsvc_program gd_svc_cli_prog_ro; @@ -64,6 +66,7 @@ struct rpcsvc_program *gd_inet_programs[] = { &gd_svc_peer_prog, &gd_svc_cli_prog_ro, &gd_svc_mgmt_prog, + &gd_svc_mgmt_v3_prog, &gluster_pmap_prog, &gluster_handshake_prog, &glusterd_mgmt_hndsk_prog, @@ -1303,6 +1306,8 @@ init (xlator_t *this) glusterd_friend_sm_init (); glusterd_op_sm_init (); glusterd_opinfo_init (); + glusterd_vol_lock_init (); + glusterd_txn_opinfo_dict_init (); ret = glusterd_sm_tr_log_init (&conf->op_sm_log, glusterd_op_sm_state_name_get, glusterd_op_sm_event_name_get, @@ -1438,6 +1443,8 @@ fini (xlator_t *this) if (conf->handle) gf_store_handle_destroy (conf->handle); glusterd_sm_tr_log_delete (&conf->op_sm_log); + glusterd_vol_lock_fini (); + glusterd_txn_opinfo_dict_fini (); GF_FREE (conf); this->private = NULL; diff --git a/xlators/mgmt/glusterd/src/glusterd.h b/xlators/mgmt/glusterd/src/glusterd.h index e704de44b..d041be4a2 100644 --- a/xlators/mgmt/glusterd/src/glusterd.h +++ b/xlators/mgmt/glusterd/src/glusterd.h @@ -545,6 +545,14 @@ int glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status); int +glusterd_op_volume_lock_send_resp (rpcsvc_request_t *req, + uuid_t *txn_id, int32_t status); + +int +glusterd_op_volume_unlock_send_resp (rpcsvc_request_t *req, + uuid_t *txn_id, int32_t status); + +int glusterd_op_stage_send_resp (rpcsvc_request_t *req, int32_t op, int32_t status, char *op_errstr, dict_t *rsp_dict); @@ -817,4 +825,14 @@ int32_t glusterd_op_begin_synctask (rpcsvc_request_t *req, glusterd_op_t op, void *dict); int32_t glusterd_defrag_event_notify_handle (dict_t *dict); + +int32_t +glusterd_txn_opinfo_dict_init (); + +void +glusterd_txn_opinfo_dict_fini (); + +void +glusterd_txn_opinfo_init (); + #endif diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 315259ece..d9055468e 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -5323,6 +5323,18 @@ init (xlator_t *this_xl) GF_OPTION_INIT ("congestion-threshold", priv->congestion_threshold, int32, cleanup_exit); + GF_OPTION_INIT("no-root-squash", priv->no_root_squash, bool, + cleanup_exit); + /* change the client_pid to no-root-squash pid only if the + client is none of defrag process, hadoop access and gsyncd process. + */ + if (!priv->client_pid_set) { + if (priv->no_root_squash == _gf_true) { + priv->client_pid_set = _gf_true; + priv->client_pid = GF_CLIENT_PID_NO_ROOT_SQUASH; + } + } + /* user has set only background-qlen, not congestion-threshold, use the fuse kernel driver formula to set congestion. ie, 75% */ if (dict_get (this_xl->options, "background-qlen") && @@ -5563,5 +5575,15 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL, .default_value = "yes" }, + { .key = {"no-root-squash"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "This is the mount option for disabling the " + "root squash for the client irrespective of whether the root-squash " + "option for the volume is set or not. But this option is honoured " + "only for the trusted clients. For non trusted clients this value " + "does not have any affect and the volume option for root-squash is " + "honoured.", + }, { .key = {NULL} }, }; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index 34794b6ea..f1c4cb3f0 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -104,6 +104,14 @@ struct fuse_private { int32_t fopen_keep_cache; int32_t gid_cache_timeout; gf_boolean_t enable_ino32; + /* This is the mount option for disabling the root-squash for the + mount irrespective of whether the root-squash option for the + volume is set or not. But this option is honoured only for + thr trusted clients. For non trusted clients this value does + not have any affect and the volume option for root-squash is + honoured. + */ + gf_boolean_t no_root_squash; fdtable_t *fdtable; gid_cache_t gid_cache; char *fuse_mountopts; diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index 2799ec847..d22f6a69b 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -151,6 +151,9 @@ start_glusterfs () if [ -n "$worm" ]; then cmd_line=$(echo "$cmd_line --worm"); fi + if [ -n "$volfile_max_fetch_attempts" ]; then + cmd_line=$(echo "$cmd_line --volfile-max-fetch-attempts=$volfile_max_fetch_attempts") + fi if [ -n "$fopen_keep_cache" ]; then cmd_line=$(echo "$cmd_line --fopen-keep-cache"); @@ -168,7 +171,11 @@ start_glusterfs () cmd_line=$(echo "$cmd_line --aux-gfid-mount"); fi - # options with values start here + if [ -n "$no_root_squash" ]; then + cmd_line=$(echo "$cmd_line --no-root-squash"); + fi + +#options with values start here if [ -n "$log_level" ]; then cmd_line=$(echo "$cmd_line --log-level=$log_level"); fi @@ -424,6 +431,9 @@ with_options() "backupvolfile-server") backupvolfile_server=$value ;; + "fetch-attempts") + volfile_max_fetch_attempts=$value + ;; "congestion-threshold") cong_threshold=$value ;; @@ -436,6 +446,13 @@ with_options() "use-readdirp") use_readdirp=$value ;; + "root-squash") + if [ $value == "no" ] || + [ $value == "off" ] || + [ $value == "disable" ] || + [ $value == "false" ] ; then + no_root_squash=1; + fi ;; *) echo "Invalid option: $key" exit 0 diff --git a/xlators/nfs/server/src/acl3.c b/xlators/nfs/server/src/acl3.c index 25476ebbe..43156eb44 100644 --- a/xlators/nfs/server/src/acl3.c +++ b/xlators/nfs/server/src/acl3.c @@ -229,13 +229,21 @@ acl3svc_null (rpcsvc_request_t *req) } int -acl3_getacl_reply (nfs3_call_state_t *cs, getaclreply *reply) +acl3_getacl_reply (rpcsvc_request_t *req, getaclreply *reply) { - acl3svc_submit_reply (cs->req, (void *)reply, + acl3svc_submit_reply (req, (void *)reply, (acl3_serializer)xdr_serialize_getaclreply); return 0; } +int +acl3_setacl_reply (rpcsvc_request_t *req, setaclreply *reply) +{ + acl3svc_submit_reply (req, (void *)reply, + (acl3_serializer)xdr_serialize_setaclreply); + return 0; +} + int acl3_getacl_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -300,14 +308,14 @@ acl3_getacl_cbk (call_frame_t *frame, void *cookie, xlator_t *this, getaclreply->daclentry.daclentry_len = aclcount; } - acl3_getacl_reply (cs, getaclreply); + acl3_getacl_reply (cs->req, getaclreply); nfs3_call_state_wipe (cs); return 0; err: if (getaclreply) getaclreply->status = stat; - acl3_getacl_reply (cs, getaclreply); + acl3_getacl_reply (cs->req, getaclreply); nfs3_call_state_wipe (cs); return 0; } @@ -354,7 +362,7 @@ acl3_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; err: getaclreply->status = stat; - acl3_getacl_reply (cs, getaclreply); + acl3_getacl_reply (cs->req, getaclreply); nfs3_call_state_wipe (cs); return 0; } @@ -382,7 +390,7 @@ acl3err: if (ret < 0) { gf_log (GF_ACL, GF_LOG_ERROR, "unable to open_and_resume"); cs->args.getaclreply.status = nfs3_errno_to_nfsstat3 (stat); - acl3_getacl_reply (cs, &cs->args.getaclreply); + acl3_getacl_reply (cs->req, &cs->args.getaclreply); nfs3_call_state_wipe (cs); } @@ -401,6 +409,7 @@ acl3svc_getacl (rpcsvc_request_t *req) nfsstat3 stat = NFS3ERR_SERVERFAULT; struct nfs3_fh fh, *fhp = NULL; getaclargs getaclargs; + getaclreply getaclreply; if (!req) return ret; @@ -408,6 +417,7 @@ acl3svc_getacl (rpcsvc_request_t *req) acl3_validate_nfs3_state (req, nfs3, stat, rpcerr, ret); nfs = nfs_state (nfs3->nfsx); memset (&getaclargs, 0, sizeof (getaclargs)); + memset (&getaclreply, 0, sizeof (getaclreply)); getaclargs.fh.n_bytes = (char *)&fh; if (xdr_to_getaclargs(req->msg[0], &getaclargs) <= 0) { gf_log (GF_ACL, GF_LOG_ERROR, "Error decoding args"); @@ -423,26 +433,23 @@ acl3svc_getacl (rpcsvc_request_t *req) fhp = &fh; acl3_validate_gluster_fh (&fh, stat, acl3err); - acl3_map_fh_to_volume (nfs->nfs3state, fhp, req, - vol, stat, acl3err); + acl3_map_fh_to_volume (nfs->nfs3state, fhp, req, vol, stat, acl3err); + acl3_volume_started_check (nfs3, vol, ret, rpcerr); acl3_handle_call_state_init (nfs->nfs3state, cs, req, - vol, stat, rpcerr); + vol, stat, acl3err); cs->vol = vol; cs->args.getaclreply.mask = getaclargs.mask; - acl3_volume_started_check (nfs3, vol, ret, acl3err); - ret = nfs3_fh_resolve_and_resume (cs, fhp, - NULL, acl3_getacl_resume); + ret = nfs3_fh_resolve_and_resume (cs, fhp, NULL, acl3_getacl_resume); + stat = nfs3_errno_to_nfsstat3 (-ret); acl3err: if (ret < 0) { gf_log (GF_ACL, GF_LOG_ERROR, "unable to resolve and resume"); - if (cs) { - cs->args.getaclreply.status = stat; - acl3_getacl_reply (cs, &cs->args.getaclreply); - nfs3_call_state_wipe (cs); - } + getaclreply.status = stat; + acl3_getacl_reply (req, &getaclreply); + nfs3_call_state_wipe (cs); return 0; } @@ -462,8 +469,8 @@ acl3_setacl_cbk (call_frame_t *frame, void *cookie, cs->args.setaclreply.status = status; } - acl3svc_submit_reply (cs->req, (void *)&cs->args.setaclreply, - (acl3_serializer)xdr_serialize_setaclreply); + acl3_setacl_reply (cs->req, &cs->args.setaclreply); + return 0; } @@ -500,8 +507,7 @@ acl3err: stat = -ret; gf_log (GF_ACL, GF_LOG_ERROR, "unable to open_and_resume"); cs->args.setaclreply.status = nfs3_errno_to_nfsstat3 (stat); - acl3svc_submit_reply (cs->req, (void *)&cs->args.setaclreply, - (acl3_serializer)xdr_serialize_setaclreply); + acl3_setacl_reply (cs->req, &cs->args.setaclreply); nfs3_call_state_wipe (cs); } @@ -521,8 +527,9 @@ acl3svc_setacl (rpcsvc_request_t *req) struct nfs3_fh fh; struct nfs3_fh *fhp = NULL; setaclargs setaclargs; + setaclreply setaclreply; + aclentry *daclentry = NULL; aclentry *aclentry = NULL; - struct aclentry *daclentry = NULL; int aclerrno = 0; int defacl = 1; @@ -542,6 +549,7 @@ acl3svc_setacl (rpcsvc_request_t *req) acl3_validate_nfs3_state (req, nfs3, stat, rpcerr, ret); nfs = nfs_state (nfs3->nfsx); memset (&setaclargs, 0, sizeof (setaclargs)); + memset (&setaclreply, 0, sizeof (setaclreply)); memset (&fh, 0, sizeof (fh)); setaclargs.fh.n_bytes = (char *)&fh; setaclargs.aclentry.aclentry_val = aclentry; @@ -560,14 +568,12 @@ acl3svc_setacl (rpcsvc_request_t *req) fhp = &fh; acl3_validate_gluster_fh (fhp, stat, acl3err); - acl3_map_fh_to_volume (nfs->nfs3state, fhp, req, - vol, stat, acl3err); + acl3_map_fh_to_volume (nfs->nfs3state, fhp, req, vol, stat, acl3err); + acl3_volume_started_check (nfs3, vol, ret, rpcerr); acl3_handle_call_state_init (nfs->nfs3state, cs, req, - vol, stat, rpcerr); + vol, stat, acl3err); cs->vol = vol; - acl3_volume_started_check (nfs3, vol, ret, rpcerr); - cs->aclcount = setaclargs.aclcount; cs->daclcount = setaclargs.daclcount; @@ -593,15 +599,14 @@ acl3svc_setacl (rpcsvc_request_t *req) goto acl3err; } - ret = nfs3_fh_resolve_and_resume (cs, fhp, - NULL, acl3_setacl_resume); + ret = nfs3_fh_resolve_and_resume (cs, fhp, NULL, acl3_setacl_resume); + stat = nfs3_errno_to_nfsstat3 (-ret); acl3err: if (ret < 0) { gf_log (GF_ACL, GF_LOG_ERROR, "unable to resolve and resume"); - cs->args.setaclreply.status = stat; - acl3svc_submit_reply (cs->req, (void *)&cs->args.setaclreply, - (acl3_serializer)xdr_serialize_setaclreply); + setaclreply.status = stat; + acl3_setacl_reply (req, &setaclreply); nfs3_call_state_wipe (cs); GF_FREE(aclentry); GF_FREE(daclentry); diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c index 94b8f229b..a21b31816 100644 --- a/xlators/performance/io-cache/src/page.c +++ b/xlators/performance/io-cache/src/page.c @@ -315,6 +315,7 @@ __ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset, local->op_errno = ENOMEM; gf_log (frame->this->name, GF_LOG_WARNING, "asked to wait on a NULL page"); + goto out; } waitq = GF_CALLOC (1, sizeof (*waitq), gf_ioc_mt_ioc_waitq_t); @@ -476,6 +477,7 @@ ioc_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this, iobref_unref (page->iobref); GF_FREE (page->vector); page->vector = NULL; + page->iobref = NULL; } /* keep a copy of the page for our cache */ diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c index 29ef64364..742e4df3f 100644 --- a/xlators/performance/open-behind/src/open-behind.c +++ b/xlators/performance/open-behind/src/open-behind.c @@ -23,6 +23,11 @@ typedef struct ob_conf { like mandatory locks */ gf_boolean_t lazy_open; /* delay backend open as much as possible */ + gf_boolean_t read_after_open; /* instead of sending readvs on + anonymous fds, open the file + first and then send readv i.e + similar to what writev does + */ } ob_conf_t; @@ -367,8 +372,14 @@ ob_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, { call_stub_t *stub = NULL; fd_t *wind_fd = NULL; + ob_conf_t *conf = NULL; - wind_fd = ob_get_wind_fd (this, fd); + conf = this->private; + + if (!conf->read_after_open) + wind_fd = ob_get_wind_fd (this, fd); + else + wind_fd = fd_ref (fd); stub = fop_readv_stub (frame, default_readv_resume, wind_fd, size, offset, flags, xdata); @@ -894,6 +905,8 @@ reconfigure (xlator_t *this, dict_t *options) bool, out); GF_OPTION_RECONF ("lazy-open", conf->lazy_open, options, bool, out); + GF_OPTION_RECONF ("read-after-open", conf->read_after_open, options, + bool, out); ret = 0; out: @@ -924,7 +937,7 @@ init (xlator_t *this) GF_OPTION_INIT ("use-anonymous-fd", conf->use_anonymous_fd, bool, err); GF_OPTION_INIT ("lazy-open", conf->lazy_open, bool, err); - + GF_OPTION_INIT ("read-after-open", conf->read_after_open, bool, err); this->private = conf; return 0; @@ -996,6 +1009,12 @@ struct volume_options options[] = { "FOP arrives (e.g writev on the FD, unlink of the file). When option " "is disabled, perform backend open right after unwinding open().", }, + { .key = {"read-after-open"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .description = "read is sent only after actual open happens and real " + "fd is obtained, instead of doing on anonymous fd (similar to write)", + }, { .key = {NULL} } }; diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c index 445ea8658..402da886f 100644 --- a/xlators/performance/quick-read/src/quick-read.c +++ b/xlators/performance/quick-read/src/quick-read.c @@ -101,6 +101,7 @@ qr_inode_ctx_get_or_new (xlator_t *this, inode_t *inode) if (ret) { __qr_inode_prune (&priv->table, qr_inode); GF_FREE (qr_inode); + qr_inode = NULL; } } unlock: @@ -417,10 +418,11 @@ qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (content) { /* new content came along, always replace old content */ qr_inode = qr_inode_ctx_get_or_new (this, inode); - if (!qr_inode) + if (!qr_inode) { /* no harm done */ + GF_FREE (content); goto out; - + } qr_content_update (this, qr_inode, content, buf); } else { /* purge old content if necessary */ @@ -565,7 +567,6 @@ qr_readv_cached (call_frame_t *frame, qr_inode_t *qr_inode, size_t size, iobref = iobref_new (); if (!iobref) { op_ret = -1; - iobuf_unref (iobuf); goto unlock; } diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index 2be211fbf..b9cef1152 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -967,11 +967,11 @@ __wb_collapse_small_writes (wb_request_t *holder, wb_request_t *req) ret = iobref_add (iobref, iobuf); if (ret != 0) { - iobuf_unref (iobuf); - iobref_unref (iobref); gf_log (req->wb_inode->this->name, GF_LOG_WARNING, "cannot add iobuf (%p) into iobref (%p)", iobuf, iobref); + iobuf_unref (iobuf); + iobref_unref (iobref); goto out; } diff --git a/xlators/protocol/server/src/server-handshake.c b/xlators/protocol/server/src/server-handshake.c index d4941011d..708acd936 100644 --- a/xlators/protocol/server/src/server-handshake.c +++ b/xlators/protocol/server/src/server-handshake.c @@ -448,6 +448,8 @@ server_setvolume (rpcsvc_request_t *req) if (req->trans->xl_private != client) req->trans->xl_private = client; + auth_set_username_passwd (params, config_params, client); + ret = dict_get_int32 (params, "fops-version", &fop_version); if (ret < 0) { ret = dict_set_str (reply, "ERROR", diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index b2b6c486f..c11011abf 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -302,6 +302,11 @@ get_frame_from_request (rpcsvc_request_t *req) { call_frame_t *frame = NULL; client_t *client = NULL; + client_t *tmp_client = NULL; + xlator_t *this = NULL; + server_conf_t *priv = NULL; + clienttable_t *clienttable = NULL; + unsigned int i = 0; GF_VALIDATE_OR_GOTO ("server", req, out); @@ -315,6 +320,57 @@ get_frame_from_request (rpcsvc_request_t *req) frame->root->unique = req->xid; + client = req->trans->xl_private; + this = req->trans->xl; + priv = this->private; + clienttable = this->ctx->clienttable; + + for (i = 0; i < clienttable->max_clients; i++) { + tmp_client = clienttable->cliententries[i].client; + if (client == tmp_client) { + /* for non trusted clients username and password + would not have been set. So for non trusted clients + (i.e clients not from the same machine as the brick, + and clients from outside the storage pool) + do the root-squashing. + TODO: If any client within the storage pool (i.e + mounting within a machine from the pool but using + other machine's ip/hostname from the same pool) + is present treat it as a trusted client + */ + if (!client->auth.username && req->pid != NFS_PID) + RPC_AUTH_ROOT_SQUASH (req); + + /* Problem: If we just check whether the client is + trusted client and do not do root squashing for + them, then for smb clients and UFO clients root + squashing will never happen as they use the fuse + mounts done within the trusted pool (i.e they are + trusted clients). + Solution: To fix it, do root squashing for trusted + clients also. If one wants to have a client within + the storage pool for which root-squashing does not + happen, then the client has to be mounted with + --no-root-squash option. But for defrag client and + gsyncd client do not do root-squashing. + */ + if (client->auth.username && + req->pid != GF_CLIENT_PID_NO_ROOT_SQUASH && + req->pid != GF_CLIENT_PID_GSYNCD && + req->pid != GF_CLIENT_PID_DEFRAG) + RPC_AUTH_ROOT_SQUASH (req); + + /* For nfs clients the server processes will be running + within the trusted storage pool machines. So if we + do not do root-squashing for nfs servers, thinking + that its a trusted client, then root-squashing wont + work for nfs clients. + */ + if (req->pid == NFS_PID) + RPC_AUTH_ROOT_SQUASH (req); + } + } + frame->root->uid = req->uid; frame->root->gid = req->gid; frame->root->pid = req->pid; @@ -935,3 +991,104 @@ server_ctx_get (client_t *client, xlator_t *xlator) out: return ctx; } + +int +auth_set_username_passwd (dict_t *input_params, dict_t *config_params, + client_t *client) +{ + int ret = 0; + data_t *allow_user = NULL; + data_t *passwd_data = NULL; + char *username = NULL; + char *password = NULL; + char *brick_name = NULL; + char *searchstr = NULL; + char *username_str = NULL; + char *tmp = NULL; + char *username_cpy = NULL; + + ret = dict_get_str (input_params, "username", &username); + if (ret) { + gf_log ("auth/login", GF_LOG_DEBUG, + "username not found, returning DONT-CARE"); + /* For non trusted clients username and password + will not be there. So dont reject the client. + */ + ret = 0; + goto out; + } + + ret = dict_get_str (input_params, "password", &password); + if (ret) { + gf_log ("auth/login", GF_LOG_WARNING, + "password not found, returning DONT-CARE"); + goto out; + } + + ret = dict_get_str (input_params, "remote-subvolume", &brick_name); + if (ret) { + gf_log ("auth/login", GF_LOG_ERROR, + "remote-subvolume not specified"); + ret = -1; + goto out; + } + + ret = gf_asprintf (&searchstr, "auth.login.%s.allow", brick_name); + if (-1 == ret) { + ret = 0; + goto out; + } + + allow_user = dict_get (config_params, searchstr); + GF_FREE (searchstr); + + if (allow_user) { + username_cpy = gf_strdup (allow_user->data); + if (!username_cpy) + goto out; + + username_str = strtok_r (username_cpy, " ,", &tmp); + + while (username_str) { + if (!fnmatch (username_str, username, 0)) { + ret = gf_asprintf (&searchstr, + "auth.login.%s.password", + username); + if (-1 == ret) + goto out; + + passwd_data = dict_get (config_params, + searchstr); + GF_FREE (searchstr); + + if (!passwd_data) { + gf_log ("auth/login", GF_LOG_ERROR, + "wrong username/password " + "combination"); + ret = -1; + goto out; + } + + ret = !((strcmp (data_to_str (passwd_data), + password))?0: -1); + if (!ret) { + client->auth.username = + gf_strdup (username); + client->auth.passwd = + gf_strdup (password); + } + if (ret == -1) + gf_log ("auth/login", GF_LOG_ERROR, + "wrong password for user %s", + username); + break; + } + username_str = strtok_r (NULL, " ,", &tmp); + } + } + +out: + GF_FREE (username_cpy); + + return ret; +} diff --git a/xlators/protocol/server/src/server-helpers.h b/xlators/protocol/server/src/server-helpers.h index 93ea35851..3c257b3bc 100644 --- a/xlators/protocol/server/src/server-helpers.h +++ b/xlators/protocol/server/src/server-helpers.h @@ -53,6 +53,8 @@ int serialize_rsp_dirent (gf_dirent_t *entries, gfs3_readdir_rsp *rsp); int serialize_rsp_direntp (gf_dirent_t *entries, gfs3_readdirp_rsp *rsp); int readdirp_rsp_cleanup (gfs3_readdirp_rsp *rsp); int readdir_rsp_cleanup (gfs3_readdir_rsp *rsp); +int auth_set_username_passwd (dict_t *input_params, dict_t *config_params, + struct _client_t *client); server_ctx_t *server_ctx_get (client_t *client, xlator_t *xlator); diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index c1cbc83f4..fde322099 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -3269,7 +3269,7 @@ posix_get_ancestry_non_directory (xlator_t *this, inode_t *leaf_inode, dirpath + priv->base_path_length, pathlen, head, - POSIX_ANCESTRY_PATH | POSIX_ANCESTRY_DENTRY, + type | POSIX_ANCESTRY_PATH, pgfid, handle_size, priv->base_path, @@ -3383,7 +3383,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, dict = dict_new (); if (!dict) { - op_errno = ENOMEM; + op_errno = ENOMEM; goto out; } @@ -3550,6 +3550,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); if (!value) { op_ret = -1; + op_errno = ENOMEM; goto out; } size = sys_lgetxattr (real_path, key, value, size); @@ -3565,6 +3566,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, value [size] = '\0'; op_ret = dict_set_dynptr (dict, key, value, size); if (op_ret < 0) { + op_errno = -op_ret; gf_log (this->name, GF_LOG_ERROR, "dict set operation " "on %s for the key %s failed.", real_path, key); GF_FREE (value); @@ -3642,6 +3644,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, value [size] = '\0'; op_ret = dict_set_dynptr (dict, key, value, size); if (op_ret < 0) { + op_errno = -op_ret; gf_log (this->name, GF_LOG_ERROR, "dict set operation " "on %s for the key %s failed.", real_path, key); GF_FREE (value); |