diff options
24 files changed, 281 insertions, 137 deletions
diff --git a/configure.ac b/configure.ac index 739728085a6..4c2bb32fe23 100644 --- a/configure.ac +++ b/configure.ac @@ -740,7 +740,7 @@ AC_ARG_ENABLE([firewalld],                [BUILD_FIREWALLD="${enableval}"], [BUILD_FIREWALLD="no"])  if test "x${BUILD_FIREWALLD}" = "xyes"; then -        if !(which firewalld 1>/dev/null 2>&1) ; then +        if !(test -d /usr/lib/firewalld/services 1>/dev/null 2>&1) ; then                  BUILD_FIREWALLD="no (firewalld not installed)"          fi  fi diff --git a/doc/release-notes/3.8.8.md b/doc/release-notes/3.8.8.md new file mode 100644 index 00000000000..282dee4cad0 --- /dev/null +++ b/doc/release-notes/3.8.8.md @@ -0,0 +1,48 @@ +# Release notes for Gluster 3.8.8 + +This is a bugfix release. The [Release Notes for 3.8.0](3.8.0.md), +[3.8.1](3.8.1.md), [3.8.2](3.8.2.md), [3.8.3](3.8.3.md), [3.8.4](3.8.4.md), +[3.8.5](3.8.5.md), [3.8.6](3.8.6.md) and [3.8.7](3.8.7.md) contain a listing of +all the new features that were added and bugs fixed in the GlusterFS 3.8 stable +release. 
+ + +## Bugs addressed + +A total of 38 patches have been merged, addressing 35 bugs: + +- [#1375849](https://bugzilla.redhat.com/1375849): [RFE] enable sharding with virt profile - /var/lib/glusterd/groups/virt +- [#1378384](https://bugzilla.redhat.com/1378384): log level set in glfs_set_logging() does not work +- [#1378547](https://bugzilla.redhat.com/1378547): Asynchronous Unsplit-brain still causes Input/Output Error on system calls +- [#1389781](https://bugzilla.redhat.com/1389781): build: python on Debian-based dists use .../lib/python2.7/dist-packages instead of .../site-packages +- [#1394635](https://bugzilla.redhat.com/1394635): errors appear in brick and nfs logs and getting stale files on NFS clients +- [#1395510](https://bugzilla.redhat.com/1395510): Seeing error messages [snapview-client.c:283:gf_svc_lookup_cbk] and [dht-helper.c:1666ht_inode_ctx_time_update] (-->/usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x5d75c) +- [#1399423](https://bugzilla.redhat.com/1399423): GlusterFS client crashes during remove-brick operation +- [#1399432](https://bugzilla.redhat.com/1399432): A hard link is lost during rebalance+lookup +- [#1399468](https://bugzilla.redhat.com/1399468): Wrong value in Last Synced column during Hybrid Crawl +- [#1399915](https://bugzilla.redhat.com/1399915): [SAMBA-CIFS] : IO hungs in cifs mount while graph switch on & off +- [#1401029](https://bugzilla.redhat.com/1401029): OOM kill of nfs-ganesha on one node while fs-sanity test suite is executed. 
+- [#1401534](https://bugzilla.redhat.com/1401534): fuse mount point not accessible +- [#1402697](https://bugzilla.redhat.com/1402697): glusterfsd crashed while taking snapshot using scheduler +- [#1402728](https://bugzilla.redhat.com/1402728): Worker restarts on log-rsync-performance config update +- [#1403109](https://bugzilla.redhat.com/1403109): Crash of glusterd when using long username with geo-replication +- [#1404105](https://bugzilla.redhat.com/1404105): Incorrect incrementation of volinfo refcnt during volume start +- [#1404583](https://bugzilla.redhat.com/1404583): Upcall: Possible use after free when log level set to TRACE +- [#1405004](https://bugzilla.redhat.com/1405004): [Perf] : pcs cluster resources went into stopped state during Multithreaded perf tests on RHGS layered over RHEL 6 +- [#1405130](https://bugzilla.redhat.com/1405130): `gluster volume heal <vol-name> split-brain' does not heal if data/metadata/entry self-heal options are turned off +- [#1405450](https://bugzilla.redhat.com/1405450): tests/bugs/snapshot/bug-1316437.t test is causing spurious failure +- [#1405577](https://bugzilla.redhat.com/1405577): [GANESHA] failed to create directory of hostname of new node in var/lib/nfs/ganesha/ in already existing  cluster nodes +- [#1405886](https://bugzilla.redhat.com/1405886): Fix potential leaks in INODELK cbk in protocol/client +- [#1405890](https://bugzilla.redhat.com/1405890): Fix spurious failure in bug-1402841.t-mt-dir-scan-race.t +- [#1405951](https://bugzilla.redhat.com/1405951): NFS-Ganesha:Volume reset for any option causes reset of ganesha enable option and bring down the ganesha services +- [#1406740](https://bugzilla.redhat.com/1406740): Fix spurious failure in tests/bugs/replicate/bug-1402730.t +- [#1408414](https://bugzilla.redhat.com/1408414): Remove-brick rebalance failed while rm -rf is in progress +- [#1408772](https://bugzilla.redhat.com/1408772): [Arbiter] After Killing a brick writes drastically slow down +- 
[#1408786](https://bugzilla.redhat.com/1408786): with granular-entry-self-heal enabled i see that there is a gfid mismatch and vm goes to paused state after migrating to another host +- [#1410073](https://bugzilla.redhat.com/1410073): Fix failure of split-brain-favorite-child-policy.t in CentOS7 +- [#1410369](https://bugzilla.redhat.com/1410369): Dict_t leak in dht_migration_complete_check_task and  dht_rebalance_inprogress_task +- [#1410699](https://bugzilla.redhat.com/1410699): [geo-rep]: Config commands fail when the status is 'Created' +- [#1410708](https://bugzilla.redhat.com/1410708): glusterd/geo-rep: geo-rep config command leaks fd +- [#1410764](https://bugzilla.redhat.com/1410764): Remove-brick rebalance failed while rm -rf is in progress +- [#1411011](https://bugzilla.redhat.com/1411011): atime becomes zero when truncating file via ganesha (or gluster-NFS) +- [#1411613](https://bugzilla.redhat.com/1411613): Fix the place where  graph switch event is logged diff --git a/glusterfs.spec.in b/glusterfs.spec.in index 29bf00c60a9..29d07c530c0 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -226,7 +226,7 @@ BuildRequires:    libattr-devel  %endif  %if (0%{?_with_firewalld:1}) -BuildRequires:    firewalld +BuildRequires:    firewalld-filesystem  %endif  Obsoletes:        hekafs @@ -541,6 +541,10 @@ Requires(preun):  /sbin/service  Requires(preun):  /sbin/chkconfig  Requires(postun): /sbin/service  %endif +%if (0%{?_with_firewalld:1}) +# we install firewalld rules, so we need to have the directory owned +Requires:         firewalld-filesystem +%endif  %if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )  Requires:         rpcbind  %else @@ -811,11 +815,7 @@ if [ -e /etc/ld.so.conf.d/glusterfs.conf ]; then  fi  %if (0%{?_with_firewalld:1}) -#reload service files if firewalld running -if $(systemctl is-active firewalld 1>/dev/null 2>&1); then -  #firewalld-filesystem is not available for rhel7, so command used for reload. 
-  firewall-cmd  --reload 1>/dev/null 2>&1 -fi +    %firewalld_reload  %endif  pidof -c -o %PPID -x glusterd &> /dev/null @@ -883,10 +883,7 @@ exit 0  %postun server  /sbin/ldconfig  %if (0%{?_with_firewalld:1}) -#reload service files if firewalld running -if $(systemctl is-active firewalld 1>/dev/null 2>&1); then -    firewall-cmd  --reload -fi +    %firewalld_reload  %endif  exit 0 @@ -1195,6 +1192,9 @@ exit 0  %endif  %changelog +* Fri Jan 6 2017 Niels de Vos <ndevos@redhat.com> +- use macro provided by firewalld-filesystem to reload firewalld +  * Thu Dec 19 2016 Jiffin Tony Thottan <jhottan@redhat.com>  - remove S31ganesha-reset.sh from hooks (#1405951) diff --git a/libglusterfs/src/lkowner.h b/libglusterfs/src/lkowner.h index b6a950f5e12..9712f176f30 100644 --- a/libglusterfs/src/lkowner.h +++ b/libglusterfs/src/lkowner.h @@ -84,4 +84,10 @@ out:          return is_null;  } +static inline void +lk_owner_copy (gf_lkowner_t *dst, gf_lkowner_t *src) +{ +        dst->len = src->len; +        memcpy(dst->data, src->data, src->len); +}  #endif /* _LK_OWNER_H */ diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index 316d80452fb..7cf1c7757fe 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -309,7 +309,40 @@ sys_lseek (int fd, off_t offset, int whence)  int  sys_statvfs (const char *path, struct statvfs *buf)  { -        return statvfs (path, buf); +        int ret; + +        ret = statvfs (path, buf); +#ifdef __FreeBSD__ +        /* FreeBSD doesn't return the expected value in buf->f_bsize. It +         * contains the optimal I/O size instead of the file system block +         * size. Gluster expects that this field contains the block size. 
+         */ +        if (ret == 0) { +                buf->f_bsize = buf->f_frsize; +        } +#endif /* __FreeBSD__ */ + +        return ret; +} + + +int +sys_fstatvfs (int fd, struct statvfs *buf) +{ +        int ret; + +        ret = fstatvfs (fd, buf); +#ifdef __FreeBSD__ +        /* FreeBSD doesn't return the expected value in buf->f_bsize. It +         * contains the optimal I/O size instead of the file system block +         * size. Gluster expects this field to contain the block size. +         */ +        if (ret == 0) { +                buf->f_bsize = buf->f_frsize; +        } +#endif /* __FreeBSD__ */ + +        return ret;  } diff --git a/libglusterfs/src/syscall.h b/libglusterfs/src/syscall.h index b549f6a1b3c..81884f88164 100644 --- a/libglusterfs/src/syscall.h +++ b/libglusterfs/src/syscall.h @@ -147,6 +147,9 @@ int  sys_statvfs (const char *path, struct statvfs *buf);  int +sys_fstatvfs (int fd, struct statvfs *buf); + +int  sys_close (int fd);  int diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 0f878a9be86..fb3318da36a 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -131,6 +131,7 @@ __afr_inode_ctx_get (xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)  out:          return ret;  } +  /*   * INODE CTX 64-bit VALUE FORMAT FOR SMALL (<= 16) SUBVOL COUNTS:   * @@ -192,10 +193,7 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,          metadatamap_old = metadatamap = (val & 0x000000000000ffff);          datamap_old = datamap = (val & 0x00000000ffff0000) >> 16; -        /* Hard-code event to 0 since there is a failure and the inode -         * needs to be refreshed anyway. 
-         */ -        event = 0; +        event = (val & 0xffffffff00000000) >> 32;          if (txn_type == AFR_DATA_TRANSACTION)                  tmp_map = datamap; @@ -228,6 +226,8 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,                          local->transaction.in_flight_sb = _gf_true;                          metadatamap |= (1 << index);                  } +                if (metadatamap_old != metadatamap) +                        event = 0;                  break;          case AFR_DATA_TRANSACTION: @@ -237,10 +237,12 @@ __afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local,                          local->transaction.in_flight_sb = _gf_true;                          datamap |= (1 << index);                  } +                if (datamap_old != datamap) +                        event = 0;                  break;          default: -        break; +                break;          }          val = ((uint64_t) metadatamap) | @@ -351,7 +353,7 @@ out:  }  int -__afr_inode_read_subvol_reset_small (inode_t *inode, xlator_t *this) +__afr_inode_event_gen_reset_small (inode_t *inode, xlator_t *this)  {  	int               ret         = -1;  	uint16_t          datamap     = 0; @@ -452,7 +454,7 @@ out:  }  int -__afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this) +__afr_inode_event_gen_reset (inode_t *inode, xlator_t *this)  {  	afr_private_t *priv = NULL;  	int ret = -1; @@ -460,7 +462,7 @@ __afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)  	priv = this->private;  	if (priv->child_count <= 16) -		ret = __afr_inode_read_subvol_reset_small (inode, this); +		ret = __afr_inode_event_gen_reset_small (inode, this);  	else  		ret = -1; @@ -593,7 +595,7 @@ out:  int -afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this) +afr_inode_event_gen_reset (inode_t *inode, xlator_t *this)  {  	int ret = -1; @@ -601,7 +603,7 @@ afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this)          
LOCK(&inode->lock);          { -                ret = __afr_inode_read_subvol_reset (inode, this); +                ret = __afr_inode_event_gen_reset (inode, this);          }          UNLOCK(&inode->lock);  out: @@ -2086,7 +2088,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)  		if (afr_replies_interpret (frame, this, local->inode, NULL)) {                          read_subvol = afr_read_subvol_decide (local->inode,                                                                this, &args); -			afr_inode_read_subvol_reset (local->inode, this); +			afr_inode_event_gen_reset (local->inode, this);  			goto cant_interpret;  		} else {                          read_subvol = afr_data_subvol_get (local->inode, this, diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 286a5392da6..8e483c382c4 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -122,12 +122,12 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)  			continue;  		if (local->replies[i].op_ret < 0) {  			if (local->inode) -				afr_inode_read_subvol_reset (local->inode, this); +				afr_inode_event_gen_reset (local->inode, this);  			if (local->parent) -				afr_inode_read_subvol_reset (local->parent, +				afr_inode_event_gen_reset (local->parent,  							     this);  			if (local->parent2) -				afr_inode_read_subvol_reset (local->parent2, +				afr_inode_event_gen_reset (local->parent2,  							     this);  			continue;  		} diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 926f7c4dc47..2390764bccd 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -48,17 +48,6 @@ afr_read_txn_next_subvol (call_frame_t *frame, xlator_t *this)  	return 0;  } -#define AFR_READ_TXN_SET_ERROR_AND_GOTO(ret, errnum, index, label) \ -        do {                                                      \ -                
local->op_ret = ret;                              \ -                local->op_errno = errnum;                          \ -                read_subvol = index;                              \ -                gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN,\ -                        "Failing %s on gfid %s: split-brain observed.",\ -                        gf_fop_list[local->op], uuid_utoa (inode->gfid));\ -                goto label;                                       \ -        } while (0) -  int  afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)  { @@ -72,19 +61,16 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)  	inode = local->inode;          if (err) { -                local->op_errno = -err; -                local->op_ret = -1;                  read_subvol = -1; -                gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN, -                        "Failing %s on gfid %s: split-brain observed.", -                        gf_fop_list[local->op], uuid_utoa (inode->gfid));                  goto readfn;          }  	read_subvol = afr_read_subvol_select_by_policy (inode, this,  							local->readable, NULL); -	if (read_subvol == -1) -                AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn); +	if (read_subvol == -1) { +                err = -EIO; +                goto readfn; +        }  	if (local->read_attempted[read_subvol]) {  		afr_read_txn_next_subvol (frame, this); @@ -99,6 +85,10 @@ readfn:                  if ((ret == 0) && spb_choice >= 0)                          read_subvol = spb_choice;          } + +        if (read_subvol == -1) { +                AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN (-1, -err); +        }  	local->readfn (frame, this, read_subvol);  	return 0; diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 9cb735ea7fa..8178fc0d18b 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ 
b/xlators/cluster/afr/src/afr-transaction.c @@ -2255,15 +2255,9 @@ int  afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)  {          afr_local_t   *local           = frame->local; -        afr_private_t *priv            = this->private; -        int           ret              = 0;          if (err) { -                local->op_errno = -err; -                local->op_ret = -1; -                gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_SPLIT_BRAIN, -                        "Failing %s on gfid %s: split-brain observed.", -                        gf_fop_list[local->op], uuid_utoa (local->inode->gfid)); +                AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, -err);                  goto fail;          } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index bbfa309b868..aa19f1eeb37 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -54,6 +54,18 @@ typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this);  #define AFR_CMP(a1,a2,len) ({int __cmp = 0; int __i; for (__i = 0; __i < len; __i++) if (a1[__i] != a2[__i]) { __cmp = 1; break;} __cmp;})  #define AFR_IS_ARBITER_BRICK(priv, index) ((priv->arbiter_count == 1) && (index == ARBITER_BRICK_INDEX)) +#define AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(ret, errnum)                       \ +        do {                                                                   \ +                local->op_ret = ret;                                           \ +                local->op_errno = errnum;                                      \ +                if (local->op_errno == EIO)                                    \ +                        gf_msg (this->name, GF_LOG_ERROR, local->op_errno,     \ +                                AFR_MSG_SPLIT_BRAIN, "Failing %s on gfid %s: " \ +                                "split-brain observed.",                       \ +                                gf_fop_list[local->op],                    
    \ +                                uuid_utoa (local->inode->gfid));               \ +        } while (0) +  typedef enum {          AFR_FAV_CHILD_NONE,          AFR_FAV_CHILD_BY_SIZE, @@ -882,7 +894,7 @@ afr_inode_read_subvol_set (inode_t *inode, xlator_t *this,  			   int event_generation);  int -afr_inode_read_subvol_reset (inode_t *inode, xlator_t *this); +afr_inode_event_gen_reset (inode_t *inode, xlator_t *this);  int  afr_read_subvol_select_by_policy (inode_t *inode, xlator_t *this, @@ -906,10 +918,6 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,  	afr_read_subvol_get(i, t, s, r, e, AFR_METADATA_TRANSACTION, a)  int -afr_inode_ctx_reset_unreadable_subvol (inode_t *inode, xlator_t *this, -                                       int subvol_idx, int txn_type); - -int  afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,                     uuid_t gfid, afr_inode_refresh_cbk_t cbk); diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c index e320109c796..ac0f0e186fa 100644 --- a/xlators/cluster/dht/src/dht-inode-read.c +++ b/xlators/cluster/dht/src/dht-inode-read.c @@ -849,6 +849,12 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,          dht_set_local_rebalance (this, local, NULL, prebuf,                                   postbuf, xdata); +        if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { +                ret = dht_rebalance_complete_check (this, frame); +                if (!ret) +                        return 0; +        } +          /* Check if the rebalance phase1 is true */          if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { @@ -870,11 +876,6 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,                  }          } -        if (IS_DHT_MIGRATION_PHASE2 (postbuf)) { -                ret = dht_rebalance_complete_check (this, frame); -                if (!ret) -                        
return 0; -        }  out:          DHT_STRIP_PHASE1_FLAGS (postbuf); diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c index a9ffd1d9fb5..d955ee411eb 100644 --- a/xlators/cluster/dht/src/dht-rename.c +++ b/xlators/cluster/dht/src/dht-rename.c @@ -724,6 +724,7 @@ dht_rename_cleanup (call_frame_t *frame)                  DHT_MARKER_DONT_ACCOUNT(xattr_new); +                FRAME_SU_DO (frame, dht_local_t);                  STACK_WIND (frame, dht_rename_unlink_cbk,                              dst_hashed, dst_hashed->fops->unlink,                              &local->loc, 0, xattr_new); diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 2e6759a2803..2b4cec406a9 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -788,10 +788,10 @@ void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags)      ec_lock_prepare_inode_internal(fop, loc, flags, NULL);  } -void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, +void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,                                    uint32_t flags)  { -    loc_t tmp, *base = NULL; +    loc_t tmp;      int32_t err;      if (fop->error != 0) { @@ -806,8 +806,9 @@ void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc,      }      if ((flags & EC_INODE_SIZE) != 0) { -        base = loc;          flags ^= EC_INODE_SIZE; +    } else { +            base = NULL;      }      ec_lock_prepare_inode_internal(fop, &tmp, flags, base); @@ -1442,20 +1443,21 @@ gf_boolean_t ec_lock_acquire(ec_lock_link_t *link)  {      ec_lock_t *lock;      ec_fop_data_t *fop; +    gf_lkowner_t lk_owner;      lock = link->lock;      fop = link->fop;      if (!lock->acquired) { -        ec_owner_set(fop->frame, lock); +        set_lk_owner_from_ptr(&lk_owner, lock);          ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p", lock,                   
lock->loc.inode);          lock->flock.l_type = F_WRLCK; -        ec_inodelk(fop->frame, fop->xl, -1, EC_MINIMUM_ALL, ec_locked, -                   link, fop->xl->name, &lock->loc, F_SETLKW, &lock->flock, -                   NULL); +        ec_inodelk(fop->frame, fop->xl, &lk_owner, -1, EC_MINIMUM_ALL, +                   ec_locked, link, fop->xl->name, &lock->loc, F_SETLKW, +                   &lock->flock, NULL);          return _gf_false;      } @@ -1760,6 +1762,7 @@ void ec_unlock_lock(ec_lock_link_t *link)  {      ec_lock_t *lock;      ec_fop_data_t *fop; +    gf_lkowner_t lk_owner;      lock = link->lock;      fop = link->fop; @@ -1767,13 +1770,13 @@ void ec_unlock_lock(ec_lock_link_t *link)      ec_clear_inode_info(fop, lock->loc.inode);      if ((lock->mask != 0) && lock->acquired) { -        ec_owner_set(fop->frame, lock); +        set_lk_owner_from_ptr(&lk_owner, lock);          lock->flock.l_type = F_UNLCK;          ec_trace("UNLOCK_INODELK", fop, "lock=%p, inode=%p", lock,                   lock->loc.inode); -        ec_inodelk(fop->frame, fop->xl, lock->mask, EC_MINIMUM_ONE, +        ec_inodelk(fop->frame, fop->xl, &lk_owner, lock->mask, EC_MINIMUM_ONE,                     ec_unlocked, link, fop->xl->name, &lock->loc, F_SETLK,                     &lock->flock, NULL);      } else { diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h index 8e724a81380..7c096db829c 100644 --- a/xlators/cluster/ec/src/ec-common.h +++ b/xlators/cluster/ec/src/ec-common.h @@ -89,7 +89,7 @@ gf_boolean_t  ec_cbk_set_error(ec_cbk_data_t *cbk, int32_t error, gf_boolean_t ro);  void ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags); -void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, +void ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,                                    uint32_t flags);  void ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags);  void ec_lock(ec_fop_data_t 
* fop); diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c index e181170650d..e068d77c827 100644 --- a/xlators/cluster/ec/src/ec-dir-write.c +++ b/xlators/cluster/ec/src/ec-dir-write.c @@ -177,7 +177,7 @@ int32_t ec_manager_create(ec_fop_data_t * fop, int32_t state)          /* Fall through */          case EC_STATE_LOCK: -            ec_lock_prepare_parent_inode(fop, &fop->loc[0], +            ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,                                           EC_UPDATE_DATA | EC_UPDATE_META);              ec_lock(fop); @@ -355,9 +355,9 @@ int32_t ec_manager_link(ec_fop_data_t * fop, int32_t state)      {          case EC_STATE_INIT:          case EC_STATE_LOCK: -            ec_lock_prepare_parent_inode(fop, &fop->loc[1], EC_UPDATE_DATA | -                                                            EC_UPDATE_META | -                                                            EC_INODE_SIZE); +            ec_lock_prepare_parent_inode(fop, &fop->loc[1], &fop->loc[0], +                                         EC_UPDATE_DATA | EC_UPDATE_META | +                                         EC_INODE_SIZE);              ec_lock(fop);              return EC_STATE_DISPATCH; @@ -540,7 +540,7 @@ int32_t ec_manager_mkdir(ec_fop_data_t * fop, int32_t state)          /* Fall through */          case EC_STATE_LOCK: -            ec_lock_prepare_parent_inode(fop, &fop->loc[0], +            ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,                                           EC_UPDATE_DATA | EC_UPDATE_META);              ec_lock(fop); @@ -746,7 +746,7 @@ int32_t ec_manager_mknod(ec_fop_data_t * fop, int32_t state)          /* Fall through */          case EC_STATE_LOCK: -            ec_lock_prepare_parent_inode(fop, &fop->loc[0], +            ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,                                           EC_UPDATE_DATA | EC_UPDATE_META);              ec_lock(fop); @@ -905,10 
+905,10 @@ int32_t ec_manager_rename(ec_fop_data_t * fop, int32_t state)      {          case EC_STATE_INIT:          case EC_STATE_LOCK: -            ec_lock_prepare_parent_inode(fop, &fop->loc[0], EC_UPDATE_DATA | -                                                            EC_UPDATE_META | -                                                            EC_INODE_SIZE); -            ec_lock_prepare_parent_inode(fop, &fop->loc[1], +            ec_lock_prepare_parent_inode(fop, &fop->loc[0], &fop->loc[0], +                                         EC_UPDATE_DATA | EC_UPDATE_META | +                                         EC_INODE_SIZE); +            ec_lock_prepare_parent_inode(fop, &fop->loc[1], NULL,                                           EC_UPDATE_DATA | EC_UPDATE_META);              ec_lock(fop); @@ -1067,7 +1067,7 @@ int32_t ec_manager_rmdir(ec_fop_data_t * fop, int32_t state)      {          case EC_STATE_INIT:          case EC_STATE_LOCK: -            ec_lock_prepare_parent_inode(fop, &fop->loc[0], +            ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,                                           EC_UPDATE_DATA | EC_UPDATE_META);              ec_lock(fop); @@ -1213,7 +1213,7 @@ int32_t ec_manager_symlink(ec_fop_data_t * fop, int32_t state)      {          case EC_STATE_INIT:          case EC_STATE_LOCK: -            ec_lock_prepare_parent_inode(fop, &fop->loc[0], +            ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,                                           EC_UPDATE_DATA | EC_UPDATE_META);              ec_lock(fop); @@ -1379,7 +1379,7 @@ int32_t ec_manager_unlink(ec_fop_data_t * fop, int32_t state)      {          case EC_STATE_INIT:          case EC_STATE_LOCK: -            ec_lock_prepare_parent_inode(fop, &fop->loc[0], +            ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,                                           EC_UPDATE_DATA | EC_UPDATE_META);              ec_lock(fop); diff --git a/xlators/cluster/ec/src/ec-fops.h 
b/xlators/cluster/ec/src/ec-fops.h index 8d938427a18..bbacedc0140 100644 --- a/xlators/cluster/ec/src/ec-fops.h +++ b/xlators/cluster/ec/src/ec-fops.h @@ -63,16 +63,16 @@ void ec_fheal(call_frame_t * frame, xlator_t * this, uintptr_t target,                int32_t minimum, fop_fheal_cbk_t func, void *data, fd_t * fd,                int32_t partial, dict_t *xdata); -void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target, -                int32_t minimum, fop_inodelk_cbk_t func, void *data, -                const char * volume, loc_t * loc, int32_t cmd, -                struct gf_flock * flock, dict_t * xdata); - -void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target, -                 int32_t minimum, fop_finodelk_cbk_t func, void *data, -                 const char * volume, fd_t * fd, int32_t cmd, +void ec_inodelk (call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, +                 uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func, +                 void *data, const char *volume, loc_t *loc, int32_t cmd,                   struct gf_flock * flock, dict_t * xdata); +void ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, +                 uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func, +                 void *data, const char *volume, fd_t *fd, int32_t cmd, +                 struct gf_flock *flock, dict_t *xdata); +  void ec_link(call_frame_t * frame, xlator_t * this, uintptr_t target,               int32_t minimum, fop_link_cbk_t func, void *data, loc_t * oldloc,               loc_t * newloc, dict_t * xdata); diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index 051d218331a..da08f6c9a75 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -177,14 +177,17 @@ void ec_heal_lock(ec_heal_t *heal, int32_t type, fd_t *fd, loc_t *loc,      if (fd != NULL)      { -        ec_finodelk(heal->fop->frame, heal->xl, 
heal->fop->mask, +        ec_finodelk(heal->fop->frame, heal->xl, +                    &heal->fop->frame->root->lk_owner, heal->fop->mask,                      EC_MINIMUM_ALL, cbk, heal, heal->xl->name, fd, F_SETLKW,                      &flock, NULL);      }      else      { -        ec_inodelk(heal->fop->frame, heal->xl, heal->fop->mask, EC_MINIMUM_ALL, -                   cbk, heal, heal->xl->name, loc, F_SETLKW, &flock, NULL); +        ec_inodelk(heal->fop->frame, heal->xl, +                   &heal->fop->frame->root->lk_owner, heal->fop->mask, +                   EC_MINIMUM_ALL, cbk, heal, heal->xl->name, loc, F_SETLKW, +                   &flock, NULL);      }  } diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c index 7cf8232353d..7df83126ae5 100644 --- a/xlators/cluster/ec/src/ec-helpers.c +++ b/xlators/cluster/ec/src/ec-helpers.c @@ -643,10 +643,9 @@ void ec_owner_set(call_frame_t * frame, void * owner)      set_lk_owner_from_ptr(&frame->root->lk_owner, owner);  } -void ec_owner_copy(call_frame_t * frame, gf_lkowner_t * owner) +void ec_owner_copy(call_frame_t *frame, gf_lkowner_t *owner)  { -    frame->root->lk_owner.len = owner->len; -    memcpy(frame->root->lk_owner.data, owner->data, owner->len); +    lk_owner_copy (&frame->root->lk_owner, owner);  }  ec_inode_t * __ec_inode_get(inode_t * inode, xlator_t * xl) diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c index ed835f1aadc..bd525723ddf 100644 --- a/xlators/cluster/ec/src/ec-locks.c +++ b/xlators/cluster/ec/src/ec-locks.c @@ -608,12 +608,14 @@ int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state)                          flock.l_owner.len = 0;                          if (fop->id == GF_FOP_INODELK) { -                            ec_inodelk(fop->frame, fop->xl, mask, 1, +                            ec_inodelk(fop->frame, fop->xl, +                                       &fop->frame->root->lk_owner, mask, 1,                    
                     ec_lock_unlocked, NULL, fop->str[0],                                         &fop->loc[0], F_SETLK, &flock,                                         fop->xdata);                          } else { -                            ec_finodelk(fop->frame, fop->xl, mask, 1, +                            ec_finodelk(fop->frame, fop->xl, +                                        &fop->frame->root->lk_owner, mask, 1,                                          ec_lock_unlocked, NULL, fop->str[0],                                          fop->fd, F_SETLK, &flock, fop->xdata);                          } @@ -692,10 +694,10 @@ int32_t ec_manager_inodelk(ec_fop_data_t * fop, int32_t state)      }  } -void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target, -                int32_t minimum, fop_inodelk_cbk_t func, void * data, -                const char * volume, loc_t * loc, int32_t cmd, -                struct gf_flock * flock, dict_t * xdata) +void ec_inodelk (call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, +                 uintptr_t target, int32_t minimum, fop_inodelk_cbk_t func, +                 void *data, const char *volume, loc_t *loc, int32_t cmd, +                 struct gf_flock *flock, dict_t *xdata)  {      ec_cbk_t callback = { .inodelk = func };      ec_fop_data_t * fop = NULL; @@ -715,6 +717,7 @@ void ec_inodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,      }      fop->int32 = cmd; +    ec_owner_copy (fop->frame, owner);      if (volume != NULL) {          fop->str[0] = gf_strdup(volume); @@ -828,10 +831,10 @@ void ec_wind_finodelk(ec_t * ec, ec_fop_data_t * fop, int32_t idx)                        fop->xdata);  } -void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target, -                 int32_t minimum, fop_finodelk_cbk_t func, void * data, -                 const char * volume, fd_t * fd, int32_t cmd, -                 struct gf_flock * flock, dict_t * xdata) +void 
ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner, +                 uintptr_t target, int32_t minimum, fop_finodelk_cbk_t func, +                 void *data, const char *volume, fd_t *fd, int32_t cmd, +                 struct gf_flock *flock, dict_t *xdata)  {      ec_cbk_t callback = { .finodelk = func };      ec_fop_data_t * fop = NULL; @@ -853,6 +856,7 @@ void ec_finodelk(call_frame_t * frame, xlator_t * this, uintptr_t target,      fop->use_fd = 1;      fop->int32 = cmd; +    ec_owner_copy (fop->frame, owner);      if (volume != NULL) {          fop->str[0] = gf_strdup(volume); diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index 94d1241b364..b5e6bc08216 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -839,8 +839,8 @@ int32_t ec_gf_inodelk(call_frame_t * frame, xlator_t * this,      if (flock->l_type == F_UNLCK)              minimum = EC_MINIMUM_ONE; -    ec_inodelk(frame, this, -1, minimum, default_inodelk_cbk, NULL, -               volume, loc, cmd, flock, xdata); +    ec_inodelk(frame, this, &frame->root->lk_owner, -1, minimum, +               default_inodelk_cbk, NULL, volume, loc, cmd, flock, xdata);      return 0;  } @@ -852,8 +852,8 @@ int32_t ec_gf_finodelk(call_frame_t * frame, xlator_t * this,      int32_t minimum = EC_MINIMUM_ALL;      if (flock->l_type == F_UNLCK)              minimum = EC_MINIMUM_ONE; -    ec_finodelk(frame, this, -1, minimum, default_finodelk_cbk, NULL, -                volume, fd, cmd, flock, xdata); +    ec_finodelk(frame, this, &frame->root->lk_owner, -1, minimum, +                default_finodelk_cbk, NULL, volume, fd, cmd, flock, xdata);      return 0;  } diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index eead33fbd55..525a6a6fbbc 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -4802,6 +4802,7 @@ fuse_graph_sync (xlator_t *this)          fuse_private_t *priv       
         = NULL;          int             need_first_lookup   = 0;          int             ret                 = 0; +        int             new_graph_id        = 0;          xlator_t       *old_subvol          = NULL, *new_subvol = NULL;          uint64_t        winds_on_old_subvol = 0; @@ -4814,6 +4815,7 @@ fuse_graph_sync (xlator_t *this)                  old_subvol = priv->active_subvol;                  new_subvol = priv->active_subvol = priv->next_graph->top; +                new_graph_id = priv->next_graph->id;                  priv->next_graph = NULL;                  need_first_lookup = 1; @@ -4832,6 +4834,8 @@ unlock:          pthread_mutex_unlock (&priv->sync_mutex);          if (need_first_lookup) { +                gf_log ("fuse", GF_LOG_INFO, "switched to graph %d", +                        new_graph_id);                  fuse_first_lookup (this);          } @@ -5237,9 +5241,6 @@ fuse_graph_setup (xlator_t *this, glusterfs_graph_t *graph)                                 prev_graph->top, NULL);          } -        gf_log ("fuse", GF_LOG_INFO, "switched to graph %d", -                ((graph) ? 
graph->id : 0)); -          return ret;  unlock:          pthread_mutex_unlock (&priv->sync_mutex); diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 5ab38890df3..79845316315 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -153,9 +153,8 @@ iot_worker (void *data)          struct timespec   sleep_till = {0, };          int               ret = 0;          int               pri = -1; -        char              timeout = 0; -        char              bye = 0;  	struct timespec	  sleep = {0,}; +        gf_boolean_t      bye = _gf_false;          conf = data;          this = conf->this; @@ -169,6 +168,12 @@ iot_worker (void *data)                                  pri = -1;                          }                          while (conf->queue_size == 0) { +                                if (conf->down) { +                                        bye = _gf_true;/*Avoid sleep*/ +                                        break; +                                } + +                                conf->sleep_count++;                                  clock_gettime (CLOCK_REALTIME_COARSE,                                                 &sleep_till);                                  sleep_till.tv_sec += conf->idle_time; @@ -179,48 +184,48 @@ iot_worker (void *data)                                                                &sleep_till);                                  conf->sleep_count--; -                                if (ret == ETIMEDOUT) { -                                        timeout = 1; +                                if (conf->down || ret == ETIMEDOUT) { +                                        bye = _gf_true;                                          break;                                  }                          } -                        if (timeout) { -                                if (conf->curr_count > 
IOT_MIN_THREADS) { +                        if (bye) { +                                if (conf->down || +                                    conf->curr_count > IOT_MIN_THREADS) {                                          conf->curr_count--; -                                        bye = 1; +                                        if (conf->curr_count == 0) +                                           pthread_cond_broadcast (&conf->cond);                                          gf_msg_debug (conf->this->name, 0, -                                                      "timeout, terminated. conf->curr_count=%d", +                                                      "terminated. " +                                                      "conf->curr_count=%d",                                                        conf->curr_count);                                  } else { -                                        timeout = 0; +                                        bye = _gf_false;                                  }                          } -                        stub = __iot_dequeue (conf, &pri, &sleep); -			if (!stub && (sleep.tv_sec || sleep.tv_nsec)) { -				pthread_cond_timedwait(&conf->cond, -						       &conf->mutex, &sleep); -				pthread_mutex_unlock(&conf->mutex); -				continue; +                        if (!bye) { +                                stub = __iot_dequeue (conf, &pri, &sleep); +                                if (!stub && (sleep.tv_sec || sleep.tv_nsec)) { +                                        pthread_cond_timedwait(&conf->cond, +                                                               &conf->mutex, +                                                               &sleep); +                                        pthread_mutex_unlock(&conf->mutex); +                                        continue; +                                }                          }                  }                  pthread_mutex_unlock (&conf->mutex);       
           if (stub) /* guard against spurious wakeups */                          call_resume (stub); +                stub = NULL;                  if (bye)                          break;          } -        if (pri != -1) { -                pthread_mutex_lock (&conf->mutex); -                { -                        conf->ac_iot_count[pri]--; -                } -                pthread_mutex_unlock (&conf->mutex); -        }          return NULL;  } @@ -975,6 +980,7 @@ init (xlator_t *this)                          "pthread_cond_init failed (%d)", ret);                  goto out;          } +        conf->cond_inited = _gf_true;          if ((ret = pthread_mutex_init(&conf->mutex, NULL)) != 0) {                  gf_msg (this->name, GF_LOG_ERROR, 0, @@ -982,6 +988,7 @@ init (xlator_t *this)                          "pthread_mutex_init failed (%d)", ret);                  goto out;          } +        conf->mutex_inited = _gf_true;          set_stack_size (conf); @@ -1039,12 +1046,50 @@ out:  	return ret;  } +static void +iot_exit_threads (iot_conf_t *conf) +{ +        pthread_mutex_lock (&conf->mutex); +        { +                conf->down = _gf_true; +                /*Let all the threads know that xl is going down*/ +                pthread_cond_broadcast (&conf->cond); +                while (conf->curr_count)/*Wait for threads to exit*/ +                        pthread_cond_wait (&conf->cond, &conf->mutex); +        } +        pthread_mutex_unlock (&conf->mutex); +} + +int +notify (xlator_t *this, int32_t event, void *data, ...) 
+{ +        iot_conf_t *conf = this->private; + +        if (GF_EVENT_PARENT_DOWN == event) +                iot_exit_threads (conf); + +        default_notify (this, event, data); + +        return 0; +}  void  fini (xlator_t *this)  {  	iot_conf_t *conf = this->private; +        if (!conf) +                return; + +        if (conf->mutex_inited && conf->cond_inited) +                iot_exit_threads (conf); + +        if (conf->cond_inited) +                pthread_cond_destroy (&conf->cond); + +        if (conf->mutex_inited) +                pthread_mutex_destroy (&conf->mutex); +  	GF_FREE (conf);  	this->private = NULL; diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h index e5c97f690a2..673e1967617 100644 --- a/xlators/performance/io-threads/src/io-threads.h +++ b/xlators/performance/io-threads/src/io-threads.h @@ -82,6 +82,9 @@ struct iot_conf {          xlator_t            *this;          size_t              stack_size; +        gf_boolean_t         down; /*PARENT_DOWN event is notified*/ +        gf_boolean_t         mutex_inited; +        gf_boolean_t         cond_inited;  	struct iot_least_throttle throttle;  };  | 
