From 498b4436d6d360c249692ec2c41a5162c0095121 Mon Sep 17 00:00:00 2001 From: Grady Wong Date: Thu, 4 Oct 2018 02:21:55 +0800 Subject: [PATCH] switch "reassign" txg to committing empty txg and try again. --- include/spl/sys/uio.h | 1 + include/sys/dmu.h | 1 - include/sys/uio_impl.h | 2 +- module/zcommon/zfs_uio.c | 21 ++++++++++++--------- module/zfs/dmu.c | 4 ++-- module/zfs/dmu_tx.c | 33 --------------------------------- module/zfs/sa.c | 2 +- module/zfs/zfs_sa.c | 4 ++-- module/zfs/zfs_vnops.c | 30 +++++++++++------------------- 9 files changed, 30 insertions(+), 68 deletions(-) diff --git a/include/spl/sys/uio.h b/include/spl/sys/uio.h index 64c452b8d17f..fac26079d7bc 100644 --- a/include/spl/sys/uio.h +++ b/include/spl/sys/uio.h @@ -53,6 +53,7 @@ typedef struct uio { int uio_iovcnt; offset_t uio_loffset; uio_seg_t uio_segflg; + boolean_t uio_fault_disable; uint16_t uio_fmode; uint16_t uio_extflg; offset_t uio_limit; diff --git a/include/sys/dmu.h b/include/sys/dmu.h index 90fe2a9424e4..b2cb6bcf1755 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -778,7 +778,6 @@ void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow); void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size); void dmu_tx_abort(dmu_tx_t *tx); int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how); -int dmu_tx_reassign(dmu_tx_t *tx, uint64_t txg_how); void dmu_tx_wait(dmu_tx_t *tx); void dmu_tx_commit(dmu_tx_t *tx); void dmu_tx_mark_netfree(dmu_tx_t *tx); diff --git a/include/sys/uio_impl.h b/include/sys/uio_impl.h index 2cd9ac2f9ba6..37e283da0f8b 100644 --- a/include/sys/uio_impl.h +++ b/include/sys/uio_impl.h @@ -41,7 +41,7 @@ #include -extern int uiomove(void *, size_t, enum uio_rw, uio_t *, boolean_t); +extern int uiomove(void *, size_t, enum uio_rw, uio_t *); extern void uio_prefaultpages(ssize_t, uio_t *); extern int uiocopy(void *, size_t, enum uio_rw, uio_t *, size_t *); extern void uioskip(uio_t *, size_t); diff --git a/module/zcommon/zfs_uio.c b/module/zcommon/zfs_uio.c index 8a6b1ea36fd5..03fa9f5eede5 100644 --- a/module/zcommon/zfs_uio.c +++ b/module/zcommon/zfs_uio.c @@ -61,8 +61,7 @@ * a non-zero errno on failure. */ static int -uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio, - boolean_t fault_disable) +uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio) { const struct iovec *iov = uio->uio_iov; size_t skip = uio->uio_skip; @@ -81,17 +80,22 @@ uiomove_iov(void *p, size_t n, enum uio_rw rw, struct uio *uio, if (copy_to_user(iov->iov_base+skip, p, cnt)) return (EFAULT); } else { - if (fault_disable) { + if (uio->uio_fault_disable) { + if (!access_ok(VERIFY_READ, + (iov->iov_base+skip), cnt)) { + return (EFAULT); + } + pagefault_disable(); if (__copy_from_user_inatomic(p, - iov->iov_base+skip, cnt)) { + (iov->iov_base+skip), cnt)) { pagefault_enable(); return (EFAULT); } pagefault_enable(); } else { if (copy_from_user(p, - iov->iov_base+skip, cnt)) + (iov->iov_base+skip), cnt)) return (EFAULT); } } @@ -154,11 +158,10 @@ uiomove_bvec(void *p, size_t n, enum uio_rw rw, struct uio *uio) } int -uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio, - boolean_t fault_disable) +uiomove(void *p, size_t n, enum uio_rw rw, struct uio *uio) { if (uio->uio_segflg != UIO_BVEC) - return (uiomove_iov(p, n, rw, uio, fault_disable)); + return (uiomove_iov(p, n, rw, uio)); else return (uiomove_bvec(p, n, rw, uio)); } @@ -236,7 +239,7 @@ uiocopy(void *p, size_t n, enum uio_rw rw, struct uio *uio, size_t *cbytes) int ret; bcopy(uio, &uio_copy, sizeof (struct uio)); - ret = uiomove(p, n, rw, &uio_copy, B_FALSE); + ret = uiomove(p, n, rw, &uio_copy); *cbytes = uio->uio_resid - uio_copy.uio_resid; return (ret); } diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index ab9cbc643f45..96698b6f9ea2 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1424,7 +1424,7 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size) } else #endif err = uiomove((char *)db->db_data + bufoff, tocpy, - UIO_READ, uio, B_FALSE); + UIO_READ, uio); if (err) break; @@ -1525,7 +1525,7 @@ dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx, * block. */ err = uiomove((char *)db->db_data + bufoff, tocpy, - UIO_WRITE, uio, fault_disable); + UIO_WRITE, uio); if (tocpy == db->db_size) dmu_buf_fill_done(db, tx); diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c index 9085a2442050..5bde56c18c2a 100644 --- a/module/zfs/dmu_tx.c +++ b/module/zfs/dmu_tx.c @@ -1040,39 +1040,6 @@ dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how) return (0); } -int -dmu_tx_reassign(dmu_tx_t *tx, uint64_t txg_how) -{ - int err; - dmu_tx_hold_t *txh; - - /* - * Walk the transaction's hold list, removing the hold on the - * associated dnode, and notifying waiters if the refcount drops to 0. - */ - for (txh = list_head(&tx->tx_holds); txh != NULL; - txh = list_next(&tx->tx_holds, txh)) { - dnode_t *dn = txh->txh_dnode; - - if (dn == NULL) - continue; - mutex_enter(&dn->dn_mtx); - ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg); - - if (refcount_remove(&dn->dn_tx_holds, tx) == 0) { - dn->dn_assigned_txg = 0; - cv_broadcast(&dn->dn_notxholds); - } - mutex_exit(&dn->dn_mtx); - } - - txg_rele_to_sync(&tx->tx_txgh); - - tx->tx_txg = 0; - err = dmu_tx_assign(tx, txg_how); - - return (err); -} void dmu_tx_wait(dmu_tx_t *tx) diff --git a/module/zfs/sa.c b/module/zfs/sa.c index 511368f033c9..caa91bc4c4e1 100644 --- a/module/zfs/sa.c +++ b/module/zfs/sa.c @@ -1517,7 +1517,7 @@ sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio) mutex_enter(&hdl->sa_lock); if ((error = sa_attr_op(hdl, &bulk, 1, SA_LOOKUP, NULL)) == 0) { error = uiomove((void *)bulk.sa_addr, MIN(bulk.sa_size, - uio->uio_resid), UIO_READ, uio, B_FALSE); + uio->uio_resid), UIO_READ, uio); } mutex_exit(&hdl->sa_lock); return (error); diff --git a/module/zfs/zfs_sa.c b/module/zfs/zfs_sa.c index dc611fc1e67b..a1dc7dc4ccb1 100644 --- a/module/zfs/zfs_sa.c +++ b/module/zfs/zfs_sa.c @@ -81,13 +81,13 @@ zfs_sa_readlink(znode_t *zp, uio_t *uio) if (bufsz + ZFS_OLD_ZNODE_PHYS_SIZE <= db->db_size) { error = uiomove((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE, - MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio, B_FALSE); + MIN((size_t)bufsz, uio->uio_resid), UIO_READ, uio); } else { dmu_buf_t *dbp; if ((error = dmu_buf_hold(ZTOZSB(zp)->z_os, zp->z_id, 0, FTAG, &dbp, DMU_READ_NO_PREFETCH)) == 0) { error = uiomove(dbp->db_data, MIN((size_t)bufsz, - uio->uio_resid), UIO_READ, uio, B_FALSE); + uio->uio_resid), UIO_READ, uio); dmu_buf_rele(dbp, FTAG); } } diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 80913f1d2dc4..37e414892d99 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -392,8 +392,7 @@ mappedread(struct inode *ip, int nbytes, uio_t *uio) unlock_page(pp); pb = kmap(pp); - error = uiomove(pb + off, bytes, UIO_READ, uio, - B_FALSE); + error = uiomove(pb + off, bytes, UIO_READ, uio); kunmap(pp); if (mapping_writably_mapped(mp)) @@ -764,7 +763,9 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) /* * Start a transaction. */ - dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os); + dmu_tx_t *tx = NULL; +top: + tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz)); zfs_sa_upgrade_txholds(tx, zp); @@ -809,23 +810,14 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr) ssize_t tx_bytes; if (abuf == NULL) { - int reassign_error = 0; tx_bytes = uio->uio_resid; - do { - error = dmu_write_uio_dbuf( - sa_get_db(zp->z_sa_hdl), uio, nbytes, tx); - if (error == EFAULT) { - uio_prefaultpages(MIN(n, max_blksz), - uio); - reassign_error = dmu_tx_reassign(tx, - TXG_WAIT); - if (reassign_error) { - break; - } - } - } while (error == EFAULT); - - if (reassign_error || error) { + error = dmu_write_uio_dbuf(sa_get_db(zp->z_sa_hdl), + uio, nbytes, tx); + if (error == EFAULT) { + uio_prefaultpages(MIN(n, max_blksz), uio); + dmu_tx_commit(tx); + goto top; + } else if (error != 0) { dmu_tx_abort(tx); break; }