diff --git a/config/kernel-blk-queue.m4 b/config/kernel-blk-queue.m4 index 559ae9800e8f..907a2c48ce67 100644 --- a/config/kernel-blk-queue.m4 +++ b/config/kernel-blk-queue.m4 @@ -315,6 +315,36 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [ ]) ]) +dnl # +dnl # See if kernel supports block multi-queue and blk_status_t. +dnl # blk_status_t represents the new status codes introduced in the 4.13 +dnl # kernel patch: +dnl # +dnl # block: introduce new block status code type +dnl # +dnl # We do not currently support the "old" block multi-queue interfaces from +dnl # prior kernels. +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [ + ZFS_LINUX_TEST_SRC([blk_mq], [ + #include + ], [ + struct blk_mq_tag_set tag_set = {0}; + (void) blk_mq_alloc_tag_set(&tag_set); + return BLK_STS_OK; + ], [$NO_UNUSED_BUT_SET_VARIABLE]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [ + AC_MSG_CHECKING([whether block multiqueue with blk_status_t is available]) + ZFS_LINUX_TEST_RESULT([blk_mq], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_BLK_MQ, 1, [block multiqueue is available]) + ], [ + AC_MSG_RESULT(no) + ]) +]) + AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [ ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI @@ -326,6 +356,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [ ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS + ZFS_AC_KERNEL_SRC_BLK_MQ ]) AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [ @@ -339,4 +370,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [ ZFS_AC_KERNEL_BLK_QUEUE_FLUSH ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS + ZFS_AC_KERNEL_BLK_MQ ]) diff --git a/configure.ac b/configure.ac index 7037c06b225f..dc711fe7b6fe 100644 --- a/configure.ac +++ b/configure.ac @@ -403,6 +403,7 @@ AC_CONFIG_FILES([ tests/zfs-tests/tests/functional/zpool_influxdb/Makefile tests/zfs-tests/tests/functional/zvol/Makefile tests/zfs-tests/tests/functional/zvol/zvol_ENOSPC/Makefile + tests/zfs-tests/tests/functional/zvol/zvol_stress/Makefile tests/zfs-tests/tests/functional/zvol/zvol_cli/Makefile tests/zfs-tests/tests/functional/zvol/zvol_misc/Makefile tests/zfs-tests/tests/functional/zvol/zvol_swap/Makefile diff --git a/include/os/linux/kernel/linux/blkdev_compat.h b/include/os/linux/kernel/linux/blkdev_compat.h index 9fa8884bb7a1..6d9c6c0f1a48 100644 --- a/include/os/linux/kernel/linux/blkdev_compat.h +++ b/include/os/linux/kernel/linux/blkdev_compat.h @@ -34,6 +34,7 @@ #include #include #include /* for SECTOR_* */ +#include #ifndef HAVE_BLK_QUEUE_FLAG_SET static inline void @@ -579,4 +580,90 @@ blk_generic_alloc_queue(make_request_fn make_request, int node_id) } #endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */ +static inline int +io_data_dir(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) { + enum req_opf op = req_op(rq); + if (op_is_write(op)) { + return (WRITE); + } else { + return (READ); + } + } +#endif + return (bio_data_dir(bio)); +} + +static inline int +io_is_flush(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (req_op(rq) == REQ_OP_FLUSH); +#endif + return (bio_is_flush(bio)); +} + +static inline int +io_is_discard(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (req_op(rq) == REQ_OP_DISCARD); +#endif + return (bio_is_discard(bio)); +} + +static inline int +io_is_secure_erase(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (req_op(rq) == REQ_OP_SECURE_ERASE); +#endif + return 
(bio_is_secure_erase(bio)); +} + +static inline int +io_is_fua(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (rq->cmd_flags & REQ_FUA); +#endif + return (bio_is_fua(bio)); +} + + +static inline uint64_t +io_offset(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (blk_rq_pos(rq) << 9); +#endif + return (BIO_BI_SECTOR(bio) << 9); +} + +static inline uint64_t +io_size(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (blk_rq_bytes(rq)); +#endif + return (BIO_BI_SIZE(bio)); +} + +static inline int +io_has_data(struct bio *bio, struct request *rq) +{ +#ifdef HAVE_BLK_MQ + if (rq != NULL) + return (bio_has_data(rq->bio)); +#endif + return (bio_has_data(bio)); +} #endif /* _ZFS_BLKDEV_H */ diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h index 439eec986236..ed13a4398b25 100644 --- a/include/os/linux/spl/sys/uio.h +++ b/include/os/linux/spl/sys/uio.h @@ -68,10 +68,20 @@ typedef struct zfs_uio { boolean_t uio_fault_disable; uint16_t uio_fmode; uint16_t uio_extflg; - ssize_t uio_resid; + ssize_t uio_resid; size_t uio_skip; + + struct request *rq; + + /* + * Used for saving rq_for_each_segment() state between calls + * to zfs_uiomove_bvec_rq(). + */ + struct req_iterator iter; + struct bio_vec bv; } zfs_uio_t; + #define zfs_uio_segflg(u) (u)->uio_segflg #define zfs_uio_offset(u) (u)->uio_loffset #define zfs_uio_resid(u) (u)->uio_resid @@ -116,17 +126,30 @@ zfs_uio_iovec_init(zfs_uio_t *uio, const struct iovec *iov, } static inline void -zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio) +zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq) { - uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)]; - uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio); - uio->uio_loffset = BIO_BI_SECTOR(bio) << 9; + if (bio) { + uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio); + uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)]; + } else { + uio->uio_bvec = NULL; + uio->uio_iovcnt = 0; + memset(&uio->iter, 0, sizeof (uio->iter)); + } + + uio->uio_loffset = io_offset(bio, rq); uio->uio_segflg = UIO_BVEC; uio->uio_fault_disable = B_FALSE; uio->uio_fmode = 0; uio->uio_extflg = 0; - uio->uio_resid = BIO_BI_SIZE(bio); - uio->uio_skip = BIO_BI_SKIP(bio); + uio->uio_resid = io_size(bio, rq); + if (bio) { + uio->uio_skip = BIO_BI_SKIP(bio); + } else { + uio->uio_skip = 0; + } + + uio->rq = rq; } #if defined(HAVE_VFS_IOV_ITER) diff --git a/man/man4/zfs.4 b/man/man4/zfs.4 index bb3cd2243ad3..eed90fbd2f51 100644 --- a/man/man4/zfs.4 +++ b/man/man4/zfs.4 @@ -2207,9 +2207,67 @@ for each I/O submitter. When unset, requests are handled asynchronously by a thread pool. The number of requests which can be handled concurrently is controlled by .Sy zvol_threads . -. -.It Sy zvol_threads Ns = Ns Sy 32 Pq uint -Max number of threads which can handle zvol I/O requests concurrently. +.Sy zvol_request_sync +is ignored when running on a kernel that supports block multiqueue +.Pq Li blk-mq . +. +.It Sy zvol_threads Ns = Ns Sy 0 Pq uint +The number of threads to use for processing zvol block IOs. +On older +.No non- Ns Li blk-mq +kernels, +.Sy zvol_threads +is the total number of threads to use for all zvols. +On kernels that support +.Li blk-mq +.Sy zvol_threads +is the total number of threads per zvol. +If +.Sy 0 +(the default) then internally set +.Sy zvol_threads +to the number of CPUs present. 
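A usage sketch (not part of the patch; the values shown are illustrative only): these module parameters are normally set when the zfs module is loaded, for example from a modprobe.d options file, and as described above they are only read and applied to a zvol at zvol load time.

    # /etc/modprobe.d/zfs.conf -- example values only
    options zfs zvol_use_blk_mq=1
    options zfs zvol_threads=0                   # 0 (default) = number of CPUs
    options zfs zvol_blk_mq_blocks_per_thread=8

    # Parameters registered with 0644 permissions (e.g. zvol_use_blk_mq) can
    # also be changed at runtime through sysfs; the new value only affects
    # zvols loaded after the change.
    echo 1 > /sys/module/zfs/parameters/zvol_use_blk_mq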
+.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint +Set to +.Sy 1 +to use the +.Li blk-mq +API for zvols. +Set to +.Sy 0 +(the default) to use the legacy zvol APIs. +This setting can give better or worse zvol performance depending on +the workload. +This parameter will only appear if your kernel supports +.Li blk-mq +and is only read and assigned to a zvol at zvol load time. +. +.It Sy zvol_blk_mq_blocks_per_thread Ns = Ns Sy 8 Pq uint +If +.Sy zvol_use_blk_mq +is enabled, then process this number of volblocksize blocks per zvol thread. +This tunable can be use to favor better performance for zvol reads (lower +values) or writes (higher values). +If set to 0, then the zvol layer will process the maximum number of blocks +per thread that it can. +This parameter will only appear if your kernel supports +.Li blk-mq +and is only read and assigned to a zvol at zvol load time. +. +.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint +The queue_depth value for the zvol +.Li blk-mq +interface. +This parameter will only appear if your kernel supports +.Li blk-mq +and is only read at zvol load time. +If +.Sy 0 +(the default) then use the kernel's default queue depth. +If you set +.Sy zvol_blk_mq_queue_depth +lower than the kernel's minimum queue depth, it will be internally +capped to the kernel's minimum queue depth (currently 4 on 5.15 kernels). . .It Sy zvol_volmode Ns = Ns Sy 1 Pq uint Defines zvol block devices behaviour when diff --git a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c index a3d5d5f83b6f..ece9cfe8dfdb 100644 --- a/module/os/linux/zfs/zfs_uio.c +++ b/module/os/linux/zfs/zfs_uio.c @@ -47,6 +47,7 @@ #include #include #include +#include /* * Move "n" bytes at byte address "p"; "rw" indicates the direction @@ -126,7 +127,7 @@ zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) } static int -zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) +zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) { const struct bio_vec *bv = uio->uio_bvec; size_t skip = uio->uio_skip; @@ -137,10 +138,13 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) cnt = MIN(bv->bv_len - skip, n); paddr = zfs_kmap_atomic(bv->bv_page); - if (rw == UIO_READ) + if (rw == UIO_READ) { + /* Copy from buffer 'p' to the bvec data */ bcopy(p, paddr + bv->bv_offset + skip, cnt); - else + } else { + /* Copy from bvec data to buffer 'p' */ bcopy(paddr + bv->bv_offset + skip, p, cnt); + } zfs_kunmap_atomic(paddr); skip += cnt; @@ -158,6 +162,139 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) return (0); } +#ifdef HAVE_BLK_MQ +static void +zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw, + struct bio_vec *bv) +{ + void *paddr; + + paddr = zfs_kmap_atomic(bv->bv_page); + if (rw == UIO_READ) { + /* Copy from buffer 'p' to the bvec data */ + bcopy(p, paddr + bv->bv_offset + skip, cnt); + } else { + /* Copy from bvec data to buffer 'p' */ + bcopy(paddr + bv->bv_offset + skip, p, cnt); + } + zfs_kunmap_atomic(paddr); +} + +/* + * Copy 'n' bytes of data between the buffer p[] and the data represented + * by the request in the uio. 
+ */ +static int +zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) +{ + struct request *rq = uio->rq; + struct bio_vec bv; + struct req_iterator iter; + size_t this_seg_start; /* logical offset */ + size_t this_seg_end; /* logical offset */ + size_t skip_in_seg; + size_t copy_from_seg; + size_t orig_loffset; + int copied = 0; + + /* + * Get the original logical offset of this entire request (because + * uio->uio_loffset will be modified over time). + */ + orig_loffset = io_offset(NULL, rq); + this_seg_start = orig_loffset; + + rq_for_each_segment(bv, rq, iter) { + if (uio->iter.bio) { + /* + * If uio->iter.bio is present, then we know we've saved + * uio->iter from a previous call to this function, and + * we can skip ahead in this rq_for_each_segment() loop + * to where we last left off. That way, we don't need + * to iterate over tons of segments we've already + * processed - we can just restore the "saved state". + */ + iter = uio->iter; + bv = uio->bv; + this_seg_start = uio->uio_loffset; + memset(&uio->iter, 0, sizeof (uio->iter)); + continue; + } + + /* + * Lookup what the logical offset of the last byte of this + * segment is. + */ + this_seg_end = this_seg_start + bv.bv_len - 1; + + /* + * We only need to operate on segments that have data we're + * copying. + */ + if (uio->uio_loffset >= this_seg_start && + uio->uio_loffset <= this_seg_end) { + /* + * Some, or all, of the data in this segment needs to be + * copied. + */ + + /* + * We may be not be copying from the first byte in the + * segment. Figure out how many bytes to skip copying + * from the beginning of this segment. + */ + skip_in_seg = uio->uio_loffset - this_seg_start; + + /* + * Calculate the total number of bytes from this + * segment that we will be copying. + */ + copy_from_seg = MIN(bv.bv_len - skip_in_seg, n); + + /* Copy the bytes */ + zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv); + p = ((char *)p) + copy_from_seg; + + n -= copy_from_seg; + uio->uio_resid -= copy_from_seg; + uio->uio_loffset += copy_from_seg; + copied = 1; /* We copied some data */ + } + + if (n == 0) { + /* + * All done copying. Save our 'iter' value to the uio. + * This allows us to "save our state" and skip ahead in + * the rq_for_each_segment() loop the next time we call + * call zfs_uiomove_bvec_rq() on this uio (which we + * will be doing for any remaining data in the uio). + */ + uio->iter = iter; /* make a copy of the struct data */ + uio->bv = bv; + return (0); + } + + this_seg_start = this_seg_end + 1; + } + + if (!copied) { + /* Didn't copy anything */ + uio->uio_resid = 0; + } + return (0); +} +#endif + +static inline int +zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) +{ +#ifdef HAVE_BLK_MQ + if (uio->rq != NULL) + return (zfs_uiomove_bvec_rq(p, n, rw, uio)); +#endif + return (zfs_uiomove_bvec_impl(p, n, rw, uio)); +} + #if defined(HAVE_VFS_IOV_ITER) static int zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, @@ -300,8 +437,14 @@ zfs_uioskip(zfs_uio_t *uio, size_t n) { if (n > uio->uio_resid) return; - - if (uio->uio_segflg == UIO_BVEC) { + /* + * When using a uio with a struct request, we simply + * use uio_loffset as a pointer to the next logical byte to + * copy in the request. We don't have to do any fancy + * accounting with uio_bvec/uio_iovcnt since we don't use + * them. 
+ */ + if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) { uio->uio_skip += n; while (uio->uio_iovcnt && uio->uio_skip >= uio->uio_bvec->bv_len) { diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index f772f416043e..a9606caef48b 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -41,17 +41,80 @@ #include #include -static unsigned int zvol_major = ZVOL_MAJOR; -static unsigned int zvol_request_sync = 0; -static unsigned int zvol_prefetch_bytes = (128 * 1024); -static unsigned long zvol_max_discard_blocks = 16384; -static unsigned int zvol_threads = 32; -static const unsigned int zvol_open_timeout_ms = 1000; +#ifdef HAVE_BLK_MQ +#include +#endif + +static void zvol_request_impl(zvol_state_t *zv, struct bio *bio, + struct request *rq, boolean_t force_sync); + +unsigned int zvol_major = ZVOL_MAJOR; +unsigned int zvol_request_sync = 0; +unsigned int zvol_prefetch_bytes = (128 * 1024); +unsigned long zvol_max_discard_blocks = 16384; +unsigned int zvol_open_timeout_ms = 1000; + +/* + * zvol_threads is the module param the user passes in. + * + * zvol_actual_threads is what we use internally, since the user can pass + * zvol_thread = 0 to mean "use all the CPUs" (the default). So on a quad + * core system, you would have: zvol_threads = 0, zvol_actual_threads = 4. + */ +static unsigned int zvol_threads = 0; +static unsigned int zvol_actual_threads; +#ifdef HAVE_BLK_MQ +static boolean_t zvol_use_blk_mq = B_FALSE; + +/* + * The maximum number of volblocksize blocks to process per thread. Typically, + * write heavy workloads perform better with higher values here, and read + * heavy workloads preform better with lower values, but that's not a hard + * and fast rule. It's basically a knob to tune between "less overhead with + * less parallelism" and "more overhead, but more parallelism". + * + * '8' was chosen as a reasonable, balanced, default based off of sequential + * read and write tests to a zvol in an NVMe pool (with 16 CPUs). + */ +static unsigned int zvol_blk_mq_blocks_per_thread = 8; +#endif + +#ifndef BLKDEV_DEFAULT_RQ +/* BLKDEV_MAX_RQ was renamed to BLKDEV_DEFAULT_RQ in the 5.16 kernel */ +#define BLKDEV_DEFAULT_RQ BLKDEV_MAX_RQ +#endif + +/* + * Finalize our BIO or request. + */ +#ifdef HAVE_BLK_MQ +#define END_IO(zv, bio, rq, error) do { \ + if (bio) { \ + BIO_END_IO(bio, error); \ + } else { \ + blk_mq_end_request(rq, errno_to_bi_status(error)); \ + } \ +} while (0) +#else +#define END_IO(zv, bio, rq, error) BIO_END_IO(bio, error) +#endif + +#ifdef HAVE_BLK_MQ +static unsigned int zvol_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ; +static unsigned int zvol_actual_blk_mq_queue_depth; +#endif struct zvol_state_os { struct gendisk *zvo_disk; /* generic disk */ struct request_queue *zvo_queue; /* request queue */ dev_t zvo_dev; /* device id */ + +#ifdef HAVE_BLK_MQ + struct blk_mq_tag_set tag_set; +#endif + + /* Set from the global 'zvol_use_blk_mq' at zvol load */ + boolean_t use_blk_mq; }; taskq_t *zvol_taskq; @@ -60,8 +123,14 @@ static struct ida zvol_ida; typedef struct zv_request_stack { zvol_state_t *zv; struct bio *bio; + struct request *rq; } zv_request_t; +typedef struct zv_work { + struct request *rq; + struct work_struct work; +} zv_work_t; + typedef struct zv_request_task { zv_request_t zvr; taskq_ent_t ent; @@ -83,6 +152,62 @@ zv_request_task_free(zv_request_task_t *task) kmem_free(task, sizeof (*task)); } +#ifdef HAVE_BLK_MQ + +/* + * This is called when a new block multiqueue request comes in. 
A request + * contains one or more BIOs. + */ +static blk_status_t zvol_mq_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct request *rq = bd->rq; + zvol_state_t *zv = rq->q->queuedata; + + /* Tell the kernel that we are starting to process this request */ + blk_mq_start_request(rq); + + if (blk_rq_is_passthrough(rq)) { + /* Skip non filesystem request */ + blk_mq_end_request(rq, BLK_STS_IOERR); + return (BLK_STS_IOERR); + } + + zvol_request_impl(zv, NULL, rq, 0); + + /* Acknowledge to the kernel that we got this request */ + return (BLK_STS_OK); +} + +static struct blk_mq_ops zvol_blk_mq_queue_ops = { + .queue_rq = zvol_mq_queue_rq, +}; + +/* Initialize our blk-mq struct */ +static int zvol_blk_mq_alloc_tag_set(zvol_state_t *zv) +{ + struct zvol_state_os *zso = zv->zv_zso; + + memset(&zso->tag_set, 0, sizeof (zso->tag_set)); + + /* Initialize tag set. */ + zso->tag_set.ops = &zvol_blk_mq_queue_ops; + zso->tag_set.nr_hw_queues = zvol_actual_threads; + zso->tag_set.queue_depth = zvol_actual_blk_mq_queue_depth; + zso->tag_set.numa_node = NUMA_NO_NODE; + zso->tag_set.cmd_size = 0; + + /* + * We need BLK_MQ_F_BLOCKING here since we do blocking calls in + * zvol_request_impl() + */ + zso->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; + zso->tag_set.driver_data = zv; + + return (blk_mq_alloc_tag_set(&zso->tag_set)); +} +#endif /* HAVE_BLK_MQ */ + /* * Given a path, return TRUE if path is a ZVOL. */ @@ -104,38 +229,47 @@ static void zvol_write(zv_request_t *zvr) { struct bio *bio = zvr->bio; + struct request *rq = zvr->rq; int error = 0; zfs_uio_t uio; - - zfs_uio_bvec_init(&uio, bio); - zvol_state_t *zv = zvr->zv; + struct request_queue *q = zv->zv_zso->zvo_queue; + struct gendisk *disk = zv->zv_zso->zvo_disk; + unsigned long start_time = 0; + ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); ASSERT3P(zv->zv_zilog, !=, NULL); /* bio marked as FLUSH need to flush before write */ - if (bio_is_flush(bio)) + if (io_is_flush(bio, rq)) zil_commit(zv->zv_zilog, ZVOL_OBJ); /* Some requests are just for flush and nothing else. */ - if (uio.uio_resid == 0) { + if (io_size(bio, rq) == 0) { rw_exit(&zv->zv_suspend_lock); - BIO_END_IO(bio, 0); + END_IO(zv, bio, rq, 0); return; } - struct request_queue *q = zv->zv_zso->zvo_queue; - struct gendisk *disk = zv->zv_zso->zvo_disk; + zfs_uio_bvec_init(&uio, bio, rq); + ssize_t start_resid = uio.uio_resid; - unsigned long start_time; - boolean_t acct = blk_queue_io_stat(q); - if (acct) - start_time = blk_generic_start_io_acct(q, disk, WRITE, bio); + /* + * With use_blk_mq, accounting is done by blk_mq_start_request() + * and blk_mq_end_request(), so we can skip it here. 
+ */ + if (!zv->zv_zso->use_blk_mq) { + boolean_t acct = blk_queue_io_stat(q); + if (acct) { + start_time = blk_generic_start_io_acct(q, disk, WRITE, + bio); + } + } boolean_t sync = - bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; + io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock, uio.uio_loffset, uio.uio_resid, RL_WRITER); @@ -177,10 +311,11 @@ zvol_write(zv_request_t *zvr) rw_exit(&zv->zv_suspend_lock); - if (acct) + if (!zv->zv_zso->use_blk_mq) { blk_generic_end_io_acct(q, disk, WRITE, bio, start_time); + } - BIO_END_IO(bio, -error); + END_IO(zv, bio, rq, -error); } static void @@ -195,27 +330,32 @@ static void zvol_discard(zv_request_t *zvr) { struct bio *bio = zvr->bio; + struct request *rq = zvr->rq; zvol_state_t *zv = zvr->zv; - uint64_t start = BIO_BI_SECTOR(bio) << 9; - uint64_t size = BIO_BI_SIZE(bio); + uint64_t start = io_offset(bio, rq); + uint64_t size = io_size(bio, rq); uint64_t end = start + size; boolean_t sync; int error = 0; dmu_tx_t *tx; + struct request_queue *q = zv->zv_zso->zvo_queue; + struct gendisk *disk = zv->zv_zso->zvo_disk; + unsigned long start_time = 0; + + boolean_t acct = blk_queue_io_stat(q); ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); ASSERT3P(zv->zv_zilog, !=, NULL); - struct request_queue *q = zv->zv_zso->zvo_queue; - struct gendisk *disk = zv->zv_zso->zvo_disk; - unsigned long start_time; - - boolean_t acct = blk_queue_io_stat(q); - if (acct) - start_time = blk_generic_start_io_acct(q, disk, WRITE, bio); + if (bio) { + if (acct) { + start_time = blk_generic_start_io_acct(q, disk, WRITE, + bio); + } + } - sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; + sync = io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS; if (end > zv->zv_volsize) { error = SET_ERROR(EIO); @@ -228,7 +368,7 @@ zvol_discard(zv_request_t *zvr) * the unaligned parts which is slow (read-modify-write) and useless * since we are not freeing any space by doing so. */ - if (!bio_is_secure_erase(bio)) { + if (!io_is_secure_erase(bio, rq)) { start = P2ROUNDUP(start, zv->zv_volblocksize); end = P2ALIGN(end, zv->zv_volblocksize); size = end - start; @@ -259,10 +399,14 @@ zvol_discard(zv_request_t *zvr) unlock: rw_exit(&zv->zv_suspend_lock); - if (acct) - blk_generic_end_io_acct(q, disk, WRITE, bio, start_time); + if (bio) { + if (acct) { + blk_generic_end_io_acct(q, disk, WRITE, bio, + start_time); + } + } - BIO_END_IO(bio, -error); + END_IO(zv, bio, rq, -error); } static void @@ -277,28 +421,38 @@ static void zvol_read(zv_request_t *zvr) { struct bio *bio = zvr->bio; + struct request *rq = zvr->rq; int error = 0; zfs_uio_t uio; + boolean_t acct = 0; + zvol_state_t *zv = zvr->zv; + struct request_queue *q = zv->zv_zso->zvo_queue; + struct gendisk *disk = zv->zv_zso->zvo_disk; + unsigned long start_time = 0; - zfs_uio_bvec_init(&uio, bio); + zfs_uio_bvec_init(&uio, bio, rq); - zvol_state_t *zv = zvr->zv; ASSERT3P(zv, !=, NULL); ASSERT3U(zv->zv_open_count, >, 0); - struct request_queue *q = zv->zv_zso->zvo_queue; - struct gendisk *disk = zv->zv_zso->zvo_disk; ssize_t start_resid = uio.uio_resid; - unsigned long start_time; - boolean_t acct = blk_queue_io_stat(q); - if (acct) - start_time = blk_generic_start_io_acct(q, disk, READ, bio); + /* + * When blk-mq is being used, accounting is done by + * blk_mq_start_request() and blk_mq_end_request(). 
+ */ + if (!zv->zv_zso->use_blk_mq) { + acct = blk_queue_io_stat(q); + if (acct) + start_time = blk_generic_start_io_acct(q, disk, READ, + bio); + } zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock, uio.uio_loffset, uio.uio_resid, RL_READER); uint64_t volsize = zv->zv_volsize; + while (uio.uio_resid > 0 && uio.uio_loffset < volsize) { uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1); @@ -322,10 +476,12 @@ zvol_read(zv_request_t *zvr) rw_exit(&zv->zv_suspend_lock); - if (acct) - blk_generic_end_io_acct(q, disk, READ, bio, start_time); + if (!zv->zv_zso->use_blk_mq) { + if (acct) + blk_generic_end_io_acct(q, disk, READ, bio, start_time); + } - BIO_END_IO(bio, -error); + END_IO(zv, bio, rq, -error); } static void @@ -336,55 +492,45 @@ zvol_read_task(void *arg) zv_request_task_free(task); } -#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS -#ifdef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID + +/* + * Process a BIO + * + * force_sync: Set to 0 to defer processing the BIO to a background taskq + * Set to 1 to process the BIO right now. + */ static void -zvol_submit_bio(struct bio *bio) -#else -static blk_qc_t -zvol_submit_bio(struct bio *bio) -#endif -#else -static MAKE_REQUEST_FN_RET -zvol_request(struct request_queue *q, struct bio *bio) -#endif +zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, + boolean_t force_sync) { -#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS -#if defined(HAVE_BIO_BDEV_DISK) - struct request_queue *q = bio->bi_bdev->bd_disk->queue; -#else - struct request_queue *q = bio->bi_disk->queue; -#endif -#endif - zvol_state_t *zv = q->queuedata; fstrans_cookie_t cookie = spl_fstrans_mark(); - uint64_t offset = BIO_BI_SECTOR(bio) << 9; - uint64_t size = BIO_BI_SIZE(bio); - int rw = bio_data_dir(bio); + uint64_t offset = io_offset(bio, rq); + uint64_t size = io_size(bio, rq); + int rw = io_data_dir(bio, rq); + + if (zvol_request_sync) { + force_sync = 1; + } + + zv_request_t zvr = { + .zv = zv, + .bio = bio, + .rq = rq, + }; - if (bio_has_data(bio) && offset + size > zv->zv_volsize) { - printk(KERN_INFO - "%s: bad access: offset=%llu, size=%lu\n", + if (io_has_data(bio, rq) && offset + size > zv->zv_volsize) { + printk(KERN_INFO "%s: bad access: offset=%llu, size=%lu\n", zv->zv_zso->zvo_disk->disk_name, (long long unsigned)offset, (long unsigned)size); - BIO_END_IO(bio, -SET_ERROR(EIO)); + END_IO(zv, bio, rq, -SET_ERROR(EIO)); goto out; } - zv_request_t zvr = { - .zv = zv, - .bio = bio, - }; zv_request_task_t *task; if (rw == WRITE) { - if (unlikely(zv->zv_flags & ZVOL_RDONLY)) { - BIO_END_IO(bio, -SET_ERROR(EROFS)); - goto out; - } - /* * Prevents the zvol from being suspended, or the ZIL being * concurrently opened. Will be released after the i/o @@ -418,7 +564,7 @@ zvol_request(struct request_queue *q, struct bio *bio) * i/o may be a ZIL write (via zil_commit()), or a read of an * indirect block, or a read of a data block (if this is a * partial-block write). We will indicate that the i/o is - * complete by calling BIO_END_IO() from the taskq callback. + * complete by calling END_IO() from the taskq callback. * * This design allows the calling thread to continue and * initiate more concurrent operations by calling @@ -438,12 +584,12 @@ zvol_request(struct request_queue *q, struct bio *bio) * of one i/o at a time per zvol. However, an even better * design would be for zvol_request() to initiate the zio * directly, and then be notified by the zio_done callback, - * which would call BIO_END_IO(). 
Unfortunately, the DMU/ZIL + * which would call END_IO(). Unfortunately, the DMU/ZIL * interfaces lack this functionality (they block waiting for * the i/o to complete). */ - if (bio_is_discard(bio) || bio_is_secure_erase(bio)) { - if (zvol_request_sync) { + if (io_is_discard(bio, rq) || io_is_secure_erase(bio, rq)) { + if (force_sync) { zvol_discard(&zvr); } else { task = zv_request_task_create(zvr); @@ -451,7 +597,7 @@ zvol_request(struct request_queue *q, struct bio *bio) zvol_discard_task, task, 0, &task->ent); } } else { - if (zvol_request_sync) { + if (force_sync) { zvol_write(&zvr); } else { task = zv_request_task_create(zvr); @@ -466,14 +612,14 @@ zvol_request(struct request_queue *q, struct bio *bio) * data and require no additional handling. */ if (size == 0) { - BIO_END_IO(bio, 0); + END_IO(zv, bio, rq, 0); goto out; } rw_enter(&zv->zv_suspend_lock, RW_READER); /* See comment in WRITE case above. */ - if (zvol_request_sync) { + if (force_sync) { zvol_read(&zvr); } else { task = zv_request_task_create(zvr); @@ -484,8 +630,33 @@ zvol_request(struct request_queue *q, struct bio *bio) out: spl_fstrans_unmark(cookie); -#if (defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \ - defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS)) && \ +} + +#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS +#ifdef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID +static void +zvol_submit_bio(struct bio *bio) +#else +static blk_qc_t +zvol_submit_bio(struct bio *bio) +#endif +#else +static MAKE_REQUEST_FN_RET +zvol_request(struct request_queue *q, struct bio *bio) +#endif +{ +#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS +#if defined(HAVE_BIO_BDEV_DISK) + struct request_queue *q = bio->bi_bdev->bd_disk->queue; +#else + struct request_queue *q = bio->bi_disk->queue; +#endif +#endif + zvol_state_t *zv = q->queuedata; + + zvol_request_impl(zv, bio, NULL, 0); +#if defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \ + defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \ !defined(HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID) return (BLK_QC_T_NONE); #endif @@ -802,6 +973,27 @@ zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo) return (0); } +/* + * Why have two separate block_device_operations structs? + * + * Normally we'd just have one, and assign 'submit_bio' as needed. However, + * it's possible the user's kernel is built with CONSTIFY_PLUGIN, meaning we + * can't just change submit_bio dynamically at runtime. So just create two + * separate structs to get around this. 
+ */ +static const struct block_device_operations zvol_ops_blk_mq = { + .open = zvol_open, + .release = zvol_release, + .ioctl = zvol_ioctl, + .compat_ioctl = zvol_compat_ioctl, + .check_events = zvol_check_events, +#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK + .revalidate_disk = zvol_revalidate_disk, +#endif + .getgeo = zvol_getgeo, + .owner = THIS_MODULE, +}; + static const struct block_device_operations zvol_ops = { .open = zvol_open, .release = zvol_release, @@ -818,6 +1010,87 @@ static const struct block_device_operations zvol_ops = { #endif }; +static int +zvol_alloc_non_blk_mq(struct zvol_state_os *zso) +{ +#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) +#if defined(HAVE_BLK_ALLOC_DISK) + zso->zvo_disk = blk_alloc_disk(NUMA_NO_NODE); + if (zso->zvo_disk == NULL) + return (1); + + zso->zvo_disk->minors = ZVOL_MINORS; + zso->zvo_queue = zso->zvo_disk->queue; +#else + zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE); + if (zso->zvo_queue == NULL) + return (1); + + zso->zvo_disk = alloc_disk(ZVOL_MINORS); + if (zso->zvo_disk == NULL) { + blk_cleanup_queue(zso->zvo_queue); + return (1); + } + + zso->zvo_disk->queue = zso->zvo_queue; +#endif /* HAVE_BLK_ALLOC_DISK */ +#else + zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE); + if (zso->zvo_queue == NULL) + return (1); + + zso->zvo_disk = alloc_disk(ZVOL_MINORS); + if (zso->zvo_disk == NULL) { + blk_cleanup_queue(zso->zvo_queue); + return (1); + } + + zso->zvo_disk->queue = zso->zvo_queue; +#endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */ + return (0); + +} + +static int +zvol_alloc_blk_mq(zvol_state_t *zv) +{ +#ifdef HAVE_BLK_MQ + struct zvol_state_os *zso = zv->zv_zso; + + /* Allocate our blk-mq tag_set */ + if (zvol_blk_mq_alloc_tag_set(zv) != 0) + return (1); + +#if defined(HAVE_BLK_ALLOC_DISK) + zso->zvo_disk = blk_mq_alloc_disk(&zso->tag_set, zv); + if (zso->zvo_disk == NULL) + return (1); + zso->zvo_queue = zso->zvo_disk->queue; + zso->zvo_disk->minors = ZVOL_MINORS; +#else + zso->zvo_disk = alloc_disk(ZVOL_MINORS); + if (zso->zvo_disk == NULL) { + blk_cleanup_queue(zso->zvo_queue); + return (1); + } + /* Allocate queue */ + zso->zvo_queue = blk_mq_init_queue(&zso->tag_set); + if (IS_ERR(zso->zvo_queue)) { + blk_mq_free_tag_set(&zso->tag_set); + return (1); + } + + /* Our queue is now created, assign it to our disk */ + zso->zvo_disk->queue = zso->zvo_queue; + +#endif + + /* Finish blk-mq init */ + blk_queue_logical_block_size(zso->zvo_queue, 512); +#endif + return (0); +} + /* * Allocate memory for a new zvol_state_t and setup the required * request queue and generic disk structures for the block device. 
@@ -828,6 +1101,7 @@ zvol_alloc(dev_t dev, const char *name) zvol_state_t *zv; struct zvol_state_os *zso; uint64_t volmode; + int ret; if (dsl_prop_get_integer(name, "volmode", &volmode, NULL) != 0) return (NULL); @@ -846,48 +1120,46 @@ zvol_alloc(dev_t dev, const char *name) list_link_init(&zv->zv_next); mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL); -#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS -#ifdef HAVE_BLK_ALLOC_DISK - zso->zvo_disk = blk_alloc_disk(NUMA_NO_NODE); - if (zso->zvo_disk == NULL) - goto out_kmem; - - zso->zvo_disk->minors = ZVOL_MINORS; - zso->zvo_queue = zso->zvo_disk->queue; +#ifdef HAVE_BLK_MQ + zv->zv_zso->use_blk_mq = zvol_use_blk_mq; #else - zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE); - if (zso->zvo_queue == NULL) - goto out_kmem; + zv->zv_zso->use_blk_mq = 0; +#endif - zso->zvo_disk = alloc_disk(ZVOL_MINORS); - if (zso->zvo_disk == NULL) { - blk_cleanup_queue(zso->zvo_queue); - goto out_kmem; + /* + * The block layer has 3 interfaces for getting BIOs: + * + * 1. blk-mq request queues (new) + * 2. submit_bio() (oldest) + * 3. regular request queues (old). + * + * Each of those interfaces has two permutations: + * + * a) We have blk_alloc_disk()/blk_mq_alloc_disk(), which allocates + * both the disk and its queue (5.14 kernel or newer) + * + * b) We don't have blk_*alloc_disk(), and have to allocate the + * disk and the queue separately. (5.13 kernel or older) + */ + if (zv->zv_zso->use_blk_mq) { + ret = zvol_alloc_blk_mq(zv); + zso->zvo_disk->fops = &zvol_ops_blk_mq; + } else { + ret = zvol_alloc_non_blk_mq(zso); + zso->zvo_disk->fops = &zvol_ops; } - - zso->zvo_disk->queue = zso->zvo_queue; -#endif /* HAVE_BLK_ALLOC_DISK */ -#else - zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE); - if (zso->zvo_queue == NULL) - goto out_kmem; - - zso->zvo_disk = alloc_disk(ZVOL_MINORS); - if (zso->zvo_disk == NULL) { - blk_cleanup_queue(zso->zvo_queue); + if (ret != 0) goto out_kmem; - } - - zso->zvo_disk->queue = zso->zvo_queue; -#endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */ blk_queue_set_write_cache(zso->zvo_queue, B_TRUE, B_TRUE); /* Limit read-ahead to a single page to prevent over-prefetching. */ blk_queue_set_read_ahead(zso->zvo_queue, 1); - /* Disable write merging in favor of the ZIO pipeline. */ - blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue); + if (!zv->zv_zso->use_blk_mq) { + /* Disable write merging in favor of the ZIO pipeline. 
*/ + blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue); + } /* Enable /proc/diskstats */ blk_queue_flag_set(QUEUE_FLAG_IO_STAT, zso->zvo_queue); @@ -920,7 +1192,6 @@ zvol_alloc(dev_t dev, const char *name) #endif } zso->zvo_disk->first_minor = (dev & MINORMASK); - zso->zvo_disk->fops = &zvol_ops; zso->zvo_disk->private_data = zv; snprintf(zso->zvo_disk->disk_name, DISK_NAME_LEN, "%s%d", ZVOL_DEV_NAME, (dev & MINORMASK)); @@ -953,6 +1224,11 @@ zvol_os_free(zvol_state_t *zv) ASSERT0(zv->zv_open_count); ASSERT3P(zv->zv_zso->zvo_disk->private_data, ==, NULL); +#ifdef HAVE_BLK_MQ + if (zv->zv_zso->use_blk_mq) + flush_scheduled_work(); +#endif + rw_destroy(&zv->zv_suspend_lock); zfs_rangelock_fini(&zv->zv_rangelock); @@ -965,6 +1241,11 @@ zvol_os_free(zvol_state_t *zv) put_disk(zv->zv_zso->zvo_disk); #endif +#ifdef HAVE_BLK_MQ + if (zv->zv_zso->use_blk_mq) + blk_mq_free_tag_set(&zv->zv_zso->tag_set); +#endif + ida_simple_remove(&zvol_ida, MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS); @@ -1046,8 +1327,69 @@ zvol_os_create_minor(const char *name) blk_queue_max_hw_sectors(zv->zv_zso->zvo_queue, (DMU_MAX_ACCESS / 4) >> 9); - blk_queue_max_segments(zv->zv_zso->zvo_queue, UINT16_MAX); - blk_queue_max_segment_size(zv->zv_zso->zvo_queue, UINT_MAX); + + if (zv->zv_zso->use_blk_mq) { + /* + * IO requests can be really big (1MB). When an IO request + * comes * in, it is passed off to zvol_read() or zvol_write() + * in a new thread, where it is chunked up into 'volblocksize' + * sized pieces and processed. So for example, if the request + * is a 1MB write and your volblocksize is 128k, one zvol_write + * thread will * take that request and sequentially do ten 128k + * IOs. This is due to the fact that the thread needs to lock + * each volblocksize sized block. So you might be wondering: + * "instead of passing the whole 1MB request to one thread, + * why not pass ten individual 128k chunks to ten threads and + * process the whole write in parallel?" The short answer is + * that there's a sweet spot number of chunks that balances + * the greater parallelism with the added overhead of more + * threads. The sweet spot can be different depending on if you + * have a read or write heavy workload. Writes typically want + * high chunk counts while reads typically want lower ones. On + * a test pool with 6 NVMe drives in a 3x 2-disk mirror + * configuration, with volblocksize=8k, the sweet spot for good + * sequential reads and writes was at 8 chunks. + */ + + /* + * Below we tell the kernel how big we want our requests + * to be. You would think that blk_queue_io_opt() would be + * used to do this since it is used to "set optimal request + * size for the queue", but that doesn't seem to do + * anything - the kernel still gives you huge requests + * with tons of little PAGE_SIZE segments contained within it. + * + * Knowing that the kernel will just give you PAGE_SIZE segments + * no matter what, you can say "ok, I want PAGE_SIZE byte + * segments, and I want 'N' of them per request", where N is + * the correct number of segments for the volblocksize and + * number of chunks you want. + */ +#ifdef HAVE_BLK_MQ + if (zvol_blk_mq_blocks_per_thread != 0) { + unsigned int chunks; + chunks = MIN(zvol_blk_mq_blocks_per_thread, UINT16_MAX); + + blk_queue_max_segment_size(zv->zv_zso->zvo_queue, + PAGE_SIZE); + blk_queue_max_segments(zv->zv_zso->zvo_queue, + (zv->zv_volblocksize * chunks) / PAGE_SIZE); + } else { + /* + * Special case: zvol_blk_mq_blocks_per_thread = 0 + * Max everything out. 
+ */ + blk_queue_max_segments(zv->zv_zso->zvo_queue, + UINT16_MAX); + blk_queue_max_segment_size(zv->zv_zso->zvo_queue, + UINT_MAX); + } +#endif + } else { + blk_queue_max_segments(zv->zv_zso->zvo_queue, UINT16_MAX); + blk_queue_max_segment_size(zv->zv_zso->zvo_queue, UINT_MAX); + } + blk_queue_physical_block_size(zv->zv_zso->zvo_queue, zv->zv_volblocksize); blk_queue_io_opt(zv->zv_zso->zvo_queue, zv->zv_volblocksize); @@ -1167,19 +1509,36 @@ int zvol_init(void) { int error; - int threads = MIN(MAX(zvol_threads, 1), 1024); + + if (zvol_threads == 0) { + zvol_actual_threads = num_online_cpus(); + } else { + zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024); + } error = register_blkdev(zvol_major, ZVOL_DRIVER); if (error) { printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error); return (error); } - zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri, - threads * 2, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC); + +#ifdef HAVE_BLK_MQ + if (zvol_blk_mq_queue_depth == 0) { + zvol_actual_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ; + } else { + zvol_actual_blk_mq_queue_depth = + MAX(zvol_blk_mq_queue_depth, BLKDEV_MIN_RQ); + } +#endif + /* We're not using blk-mq so setup taskqueues */ + zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_actual_threads, maxclsyspri, + zvol_actual_threads, INT_MAX, + TASKQ_PREPOPULATE | TASKQ_DYNAMIC); if (zvol_taskq == NULL) { unregister_blkdev(zvol_major, ZVOL_DRIVER); return (-ENOMEM); } + zvol_init_impl(); ida_init(&zvol_ida); return (0); @@ -1202,7 +1561,8 @@ module_param(zvol_major, uint, 0444); MODULE_PARM_DESC(zvol_major, "Major number for zvol device"); module_param(zvol_threads, uint, 0444); -MODULE_PARM_DESC(zvol_threads, "Max number of threads to handle I/O requests"); +MODULE_PARM_DESC(zvol_threads, "Number of threads to handle I/O requests. 
Set" + "to 0 to use all active CPUs"); module_param(zvol_request_sync, uint, 0644); MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests"); @@ -1215,4 +1575,17 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end"); module_param(zvol_volmode, uint, 0644); MODULE_PARM_DESC(zvol_volmode, "Default volmode property value"); + +#ifdef HAVE_BLK_MQ +module_param(zvol_blk_mq_queue_depth, uint, 0644); +MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth"); + +module_param(zvol_use_blk_mq, uint, 0644); +MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols"); + +module_param(zvol_blk_mq_blocks_per_thread, uint, 0644); +MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread, + "Process volblocksize blocks per thread"); +#endif + /* END CSTYLED */ diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index a7ddb146e59b..2b846a6e66fb 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -941,6 +941,10 @@ tests = ['zvol_misc_002_pos', 'zvol_misc_hierarchy', 'zvol_misc_rename_inuse', 'zvol_misc_snapdev', 'zvol_misc_volmode', 'zvol_misc_zil'] tags = ['functional', 'zvol', 'zvol_misc'] +[tests/functional/zvol/zvol_stress] +tests = ['zvol_stress'] +tags = ['functional', 'zvol', 'zvol_stress'] + [tests/functional/zvol/zvol_swap] tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_004_pos'] tags = ['functional', 'zvol', 'zvol_swap'] diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib index b229a161518b..ddf9d349c5ff 100644 --- a/tests/zfs-tests/include/libtest.shlib +++ b/tests/zfs-tests/include/libtest.shlib @@ -3334,18 +3334,22 @@ function is_te_enabled fi } -# Utility function to determine if a system has multiple cpus. -function is_mp +# Return the number of CPUs (cross-platform) +function get_num_cpus { - if is_linux; then - (($(nproc) > 1)) + if is_linux ; then + nproc elif is_freebsd; then sysctl -n kern.smp.cpus else - (($(psrinfo | wc -l) > 1)) + psrinfo | wc -l fi +} - return $? +# Utility function to determine if a system has multiple cpus. +function is_mp +{ + [[ $(get_num_cpus) -gt 1 ]] } function get_cpu_freq @@ -3888,14 +3892,23 @@ function get_tunable_impl { typeset name="$1" typeset module="${2:-zfs}" + typeset check_only="$3" eval "typeset tunable=\$$name" case "$tunable" in UNSUPPORTED) - log_unsupported "Tunable '$name' is unsupported on $(uname)" + if [ -z "$check_only" ] ; then + log_unsupported "Tunable '$name' is unsupported on $(uname)" + else + return 1 + fi ;; "") - log_fail "Tunable '$name' must be added to tunables.cfg" + if [ -z "$check_only" ] ; then + log_fail "Tunable '$name' must be added to tunables.cfg" + else + return 1 + fi ;; *) ;; @@ -3919,6 +3932,14 @@ function get_tunable_impl return 1 } +# Does a tunable exist? +# +# $1: Tunable name +function tunable_exists +{ + get_tunable_impl $1 "zfs" 1 +} + # # Prints the current time in seconds since UNIX Epoch. 
# diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg index eea2af2edcf0..fcacf519ce44 100644 --- a/tests/zfs-tests/include/tunables.cfg +++ b/tests/zfs-tests/include/tunables.cfg @@ -87,6 +87,7 @@ VDEV_VALIDATE_SKIP vdev.validate_skip vdev_validate_skip VOL_INHIBIT_DEV UNSUPPORTED zvol_inhibit_dev VOL_MODE vol.mode zvol_volmode VOL_RECURSIVE vol.recursive UNSUPPORTED +VOL_USE_BLK_MQ UNSUPPORTED zvol_use_blk_mq XATTR_COMPAT xattr_compat zfs_xattr_compat ZEVENT_LEN_MAX zevent.len_max zfs_zevent_len_max ZEVENT_RETAIN_MAX zevent.retain_max zfs_zevent_retain_max diff --git a/tests/zfs-tests/tests/functional/zvol/Makefile.am b/tests/zfs-tests/tests/functional/zvol/Makefile.am index e4910754bb81..9089a939abb0 100644 --- a/tests/zfs-tests/tests/functional/zvol/Makefile.am +++ b/tests/zfs-tests/tests/functional/zvol/Makefile.am @@ -5,6 +5,7 @@ dist_pkgdata_DATA = \ SUBDIRS = \ zvol_ENOSPC \ + zvol_stress \ zvol_cli \ zvol_misc \ zvol_swap diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_stress/Makefile.am b/tests/zfs-tests/tests/functional/zvol/zvol_stress/Makefile.am new file mode 100644 index 000000000000..5ccd0c7b5619 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zvol/zvol_stress/Makefile.am @@ -0,0 +1,5 @@ +pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/zvol/zvol_stress +dist_pkgdata_SCRIPTS = \ + cleanup.ksh \ + setup.ksh \ + zvol_stress.ksh diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_stress/cleanup.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_stress/cleanup.ksh new file mode 100755 index 000000000000..b81a372638e3 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zvol/zvol_stress/cleanup.ksh @@ -0,0 +1,36 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2007 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +default_cleanup diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_stress/setup.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_stress/setup.ksh new file mode 100755 index 000000000000..746ac307a755 --- /dev/null +++ b/tests/zfs-tests/tests/functional/zvol/zvol_stress/setup.ksh @@ -0,0 +1,38 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. 
+# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright 2009 Sun Microsystems, Inc. All rights reserved. +# Use is subject to license terms. +# + +# +# Copyright (c) 2013 by Delphix. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "global" + +default_setup "$DISKS" + +log_pass diff --git a/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh b/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh new file mode 100755 index 000000000000..94d3717c42af --- /dev/null +++ b/tests/zfs-tests/tests/functional/zvol/zvol_stress/zvol_stress.ksh @@ -0,0 +1,171 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# Copyright (c) 2022 by Lawrence Livermore National Security, LLC. + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/reservation/reservation.shlib + +# +# DESCRIPTION: +# Stress test multithreaded transfers to multiple zvols. Also verify +# zvol errors show up in zpool status. +# +# STRATEGY: +# +# For both the normal submit_bio() codepath and the blk-mq codepath, do +# the following: +# +# 1. Create one zvol per CPU +# 2. In parallel, spawn an fio "write and verify" for each zvol +# 3. Inject write errors +# 4. Write to one of the zvols with dd and verify the errors +# + +verify_runnable "global" + +num_zvols=$(get_num_cpus) + +# If we were making one big zvol from all the pool space, it would +# be this big: +biggest_zvol_size_possible=$(largest_volsize_from_pool $TESTPOOL) + +# Crude calculation: take the biggest zvol size we could possibly +# create, knock 10% off it (for overhead) and divide by the number +# of ZVOLs we want to make. 
+each_zvol_size=$(( floor($biggest_zvol_size_possible * 0.9) / $num_zvols )) + +typeset tmpdir="$(mktemp -d zvol_stress_fio_state.XXXXXX)" + +function create_zvols +{ + log_note "Creating $num_zvols zvols that are ${each_zvol_size}B each" + for i in $(seq $num_zvols) ; do + log_must zfs create -V $each_zvol_size $TESTPOOL/testvol$i + block_device_wait "$ZVOL_DEVDIR/$TESTPOOL/testvol$i" + done +} + +function destroy_zvols +{ + for i in $(seq $num_zvols) ; do + log_must_busy zfs destroy $TESTPOOL/testvol$i + done +} + +# enable/disable blk-mq (if available) +# +# $1: 1 = enable, 0 = disable +function set_blk_mq +{ + # Not all kernels support blk-mq + if tunable_exists VOL_USE_BLK_MQ ; then + log_must set_tunable32 VOL_USE_BLK_MQ $1 + fi +} + +function do_zvol_stress +{ + # Write 10% of each zvol, or 50MB, whichever is less + zvol_write_size=$((each_zvol_size / 10)) + if [ $zvol_write_size -gt $((50 * 1048576)) ] ; then + zvol_write_size=$((50 * 1048576)) + fi + zvol_write_size_mb=$(($zvol_write_size / 1048576)) + + if is_linux ; then + engine=libaio + else + engine=psync + fi + + # Spawn off one fio per zvol in parallel + pids="" + for i in $(seq $num_zvols) ; do + # Spawn one fio per zvol as its own process + fio --ioengine=$engine --name=zvol_stress$i --direct=0 \ + --filename="$ZVOL_DEVDIR/$TESTPOOL/testvol$i" --bs=1048576 \ + --iodepth=10 --readwrite=randwrite --size=${zvol_write_size} \ + --verify_async=2 --numjobs=1 --verify=sha1 \ + --verify_fatal=1 \ + --continue_on_error=none \ + --error_dump=1 \ + --exitall_on_error \ + --aux-path="$tmpdir" --do_verify=1 & + pids="$pids $!" + done + + # Wait for all the spawned fios to finish and look for errors + fail="" + i=0 + for pid in $pids ; do + log_note "$s waiting on $pid" + if ! wait $pid ; then + log_fail "fio error on $TESTPOOL/testvol$i" + fi + i=$(($i + 1)) + done +} + +function cleanup +{ + log_must zinject -c all + log_must zpool clear $TESTPOOL + destroy_zvols + set_blk_mq 0 + + # Remove all fio's leftover state files + if [ -n "$tmpdir" ] ; then + rm -f "$tmpdir"/*.state + rmdir "$tmpdir" + fi +} + +log_onexit cleanup + +log_assert "Stress test zvols" + +set_blk_mq 0 +create_zvols +# Do some fio write/verifies in parallel +do_zvol_stress +destroy_zvols + +# Enable blk-mq (block multi-queue), and re-run the same test +set_blk_mq 1 +create_zvols +do_zvol_stress + +# Inject some errors, and verify we see some IO errors in zpool status +for DISK in $DISKS ; do + log_must zinject -d $DISK -f 10 -e io -T write $TESTPOOL +done +log_must dd if=/dev/zero of=$ZVOL_DEVDIR/$TESTPOOL/testvol1 bs=512 count=50 +log_must zinject -c all + +log_must zpool status +write_errors=$(zpool status -pv | grep $DISK | awk '{print $4}') +if [ $write_errors -le 0 ] ; then + log_fail "Expected to see some write errors (saw $write_errors)" +else + log_note "Correctly saw $write_errors write errors" +fi +log_pass "Done with zvol_stress"
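A verification sketch (not part of the test; the device name zd0 is hypothetical, zvols appear as /dev/zd<N>): whether a given zvol actually ended up on the blk-mq path can be checked from sysfs, since request-based (blk-mq) block devices expose an mq/ directory of hardware queues while the bio-based submit_bio() path does not.

    # Hypothetical zvol device node; substitute the real zd<N> device.
    dev=zd0
    if [ -d /sys/block/$dev/mq ] ; then
        echo "$dev is using blk-mq"
    else
        echo "$dev is using the bio-based (submit_bio) path"
    fi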