From e5beb3700a3834956c7c6680e972b0eef237b60c Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Thu, 18 May 2023 12:04:05 -0700 Subject: [PATCH] zvol: Fix zvol_misc crashes when using blk-mq We have recently been seeing a lot of zvol_misc test failures when blk-mq was enabled on F38 and Centos 9 (#14872). The failures look to be caused by kernel memory corruption. This fix removes a slightly dubious optimization in zfs_uiomove_bvec_rq() that saved the iterator contents of a rq_for_each_segment(). This optimization allowed restoring the "saved state" from a previous rq_for_each_segment() call on the same uio so that you wouldn't need to iterate though each bvec on every zfs_uiomove_bvec_rq() call. However, if the kernel is manipulating the requests/bios/bvecs under the covers between zfs_uiomove_bvec_rq() calls, then it could result in corruption from using the "saved state". Fixes: #14872 Signed-off-by: Tony Hutter --- TEST | 4 ++-- include/os/linux/spl/sys/uio.h | 8 -------- module/os/linux/zfs/zfs_uio.c | 29 ----------------------------- module/os/linux/zfs/zvol_os.c | 2 +- 4 files changed, 3 insertions(+), 40 deletions(-) diff --git a/TEST b/TEST index 376d6eb691e2..de686d169e87 100644 --- a/TEST +++ b/TEST @@ -33,9 +33,9 @@ #TEST_ZFSTESTS_DISKS="vdb vdc vdd" #TEST_ZFSTESTS_DISKSIZE="8G" #TEST_ZFSTESTS_ITERS="1" -#TEST_ZFSTESTS_OPTIONS="-vx" +TEST_ZFSTESTS_OPTIONS="-Kvx" #TEST_ZFSTESTS_RUNFILE="linux.run" -#TEST_ZFSTESTS_TAGS="functional" +TEST_ZFSTESTS_TAGS="zvol_misc" ### zfsstress #TEST_ZFSSTRESS_SKIP="yes" diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h index fe2b5c07a018..388fa1f2819c 100644 --- a/include/os/linux/spl/sys/uio.h +++ b/include/os/linux/spl/sys/uio.h @@ -73,13 +73,6 @@ typedef struct zfs_uio { size_t uio_skip; struct request *rq; - - /* - * Used for saving rq_for_each_segment() state between calls - * to zfs_uiomove_bvec_rq(). - */ - struct req_iterator iter; - struct bio_vec bv; } zfs_uio_t; @@ -138,7 +131,6 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq) } else { uio->uio_bvec = NULL; uio->uio_iovcnt = 0; - memset(&uio->iter, 0, sizeof (uio->iter)); } uio->uio_loffset = io_offset(bio, rq); diff --git a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c index 3efd4ab159c6..c2ed67c438c6 100644 --- a/module/os/linux/zfs/zfs_uio.c +++ b/module/os/linux/zfs/zfs_uio.c @@ -204,22 +204,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) this_seg_start = orig_loffset; rq_for_each_segment(bv, rq, iter) { - if (uio->iter.bio) { - /* - * If uio->iter.bio is present, then we know we've saved - * uio->iter from a previous call to this function, and - * we can skip ahead in this rq_for_each_segment() loop - * to where we last left off. That way, we don't need - * to iterate over tons of segments we've already - * processed - we can just restore the "saved state". - */ - iter = uio->iter; - bv = uio->bv; - this_seg_start = uio->uio_loffset; - memset(&uio->iter, 0, sizeof (uio->iter)); - continue; - } - /* * Lookup what the logical offset of the last byte of this * segment is. @@ -260,19 +244,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio) copied = 1; /* We copied some data */ } - if (n == 0) { - /* - * All done copying. Save our 'iter' value to the uio. - * This allows us to "save our state" and skip ahead in - * the rq_for_each_segment() loop the next time we call - * call zfs_uiomove_bvec_rq() on this uio (which we - * will be doing for any remaining data in the uio). - */ - uio->iter = iter; /* make a copy of the struct data */ - uio->bv = bv; - return (0); - } - this_seg_start = this_seg_end + 1; } diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index cdf32c78b4fe..23d2cd96cdbd 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -605,7 +605,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, zvol_discard_task, task, 0, &task->ent); } } else { - if (force_sync) { + if (force_sync || io_is_flush(bio,rq)) { zvol_write(&zvr); } else { task = zv_request_task_create(zvr);