Skip to content

Commit

Permalink
fuse: {uring} Pin the user buffer
Browse files Browse the repository at this point in the history
This is to allow copying into the buffer from the application
without the need to copy in ring context (and with that,
the need that the ring task is active in kernel space).

Also absolutely needed for now to avoid this teardown issue

 1525.905504] KASAN: null-ptr-deref in range [0x00000000000001a0-0x00000000000001a7]
[ 1525.910431] CPU: 15 PID: 183 Comm: kworker/15:1 Tainted: G           O       6.10.0+ torvalds#48
[ 1525.916449] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[ 1525.922470] Workqueue: events io_fallback_req_func
[ 1525.925840] RIP: 0010:__lock_acquire+0x74/0x7b80
[ 1525.929010] Code: 89 bc 24 80 00 00 00 0f 85 1c 5f 00 00 83 3d 6e 80 b0 02 00 0f 84 1d 12 00 00 83 3d 65 c7 67 02 00 74 27 48 89 f8 48 c1 e8 03 <42> 80 3c 30 00 74 0d e8 50 44 42 00 48 8b bc 24 80 00 00 00 48 c7
[ 1525.942211] RSP: 0018:ffff88810b2af490 EFLAGS: 00010002
[ 1525.945672] RAX: 0000000000000034 RBX: 0000000000000000 RCX: 0000000000000001
[ 1525.950421] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000000001a0
[ 1525.955200] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000
[ 1525.959979] R10: dffffc0000000000 R11: fffffbfff07b1cbe R12: 0000000000000000
[ 1525.964252] R13: 0000000000000001 R14: dffffc0000000000 R15: 0000000000000001
[ 1525.968225] FS:  0000000000000000(0000) GS:ffff88875b200000(0000) knlGS:0000000000000000
[ 1525.973932] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1525.976694] CR2: 00005555b6a381f0 CR3: 000000012f5f1000 CR4: 00000000000006f0
[ 1525.980030] Call Trace:
[ 1525.981371]  <TASK>
[ 1525.982567]  ? __die_body+0x66/0xb0
[ 1525.984376]  ? die_addr+0xc1/0x100
[ 1525.986111]  ? exc_general_protection+0x1c6/0x330
[ 1525.988401]  ? asm_exc_general_protection+0x22/0x30
[ 1525.990864]  ? __lock_acquire+0x74/0x7b80
[ 1525.992901]  ? mark_lock+0x9f/0x360
[ 1525.994635]  ? __lock_acquire+0x1420/0x7b80
[ 1525.996629]  ? attach_entity_load_avg+0x47d/0x550
[ 1525.998765]  ? hlock_conflict+0x5a/0x1f0
[ 1526.000515]  ? __bfs+0x2dc/0x5a0
[ 1526.001993]  lock_acquire+0x1fb/0x3d0
[ 1526.004727]  ? gup_fast_fallback+0x13f/0x1d80
[ 1526.006586]  ? gup_fast_fallback+0x13f/0x1d80
[ 1526.008412]  gup_fast_fallback+0x158/0x1d80
[ 1526.010170]  ? gup_fast_fallback+0x13f/0x1d80
[ 1526.011999]  ? __lock_acquire+0x2b07/0x7b80
[ 1526.013793]  __iov_iter_get_pages_alloc+0x36e/0x980
[ 1526.015876]  ? do_raw_spin_unlock+0x5a/0x8a0
[ 1526.017734]  iov_iter_get_pages2+0x56/0x70
[ 1526.019491]  fuse_copy_fill+0x48e/0x980 [fuse]
[ 1526.021400]  fuse_copy_args+0x174/0x6a0 [fuse]
[ 1526.023199]  fuse_uring_prepare_send+0x319/0x6c0 [fuse]
[ 1526.025178]  fuse_uring_send_req_in_task+0x42/0x100 [fuse]
[ 1526.027163]  io_fallback_req_func+0xb4/0x170
[ 1526.028737]  ? process_scheduled_works+0x75b/0x1160
[ 1526.030445]  process_scheduled_works+0x85c/0x1160
[ 1526.032073]  worker_thread+0x8ba/0xce0
[ 1526.033388]  kthread+0x23e/0x2b0
[ 1526.035404]  ? pr_cont_work_flush+0x290/0x290
[ 1526.036958]  ? kthread_blkcg+0xa0/0xa0
[ 1526.038321]  ret_from_fork+0x30/0x60
[ 1526.039600]  ? kthread_blkcg+0xa0/0xa0
[ 1526.040942]  ret_from_fork_asm+0x11/0x20
[ 1526.042353]  </TASK>
  • Loading branch information
bsbernd committed Sep 1, 2024
1 parent 387c59c commit 9a10f51
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 69 deletions.
9 changes: 9 additions & 0 deletions fs/fuse/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -769,6 +769,15 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
cs->pipebufs++;
cs->nr_segs++;
}
} else if (cs->is_uring) {
cs->pg = cs->ring.pages[cs->ring.page_idx++];
/*
* non stricly needed, just to avoid a uring exception in
* fuse_copy_finish
*/
get_page(cs->pg);
cs->len = PAGE_SIZE;
cs->offset = 0;
} else {
size_t off;
err = iov_iter_get_pages2(cs->iter, &page, PAGE_SIZE, 1, &off);
Expand Down
186 changes: 121 additions & 65 deletions fs/fuse/dev_uring.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@
#include <linux/topology.h>
#include <linux/io_uring/cmd.h>

#define FUSE_RING_HEADER_PG 0
#define FUSE_RING_PAYLOAD_PG 1

struct fuse_uring_cmd_pdu
{
struct fuse_ring_ent *ring_ent;
Expand Down Expand Up @@ -251,6 +254,21 @@ static void fuse_uring_stop_fuse_req_end(struct fuse_ring_ent *ent)
fuse_request_end(req);
}

/*
* Copy from memmap.c, should be exported
*/
static void io_pages_free(struct page ***pages, int npages)
{
struct page **page_array = *pages;

if (!page_array)
return;

unpin_user_pages(page_array, npages);
kvfree(page_array);
*pages = NULL;
}

/*
* Release a request/entry on connection tear down
*/
Expand All @@ -276,6 +294,8 @@ static void fuse_uring_entry_teardown(struct fuse_ring_ent *ent,
if (ent->fuse_req)
fuse_uring_stop_fuse_req_end(ent);

io_pages_free(&ent->user_pages, ent->nr_user_pages);

ent->state = FRRS_FREED;
}

Expand Down Expand Up @@ -418,6 +438,7 @@ static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,
goto seterr;
}

/* FIXME copied from dev.c, check what 512 means */
if (oh->error <= -512 || oh->error > 0) {
err = -EINVAL;
goto seterr;
Expand Down Expand Up @@ -466,53 +487,41 @@ static int fuse_uring_out_header_has_err(struct fuse_out_header *oh,

static int fuse_uring_copy_from_ring(struct fuse_ring *ring,
struct fuse_req *req,
struct fuse_ring_ent *ent)
struct fuse_ring_ent *ent,
struct fuse_ring_req *rreq)
{
struct fuse_ring_req __user *rreq = ent->rreq;
struct fuse_copy_state cs;
struct fuse_args *args = req->args;
struct iov_iter iter;
int err;
int res_arg_len;
int res_arg_len, err;

err = copy_from_user(&res_arg_len, &rreq->in_out_arg_len,
sizeof(res_arg_len));
if (err)
return err;

err = import_ubuf(ITER_SOURCE, (void __user *)&rreq->in_out_arg,
ent->max_arg_len, &iter);
if (err)
return err;
res_arg_len = rreq->in_out_arg_len;

fuse_copy_init(&cs, 0, &iter);
cs.is_uring = 1;
cs.ring.pages = &ent->user_pages[FUSE_RING_PAYLOAD_PG];
cs.req = req;

return fuse_copy_out_args(&cs, args, res_arg_len);
err = fuse_copy_out_args(&cs, args, res_arg_len);

return err;
}

/*
* Copy data from the req to the ring buffer
*/
/*
* Copy data from the req to the ring buffer
*/
static int fuse_uring_copy_to_ring(struct fuse_ring *ring, struct fuse_req *req,
struct fuse_ring_ent *ent)
struct fuse_ring_ent *ent,
struct fuse_ring_req *rreq)
{
struct fuse_ring_req __user *rreq = ent->rreq;
struct fuse_copy_state cs;
struct fuse_args *args = req->args;
int err, res;
int err;
struct iov_iter iter;

err = import_ubuf(ITER_DEST, (void __user *)&rreq->in_out_arg,
ent->max_arg_len, &iter);
if (err) {
pr_info("Import user buffer failed\n");
return err;
}

fuse_copy_init(&cs, 1, &iter);
cs.is_uring = 1;
cs.ring.pages = &ent->user_pages[FUSE_RING_PAYLOAD_PG];
cs.req = req;
err = fuse_copy_args(&cs, args->in_numargs, args->in_pages,
(struct fuse_arg *)args->in_args, 0);
Expand All @@ -521,22 +530,19 @@ static int fuse_uring_copy_to_ring(struct fuse_ring *ring, struct fuse_req *req,
return err;
}

BUILD_BUG_ON((sizeof(rreq->in_out_arg_len) != sizeof(cs.ring.offset)));
res = copy_to_user(&rreq->in_out_arg_len, &cs.ring.offset,
sizeof(rreq->in_out_arg_len));
err = res > 0 ? -EFAULT : res;
rreq->in_out_arg_len = cs.ring.offset;

return err;
}

static int
fuse_uring_prepare_send(struct fuse_ring_ent *ring_ent)
{
struct fuse_ring_req *rreq = ring_ent->rreq;
struct fuse_ring_req *rreq = NULL;
struct fuse_ring_queue *queue = ring_ent->queue;
struct fuse_ring *ring = queue->ring;
struct fuse_req *req = ring_ent->fuse_req;
int err = 0, res;
int err = 0;

if (WARN_ON(ring_ent->state != FRRS_FUSE_REQ)) {
pr_err("qid=%d tag=%d ring-req=%p buf_req=%p invalid state %d on send\n",
Expand All @@ -552,25 +558,27 @@ fuse_uring_prepare_send(struct fuse_ring_ent *ring_ent)
__func__, queue->qid, ring_ent->tag, ring_ent->state,
req->in.h.opcode, req->in.h.unique);

rreq = kmap_local_page(ring_ent->user_pages[FUSE_RING_HEADER_PG]);

/* copy the request */
err = fuse_uring_copy_to_ring(ring, req, ring_ent);
err = fuse_uring_copy_to_ring(ring, req, ring_ent, rreq);
if (unlikely(err)) {
pr_info("Copy to ring failed: %d\n", err);
goto err;
}

/* copy fuse_in_header */
res = copy_to_user(&rreq->in, &req->in.h, sizeof(rreq->in));
err = res > 0 ? -EFAULT : res;
if (err)
goto err;
rreq->in = req->in.h;

err = 0;
set_bit(FR_SENT, &req->flags);
return 0;

out:
if (rreq)
kunmap_local(rreq);
return err;
err:
fuse_uring_req_end(ring_ent, true, err);
return err;
goto out;
}

/*
Expand Down Expand Up @@ -683,16 +691,13 @@ static void fuse_uring_commit(struct fuse_ring_ent *ring_ent,
{
struct fuse_ring *ring = ring_ent->queue->ring;
struct fuse_conn *fc = ring->fc;
struct fuse_ring_req *rreq = ring_ent->rreq;
struct fuse_ring_req *rreq;
struct fuse_req *req = ring_ent->fuse_req;
ssize_t err = 0;
bool set_err = false;

err = copy_from_user(&req->out.h, &rreq->out, sizeof(req->out.h));
if (err) {
req->out.h.error = err;
goto out;
}
rreq = kmap_local_page(ring_ent->user_pages[FUSE_RING_HEADER_PG]);
req->out.h = rreq->out;

err = fuse_uring_out_header_has_err(&req->out.h, req, fc);
if (err) {
Expand All @@ -702,7 +707,8 @@ static void fuse_uring_commit(struct fuse_ring_ent *ring_ent,
goto out;
}

err = fuse_uring_copy_from_ring(ring, req, ring_ent);
err = fuse_uring_copy_from_ring(ring, req, ring_ent, rreq);
kunmap_local(rreq);
if (err)
set_err = true;

Expand Down Expand Up @@ -831,46 +837,95 @@ __must_hold(ring_ent->queue->lock)
return 0;
}

/*
* Copy from memmap.c, should be exported there
*/
static struct page **io_pin_pages(unsigned long uaddr, unsigned long len,
int *npages)
{
unsigned long start, end, nr_pages;
struct page **pages;
int ret;

end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
start = uaddr >> PAGE_SHIFT;
nr_pages = end - start;
if (WARN_ON_ONCE(!nr_pages))
return ERR_PTR(-EINVAL);

pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
if (!pages)
return ERR_PTR(-ENOMEM);

ret = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE | FOLL_LONGTERM,
pages);
/* success, mapped all pages */
if (ret == nr_pages) {
*npages = nr_pages;
return pages;
}

/* partial map, or didn't map anything */
if (ret >= 0) {
/* if we did partial map, release any pages we did get */
if (ret)
unpin_user_pages(pages, ret);
ret = -EFAULT;
}
kvfree(pages);
return ERR_PTR(ret);
}


/* FUSE_URING_REQ_FETCH handler */
static int fuse_uring_fetch(struct fuse_ring_ent *ring_ent,
struct io_uring_cmd *cmd, unsigned int issue_flags)
__releases(ring_ent->queue->lock)
{
struct fuse_ring *ring = ring_ent->queue->ring;
struct fuse_ring_queue *queue = ring_ent->queue;
int ret;
int err;

/* No other bit must be set here */
ret = -EINVAL;
err = -EINVAL;
if (ring_ent->state != FRRS_INIT)
goto err;
goto err_unlock;

/*
* FUSE_URING_REQ_FETCH is an initialization exception, needs
* state override
*/
ring_ent->state = FRRS_USERSPACE;
ret = fuse_ring_ring_ent_unset_userspace(ring_ent);
if (ret != 0) {
pr_info_ratelimited(
"qid=%d tag=%d register req state %d expected %d",
queue->qid, ring_ent->tag, ring_ent->state,
FRRS_INIT);
goto err;
}
fuse_ring_ring_ent_unset_userspace(ring_ent);

err = _fuse_uring_fetch(ring_ent, cmd, issue_flags);
if (err)
goto err_unlock;

spin_unlock(&queue->lock);

ret = _fuse_uring_fetch(ring_ent, cmd, issue_flags);
if (ret)
/* must not hold the queue->lock */
ring_ent->user_pages = io_pin_pages(ring_ent->user_buf,
ring_ent->user_buf_len,
&ring_ent->nr_user_pages);
if (IS_ERR(ring_ent->user_pages)) {
err = PTR_ERR(ring_ent->user_pages);
pr_info("qid=%d ent=%d pin-res=%d\n",
queue->qid, ring_ent->tag, err);
goto err;
}

/*
* The ring entry is registered now and needs to be handled
* for shutdown.
*/
atomic_inc(&ring->queue_refs);
err:
return 0;

err_unlock:
spin_unlock(&queue->lock);
return ret;
err:
return err;
}

/**
Expand Down Expand Up @@ -921,7 +976,9 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
if (unlikely(fc->aborted || queue->stopped))
goto err_unlock;

ring_ent->rreq = (void __user *)cmd_req->buf_ptr;
ring_ent->user_buf = cmd_req->buf_ptr;
ring_ent->user_buf_len = cmd_req->buf_len;

ring_ent->max_arg_len = cmd_req->buf_len -
offsetof(struct fuse_ring_req, in_out_arg);
ret = -EINVAL;
Expand All @@ -931,7 +988,6 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
goto err_unlock;
}

ring_ent->rreq = (void __user *)cmd_req->buf_ptr;
ring_ent->max_arg_len = cmd_req->buf_len -
offsetof(struct fuse_ring_req, in_out_arg);
if (cmd_req->buf_len < ring->req_buf_sz) {
Expand Down
15 changes: 11 additions & 4 deletions fs/fuse/dev_uring_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,17 @@ struct fuse_ring_ent {
/* fuse_req assigned to the ring entry */
struct fuse_req *fuse_req;

/*
* buffer provided by fuse server
*/
struct fuse_ring_req __user *rreq;
/* buffer provided by fuse server */
unsigned long __user user_buf;

/* length of user_buf */
size_t user_buf_len;

/* mapped user_buf pages */
struct page **user_pages;

/* number of user pages */
int nr_user_pages;

/* struct fuse_ring_req::in_out_arg size*/
size_t max_arg_len;
Expand Down
2 changes: 2 additions & 0 deletions fs/fuse/fuse_dev_i.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ struct fuse_copy_state {
struct {
/* overall offset with the user buffer */
unsigned int offset;
struct page **pages;
int page_idx;
} ring;
};

Expand Down

0 comments on commit 9a10f51

Please sign in to comment.