Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-92888: Fix memoryview bad __index__ use after free #92946

Merged
merged 12 commits into from
Jun 17, 2022
96 changes: 96 additions & 0 deletions Lib/test/test_memoryview.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,102 @@ def test_pickle(self):
with self.assertRaises(TypeError):
pickle.dumps(m, proto)

def test_use_released_memory(self):
Fidget-Spinner marked this conversation as resolved.
Show resolved Hide resolved
# gh-92888: Previously it was possible to use a memoryview even after
# backing buffer is freed in certain cases. This tests that those
# cases raise an exception.
size = 128
def release():
m.release()
nonlocal ba
ba = bytearray(size)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's useless, no?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, we need it for tests below that tests indexing into ba[].

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We allocate a bytearray of the same size as the bytearray just released in memoryview in hope that it will be allocated at the same memory. It helps to check that we do nor read/write a freed memory.

class MyIndex:
def __index__(self):
release()
return 4
class MyFloat:
def __float__(self):
release()
return 4.25
class MyBool:
def __bool__(self):
release()
return True

m = memoryview(bytearray(b'\xff'*size))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my PR, I tried to make the code more generic to test more cases: https://github.com/python/cpython/pull/93127/files#diff-d41c6bb40a1e03fea5a20d15c4077413e0ddde65651147922b625b03a66a2f16R399:

        tp = self.rw_type
        b = self.rw_type(self._source)
        view = self._view(b)

with self.assertRaises(ValueError):
m[MyIndex()]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is very long. Can you try to factorize similar code and use loop with subTest(), and put pack operations in one test method and unpack in another test method?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then we will need to duplicate the definitions of internal classes.

The tested code is so different, that it is difficult to use a loop. And I think that the result will be more complicated.


m = memoryview(bytearray(b'\xff'*size))
self.assertEqual(list(m[:MyIndex()]), [255] * 4)

m = memoryview(bytearray(b'\xff'*size))
self.assertEqual(list(m[MyIndex():8]), [255] * 4)

Fidget-Spinner marked this conversation as resolved.
Show resolved Hide resolved
m = memoryview(bytearray(b'\xff'*size)).cast('B', (64, 2))
with self.assertRaisesRegex(ValueError, "operation forbidden"):
m[MyIndex(), 0]

m = memoryview(bytearray(b'\xff'*size)).cast('B', (2, 64))
with self.assertRaisesRegex(ValueError, "operation forbidden"):
m[0, MyIndex()]

ba = None
m = memoryview(bytearray(b'\xff'*size))
with self.assertRaisesRegex(ValueError, "operation forbidden"):
m[MyIndex()] = 42
self.assertEqual(ba[:8], b'\0'*8)

ba = None
m = memoryview(bytearray(b'\xff'*size))
with self.assertRaisesRegex(ValueError, "operation forbidden"):
m[:MyIndex()] = b'spam'
self.assertEqual(ba[:8], b'\0'*8)

ba = None
m = memoryview(bytearray(b'\xff'*size))
with self.assertRaisesRegex(ValueError, "operation forbidden"):
m[MyIndex():8] = b'spam'
self.assertEqual(ba[:8], b'\0'*8)

Fidget-Spinner marked this conversation as resolved.
Show resolved Hide resolved
ba = None
m = memoryview(bytearray(b'\xff'*size)).cast('B', (64, 2))
with self.assertRaisesRegex(ValueError, "operation forbidden"):
m[MyIndex(), 0] = 42
self.assertEqual(ba[8:16], b'\0'*8)
ba = None
m = memoryview(bytearray(b'\xff'*size)).cast('B', (2, 64))
with self.assertRaisesRegex(ValueError, "operation forbidden"):
m[0, MyIndex()] = 42
self.assertEqual(ba[:8], b'\0'*8)

ba = None
m = memoryview(bytearray(b'\xff'*size))
with self.assertRaisesRegex(ValueError, "operation forbidden"):
m[0] = MyIndex()
self.assertEqual(ba[:8], b'\0'*8)

for fmt in 'bhilqnBHILQN':
with self.subTest(fmt=fmt):
ba = None
m = memoryview(bytearray(b'\xff'*size)).cast(fmt)
with self.assertRaisesRegex(ValueError, "operation forbidden"):
m[0] = MyIndex()
self.assertEqual(ba[:8], b'\0'*8)

for fmt in 'fd':
with self.subTest(fmt=fmt):
ba = None
m = memoryview(bytearray(b'\xff'*size)).cast(fmt)
with self.assertRaisesRegex(ValueError, "operation forbidden"):
m[0] = MyFloat()
self.assertEqual(ba[:8], b'\0'*8)

ba = None
m = memoryview(bytearray(b'\xff'*size)).cast('?')
with self.assertRaisesRegex(ValueError, "operation forbidden"):
m[0] = MyBool()
self.assertEqual(ba[:8], b'\0'*8)

if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Fix ``memoryview`` use after free when accessing the backing buffer in certain cases.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I propose to mention more explicitly that the protection is about released views:

If a memoryview is released while getting or setting an item, raise an exception.
For example, converting an object to an index can execute arbitrary Python
code which can indirectly release the memoryview.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not always an exception is raised.

The bug was in reading or wring the freed memory. Now it is prevented -- you either get an exception or free the memory after reading. @Fidget-Spinner's description is more correct.

I am going to address such inconsistency in a separate issue.


55 changes: 36 additions & 19 deletions Objects/memoryobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,11 @@ PyTypeObject _PyManagedBuffer_Type = {
return -1; \
}

/* See gh-92888. These macros signal that we need to check the memoryview
again due to possible read after frees. */
#define CHECK_RELEASED_AGAIN(mv) CHECK_RELEASED(mv)
#define CHECK_RELEASED_INT_AGAIN(mv) CHECK_RELEASED_INT(mv)

#define CHECK_LIST_OR_TUPLE(v) \
if (!PyList_Check(v) && !PyTuple_Check(v)) { \
PyErr_SetString(PyExc_TypeError, \
Expand Down Expand Up @@ -381,8 +386,9 @@ copy_rec(const Py_ssize_t *shape, Py_ssize_t ndim, Py_ssize_t itemsize,

/* Faster copying of one-dimensional arrays. */
static int
copy_single(const Py_buffer *dest, const Py_buffer *src)
copy_single(PyMemoryViewObject *self, const Py_buffer *dest, const Py_buffer *src)
{
CHECK_RELEASED_INT_AGAIN(self);
char *mem = NULL;

assert(dest->ndim == 1);
Expand Down Expand Up @@ -1677,7 +1683,7 @@ pylong_as_zu(PyObject *item)
module syntax. This function is very sensitive to small changes. With this
layout gcc automatically generates a fast jump table. */
static inline PyObject *
unpack_single(const char *ptr, const char *fmt)
unpack_single(PyMemoryViewObject *self, const char *ptr, const char *fmt)
{
unsigned long long llu;
unsigned long lu;
Expand All @@ -1689,6 +1695,8 @@ unpack_single(const char *ptr, const char *fmt)
unsigned char uc;
void *p;

CHECK_RELEASED_AGAIN(self);

switch (fmt[0]) {

/* signed integers and fast path for 'B' */
Expand Down Expand Up @@ -1767,7 +1775,7 @@ unpack_single(const char *ptr, const char *fmt)
/* Pack a single item. 'fmt' can be any native format character in
struct module syntax. */
static int
pack_single(char *ptr, PyObject *item, const char *fmt)
pack_single(PyMemoryViewObject *self, char *ptr, PyObject *item, const char *fmt)
{
unsigned long long llu;
unsigned long lu;
Expand All @@ -1784,6 +1792,7 @@ pack_single(char *ptr, PyObject *item, const char *fmt)
ld = pylong_as_ld(item);
if (ld == -1 && PyErr_Occurred())
goto err_occurred;
CHECK_RELEASED_INT_AGAIN(self);
switch (fmt[0]) {
case 'b':
if (ld < SCHAR_MIN || ld > SCHAR_MAX) goto err_range;
Expand All @@ -1804,6 +1813,7 @@ pack_single(char *ptr, PyObject *item, const char *fmt)
lu = pylong_as_lu(item);
if (lu == (unsigned long)-1 && PyErr_Occurred())
goto err_occurred;
CHECK_RELEASED_INT_AGAIN(self);
switch (fmt[0]) {
case 'B':
if (lu > UCHAR_MAX) goto err_range;
Expand All @@ -1824,12 +1834,14 @@ pack_single(char *ptr, PyObject *item, const char *fmt)
lld = pylong_as_lld(item);
if (lld == -1 && PyErr_Occurred())
goto err_occurred;
CHECK_RELEASED_INT_AGAIN(self);
PACK_SINGLE(ptr, lld, long long);
break;
case 'Q':
llu = pylong_as_llu(item);
if (llu == (unsigned long long)-1 && PyErr_Occurred())
goto err_occurred;
CHECK_RELEASED_INT_AGAIN(self);
PACK_SINGLE(ptr, llu, unsigned long long);
break;

Expand All @@ -1838,12 +1850,14 @@ pack_single(char *ptr, PyObject *item, const char *fmt)
zd = pylong_as_zd(item);
if (zd == -1 && PyErr_Occurred())
goto err_occurred;
CHECK_RELEASED_INT_AGAIN(self);
PACK_SINGLE(ptr, zd, Py_ssize_t);
break;
case 'N':
zu = pylong_as_zu(item);
if (zu == (size_t)-1 && PyErr_Occurred())
goto err_occurred;
CHECK_RELEASED_INT_AGAIN(self);
PACK_SINGLE(ptr, zu, size_t);
break;

Expand All @@ -1852,6 +1866,7 @@ pack_single(char *ptr, PyObject *item, const char *fmt)
d = PyFloat_AsDouble(item);
if (d == -1.0 && PyErr_Occurred())
goto err_occurred;
CHECK_RELEASED_INT_AGAIN(self);
if (fmt[0] == 'f') {
PACK_SINGLE(ptr, d, float);
}
Expand All @@ -1865,6 +1880,7 @@ pack_single(char *ptr, PyObject *item, const char *fmt)
ld = PyObject_IsTrue(item);
if (ld < 0)
return -1; /* preserve original error */
CHECK_RELEASED_INT_AGAIN(self);
PACK_SINGLE(ptr, ld, _Bool);
break;

Expand All @@ -1882,6 +1898,7 @@ pack_single(char *ptr, PyObject *item, const char *fmt)
p = PyLong_AsVoidPtr(item);
if (p == NULL && PyErr_Occurred())
goto err_occurred;
CHECK_RELEASED_INT_AGAIN(self);
PACK_SINGLE(ptr, p, void *);
break;

Expand Down Expand Up @@ -2048,7 +2065,7 @@ adjust_fmt(const Py_buffer *view)

/* Base case for multi-dimensional unpacking. Assumption: ndim == 1. */
static PyObject *
tolist_base(const char *ptr, const Py_ssize_t *shape,
tolist_base(PyMemoryViewObject *self, const char *ptr, const Py_ssize_t *shape,
const Py_ssize_t *strides, const Py_ssize_t *suboffsets,
const char *fmt)
{
Expand All @@ -2061,7 +2078,7 @@ tolist_base(const char *ptr, const Py_ssize_t *shape,

for (i = 0; i < shape[0]; ptr+=strides[0], i++) {
const char *xptr = ADJUST_PTR(ptr, suboffsets, 0);
item = unpack_single(xptr, fmt);
item = unpack_single(self, xptr, fmt);
if (item == NULL) {
Py_DECREF(lst);
return NULL;
Expand All @@ -2075,7 +2092,7 @@ tolist_base(const char *ptr, const Py_ssize_t *shape,
/* Unpack a multi-dimensional array into a nested list.
Assumption: ndim >= 1. */
static PyObject *
tolist_rec(const char *ptr, Py_ssize_t ndim, const Py_ssize_t *shape,
tolist_rec(PyMemoryViewObject *self, const char *ptr, Py_ssize_t ndim, const Py_ssize_t *shape,
const Py_ssize_t *strides, const Py_ssize_t *suboffsets,
const char *fmt)
{
Expand All @@ -2087,15 +2104,15 @@ tolist_rec(const char *ptr, Py_ssize_t ndim, const Py_ssize_t *shape,
assert(strides != NULL);

if (ndim == 1)
return tolist_base(ptr, shape, strides, suboffsets, fmt);
return tolist_base(self, ptr, shape, strides, suboffsets, fmt);

lst = PyList_New(shape[0]);
if (lst == NULL)
return NULL;

for (i = 0; i < shape[0]; ptr+=strides[0], i++) {
const char *xptr = ADJUST_PTR(ptr, suboffsets, 0);
item = tolist_rec(xptr, ndim-1, shape+1,
item = tolist_rec(self, xptr, ndim-1, shape+1,
strides+1, suboffsets ? suboffsets+1 : NULL,
fmt);
if (item == NULL) {
Expand Down Expand Up @@ -2129,15 +2146,15 @@ memoryview_tolist_impl(PyMemoryViewObject *self)
if (fmt == NULL)
return NULL;
if (view->ndim == 0) {
return unpack_single(view->buf, fmt);
return unpack_single(self, view->buf, fmt);
}
else if (view->ndim == 1) {
return tolist_base(view->buf, view->shape,
return tolist_base(self, view->buf, view->shape,
view->strides, view->suboffsets,
fmt);
}
else {
return tolist_rec(view->buf, view->ndim, view->shape,
return tolist_rec(self, view->buf, view->ndim, view->shape,
view->strides, view->suboffsets,
fmt);
}
Expand Down Expand Up @@ -2345,7 +2362,7 @@ memory_item(PyMemoryViewObject *self, Py_ssize_t index)
char *ptr = ptr_from_index(view, index);
if (ptr == NULL)
return NULL;
return unpack_single(ptr, fmt);
return unpack_single(self, ptr, fmt);
}

PyErr_SetString(PyExc_NotImplementedError,
Expand Down Expand Up @@ -2376,7 +2393,7 @@ memory_item_multi(PyMemoryViewObject *self, PyObject *tup)
ptr = ptr_from_tuple(view, tup);
if (ptr == NULL)
return NULL;
return unpack_single(ptr, fmt);
return unpack_single(self, ptr, fmt);
}

static inline int
Expand Down Expand Up @@ -2463,7 +2480,7 @@ memory_subscript(PyMemoryViewObject *self, PyObject *key)
const char *fmt = adjust_fmt(view);
if (fmt == NULL)
return NULL;
return unpack_single(view->buf, fmt);
return unpack_single(self, view->buf, fmt);
}
else if (key == Py_Ellipsis) {
Py_INCREF(self);
Expand Down Expand Up @@ -2538,7 +2555,7 @@ memory_ass_sub(PyMemoryViewObject *self, PyObject *key, PyObject *value)
if (key == Py_Ellipsis ||
(PyTuple_Check(key) && PyTuple_GET_SIZE(key)==0)) {
ptr = (char *)view->buf;
return pack_single(ptr, value, fmt);
return pack_single(self, ptr, value, fmt);
}
else {
PyErr_SetString(PyExc_TypeError,
Expand All @@ -2560,7 +2577,7 @@ memory_ass_sub(PyMemoryViewObject *self, PyObject *key, PyObject *value)
ptr = ptr_from_index(view, index);
if (ptr == NULL)
return -1;
return pack_single(ptr, value, fmt);
return pack_single(self, ptr, value, fmt);
}
/* one-dimensional: fast path */
if (PySlice_Check(key) && view->ndim == 1) {
Expand All @@ -2583,7 +2600,7 @@ memory_ass_sub(PyMemoryViewObject *self, PyObject *key, PyObject *value)
goto end_block;
dest.len = dest.shape[0] * dest.itemsize;

ret = copy_single(&dest, &src);
ret = copy_single(self, &dest, &src);

end_block:
PyBuffer_Release(&src);
Expand All @@ -2599,7 +2616,7 @@ memory_ass_sub(PyMemoryViewObject *self, PyObject *key, PyObject *value)
ptr = ptr_from_tuple(view, key);
if (ptr == NULL)
return -1;
return pack_single(ptr, value, fmt);
return pack_single(self, ptr, value, fmt);
}
if (PySlice_Check(key) || is_multislice(key)) {
/* Call memory_subscript() to produce a sliced lvalue, then copy
Expand Down Expand Up @@ -3200,7 +3217,7 @@ memoryiter_next(memoryiterobject *it)
if (ptr == NULL) {
return NULL;
}
return unpack_single(ptr, it->it_fmt);
return unpack_single(seq, ptr, it->it_fmt);
}

it->it_seq = NULL;
Expand Down