Skip to content

Commit 44e5b2b

Browse files
markshannonsrinivasreddy
authored andcommitted
pythonGH-127058: Make PySequence_Tuple safer and probably faster. (python#127758)
* Use a small buffer, then list when constructing a tuple from an arbitrary sequence.
1 parent 5258f0e commit 44e5b2b

File tree

5 files changed

+88
-48
lines changed

5 files changed

+88
-48
lines changed

Include/internal/pycore_list.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ typedef struct {
6262
union _PyStackRef;
6363

6464
PyAPI_FUNC(PyObject *)_PyList_FromStackRefSteal(const union _PyStackRef *src, Py_ssize_t n);
65-
65+
PyAPI_FUNC(PyObject *)_PyList_AsTupleAndClear(PyListObject *v);
6666

6767
#ifdef __cplusplus
6868
}

Lib/test/test_capi/test_tuple.py

+25
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import unittest
22
import sys
3+
import gc
34
from collections import namedtuple
45
from test.support import import_helper
56

@@ -257,5 +258,29 @@ def test__tuple_resize(self):
257258
self.assertRaises(SystemError, resize, [1, 2, 3], 0, False)
258259
self.assertRaises(SystemError, resize, NULL, 0, False)
259260

261+
def test_bug_59313(self):
262+
# Before 3.14, the C-API function PySequence_Tuple
263+
# would create incomplete tuples which were visible to
264+
# the cycle GC, and this test would crash the interpeter.
265+
TAG = object()
266+
tuples = []
267+
268+
def referrer_tuples():
269+
return [x for x in gc.get_referrers(TAG)
270+
if isinstance(x, tuple)]
271+
272+
def my_iter():
273+
nonlocal tuples
274+
yield TAG # 'tag' gets stored in the result tuple
275+
tuples += referrer_tuples()
276+
for x in range(10):
277+
tuples += referrer_tuples()
278+
# Prior to 3.13 would raise a SystemError when the tuple needs to be resized
279+
yield x
280+
281+
self.assertEqual(tuple(my_iter()), (TAG, *range(10)))
282+
self.assertEqual(tuples, [])
283+
284+
260285
if __name__ == "__main__":
261286
unittest.main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
``PySequence_Tuple`` now creates the resulting tuple atomically, preventing
2+
partially created tuples being visible to the garbage collector or through
3+
``gc.get_referrers()``

Objects/abstract.c

+40-47
Original file line numberDiff line numberDiff line change
@@ -1993,9 +1993,6 @@ PyObject *
19931993
PySequence_Tuple(PyObject *v)
19941994
{
19951995
PyObject *it; /* iter(v) */
1996-
Py_ssize_t n; /* guess for result tuple size */
1997-
PyObject *result = NULL;
1998-
Py_ssize_t j;
19991996

20001997
if (v == NULL) {
20011998
return null_error();
@@ -2017,58 +2014,54 @@ PySequence_Tuple(PyObject *v)
20172014
if (it == NULL)
20182015
return NULL;
20192016

2020-
/* Guess result size and allocate space. */
2021-
n = PyObject_LengthHint(v, 10);
2022-
if (n == -1)
2023-
goto Fail;
2024-
result = PyTuple_New(n);
2025-
if (result == NULL)
2026-
goto Fail;
2027-
2028-
/* Fill the tuple. */
2029-
for (j = 0; ; ++j) {
2017+
Py_ssize_t n;
2018+
PyObject *buffer[8];
2019+
for (n = 0; n < 8; n++) {
20302020
PyObject *item = PyIter_Next(it);
20312021
if (item == NULL) {
2032-
if (PyErr_Occurred())
2033-
goto Fail;
2034-
break;
2035-
}
2036-
if (j >= n) {
2037-
size_t newn = (size_t)n;
2038-
/* The over-allocation strategy can grow a bit faster
2039-
than for lists because unlike lists the
2040-
over-allocation isn't permanent -- we reclaim
2041-
the excess before the end of this routine.
2042-
So, grow by ten and then add 25%.
2043-
*/
2044-
newn += 10u;
2045-
newn += newn >> 2;
2046-
if (newn > PY_SSIZE_T_MAX) {
2047-
/* Check for overflow */
2048-
PyErr_NoMemory();
2049-
Py_DECREF(item);
2050-
goto Fail;
2022+
if (PyErr_Occurred()) {
2023+
goto fail;
20512024
}
2052-
n = (Py_ssize_t)newn;
2053-
if (_PyTuple_Resize(&result, n) != 0) {
2054-
Py_DECREF(item);
2055-
goto Fail;
2025+
Py_DECREF(it);
2026+
return _PyTuple_FromArraySteal(buffer, n);
2027+
}
2028+
buffer[n] = item;
2029+
}
2030+
PyListObject *list = (PyListObject *)PyList_New(16);
2031+
if (list == NULL) {
2032+
goto fail;
2033+
}
2034+
assert(n == 8);
2035+
Py_SET_SIZE(list, n);
2036+
for (Py_ssize_t j = 0; j < n; j++) {
2037+
PyList_SET_ITEM(list, j, buffer[j]);
2038+
}
2039+
for (;;) {
2040+
PyObject *item = PyIter_Next(it);
2041+
if (item == NULL) {
2042+
if (PyErr_Occurred()) {
2043+
Py_DECREF(list);
2044+
Py_DECREF(it);
2045+
return NULL;
20562046
}
2047+
break;
2048+
}
2049+
if (_PyList_AppendTakeRef(list, item) < 0) {
2050+
Py_DECREF(list);
2051+
Py_DECREF(it);
2052+
return NULL;
20572053
}
2058-
PyTuple_SET_ITEM(result, j, item);
20592054
}
2060-
2061-
/* Cut tuple back if guess was too large. */
2062-
if (j < n &&
2063-
_PyTuple_Resize(&result, j) != 0)
2064-
goto Fail;
2065-
20662055
Py_DECREF(it);
2067-
return result;
2068-
2069-
Fail:
2070-
Py_XDECREF(result);
2056+
PyObject *res = _PyList_AsTupleAndClear(list);
2057+
Py_DECREF(list);
2058+
return res;
2059+
fail:
20712060
Py_DECREF(it);
2061+
while (n > 0) {
2062+
n--;
2063+
Py_DECREF(buffer[n]);
2064+
}
20722065
return NULL;
20732066
}
20742067

Objects/listobject.c

+19
Original file line numberDiff line numberDiff line change
@@ -3174,6 +3174,25 @@ PyList_AsTuple(PyObject *v)
31743174
return ret;
31753175
}
31763176

3177+
PyObject *
3178+
_PyList_AsTupleAndClear(PyListObject *self)
3179+
{
3180+
assert(self != NULL);
3181+
PyObject *ret;
3182+
if (self->ob_item == NULL) {
3183+
return PyTuple_New(0);
3184+
}
3185+
Py_BEGIN_CRITICAL_SECTION(self);
3186+
PyObject **items = self->ob_item;
3187+
Py_ssize_t size = Py_SIZE(self);
3188+
self->ob_item = NULL;
3189+
Py_SET_SIZE(self, 0);
3190+
ret = _PyTuple_FromArraySteal(items, size);
3191+
free_list_items(items, false);
3192+
Py_END_CRITICAL_SECTION();
3193+
return ret;
3194+
}
3195+
31773196
PyObject *
31783197
_PyList_FromStackRefSteal(const _PyStackRef *src, Py_ssize_t n)
31793198
{

0 commit comments

Comments
 (0)