Skip to content

Commit b32350a

Browse files
eduardo-elizondo and blhsing
authored and committed
pythongh-113190: Reenable non-debug interned string cleanup (pythonGH-113601)
1 parent 2e74761 commit b32350a

File tree

5 files changed

+42
-42
lines changed

5 files changed

+42
-42
lines changed

Doc/c-api/init.rst

+12-6
Original file line numberDiff line numberDiff line change
@@ -394,8 +394,7 @@ Initializing and finalizing the interpreter
394394
Undo all initializations made by :c:func:`Py_Initialize` and subsequent use of
395395
Python/C API functions, and destroy all sub-interpreters (see
396396
:c:func:`Py_NewInterpreter` below) that were created and not yet destroyed since
397-
the last call to :c:func:`Py_Initialize`. Ideally, this frees all memory
398-
allocated by the Python interpreter. This is a no-op when called for a second
397+
the last call to :c:func:`Py_Initialize`. This is a no-op when called for a second
399398
time (without calling :c:func:`Py_Initialize` again first).
400399
401400
Since this is the reverse of :c:func:`Py_Initialize`, it should be called
@@ -407,6 +406,12 @@ Initializing and finalizing the interpreter
407406
If there were errors during finalization (flushing buffered data),
408407
``-1`` is returned.
409408
409+
Note that Python will make a best effort at freeing all memory allocated by the Python
410+
interpreter. Therefore, any C-Extension should make sure to correctly clean up all
411+
of the previously allocated PyObjects before using them in subsequent calls to
412+
:c:func:`Py_Initialize`. Otherwise it could introduce vulnerabilities and incorrect
413+
behavior.
414+
410415
This function is provided for a number of reasons. An embedding application
411416
might want to restart Python without having to restart the application itself.
412417
An application that has loaded the Python interpreter from a dynamically
@@ -421,10 +426,11 @@ Initializing and finalizing the interpreter
421426
loaded extension modules loaded by Python are not unloaded. Small amounts of
422427
memory allocated by the Python interpreter may not be freed (if you find a leak,
423428
please report it). Memory tied up in circular references between objects is not
424-
freed. Some memory allocated by extension modules may not be freed. Some
425-
extensions may not work properly if their initialization routine is called more
426-
than once; this can happen if an application calls :c:func:`Py_Initialize` and
427-
:c:func:`Py_FinalizeEx` more than once.
429+
freed. Interned strings will all be deallocated regardless of their reference count.
430+
Some memory allocated by extension modules may not be freed. Some extensions may not
431+
work properly if their initialization routine is called more than once; this can
432+
happen if an application calls :c:func:`Py_Initialize` and :c:func:`Py_FinalizeEx`
433+
more than once.
428434

429435
.. audit-event:: cpython._PySys_ClearAuditHooks "" c.Py_FinalizeEx
430436

Doc/whatsnew/3.14.rst

+10
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,16 @@ New Features
419419
which has an ambiguous return value.
420420
(Contributed by Irit Katriel and Erlend Aasland in :gh:`105201`.)
421421

422+
* :c:func:`Py_Finalize` now deletes all interned strings. This
423+
is backwards incompatible with any C-Extension that holds onto an interned
424+
string after a call to :c:func:`Py_Finalize` and then reuses it after a
425+
call to :c:func:`Py_Initialize`. Any issues arising from this behavior will
426+
normally result in crashes during the execution of the subsequent call to
427+
:c:func:`Py_Initialize` from accessing uninitialized memory. To fix, use
428+
an address sanitizer to identify any use-after-free coming from
429+
an interned string and deallocate it during module shutdown.
430+
(Contributed by Eddie Elizondo in :gh:`113601`.)
431+
422432
Porting to Python 3.14
423433
----------------------
424434

Lib/test/_test_embed_structseq.py

+18-22
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,27 @@
11
import sys
22
import types
3+
import unittest
34

4-
# Note: This test file can't import `unittest` since the runtime can't
5-
# currently guarantee that it will not leak memory. Doing so will mark
6-
# the test as passing but with reference leaks. This can safely import
7-
# the `unittest` library once there's a strict guarantee of no leaks
8-
# during runtime shutdown.
95

106
# bpo-46417: Test that structseq types used by the sys module are still
117
# valid when Py_Finalize()/Py_Initialize() are called multiple times.
12-
class TestStructSeq:
8+
class TestStructSeq(unittest.TestCase):
139
# test PyTypeObject members
14-
def _check_structseq(self, obj_type):
10+
def check_structseq(self, obj_type):
1511
# ob_refcnt
16-
assert sys.getrefcount(obj_type) > 1
12+
self.assertGreaterEqual(sys.getrefcount(obj_type), 1)
1713
# tp_base
18-
assert issubclass(obj_type, tuple)
14+
self.assertTrue(issubclass(obj_type, tuple))
1915
# tp_bases
20-
assert obj_type.__bases__ == (tuple,)
16+
self.assertEqual(obj_type.__bases__, (tuple,))
2117
# tp_dict
22-
assert isinstance(obj_type.__dict__, types.MappingProxyType)
18+
self.assertIsInstance(obj_type.__dict__, types.MappingProxyType)
2319
# tp_mro
24-
assert obj_type.__mro__ == (obj_type, tuple, object)
20+
self.assertEqual(obj_type.__mro__, (obj_type, tuple, object))
2521
# tp_name
26-
assert isinstance(type.__name__, str)
22+
self.assertIsInstance(type.__name__, str)
2723
# tp_subclasses
28-
assert obj_type.__subclasses__() == []
24+
self.assertEqual(obj_type.__subclasses__(), [])
2925

3026
def test_sys_attrs(self):
3127
for attr_name in (
@@ -36,23 +32,23 @@ def test_sys_attrs(self):
3632
'thread_info', # ThreadInfoType
3733
'version_info', # VersionInfoType
3834
):
39-
attr = getattr(sys, attr_name)
40-
self._check_structseq(type(attr))
35+
with self.subTest(attr=attr_name):
36+
attr = getattr(sys, attr_name)
37+
self.check_structseq(type(attr))
4138

4239
def test_sys_funcs(self):
4340
func_names = ['get_asyncgen_hooks'] # AsyncGenHooksType
4441
if hasattr(sys, 'getwindowsversion'):
4542
func_names.append('getwindowsversion') # WindowsVersionType
4643
for func_name in func_names:
47-
func = getattr(sys, func_name)
48-
obj = func()
49-
self._check_structseq(type(obj))
44+
with self.subTest(func=func_name):
45+
func = getattr(sys, func_name)
46+
obj = func()
47+
self.check_structseq(type(obj))
5048

5149

5250
try:
53-
tests = TestStructSeq()
54-
tests.test_sys_attrs()
55-
tests.test_sys_funcs()
51+
unittest.main()
5652
except SystemExit as exc:
5753
if exc.args[0] != 0:
5854
raise
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
:c:func:`Py_Finalize` now deletes all interned strings.

Objects/unicodeobject.c

+1-14
Original file line numberDiff line numberDiff line change
@@ -15623,19 +15623,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1562315623
int shared = 0;
1562415624
switch (PyUnicode_CHECK_INTERNED(s)) {
1562515625
case SSTATE_INTERNED_IMMORTAL:
15626-
/* Make immortal interned strings mortal again.
15627-
*
15628-
* Currently, the runtime is not able to guarantee that it can exit
15629-
* without allocations that carry over to a future initialization
15630-
* of Python within the same process. i.e:
15631-
* ./python -X showrefcount -c 'import itertools'
15632-
* [237 refs, 237 blocks]
15633-
*
15634-
* This should remain disabled (`Py_DEBUG` only) until there is a
15635-
* strict guarantee that no memory will be left after
15636-
* `Py_Finalize`.
15637-
*/
15638-
#ifdef Py_DEBUG
15626+
/* Make immortal interned strings mortal again. */
1563915627
// Skip the Immortal Instance check and restore
1564015628
// the two references (key and value) ignored
1564115629
// by PyUnicode_InternInPlace().
@@ -15648,7 +15636,6 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
1564815636
#ifdef INTERNED_STATS
1564915637
total_length += PyUnicode_GET_LENGTH(s);
1565015638
#endif
15651-
#endif // Py_DEBUG
1565215639
break;
1565315640
case SSTATE_INTERNED_IMMORTAL_STATIC:
1565415641
/* It is shared between interpreters, so we should unmark it

0 commit comments

Comments
 (0)