Skip to content

Commit 46f821d

Browse files
[3.11] gh-114763: Protect lazy loading modules from attribute access races (GH-114781) (GH-115871)
gh-114763: Protect lazy loading modules from attribute access races (GH-114781) Setting the __class__ attribute of a lazy-loading module to ModuleType enables other threads to attempt to access attributes before the loading is complete. Now that is protected by a lock. (cherry picked from commit 200271c) Co-authored-by: Chris Markiewicz <[email protected]>
1 parent a30a1e7 commit 46f821d

File tree

3 files changed

+94
-32
lines changed

3 files changed

+94
-32
lines changed

Lib/importlib/util.py

+51-30
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import _imp
1616
import functools
1717
import sys
18+
import threading
1819
import types
1920
import warnings
2021

@@ -225,36 +226,54 @@ class _LazyModule(types.ModuleType):
225226

226227
def __getattribute__(self, attr):
227228
"""Trigger the load of the module and return the attribute."""
228-
# All module metadata must be garnered from __spec__ in order to avoid
229-
# using mutated values.
230-
# Stop triggering this method.
231-
self.__class__ = types.ModuleType
232-
# Get the original name to make sure no object substitution occurred
233-
# in sys.modules.
234-
original_name = self.__spec__.name
235-
# Figure out exactly what attributes were mutated between the creation
236-
# of the module and now.
237-
attrs_then = self.__spec__.loader_state['__dict__']
238-
attrs_now = self.__dict__
239-
attrs_updated = {}
240-
for key, value in attrs_now.items():
241-
# Code that set the attribute may have kept a reference to the
242-
# assigned object, making identity more important than equality.
243-
if key not in attrs_then:
244-
attrs_updated[key] = value
245-
elif id(attrs_now[key]) != id(attrs_then[key]):
246-
attrs_updated[key] = value
247-
self.__spec__.loader.exec_module(self)
248-
# If exec_module() was used directly there is no guarantee the module
249-
# object was put into sys.modules.
250-
if original_name in sys.modules:
251-
if id(self) != id(sys.modules[original_name]):
252-
raise ValueError(f"module object for {original_name!r} "
253-
"substituted in sys.modules during a lazy "
254-
"load")
255-
# Update after loading since that's what would happen in an eager
256-
# loading situation.
257-
self.__dict__.update(attrs_updated)
229+
__spec__ = object.__getattribute__(self, '__spec__')
230+
loader_state = __spec__.loader_state
231+
with loader_state['lock']:
232+
# Only the first thread to get the lock should trigger the load
233+
# and reset the module's class. The rest can now getattr().
234+
if object.__getattribute__(self, '__class__') is _LazyModule:
235+
# The first thread comes here multiple times as it descends the
236+
# call stack. The first time, it sets is_loading and triggers
237+
# exec_module(), which will access module.__dict__, module.__name__,
238+
# and/or module.__spec__, reentering this method. These accesses
239+
# need to be allowed to proceed without triggering the load again.
240+
if loader_state['is_loading'] and attr.startswith('__') and attr.endswith('__'):
241+
return object.__getattribute__(self, attr)
242+
loader_state['is_loading'] = True
243+
244+
__dict__ = object.__getattribute__(self, '__dict__')
245+
246+
# All module metadata must be gathered from __spec__ in order to avoid
247+
# using mutated values.
248+
# Get the original name to make sure no object substitution occurred
249+
# in sys.modules.
250+
original_name = __spec__.name
251+
# Figure out exactly what attributes were mutated between the creation
252+
# of the module and now.
253+
attrs_then = loader_state['__dict__']
254+
attrs_now = __dict__
255+
attrs_updated = {}
256+
for key, value in attrs_now.items():
257+
# Code that set an attribute may have kept a reference to the
258+
# assigned object, making identity more important than equality.
259+
if key not in attrs_then:
260+
attrs_updated[key] = value
261+
elif id(attrs_now[key]) != id(attrs_then[key]):
262+
attrs_updated[key] = value
263+
__spec__.loader.exec_module(self)
264+
# If exec_module() was used directly there is no guarantee the module
265+
# object was put into sys.modules.
266+
if original_name in sys.modules:
267+
if id(self) != id(sys.modules[original_name]):
268+
raise ValueError(f"module object for {original_name!r} "
269+
"substituted in sys.modules during a lazy "
270+
"load")
271+
# Update after loading since that's what would happen in an eager
272+
# loading situation.
273+
__dict__.update(attrs_updated)
274+
# Finally, stop triggering this method.
275+
self.__class__ = types.ModuleType
276+
258277
return getattr(self, attr)
259278

260279
def __delattr__(self, attr):
@@ -298,5 +317,7 @@ def exec_module(self, module):
298317
loader_state = {}
299318
loader_state['__dict__'] = module.__dict__.copy()
300319
loader_state['__class__'] = module.__class__
320+
loader_state['lock'] = threading.RLock()
321+
loader_state['is_loading'] = False
301322
module.__spec__.loader_state = loader_state
302323
module.__class__ = _LazyModule

Lib/test/test_importlib/test_lazy.py

+40-2
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,12 @@
22
from importlib import abc
33
from importlib import util
44
import sys
5+
import time
6+
import threading
57
import types
68
import unittest
79

10+
from test.support import threading_helper
811
from test.test_importlib import util as test_util
912

1013

@@ -40,6 +43,7 @@ class TestingImporter(abc.MetaPathFinder, abc.Loader):
4043
module_name = 'lazy_loader_test'
4144
mutated_name = 'changed'
4245
loaded = None
46+
load_count = 0
4347
source_code = 'attr = 42; __name__ = {!r}'.format(mutated_name)
4448

4549
def find_spec(self, name, path, target=None):
@@ -48,8 +52,10 @@ def find_spec(self, name, path, target=None):
4852
return util.spec_from_loader(name, util.LazyLoader(self))
4953

5054
def exec_module(self, module):
55+
time.sleep(0.01) # Simulate a slow load.
5156
exec(self.source_code, module.__dict__)
5257
self.loaded = module
58+
self.load_count += 1
5359

5460

5561
class LazyLoaderTests(unittest.TestCase):
@@ -59,8 +65,9 @@ def test_init(self):
5965
# Classes that don't define exec_module() trigger TypeError.
6066
util.LazyLoader(object)
6167

62-
def new_module(self, source_code=None):
63-
loader = TestingImporter()
68+
def new_module(self, source_code=None, loader=None):
69+
if loader is None:
70+
loader = TestingImporter()
6471
if source_code is not None:
6572
loader.source_code = source_code
6673
spec = util.spec_from_loader(TestingImporter.module_name,
@@ -140,6 +147,37 @@ def test_module_already_in_sys(self):
140147
# Force the load; just care that no exception is raised.
141148
module.__name__
142149

150+
@threading_helper.requires_working_threading()
151+
def test_module_load_race(self):
152+
with test_util.uncache(TestingImporter.module_name):
153+
loader = TestingImporter()
154+
module = self.new_module(loader=loader)
155+
self.assertEqual(loader.load_count, 0)
156+
157+
class RaisingThread(threading.Thread):
158+
exc = None
159+
def run(self):
160+
try:
161+
super().run()
162+
except Exception as exc:
163+
self.exc = exc
164+
165+
def access_module():
166+
return module.attr
167+
168+
threads = []
169+
for _ in range(2):
170+
threads.append(thread := RaisingThread(target=access_module))
171+
thread.start()
172+
173+
# Races could cause errors
174+
for thread in threads:
175+
thread.join()
176+
self.assertIsNone(thread.exc)
177+
178+
# Or multiple load attempts
179+
self.assertEqual(loader.load_count, 1)
180+
143181

144182
if __name__ == '__main__':
145183
unittest.main()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Protect modules loaded with :class:`importlib.util.LazyLoader` from race
2+
conditions when multiple threads try to access attributes before the loading
3+
is complete.

0 commit comments

Comments
 (0)