Skip to content

Commit b40459f

Browse files
gpshead and serhiy-storchaka
authored and committed
pythongh-117378: Fix multiprocessing forkserver preload sys.path inheritance. (pythonGH-126538)
pythongh-117378: Fix multiprocessing forkserver preload sys.path inheritance. `sys.path` was not properly being sent from the parent process when launching the multiprocessing forkserver process to preload imports. This bug has been there since the forkserver start method was introduced in Python 3.4. It was always _supposed_ to inherit `sys.path` the same way the spawn method does. Observable behavior change: A `''` value in `sys.path` will now be replaced in the forkserver's `sys.path` with an absolute pathname `os.path.abspath(os.getcwd())` saved at the time that `multiprocessing` was imported in the parent process, as it already was when using the spawn start method. **This will only be observable during forkserver preload imports**. The code invoked before calling things in another process already correctly sets `sys.path`, which is likely why this went unnoticed for so long as a mere performance issue in some configurations. A workaround for the bug on impacted Pythons is to set PYTHONPATH in the environment before multiprocessing's forkserver process is started. This is not perfect, as that is then inherited by other children, etc., but it is likely good enough for many people's purposes. Co-authored-by: Serhiy Storchaka <[email protected]>
1 parent 3b07365 commit b40459f

File tree

3 files changed

+97
-0
lines changed

3 files changed

+97
-0
lines changed

Lib/multiprocessing/forkserver.py

+2
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,8 @@ def ensure_running(self):
168168
def main(listener_fd, alive_r, preload, main_path=None, sys_path=None):
169169
'''Run forkserver.'''
170170
if preload:
171+
if sys_path is not None:
172+
sys.path[:] = sys_path
171173
if '__main__' in preload and main_path is not None:
172174
process.current_process()._inheriting = True
173175
try:

Lib/test/_test_multiprocessing.py

+78
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import sys
1313
import os
1414
import gc
15+
import importlib
1516
import errno
1617
import functools
1718
import signal
@@ -20,8 +21,10 @@
2021
import socket
2122
import random
2223
import logging
24+
import shutil
2325
import subprocess
2426
import struct
27+
import tempfile
2528
import operator
2629
import pickle
2730
import weakref
@@ -6397,6 +6400,81 @@ def test_atexit(self):
63976400
self.assertEqual(f.read(), 'deadbeef')
63986401

63996402

6403+
class _TestSpawnedSysPath(BaseTestCase):
    """Test that sys.path is set up in forkserver and spawn processes.

    Each test runs with a parent ``sys.path`` that starts with ``""``
    followed by a temporary directory containing a small, uniquely named
    module.  The tests then check what child processes observe.
    """

    # NOTE(review): presumably read by the test-generation machinery
    # elsewhere in this file to select which flavours run this mixin.
    ALLOWED_TYPES = ('processes',)

    def setUp(self):
        """Create a temp dir holding a test module and put it on sys.path."""
        self._orig_sys_path = list(sys.path)
        self._temp_dir = tempfile.mkdtemp(prefix="test_sys_path-")
        self._mod_name = "unique_test_mod"
        module_path = os.path.join(self._temp_dir, f"{self._mod_name}.py")
        with open(module_path, "w", encoding="utf-8") as mod:
            mod.write("# A simple test module\n")
        sys.path[:] = [p for p in sys.path if p]  # remove any existing ""s
        sys.path.insert(0, self._temp_dir)
        sys.path.insert(0, "")  # Replaced with an abspath in child.
        try:
            self._ctx_forkserver = multiprocessing.get_context("forkserver")
        except ValueError:
            # No forkserver start method here; dependent tests skip it.
            self._ctx_forkserver = None
        self._ctx_spawn = multiprocessing.get_context("spawn")

    def tearDown(self):
        """Restore the original sys.path and delete the temp dir."""
        sys.path[:] = self._orig_sys_path
        shutil.rmtree(self._temp_dir, ignore_errors=True)

    @staticmethod
    def enq_imported_module_names(queue):
        """Child target: put the names of all imported modules on *queue*."""
        queue.put(tuple(sys.modules))

    def test_forkserver_preload_imports_sys_path(self):
        """A preloaded module found only via sys.path is in the child."""
        ctx = self._ctx_forkserver
        if not ctx:
            self.skipTest("requires forkserver start method.")
        self.assertNotIn(self._mod_name, sys.modules)
        multiprocessing.forkserver._forkserver._stop()  # Must be fresh.
        ctx.set_forkserver_preload(
            ["test.test_multiprocessing_forkserver", self._mod_name])
        q = ctx.Queue()
        proc = ctx.Process(target=self.enq_imported_module_names, args=(q,))
        proc.start()
        # Drain the queue *before* joining: a child that has put data on a
        # Queue does not exit until its feeder thread has flushed that data
        # to the pipe, so join()-then-get() can deadlock on a large payload.
        child_imported_modules = q.get()
        proc.join()
        q.close()
        self.assertIn(self._mod_name, child_imported_modules)

    @staticmethod
    def enq_sys_path_and_import(queue, mod_name):
        """Child target: report sys.path, then the result of an import.

        Puts two items on *queue*: the child's ``sys.path``, then either
        the ``ImportError`` raised while importing *mod_name* or ``None``
        if the import succeeded.
        """
        queue.put(sys.path)
        try:
            importlib.import_module(mod_name)
        except ImportError as exc:
            queue.put(exc)
        else:
            queue.put(None)

    def test_child_sys_path(self):
        """Children see the parent's sys.path with "" made absolute."""
        for ctx in (self._ctx_spawn, self._ctx_forkserver):
            if not ctx:
                continue
            with self.subTest(f"{ctx.get_start_method()} start method"):
                q = ctx.Queue()
                proc = ctx.Process(target=self.enq_sys_path_and_import,
                                   args=(q, self._mod_name))
                proc.start()
                # Consume both results before join() so the child's queue
                # feeder thread can never block the child from exiting.
                child_sys_path = q.get()
                import_error = q.get()
                proc.join()
                q.close()
                self.assertNotIn("", child_sys_path)  # replaced by an abspath
                self.assertIn(self._temp_dir, child_sys_path)  # our addition
                # ignore the first element, it is the absolute "" replacement
                self.assertEqual(child_sys_path[1:], sys.path[1:])
                self.assertIsNone(
                    import_error,
                    msg=f"child could not import {self._mod_name}")
6476+
6477+
64006478
class MiscTestCase(unittest.TestCase):
64016479
def test__all__(self):
64026480
# Just make sure names in not_exported are excluded
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
Fixed the :mod:`multiprocessing` ``"forkserver"`` start method forkserver
2+
process to correctly inherit the parent's :data:`sys.path` during the importing
3+
of :func:`multiprocessing.set_forkserver_preload` modules in the same manner as
4+
:data:`sys.path` is configured in workers before executing work items.
5+
6+
This bug caused some forkserver module preloading to silently fail to preload.
7+
This manifested as a performance degradation in child processes when the
8+
``sys.path`` was required due to additional repeated work in every worker.
9+
10+
It could also have a side effect of ``""`` remaining in :data:`sys.path` during
11+
forkserver preload imports instead of the absolute path from :func:`os.getcwd`
12+
at multiprocessing import time used in the worker ``sys.path``.
13+
14+
This could potentially lead to incorrect imports from the wrong location during
15+
preload. We are unaware of that actually happening. The issue was discovered
16+
by someone observing unexpected preload performance gains.
17+

0 commit comments

Comments
 (0)