56
56
import io
import os
import shutil
import signal
import subprocess
import threading
61
62
@@ -1118,6 +1119,10 @@ def maybe_canonicalize_exe_path(exe_name, iocontext):
1118
1119
popen_lock = threading .Lock ()
1119
1120
1120
1121
1122
def is_windows():
    """Return True when running on Windows, i.e. when ``os.name`` is ``"nt"``."""
    return os.name == "nt"
1124
+
1125
+
1121
1126
# This wrapper works around two major deadlock issues to do with pipes. The
1122
1127
# first is that, before Python 3.2 on POSIX systems, os.pipe() creates
1123
1128
# inheritable file descriptors, which leak to all child processes and prevent
@@ -1139,7 +1144,7 @@ def maybe_canonicalize_exe_path(exe_name, iocontext):
1139
1144
# subprocess.Popen. That type works around another race condition to do with
1140
1145
# signaling children.
1141
1146
def safe_popen(*args, **kwargs):
    """Spawn a child process wrapped in a SharedChild.

    All positional and keyword arguments are forwarded unchanged to
    SharedChild. ``close_fds`` is supplied here (True everywhere except on
    Windows), so callers must not pass it themselves. The spawn happens while
    holding the module-level ``popen_lock``, so spawns made through this
    function never run concurrently with each other.
    """
    close_fds = not is_windows()
    with popen_lock:
        return SharedChild(*args, close_fds=close_fds, **kwargs)
1145
1150
@@ -1164,7 +1169,7 @@ def encode_with_universal_newlines(s):
1164
1169
# removals in that copy won't interact properly with the inherited parent
1165
1170
# environment.
1166
1171
def convert_env_var_name(var):
    """Return the form of an environment variable name used on this platform.

    On Windows the name is upper-cased; on every other platform it is
    returned unchanged.
    """
    # NOTE(review): presumably this is because Windows treats environment
    # variable names case-insensitively -- confirm against the callers.
    if is_windows():
        return var.upper()
    return var
1170
1175
@@ -1194,26 +1199,24 @@ def convert_env_var_name(var):
1194
1199
class SharedChild :
1195
1200
def __init__(self, *args, **kwargs):
    """Spawn the child, forwarding every argument to subprocess.Popen."""
    self._child = subprocess.Popen(*args, **kwargs)
    # The child lock is only held for non-blocking calls. Threads making a
    # blocking call to os.waitid() release the child lock first. This
    # ensures that one thread can call try_wait() while another thread is
    # blocked on wait().
    self._child_lock = threading.Lock()
    # The wait lock is held by wait() for its whole duration, so only one
    # thread at a time performs the actual blocking wait.
    self._wait_lock = threading.Lock()
1205
1208
1206
1209
def wait (self ):
1207
1210
with self ._wait_lock :
1208
1211
# See if another thread already waited. If so, return the status we
1209
- # got before. If not, immediately release the status lock, and move
1212
+ # got before. If not, immediately release the child lock, and move
1210
1213
# on to call wait ourselves.
1211
- with self ._status_lock :
1212
- if self ._status is not None :
1213
- return self ._status
1214
+ with self ._child_lock :
1215
+ if self ._child . returncode is not None :
1216
+ return self ._child . returncode
1214
1217
1215
1218
# No other thread has waited, we're holding the wait lock, and
1216
- # we've released the status lock. It's now our job to wait. As
1219
+ # we've released the child lock. It's now our job to wait. As
1217
1220
# documented above, if os.waitid is defined, use that function to
1218
1221
# await the child without reaping it. Otherwise we do an ordinary
1219
1222
# Popen.wait and accept the race condition on some platforms.
@@ -1225,20 +1228,20 @@ def wait(self):
1225
1228
# races with kill(), which is what all of this is about.
1226
1229
self ._child .wait ()
1227
1230
1228
- # Finally, while still holding the wait lock, re-acquire the status
1231
+ # Finally, while still holding the wait lock, re-acquire the child
1229
1232
# lock to reap the child and write the result. Since we know the
1230
1233
# child has already exited, this won't block. Any other waiting
1231
1234
# threads that were blocked on us will see our result.
1232
- with self ._status_lock :
1235
+ with self ._child_lock :
1233
1236
# If the child was already reaped above in the !HAS_WAITID
1234
- # branch, this will just return the same status again.
1235
- self . _status = self . _child . wait ()
1236
- return self ._status
1237
+ # branch, this second wait will be a no-op with a cached
1238
+ # returncode.
1239
+ return self ._child . wait ()
1237
1240
1238
1241
def try_wait (self ):
1239
- with self ._status_lock :
1240
- if self ._status is not None :
1241
- return self ._status
1242
+ with self ._child_lock :
1243
+ if self ._child . returncode is not None :
1244
+ return self ._child . returncode
1242
1245
1243
1246
# The child hasn't been waited on yet, so we need to do a
1244
1247
# non-blocking check to see if it's still running. The Popen type
@@ -1256,16 +1259,35 @@ def try_wait(self):
1256
1259
1257
1260
# If either of the poll approaches above returned non-None, do a full
1258
1261
# wait to reap the child, which will not block. Note that we've
1259
- # released the status lock here, because wait() will re-acquire it.
1262
+ # released the child lock here, because wait() will re-acquire it.
1260
1263
if poll_result is not None :
1261
1264
return self .wait ()
1262
1265
else :
1263
1266
return None
1264
1267
1265
1268
def kill(self):
    """Forcibly kill the child if it has not been reaped yet.

    Does nothing when the child's returncode is already known. Otherwise
    sends SIGKILL on Unix, or calls Popen.terminate on Windows. Returns None.
    """
    with self._child_lock:
        if self._child.returncode is None:
            # Previously we just used Popen.kill here. However, as of
            # Python 3.9, Popen.send_signal (which is called by Popen.kill)
            # calls Popen.poll first, as a best-effort check for the same
            # PID race that this class is designed around. That means that
            # if the child has already exited, Popen.kill will reap it.
            # Because this class checks Popen.returncode throughout, we'd
            # see the non-None exit status there as a side effect if
            # reaping has happened. That *might* mean we could still call
            # Popen.kill here safely. However, there's also the question of
            # how Popen.poll's call to os.waitpid would interact with our
            # own blocking call to os.waitid from another thread. The worry
            # is that the waitpid call might take effect first, causing
            # waitid to return a "no child found" error. That does happen
            # on Linux when both calls are blocking. Here though, the
            # waitpid call is non-blocking, which *might* mean it can't
            # happen first, but that's going to depend on the OS. We could
            # assume that it can happen and try to catch the error from
            # waitid, but that codepath would be impossible to test. So
            # what we actually do here is reimplement the documented
            # behavior of Popen.kill: os.kill(pid, SIGKILL) on Unix, and
            # Popen.terminate on Windows.
            if is_windows():
                self._child.terminate()
            else:
                os.kill(self._child.pid, signal.SIGKILL)
1269
1291
1270
1292
def pid(self):
    """Return the process ID of the wrapped child (``Popen.pid``)."""
    return self._child.pid
0 commit comments