diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 9c0ee0248d20aa..aa05db972f068d 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -3,8 +3,10 @@ import textwrap import importlib import sys +import socket from test.support import os_helper, SHORT_TIMEOUT, busy_retry from test.support.script_helper import make_script +from test.support.socket_helper import find_unused_port import subprocess @@ -24,16 +26,24 @@ def _make_test_script(script_dir, script_basename, source): importlib.invalidate_caches() return to_return +skip_if_not_supported = unittest.skipIf((sys.platform != "darwin" + and sys.platform != "linux" + and sys.platform != "win32"), + "Test only runs on Linux, Windows and MacOS") class TestGetStackTrace(unittest.TestCase): - @unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux", - "Test only runs on Linux and MacOS") + @skip_if_not_supported @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, "Test only runs on Linux with process_vm_readv support") def test_remote_stack_trace(self): # Spawn a process with some realistic Python code - script = textwrap.dedent("""\ - import time, sys + port = find_unused_port() + script = textwrap.dedent(f"""\ + import time, sys, socket + # Connect to the test process + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('localhost', {port})) + def bar(): for x in range(100): if x == 50: @@ -42,9 +52,7 @@ def baz(): foo() def foo(): - fifo_path = sys.argv[1] - with open(fifo_path, "w") as fifo: - fifo.write("ready") + sock.sendall(b"ready") time.sleep(1000) bar() @@ -53,19 +61,28 @@ def foo(): with os_helper.temp_dir() as work_dir: script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - fifo = f"{work_dir}/the_fifo" - os.mkfifo(fifo) + + # Create a socket server to communicate with the target process + server_socket = socket.socket(socket.AF_INET, 
socket.SOCK_STREAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(('localhost', port)) + server_socket.settimeout(SHORT_TIMEOUT) + server_socket.listen(1) + script_name = _make_test_script(script_dir, 'script', script) + client_socket = None try: - p = subprocess.Popen([sys.executable, script_name, str(fifo)]) - with open(fifo, "r") as fifo_file: - response = fifo_file.read() - self.assertEqual(response, "ready") + p = subprocess.Popen([sys.executable, script_name]) + client_socket, _ = server_socket.accept() + server_socket.close() + response = client_socket.recv(1024) + self.assertEqual(response, b"ready") stack_trace = get_stack_trace(p.pid) except PermissionError: self.skipTest("Insufficient permissions to read the stack trace") finally: - os.remove(fifo) + if client_socket is not None: + client_socket.close() p.kill() p.terminate() p.wait(timeout=SHORT_TIMEOUT) @@ -79,21 +96,23 @@ def foo(): ] self.assertEqual(stack_trace, expected_stack_trace) - @unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux", - "Test only runs on Linux and MacOS") + @skip_if_not_supported @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, "Test only runs on Linux with process_vm_readv support") def test_async_remote_stack_trace(self): # Spawn a process with some realistic Python code - script = textwrap.dedent("""\ + port = find_unused_port() + script = textwrap.dedent(f"""\ import asyncio import time import sys + import socket + # Connect to the test process + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('localhost', {port})) def c5(): - fifo_path = sys.argv[1] - with open(fifo_path, "w") as fifo: - fifo.write("ready") + sock.sendall(b"ready") time.sleep(10000) async def c4(): @@ -122,7 +141,7 @@ def new_eager_loop(): loop.set_task_factory(eager_task_factory) return loop - asyncio.run(main(), loop_factory={TASK_FACTORY}) + asyncio.run(main(), 
loop_factory={{TASK_FACTORY}}) """) stack_trace = None for task_factory_variant in "asyncio.new_event_loop", "new_eager_loop": @@ -132,24 +151,30 @@ def new_eager_loop(): ): script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - fifo = f"{work_dir}/the_fifo" - os.mkfifo(fifo) + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(('localhost', port)) + server_socket.settimeout(SHORT_TIMEOUT) + server_socket.listen(1) script_name = _make_test_script( script_dir, 'script', script.format(TASK_FACTORY=task_factory_variant)) + client_socket = None try: p = subprocess.Popen( - [sys.executable, script_name, str(fifo)] + [sys.executable, script_name] ) - with open(fifo, "r") as fifo_file: - response = fifo_file.read() - self.assertEqual(response, "ready") + client_socket, _ = server_socket.accept() + server_socket.close() + response = client_socket.recv(1024) + self.assertEqual(response, b"ready") stack_trace = get_async_stack_trace(p.pid) except PermissionError: self.skipTest( "Insufficient permissions to read the stack trace") finally: - os.remove(fifo) + if client_socket is not None: + client_socket.close() p.kill() p.terminate() p.wait(timeout=SHORT_TIMEOUT) @@ -169,21 +194,23 @@ def new_eager_loop(): ] self.assertEqual(stack_trace, expected_stack_trace) - @unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux", - "Test only runs on Linux and MacOS") + @skip_if_not_supported @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, "Test only runs on Linux with process_vm_readv support") def test_asyncgen_remote_stack_trace(self): # Spawn a process with some realistic Python code - script = textwrap.dedent("""\ + port = find_unused_port() + script = textwrap.dedent(f"""\ import asyncio import time import sys + import socket + # Connect to the test process + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + 
sock.connect(('localhost', {port})) async def gen_nested_call(): - fifo_path = sys.argv[1] - with open(fifo_path, "w") as fifo: - fifo.write("ready") + sock.sendall(b"ready") time.sleep(10000) async def gen(): @@ -202,19 +229,26 @@ async def main(): with os_helper.temp_dir() as work_dir: script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - fifo = f"{work_dir}/the_fifo" - os.mkfifo(fifo) + # Create a socket server to communicate with the target process + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(('localhost', port)) + server_socket.settimeout(SHORT_TIMEOUT) + server_socket.listen(1) script_name = _make_test_script(script_dir, 'script', script) + client_socket = None try: - p = subprocess.Popen([sys.executable, script_name, str(fifo)]) - with open(fifo, "r") as fifo_file: - response = fifo_file.read() - self.assertEqual(response, "ready") + p = subprocess.Popen([sys.executable, script_name]) + client_socket, _ = server_socket.accept() + server_socket.close() + response = client_socket.recv(1024) + self.assertEqual(response, b"ready") stack_trace = get_async_stack_trace(p.pid) except PermissionError: self.skipTest("Insufficient permissions to read the stack trace") finally: - os.remove(fifo) + if client_socket is not None: + client_socket.close() p.kill() p.terminate() p.wait(timeout=SHORT_TIMEOUT) @@ -227,22 +261,24 @@ async def main(): ] self.assertEqual(stack_trace, expected_stack_trace) - @unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux", - "Test only runs on Linux and MacOS") + @skip_if_not_supported @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, "Test only runs on Linux with process_vm_readv support") def test_async_gather_remote_stack_trace(self): # Spawn a process with some realistic Python code - script = textwrap.dedent("""\ + port = find_unused_port() + script = 
textwrap.dedent(f"""\ import asyncio import time import sys + import socket + # Connect to the test process + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('localhost', {port})) async def deep(): await asyncio.sleep(0) - fifo_path = sys.argv[1] - with open(fifo_path, "w") as fifo: - fifo.write("ready") + sock.sendall(b"ready") time.sleep(10000) async def c1(): @@ -261,20 +297,27 @@ async def main(): with os_helper.temp_dir() as work_dir: script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - fifo = f"{work_dir}/the_fifo" - os.mkfifo(fifo) + # Create a socket server to communicate with the target process + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(('localhost', port)) + server_socket.settimeout(SHORT_TIMEOUT) + server_socket.listen(1) script_name = _make_test_script(script_dir, 'script', script) + client_socket = None try: - p = subprocess.Popen([sys.executable, script_name, str(fifo)]) - with open(fifo, "r") as fifo_file: - response = fifo_file.read() - self.assertEqual(response, "ready") + p = subprocess.Popen([sys.executable, script_name]) + client_socket, _ = server_socket.accept() + server_socket.close() + response = client_socket.recv(1024) + self.assertEqual(response, b"ready") stack_trace = get_async_stack_trace(p.pid) except PermissionError: self.skipTest( "Insufficient permissions to read the stack trace") finally: - os.remove(fifo) + if client_socket is not None: + client_socket.close() p.kill() p.terminate() p.wait(timeout=SHORT_TIMEOUT) @@ -287,22 +330,24 @@ async def main(): ] self.assertEqual(stack_trace, expected_stack_trace) - @unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux", - "Test only runs on Linux and MacOS") + @skip_if_not_supported @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, "Test only runs on Linux with process_vm_readv support") 
def test_async_staggered_race_remote_stack_trace(self): # Spawn a process with some realistic Python code - script = textwrap.dedent("""\ + port = find_unused_port() + script = textwrap.dedent(f"""\ import asyncio.staggered import time import sys + import socket + # Connect to the test process + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('localhost', {port})) async def deep(): await asyncio.sleep(0) - fifo_path = sys.argv[1] - with open(fifo_path, "w") as fifo: - fifo.write("ready") + sock.sendall(b"ready") time.sleep(10000) async def c1(): @@ -324,20 +369,27 @@ async def main(): with os_helper.temp_dir() as work_dir: script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - fifo = f"{work_dir}/the_fifo" - os.mkfifo(fifo) + # Create a socket server to communicate with the target process + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(('localhost', port)) + server_socket.settimeout(SHORT_TIMEOUT) + server_socket.listen(1) script_name = _make_test_script(script_dir, 'script', script) + client_socket = None try: - p = subprocess.Popen([sys.executable, script_name, str(fifo)]) - with open(fifo, "r") as fifo_file: - response = fifo_file.read() - self.assertEqual(response, "ready") + p = subprocess.Popen([sys.executable, script_name]) + client_socket, _ = server_socket.accept() + server_socket.close() + response = client_socket.recv(1024) + self.assertEqual(response, b"ready") stack_trace = get_async_stack_trace(p.pid) except PermissionError: self.skipTest( "Insufficient permissions to read the stack trace") finally: - os.remove(fifo) + if client_socket is not None: + client_socket.close() p.kill() p.terminate() p.wait(timeout=SHORT_TIMEOUT) @@ -350,16 +402,17 @@ async def main(): ] self.assertEqual(stack_trace, expected_stack_trace) - @unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux", - "Test only 
runs on Linux and MacOS") + @skip_if_not_supported @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, "Test only runs on Linux with process_vm_readv support") def test_async_global_awaited_by(self): - script = textwrap.dedent("""\ + port = find_unused_port() + script = textwrap.dedent(f"""\ import asyncio import os import random import sys + import socket from string import ascii_lowercase, digits from test.support import socket_helper, SHORT_TIMEOUT @@ -367,6 +420,10 @@ def test_async_global_awaited_by(self): PORT = socket_helper.find_unused_port() connections = 0 + # Connect to the test process + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('localhost', {port})) + class EchoServerProtocol(asyncio.Protocol): def connection_made(self, transport): global connections @@ -396,9 +453,7 @@ async def echo_client_spam(server): tg.create_task(echo_client("".join(msg))) await asyncio.sleep(0) # at least a 1000 tasks created - fifo_path = sys.argv[1] - with open(fifo_path, "w") as fifo: - fifo.write("ready") + sock.sendall(b"ready") # at this point all client tasks completed without assertion errors # let's wrap up the test server.close() @@ -418,14 +473,20 @@ async def main(): with os_helper.temp_dir() as work_dir: script_dir = os.path.join(work_dir, "script_pkg") os.mkdir(script_dir) - fifo = f"{work_dir}/the_fifo" - os.mkfifo(fifo) + # Create a socket server to communicate with the target process + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server_socket.bind(('localhost', port)) + server_socket.settimeout(SHORT_TIMEOUT) + server_socket.listen(1) script_name = _make_test_script(script_dir, 'script', script) + client_socket = None try: - p = subprocess.Popen([sys.executable, script_name, str(fifo)]) - with open(fifo, "r") as fifo_file: - response = fifo_file.read() - self.assertEqual(response, "ready") + p = 
subprocess.Popen([sys.executable, script_name]) + client_socket, _ = server_socket.accept() + server_socket.close() + response = client_socket.recv(1024) + self.assertEqual(response, b"ready") for _ in busy_retry(SHORT_TIMEOUT): try: all_awaited_by = get_all_awaited_by(p.pid) @@ -458,24 +519,32 @@ async def main(): self.assertIn(('Task-1', []), entries) self.assertIn(('server task', [[['main'], 'Task-1', []]]), entries) self.assertIn(('echo client spam', [[['main'], 'Task-1', []]]), entries) + + expected_stack = [[['echo_client_spam'], 'echo client spam', [[['main'], 'Task-1', []]]]] + tasks_with_stack = [task for task in entries if task[1] == expected_stack] + self.assertGreaterEqual(len(tasks_with_stack), 1000) + # the final task will have some random number, but it should for - # sure be one of the echo client spam horde - self.assertEqual([[['echo_client_spam'], 'echo client spam', [[['main'], 'Task-1', []]]]], entries[-1][1]) + # sure be one of the echo client spam horde (In windows this is not true + # for some reason) + if sys.platform != "win32": + self.assertEqual([[['echo_client_spam'], 'echo client spam', [[['main'], 'Task-1', []]]]], entries[-1][1]) except PermissionError: self.skipTest( "Insufficient permissions to read the stack trace") finally: - os.remove(fifo) + if client_socket is not None: + client_socket.close() p.kill() p.terminate() p.wait(timeout=SHORT_TIMEOUT) - @unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux", - "Test only runs on Linux and MacOS") + @skip_if_not_supported @unittest.skipIf(sys.platform == "linux" and not PROCESS_VM_READV_SUPPORTED, "Test only runs on Linux with process_vm_readv support") def test_self_trace(self): stack_trace = get_stack_trace(os.getpid()) + print(stack_trace) self.assertEqual(stack_trace[0], "test_self_trace") if __name__ == "__main__": diff --git a/Modules/_testexternalinspection.c b/Modules/_testexternalinspection.c index 73d63df63261b3..ec61007483e2ca 100644 --- 
a/Modules/_testexternalinspection.c +++ b/Modules/_testexternalinspection.c @@ -1,40 +1,5 @@ #define _GNU_SOURCE -#ifdef __linux__ -# include -# include -# if INTPTR_MAX == INT64_MAX -# define Elf_Ehdr Elf64_Ehdr -# define Elf_Shdr Elf64_Shdr -# define Elf_Phdr Elf64_Phdr -# else -# define Elf_Ehdr Elf32_Ehdr -# define Elf_Shdr Elf32_Shdr -# define Elf_Phdr Elf32_Phdr -# endif -# include -#endif - -#if defined(__APPLE__) -# include -// Older macOS SDKs do not define TARGET_OS_OSX -# if !defined(TARGET_OS_OSX) -# define TARGET_OS_OSX 1 -# endif -# if TARGET_OS_OSX -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# endif -#endif - #include #include #include @@ -42,10 +7,6 @@ #include #include #include -#include -#include -#include -#include #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 @@ -56,6 +17,7 @@ #include // FRAME_OWNED_BY_CSTACK #include // struct llist_node #include // Py_TAG_BITS +#include "../Python/remote_debug.h" #ifndef HAVE_PROCESS_VM_READV # define HAVE_PROCESS_VM_READV 0 @@ -83,452 +45,46 @@ struct _Py_AsyncioModuleDebugOffsets { } asyncio_thread_state; }; -#if defined(__APPLE__) && TARGET_OS_OSX -static uintptr_t -return_section_address( - const char* section, - mach_port_t proc_ref, - uintptr_t base, - void* map -) { - struct mach_header_64* hdr = (struct mach_header_64*)map; - int ncmds = hdr->ncmds; - - int cmd_cnt = 0; - struct segment_command_64* cmd = map + sizeof(struct mach_header_64); - - mach_vm_size_t size = 0; - mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t); - mach_vm_address_t address = (mach_vm_address_t)base; - vm_region_basic_info_data_64_t r_info; - mach_port_t object_name; - uintptr_t vmaddr = 0; - - for (int i = 0; cmd_cnt < 2 && i < ncmds; i++) { - if (cmd->cmd == LC_SEGMENT_64 && strcmp(cmd->segname, "__TEXT") == 0) { - vmaddr = cmd->vmaddr; - } - if (cmd->cmd == LC_SEGMENT_64 && strcmp(cmd->segname, "__DATA") == 0) { - while 
(cmd->filesize != size) { - address += size; - kern_return_t ret = mach_vm_region( - proc_ref, - &address, - &size, - VM_REGION_BASIC_INFO_64, - (vm_region_info_t)&r_info, // cppcheck-suppress [uninitvar] - &count, - &object_name - ); - if (ret != KERN_SUCCESS) { - PyErr_SetString( - PyExc_RuntimeError, "Cannot get any more VM maps.\n"); - return 0; - } - } - - int nsects = cmd->nsects; - struct section_64* sec = (struct section_64*)( - (void*)cmd + sizeof(struct segment_command_64) - ); - for (int j = 0; j < nsects; j++) { - if (strcmp(sec[j].sectname, section) == 0) { - return base + sec[j].addr - vmaddr; - } - } - cmd_cnt++; - } - - cmd = (struct segment_command_64*)((void*)cmd + cmd->cmdsize); - } - - // We should not be here, but if we are there, we should say about this - PyErr_SetString( - PyExc_RuntimeError, "Cannot find section address.\n"); - return 0; -} - +// Get the PyAsyncioDebug section address for any platform static uintptr_t -search_section_in_file( - const char* secname, - char* path, - uintptr_t base, - mach_vm_size_t size, - mach_port_t proc_ref -) { - int fd = open(path, O_RDONLY); - if (fd == -1) { - PyErr_Format(PyExc_RuntimeError, "Cannot open binary %s\n", path); - return 0; - } - - struct stat fs; - if (fstat(fd, &fs) == -1) { - PyErr_Format( - PyExc_RuntimeError, "Cannot get size of binary %s\n", path); - close(fd); - return 0; - } - - void* map = mmap(0, fs.st_size, PROT_READ, MAP_SHARED, fd, 0); - if (map == MAP_FAILED) { - PyErr_Format(PyExc_RuntimeError, "Cannot map binary %s\n", path); - close(fd); - return 0; - } - - uintptr_t result = 0; - - struct mach_header_64* hdr = (struct mach_header_64*)map; - switch (hdr->magic) { - case MH_MAGIC: - case MH_CIGAM: - case FAT_MAGIC: - case FAT_CIGAM: - PyErr_SetString( - PyExc_RuntimeError, - "32-bit Mach-O binaries are not supported"); - break; - case MH_MAGIC_64: - case MH_CIGAM_64: - result = return_section_address(secname, proc_ref, base, map); - break; - default: - 
PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic"); - break; - } - - munmap(map, fs.st_size); - if (close(fd) != 0) { - // This might hide one of the above exceptions, maybe we - // should chain them? - PyErr_SetFromErrno(PyExc_OSError); - } - return result; -} - -static mach_port_t -pid_to_task(pid_t pid) +_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) { - mach_port_t task; - kern_return_t result; - - result = task_for_pid(mach_task_self(), pid, &task); - if (result != KERN_SUCCESS) { - PyErr_Format(PyExc_PermissionError, "Cannot get task for PID %d", pid); - return 0; - } - return task; -} - -static uintptr_t -search_map_for_section(pid_t pid, const char* secname, const char* substr) { - mach_vm_address_t address = 0; - mach_vm_size_t size = 0; - mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t); - vm_region_basic_info_data_64_t region_info; - mach_port_t object_name; - - mach_port_t proc_ref = pid_to_task(pid); - if (proc_ref == 0) { - return 0; - } - - int match_found = 0; - char map_filename[MAXPATHLEN + 1]; - while (mach_vm_region( - proc_ref, - &address, - &size, - VM_REGION_BASIC_INFO_64, - (vm_region_info_t)®ion_info, - &count, - &object_name) == KERN_SUCCESS) - { - if ((region_info.protection & VM_PROT_READ) == 0 - || (region_info.protection & VM_PROT_EXECUTE) == 0) { - address += size; - continue; - } - - int path_len = proc_regionfilename( - pid, address, map_filename, MAXPATHLEN); - if (path_len == 0) { - address += size; - continue; - } - - char* filename = strrchr(map_filename, '/'); - if (filename != NULL) { - filename++; // Move past the '/' - } else { - filename = map_filename; // No path, use the whole string - } - - if (!match_found && strncmp(filename, substr, strlen(substr)) == 0) { - match_found = 1; - return search_section_in_file( - secname, map_filename, address, size, proc_ref); - } - - address += size; - } - - PyErr_SetString(PyExc_RuntimeError, - "mach_vm_region failed to find the section"); - 
return 0; -} + uintptr_t address = 0; +#ifdef MS_WINDOWS + // On Windows, search for asyncio debug in executable or DLL + address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio"); #elif defined(__linux__) -static uintptr_t -find_map_start_address(pid_t pid, char* result_filename, const char* map) -{ - char maps_file_path[64]; - sprintf(maps_file_path, "/proc/%d/maps", pid); - - FILE* maps_file = fopen(maps_file_path, "r"); - if (maps_file == NULL) { - PyErr_SetFromErrno(PyExc_OSError); - return 0; - } - - int match_found = 0; - - char line[256]; - char map_filename[PATH_MAX]; - uintptr_t result_address = 0; - while (fgets(line, sizeof(line), maps_file) != NULL) { - unsigned long start_address = 0; - sscanf( - line, "%lx-%*x %*s %*s %*s %*s %s", - &start_address, map_filename - ); - char* filename = strrchr(map_filename, '/'); - if (filename != NULL) { - filename++; // Move past the '/' - } else { - filename = map_filename; // No path, use the whole string - } - - if (!match_found && strncmp(filename, map, strlen(map)) == 0) { - match_found = 1; - result_address = start_address; - strcpy(result_filename, map_filename); - break; - } - } - - fclose(maps_file); - - if (!match_found) { - map_filename[0] = '\0'; - PyErr_Format(PyExc_RuntimeError, - "Cannot find map start address for map: %s", map); - } - - return result_address; -} - -static uintptr_t -search_map_for_section(pid_t pid, const char* secname, const char* map) -{ - char elf_file[256]; - uintptr_t start_address = find_map_start_address(pid, elf_file, map); - - if (start_address == 0) { - return 0; - } - - uintptr_t result = 0; - void* file_memory = NULL; - - int fd = open(elf_file, O_RDONLY); - if (fd < 0) { - PyErr_SetFromErrno(PyExc_OSError); - goto exit; - } - - struct stat file_stats; - if (fstat(fd, &file_stats) != 0) { - PyErr_SetFromErrno(PyExc_OSError); - goto exit; - } - - file_memory = mmap(NULL, file_stats.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (file_memory == MAP_FAILED) { 
- PyErr_SetFromErrno(PyExc_OSError); - goto exit; - } - - Elf_Ehdr* elf_header = (Elf_Ehdr*)file_memory; - - Elf_Shdr* section_header_table = - (Elf_Shdr*)(file_memory + elf_header->e_shoff); - - Elf_Shdr* shstrtab_section = §ion_header_table[elf_header->e_shstrndx]; - char* shstrtab = (char*)(file_memory + shstrtab_section->sh_offset); - - Elf_Shdr* section = NULL; - for (int i = 0; i < elf_header->e_shnum; i++) { - const char* this_sec_name = ( - shstrtab + - section_header_table[i].sh_name + - 1 // "+1" accounts for the leading "." - ); - - if (strcmp(secname, this_sec_name) == 0) { - section = §ion_header_table[i]; - break; - } - } - - Elf_Phdr* program_header_table = - (Elf_Phdr*)(file_memory + elf_header->e_phoff); - - // Find the first PT_LOAD segment - Elf_Phdr* first_load_segment = NULL; - for (int i = 0; i < elf_header->e_phnum; i++) { - if (program_header_table[i].p_type == PT_LOAD) { - first_load_segment = &program_header_table[i]; - break; - } - } - - if (section != NULL && first_load_segment != NULL) { - uintptr_t elf_load_addr = - first_load_segment->p_vaddr - ( - first_load_segment->p_vaddr % first_load_segment->p_align - ); - result = start_address + (uintptr_t)section->sh_addr - elf_load_addr; - } - else { - PyErr_Format(PyExc_KeyError, - "cannot find map for section %s", secname); - } - -exit: - if (fd >= 0 && close(fd) != 0) { - PyObject *exc = PyErr_GetRaisedException(); - PyErr_SetFromErrno(PyExc_OSError); - _PyErr_ChainExceptions1(exc); - } - if (file_memory != NULL) { - munmap(file_memory, file_stats.st_size); - } - return result; -} + // On Linux, search for asyncio debug in executable or DLL + address = search_linux_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); #else -static uintptr_t -search_map_for_section(pid_t pid, const char* secname, const char* map) -{ - PyErr_SetString(PyExc_NotImplementedError, - "Not supported on this platform"); - return 0; -} -#endif - -static uintptr_t -get_py_runtime(pid_t pid) -{ - uintptr_t 
address = search_map_for_section(pid, "PyRuntime", "libpython"); + // On macOS, try libpython first, then fall back to python + address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); if (address == 0) { PyErr_Clear(); - address = search_map_for_section(pid, "PyRuntime", "python"); + address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); } - return address; -} - -static uintptr_t -get_async_debug(pid_t pid) -{ - uintptr_t result = search_map_for_section(pid, "AsyncioDebug", - "_asyncio.cpython"); - if (result == 0 && !PyErr_Occurred()) { - PyErr_SetString(PyExc_RuntimeError, "Cannot find AsyncioDebug section"); - } - return result; -} - - -static ssize_t -read_memory(pid_t pid, uintptr_t remote_address, size_t len, void* dst) -{ - ssize_t total_bytes_read = 0; -#if defined(__linux__) && HAVE_PROCESS_VM_READV - struct iovec local[1]; - struct iovec remote[1]; - ssize_t result = 0; - ssize_t read = 0; - - do { - local[0].iov_base = dst + result; - local[0].iov_len = len - result; - remote[0].iov_base = (void*)(remote_address + result); - remote[0].iov_len = len - result; - - read = process_vm_readv(pid, local, 1, remote, 1, 0); - if (read < 0) { - PyErr_SetFromErrno(PyExc_OSError); - return -1; - } - - result += read; - } while ((size_t)read != local[0].iov_len); - total_bytes_read = result; -#elif defined(__APPLE__) && TARGET_OS_OSX - ssize_t result = -1; - kern_return_t kr = mach_vm_read_overwrite( - pid_to_task(pid), - (mach_vm_address_t)remote_address, - len, - (mach_vm_address_t)dst, - (mach_vm_size_t*)&result); - - if (kr != KERN_SUCCESS) { - switch (kr) { - case KERN_PROTECTION_FAILURE: - PyErr_SetString( - PyExc_PermissionError, - "Not enough permissions to read memory"); - break; - case KERN_INVALID_ARGUMENT: - PyErr_SetString( - PyExc_PermissionError, - "Invalid argument to mach_vm_read_overwrite"); - break; - default: - PyErr_SetString( - PyExc_RuntimeError, - "Unknown error reading memory"); - } - return -1; - 
} - total_bytes_read = len; -#else - PyErr_SetString( - PyExc_RuntimeError, - "Memory reading is not supported on this platform"); - return -1; #endif - return total_bytes_read; + + return address; } static int read_string( - pid_t pid, + proc_handle_t *handle, _Py_DebugOffsets* debug_offsets, uintptr_t address, char* buffer, Py_ssize_t size ) { Py_ssize_t len; - ssize_t bytes_read = read_memory( - pid, + int result = _Py_RemoteDebug_ReadRemoteMemory( + handle, address + debug_offsets->unicode_object.length, sizeof(Py_ssize_t), &len ); - if (bytes_read < 0) { + if (result < 0) { return -1; } if (len >= size) { @@ -536,39 +92,38 @@ read_string( return -1; } size_t offset = debug_offsets->unicode_object.asciiobject_size; - bytes_read = read_memory(pid, address + offset, len, buffer); - if (bytes_read < 0) { + result = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buffer); + if (result < 0) { return -1; } buffer[len] = '\0'; return 0; } - static inline int -read_ptr(pid_t pid, uintptr_t address, uintptr_t *ptr_addr) +read_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) { - int bytes_read = read_memory(pid, address, sizeof(void*), ptr_addr); - if (bytes_read < 0) { + int result = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void*), ptr_addr); + if (result < 0) { return -1; } return 0; } static inline int -read_ssize_t(pid_t pid, uintptr_t address, Py_ssize_t *size) +read_Py_ssize_t(proc_handle_t *handle, uintptr_t address, Py_ssize_t *size) { - int bytes_read = read_memory(pid, address, sizeof(Py_ssize_t), size); - if (bytes_read < 0) { + int result = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(Py_ssize_t), size); + if (result < 0) { return -1; } return 0; } static int -read_py_ptr(pid_t pid, uintptr_t address, uintptr_t *ptr_addr) +read_py_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) { - if (read_ptr(pid, address, ptr_addr)) { + if (read_ptr(handle, address, ptr_addr)) { return -1; } 
*ptr_addr &= ~Py_TAG_BITS; @@ -576,40 +131,40 @@ read_py_ptr(pid_t pid, uintptr_t address, uintptr_t *ptr_addr) } static int -read_char(pid_t pid, uintptr_t address, char *result) +read_char(proc_handle_t *handle, uintptr_t address, char *result) { - int bytes_read = read_memory(pid, address, sizeof(char), result); - if (bytes_read < 0) { + int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(char), result); + if (res < 0) { return -1; } return 0; } static int -read_int(pid_t pid, uintptr_t address, int *result) +read_int(proc_handle_t *handle, uintptr_t address, int *result) { - int bytes_read = read_memory(pid, address, sizeof(int), result); - if (bytes_read < 0) { + int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(int), result); + if (res < 0) { return -1; } return 0; } static int -read_unsigned_long(pid_t pid, uintptr_t address, unsigned long *result) +read_unsigned_long(proc_handle_t *handle, uintptr_t address, unsigned long *result) { - int bytes_read = read_memory(pid, address, sizeof(unsigned long), result); - if (bytes_read < 0) { + int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(unsigned long), result); + if (res < 0) { return -1; } return 0; } static int -read_pyobj(pid_t pid, uintptr_t address, PyObject *ptr_addr) +read_pyobj(proc_handle_t *handle, uintptr_t address, PyObject *ptr_addr) { - int bytes_read = read_memory(pid, address, sizeof(PyObject), ptr_addr); - if (bytes_read < 0) { + int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(PyObject), ptr_addr); + if (res < 0) { return -1; } return 0; @@ -617,10 +172,10 @@ read_pyobj(pid_t pid, uintptr_t address, PyObject *ptr_addr) static PyObject * read_py_str( - pid_t pid, + proc_handle_t *handle, _Py_DebugOffsets* debug_offsets, uintptr_t address, - ssize_t max_len + Py_ssize_t max_len ) { assert(max_len > 0); @@ -631,7 +186,7 @@ read_py_str( PyErr_NoMemory(); return NULL; } - if (read_string(pid, debug_offsets, address, buf, max_len)) { 
+ if (read_string(handle, debug_offsets, address, buf, max_len)) { goto err; } @@ -650,15 +205,15 @@ read_py_str( } static long -read_py_long(pid_t pid, _Py_DebugOffsets* offsets, uintptr_t address) +read_py_long(proc_handle_t *handle, _Py_DebugOffsets* offsets, uintptr_t address) { unsigned int shift = PYLONG_BITS_IN_DIGIT; - ssize_t size; + Py_ssize_t size; uintptr_t lv_tag; - int bytes_read = read_memory( - pid, address + offsets->long_object.lv_tag, + int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( + handle, address + offsets->long_object.lv_tag, sizeof(uintptr_t), &lv_tag); if (bytes_read < 0) { @@ -678,8 +233,8 @@ read_py_long(pid_t pid, _Py_DebugOffsets* offsets, uintptr_t address) return -1; } - bytes_read = read_memory( - pid, + bytes_read = _Py_RemoteDebug_ReadRemoteMemory( + handle, address + offsets->long_object.ob_digit, sizeof(digit) * size, digits @@ -688,21 +243,21 @@ read_py_long(pid_t pid, _Py_DebugOffsets* offsets, uintptr_t address) goto error; } - long value = 0; + long long value = 0; // In theory this can overflow, but because of llvm/llvm-project#16778 // we can't use __builtin_mul_overflow because it fails to link with // __muloti4 on aarch64. In practice this is fine because all we're // testing here are task numbers that would fit in a single byte. 
- for (ssize_t i = 0; i < size; ++i) { - long long factor = digits[i] * (1UL << (ssize_t)(shift * i)); + for (Py_ssize_t i = 0; i < size; ++i) { + long long factor = digits[i] * (1UL << (Py_ssize_t)(shift * i)); value += factor; } PyMem_RawFree(digits); if (negative) { value *= -1; } - return value; + return (long)value; error: PyMem_RawFree(digits); return -1; @@ -710,14 +265,14 @@ read_py_long(pid_t pid, _Py_DebugOffsets* offsets, uintptr_t address) static PyObject * parse_task_name( - int pid, + proc_handle_t *handle, _Py_DebugOffsets* offsets, struct _Py_AsyncioModuleDebugOffsets* async_offsets, uintptr_t task_address ) { uintptr_t task_name_addr; int err = read_py_ptr( - pid, + handle, task_address + async_offsets->asyncio_task_object.task_name, &task_name_addr); if (err) { @@ -728,7 +283,7 @@ parse_task_name( PyObject task_name_obj; err = read_pyobj( - pid, + handle, task_name_addr, &task_name_obj); if (err) { @@ -737,7 +292,7 @@ parse_task_name( unsigned long flags; err = read_unsigned_long( - pid, + handle, (uintptr_t)task_name_obj.ob_type + offsets->type_object.tp_flags, &flags); if (err) { @@ -745,7 +300,7 @@ parse_task_name( } if ((flags & Py_TPFLAGS_LONG_SUBCLASS)) { - long res = read_py_long(pid, offsets, task_name_addr); + long res = read_py_long(handle, offsets, task_name_addr); if (res == -1) { PyErr_SetString(PyExc_RuntimeError, "Failed to get task name"); return NULL; @@ -759,7 +314,7 @@ parse_task_name( } return read_py_str( - pid, + handle, offsets, task_name_addr, 255 @@ -768,7 +323,7 @@ parse_task_name( static int parse_coro_chain( - int pid, + proc_handle_t *handle, struct _Py_DebugOffsets* offsets, struct _Py_AsyncioModuleDebugOffsets* async_offsets, uintptr_t coro_address, @@ -778,7 +333,7 @@ parse_coro_chain( uintptr_t gen_type_addr; int err = read_ptr( - pid, + handle, coro_address + sizeof(void*), &gen_type_addr); if (err) { @@ -787,7 +342,7 @@ parse_coro_chain( uintptr_t gen_name_addr; err = read_py_ptr( - pid, + handle, coro_address + 
offsets->gen_object.gi_name, &gen_name_addr); if (err) { @@ -795,7 +350,7 @@ parse_coro_chain( } PyObject *name = read_py_str( - pid, + handle, offsets, gen_name_addr, 255 @@ -812,7 +367,7 @@ parse_coro_chain( int gi_frame_state; err = read_int( - pid, + handle, coro_address + offsets->gen_object.gi_frame_state, &gi_frame_state); if (err) { @@ -822,7 +377,7 @@ parse_coro_chain( if (gi_frame_state == FRAME_SUSPENDED_YIELD_FROM) { char owner; err = read_char( - pid, + handle, coro_address + offsets->gen_object.gi_iframe + offsets->interpreter_frame.owner, &owner @@ -839,7 +394,7 @@ parse_coro_chain( uintptr_t stackpointer_addr; err = read_py_ptr( - pid, + handle, coro_address + offsets->gen_object.gi_iframe + offsets->interpreter_frame.stackpointer, &stackpointer_addr); @@ -850,7 +405,7 @@ parse_coro_chain( if ((void*)stackpointer_addr != NULL) { uintptr_t gi_await_addr; err = read_py_ptr( - pid, + handle, stackpointer_addr - sizeof(void*), &gi_await_addr); if (err) { @@ -860,7 +415,7 @@ parse_coro_chain( if ((void*)gi_await_addr != NULL) { uintptr_t gi_await_addr_type_addr; int err = read_ptr( - pid, + handle, gi_await_addr + sizeof(void*), &gi_await_addr_type_addr); if (err) { @@ -879,7 +434,7 @@ parse_coro_chain( in its cr_await. 
*/ err = parse_coro_chain( - pid, + handle, offsets, async_offsets, gi_await_addr, @@ -900,7 +455,7 @@ parse_coro_chain( static int parse_task_awaited_by( - int pid, + proc_handle_t *handle, struct _Py_DebugOffsets* offsets, struct _Py_AsyncioModuleDebugOffsets* async_offsets, uintptr_t task_address, @@ -910,7 +465,7 @@ parse_task_awaited_by( static int parse_task( - int pid, + proc_handle_t *handle, struct _Py_DebugOffsets* offsets, struct _Py_AsyncioModuleDebugOffsets* async_offsets, uintptr_t task_address, @@ -918,7 +473,7 @@ parse_task( ) { char is_task; int err = read_char( - pid, + handle, task_address + async_offsets->asyncio_task_object.task_is_task, &is_task); if (err) { @@ -926,7 +481,7 @@ parse_task( } uintptr_t refcnt; - read_ptr(pid, task_address + sizeof(Py_ssize_t), &refcnt); + read_ptr(handle, task_address + sizeof(Py_ssize_t), &refcnt); PyObject* result = PyList_New(0); if (result == NULL) { @@ -946,7 +501,7 @@ parse_task( if (is_task) { PyObject *tn = parse_task_name( - pid, offsets, async_offsets, task_address); + handle, offsets, async_offsets, task_address); if (tn == NULL) { goto err; } @@ -958,7 +513,7 @@ parse_task( uintptr_t coro_addr; err = read_py_ptr( - pid, + handle, task_address + async_offsets->asyncio_task_object.task_coro, &coro_addr); if (err) { @@ -967,7 +522,7 @@ parse_task( if ((void*)coro_addr != NULL) { err = parse_coro_chain( - pid, + handle, offsets, async_offsets, coro_addr, @@ -998,7 +553,7 @@ parse_task( /* we can operate on a borrowed one to simplify cleanup */ Py_DECREF(awaited_by); - if (parse_task_awaited_by(pid, offsets, async_offsets, + if (parse_task_awaited_by(handle, offsets, async_offsets, task_address, awaited_by) ) { goto err; @@ -1014,7 +569,7 @@ parse_task( static int parse_tasks_in_set( - int pid, + proc_handle_t *handle, struct _Py_DebugOffsets* offsets, struct _Py_AsyncioModuleDebugOffsets* async_offsets, uintptr_t set_addr, @@ -1022,7 +577,7 @@ parse_tasks_in_set( ) { uintptr_t set_obj; if (read_py_ptr( 
- pid, + handle, set_addr, &set_obj) ) { @@ -1030,8 +585,8 @@ parse_tasks_in_set( } Py_ssize_t num_els; - if (read_ssize_t( - pid, + if (read_Py_ssize_t( + handle, set_obj + offsets->set_object.used, &num_els) ) { @@ -1039,8 +594,8 @@ parse_tasks_in_set( } Py_ssize_t set_len; - if (read_ssize_t( - pid, + if (read_Py_ssize_t( + handle, set_obj + offsets->set_object.mask, &set_len) ) { @@ -1050,7 +605,7 @@ parse_tasks_in_set( uintptr_t table_ptr; if (read_ptr( - pid, + handle, set_obj + offsets->set_object.table, &table_ptr) ) { @@ -1061,13 +616,13 @@ parse_tasks_in_set( Py_ssize_t els = 0; while (i < set_len) { uintptr_t key_addr; - if (read_py_ptr(pid, table_ptr, &key_addr)) { + if (read_py_ptr(handle, table_ptr, &key_addr)) { return -1; } if ((void*)key_addr != NULL) { Py_ssize_t ref_cnt; - if (read_ssize_t(pid, table_ptr, &ref_cnt)) { + if (read_Py_ssize_t(handle, table_ptr, &ref_cnt)) { return -1; } @@ -1075,7 +630,7 @@ parse_tasks_in_set( // if 'ref_cnt=0' it's a set dummy marker if (parse_task( - pid, + handle, offsets, async_offsets, key_addr, @@ -1099,7 +654,7 @@ parse_tasks_in_set( static int parse_task_awaited_by( - int pid, + proc_handle_t *handle, struct _Py_DebugOffsets* offsets, struct _Py_AsyncioModuleDebugOffsets* async_offsets, uintptr_t task_address, @@ -1107,7 +662,7 @@ parse_task_awaited_by( ) { uintptr_t task_ab_addr; int err = read_py_ptr( - pid, + handle, task_address + async_offsets->asyncio_task_object.task_awaited_by, &task_ab_addr); if (err) { @@ -1120,7 +675,7 @@ parse_task_awaited_by( char awaited_by_is_a_set; err = read_char( - pid, + handle, task_address + async_offsets->asyncio_task_object.task_awaited_by_is_set, &awaited_by_is_a_set); if (err) { @@ -1129,7 +684,7 @@ parse_task_awaited_by( if (awaited_by_is_a_set) { if (parse_tasks_in_set( - pid, + handle, offsets, async_offsets, task_address + async_offsets->asyncio_task_object.task_awaited_by, @@ -1140,7 +695,7 @@ parse_task_awaited_by( } else { uintptr_t sub_task; if (read_py_ptr( 
- pid, + handle, task_address + async_offsets->asyncio_task_object.task_awaited_by, &sub_task) ) { @@ -1148,7 +703,7 @@ parse_task_awaited_by( } if (parse_task( - pid, + handle, offsets, async_offsets, sub_task, @@ -1163,15 +718,15 @@ parse_task_awaited_by( static int parse_code_object( - int pid, + proc_handle_t *handle, PyObject* result, struct _Py_DebugOffsets* offsets, uintptr_t address, uintptr_t* previous_frame ) { uintptr_t address_of_function_name; - int bytes_read = read_memory( - pid, + int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( + handle, address + offsets->code_object.name, sizeof(void*), &address_of_function_name @@ -1186,7 +741,7 @@ parse_code_object( } PyObject* py_function_name = read_py_str( - pid, offsets, address_of_function_name, 256); + handle, offsets, address_of_function_name, 256); if (py_function_name == NULL) { return -1; } @@ -1202,7 +757,7 @@ parse_code_object( static int parse_frame_object( - int pid, + proc_handle_t *handle, PyObject* result, struct _Py_DebugOffsets* offsets, uintptr_t address, @@ -1210,8 +765,8 @@ parse_frame_object( ) { int err; - ssize_t bytes_read = read_memory( - pid, + Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory( + handle, address + offsets->interpreter_frame.previous, sizeof(void*), previous_frame @@ -1221,7 +776,7 @@ parse_frame_object( } char owner; - if (read_char(pid, address + offsets->interpreter_frame.owner, &owner)) { + if (read_char(handle, address + offsets->interpreter_frame.owner, &owner)) { return -1; } @@ -1231,7 +786,7 @@ parse_frame_object( uintptr_t address_of_code_object; err = read_py_ptr( - pid, + handle, address + offsets->interpreter_frame.executable, &address_of_code_object ); @@ -1244,12 +799,12 @@ parse_frame_object( } return parse_code_object( - pid, result, offsets, address_of_code_object, previous_frame); + handle, result, offsets, address_of_code_object, previous_frame); } static int parse_async_frame_object( - int pid, + proc_handle_t *handle, PyObject* result, 
struct _Py_DebugOffsets* offsets, uintptr_t address, @@ -1258,8 +813,8 @@ parse_async_frame_object( ) { int err; - ssize_t bytes_read = read_memory( - pid, + Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory( + handle, address + offsets->interpreter_frame.previous, sizeof(void*), previous_frame @@ -1269,8 +824,8 @@ parse_async_frame_object( } char owner; - bytes_read = read_memory( - pid, address + offsets->interpreter_frame.owner, sizeof(char), &owner); + bytes_read = _Py_RemoteDebug_ReadRemoteMemory( + handle, address + offsets->interpreter_frame.owner, sizeof(char), &owner); if (bytes_read < 0) { return -1; } @@ -1286,7 +841,7 @@ parse_async_frame_object( } err = read_py_ptr( - pid, + handle, address + offsets->interpreter_frame.executable, code_object ); @@ -1300,67 +855,41 @@ parse_async_frame_object( } if (parse_code_object( - pid, result, offsets, *code_object, previous_frame)) { + handle, result, offsets, *code_object, previous_frame)) { return -1; } return 1; } -static int -read_offsets( - int pid, - uintptr_t *runtime_start_address, - _Py_DebugOffsets* debug_offsets -) { - *runtime_start_address = get_py_runtime(pid); - if ((void*)*runtime_start_address == NULL) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); - } - return -1; - } - size_t size = sizeof(struct _Py_DebugOffsets); - ssize_t bytes_read = read_memory( - pid, *runtime_start_address, size, debug_offsets); - if (bytes_read < 0) { - return -1; - } - return 0; -} - static int read_async_debug( - int pid, + proc_handle_t *handle, struct _Py_AsyncioModuleDebugOffsets* async_debug ) { - uintptr_t async_debug_addr = get_async_debug(pid); + uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(handle); if (!async_debug_addr) { return -1; } + size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets); - ssize_t bytes_read = read_memory( - pid, async_debug_addr, size, async_debug); - if (bytes_read < 0) { - return -1; - } - 
return 0; + int result = _Py_RemoteDebug_ReadRemoteMemory(handle, async_debug_addr, size, async_debug); + return result; } static int find_running_frame( - int pid, + proc_handle_t *handle, uintptr_t runtime_start_address, _Py_DebugOffsets* local_debug_offsets, uintptr_t *frame ) { - off_t interpreter_state_list_head = + uint64_t interpreter_state_list_head = local_debug_offsets->runtime_state.interpreters_head; uintptr_t address_of_interpreter_state; - int bytes_read = read_memory( - pid, + int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( + handle, runtime_start_address + interpreter_state_list_head, sizeof(void*), &address_of_interpreter_state); @@ -1374,10 +903,10 @@ find_running_frame( } uintptr_t address_of_thread; - bytes_read = read_memory( - pid, + bytes_read = _Py_RemoteDebug_ReadRemoteMemory( + handle, address_of_interpreter_state + - local_debug_offsets->interpreter_state.threads_head, + local_debug_offsets->interpreter_state.threads_main, sizeof(void*), &address_of_thread); if (bytes_read < 0) { @@ -1387,7 +916,7 @@ find_running_frame( // No Python frames are available for us (can happen at tear-down). 
if ((void*)address_of_thread != NULL) { int err = read_ptr( - pid, + handle, address_of_thread + local_debug_offsets->thread_state.current_frame, frame); if (err) { @@ -1402,7 +931,7 @@ find_running_frame( static int find_running_task( - int pid, + proc_handle_t *handle, uintptr_t runtime_start_address, _Py_DebugOffsets *local_debug_offsets, struct _Py_AsyncioModuleDebugOffsets *async_offsets, @@ -1410,12 +939,12 @@ find_running_task( ) { *running_task_addr = (uintptr_t)NULL; - off_t interpreter_state_list_head = + uint64_t interpreter_state_list_head = local_debug_offsets->runtime_state.interpreters_head; uintptr_t address_of_interpreter_state; - int bytes_read = read_memory( - pid, + int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( + handle, runtime_start_address + interpreter_state_list_head, sizeof(void*), &address_of_interpreter_state); @@ -1429,8 +958,8 @@ find_running_task( } uintptr_t address_of_thread; - bytes_read = read_memory( - pid, + bytes_read = _Py_RemoteDebug_ReadRemoteMemory( + handle, address_of_interpreter_state + local_debug_offsets->interpreter_state.threads_head, sizeof(void*), @@ -1446,7 +975,7 @@ find_running_task( } bytes_read = read_py_ptr( - pid, + handle, address_of_thread + async_offsets->asyncio_thread_state.asyncio_running_loop, &address_of_running_loop); @@ -1460,7 +989,7 @@ find_running_task( } int err = read_ptr( - pid, + handle, address_of_thread + async_offsets->asyncio_thread_state.asyncio_running_task, running_task_addr); @@ -1473,7 +1002,7 @@ find_running_task( static int append_awaited_by_for_thread( - int pid, + proc_handle_t *handle, uintptr_t head_addr, struct _Py_DebugOffsets *debug_offsets, struct _Py_AsyncioModuleDebugOffsets *async_offsets, @@ -1481,8 +1010,8 @@ append_awaited_by_for_thread( ) { struct llist_node task_node; - if (0 > read_memory( - pid, + if (0 > _Py_RemoteDebug_ReadRemoteMemory( + handle, head_addr, sizeof(task_node), &task_node)) @@ -1509,7 +1038,7 @@ append_awaited_by_for_thread( - 
async_offsets->asyncio_task_object.task_node; PyObject *tn = parse_task_name( - pid, + handle, debug_offsets, async_offsets, task_addr); @@ -1538,15 +1067,15 @@ append_awaited_by_for_thread( } Py_DECREF(result_item); - if (parse_task_awaited_by(pid, debug_offsets, async_offsets, + if (parse_task_awaited_by(handle, debug_offsets, async_offsets, task_addr, current_awaited_by)) { return -1; } // onto the next one... - if (0 > read_memory( - pid, + if (0 > _Py_RemoteDebug_ReadRemoteMemory( + handle, (uintptr_t)task_node.next, sizeof(task_node), &task_node)) @@ -1560,7 +1089,7 @@ append_awaited_by_for_thread( static int append_awaited_by( - int pid, + proc_handle_t *handle, unsigned long tid, uintptr_t head_addr, struct _Py_DebugOffsets *debug_offsets, @@ -1594,7 +1123,7 @@ append_awaited_by( Py_DECREF(result_item); if (append_awaited_by_for_thread( - pid, + handle, head_addr, debug_offsets, async_offsets, @@ -1609,7 +1138,7 @@ append_awaited_by( static PyObject* get_all_awaited_by(PyObject* self, PyObject* args) { -#if (!defined(__linux__) && !defined(__APPLE__)) || \ +#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ (defined(__linux__) && !HAVE_PROCESS_VM_READV) PyErr_SetString( PyExc_RuntimeError, @@ -1618,12 +1147,17 @@ get_all_awaited_by(PyObject* self, PyObject* args) #endif int pid; - if (!PyArg_ParseTuple(args, "i", &pid)) { return NULL; } - uintptr_t runtime_start_addr = get_py_runtime(pid); + proc_handle_t the_handle; + proc_handle_t *handle = &the_handle; + if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { + return 0; + } + + uintptr_t runtime_start_addr = _Py_RemoteDebug_GetPyRuntimeAddress(handle); if (runtime_start_addr == 0) { if (!PyErr_Occurred()) { PyErr_SetString( @@ -1633,12 +1167,14 @@ get_all_awaited_by(PyObject* self, PyObject* args) } struct _Py_DebugOffsets local_debug_offsets; - if (read_offsets(pid, &runtime_start_addr, &local_debug_offsets)) { + if (_Py_RemoteDebug_ReadDebugOffsets(handle, 
&runtime_start_addr, &local_debug_offsets)) { + PyErr_SetString(PyExc_RuntimeError, "Failed to read debug offsets"); return NULL; } struct _Py_AsyncioModuleDebugOffsets local_async_debug; - if (read_async_debug(pid, &local_async_debug)) { + if (read_async_debug(handle, &local_async_debug)) { + PyErr_SetString(PyExc_RuntimeError, "Failed to read asyncio debug offsets"); return NULL; } @@ -1647,12 +1183,12 @@ get_all_awaited_by(PyObject* self, PyObject* args) return NULL; } - off_t interpreter_state_list_head = + uint64_t interpreter_state_list_head = local_debug_offsets.runtime_state.interpreters_head; uintptr_t interpreter_state_addr; - if (0 > read_memory( - pid, + if (0 > _Py_RemoteDebug_ReadRemoteMemory( + handle, runtime_start_addr + interpreter_state_list_head, sizeof(void*), &interpreter_state_addr)) @@ -1662,8 +1198,8 @@ get_all_awaited_by(PyObject* self, PyObject* args) uintptr_t thread_state_addr; unsigned long tid = 0; - if (0 > read_memory( - pid, + if (0 > _Py_RemoteDebug_ReadRemoteMemory( + handle, interpreter_state_addr + local_debug_offsets.interpreter_state.threads_head, sizeof(void*), @@ -1674,8 +1210,8 @@ get_all_awaited_by(PyObject* self, PyObject* args) uintptr_t head_addr; while (thread_state_addr != 0) { - if (0 > read_memory( - pid, + if (0 > _Py_RemoteDebug_ReadRemoteMemory( + handle, thread_state_addr + local_debug_offsets.thread_state.native_thread_id, sizeof(tid), @@ -1687,14 +1223,14 @@ get_all_awaited_by(PyObject* self, PyObject* args) head_addr = thread_state_addr + local_async_debug.asyncio_thread_state.asyncio_tasks_head; - if (append_awaited_by(pid, tid, head_addr, &local_debug_offsets, + if (append_awaited_by(handle, tid, head_addr, &local_debug_offsets, &local_async_debug, result)) { goto result_err; } - if (0 > read_memory( - pid, + if (0 > _Py_RemoteDebug_ReadRemoteMemory( + handle, thread_state_addr + local_debug_offsets.thread_state.next, sizeof(void*), &thread_state_addr)) @@ -1711,65 +1247,76 @@ get_all_awaited_by(PyObject* 
self, PyObject* args) // any tasks still pending when a thread is destroyed will be moved to the // per-interpreter task list. It's unlikely we'll find anything here, but // interesting for debugging. - if (append_awaited_by(pid, 0, head_addr, &local_debug_offsets, + if (append_awaited_by(handle, 0, head_addr, &local_debug_offsets, &local_async_debug, result)) { goto result_err; } + _Py_RemoteDebug_CleanupProcHandle(handle); return result; result_err: Py_DECREF(result); + _Py_RemoteDebug_CleanupProcHandle(handle); return NULL; } static PyObject* get_stack_trace(PyObject* self, PyObject* args) { -#if (!defined(__linux__) && !defined(__APPLE__)) || \ +#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ (defined(__linux__) && !HAVE_PROCESS_VM_READV) PyErr_SetString( PyExc_RuntimeError, "get_stack_trace is not supported on this platform"); return NULL; #endif - int pid; + int pid; if (!PyArg_ParseTuple(args, "i", &pid)) { return NULL; } - uintptr_t runtime_start_address = get_py_runtime(pid); + proc_handle_t the_handle; + proc_handle_t *handle = &the_handle; + if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { + return 0; + } + + PyObject* result = NULL; + + uintptr_t runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); if (runtime_start_address == 0) { if (!PyErr_Occurred()) { PyErr_SetString( PyExc_RuntimeError, "Failed to get .PyRuntime address"); } - return NULL; + goto result_err; } struct _Py_DebugOffsets local_debug_offsets; - if (read_offsets(pid, &runtime_start_address, &local_debug_offsets)) { - return NULL; + if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_address, &local_debug_offsets)) { + PyErr_SetString(PyExc_RuntimeError, "Failed to read debug offsets"); + goto result_err; } uintptr_t address_of_current_frame; if (find_running_frame( - pid, runtime_start_address, &local_debug_offsets, + handle, runtime_start_address, &local_debug_offsets, &address_of_current_frame) ) { - return NULL; + goto 
result_err; } - PyObject* result = PyList_New(0); + result = PyList_New(0); if (result == NULL) { - return NULL; + goto result_err; } while ((void*)address_of_current_frame != NULL) { if (parse_frame_object( - pid, + handle, result, &local_debug_offsets, address_of_current_frame, @@ -1777,17 +1324,19 @@ get_stack_trace(PyObject* self, PyObject* args) < 0) { Py_DECREF(result); - return NULL; + goto result_err; } } +result_err: + _Py_RemoteDebug_CleanupProcHandle(handle); return result; } static PyObject* get_async_stack_trace(PyObject* self, PyObject* args) { -#if (!defined(__linux__) && !defined(__APPLE__)) || \ +#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ (defined(__linux__) && !HAVE_PROCESS_VM_READV) PyErr_SetString( PyExc_RuntimeError, @@ -1800,7 +1349,13 @@ get_async_stack_trace(PyObject* self, PyObject* args) return NULL; } - uintptr_t runtime_start_address = get_py_runtime(pid); + proc_handle_t the_handle; + proc_handle_t *handle = &the_handle; + if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { + return 0; + } + + uintptr_t runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); if (runtime_start_address == 0) { if (!PyErr_Occurred()) { PyErr_SetString( @@ -1810,12 +1365,14 @@ get_async_stack_trace(PyObject* self, PyObject* args) } struct _Py_DebugOffsets local_debug_offsets; - if (read_offsets(pid, &runtime_start_address, &local_debug_offsets)) { + if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_address, &local_debug_offsets)) { + PyErr_SetString(PyExc_RuntimeError, "Failed to read debug offsets"); return NULL; } struct _Py_AsyncioModuleDebugOffsets local_async_debug; - if (read_async_debug(pid, &local_async_debug)) { + if (read_async_debug(handle, &local_async_debug)) { + PyErr_SetString(PyExc_RuntimeError, "Failed to read asyncio debug offsets"); return NULL; } @@ -1836,9 +1393,10 @@ get_async_stack_trace(PyObject* self, PyObject* args) uintptr_t running_task_addr = (uintptr_t)NULL; if 
(find_running_task( - pid, runtime_start_address, &local_debug_offsets, &local_async_debug, + handle, runtime_start_address, &local_debug_offsets, &local_async_debug, &running_task_addr) ) { + PyErr_SetString(PyExc_RuntimeError, "Failed to find running task"); goto result_err; } @@ -1849,10 +1407,11 @@ get_async_stack_trace(PyObject* self, PyObject* args) uintptr_t running_coro_addr; if (read_py_ptr( - pid, + handle, running_task_addr + local_async_debug.asyncio_task_object.task_coro, &running_coro_addr )) { + PyErr_SetString(PyExc_RuntimeError, "Failed to read running task coro"); goto result_err; } @@ -1865,7 +1424,7 @@ get_async_stack_trace(PyObject* self, PyObject* args) // the offset leads directly to its first field: f_executable uintptr_t address_of_running_task_code_obj; if (read_py_ptr( - pid, + handle, running_coro_addr + local_debug_offsets.gen_object.gi_iframe, &address_of_running_task_code_obj )) { @@ -1879,16 +1438,17 @@ get_async_stack_trace(PyObject* self, PyObject* args) uintptr_t address_of_current_frame; if (find_running_frame( - pid, runtime_start_address, &local_debug_offsets, + handle, runtime_start_address, &local_debug_offsets, &address_of_current_frame) ) { + PyErr_SetString(PyExc_RuntimeError, "Failed to find running frame"); goto result_err; } uintptr_t address_of_code_object; while ((void*)address_of_current_frame != NULL) { int res = parse_async_frame_object( - pid, + handle, calls, &local_debug_offsets, address_of_current_frame, @@ -1897,6 +1457,7 @@ get_async_stack_trace(PyObject* self, PyObject* args) ); if (res < 0) { + PyErr_SetString(PyExc_RuntimeError, "Failed to parse async frame object"); goto result_err; } @@ -1906,7 +1467,7 @@ get_async_stack_trace(PyObject* self, PyObject* args) } PyObject *tn = parse_task_name( - pid, &local_debug_offsets, &local_async_debug, running_task_addr); + handle, &local_debug_offsets, &local_async_debug, running_task_addr); if (tn == NULL) { goto result_err; } @@ -1927,15 +1488,17 @@ 
get_async_stack_trace(PyObject* self, PyObject* args) Py_DECREF(awaited_by); if (parse_task_awaited_by( - pid, &local_debug_offsets, &local_async_debug, + handle, &local_debug_offsets, &local_async_debug, running_task_addr, awaited_by) ) { goto result_err; } + _Py_RemoteDebug_CleanupProcHandle(handle); return result; result_err: + _Py_RemoteDebug_CleanupProcHandle(handle); Py_DECREF(result); return NULL; } @@ -1943,11 +1506,11 @@ get_async_stack_trace(PyObject* self, PyObject* args) static PyMethodDef methods[] = { {"get_stack_trace", get_stack_trace, METH_VARARGS, - "Get the Python stack from a given PID"}, + "Get the Python stack from a given pid"}, {"get_async_stack_trace", get_async_stack_trace, METH_VARARGS, - "Get the asyncio stack from a given PID"}, + "Get the asyncio stack from a given pid"}, {"get_all_awaited_by", get_all_awaited_by, METH_VARARGS, - "Get all tasks and their awaited_by from a given PID"}, + "Get all tasks and their awaited_by from a given pid"}, {NULL, NULL, 0, NULL}, }; diff --git a/PCbuild/_testexternalinspection.vcxproj b/PCbuild/_testexternalinspection.vcxproj new file mode 100644 index 00000000000000..d5f347ecfec2c7 --- /dev/null +++ b/PCbuild/_testexternalinspection.vcxproj @@ -0,0 +1,114 @@ + + + + + Debug + ARM + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + PGInstrument + ARM + + + PGInstrument + ARM64 + + + PGInstrument + Win32 + + + PGInstrument + x64 + + + PGUpdate + ARM + + + PGUpdate + ARM64 + + + PGUpdate + Win32 + + + PGUpdate + x64 + + + Release + ARM + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + {4D7C112F-3083-4D9E-9754-9341C14D9B39} + _testexternalinspection + Win32Proj + false + + + + + DynamicLibrary + NotSet + + + + $(PyStdlibPydExt) + + + + + + + + + + <_ProjectFileVersion>10.0.30319.1 + + + + + + + + + + {cf7ac3d1-e2df-41d2-bea6-1e2556cdea26} + false + + + {885d4898-d08d-4091-9c40-c700cfe3fc5a} + false + + + + + + diff --git a/PCbuild/_testexternalinspection.vcxproj.filters
b/PCbuild/_testexternalinspection.vcxproj.filters new file mode 100644 index 00000000000000..feb4343e5c2b8c --- /dev/null +++ b/PCbuild/_testexternalinspection.vcxproj.filters @@ -0,0 +1,20 @@ + + + + + {6d101329-41df-49a0-8639-f35408ad7c6d} + + + {711941d1-269c-49cb-a733-759b2b91fc61} + + + + + + + + Resource Files + + + + diff --git a/PCbuild/pcbuild.proj b/PCbuild/pcbuild.proj index 0ae87a0f21f039..1bf430e03debc8 100644 --- a/PCbuild/pcbuild.proj +++ b/PCbuild/pcbuild.proj @@ -79,7 +79,7 @@ - + diff --git a/PCbuild/pcbuild.sln b/PCbuild/pcbuild.sln index f3bab33cf60ebe..803bb149c905cb 100644 --- a/PCbuild/pcbuild.sln +++ b/PCbuild/pcbuild.sln @@ -81,6 +81,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_testclinic", "_testclinic. EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_testinternalcapi", "_testinternalcapi.vcxproj", "{900342D7-516A-4469-B1AD-59A66E49A25F}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_testexternalinspection", "_testexternalinspection.vcxproj", "{4D7C112F-3083-4D9E-9754-9341C14D9B39}" +EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_testimportmultiple", "_testimportmultiple.vcxproj", "{36D0C52C-DF4E-45D0-8BC7-E294C3ABC781}" EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "_tkinter", "_tkinter.vcxproj", "{4946ECAC-2E69-4BF8-A90A-F5136F5094DF}" @@ -1716,6 +1718,38 @@ Global {FB91C8B2-6FBC-3A01-B644-1637111F902D}.Release|Win32.Build.0 = Release|Win32 {FB91C8B2-6FBC-3A01-B644-1637111F902D}.Release|x64.ActiveCfg = Release|x64 {FB91C8B2-6FBC-3A01-B644-1637111F902D}.Release|x64.Build.0 = Release|x64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Debug|ARM.ActiveCfg = Debug|ARM + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Debug|ARM.Build.0 = Debug|ARM + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Debug|ARM64.Build.0 = Debug|ARM64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Debug|Win32.ActiveCfg = Debug|Win32 + 
{4D7C112F-3083-4D9E-9754-9341C14D9B39}.Debug|Win32.Build.0 = Debug|Win32 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Debug|x64.ActiveCfg = Debug|x64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Debug|x64.Build.0 = Debug|x64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGInstrument|ARM.ActiveCfg = PGInstrument|ARM + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGInstrument|ARM.Build.0 = PGInstrument|ARM + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGInstrument|ARM64.ActiveCfg = PGInstrument|ARM64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGInstrument|ARM64.Build.0 = PGInstrument|ARM64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGInstrument|Win32.ActiveCfg = PGInstrument|Win32 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGInstrument|Win32.Build.0 = PGInstrument|Win32 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGInstrument|x64.ActiveCfg = PGInstrument|x64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGInstrument|x64.Build.0 = PGInstrument|x64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGUpdate|ARM.ActiveCfg = PGUpdate|ARM + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGUpdate|ARM.Build.0 = PGUpdate|ARM + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGUpdate|ARM64.ActiveCfg = PGUpdate|ARM64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGUpdate|ARM64.Build.0 = PGUpdate|ARM64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGUpdate|Win32.ActiveCfg = PGUpdate|Win32 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGUpdate|Win32.Build.0 = PGUpdate|Win32 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGUpdate|x64.ActiveCfg = PGUpdate|x64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.PGUpdate|x64.Build.0 = PGUpdate|x64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Release|ARM.ActiveCfg = Release|ARM + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Release|ARM.Build.0 = Release|ARM + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Release|ARM64.ActiveCfg = Release|ARM64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Release|ARM64.Build.0 = Release|ARM64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Release|Win32.ActiveCfg = Release|Win32 + 
{4D7C112F-3083-4D9E-9754-9341C14D9B39}.Release|Win32.Build.0 = Release|Win32 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Release|x64.ActiveCfg = Release|x64 + {4D7C112F-3083-4D9E-9754-9341C14D9B39}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/Python/remote_debug.h b/Python/remote_debug.h new file mode 100644 index 00000000000000..ba29ec5cec7329 --- /dev/null +++ b/Python/remote_debug.h @@ -0,0 +1,793 @@ +/* +IMPORTANT: This header file is full of static functions that are not exported. + +The reason is that we don't want to export these functions to the Python API +and they can be used both for the interpreter and some shared libraries. The +reason we don't want to export them is to avoid having them participate in +return-oriented programming attacks. + +If you need to add a new function ensure that it is declared 'static'. +*/ + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# error "this header requires Py_BUILD_CORE or Py_BUILD_CORE_MODULE define" +#endif + +#include "pyconfig.h" +#include "internal/pycore_ceval.h" + +#ifdef __linux__ +# include +# include +# if INTPTR_MAX == INT64_MAX +# define Elf_Ehdr Elf64_Ehdr +# define Elf_Shdr Elf64_Shdr +# define Elf_Phdr Elf64_Phdr +# else +# define Elf_Ehdr Elf32_Ehdr +# define Elf_Shdr Elf32_Shdr +# define Elf_Phdr Elf32_Phdr +# endif +# include +#endif + +#if defined(__APPLE__) && TARGET_OS_OSX +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +#endif + +#ifdef MS_WINDOWS + // Windows includes and definitions +#include +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#ifndef MS_WINDOWS +#include +#include +#include +#include +#endif + +#ifndef HAVE_PROCESS_VM_READV +# define HAVE_PROCESS_VM_READV 0 +#endif + +// Define a platform-independent process handle 
structure +typedef struct { + pid_t pid; +#ifdef MS_WINDOWS + HANDLE hProcess; +#endif +} proc_handle_t; + +// Initialize the process handle +static int +_Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { + handle->pid = pid; +#ifdef MS_WINDOWS + handle->hProcess = OpenProcess( + PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION, + FALSE, pid); + if (handle->hProcess == NULL) { + PyErr_SetFromWindowsErr(0); + return -1; + } +#endif + return 0; +} + +// Clean up the process handle +static void +_Py_RemoteDebug_CleanupProcHandle(proc_handle_t *handle) { +#ifdef MS_WINDOWS + if (handle->hProcess != NULL) { + CloseHandle(handle->hProcess); + handle->hProcess = NULL; + } +#endif + handle->pid = 0; +} + +#if defined(__APPLE__) && TARGET_OS_OSX + +static uintptr_t +return_section_address64( + const char* section, + mach_port_t proc_ref, + uintptr_t base, + void* map +) { + struct mach_header_64* hdr = (struct mach_header_64*)map; + int ncmds = hdr->ncmds; + + int cmd_cnt = 0; + struct segment_command_64* cmd = map + sizeof(struct mach_header_64); + + mach_vm_size_t size = 0; + mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t); + mach_vm_address_t address = (mach_vm_address_t)base; + vm_region_basic_info_data_64_t r_info; + mach_port_t object_name; + uintptr_t vmaddr = 0; + + for (int i = 0; cmd_cnt < 2 && i < ncmds; i++) { + if (cmd->cmd == LC_SEGMENT_64 && strcmp(cmd->segname, "__TEXT") == 0) { + vmaddr = cmd->vmaddr; + } + if (cmd->cmd == LC_SEGMENT_64 && strcmp(cmd->segname, "__DATA") == 0) { + while (cmd->filesize != size) { + address += size; + kern_return_t ret = mach_vm_region( + proc_ref, + &address, + &size, + VM_REGION_BASIC_INFO_64, + (vm_region_info_t)&r_info, // cppcheck-suppress [uninitvar] + &count, + &object_name + ); + if (ret != KERN_SUCCESS) { + PyErr_SetString( + PyExc_RuntimeError, "Cannot get any more VM maps.\n"); + return 0; + } + } + + int nsects = cmd->nsects; + struct 
section_64* sec = (struct section_64*)( + (void*)cmd + sizeof(struct segment_command_64) + ); + for (int j = 0; j < nsects; j++) { + if (strcmp(sec[j].sectname, section) == 0) { + return base + sec[j].addr - vmaddr; + } + } + cmd_cnt++; + } + + cmd = (struct segment_command_64*)((void*)cmd + cmd->cmdsize); + } + + // We should not be here, but if we are there, we should say about this + PyErr_SetString( + PyExc_RuntimeError, "Cannot find section address.\n"); + return 0; +} + +static uintptr_t +return_section_address32( + const char* section, + mach_port_t proc_ref, + uintptr_t base, + void* map +) { + struct mach_header* hdr = (struct mach_header*)map; + int ncmds = hdr->ncmds; + + int cmd_cnt = 0; + struct segment_command* cmd = map + sizeof(struct mach_header); + + mach_vm_size_t size = 0; + mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_t); + mach_vm_address_t address = (mach_vm_address_t)base; + vm_region_basic_info_data_t r_info; + mach_port_t object_name; + uintptr_t vmaddr = 0; + + for (int i = 0; cmd_cnt < 2 && i < ncmds; i++) { + if (cmd->cmd == LC_SEGMENT && strcmp(cmd->segname, "__TEXT") == 0) { + vmaddr = cmd->vmaddr; + } + if (cmd->cmd == LC_SEGMENT && strcmp(cmd->segname, "__DATA") == 0) { + while (cmd->filesize != size) { + address += size; + kern_return_t ret = mach_vm_region( + proc_ref, + &address, + &size, + VM_REGION_BASIC_INFO, + (vm_region_info_t)&r_info, // cppcheck-suppress [uninitvar] + &count, + &object_name + ); + if (ret != KERN_SUCCESS) { + PyErr_SetString( + PyExc_RuntimeError, "Cannot get any more VM maps.\n"); + return 0; + } + } + + int nsects = cmd->nsects; + struct section* sec = (struct section*)( + (void*)cmd + sizeof(struct segment_command) + ); + for (int j = 0; j < nsects; j++) { + if (strcmp(sec[j].sectname, section) == 0) { + return base + sec[j].addr - vmaddr; + } + } + cmd_cnt++; + } + + cmd = (struct segment_command*)((void*)cmd + cmd->cmdsize); + } + + // We should not be here, but if we are there, we 
should say about this + PyErr_SetString( + PyExc_RuntimeError, "Cannot find section address.\n"); + return 0; +} + +static uintptr_t +return_section_address_fat( + const char* section, + mach_port_t proc_ref, + uintptr_t base, + void* map +) { + struct fat_header* fat_hdr = (struct fat_header*)map; + + // Determine host CPU type for architecture selection + cpu_type_t cpu; + int is_abi64; + size_t cpu_size = sizeof(cpu), abi64_size = sizeof(is_abi64); + + sysctlbyname("hw.cputype", &cpu, &cpu_size, NULL, 0); + sysctlbyname("hw.cpu64bit_capable", &is_abi64, &abi64_size, NULL, 0); + + cpu |= is_abi64 * CPU_ARCH_ABI64; + + // Check endianness + int swap = fat_hdr->magic == FAT_CIGAM; + struct fat_arch* arch = (struct fat_arch*)(map + sizeof(struct fat_header)); + + // Get number of architectures in fat binary + uint32_t nfat_arch = swap ? __builtin_bswap32(fat_hdr->nfat_arch) : fat_hdr->nfat_arch; + + // Search for matching architecture + for (uint32_t i = 0; i < nfat_arch; i++) { + cpu_type_t arch_cpu = swap ? __builtin_bswap32(arch[i].cputype) : arch[i].cputype; + + if (arch_cpu == cpu) { + // Found matching architecture, now process it + uint32_t offset = swap ? 
__builtin_bswap32(arch[i].offset) : arch[i].offset; + struct mach_header_64* hdr = (struct mach_header_64*)(map + offset); + + // Determine which type of Mach-O it is and process accordingly + switch (hdr->magic) { + case MH_MAGIC: + case MH_CIGAM: + return return_section_address32(section, proc_ref, base, (void*)hdr); + + case MH_MAGIC_64: + case MH_CIGAM_64: + return return_section_address64(section, proc_ref, base, (void*)hdr); + + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic in fat binary.\n"); + return 0; + } + } + } + + PyErr_SetString(PyExc_RuntimeError, "No matching architecture found in fat binary.\n"); + return 0; +} + +static uintptr_t +search_section_in_file(const char* secname, char* path, uintptr_t base, mach_vm_size_t size, mach_port_t proc_ref) +{ + int fd = open(path, O_RDONLY); + if (fd == -1) { + PyErr_Format(PyExc_RuntimeError, "Cannot open binary %s\n", path); + return 0; + } + + struct stat fs; + if (fstat(fd, &fs) == -1) { + PyErr_Format(PyExc_RuntimeError, "Cannot get size of binary %s\n", path); + close(fd); + return 0; + } + + void* map = mmap(0, fs.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (map == MAP_FAILED) { + PyErr_Format(PyExc_RuntimeError, "Cannot map binary %s\n", path); + close(fd); + return 0; + } + + uintptr_t result = 0; + uint32_t magic = *(uint32_t*)map; + + switch (magic) { + case MH_MAGIC: + case MH_CIGAM: + result = return_section_address32(secname, proc_ref, base, map); + break; + case MH_MAGIC_64: + case MH_CIGAM_64: + result = return_section_address64(secname, proc_ref, base, map); + break; + case FAT_MAGIC: + case FAT_CIGAM: + result = return_section_address_fat(secname, proc_ref, base, map); + break; + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic"); + break; + } + + munmap(map, fs.st_size); + if (close(fd) != 0) { + PyErr_SetFromErrno(PyExc_OSError); + } + return result; +} + + +static mach_port_t +pid_to_task(pid_t pid) +{ + mach_port_t task; + kern_return_t result; 
+ + result = task_for_pid(mach_task_self(), pid, &task); + if (result != KERN_SUCCESS) { + PyErr_Format(PyExc_PermissionError, "Cannot get task for PID %d", pid); + return 0; + } + return task; +} + +static uintptr_t +search_map_for_section(proc_handle_t *handle, const char* secname, const char* substr) { + mach_vm_address_t address = 0; + mach_vm_size_t size = 0; + mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t); + vm_region_basic_info_data_64_t region_info; + mach_port_t object_name; + + mach_port_t proc_ref = pid_to_task(handle->pid); + if (proc_ref == 0) { + PyErr_SetString(PyExc_PermissionError, "Cannot get task for PID"); + return 0; + } + + int match_found = 0; + char map_filename[MAXPATHLEN + 1]; + while (mach_vm_region( + proc_ref, + &address, + &size, + VM_REGION_BASIC_INFO_64, + (vm_region_info_t)&region_info, + &count, + &object_name) == KERN_SUCCESS) + { + if ((region_info.protection & VM_PROT_READ) == 0 + || (region_info.protection & VM_PROT_EXECUTE) == 0) { + address += size; + continue; + } + + int path_len = proc_regionfilename( + handle->pid, address, map_filename, MAXPATHLEN); + if (path_len == 0) { + address += size; + continue; + } + + char* filename = strrchr(map_filename, '/'); + if (filename != NULL) { + filename++; // Move past the '/' + } else { + filename = map_filename; // No path, use the whole string + } + + if (!match_found && strncmp(filename, substr, strlen(substr)) == 0) { + match_found = 1; + return search_section_in_file( + secname, map_filename, address, size, proc_ref); + } + + address += size; + } + + PyErr_SetString(PyExc_RuntimeError, + "mach_vm_region failed to find the section"); + return 0; +} + +#endif // (__APPLE__ && TARGET_OS_OSX) + +#if defined(__linux__) && HAVE_PROCESS_VM_READV +static uintptr_t +search_elf_file_for_section( + proc_handle_t *handle, + const char* secname, + uintptr_t start_address, + const char *elf_file) +{ + if (start_address == 0) { + return 0; + } + + uintptr_t result = 0; + 
void* file_memory = NULL; + + int fd = open(elf_file, O_RDONLY); + if (fd < 0) { + PyErr_SetFromErrno(PyExc_OSError); + goto exit; + } + + struct stat file_stats; + if (fstat(fd, &file_stats) != 0) { + PyErr_SetFromErrno(PyExc_OSError); + goto exit; + } + + file_memory = mmap(NULL, file_stats.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (file_memory == MAP_FAILED) { + PyErr_SetFromErrno(PyExc_OSError); + goto exit; + } + + Elf_Ehdr* elf_header = (Elf_Ehdr*)file_memory; + + Elf_Shdr* section_header_table = (Elf_Shdr*)(file_memory + elf_header->e_shoff); + + Elf_Shdr* shstrtab_section = &section_header_table[elf_header->e_shstrndx]; + char* shstrtab = (char*)(file_memory + shstrtab_section->sh_offset); + + Elf_Shdr* section = NULL; + for (int i = 0; i < elf_header->e_shnum; i++) { + char* this_sec_name = shstrtab + section_header_table[i].sh_name; + // Move 1 character to account for the leading "." + this_sec_name += 1; + if (strcmp(secname, this_sec_name) == 0) { + section = &section_header_table[i]; + break; + } + } + + Elf_Phdr* program_header_table = (Elf_Phdr*)(file_memory + elf_header->e_phoff); + // Find the first PT_LOAD segment + Elf_Phdr* first_load_segment = NULL; + for (int i = 0; i < elf_header->e_phnum; i++) { + if (program_header_table[i].p_type == PT_LOAD) { + first_load_segment = &program_header_table[i]; + break; + } + } + + if (section != NULL && first_load_segment != NULL) { + uintptr_t elf_load_addr = first_load_segment->p_vaddr + - (first_load_segment->p_vaddr % first_load_segment->p_align); + result = start_address + (uintptr_t)section->sh_addr - elf_load_addr; + } + +exit: + if (file_memory != NULL) { + munmap(file_memory, file_stats.st_size); + } + if (fd >= 0 && close(fd) != 0) { + PyErr_SetFromErrno(PyExc_OSError); + } + return result; +} + +static uintptr_t +search_linux_map_for_section(proc_handle_t *handle, const char* secname, const char* substr) +{ + char maps_file_path[64]; + sprintf(maps_file_path, "/proc/%d/maps", handle->pid); + + FILE* 
maps_file = fopen(maps_file_path, "r"); + if (maps_file == NULL) { + PyErr_SetFromErrno(PyExc_OSError); + return 0; + } + + size_t linelen = 0; + size_t linesz = PATH_MAX; + char *line = PyMem_Malloc(linesz); + if (!line) { + fclose(maps_file); + PyErr_NoMemory(); + return 0; + } + + uintptr_t retval = 0; + while (fgets(line + linelen, linesz - linelen, maps_file) != NULL) { + linelen = strlen(line); + if (line[linelen - 1] != '\n') { + // Read a partial line: realloc and keep reading where we left off. + // Note that even the last line will be terminated by a newline. + linesz *= 2; + char *biggerline = PyMem_Realloc(line, linesz); + if (!biggerline) { + PyMem_Free(line); + fclose(maps_file); + PyErr_NoMemory(); + return 0; + } + line = biggerline; + continue; + } + + // Read a full line: strip the newline + line[linelen - 1] = '\0'; + // and prepare to read the next line into the start of the buffer. + linelen = 0; + + unsigned long start = 0; + unsigned long path_pos = 0; + sscanf(line, "%lx-%*x %*s %*s %*s %*s %ln", &start, &path_pos); + + if (!path_pos) { + // Line didn't match our format string. This shouldn't be + // possible, but let's be defensive and skip the line. 
+ continue; + } + + const char *path = line + path_pos; + const char *filename = strrchr(path, '/'); + if (filename) { + filename++; // Move past the '/' + } else { + filename = path; // No directories, or an empty string + } + + if (strstr(filename, substr)) { + retval = search_elf_file_for_section(handle, secname, start, path); + if (retval) { + break; + } + } + } + + PyMem_Free(line); + fclose(maps_file); + + return retval; +} + + +#endif // __linux__ + +#ifdef MS_WINDOWS + +static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* secname) { + HANDLE hFile = CreateFileW(mod_path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) { + PyErr_SetFromWindowsErr(0); + return NULL; + } + HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, 0); + if (!hMap) { + PyErr_SetFromWindowsErr(0); + CloseHandle(hFile); + return NULL; + } + + BYTE* mapView = (BYTE*)MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0); + if (!mapView) { + PyErr_SetFromWindowsErr(0); + CloseHandle(hMap); + CloseHandle(hFile); + return NULL; + } + + IMAGE_DOS_HEADER* pDOSHeader = (IMAGE_DOS_HEADER*)mapView; + if (pDOSHeader->e_magic != IMAGE_DOS_SIGNATURE) { + PyErr_SetString(PyExc_RuntimeError, "Invalid DOS signature."); + UnmapViewOfFile(mapView); + CloseHandle(hMap); + CloseHandle(hFile); + return NULL; + } + + IMAGE_NT_HEADERS* pNTHeaders = (IMAGE_NT_HEADERS*)(mapView + pDOSHeader->e_lfanew); + if (pNTHeaders->Signature != IMAGE_NT_SIGNATURE) { + PyErr_SetString(PyExc_RuntimeError, "Invalid NT signature."); + UnmapViewOfFile(mapView); + CloseHandle(hMap); + CloseHandle(hFile); + return NULL; + } + + IMAGE_SECTION_HEADER* pSection_header = (IMAGE_SECTION_HEADER*)(mapView + pDOSHeader->e_lfanew + sizeof(IMAGE_NT_HEADERS)); + void* runtime_addr = NULL; + + for (int i = 0; i < pNTHeaders->FileHeader.NumberOfSections; i++) { + const char* name = (const char*)pSection_header[i].Name; + if (strncmp(name, 
secname, IMAGE_SIZEOF_SHORT_NAME) == 0) { + runtime_addr = remote_base + pSection_header[i].VirtualAddress; + break; + } + } + + UnmapViewOfFile(mapView); + CloseHandle(hMap); + CloseHandle(hFile); + + return runtime_addr; +} + + +static uintptr_t +search_windows_map_for_section(proc_handle_t* handle, const char* secname, const wchar_t* substr) { + HANDLE hProcSnap; + do { + hProcSnap = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, handle->pid); + } while (hProcSnap == INVALID_HANDLE_VALUE && GetLastError() == ERROR_BAD_LENGTH); + + if (hProcSnap == INVALID_HANDLE_VALUE) { + PyErr_SetString(PyExc_PermissionError, "Unable to create module snapshot. Check permissions or PID."); + return 0; + } + + MODULEENTRY32W moduleEntry; + moduleEntry.dwSize = sizeof(moduleEntry); + void* runtime_addr = NULL; + + for (BOOL hasModule = Module32FirstW(hProcSnap, &moduleEntry); hasModule; hasModule = Module32NextW(hProcSnap, &moduleEntry)) { + // Look for either python executable or DLL + if (wcsstr(moduleEntry.szModule, substr)) { + runtime_addr = analyze_pe(moduleEntry.szExePath, moduleEntry.modBaseAddr, secname); + if (runtime_addr != NULL) { + break; + } + } + } + + CloseHandle(hProcSnap); + return (uintptr_t)runtime_addr; +} + +#endif // MS_WINDOWS + +// Get the PyRuntime section address for any platform +static uintptr_t +_Py_RemoteDebug_GetPyRuntimeAddress(proc_handle_t* handle) +{ + uintptr_t address = 0; + +#ifdef MS_WINDOWS + // On Windows, search for 'python' in executable or DLL + address = search_windows_map_for_section(handle, "PyRuntime", L"python"); + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyErr_SetString(PyExc_RuntimeError, "Failed to find the PyRuntime section in the process."); + } +#elif defined(__linux__) + // On Linux, search for 'python' in executable or DLL + address = search_linux_map_for_section(handle, "PyRuntime", "python"); + if (address == 0) { + // Error out: 'python' substring covers both executable and 
DLL + PyErr_SetString(PyExc_RuntimeError, "Failed to find the PyRuntime section in the process."); + } +#else + // On macOS, try libpython first, then fall back to python + address = search_map_for_section(handle, "PyRuntime", "libpython"); + if (address == 0) { + // TODO: Differentiate between not found and error + PyErr_Clear(); + address = search_map_for_section(handle, "PyRuntime", "python"); + } +#endif + + return address; +} + +// Platform-independent memory read function +static int +_Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address, size_t len, void* dst) +{ +#ifdef MS_WINDOWS + SIZE_T read_bytes = 0; + SIZE_T result = 0; + do { + if (!ReadProcessMemory(handle->hProcess, (LPCVOID)(remote_address + result), (char*)dst + result, len - result, &read_bytes)) { + PyErr_SetFromWindowsErr(0); + return -1; + } + result += read_bytes; + } while (result < len); + return 0; +#elif defined(__linux__) && HAVE_PROCESS_VM_READV + struct iovec local[1]; + struct iovec remote[1]; + Py_ssize_t result = 0; + Py_ssize_t read_bytes = 0; + + do { + local[0].iov_base = (char*)dst + result; + local[0].iov_len = len - result; + remote[0].iov_base = (void*)(remote_address + result); + remote[0].iov_len = len - result; + + read_bytes = process_vm_readv(handle->pid, local, 1, remote, 1, 0); + if (read_bytes < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return -1; + } + + result += read_bytes; + } while ((size_t)read_bytes != local[0].iov_len); + return 0; +#elif defined(__APPLE__) && TARGET_OS_OSX + Py_ssize_t result = -1; + kern_return_t kr = mach_vm_read_overwrite( + pid_to_task(handle->pid), + (mach_vm_address_t)remote_address, + len, + (mach_vm_address_t)dst, + (mach_vm_size_t*)&result); + + if (kr != KERN_SUCCESS) { + switch (kr) { + case KERN_PROTECTION_FAILURE: + PyErr_SetString(PyExc_PermissionError, "Not enough permissions to read memory"); + break; + case KERN_INVALID_ARGUMENT: + PyErr_SetString(PyExc_PermissionError, "Invalid argument to 
mach_vm_read_overwrite"); + break; + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown error reading memory"); + } + return -1; + } + return 0; +#else + Py_UNREACHABLE(); +#endif +} + +static int +_Py_RemoteDebug_ReadDebugOffsets( + proc_handle_t *handle, + uintptr_t *runtime_start_address, + _Py_DebugOffsets* debug_offsets +) { + *runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); + if (!*runtime_start_address) { + if (!PyErr_Occurred()) { + PyErr_SetString( + PyExc_RuntimeError, "Failed to get PyRuntime address"); + } + return -1; + } + size_t size = sizeof(struct _Py_DebugOffsets); + if (0 != _Py_RemoteDebug_ReadRemoteMemory(handle, *runtime_start_address, size, debug_offsets)) { + return -1; + } + return 0; +} + +#ifdef __cplusplus +} +#endif \ No newline at end of file diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 9656297cb83188..dd55b7812d4dee 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -5,754 +5,25 @@ #include "internal/pycore_runtime.h" #include "internal/pycore_ceval.h" -#ifdef __linux__ -# include <elf.h> -# include <sys/uio.h> -# if INTPTR_MAX == INT64_MAX -# define Elf_Ehdr Elf64_Ehdr -# define Elf_Shdr Elf64_Shdr -# define Elf_Phdr Elf64_Phdr -# else -# define Elf_Ehdr Elf32_Ehdr -# define Elf_Shdr Elf32_Shdr -# define Elf_Phdr Elf32_Phdr -# endif -# include <sys/mman.h> -#endif - -#if defined(__APPLE__) && TARGET_OS_OSX -# include <libproc.h> -# include <mach-o/fat.h> -# include <mach-o/loader.h> -# include <mach-o/nlist.h> -# include <mach/mach.h> -# include <mach/mach_vm.h> -# include <mach/machine.h> -# include <sys/mman.h> -# include <sys/proc.h> -# include <sys/sysctl.h> -#endif - -#ifdef MS_WINDOWS - // Windows includes and definitions -#include <windows.h> -#include <psapi.h> -#include <tlhelp32.h> -#endif - -#include <errno.h> -#include <fcntl.h> -#include <stddef.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifndef MS_WINDOWS -#include <sys/param.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <unistd.h> -#endif - -#ifndef HAVE_PROCESS_VM_READV -# define HAVE_PROCESS_VM_READV 0 -#endif - #if defined(Py_REMOTE_DEBUG) && defined(Py_SUPPORTS_REMOTE_DEBUG) +#include "remote_debug.h" -// Define a platform-independent process handle structure -typedef 
struct { - pid_t pid; -#ifdef MS_WINDOWS - HANDLE hProcess; -#endif -} proc_handle_t; - -// Initialize the process handle static int init_proc_handle(proc_handle_t *handle, pid_t pid) { - handle->pid = pid; -#ifdef MS_WINDOWS - handle->hProcess = OpenProcess( - PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION, - FALSE, pid); - if (handle->hProcess == NULL) { - PyErr_SetFromWindowsErr(0); - return -1; - } -#endif - return 0; + return _Py_RemoteDebug_InitProcHandle(handle, pid); } -// Clean up the process handle static void cleanup_proc_handle(proc_handle_t *handle) { -#ifdef MS_WINDOWS - if (handle->hProcess != NULL) { - CloseHandle(handle->hProcess); - handle->hProcess = NULL; - } -#endif - handle->pid = 0; -} - -#if defined(__APPLE__) && TARGET_OS_OSX -static uintptr_t -return_section_address64( - const char* section, - mach_port_t proc_ref, - uintptr_t base, - void* map -) { - struct mach_header_64* hdr = (struct mach_header_64*)map; - int ncmds = hdr->ncmds; - - int cmd_cnt = 0; - struct segment_command_64* cmd = map + sizeof(struct mach_header_64); - - mach_vm_size_t size = 0; - mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t); - mach_vm_address_t address = (mach_vm_address_t)base; - vm_region_basic_info_data_64_t r_info; - mach_port_t object_name; - uintptr_t vmaddr = 0; - - for (int i = 0; cmd_cnt < 2 && i < ncmds; i++) { - if (cmd->cmd == LC_SEGMENT_64 && strcmp(cmd->segname, "__TEXT") == 0) { - vmaddr = cmd->vmaddr; - } - if (cmd->cmd == LC_SEGMENT_64 && strcmp(cmd->segname, "__DATA") == 0) { - while (cmd->filesize != size) { - address += size; - kern_return_t ret = mach_vm_region( - proc_ref, - &address, - &size, - VM_REGION_BASIC_INFO_64, - (vm_region_info_t)&r_info, // cppcheck-suppress [uninitvar] - &count, - &object_name - ); - if (ret != KERN_SUCCESS) { - PyErr_SetString( - PyExc_RuntimeError, "Cannot get any more VM maps.\n"); - return 0; - } - } - - int nsects = cmd->nsects; - struct 
section_64* sec = (struct section_64*)( - (void*)cmd + sizeof(struct segment_command_64) - ); - for (int j = 0; j < nsects; j++) { - if (strcmp(sec[j].sectname, section) == 0) { - return base + sec[j].addr - vmaddr; - } - } - cmd_cnt++; - } - - cmd = (struct segment_command_64*)((void*)cmd + cmd->cmdsize); - } - - // We should not be here, but if we are there, we should say about this - PyErr_SetString( - PyExc_RuntimeError, "Cannot find section address.\n"); - return 0; -} - -static uintptr_t -return_section_address32( - const char* section, - mach_port_t proc_ref, - uintptr_t base, - void* map -) { - struct mach_header* hdr = (struct mach_header*)map; - int ncmds = hdr->ncmds; - - int cmd_cnt = 0; - struct segment_command* cmd = map + sizeof(struct mach_header); - - mach_vm_size_t size = 0; - mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_t); - mach_vm_address_t address = (mach_vm_address_t)base; - vm_region_basic_info_data_t r_info; - mach_port_t object_name; - uintptr_t vmaddr = 0; - - for (int i = 0; cmd_cnt < 2 && i < ncmds; i++) { - if (cmd->cmd == LC_SEGMENT && strcmp(cmd->segname, "__TEXT") == 0) { - vmaddr = cmd->vmaddr; - } - if (cmd->cmd == LC_SEGMENT && strcmp(cmd->segname, "__DATA") == 0) { - while (cmd->filesize != size) { - address += size; - kern_return_t ret = mach_vm_region( - proc_ref, - &address, - &size, - VM_REGION_BASIC_INFO, - (vm_region_info_t)&r_info, // cppcheck-suppress [uninitvar] - &count, - &object_name - ); - if (ret != KERN_SUCCESS) { - PyErr_SetString( - PyExc_RuntimeError, "Cannot get any more VM maps.\n"); - return 0; - } - } - - int nsects = cmd->nsects; - struct section* sec = (struct section*)( - (void*)cmd + sizeof(struct segment_command) - ); - for (int j = 0; j < nsects; j++) { - if (strcmp(sec[j].sectname, section) == 0) { - return base + sec[j].addr - vmaddr; - } - } - cmd_cnt++; - } - - cmd = (struct segment_command*)((void*)cmd + cmd->cmdsize); - } - - // We should not be here, but if we are there, we 
should say about this - PyErr_SetString( - PyExc_RuntimeError, "Cannot find section address.\n"); - return 0; -} - -static uintptr_t -return_section_address_fat( - const char* section, - mach_port_t proc_ref, - uintptr_t base, - void* map -) { - struct fat_header* fat_hdr = (struct fat_header*)map; - - // Determine host CPU type for architecture selection - cpu_type_t cpu; - int is_abi64; - size_t cpu_size = sizeof(cpu), abi64_size = sizeof(is_abi64); - - sysctlbyname("hw.cputype", &cpu, &cpu_size, NULL, 0); - sysctlbyname("hw.cpu64bit_capable", &is_abi64, &abi64_size, NULL, 0); - - cpu |= is_abi64 * CPU_ARCH_ABI64; - - // Check endianness - int swap = fat_hdr->magic == FAT_CIGAM; - struct fat_arch* arch = (struct fat_arch*)(map + sizeof(struct fat_header)); - - // Get number of architectures in fat binary - uint32_t nfat_arch = swap ? __builtin_bswap32(fat_hdr->nfat_arch) : fat_hdr->nfat_arch; - - // Search for matching architecture - for (uint32_t i = 0; i < nfat_arch; i++) { - cpu_type_t arch_cpu = swap ? __builtin_bswap32(arch[i].cputype) : arch[i].cputype; - - if (arch_cpu == cpu) { - // Found matching architecture, now process it - uint32_t offset = swap ? 
__builtin_bswap32(arch[i].offset) : arch[i].offset; - struct mach_header_64* hdr = (struct mach_header_64*)(map + offset); - - // Determine which type of Mach-O it is and process accordingly - switch (hdr->magic) { - case MH_MAGIC: - case MH_CIGAM: - return return_section_address32(section, proc_ref, base, (void*)hdr); - - case MH_MAGIC_64: - case MH_CIGAM_64: - return return_section_address64(section, proc_ref, base, (void*)hdr); - - default: - PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic in fat binary.\n"); - return 0; - } - } - } - - PyErr_SetString(PyExc_RuntimeError, "No matching architecture found in fat binary.\n"); - return 0; -} - -static uintptr_t -search_section_in_file(const char* secname, char* path, uintptr_t base, mach_vm_size_t size, mach_port_t proc_ref) -{ - int fd = open(path, O_RDONLY); - if (fd == -1) { - PyErr_Format(PyExc_RuntimeError, "Cannot open binary %s\n", path); - return 0; - } - - struct stat fs; - if (fstat(fd, &fs) == -1) { - PyErr_Format(PyExc_RuntimeError, "Cannot get size of binary %s\n", path); - close(fd); - return 0; - } - - void* map = mmap(0, fs.st_size, PROT_READ, MAP_SHARED, fd, 0); - if (map == MAP_FAILED) { - PyErr_Format(PyExc_RuntimeError, "Cannot map binary %s\n", path); - close(fd); - return 0; - } - - uintptr_t result = 0; - uint32_t magic = *(uint32_t*)map; - - switch (magic) { - case MH_MAGIC: - case MH_CIGAM: - result = return_section_address32(secname, proc_ref, base, map); - break; - case MH_MAGIC_64: - case MH_CIGAM_64: - result = return_section_address64(secname, proc_ref, base, map); - break; - case FAT_MAGIC: - case FAT_CIGAM: - result = return_section_address_fat(secname, proc_ref, base, map); - break; - default: - PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic"); - break; - } - - munmap(map, fs.st_size); - if (close(fd) != 0) { - PyErr_SetFromErrno(PyExc_OSError); - } - return result; -} - -static mach_port_t -pid_to_task(pid_t pid) -{ - mach_port_t task; - kern_return_t result; - 
- - result = task_for_pid(mach_task_self(), pid, &task); - if (result != KERN_SUCCESS) { - PyErr_Format(PyExc_PermissionError, "Cannot get task for PID %d", pid); - return 0; - } - return task; -} - -static uintptr_t -search_map_for_section(proc_handle_t *handle, const char* secname, const char* substr) { - mach_vm_address_t address = 0; - mach_vm_size_t size = 0; - mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t); - vm_region_basic_info_data_64_t region_info; - mach_port_t object_name; - - mach_port_t proc_ref = pid_to_task(handle->pid); - if (proc_ref == 0) { - PyErr_SetString(PyExc_PermissionError, "Cannot get task for PID"); - return 0; - } - - int match_found = 0; - char map_filename[MAXPATHLEN + 1]; - while (mach_vm_region( - proc_ref, - &address, - &size, - VM_REGION_BASIC_INFO_64, - (vm_region_info_t)&region_info, - &count, - &object_name) == KERN_SUCCESS) - { - if ((region_info.protection & VM_PROT_READ) == 0 - || (region_info.protection & VM_PROT_EXECUTE) == 0) { - address += size; - continue; - } - - int path_len = proc_regionfilename( - handle->pid, address, map_filename, MAXPATHLEN); - if (path_len == 0) { - address += size; - continue; - } - - char* filename = strrchr(map_filename, '/'); - if (filename != NULL) { - filename++; // Move past the '/' - } else { - filename = map_filename; // No path, use the whole string - } - - if (!match_found && strncmp(filename, substr, strlen(substr)) == 0) { - match_found = 1; - return search_section_in_file( - secname, map_filename, address, size, proc_ref); - } - - address += size; - } - - PyErr_SetString(PyExc_RuntimeError, - "mach_vm_region failed to find the section"); - return 0; + _Py_RemoteDebug_CleanupProcHandle(handle); } -#endif // (__APPLE__ && TARGET_OS_OSX) - -#if defined(__linux__) && HAVE_PROCESS_VM_READV -static uintptr_t -search_elf_file_for_section( - proc_handle_t *handle, - const char* secname, - uintptr_t start_address, - const char *elf_file) -{ - if (start_address == 0) { - 
return 0; - } - - uintptr_t result = 0; - void* file_memory = NULL; - - int fd = open(elf_file, O_RDONLY); - if (fd < 0) { - PyErr_SetFromErrno(PyExc_OSError); - goto exit; - } - - struct stat file_stats; - if (fstat(fd, &file_stats) != 0) { - PyErr_SetFromErrno(PyExc_OSError); - goto exit; - } - - file_memory = mmap(NULL, file_stats.st_size, PROT_READ, MAP_PRIVATE, fd, 0); - if (file_memory == MAP_FAILED) { - PyErr_SetFromErrno(PyExc_OSError); - goto exit; - } - - Elf_Ehdr* elf_header = (Elf_Ehdr*)file_memory; - - Elf_Shdr* section_header_table = (Elf_Shdr*)(file_memory + elf_header->e_shoff); - - Elf_Shdr* shstrtab_section = &section_header_table[elf_header->e_shstrndx]; - char* shstrtab = (char*)(file_memory + shstrtab_section->sh_offset); - - Elf_Shdr* section = NULL; - for (int i = 0; i < elf_header->e_shnum; i++) { - char* this_sec_name = shstrtab + section_header_table[i].sh_name; - // Move 1 character to account for the leading "." - this_sec_name += 1; - if (strcmp(secname, this_sec_name) == 0) { - section = &section_header_table[i]; - break; - } - } - - Elf_Phdr* program_header_table = (Elf_Phdr*)(file_memory + elf_header->e_phoff); - // Find the first PT_LOAD segment - Elf_Phdr* first_load_segment = NULL; - for (int i = 0; i < elf_header->e_phnum; i++) { - if (program_header_table[i].p_type == PT_LOAD) { - first_load_segment = &program_header_table[i]; - break; - } - } - - if (section != NULL && first_load_segment != NULL) { - uintptr_t elf_load_addr = first_load_segment->p_vaddr - - (first_load_segment->p_vaddr % first_load_segment->p_align); - result = start_address + (uintptr_t)section->sh_addr - elf_load_addr; - } - -exit: - if (file_memory != NULL) { - munmap(file_memory, file_stats.st_size); - } - if (fd >= 0 && close(fd) != 0) { - PyErr_SetFromErrno(PyExc_OSError); - } - return result; -} - -static uintptr_t -search_linux_map_for_section(proc_handle_t *handle, const char* secname, const char* substr) -{ - char maps_file_path[64]; - sprintf(maps_file_path, 
"/proc/%d/maps", handle->pid); - - FILE* maps_file = fopen(maps_file_path, "r"); - if (maps_file == NULL) { - PyErr_SetFromErrno(PyExc_OSError); - return 0; - } - - size_t linelen = 0; - size_t linesz = PATH_MAX; - char *line = PyMem_Malloc(linesz); - if (!line) { - fclose(maps_file); - PyErr_NoMemory(); - return 0; - } - - uintptr_t retval = 0; - while (fgets(line + linelen, linesz - linelen, maps_file) != NULL) { - linelen = strlen(line); - if (line[linelen - 1] != '\n') { - // Read a partial line: realloc and keep reading where we left off. - // Note that even the last line will be terminated by a newline. - linesz *= 2; - char *biggerline = PyMem_Realloc(line, linesz); - if (!biggerline) { - PyMem_Free(line); - fclose(maps_file); - PyErr_NoMemory(); - return 0; - } - line = biggerline; - continue; - } - - // Read a full line: strip the newline - line[linelen - 1] = '\0'; - // and prepare to read the next line into the start of the buffer. - linelen = 0; - - unsigned long start = 0; - unsigned long path_pos = 0; - sscanf(line, "%lx-%*x %*s %*s %*s %*s %ln", &start, &path_pos); - - if (!path_pos) { - // Line didn't match our format string. This shouldn't be - // possible, but let's be defensive and skip the line. 
- continue; - } - - const char *path = line + path_pos; - const char *filename = strrchr(path, '/'); - if (filename) { - filename++; // Move past the '/' - } else { - filename = path; // No directories, or an empty string - } - - if (strstr(filename, substr)) { - retval = search_elf_file_for_section(handle, secname, start, path); - if (retval) { - break; - } - } - } - - PyMem_Free(line); - fclose(maps_file); - - return retval; -} - - -#endif // __linux__ - -#ifdef MS_WINDOWS - -static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* secname) { - HANDLE hFile = CreateFileW(mod_path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - if (hFile == INVALID_HANDLE_VALUE) { - PyErr_SetFromWindowsErr(0); - return NULL; - } - HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, 0); - if (!hMap) { - PyErr_SetFromWindowsErr(0); - CloseHandle(hFile); - return NULL; - } - - BYTE* mapView = (BYTE*)MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0); - if (!mapView) { - PyErr_SetFromWindowsErr(0); - CloseHandle(hMap); - CloseHandle(hFile); - return NULL; - } - - IMAGE_DOS_HEADER* pDOSHeader = (IMAGE_DOS_HEADER*)mapView; - if (pDOSHeader->e_magic != IMAGE_DOS_SIGNATURE) { - PyErr_SetString(PyExc_RuntimeError, "Invalid DOS signature."); - UnmapViewOfFile(mapView); - CloseHandle(hMap); - CloseHandle(hFile); - return NULL; - } - - IMAGE_NT_HEADERS* pNTHeaders = (IMAGE_NT_HEADERS*)(mapView + pDOSHeader->e_lfanew); - if (pNTHeaders->Signature != IMAGE_NT_SIGNATURE) { - PyErr_SetString(PyExc_RuntimeError, "Invalid NT signature."); - UnmapViewOfFile(mapView); - CloseHandle(hMap); - CloseHandle(hFile); - return NULL; - } - - IMAGE_SECTION_HEADER* pSection_header = (IMAGE_SECTION_HEADER*)(mapView + pDOSHeader->e_lfanew + sizeof(IMAGE_NT_HEADERS)); - void* runtime_addr = NULL; - - for (int i = 0; i < pNTHeaders->FileHeader.NumberOfSections; i++) { - const char* name = (const char*)pSection_header[i].Name; - if (strncmp(name, 
secname, IMAGE_SIZEOF_SHORT_NAME) == 0) { - runtime_addr = remote_base + pSection_header[i].VirtualAddress; - break; - } - } - - UnmapViewOfFile(mapView); - CloseHandle(hMap); - CloseHandle(hFile); - - return runtime_addr; -} - - -static uintptr_t -search_windows_map_for_section(proc_handle_t* handle, const char* secname, const wchar_t* substr) { - HANDLE hProcSnap; - do { - hProcSnap = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, handle->pid); - } while (hProcSnap == INVALID_HANDLE_VALUE && GetLastError() == ERROR_BAD_LENGTH); - - if (hProcSnap == INVALID_HANDLE_VALUE) { - PyErr_SetString(PyExc_PermissionError, "Unable to create module snapshot. Check permissions or PID."); - return 0; - } - - MODULEENTRY32W moduleEntry; - moduleEntry.dwSize = sizeof(moduleEntry); - void* runtime_addr = NULL; - - for (BOOL hasModule = Module32FirstW(hProcSnap, &moduleEntry); hasModule; hasModule = Module32NextW(hProcSnap, &moduleEntry)) { - // Look for either python executable or DLL - if (wcsstr(moduleEntry.szModule, substr)) { - runtime_addr = analyze_pe(moduleEntry.szExePath, moduleEntry.modBaseAddr, secname); - if (runtime_addr != NULL) { - break; - } - } - } - - CloseHandle(hProcSnap); - return (uintptr_t)runtime_addr; -} - -#endif // MS_WINDOWS - -// Get the PyRuntime section address for any platform -static uintptr_t -get_py_runtime(proc_handle_t* handle) -{ - uintptr_t address = 0; - -#ifdef MS_WINDOWS - // On Windows, search for 'python' in executable or DLL - address = search_windows_map_for_section(handle, "PyRuntime", L"python"); - if (address == 0) { - // Error out: 'python' substring covers both executable and DLL - PyErr_SetString(PyExc_RuntimeError, "Failed to find the PyRuntime section in the process."); - } -#elif defined(__linux__) - // On Linux, search for 'python' in executable or DLL - address = search_linux_map_for_section(handle, "PyRuntime", "python"); - if (address == 0) { - // Error out: 'python' substring covers both executable and DLL - 
PyErr_SetString(PyExc_RuntimeError, "Failed to find the PyRuntime section in the process."); - } -#else - // On macOS, try libpython first, then fall back to python - address = search_map_for_section(handle, "PyRuntime", "libpython"); - if (address == 0) { - // TODO: Differentiate between not found and error - PyErr_Clear(); - address = search_map_for_section(handle, "PyRuntime", "python"); - } -#endif - - return address; -} - -// Platform-independent memory read function static int read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* dst) { -#ifdef MS_WINDOWS - SIZE_T read_bytes = 0; - SIZE_T result = 0; - do { - if (!ReadProcessMemory(handle->hProcess, (LPCVOID)(remote_address + result), (char*)dst + result, len - result, &read_bytes)) { - PyErr_SetFromWindowsErr(0); - return -1; - } - result += read_bytes; - } while (result < len); - return 0; -#elif defined(__linux__) && HAVE_PROCESS_VM_READV - struct iovec local[1]; - struct iovec remote[1]; - Py_ssize_t result = 0; - Py_ssize_t read_bytes = 0; - - do { - local[0].iov_base = (char*)dst + result; - local[0].iov_len = len - result; - remote[0].iov_base = (void*)(remote_address + result); - remote[0].iov_len = len - result; - - read_bytes = process_vm_readv(handle->pid, local, 1, remote, 1, 0); - if (read_bytes < 0) { - PyErr_SetFromErrno(PyExc_OSError); - return -1; - } - - result += read_bytes; - } while ((size_t)read_bytes != local[0].iov_len); - return 0; -#elif defined(__APPLE__) && TARGET_OS_OSX - Py_ssize_t result = -1; - kern_return_t kr = mach_vm_read_overwrite( - pid_to_task(handle->pid), - (mach_vm_address_t)remote_address, - len, - (mach_vm_address_t)dst, - (mach_vm_size_t*)&result); - - if (kr != KERN_SUCCESS) { - switch (kr) { - case KERN_PROTECTION_FAILURE: - PyErr_SetString(PyExc_PermissionError, "Not enough permissions to read memory"); - break; - case KERN_INVALID_ARGUMENT: - PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_read_overwrite"); - break; - 
default: - PyErr_SetString(PyExc_RuntimeError, "Unknown error reading memory"); - } - return -1; - } - return 0; -#else - Py_UNREACHABLE(); -#endif + return _Py_RemoteDebug_ReadRemoteMemory(handle, remote_address, len, dst); } -// Platform-independent memory write function static int write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) { @@ -886,16 +157,7 @@ read_offsets( uintptr_t *runtime_start_address, _Py_DebugOffsets* debug_offsets ) { - *runtime_start_address = get_py_runtime(handle); - if (!*runtime_start_address) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get PyRuntime address"); - } - return -1; - } - size_t size = sizeof(struct _Py_DebugOffsets); - if (0 != read_memory(handle, *runtime_start_address, size, debug_offsets)) { + if (_Py_RemoteDebug_ReadDebugOffsets(handle, runtime_start_address, debug_offsets)) { return -1; } if (ensure_debug_offset_compatibility(debug_offsets)) { @@ -1097,3 +359,4 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) return rc; #endif } +