fix - Remove unmatched tool calls for Claude (All-Hands-AI#7597)

raymyers · web-flow · commit 7f35055d02ac · 2025-04-01T10:45:04.000-05:00
diff --git a/openhands/memory/conversation_memory.py b/openhands/memory/conversation_memory.py
@@ -1,3 +1,5 @@
+from typing import Generator
+
 from litellm import ModelResponse
 
 from openhands.core.config.agent_config import AgentConfig
@@ -125,7 +127,7 @@ def process_events(
                 pending_tool_call_action_messages.pop(response_id)
 
             messages += messages_to_add
-
+        messages = list(ConversationMemory._filter_unmatched_tool_calls(messages))
         return messages
 
     def process_initial_messages(self, with_caching: bool = False) -> list[Message]:
@@ -592,3 +594,58 @@ def _has_agent_in_earlier_events(
                 ):
                     return True
         return False
+
+    @staticmethod
+    def _filter_unmatched_tool_calls(
+        messages: list[Message],
+    ) -> Generator[Message, None, None]:
+        """Yield messages with unpaired tool calls and tool responses removed.
+
+        A tool message is dropped when no assistant message carries a tool call
+        with the same id. An assistant tool call is dropped when no tool message
+        answers it, and an assistant message left with no tool calls at all is
+        dropped entirely (including its content).
+
+        Ids are paired across the whole list, regardless of message order.
+
+        The input list and its messages are never mutated: an assistant message
+        that loses only some of its tool calls is yielded as a copy.
+
+        Tool messages and tool calls whose id is None or empty cannot be paired
+        and are always kept.
+        """
+        tool_call_ids = {
+            tool_call.id
+            for message in messages
+            if message.role == 'assistant' and message.tool_calls
+            for tool_call in message.tool_calls
+            if tool_call.id
+        }
+        tool_response_ids = {
+            message.tool_call_id
+            for message in messages
+            if message.role == 'tool' and message.tool_call_id
+        }
+
+        for message in messages:
+            if message.role == 'tool' and message.tool_call_id:
+                # Drop tool responses whose originating tool call is absent
+                if message.tool_call_id in tool_call_ids:
+                    yield message
+
+            elif message.role == 'assistant' and message.tool_calls:
+                # Keep answered calls, plus id-less calls that cannot be paired
+                kept_tool_calls = [
+                    tool_call
+                    for tool_call in message.tool_calls
+                    if not tool_call.id or tool_call.id in tool_response_ids
+                ]
+                if len(kept_tool_calls) == len(message.tool_calls):
+                    # Nothing was removed, so pass the original object through
+                    yield message
+                elif kept_tool_calls:
+                    # Copy rather than mutate so the caller's message is intact
+                    yield message.model_copy(update={'tool_calls': kept_tool_calls})
+            else:
+                # Any other message is kept as-is
+                yield message
diff --git a/tests/unit/test_conversation_memory.py b/tests/unit/test_conversation_memory.py
@@ -3,6 +3,7 @@
 from unittest.mock import MagicMock, Mock
 
 import pytest
+from litellm import ChatCompletionMessageToolCall
 
 from openhands.controller.state.state import State
 from openhands.core.config.agent_config import AgentConfig
@@ -1050,3 +1051,150 @@ def test_has_agent_in_earlier_events(conversation_memory):
         conversation_memory._has_agent_in_earlier_events('non_existent', 3, events)
         is False
     )
+
+
+class TestFilterUnmatchedToolCalls:
+    """Tests for ConversationMemory._filter_unmatched_tool_calls."""
+
+    def test_empty_is_unchanged(self):
+        """An empty history yields nothing."""
+        assert list(ConversationMemory._filter_unmatched_tool_calls([])) == []
+
+    def test_no_tool_calls_is_unchanged(self):
+        """Messages without any tool calls or tool responses pass through."""
+        messages = [
+            Message(role='user', content=[TextContent(text='Hello')]),
+            Message(role='assistant', content=[TextContent(text='Hi there')]),
+            Message(role='user', content=[TextContent(text='How are you?')]),
+        ]
+        assert (
+            list(ConversationMemory._filter_unmatched_tool_calls(messages)) == messages
+        )
+
+    def test_matched_tool_calls_are_unchanged(self):
+        """A tool call with a matching tool response is left untouched."""
+        messages = [
+            Message(role='user', content=[TextContent(text="What's the weather?")]),
+            Message(
+                role='assistant',
+                content=[],
+                tool_calls=[
+                    ChatCompletionMessageToolCall(
+                        id='call_1',
+                        type='function',
+                        function={'name': 'get_weather', 'arguments': ''},
+                    )
+                ],
+            ),
+            Message(
+                role='tool',
+                tool_call_id='call_1',
+                content=[TextContent(text='Sunny, 75°F')],
+            ),
+            Message(role='assistant', content=[TextContent(text="It's sunny today.")]),
+        ]
+
+        assert (
+            list(ConversationMemory._filter_unmatched_tool_calls(messages)) == messages
+        )
+
+    def test_tool_response_without_call_is_removed(self):
+        """A tool message whose id matches no assistant tool call is dropped."""
+        messages = [
+            Message(role='user', content=[TextContent(text='Query')]),
+            Message(
+                role='tool',
+                tool_call_id='missing_call',
+                content=[TextContent(text='Response')],
+            ),
+            Message(role='assistant', content=[TextContent(text='Answer')]),
+        ]
+
+        expected_after_filter = [
+            Message(role='user', content=[TextContent(text='Query')]),
+            Message(role='assistant', content=[TextContent(text='Answer')]),
+        ]
+
+        result = list(ConversationMemory._filter_unmatched_tool_calls(messages))
+        assert result == expected_after_filter
+
+    def test_tool_call_without_response_is_removed(self):
+        """An assistant message whose only tool call has no response is dropped."""
+        messages = [
+            Message(role='user', content=[TextContent(text='Query')]),
+            Message(
+                role='assistant',
+                content=[],
+                tool_calls=[
+                    ChatCompletionMessageToolCall(
+                        id='unmatched_call',
+                        type='function',
+                        function={'name': 'some_function', 'arguments': ''},
+                    )
+                ],
+            ),
+            Message(role='assistant', content=[TextContent(text='Answer')]),
+        ]
+
+        expected_after_filter = [
+            Message(role='user', content=[TextContent(text='Query')]),
+            Message(role='assistant', content=[TextContent(text='Answer')]),
+        ]
+
+        result = list(ConversationMemory._filter_unmatched_tool_calls(messages))
+        assert result == expected_after_filter
+
+    def test_partial_matched_tool_calls_retains_matched(self):
+        """A partially matched assistant message keeps only its matched calls."""
+        messages = [
+            Message(role='user', content=[TextContent(text='Get data')]),
+            Message(
+                role='assistant',
+                content=[],
+                tool_calls=[
+                    ChatCompletionMessageToolCall(
+                        id='matched_call',
+                        type='function',
+                        function={'name': 'function1', 'arguments': ''},
+                    ),
+                    ChatCompletionMessageToolCall(
+                        id='unmatched_call',
+                        type='function',
+                        function={'name': 'function2', 'arguments': ''},
+                    ),
+                ],
+            ),
+            Message(
+                role='tool',
+                tool_call_id='matched_call',
+                content=[TextContent(text='Data')],
+            ),
+            Message(role='assistant', content=[TextContent(text='Result')]),
+        ]
+
+        expected = [
+            Message(role='user', content=[TextContent(text='Get data')]),
+            # This message should be modified to only include the matched tool call
+            Message(
+                role='assistant',
+                content=[],
+                tool_calls=[
+                    ChatCompletionMessageToolCall(
+                        id='matched_call',
+                        type='function',
+                        function={'name': 'function1', 'arguments': ''},
+                    )
+                ],
+            ),
+            Message(
+                role='tool',
+                tool_call_id='matched_call',
+                content=[TextContent(text='Data')],
+            ),
+            Message(role='assistant', content=[TextContent(text='Result')]),
+        ]
+
+        result = list(ConversationMemory._filter_unmatched_tool_calls(messages))
+        assert result == expected
+        # The input message must keep both tool calls: the filter copies, never mutates
+        assert len(messages[1].tool_calls) == 2