Skip to content

Commit f742435

Browse files
csmith49Calvin Smith
and
Calvin Smith
authored
feat: Structured summary generation for history condensation (#7696)
Co-authored-by: Calvin Smith <[email protected]>
1 parent a4ebb5b commit f742435

File tree

5 files changed

+466
-5
lines changed

5 files changed

+466
-5
lines changed

openhands/core/config/condenser_config.py

+29
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,33 @@ class LLMAttentionCondenserConfig(BaseModel):
126126
model_config = {'extra': 'forbid'}
127127

128128

129+
class StructuredSummaryCondenserConfig(BaseModel):
    """Settings from which a StructuredSummaryCondenser is built.

    Keys that are not declared below are rejected (see ``model_config``).
    """

    # Tag identifying this config variant; only 'structured' is accepted.
    type: Literal['structured'] = Field(default='structured')

    # Required field: the Ellipsis default means a value must be supplied.
    llm_config: LLMConfig = Field(
        default=...,
        description='Configuration for the LLM to use for condensing.',
    )

    # Defaults to a single event, on the assumption that the first event in
    # the history is most likely the user's task.
    keep_first: int = Field(
        description='Number of initial events to always keep in history.',
        default=1,
        ge=0,
    )

    max_size: int = Field(
        description='Maximum size of the condensed history before triggering forgetting.',
        default=100,
        ge=2,
    )

    max_event_length: int = Field(
        description='Maximum length of the event representations to be passed to the LLM.',
        default=10_000,
    )

    model_config = {'extra': 'forbid'}
154+
155+
129156
# Type alias for convenience
130157
CondenserConfig = (
131158
NoOpCondenserConfig
@@ -135,6 +162,7 @@ class LLMAttentionCondenserConfig(BaseModel):
135162
| LLMSummarizingCondenserConfig
136163
| AmortizedForgettingCondenserConfig
137164
| LLMAttentionCondenserConfig
165+
| StructuredSummaryCondenserConfig
138166
)
139167

140168

@@ -237,6 +265,7 @@ def create_condenser_config(condenser_type: str, data: dict) -> CondenserConfig:
237265
'llm': LLMSummarizingCondenserConfig,
238266
'amortized': AmortizedForgettingCondenserConfig,
239267
'llm_attention': LLMAttentionCondenserConfig,
268+
'structured': StructuredSummaryCondenserConfig,
240269
}
241270

242271
if condenser_type not in condenser_classes:

openhands/memory/condenser/impl/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
from openhands.memory.condenser.impl.recent_events_condenser import (
1919
RecentEventsCondenser,
2020
)
21+
from openhands.memory.condenser.impl.structured_summary_condenser import (
22+
StructuredSummaryCondenser,
23+
)
2124

2225
__all__ = [
2326
'AmortizedForgettingCondenser',
@@ -28,4 +31,5 @@
2831
'ObservationMaskingCondenser',
2932
'BrowserOutputCondenser',
3033
'RecentEventsCondenser',
34+
'StructuredSummaryCondenser',
3135
]
openhands/memory/condenser/impl/structured_summary_condenser.py

+322
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
from __future__ import annotations
2+
3+
import json
4+
from typing import Any
5+
6+
from pydantic import BaseModel, Field
7+
8+
from openhands.core.config.condenser_config import (
9+
StructuredSummaryCondenserConfig,
10+
)
11+
from openhands.core.logger import openhands_logger as logger
12+
from openhands.core.message import Message, TextContent
13+
from openhands.events.action.agent import CondensationAction
14+
from openhands.events.observation.agent import AgentCondensationObservation
15+
from openhands.events.serialization.event import truncate_content
16+
from openhands.llm import LLM
17+
from openhands.memory.condenser.condenser import (
18+
Condensation,
19+
RollingCondenser,
20+
View,
21+
)
22+
23+
24+
class StateSummary(BaseModel):
    """Structured snapshot of the agent's progress on a task.

    Every field is a free-form string that defaults to the empty string, so an
    instance can be created with no arguments at all.
    """

    # --- Core fields -------------------------------------------------------
    user_context: str = Field(
        description='Essential user requirements, goals, and clarifications in concise form.',
        default='',
    )
    completed_tasks: str = Field(
        description='List of tasks completed so far with brief results.',
        default='',
    )
    pending_tasks: str = Field(
        description='List of tasks that still need to be done.',
        default='',
    )
    current_state: str = Field(
        description='Current variables, data structures, or other relevant state information.',
        default='',
    )

    # --- Code state ----------------------------------------------------------
    files_modified: str = Field(
        description='List of files that have been created or modified.',
        default='',
    )
    function_changes: str = Field(
        description='List of functions that have been created or modified.',
        default='',
    )
    data_structures: str = Field(
        description='List of key data structures in use or modified.',
        default='',
    )

    # --- Test status ---------------------------------------------------------
    tests_written: str = Field(
        description='Whether tests have been written for the changes. True, false, or unknown.',
        default='',
    )
    tests_passing: str = Field(
        description='Whether all tests are currently passing. True, false, or unknown.',
        default='',
    )
    failing_tests: str = Field(
        description='List of names or descriptions of any failing tests.',
        default='',
    )
    error_messages: str = Field(
        description='List of key error messages encountered.',
        default='',
    )

    # --- Version control -----------------------------------------------------
    branch_created: str = Field(
        description='Whether a branch has been created for this work. True, false, or unknown.',
        default='',
    )
    branch_name: str = Field(
        description='Name of the current working branch if known.',
        default='',
    )
    commits_made: str = Field(
        description='Whether any commits have been made. True, false, or unknown.',
        default='',
    )
    pr_created: str = Field(
        description='Whether a pull request has been created. True, false, or unknown.',
        default='',
    )
    pr_status: str = Field(
        description="Status of any pull request: 'draft', 'open', 'merged', 'closed', or 'unknown'.",
        default='',
    )

    # --- Everything else -----------------------------------------------------
    dependencies: str = Field(
        description='List of dependencies or imports that have been added or modified.',
        default='',
    )
    other_relevant_context: str = Field(
        description="Any other important information that doesn't fit into the categories above.",
        default='',
    )

    @classmethod
    def tool_description(cls) -> dict[str, Any]:
        """Build a function-tool schema whose arguments mirror this model's fields.

        Each model field becomes a string-typed property carrying the field's
        description (or an empty string when it has none). Only
        ``user_context``, ``completed_tasks`` and ``pending_tasks`` are listed
        as required. The result can be handed to an LLM to force structured
        generation.
        """
        argument_schemas = {
            name: {'type': 'string', 'description': info.description or ''}
            for name, info in cls.model_fields.items()
        }

        function_spec = {
            'name': 'create_state_summary',
            'description': (
                'Creates a comprehensive summary of the current state of the '
                'interaction to preserve context when history grows too large. '
                'You must include non-empty values for user_context, '
                'completed_tasks, and pending_tasks.'
            ),
            'parameters': {
                'type': 'object',
                'properties': argument_schemas,
                'required': ['user_context', 'completed_tasks', 'pending_tasks'],
            },
        }
        return {'type': 'function', 'function': function_spec}

    def __str__(self) -> str:
        """Render the summary as Markdown: a title, then labelled sections."""
        # (section heading, ((label, value), ...)) in the order they are emitted.
        layout = (
            (
                'Core Information',
                (
                    ('User Context', self.user_context),
                    ('Completed Tasks', self.completed_tasks),
                    ('Pending Tasks', self.pending_tasks),
                    ('Current State', self.current_state),
                ),
            ),
            (
                'Code Changes',
                (
                    ('Files Modified', self.files_modified),
                    ('Function Changes', self.function_changes),
                    ('Data Structures', self.data_structures),
                    ('Dependencies', self.dependencies),
                ),
            ),
            (
                'Testing Status',
                (
                    ('Tests Written', self.tests_written),
                    ('Tests Passing', self.tests_passing),
                    ('Failing Tests', self.failing_tests),
                    ('Error Messages', self.error_messages),
                ),
            ),
            (
                'Version Control',
                (
                    ('Branch Created', self.branch_created),
                    ('Branch Name', self.branch_name),
                    ('Commits Made', self.commits_made),
                    ('PR Created', self.pr_created),
                    ('PR Status', self.pr_status),
                ),
            ),
            (
                'Additional Context',
                (('Other Relevant Context', self.other_relevant_context),),
            ),
        )

        blocks = ['# State Summary']
        for heading, entries in layout:
            blocks.append(f'## {heading}')
            blocks.extend(f'**{label}**: {value}' for label, value in entries)

        # Blank line between every heading and entry.
        return '\n\n'.join(blocks)
159+
160+
161+
class StructuredSummaryCondenser(RollingCondenser):
    """A condenser that summarizes forgotten events.

    Maintains a condensed history and forgets old events when it grows too
    large. Uses structured generation via function-calling to produce summaries
    that replace forgotten events.
    """

    # Fixed instructions placed at the start of every summarization prompt.
    _PROMPT_PREAMBLE = """You are maintaining a context-aware state summary for an interactive software agent. This summary is critical because it:
1. Preserves essential context when conversation history grows too large
2. Prevents lost work when the session length exceeds token limits
3. Helps maintain continuity across multiple interactions

You will be given:
- A list of events (actions taken by the agent)
- The most recent previous summary (if one exists)

Capture all relevant information, especially:
- User requirements that were explicitly stated
- Work that has been completed
- Tasks that remain pending
- Current state of code, variables, and data structures
- The status of any version control operations"""

    def __init__(
        self,
        llm: LLM,
        max_size: int = 100,
        keep_first: int = 1,
        max_event_length: int = 10_000,
    ):
        """Validate the settings and store them on the instance.

        Args:
            llm: LLM used to generate summaries; it must report function
                calling as active.
            max_size: View length above which condensation is triggered.
            keep_first: Number of leading events that are never forgotten.
            max_event_length: Character budget passed to ``truncate_content``
                for each piece of text placed in the prompt.

        Raises:
            ValueError: If ``max_size`` is below 1, ``keep_first`` is negative,
                ``keep_first`` is not less than half of ``max_size``, or the
                LLM does not have function calling active.
        """
        # Check each bound on its own before the combined constraint, so that
        # an invalid max_size is reported as such instead of being masked by
        # the keep_first/max_size comparison.
        if max_size < 1:
            raise ValueError(f'max_size ({max_size}) cannot be non-positive')
        if keep_first < 0:
            raise ValueError(f'keep_first ({keep_first}) cannot be negative')
        if keep_first >= max_size // 2:
            raise ValueError(
                f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
            )

        if not llm.is_function_calling_active():
            raise ValueError(
                'LLM must support function calling to use StructuredSummaryCondenser'
            )

        self.max_size = max_size
        self.keep_first = keep_first
        self.max_event_length = max_event_length
        self.llm = llm

        super().__init__()

    def _truncate(self, content: str) -> str:
        """Truncate the content to fit within the specified maximum event length."""
        return truncate_content(content, max_chars=self.max_event_length)

    def _build_prompt(self, previous_summary: str, forgotten_events: list) -> str:
        """Assemble the summarization prompt from the preamble, the previous summary and the forgotten events."""
        parts = [
            self._PROMPT_PREAMBLE,
            '\n\n',
            f'<PREVIOUS SUMMARY>\n{previous_summary}\n</PREVIOUS SUMMARY>\n',
            '\n\n',
        ]
        # Each forgotten event contributes its string representation,
        # truncated if necessary.
        for forgotten_event in forgotten_events:
            event_content = self._truncate(str(forgotten_event))
            parts.append(f'<EVENT id={forgotten_event.id}>\n{event_content}\n</EVENT>\n')
        return ''.join(parts)

    @staticmethod
    def _parse_summary(response: Any) -> StateSummary:
        """Extract a StateSummary from the response's tool call, or return an empty one on failure."""
        try:
            message = response.choices[0].message

            tool_calls = getattr(message, 'tool_calls', None)
            if not tool_calls:
                raise ValueError('No tool calls found in response')

            summary_tool_call = next(
                (
                    tool_call
                    for tool_call in tool_calls
                    if tool_call.function.name == 'create_state_summary'
                ),
                None,
            )
            if summary_tool_call is None:
                raise ValueError('create_state_summary tool call not found')

            args_dict = json.loads(summary_tool_call.function.arguments)
            return StateSummary.model_validate(args_dict)

        # IndexError covers an empty ``choices`` list and TypeError covers
        # non-string ``arguments`` (e.g. None); both are malformed responses
        # that should degrade to an empty summary like the other cases.
        except (
            ValueError,
            AttributeError,
            KeyError,
            IndexError,
            TypeError,
            json.JSONDecodeError,
        ) as e:
            logger.warning(
                f'Failed to parse summary tool call: {e}. Using empty summary.'
            )
            return StateSummary()

    def get_condensation(self, view: View) -> Condensation:
        """Summarize the middle of the view and return the resulting condensation.

        The first ``keep_first`` events and a number of trailing events are
        kept; everything in between (except existing condensation
        observations) is sent to the LLM and replaced by a summary.

        Raises:
            ValueError: If there are no events to forget in the view.
        """
        head = view[: self.keep_first]
        target_size = self.max_size // 2
        # Number of events to keep from the tail -- target size, minus however
        # many prefix events from the head, minus one for the summarization
        # event.
        events_from_tail = target_size - len(head) - 1
        # Use an explicit end index rather than a negative one: when
        # events_from_tail is 0, ``view[start:-0]`` would be an empty slice
        # instead of "everything up to the end".
        forget_end = max(len(view) - events_from_tail, 0)

        summary_event = (
            view[self.keep_first]
            if isinstance(view[self.keep_first], AgentCondensationObservation)
            else AgentCondensationObservation('No events summarized')
        )

        # Events to be forgotten are those in neither head nor tail; prior
        # summaries are skipped because they are passed separately.
        forgotten_events = [
            event
            for event in view[self.keep_first : forget_end]
            if not isinstance(event, AgentCondensationObservation)
        ]
        if not forgotten_events:
            # Fail before spending an LLM call; without any forgotten events
            # there are no start/end ids to report.
            raise ValueError('No events to forget when condensing the view')

        # We'll always have a summary event, but the types aren't precise
        # enough to guarantee that it has a message attribute.
        summary_event_content = self._truncate(summary_event.message or '')
        prompt = self._build_prompt(summary_event_content, forgotten_events)

        messages = [Message(role='user', content=[TextContent(text=prompt)])]

        response = self.llm.completion(
            messages=self.llm.format_messages_for_llm(messages),
            tools=[StateSummary.tool_description()],
            tool_choice={
                'type': 'function',
                'function': {'name': 'create_state_summary'},
            },
        )

        summary = self._parse_summary(response)

        self.add_metadata('response', response.model_dump())
        self.add_metadata('metrics', self.llm.metrics.get())

        return Condensation(
            action=CondensationAction(
                forgotten_events_start_id=min(event.id for event in forgotten_events),
                forgotten_events_end_id=max(event.id for event in forgotten_events),
                summary=str(summary),
                summary_offset=self.keep_first,
            )
        )

    def should_condense(self, view: View) -> bool:
        """Return True when the view holds more than ``max_size`` events."""
        return len(view) > self.max_size

    @classmethod
    def from_config(
        cls, config: StructuredSummaryCondenserConfig
    ) -> StructuredSummaryCondenser:
        """Build a condenser from ``config``, creating a new LLM from its ``llm_config``."""
        return StructuredSummaryCondenser(
            llm=LLM(config=config.llm_config),
            max_size=config.max_size,
            keep_first=config.keep_first,
            max_event_length=config.max_event_length,
        )
320+
321+
322+
# Register the config class with this condenser at import time.
# NOTE(review): presumably this is what lets a StructuredSummaryCondenserConfig
# be resolved to this condenser class -- confirm against register_config.
StructuredSummaryCondenser.register_config(StructuredSummaryCondenserConfig)

0 commit comments

Comments
 (0)