Skip to content

Commit 88b2f36

Browse files
csmith49 (Calvin Smith) authored and enyst committed
Improve performance of LLM summarizing condenser (All-Hands-AI#6597)
Co-authored-by: Calvin Smith <[email protected]> Co-authored-by: Engel Nyst <[email protected]>
1 parent 5c530ce commit 88b2f36

File tree

4 files changed

+190
-71
lines changed

4 files changed

+190
-71
lines changed

openhands/core/config/condenser_config.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,14 @@ class LLMSummarizingCondenserConfig(BaseModel):
5454
llm_config: LLMConfig = Field(
5555
..., description='Configuration for the LLM to use for condensing.'
5656
)
57+
keep_first: int = Field(
58+
default=1,
59+
description='The number of initial events to condense.',
60+
ge=0,
61+
)
62+
max_size: int = Field(
63+
default=10, description='Maximum number of events to keep.', ge=1
64+
)
5765

5866

5967
class AmortizedForgettingCondenserConfig(BaseModel):
Lines changed: 86 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,55 +1,112 @@
11
from __future__ import annotations
22

33
from openhands.core.config.condenser_config import LLMSummarizingCondenserConfig
4-
from openhands.core.logger import openhands_logger as logger
54
from openhands.events.event import Event
65
from openhands.events.observation.agent import AgentCondensationObservation
76
from openhands.llm import LLM
8-
from openhands.memory.condenser.condenser import Condenser
7+
from openhands.memory.condenser.condenser import RollingCondenser
98

109

11-
class LLMSummarizingCondenser(Condenser):
12-
"""A condenser that relies on a language model to summarize the event sequence as a single event."""
10+
class LLMSummarizingCondenser(RollingCondenser):
11+
"""A condenser that summarizes forgotten events.
1312
14-
def __init__(self, llm: LLM):
13+
Maintains a condensed history and forgets old events when it grows too large,
14+
keeping a special summarization event after the prefix that summarizes all previous summarizations
15+
and newly forgotten events.
16+
"""
17+
18+
def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 1):
19+
if keep_first >= max_size // 2:
20+
raise ValueError(
21+
f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
22+
)
23+
if keep_first < 0:
24+
raise ValueError(f'keep_first ({keep_first}) cannot be negative')
25+
if max_size < 1:
26+
raise ValueError(f'max_size ({max_size}) cannot be non-positive')
27+
28+
self.max_size = max_size
29+
self.keep_first = keep_first
1530
self.llm = llm
1631

1732
super().__init__()
1833

1934
def condense(self, events: list[Event]) -> list[Event]:
20-
"""Applies an LLM to summarize the list of events.
21-
22-
Raises:
23-
Exception: If the LLM is unable to summarize the event sequence.
24-
"""
25-
try:
26-
# Convert events to a format suitable for summarization
27-
events_text = '\n'.join(f'{e.timestamp}: {e.message}' for e in events)
28-
summarize_prompt = f'Please summarize these events:\n{events_text}'
29-
30-
resp = self.llm.completion(
31-
messages=[{'content': summarize_prompt, 'role': 'user'}]
32-
)
33-
summary_response = resp.choices[0].message.content
35+
"""Apply the amortized forgetting strategy with LLM summarization to the given list of events."""
36+
if len(events) <= self.max_size:
37+
return events
38+
39+
head = events[: self.keep_first]
40+
41+
target_size = self.max_size // 2
42+
events_from_tail = target_size - len(head)
43+
tail = events[-events_from_tail:]
44+
45+
summary_event = (
46+
events[self.keep_first]
47+
if isinstance(events[self.keep_first], AgentCondensationObservation)
48+
else AgentCondensationObservation('No events summarized')
49+
)
50+
51+
# Identify events to be forgotten (those not in head or tail)
52+
forgotten_events = []
53+
for event in events[self.keep_first : -events_from_tail]:
54+
if not isinstance(event, AgentCondensationObservation):
55+
forgotten_events.append(event)
56+
57+
# Construct prompt for summarization
58+
prompt = """You are maintaining state history for an LLM-based code agent. Track:
59+
60+
STATE: {File paths, function signatures, data structures}
61+
TESTS: {Failing cases, error messages, outputs}
62+
CHANGES: {Code edits, variable updates}
63+
DEPS: {Dependencies, imports, external calls}
64+
INTENT: {Why changes were made, acceptance criteria}
65+
66+
SKIP: {Git clones, build logs}
67+
SUMMARIZE: {File listings}
68+
MAX_LENGTH: Keep summaries under 1000 words
69+
70+
Example history format:
71+
STATE: mod_float() in card.py updated
72+
TESTS: test_format() passed
73+
CHANGES: str(val) replaces f"{val:.16G}"
74+
DEPS: None modified
75+
INTENT: Fix float precision overflow"""
76+
77+
prompt + '\n\n'
78+
79+
prompt += ('\n' + summary_event.message + '\n') if summary_event.message else ''
80+
81+
prompt + '\n\n'
3482

35-
# Create a new summary event with the condensed content
36-
summary_event = AgentCondensationObservation(summary_response)
83+
for forgotten_event in forgotten_events:
84+
prompt += str(forgotten_event) + '\n\n'
3785

38-
# Add metrics to state
39-
self.add_metadata('response', resp.model_dump())
40-
self.add_metadata('metrics', self.llm.metrics.get())
86+
response = self.llm.completion(
87+
messages=[
88+
{
89+
'content': prompt,
90+
'role': 'user',
91+
},
92+
],
93+
)
94+
summary = response.choices[0].message.content
4195

42-
return [summary_event]
96+
self.add_metadata('response', response.model_dump())
97+
self.add_metadata('metrics', self.llm.metrics.get())
4398

44-
except Exception as e:
45-
logger.error(f'Error condensing events: {str(e)}')
46-
raise e
99+
return head + [AgentCondensationObservation(summary)] + tail
47100

48101
@classmethod
49102
def from_config(
50103
cls, config: LLMSummarizingCondenserConfig
51104
) -> LLMSummarizingCondenser:
52-
return LLMSummarizingCondenser(llm=LLM(config=config.llm_config))
105+
return LLMSummarizingCondenser(
106+
llm=LLM(config=config.llm_config),
107+
max_size=config.max_size,
108+
keep_first=config.keep_first,
109+
)
53110

54111

55112
LLMSummarizingCondenser.register_config(LLMSummarizingCondenserConfig)

openhands/server/session/session.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,9 @@
66

77
from openhands.controller.agent import Agent
88
from openhands.core.config import AppConfig
9-
from openhands.core.config.condenser_config import AmortizedForgettingCondenserConfig
9+
from openhands.core.config.condenser_config import (
10+
LLMSummarizingCondenserConfig,
11+
)
1012
from openhands.core.const.guide_url import TROUBLESHOOTING_URL
1113
from openhands.core.logger import openhands_logger as logger
1214
from openhands.core.schema import AgentState
@@ -108,8 +110,8 @@ async def initialize_agent(
108110
agent_config = self.config.get_agent_config(agent_cls)
109111

110112
if settings.enable_default_condenser:
111-
default_condenser_config = AmortizedForgettingCondenserConfig(
112-
keep_first=3, max_size=20
113+
default_condenser_config = LLMSummarizingCondenserConfig(
114+
llm_config=llm.config, keep_first=3, max_size=40
113115
)
114116
logger.info(f'Enabling default condenser: {default_condenser_config}')
115117
agent_config.condenser = default_condenser_config

tests/unit/test_condenser.py

Lines changed: 91 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
)
1616
from openhands.core.config.llm_config import LLMConfig
1717
from openhands.events.event import Event, EventSource
18+
from openhands.events.observation.agent import AgentCondensationObservation
1819
from openhands.events.observation.observation import Observation
1920
from openhands.llm import LLM
2021
from openhands.memory.condenser import Condenser
@@ -214,73 +215,124 @@ def test_recent_events_condenser():
214215
assert result[2]._message == 'Event 5'
215216

216217

217-
def test_llm_condenser_from_config():
218-
"""Test that LLMCondensers can be made from config."""
218+
def test_llm_summarization_condenser_from_config():
219+
"""Test that LLMSummarizingCondenser objects can be made from config."""
219220
config = LLMSummarizingCondenserConfig(
221+
max_size=50,
222+
keep_first=10,
220223
llm_config=LLMConfig(
221224
model='gpt-4o',
222225
api_key='test_key',
223-
)
226+
),
224227
)
225228
condenser = Condenser.from_config(config)
226229

227230
assert isinstance(condenser, LLMSummarizingCondenser)
228231
assert condenser.llm.config.model == 'gpt-4o'
229232
assert condenser.llm.config.api_key.get_secret_value() == 'test_key'
233+
assert condenser.max_size == 50
234+
assert condenser.keep_first == 10
230235

231236

232-
def test_llm_condenser(mock_llm, mock_state):
233-
"""Test that LLMCondensers use the LLM to generate a summary event."""
234-
events = [
235-
create_test_event('Event 1'),
236-
create_test_event('Event 2'),
237-
]
238-
mock_state.history = events
237+
def test_llm_amortized_summarization_condenser_invalid_config():
238+
"""Test that LLMSummarizingCondenser raises error when keep_first > max_size."""
239+
pytest.raises(
240+
ValueError,
241+
LLMSummarizingCondenser,
242+
llm=MagicMock(),
243+
max_size=4,
244+
keep_first=2,
245+
)
246+
pytest.raises(ValueError, LLMSummarizingCondenser, llm=MagicMock(), max_size=0)
247+
pytest.raises(ValueError, LLMSummarizingCondenser, llm=MagicMock(), keep_first=-1)
239248

240-
mock_llm.metrics = MagicMock()
241-
mock_llm.metrics.get.return_value = {'test_metric': 1.0}
242249

243-
mock_llm.set_mock_response_content('Summary of events')
250+
def test_llm_summarizing_condenser_grows_to_max_size(mock_llm, mock_state):
251+
"""Test that LLMSummarizingCondenser correctly maintains an event context up to max size."""
252+
max_size = 15
253+
condenser = LLMSummarizingCondenser(max_size=max_size, llm=mock_llm)
244254

245-
condenser = LLMSummarizingCondenser(llm=mock_llm)
246-
result = condenser.condensed_history(mock_state)
255+
for i in range(max_size):
256+
event = create_test_event(f'Event {i}')
257+
mock_state.history.append(event)
258+
results = condenser.condensed_history(mock_state)
259+
assert len(results) == i + 1
260+
261+
262+
def test_llm_summarizing_condenser_forgets_and_summarizes(mock_llm, mock_state):
263+
"""Test that the LLMSummarizingCondenser forgets events and maintains a summary."""
264+
max_size = 4
265+
keep_first = 1
266+
condenser = LLMSummarizingCondenser(
267+
max_size=max_size, keep_first=keep_first, llm=mock_llm
268+
)
269+
270+
# Add initial event
271+
first_event = create_test_event('Event 0')
272+
mock_state.history.append(first_event)
273+
274+
# Set up mock LLM response
275+
mock_llm.set_mock_response_content('Summary of forgotten events')
276+
277+
# Add enough events to trigger forgetting
278+
for i in range(max_size + 3): # +3 to ensure we're well past max_size
279+
event = create_test_event(f'Event {i+1}')
280+
mock_state.history.append(event)
281+
282+
# Get the condensed history
283+
results = condenser.condensed_history(mock_state)
284+
285+
# We should have exactly 3 events:
286+
# 1. First event (keep_first = 1)
287+
# 2. Summary event
288+
# 3. Most recent event
289+
assert len(results) == 3, f'Expected 3 events, got {len(results)}: {results}'
290+
assert (
291+
results[0] == first_event
292+
), f'First event should be {first_event}, got {results[0]}'
293+
assert isinstance(
294+
results[1], AgentCondensationObservation
295+
), f'Second event should be a summary, got {results[1]}'
296+
assert (
297+
results[1].content == 'Summary of forgotten events'
298+
), f"Summary content should be 'Summary of forgotten events', got {results[1].content}"
299+
assert results[2] == event, f'Last event should be {event}, got {results[2]}'
300+
301+
302+
def test_llm_summarizing_condenser_llm_call(mock_llm, mock_state):
303+
"""Test that the LLM is called correctly when forgetting events."""
304+
max_size = 4
305+
keep_first = 1
306+
condenser = LLMSummarizingCondenser(
307+
max_size=max_size, keep_first=keep_first, llm=mock_llm
308+
)
309+
310+
# Add initial event
311+
first_event = create_test_event('Event 0')
312+
mock_state.history.append(first_event)
313+
314+
# Set up mock LLM response
315+
mock_llm.set_mock_response_content('Summary of forgotten events')
316+
mock_llm.metrics.get.return_value = {'test_metric': 1.0}
247317

248-
assert len(result) == 1
249-
assert result[0].content == 'Summary of events'
318+
# Add enough events to trigger forgetting
319+
for i in range(max_size):
320+
event = create_test_event(f'Event {i+1}')
321+
mock_state.history.append(event)
322+
condenser.condensed_history(mock_state)
250323

251-
# Verify LLM was called with correct prompt.
324+
# Verify LLM was called with correct prompt
252325
mock_llm.completion.assert_called_once()
253326
call_args = mock_llm.completion.call_args[1]
254327
assert 'messages' in call_args
255328
assert len(call_args['messages']) == 1
256-
assert 'Event 1' in call_args['messages'][0]['content']
257-
assert 'Event 2' in call_args['messages'][0]['content']
258329

259330
# Verify metrics were added to state
260331
assert 'condenser_meta' in mock_state.extra_data
261332
assert len(mock_state.extra_data['condenser_meta']) == 1
262333
assert mock_state.extra_data['condenser_meta'][0]['metrics'] == {'test_metric': 1.0}
263334

264335

265-
def test_llm_condenser_error():
266-
"""Test that LLM errors are propagated during condensation."""
267-
events = [create_test_event('Event 1', datetime(2024, 1, 1, 10, 0))]
268-
269-
mock_state = MagicMock()
270-
mock_state.history = events
271-
272-
mock_llm = MagicMock()
273-
mock_llm.completion.side_effect = Exception('LLM error')
274-
275-
condenser = LLMSummarizingCondenser(llm=mock_llm)
276-
277-
try:
278-
condenser.condensed_history(mock_state)
279-
raise AssertionError('Expected exception was not raised.')
280-
except Exception as e:
281-
assert str(e) == 'LLM error'
282-
283-
284336
def test_amortized_forgetting_condenser_from_config():
285337
"""Test that AmortizedForgettingCondenser objects can be made from config."""
286338
max_size = 50

0 commit comments

Comments (0)