|
15 | 15 | )
|
16 | 16 | from openhands.core.config.llm_config import LLMConfig
|
17 | 17 | from openhands.events.event import Event, EventSource
|
| 18 | +from openhands.events.observation.agent import AgentCondensationObservation |
18 | 19 | from openhands.events.observation.observation import Observation
|
19 | 20 | from openhands.llm import LLM
|
20 | 21 | from openhands.memory.condenser import Condenser
|
@@ -214,73 +215,124 @@ def test_recent_events_condenser():
|
214 | 215 | assert result[2]._message == 'Event 5'
|
215 | 216 |
|
216 | 217 |
|
217 |
def test_llm_summarization_condenser_from_config():
    """Test that LLMSummarizingCondenser objects can be made from config."""
    # Build the condenser purely from its declarative config object.
    summarizer_config = LLMSummarizingCondenserConfig(
        max_size=50,
        keep_first=10,
        llm_config=LLMConfig(
            model='gpt-4o',
            api_key='test_key',
        ),
    )
    condenser = Condenser.from_config(summarizer_config)

    # The factory must produce the right type with every field threaded through.
    assert isinstance(condenser, LLMSummarizingCondenser)
    assert condenser.max_size == 50
    assert condenser.keep_first == 10
    assert condenser.llm.config.model == 'gpt-4o'
    assert condenser.llm.config.api_key.get_secret_value() == 'test_key'
230 | 235 |
|
231 | 236 |
|
232 |
def test_llm_amortized_summarization_condenser_invalid_config():
    """Test that LLMSummarizingCondenser raises error when keep_first > max_size."""
    # Each construction below violates a validation constraint and must
    # raise ValueError instead of producing a condenser.
    with pytest.raises(ValueError):
        LLMSummarizingCondenser(llm=MagicMock(), max_size=4, keep_first=2)

    with pytest.raises(ValueError):
        LLMSummarizingCondenser(llm=MagicMock(), max_size=0)

    with pytest.raises(ValueError):
        LLMSummarizingCondenser(llm=MagicMock(), keep_first=-1)
|
def test_llm_summarizing_condenser_grows_to_max_size(mock_llm, mock_state):
    """Test that LLMSummarizingCondenser correctly maintains an event context up to max size."""
    limit = 15
    condenser = LLMSummarizingCondenser(max_size=limit, llm=mock_llm)

    # While the history stays at or below the limit, every event must be
    # passed through untouched — no summarization should occur yet.
    for count in range(1, limit + 1):
        mock_state.history.append(create_test_event(f'Event {count - 1}'))
        condensed = condenser.condensed_history(mock_state)
        assert len(condensed) == count
| 261 | + |
def test_llm_summarizing_condenser_forgets_and_summarizes(mock_llm, mock_state):
    """Test that the LLMSummarizingCondenser forgets events and maintains a summary."""
    max_size = 4
    condenser = LLMSummarizingCondenser(max_size=max_size, keep_first=1, llm=mock_llm)

    # The very first event is pinned in place by keep_first=1.
    first_event = create_test_event('Event 0')
    mock_state.history.append(first_event)

    # Canned summary the mock LLM returns whenever condensation fires.
    mock_llm.set_mock_response_content('Summary of forgotten events')

    # Push well past max_size so condensation is guaranteed to trigger;
    # track the newest event explicitly for the final assertion.
    last_event = None
    for offset in range(max_size + 3):
        last_event = create_test_event(f'Event {offset + 1}')
        mock_state.history.append(last_event)

    results = condenser.condensed_history(mock_state)

    # Expected layout: [pinned first event, summary observation, newest event].
    assert len(results) == 3, f'Expected 3 events, got {len(results)}: {results}'
    assert (
        results[0] == first_event
    ), f'First event should be {first_event}, got {results[0]}'
    assert isinstance(
        results[1], AgentCondensationObservation
    ), f'Second event should be a summary, got {results[1]}'
    assert (
        results[1].content == 'Summary of forgotten events'
    ), f"Summary content should be 'Summary of forgotten events', got {results[1].content}"
    assert (
        results[2] == last_event
    ), f'Last event should be {last_event}, got {results[2]}'
def test_llm_summarizing_condenser_llm_call(mock_llm, mock_state):
    """Test that the LLM is called correctly when forgetting events."""
    max_size = 4
    condenser = LLMSummarizingCondenser(max_size=max_size, keep_first=1, llm=mock_llm)

    # Seed history with the event pinned by keep_first.
    mock_state.history.append(create_test_event('Event 0'))

    # Canned summary plus metrics so the condenser's bookkeeping is observable.
    mock_llm.set_mock_response_content('Summary of forgotten events')
    mock_llm.metrics.get.return_value = {'test_metric': 1.0}

    # Append events one at a time, condensing after each append. Forgetting
    # should fire exactly once as the history crosses max_size.
    for step in range(max_size):
        mock_state.history.append(create_test_event(f'Event {step + 1}'))
        condenser.condensed_history(mock_state)

    # Verify LLM was called with correct prompt
    mock_llm.completion.assert_called_once()
    call_args = mock_llm.completion.call_args[1]
    assert 'messages' in call_args
    assert len(call_args['messages']) == 1

    # Verify metrics were added to state
    assert 'condenser_meta' in mock_state.extra_data
    assert len(mock_state.extra_data['condenser_meta']) == 1
    assert mock_state.extra_data['condenser_meta'][0]['metrics'] == {'test_metric': 1.0}
|
263 | 334 |
|
264 | 335 |
|
265 |
| -def test_llm_condenser_error(): |
266 |
| - """Test that LLM errors are propagated during condensation.""" |
267 |
| - events = [create_test_event('Event 1', datetime(2024, 1, 1, 10, 0))] |
268 |
| - |
269 |
| - mock_state = MagicMock() |
270 |
| - mock_state.history = events |
271 |
| - |
272 |
| - mock_llm = MagicMock() |
273 |
| - mock_llm.completion.side_effect = Exception('LLM error') |
274 |
| - |
275 |
| - condenser = LLMSummarizingCondenser(llm=mock_llm) |
276 |
| - |
277 |
| - try: |
278 |
| - condenser.condensed_history(mock_state) |
279 |
| - raise AssertionError('Expected exception was not raised.') |
280 |
| - except Exception as e: |
281 |
| - assert str(e) == 'LLM error' |
282 |
| - |
283 |
| - |
284 | 336 | def test_amortized_forgetting_condenser_from_config():
|
285 | 337 | """Test that AmortizedForgettingCondenser objects can be made from config."""
|
286 | 338 | max_size = 50
|
|
0 commit comments