Skip to content

Commit cc1aada

Browse files
csmith49 (Calvin Smith)
and
Calvin Smith
authored
fix: Tweak prompting behavior of LLMSummarizingCondenser (#7695)
Co-authored-by: Calvin Smith <[email protected]>
1 parent 8bceee9 commit cc1aada

File tree

2 files changed

+58
-15
lines changed

2 files changed

+58
-15
lines changed

openhands/core/config/condenser_config.py

+4
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,10 @@ class LLMSummarizingCondenserConfig(BaseModel):
7575
description='Maximum size of the condensed history before triggering forgetting.',
7676
ge=2,
7777
)
78+
max_event_length: int = Field(
79+
default=10_000,
80+
description='Maximum length of the event representations to be passed to the LLM.',
81+
)
7882

7983
model_config = {'extra': 'forbid'}
8084

openhands/memory/condenser/impl/llm_summarizing_condenser.py

+54-15
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from openhands.core.message import Message, TextContent
55
from openhands.events.action.agent import CondensationAction
66
from openhands.events.observation.agent import AgentCondensationObservation
7+
from openhands.events.serialization.event import truncate_content
78
from openhands.llm import LLM
89
from openhands.memory.condenser.condenser import (
910
Condensation,
@@ -20,7 +21,13 @@ class LLMSummarizingCondenser(RollingCondenser):
2021
and newly forgotten events.
2122
"""
2223

23-
def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 1):
24+
def __init__(
25+
self,
26+
llm: LLM,
27+
max_size: int = 100,
28+
keep_first: int = 1,
29+
max_event_length: int = 10_000,
30+
):
2431
if keep_first >= max_size // 2:
2532
raise ValueError(
2633
f'keep_first ({keep_first}) must be less than half of max_size ({max_size})'
@@ -32,10 +39,15 @@ def __init__(self, llm: LLM, max_size: int = 100, keep_first: int = 1):
3239

3340
self.max_size = max_size
3441
self.keep_first = keep_first
42+
self.max_event_length = max_event_length
3543
self.llm = llm
3644

3745
super().__init__()
3846

47+
def _truncate(self, content: str) -> str:
48+
"""Truncate the content to fit within the specified maximum event length."""
49+
return truncate_content(content, max_chars=self.max_event_length)
50+
3951
def get_condensation(self, view: View) -> Condensation:
4052
head = view[: self.keep_first]
4153
target_size = self.max_size // 2
@@ -56,40 +68,66 @@ def get_condensation(self, view: View) -> Condensation:
5668
forgotten_events.append(event)
5769

5870
# Construct prompt for summarization
59-
prompt = """You are maintaining state history for an LLM-based code agent. Track:
71+
prompt = """You are maintaining a context-aware state summary for an interactive agent. You will be given a list of events corresponding to actions taken by the agent, and the most recent previous summary if one exists. Track:
72+
73+
USER_CONTEXT: (Preserve essential user requirements, goals, and clarifications in concise form)
6074
61-
USER_CONTEXT: (Preserve essential user requirements, problem descriptions, and clarifications in concise form)
75+
COMPLETED: (Tasks completed so far, with brief results)
76+
PENDING: (Tasks that still need to be done)
77+
CURRENT_STATE: (Current variables, data structures, or relevant state)
6278
63-
STATE: {File paths, function signatures, data structures}
79+
For code-specific tasks, also include:
80+
CODE_STATE: {File paths, function signatures, data structures}
6481
TESTS: {Failing cases, error messages, outputs}
6582
CHANGES: {Code edits, variable updates}
6683
DEPS: {Dependencies, imports, external calls}
67-
INTENT: {Why changes were made, acceptance criteria}
84+
VERSION_CONTROL_STATUS: {Repository state, current branch, PR status, commit history}
6885
6986
PRIORITIZE:
70-
1. Capture key user requirements and constraints
71-
2. Maintain critical problem context
72-
3. Keep all sections concise
87+
1. Adapt tracking format to match the actual task type
88+
2. Capture key user requirements and goals
89+
3. Distinguish between completed and pending tasks
90+
4. Keep all sections concise and relevant
7391
74-
SKIP: {Git clones, build logs, file listings}
92+
SKIP: Tracking irrelevant details for the current task type
7593
76-
Example history format:
77-
USER_CONTEXT: Fix FITS card float representation - "0.009125" becomes "0.009124999999999999" causing comment truncation. Use Python's str() when possible while maintaining FITS compliance.
94+
Example formats:
7895
79-
STATE: mod_float() in card.py updated
96+
For code tasks:
97+
USER_CONTEXT: Fix FITS card float representation issue
98+
COMPLETED: Modified mod_float() in card.py, all tests passing
99+
PENDING: Create PR, update documentation
100+
CODE_STATE: mod_float() in card.py updated
80101
TESTS: test_format() passed
81102
CHANGES: str(val) replaces f"{val:.16G}"
82103
DEPS: None modified
83-
INTENT: Fix precision while maintaining FITS compliance"""
104+
VERSION_CONTROL_STATUS: Branch: fix-float-precision, Latest commit: a1b2c3d
105+
106+
For other tasks:
107+
USER_CONTEXT: Write 20 haikus based on coin flip results
108+
COMPLETED: 15 haikus written for results [T,H,T,H,T,H,T,T,H,T,H,T,H,T,H]
109+
PENDING: 5 more haikus needed
110+
CURRENT_STATE: Last flip: Heads, Haiku count: 15/20"""
84111

85112
prompt += '\n\n'
86113

87-
prompt += ('\n' + summary_event.message + '\n') if summary_event.message else ''
114+
# Add the previous summary if it exists. We'll always have a summary
115+
# event, but the types aren't precise enough to guarantee that it has a
116+
# message attribute.
117+
summary_event_content = self._truncate(
118+
summary_event.message if summary_event.message else ''
119+
)
120+
prompt += f'<PREVIOUS SUMMARY>\n{summary_event_content}\n</PREVIOUS SUMMARY>\n'
88121

89122
prompt += '\n\n'
90123

124+
# Add all events that are being forgotten. We use the string
125+
# representation defined by the event, and truncate it if necessary.
91126
for forgotten_event in forgotten_events:
92-
prompt += str(forgotten_event) + '\n\n'
127+
event_content = self._truncate(str(forgotten_event))
128+
prompt += f'<EVENT id={forgotten_event.id}>\n{event_content}\n</EVENT>\n'
129+
130+
prompt += 'Now summarize the events using the rules above.'
93131

94132
messages = [Message(role='user', content=[TextContent(text=prompt)])]
95133

@@ -121,6 +159,7 @@ def from_config(
121159
llm=LLM(config=config.llm_config),
122160
max_size=config.max_size,
123161
keep_first=config.keep_first,
162+
max_event_length=config.max_event_length,
124163
)
125164

126165

0 commit comments

Comments
 (0)