Add prompt caching (Sonnet, Haiku only) #3411

Merged
merged 39 commits into main from add-prompt-caching on Aug 27, 2024
Changes from 2 commits
Commits (39)
260d7ea
Add prompt caching
Kaushikdkrikhanu Aug 16, 2024
c757ef3
Merge branch 'main' into add-prompt-caching
Kaushikdkrikhanu Aug 16, 2024
ecb12ad
remove anthropic-version from extra_headers
Kaushikdkrikhanu Aug 16, 2024
0ea7e68
change supports_prompt_caching method to attribute
Kaushikdkrikhanu Aug 16, 2024
e02eae6
Merge branch 'main' of https://github.com/Kaushikdkrikhanu/OpenDevin …
Kaushikdkrikhanu Aug 16, 2024
6b9be67
Merge branch 'main' into add-prompt-caching
tobitege Aug 17, 2024
9fef189
Merge branch 'main' into add-prompt-caching
xingyaoww Aug 18, 2024
abcc8a1
Merge branch 'main' into add-prompt-caching
Kaushikdkrikhanu Aug 18, 2024
ffb6035
change caching strat and log cache statistics
Kaushikdkrikhanu Aug 19, 2024
87eb4a6
add reminder as a new message to fix caching
Kaushikdkrikhanu Aug 19, 2024
ae4f99a
Merge branch 'main' of https://github.com/Kaushikdkrikhanu/OpenDevin …
Kaushikdkrikhanu Aug 19, 2024
5b101a0
'Merge branch 'main' of https://github.com/Kaushikdkrikhanu/OpenDevin…
Kaushikdkrikhanu Aug 19, 2024
417a69e
fix unit test
Kaushikdkrikhanu Aug 19, 2024
aa515c9
Merge branch 'main' into add-prompt-caching
Kaushikdkrikhanu Aug 19, 2024
005602c
Merge branch 'main' into add-prompt-caching
Kaushikdkrikhanu Aug 19, 2024
5034633
Merge branch 'main' into add-prompt-caching
tobitege Aug 20, 2024
5c1317a
append reminder to the end of the last message content
Kaushikdkrikhanu Aug 20, 2024
d08cf8a
move token logs to post completion function
Kaushikdkrikhanu Aug 20, 2024
4441455
Merge branch 'main' into add-prompt-caching
Kaushikdkrikhanu Aug 20, 2024
1bee42f
Merge branch 'main' into add-prompt-caching
Kaushikdkrikhanu Aug 20, 2024
14888f7
fix unit test failure
Kaushikdkrikhanu Aug 20, 2024
2a35687
Merge branch 'main' into add-prompt-caching
Kaushikdkrikhanu Aug 21, 2024
a5d08fa
Merge branch 'main' into add-prompt-caching
tobitege Aug 21, 2024
ae66b5f
Merge branch 'main' into add-prompt-caching
enyst Aug 21, 2024
08004e4
Merge branch 'main' into add-prompt-caching
enyst Aug 21, 2024
749072d
fix reminder and prompt caching
enyst Aug 21, 2024
24d2e66
Merge branch 'main' into add-prompt-caching
Kaushikdkrikhanu Aug 21, 2024
386d14f
unit tests for prompt caching
enyst Aug 22, 2024
2553ed2
Merge branch 'main' into add-prompt-caching
Kaushikdkrikhanu Aug 22, 2024
732541c
Merge branch 'main' into add-prompt-caching
Kaushikdkrikhanu Aug 23, 2024
cb2e4cc
add test
enyst Aug 23, 2024
90cf091
Merge branch 'main' into add-prompt-caching
Kaushikdkrikhanu Aug 23, 2024
5e17027
clean up tests
enyst Aug 24, 2024
6d8e2a8
Merge branch 'add-prompt-caching' of github.com:Kaushikdkrikhanu/Open…
enyst Aug 24, 2024
8045111
separate reminder, use latest two messages
enyst Aug 24, 2024
fa516f7
Merge branch 'main' of github.com:All-Hands-AI/OpenHands into add-pro…
enyst Aug 24, 2024
1273d58
fix tests
enyst Aug 24, 2024
0f95bc8
Merge branch 'main' into add-prompt-caching
Kaushikdkrikhanu Aug 26, 2024
5667eca
Merge branch 'main' into add-prompt-caching
tobitege Aug 26, 2024
39 changes: 32 additions & 7 deletions agenthub/codeact_agent/codeact_agent.py
@@ -205,21 +205,46 @@ def step(self, state: State) -> Action:
# prepare what we want to send to the LLM
messages = self._get_messages(state)

response = self.llm.completion(
messages=[message.model_dump() for message in messages],
stop=[
params = {
'messages': [message.model_dump() for message in messages],
'stop': [
'</execute_ipython>',
'</execute_bash>',
'</execute_browse>',
],
temperature=0.0,
)
'temperature': 0.0,
}

if self.llm.supports__prompt_caching():
params['extra_headers'] = {
'anthropic-version': '2023-06-01',
'anthropic-beta': 'prompt-caching-2024-07-31',
}

response = self.llm.completion(**params)

return self.action_parser.parse(response)

def _get_messages(self, state: State) -> list[Message]:
messages: list[Message] = [
Message(role='system', content=[TextContent(text=self.system_message)]),
Message(role='user', content=[TextContent(text=self.in_context_example)]),
Message(
role='system',
content=[
TextContent(
text=self.system_message,
cache_prompt=self.llm.supports__prompt_caching(),
)
],
),
Message(
role='user',
content=[
TextContent(
text=self.in_context_example,
cache_prompt=self.llm.supports__prompt_caching(),
)
],
),
]

for event in state.history.get_events():
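For context, the following is a minimal, self-contained sketch of the request shape that the new `params` dict produces once it reaches litellm: content blocks carrying a `cache_control` marker plus the prompt-caching headers used in this revision of the PR. The model name and example messages are illustrative assumptions, not taken from the PR.

```python
# Hedged sketch: a direct litellm call with prompt-caching markers, mirroring
# the params built in codeact_agent.py above. Requires `litellm` and an
# ANTHROPIC_API_KEY in the environment; messages and model are placeholders.
import litellm

params = {
    'model': 'claude-3-5-sonnet-20240620',
    'messages': [
        {
            'role': 'system',
            'content': [
                {
                    'type': 'text',
                    'text': 'You are a helpful coding agent.',
                    # Marks this block for Anthropic prompt caching; in practice
                    # Anthropic only caches prefixes above a minimum token count.
                    'cache_control': {'type': 'ephemeral'},
                }
            ],
        },
        {'role': 'user', 'content': 'List the files in the repository.'},
    ],
    'temperature': 0.0,
    'extra_headers': {
        'anthropic-version': '2023-06-01',
        'anthropic-beta': 'prompt-caching-2024-07-31',
    },
}

response = litellm.completion(**params)
print(response.usage)
```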
9 changes: 8 additions & 1 deletion opendevin/core/message.py
@@ -20,10 +20,17 @@ def serialize_model(self):
class TextContent(Content):
type: ContentType = ContentType.TEXT
text: str
cache_prompt: bool = False

@model_serializer
def serialize_model(self):
return {'type': self.type.value, 'text': self.text}
data: dict[str, str | dict[str, str]] = {
'type': self.type.value,
'text': self.text,
}
if self.cache_prompt:
data['cache_control'] = {'type': 'ephemeral'}
return data


class ImageContent(Content):
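The `cache_prompt` flag only changes the serialized payload. Below is a hypothetical standalone restatement of that serializer as a plain function, to show the exact dict a cached block turns into; the key names mirror the diff, but the function itself is not part of the PR.

```python
# Sketch of the serialization behavior added to TextContent above,
# written as a free function over plain dicts.
def serialize_text_content(text: str, cache_prompt: bool = False) -> dict:
    data: dict = {'type': 'text', 'text': text}
    if cache_prompt:
        # Anthropic's prompt-caching beta expects this marker on blocks
        # that should be written to / read from the cache.
        data['cache_control'] = {'type': 'ephemeral'}
    return data


print(serialize_text_content('You are a helpful agent.'))
# {'type': 'text', 'text': 'You are a helpful agent.'}
print(serialize_text_content('You are a helpful agent.', cache_prompt=True))
# {'type': 'text', 'text': 'You are a helpful agent.', 'cache_control': {'type': 'ephemeral'}}
```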
10 changes: 10 additions & 0 deletions opendevin/llm/llm.py
@@ -421,6 +421,16 @@ def async_streaming_completion(self):
def supports_vision(self):
return litellm.supports_vision(self.config.model)

def supports__prompt_caching(self):
cache_prompting_supported_models = [
'claude-3-5-sonnet-20240620',
'claude-3-haiku-20240307',
]
if self.config.model in cache_prompting_supported_models:
return True
else:
return False

def _post_completion(self, response: str) -> None:
"""Post-process the completion response."""
try:
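As a usage note, the commit history above also mentions logging cache statistics in `_post_completion`. The sketch below shows how the capability check and that logging could look as free functions; the usage field names (`cache_creation_input_tokens`, `cache_read_input_tokens`) are Anthropic's cache metrics and are assumed here to be passed through by the client library, so treat this as an illustration rather than the PR's implementation.

```python
# Hedged sketch: model allow-list check plus cache-statistics logging.
CACHE_PROMPTING_SUPPORTED_MODELS = {
    'claude-3-5-sonnet-20240620',
    'claude-3-haiku-20240307',
}


def supports_prompt_caching(model: str) -> bool:
    return model in CACHE_PROMPTING_SUPPORTED_MODELS


def log_cache_stats(usage: object) -> None:
    # getattr keeps this safe when the provider reports no cache stats.
    created = getattr(usage, 'cache_creation_input_tokens', 0) or 0
    read = getattr(usage, 'cache_read_input_tokens', 0) or 0
    print(f'Cache write tokens: {created}, cache read tokens: {read}')
```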