continuedev · sestinj · Mar 16, 2025 · Mar 1, 2025 · Mar 3, 2025 · Mar 3, 2025
@@ -43,13 +43,13 @@ export interface IndexingProgressUpdate {
   desc: string;
   shouldClearIndexes?: boolean;
   status:
-    | "loading"
-    | "indexing"
-    | "done"
-    | "failed"
-    | "paused"
-    | "disabled"
-    | "cancelled";
+  | "loading"
+  | "indexing"
+  | "done"
+  | "failed"
+  | "paused"
+  | "disabled"
+  | "cancelled";
   debugInfo?: string;
 }
 
@@ -312,7 +312,7 @@ export interface CompletionOptions extends BaseCompletionOptions {
   model: string;
 }
 
-export type ChatMessageRole = "user" | "assistant" | "system" | "tool";
+export type ChatMessageRole = "user" | "assistant" | "thinking" | "system" | "tool";
 
 export type TextMessagePart = {
   type: "text";
@@ -357,6 +357,14 @@ export interface UserChatMessage {
   content: MessageContent;
 }
 
+export interface ThinkingChatMessage {
+  role: "thinking";
+  content: MessageContent;
+  signature?: string;
+  redactedThinking?: string;
+  toolCalls?: ToolCallDelta[];
+}
+
 export interface AssistantChatMessage {
   role: "assistant";
   content: MessageContent;
@@ -371,6 +379,7 @@ export interface SystemChatMessage {
 export type ChatMessage =
   | UserChatMessage
   | AssistantChatMessage
+  | ThinkingChatMessage
   | SystemChatMessage
   | ToolResultChatMessage;
 
@@ -679,10 +688,10 @@ export interface IDE {
   getCurrentFile(): Promise<
     | undefined
     | {
-        isUntitled: boolean;
-        path: string;
-        contents: string;
-      }
+      isUntitled: boolean;
+      path: string;
+      contents: string;
+    }
   >;
 
   getLastFileSaveTimestamp?(): number;
@@ -866,11 +875,11 @@ export interface CustomCommand {
 export interface Prediction {
   type: "content";
   content:
-    | string
-    | {
-        type: "text";
-        text: string;
-      }[];
+  | string
+  | {
+    type: "text";
+    text: string;
+  }[];
 }
 
 export interface ToolExtras {
@@ -921,6 +930,8 @@ export interface BaseCompletionOptions {
   prediction?: Prediction;
   tools?: Tool[];
   toolChoice?: ToolChoice;
+  reasoning?: boolean;
+  reasoningBudgetTokens?: number;
 }
 
 export interface ModelCapability {
@@ -1208,9 +1219,9 @@ export interface Config {
   embeddingsProvider?: EmbeddingsProviderDescription | ILLM;
   /** The model that Continue will use for tab autocompletions. */
   tabAutocompleteModel?:
-    | CustomLLM
-    | ModelDescription
-    | (CustomLLM | ModelDescription)[];
+  | CustomLLM
+  | ModelDescription
+  | (CustomLLM | ModelDescription)[];
   /** Options for tab autocomplete */
   tabAutocompleteOptions?: Partial<TabAutocompleteOptions>;
   /** UI styles customization */
@@ -1302,9 +1313,9 @@ export type PackageDetailsSuccess = PackageDetails & {
 export type PackageDocsResult = {
   packageInfo: ParsedPackageInfo;
 } & (
-  | { error: string; details?: never }
-  | { details: PackageDetailsSuccess; error?: never }
-);
+    | { error: string; details?: never }
+    | { details: PackageDetailsSuccess; error?: never }
+  );
 
 export interface TerminalOptions {
   reuseTerminal?: boolean;

@@ -27,9 +27,9 @@ class LlamaEncoding implements Encoding {
 }
 
 class NonWorkerAsyncEncoder implements AsyncEncoder {
-  constructor(private readonly encoding: Encoding) {}
+  constructor(private readonly encoding: Encoding) { }
 
-  async close(): Promise<void> {}
+  async close(): Promise<void> { }
 
   async encode(text: string): Promise<number[]> {
     return this.encoding.encode(text);
@@ -366,6 +366,7 @@ function chatMessageIsEmpty(message: ChatMessage): boolean {
         message.content.trim() === "" &&
         !message.toolCalls
       );
+    case "thinking":
     case "tool":
       return false;
   }
@@ -383,8 +384,8 @@ function compileChatMessages(
 ): ChatMessage[] {
   let msgsCopy = msgs
     ? msgs
-        .map((msg) => ({ ...msg }))
-        .filter((msg) => !chatMessageIsEmpty(msg) && msg.role !== "system")
+      .map((msg) => ({ ...msg }))
+      .filter((msg) => !chatMessageIsEmpty(msg) && msg.role !== "system")
     : [];
 
   msgsCopy = addSpaceToAnyEmptyMessages(msgsCopy);
@@ -469,5 +470,6 @@ export {
   pruneLinesFromTop,
   pruneRawPromptFromTop,
   pruneStringFromBottom,
-  pruneStringFromTop,
+  pruneStringFromTop
 };
+
@@ -193,11 +193,11 @@ export abstract class BaseLLM implements ILLM {
         options.completionOptions?.maxTokens ??
         (llmInfo?.maxCompletionTokens
           ? Math.min(
-              llmInfo.maxCompletionTokens,
-              // Even if the model has a large maxTokens, we don't want to use that every time,
-              // because it takes away from the context length
-              this.contextLength / 4,
-            )
+            llmInfo.maxCompletionTokens,
+            // Even if the model has a large maxTokens, we don't want to use that every time,
+            // because it takes away from the context length
+            this.contextLength / 4,
+          )
           : DEFAULT_MAX_TOKENS),
     };
     this.requestOptions = options.requestOptions;
@@ -780,6 +780,7 @@ export abstract class BaseLLM implements ILLM {
       }
     }
 
+    let thinking = "";
     let completion = "";
     let citations: null | string[] = null
 
@@ -834,8 +835,16 @@ export abstract class BaseLLM implements ILLM {
             signal,
             completionOptions,
           )) {
-            completion += chunk.content;
-            yield chunk;
+
+            if (chunk.role === "assistant") {
+              completion += chunk.content;
+              yield chunk;
+            }
+
+            if (chunk.role === "thinking") {
+              thinking += chunk.content;
+              yield chunk;
+            }
           }
         }
       }
@@ -847,6 +856,18 @@ export abstract class BaseLLM implements ILLM {
     this._logTokensGenerated(completionOptions.model, prompt, completion);
 
     if (logEnabled && this.writeLog) {
+      if (thinking) {
+        await this.writeLog(`Thinking:\n${thinking}\n\n`);
+      }
+      /*
+      TODO: According to: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
+      During tool use, you must pass thinking and redacted_thinking blocks back to the API,
+      and you must include the complete unmodified block back to the API. This is critical
+      for maintaining the model's reasoning flow and conversation integrity.
+
+      On the other hand, adding thinking and redacted_thinking blocks are ignored on subsequent
+      requests when not using tools, so it's the simplest option to always add to history.
+      */
       await this.writeLog(`Completion:\n${completion}\n\n`);
 
       if (citations) {
@@ -920,15 +941,15 @@ export abstract class BaseLLM implements ILLM {
     );
   }
 
-  protected async *_streamComplete(
+  protected async * _streamComplete(
     prompt: string,
     signal: AbortSignal,
     options: CompletionOptions,
   ): AsyncGenerator<string> {
     throw new Error("Not implemented");
   }
 
-  protected async *_streamChat(
+  protected async * _streamChat(
     messages: ChatMessage[],
     signal: AbortSignal,
     options: CompletionOptions,

@@ -30,11 +30,15 @@ class Anthropic extends BaseLLM {
         description: tool.function.description,
         input_schema: tool.function.parameters,
       })),
+      thinking: options.reasoning ? {
+        type: "enabled",
+        budget_tokens: options.reasoningBudgetTokens,
+      } : undefined,
       tool_choice: options.toolChoice
         ? {
-            type: "tool",
-            name: options.toolChoice.function.name,
-          }
+          type: "tool",
+          name: options.toolChoice.function.name,
+        }
         : undefined,
     };
 
@@ -63,6 +67,23 @@ class Anthropic extends BaseLLM {
           input: JSON.parse(toolCall.function?.arguments || "{}"),
         })),
       };
+    } else if (message.role === "thinking" && !message.redactedThinking) {
+      return {
+        role: "assistant",
+        content: [{
+          type: "thinking",
+          thinking: message.content,
+          signature: message.signature
+        }]
+      };
+    } else if (message.role === "thinking" && message.redactedThinking) {
+      return {
+        role: "assistant",
+        content: [{
+          type: "redacted_thinking",
+          data: message.redactedThinking
+        }]
+      };
     }
 
     if (typeof message.content === "string") {
@@ -174,12 +195,12 @@ class Anthropic extends BaseLLM {
         messages: msgs,
         system: shouldCacheSystemMessage
           ? [
-              {
-                type: "text",
-                text: this.systemMessage,
-                cache_control: { type: "ephemeral" },
-              },
-            ]
+            {
+              type: "text",
+              text: this.systemMessage,
+              cache_control: { type: "ephemeral" },
+            },
+          ]
           : systemMessage,
       }),
       signal,
@@ -216,13 +237,24 @@ class Anthropic extends BaseLLM {
             lastToolUseId = value.content_block.id;
             lastToolUseName = value.content_block.name;
           }
+          // handle redacted thinking
+          if (value.content_block.type === "redacted_thinking") {
+            console.log("redacted thinking", value.content_block.data);
+            yield { role: "thinking", content: "", redactedThinking: value.content_block.data };
+          }
           break;
         case "content_block_delta":
           // https://docs.anthropic.com/en/api/messages-streaming#delta-types
           switch (value.delta.type) {
             case "text_delta":
               yield { role: "assistant", content: value.delta.text };
               break;
+            case "thinking_delta":
+              yield { role: "thinking", content: value.delta.thinking };
+              break;
+            case "signature_delta":
+              yield { role: "thinking", content: "", signature: value.delta.signature };
+              break;
             case "input_json_delta":
               if (!lastToolUseId || !lastToolUseName) {
                 throw new Error("No tool use found");