Skip to content

Commit 46ffc40

Browse files
authored
Merge pull request #4453 from ferenci84/bedrock_reasoning_2
Process thinking blocks for Anthropic with Bedrock provider
2 parents 338ba83 + 7949f9a commit 46ffc40

File tree

16 files changed

+1207
-900
lines changed

16 files changed

+1207
-900
lines changed

binary/package-lock.json

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/index.d.ts

Lines changed: 34 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -43,13 +43,13 @@ export interface IndexingProgressUpdate {
4343
desc: string;
4444
shouldClearIndexes?: boolean;
4545
status:
46-
| "loading"
47-
| "indexing"
48-
| "done"
49-
| "failed"
50-
| "paused"
51-
| "disabled"
52-
| "cancelled";
46+
| "loading"
47+
| "indexing"
48+
| "done"
49+
| "failed"
50+
| "paused"
51+
| "disabled"
52+
| "cancelled";
5353
debugInfo?: string;
5454
}
5555

@@ -312,7 +312,7 @@ export interface CompletionOptions extends BaseCompletionOptions {
312312
model: string;
313313
}
314314

315-
export type ChatMessageRole = "user" | "assistant" | "system" | "tool";
315+
export type ChatMessageRole = "user" | "assistant" | "thinking" | "system" | "tool";
316316

317317
export type TextMessagePart = {
318318
type: "text";
@@ -357,6 +357,14 @@ export interface UserChatMessage {
357357
content: MessageContent;
358358
}
359359

360+
export interface ThinkingChatMessage {
361+
role: "thinking";
362+
content: MessageContent;
363+
signature?: string;
364+
redactedThinking?: string;
365+
toolCalls?: ToolCallDelta[];
366+
}
367+
360368
export interface AssistantChatMessage {
361369
role: "assistant";
362370
content: MessageContent;
@@ -371,6 +379,7 @@ export interface SystemChatMessage {
371379
export type ChatMessage =
372380
| UserChatMessage
373381
| AssistantChatMessage
382+
| ThinkingChatMessage
374383
| SystemChatMessage
375384
| ToolResultChatMessage;
376385

@@ -679,10 +688,10 @@ export interface IDE {
679688
getCurrentFile(): Promise<
680689
| undefined
681690
| {
682-
isUntitled: boolean;
683-
path: string;
684-
contents: string;
685-
}
691+
isUntitled: boolean;
692+
path: string;
693+
contents: string;
694+
}
686695
>;
687696

688697
getLastFileSaveTimestamp?(): number;
@@ -866,11 +875,11 @@ export interface CustomCommand {
866875
export interface Prediction {
867876
type: "content";
868877
content:
869-
| string
870-
| {
871-
type: "text";
872-
text: string;
873-
}[];
878+
| string
879+
| {
880+
type: "text";
881+
text: string;
882+
}[];
874883
}
875884

876885
export interface ToolExtras {
@@ -921,6 +930,8 @@ export interface BaseCompletionOptions {
921930
prediction?: Prediction;
922931
tools?: Tool[];
923932
toolChoice?: ToolChoice;
933+
reasoning?: boolean;
934+
reasoningBudgetTokens?: number;
924935
}
925936

926937
export interface ModelCapability {
@@ -1208,9 +1219,9 @@ export interface Config {
12081219
embeddingsProvider?: EmbeddingsProviderDescription | ILLM;
12091220
/** The model that Continue will use for tab autocompletions. */
12101221
tabAutocompleteModel?:
1211-
| CustomLLM
1212-
| ModelDescription
1213-
| (CustomLLM | ModelDescription)[];
1222+
| CustomLLM
1223+
| ModelDescription
1224+
| (CustomLLM | ModelDescription)[];
12141225
/** Options for tab autocomplete */
12151226
tabAutocompleteOptions?: Partial<TabAutocompleteOptions>;
12161227
/** UI styles customization */
@@ -1302,9 +1313,9 @@ export type PackageDetailsSuccess = PackageDetails & {
13021313
export type PackageDocsResult = {
13031314
packageInfo: ParsedPackageInfo;
13041315
} & (
1305-
| { error: string; details?: never }
1306-
| { details: PackageDetailsSuccess; error?: never }
1307-
);
1316+
| { error: string; details?: never }
1317+
| { details: PackageDetailsSuccess; error?: never }
1318+
);
13081319

13091320
export interface TerminalOptions {
13101321
reuseTerminal?: boolean;

core/llm/countTokens.ts

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@ class LlamaEncoding implements Encoding {
2727
}
2828

2929
class NonWorkerAsyncEncoder implements AsyncEncoder {
30-
constructor(private readonly encoding: Encoding) {}
30+
constructor(private readonly encoding: Encoding) { }
3131

32-
async close(): Promise<void> {}
32+
async close(): Promise<void> { }
3333

3434
async encode(text: string): Promise<number[]> {
3535
return this.encoding.encode(text);
@@ -366,6 +366,7 @@ function chatMessageIsEmpty(message: ChatMessage): boolean {
366366
message.content.trim() === "" &&
367367
!message.toolCalls
368368
);
369+
case "thinking":
369370
case "tool":
370371
return false;
371372
}
@@ -383,8 +384,8 @@ function compileChatMessages(
383384
): ChatMessage[] {
384385
let msgsCopy = msgs
385386
? msgs
386-
.map((msg) => ({ ...msg }))
387-
.filter((msg) => !chatMessageIsEmpty(msg) && msg.role !== "system")
387+
.map((msg) => ({ ...msg }))
388+
.filter((msg) => !chatMessageIsEmpty(msg) && msg.role !== "system")
388389
: [];
389390

390391
msgsCopy = addSpaceToAnyEmptyMessages(msgsCopy);
@@ -469,5 +470,6 @@ export {
469470
pruneLinesFromTop,
470471
pruneRawPromptFromTop,
471472
pruneStringFromBottom,
472-
pruneStringFromTop,
473+
pruneStringFromTop
473474
};
475+

core/llm/index.ts

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -195,11 +195,11 @@ export abstract class BaseLLM implements ILLM {
195195
options.completionOptions?.maxTokens ??
196196
(llmInfo?.maxCompletionTokens
197197
? Math.min(
198-
llmInfo.maxCompletionTokens,
199-
// Even if the model has a large maxTokens, we don't want to use that every time,
200-
// because it takes away from the context length
201-
this.contextLength / 4,
202-
)
198+
llmInfo.maxCompletionTokens,
199+
// Even if the model has a large maxTokens, we don't want to use that every time,
200+
// because it takes away from the context length
201+
this.contextLength / 4,
202+
)
203203
: DEFAULT_MAX_TOKENS),
204204
};
205205
this.requestOptions = options.requestOptions;
@@ -782,6 +782,7 @@ export abstract class BaseLLM implements ILLM {
782782
}
783783
}
784784

785+
let thinking = "";
785786
let completion = "";
786787
let citations: null | string[] = null;
787788

@@ -839,8 +840,16 @@ export abstract class BaseLLM implements ILLM {
839840
signal,
840841
completionOptions,
841842
)) {
842-
completion += chunk.content;
843-
yield chunk;
843+
844+
if (chunk.role === "assistant") {
845+
completion += chunk.content;
846+
yield chunk;
847+
}
848+
849+
if (chunk.role === "thinking") {
850+
thinking += chunk.content;
851+
yield chunk;
852+
}
844853
}
845854
}
846855
}
@@ -852,6 +861,18 @@ export abstract class BaseLLM implements ILLM {
852861
this._logTokensGenerated(completionOptions.model, prompt, completion);
853862

854863
if (logEnabled && this.writeLog) {
864+
if (thinking) {
865+
await this.writeLog(`Thinking:\n${thinking}\n\n`);
866+
}
867+
/*
868+
TODO: According to: https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
869+
During tool use, you must pass thinking and redacted_thinking blocks back to the API,
870+
and you must include the complete unmodified block back to the API. This is critical
871+
for maintaining the model's reasoning flow and conversation integrity.
872+
873+
On the other hand, adding thinking and redacted_thinking blocks are ignored on subsequent
874+
requests when not using tools, so it's the simplest option to always add to history.
875+
*/
855876
await this.writeLog(`Completion:\n${completion}\n\n`);
856877

857878
if (citations) {
@@ -927,15 +948,15 @@ export abstract class BaseLLM implements ILLM {
927948
);
928949
}
929950

930-
protected async *_streamComplete(
951+
protected async * _streamComplete(
931952
prompt: string,
932953
signal: AbortSignal,
933954
options: CompletionOptions,
934955
): AsyncGenerator<string> {
935956
throw new Error("Not implemented");
936957
}
937958

938-
protected async *_streamChat(
959+
protected async * _streamChat(
939960
messages: ChatMessage[],
940961
signal: AbortSignal,
941962
options: CompletionOptions,

core/llm/llms/Anthropic.ts

Lines changed: 41 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,15 @@ class Anthropic extends BaseLLM {
3030
description: tool.function.description,
3131
input_schema: tool.function.parameters,
3232
})),
33+
thinking: options.reasoning ? {
34+
type: "enabled",
35+
budget_tokens: options.reasoningBudgetTokens,
36+
} : undefined,
3337
tool_choice: options.toolChoice
3438
? {
35-
type: "tool",
36-
name: options.toolChoice.function.name,
37-
}
39+
type: "tool",
40+
name: options.toolChoice.function.name,
41+
}
3842
: undefined,
3943
};
4044

@@ -63,6 +67,23 @@ class Anthropic extends BaseLLM {
6367
input: JSON.parse(toolCall.function?.arguments || "{}"),
6468
})),
6569
};
70+
} else if (message.role === "thinking" && !message.redactedThinking) {
71+
return {
72+
role: "assistant",
73+
content: [{
74+
type: "thinking",
75+
thinking: message.content,
76+
signature: message.signature
77+
}]
78+
};
79+
} else if (message.role === "thinking" && message.redactedThinking) {
80+
return {
81+
role: "assistant",
82+
content: [{
83+
type: "redacted_thinking",
84+
data: message.redactedThinking
85+
}]
86+
};
6687
}
6788

6889
if (typeof message.content === "string") {
@@ -174,12 +195,12 @@ class Anthropic extends BaseLLM {
174195
messages: msgs,
175196
system: shouldCacheSystemMessage
176197
? [
177-
{
178-
type: "text",
179-
text: this.systemMessage,
180-
cache_control: { type: "ephemeral" },
181-
},
182-
]
198+
{
199+
type: "text",
200+
text: this.systemMessage,
201+
cache_control: { type: "ephemeral" },
202+
},
203+
]
183204
: systemMessage,
184205
}),
185206
signal,
@@ -216,13 +237,24 @@ class Anthropic extends BaseLLM {
216237
lastToolUseId = value.content_block.id;
217238
lastToolUseName = value.content_block.name;
218239
}
240+
// handle redacted thinking
241+
if (value.content_block.type === "redacted_thinking") {
242+
console.log("redacted thinking", value.content_block.data);
243+
yield { role: "thinking", content: "", redactedThinking: value.content_block.data };
244+
}
219245
break;
220246
case "content_block_delta":
221247
// https://docs.anthropic.com/en/api/messages-streaming#delta-types
222248
switch (value.delta.type) {
223249
case "text_delta":
224250
yield { role: "assistant", content: value.delta.text };
225251
break;
252+
case "thinking_delta":
253+
yield { role: "thinking", content: value.delta.thinking };
254+
break;
255+
case "signature_delta":
256+
yield { role: "thinking", content: "", signature: value.delta.signature };
257+
break;
226258
case "input_json_delta":
227259
if (!lastToolUseId || !lastToolUseName) {
228260
throw new Error("No tool use found");

0 commit comments

Comments
 (0)