@@ -16,7 +16,7 @@ class VertexAI extends BaseLLM {
   declare geminiInstance: Gemini;
 
   static defaultOptions: Partial<LLMOptions> | undefined = {
-    maxEmbeddingBatchSize: 5,
+    maxEmbeddingBatchSize: 250,
     region: "us-central1",
   };
 
@@ -35,6 +35,13 @@ class VertexAI extends BaseLLM {
   }
 
   constructor(_options: LLMOptions) {
+    if (_options.region !== "us-central1") {
+      // Any region outside of us-central1 has a max batch size of 5.
+      _options.maxEmbeddingBatchSize = Math.min(
+        _options.maxEmbeddingBatchSize ?? 5,
+        5,
+      );
+    }
     super(_options);
     this.apiBase ??= VertexAI.getDefaultApiBaseFrom(_options);
     this.vertexProvider =
@@ -143,97 +150,16 @@ class VertexAI extends BaseLLM {
       `publishers/google/models/${options.model}:streamGenerateContent`,
       this.apiBase,
     );
-    // This feels hacky to repeat code from above function but was the quickest
-    // way to ensure system message re-formatting isn't done if user has specified v1
-    const isV1API = this.apiBase.includes("/v1/");
 
-    const contents = messages
-      .map((msg) => {
-        if (msg.role === "system" && !isV1API) {
-          return null; // Don't include system message in contents
-        }
-        if (msg.role === "tool") {
-          return null;
-        }
-
-        return {
-          role: msg.role === "assistant" ? "model" : "user",
-          parts:
-            typeof msg.content === "string"
-              ? [{ text: msg.content }]
-              : msg.content.map(this.geminiInstance.continuePartToGeminiPart),
-        };
-      })
-      .filter((c) => c !== null);
-
-    const body = {
-      ...this.geminiInstance.convertArgs(options),
-      contents,
-      // if this.systemMessage is defined, reformat it for Gemini API
-      ...(this.systemMessage &&
-        !isV1API && {
-          systemInstruction: { parts: [{ text: this.systemMessage }] },
-        }),
-    };
+    const body = this.geminiInstance.prepareBody(messages, options, false);
     const response = await this.fetch(apiURL, {
       method: "POST",
       body: JSON.stringify(body),
     });
-
-    let buffer = "";
-    for await (const chunk of streamResponse(response)) {
-      buffer += chunk;
-      if (buffer.startsWith("[")) {
-        buffer = buffer.slice(1);
-      }
-      if (buffer.endsWith("]")) {
-        buffer = buffer.slice(0, -1);
-      }
-      if (buffer.startsWith(",")) {
-        buffer = buffer.slice(1);
-      }
-
-      const parts = buffer.split("\n,");
-
-      let foundIncomplete = false;
-      for (let i = 0; i < parts.length; i++) {
-        const part = parts[i];
-        let data;
-        try {
-          data = JSON.parse(part);
-        } catch (e) {
-          foundIncomplete = true;
-          continue; // yo!
-        }
-        if (data.error) {
-          throw new Error(data.error.message);
-        }
-        // Check for existence of each level before accessing the final 'text' property
-        if (data?.candidates?.[0]?.content?.parts?.[0]?.text) {
-          // Incrementally stream the content to make it smoother
-          const content = data.candidates[0].content.parts[0].text;
-          const words = content.split(/(\s+)/);
-          const delaySeconds = Math.min(4.0 / (words.length + 1), 0.1);
-          while (words.length > 0) {
-            const wordsToYield = Math.min(3, words.length);
-            yield {
-              role: "assistant",
-              content: words.splice(0, wordsToYield).join(""),
-            };
-            await delay(delaySeconds);
-          }
-        } else {
-          // Handle the case where the expected data structure is not found
-          if (data?.candidates?.[0]?.finishReason !== "STOP") {
-            console.warn("Unexpected response format:", data);
-          }
-        }
-      }
-      if (foundIncomplete) {
-        buffer = parts[parts.length - 1];
-      } else {
-        buffer = "";
-      }
+    for await (const message of this.geminiInstance.processGeminiResponse(
+      streamResponse(response),
+    )) {
+      yield message;
     }
   }
 
@@ -337,7 +263,9 @@ class VertexAI extends BaseLLM {
     });
 
     for await (const chunk of streamSse(response)) {
-      yield chunk.choices[0].delta.content;
+      if (chunk.choices?.[0].delta) {
+        yield chunk.choices[0].delta.content;
+      }
     }
   }
 
@@ -432,7 +360,9 @@ class VertexAI extends BaseLLM {
   }
 
   supportsFim(): boolean {
-    return ["code-gecko", "codestral-latest"].includes(this.model);
+    return (
+      this.model.includes("code-gecko") || this.model.includes("codestral")
+    );
   }
 
   protected async _embed(chunks: string[]): Promise<number[][]> {