
Commit ac68bec

maxStopWords option
1 parent: d8eb189

10 files changed: +39 -12 lines

core/index.d.ts (+3)

@@ -58,6 +58,7 @@ export interface ILLM extends LLMOptions {
   title?: string;
   systemMessage?: string;
   contextLength: number;
+  maxStopWords?: number;
   completionOptions: CompletionOptions;
   requestOptions?: RequestOptions;
   promptTemplates?: Record<string, PromptTemplate>;

@@ -321,6 +322,7 @@ export interface LLMOptions {
   uniqueId?: string;
   systemMessage?: string;
   contextLength?: number;
+  maxStopWords?: number;
   completionOptions?: CompletionOptions;
   requestOptions?: RequestOptions;
   template?: TemplateType;

@@ -747,6 +749,7 @@ export interface ModelDescription {
   apiKey?: string;
   apiBase?: string;
   contextLength?: number;
+  maxStopWords?: number;
   template?: TemplateType;
   completionOptions?: BaseCompletionOptions;
   systemMessage?: string;
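
The same optional property is threaded through all three shapes so a value can travel end to end: ModelDescription (the serialized form read from config.json), LLMOptions (what an LLM class is constructed with), and ILLM (what the rest of the core reads at runtime).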

core/llm/index.ts (+2)

@@ -91,6 +91,7 @@ export abstract class BaseLLM implements ILLM {
   title?: string;
   systemMessage?: string;
   contextLength: number;
+  maxStopWords?: number | undefined;
   completionOptions: CompletionOptions;
   requestOptions?: RequestOptions;
   template?: TemplateType;

@@ -143,6 +144,7 @@ export abstract class BaseLLM implements ILLM {
     this.systemMessage = options.systemMessage;
     this.contextLength =
       options.contextLength ?? llmInfo?.contextLength ?? DEFAULT_CONTEXT_LENGTH;
+    this.maxStopWords = options.maxStopWords ?? this.maxStopWords;
     this.completionOptions = {
       ...options.completionOptions,
       model: options.model || "gpt-4",
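
The `??` on the new constructor line means an explicit `options.maxStopWords` wins, and any value already on the instance is only a fallback. A minimal standalone sketch of that resolution rule, with illustrative values (the helper name is hypothetical):

// Hypothetical helper mirroring `options.maxStopWords ?? this.maxStopWords`
const resolveMaxStopWords = (
  fromOptions: number | undefined,
  providerDefault: number | undefined,
): number | undefined => fromOptions ?? providerDefault;

resolveMaxStopWords(2, 4); // 2 — a user-configured cap takes precedence
resolveMaxStopWords(undefined, 16); // 16 — the provider default applies
resolveMaxStopWords(undefined, undefined); // undefined — no cap at all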

core/llm/llms/Deepseek.ts (+1 -1)

@@ -13,7 +13,7 @@ class Deepseek extends OpenAI {
     },
     useLegacyCompletionsEndpoint: false,
   };
-  protected maxStopWords: number | undefined = 16;
+  maxStopWords: number | undefined = 16;
 
   supportsFim(): boolean {
     return true;
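
Dropping `protected` here (and in Groq, OpenAI, and WatsonX below) lines up with the new public declaration on ILLM: TypeScript rejects a subclass that narrows an inherited public member to protected. A minimal sketch of the constraint, with hypothetical class names:

class Base {
  maxStopWords: number | undefined = undefined;
}
// class Sub extends Base { protected maxStopWords = 16; }
// ^ compile error: 'maxStopWords' is protected in 'Sub' but public in 'Base'
class Sub extends Base {
  maxStopWords: number | undefined = 16; // ok: visibility matches the base
}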

core/llm/llms/Groq.ts (+1 -1)

@@ -6,7 +6,7 @@ class Groq extends OpenAI {
   static defaultOptions: Partial<LLMOptions> = {
     apiBase: "https://api.groq.com/openai/v1/",
   };
-  protected maxStopWords: number | undefined = 4;
+  maxStopWords: number | undefined = 4;
 
   private static modelConversion: { [key: string]: string } = {
     "llama2-70b": "llama2-70b-4096",

core/llm/llms/OpenAI.ts (+8 -8)

@@ -42,7 +42,7 @@ const CHAT_ONLY_MODELS = [
 class OpenAI extends BaseLLM {
   public useLegacyCompletionsEndpoint: boolean | undefined = undefined;
 
-  protected maxStopWords: number | undefined = undefined;
+  maxStopWords: number | undefined = undefined;
 
   constructor(options: LLMOptions) {
     super(options);

@@ -104,13 +104,13 @@ class OpenAI extends BaseLLM {
       this.maxStopWords !== undefined
         ? options.stop?.slice(0, this.maxStopWords)
         : url.host === "api.deepseek.com"
-        ? options.stop?.slice(0, 16)
-        : url.port === "1337" ||
-          url.host === "api.openai.com" ||
-          url.host === "api.groq.com" ||
-          this.apiType === "azure"
-        ? options.stop?.slice(0, 4)
-        : options.stop,
+          ? options.stop?.slice(0, 16)
+          : url.port === "1337" ||
+              url.host === "api.openai.com" ||
+              url.host === "api.groq.com" ||
+              this.apiType === "azure"
+            ? options.stop?.slice(0, 4)
+            : options.stop,
     };
 
     return finalOptions;
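
Read as a whole, the reindented ternary gives an explicit `maxStopWords` priority over the hard-coded host rules (16 for Deepseek; 4 for api.openai.com, api.groq.com, port 1337, and Azure). A hypothetical standalone rendering of the same rule — the function name and signature are illustrative, not the library's API:

function truncateStop(
  stop: string[] | undefined,
  maxStopWords: number | undefined,
  url: URL,
  apiType?: string,
): string[] | undefined {
  // An explicit cap always wins.
  if (maxStopWords !== undefined) return stop?.slice(0, maxStopWords);
  // Otherwise fall back to known per-host limits.
  if (url.host === "api.deepseek.com") return stop?.slice(0, 16);
  if (
    url.port === "1337" ||
    url.host === "api.openai.com" ||
    url.host === "api.groq.com" ||
    apiType === "azure"
  ) {
    return stop?.slice(0, 4);
  }
  return stop; // unknown host: pass stop words through untouched
}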

core/llm/llms/WatsonX.ts (+4 -2)

@@ -13,7 +13,7 @@ const watsonxConfig = {
   },
 };
 class WatsonX extends BaseLLM {
-  protected maxStopWords: number | undefined = undefined;
+  maxStopWords: number | undefined = undefined;
 
   constructor(options: LLMOptions) {
     super(options);

@@ -176,7 +176,9 @@ class WatsonX extends BaseLLM {
       throw new Error(`Something went wrong. Check your credentials, please.`);
     }
 
-    const stopToken = this.watsonxStopToken ?? (options.model.includes("granite") ? "<|im_end|>" : undefined);
+    const stopToken =
+      this.watsonxStopToken ??
+      (options.model.includes("granite") ? "<|im_end|>" : undefined);
     var streamResponse = await fetch(
       `${this.watsonxUrl}/ml/v1/text/generation_stream?version=2023-05-29`,
       {

docs/static/schemas/config.json (+5)

@@ -239,6 +239,11 @@
           "default": 2048,
           "type": "integer"
         },
+        "maxStopWords": {
+          "title": "Max Stop Words",
+          "description": "The maximum number of stop words that the API will accept. You can set this if you are receiving an error about the number of stop words, but otherwise should leave it undefined.",
+          "type": "integer"
+        },
         "template": {
           "title": "Template",
           "description": "The chat template used to format messages. This is auto-detected for most models, but can be overridden here. Choose none if you are using vLLM or another server that automatically handles prompting.",

extensions/intellij/src/main/resources/config_schema.json (+5)

@@ -239,6 +239,11 @@
           "default": 2048,
           "type": "integer"
         },
+        "maxStopWords": {
+          "title": "Max Stop Words",
+          "description": "The maximum number of stop words that the API will accept. You can set this if you are receiving an error about the number of stop words, but otherwise should leave it undefined.",
+          "type": "integer"
+        },
         "template": {
           "title": "Template",
           "description": "The chat template used to format messages. This is auto-detected for most models, but can be overridden here. Choose none if you are using vLLM or another server that automatically handles prompting.",

extensions/vscode/config_schema.json (+5)

@@ -239,6 +239,11 @@
           "default": 2048,
           "type": "integer"
         },
+        "maxStopWords": {
+          "title": "Max Stop Words",
+          "description": "The maximum number of stop words that the API will accept. You can set this if you are receiving an error about the number of stop words, but otherwise should leave it undefined.",
+          "type": "integer"
+        },
         "template": {
           "title": "Template",
           "description": "The chat template used to format messages. This is auto-detected for most models, but can be overridden here. Choose none if you are using vLLM or another server that automatically handles prompting.",

extensions/vscode/continue_rc_schema.json (+5)

@@ -242,6 +242,11 @@
           "default": 2048,
           "type": "integer"
         },
+        "maxStopWords": {
+          "title": "Max Stop Words",
+          "description": "The maximum number of stop words that the API will accept. You can set this if you are receiving an error about the number of stop words, but otherwise should leave it undefined.",
+          "type": "integer"
+        },
         "template": {
           "title": "Template",
           "description": "The chat template used to format messages. This is auto-detected for most models, but can be overridden here. Choose none if you are using vLLM or another server that automatically handles prompting.",
