langchain-community[patch]: #3369 Streaming support for Replicate models

jeasonnow · jeasonnow · commit fe1013c801db · 2024-05-22T17:56:02.000+08:00
diff --git a/libs/langchain-community/package.json b/libs/langchain-community/package.json
@@ -195,7 +195,7 @@
     "puppeteer": "^19.7.2",
     "redis": "^4.6.6",
     "release-it": "^15.10.1",
-    "replicate": "^0.18.0",
+    "replicate": "^0.29.4",
     "rollup": "^3.19.1",
     "sonix-speech-recognition": "^2.1.1",
     "srt-parser-2": "^1.2.3",
@@ -316,7 +316,7 @@
     "portkey-ai": "^0.1.11",
     "puppeteer": "^19.7.2",
     "redis": "*",
-    "replicate": "^0.18.0",
+    "replicate": "^0.29.4",
     "sonix-speech-recognition": "^2.1.1",
     "srt-parser-2": "^1.2.3",
     "typeorm": "^0.3.12",
diff --git a/libs/langchain-community/src/llms/replicate.ts b/libs/langchain-community/src/llms/replicate.ts
@@ -1,5 +1,9 @@
 import { LLM, type BaseLLMParams } from "@langchain/core/language_models/llms";
 import { getEnvironmentVariable } from "@langchain/core/utils/env";
+import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager";
+import { GenerationChunk } from "@langchain/core/outputs";
+
+import type ReplicateInstance from "replicate";
 
 /**
  * Interface defining the structure of the input data for the Replicate
@@ -88,13 +92,85 @@ export class Replicate extends LLM implements ReplicateInput {
     prompt: string,
     options: this["ParsedCallOptions"]
   ): Promise<string> {
+    const replicate = await this._prepareReplicate();
+    const input = await this._getReplicateInput(replicate, prompt);
+
+    const output = await this.caller.callWithOptions(
+      { signal: options.signal },
+      () =>
+        replicate.run(this.model, {
+          input,
+        })
+    );
+
+    if (typeof output === "string") {
+      return output;
+    } else if (Array.isArray(output)) {
+      return output.join("");
+    } else {
+      // Note this is a little odd, but the output format is not consistent
+      // across models, so it makes some amount of sense.
+      return String(output);
+    }
+  }
+
+  /**
+   * Streams the model's response for a single prompt as generation chunks.
+   *
+   * Each "output" event from the Replicate stream is yielded as a
+   * `GenerationChunk` and reported to `runManager` as a new token. The
+   * "done" event is yielded as a final empty chunk flagged
+   * `finished: true`. Any other event type is ignored.
+   *
+   * @param prompt - Prompt text, sent under the resolved prompt key.
+   * @param options - Parsed call options; `signal` cancels the stream.
+   * @param runManager - Optional callback manager notified of each token.
+   * @throws Error with message "AbortError" once `options.signal` is aborted.
+   */
+  async *_streamResponseChunks(
+    prompt: string,
+    options: this["ParsedCallOptions"],
+    runManager?: CallbackManagerForLLMRun
+  ): AsyncGenerator<GenerationChunk> {
+    const replicate = await this._prepareReplicate();
+    const input = await this._getReplicateInput(replicate, prompt);
+
+    // NOTE(review): if `replicate.stream` is an async generator function, this
+    // wrapper only creates the iterator and the request itself starts on the
+    // first iteration below, outside the caller's retry handling — confirm.
+    const stream = await this.caller.callWithOptions(
+      { signal: options.signal },
+      async () =>
+        replicate.stream(this.model, {
+          input,
+        })
+    );
+    for await (const chunk of stream) {
+      // The signal passed to `callWithOptions` is not consulted while
+      // iterating, so check it per event to stop consuming once aborted.
+      if (options.signal?.aborted) {
+        throw new Error("AbortError");
+      }
+
+      if (chunk.event === "output") {
+        // Normalize once so the chunk text and the token callback agree.
+        const token = chunk.data ?? "";
+        yield new GenerationChunk({ text: token, generationInfo: chunk });
+        await runManager?.handleLLMNewToken(token);
+      } else if (chunk.event === "done") {
+        // Stream is done: emit a terminal marker chunk.
+        yield new GenerationChunk({
+          text: "",
+          generationInfo: { finished: true },
+        });
+      }
+    }
+  }
+
+  /**
+   * Dynamically imports the `replicate` package and returns its default
+   * export (the client constructor) under the `Replicate` key.
+   *
+   * @returns An object whose `Replicate` property is the package's default
+   * export.
+   * @throws Error with an installation hint when the dynamic import fails;
+   * the underlying import error is discarded.
+   * @ignore
+   */
+  static async imports(): Promise<{
+    Replicate: typeof ReplicateInstance;
+  }> {
+    try {
+      const { default: Replicate } = await import("replicate");
+      return { Replicate };
+    } catch (e) {
+      // Any import failure is reported as a missing-dependency error.
+      throw new Error(
+        "Please install replicate as a dependency with, e.g. `yarn add replicate`"
+      );
+    }
+  }
+
+  /**
+   * Builds a Replicate client from the dynamically imported package.
+   *
+   * A new client is constructed on every call, using `this.apiKey` for
+   * `auth` and "langchain" as the user agent.
+   *
+   * @returns A newly constructed Replicate client instance.
+   */
+  private async _prepareReplicate(): Promise<ReplicateInstance> {
     const imports = await Replicate.imports();
 
-    const replicate = new imports.Replicate({
+    return new imports.Replicate({
       userAgent: "langchain",
       auth: this.apiKey,
     });
+  }
 
+  private async _getReplicateInput(
+    replicate: ReplicateInstance,
+    prompt: string
+  ) {
     if (this.promptKey === undefined) {
       const [modelString, versionString] = this.model.split(":");
       const version = await replicate.models.versions.get(
@@ -119,40 +195,11 @@ export class Replicate extends LLM implements ReplicateInput {
         this.promptKey = sortedInputProperties[0][0] ?? "prompt";
       }
     }
-    const output = await this.caller.callWithOptions(
-      { signal: options.signal },
-      () =>
-        replicate.run(this.model, {
-          input: {
-            // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
-            [this.promptKey!]: prompt,
-            ...this.input,
-          },
-        })
-    );
-
-    if (typeof output === "string") {
-      return output;
-    } else if (Array.isArray(output)) {
-      return output.join("");
-    } else {
-      // Note this is a little odd, but the output format is not consistent
-      // across models, so it makes some amount of sense.
-      return String(output);
-    }
-  }
 
-  /** @ignore */
-  static async imports(): Promise<{
-    Replicate: typeof import("replicate").default;
-  }> {
-    try {
-      const { default: Replicate } = await import("replicate");
-      return { Replicate };
-    } catch (e) {
-      throw new Error(
-        "Please install replicate as a dependency with, e.g. `yarn add replicate`"
-      );
-    }
+    return {
+      // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+      [this.promptKey!]: prompt,
+      ...this.input,
+    };
   }
 }
diff --git a/yarn.lock b/yarn.lock
@@ -9154,7 +9154,7 @@ __metadata:
     puppeteer: ^19.7.2
     redis: ^4.6.6
     release-it: ^15.10.1
-    replicate: ^0.18.0
+    replicate: ^0.29.4
     rollup: ^3.19.1
     sonix-speech-recognition: ^2.1.1
     srt-parser-2: ^1.2.3
@@ -9277,7 +9277,7 @@ __metadata:
     portkey-ai: ^0.1.11
     puppeteer: ^19.7.2
     redis: "*"
-    replicate: ^0.18.0
+    replicate: ^0.29.4
     sonix-speech-recognition: ^2.1.1
     srt-parser-2: ^1.2.3
     typeorm: ^0.3.12
@@ -32771,6 +32771,19 @@ __metadata:
   languageName: node
   linkType: hard
 
+"readable-stream@npm:>=4.0.0":
+  version: 4.5.2
+  resolution: "readable-stream@npm:4.5.2"
+  dependencies:
+    abort-controller: ^3.0.0
+    buffer: ^6.0.3
+    events: ^3.3.0
+    process: ^0.11.10
+    string_decoder: ^1.3.0
+  checksum: c4030ccff010b83e4f33289c535f7830190773e274b3fcb6e2541475070bdfd69c98001c3b0cb78763fc00c8b62f514d96c2b10a8bd35d5ce45203a25fa1d33a
+  languageName: node
+  linkType: hard
+
 "readable-stream@npm:^2.0.0, readable-stream@npm:^2.0.1, readable-stream@npm:^2.3.0, readable-stream@npm:^2.3.5, readable-stream@npm:~2.3.6":
   version: 2.3.8
   resolution: "readable-stream@npm:2.3.8"
@@ -33191,10 +33204,15 @@ __metadata:
   languageName: node
   linkType: hard
 
-"replicate@npm:^0.18.0":
-  version: 0.18.0
-  resolution: "replicate@npm:0.18.0"
-  checksum: 547a8b386418aedf6e5be2086a63090e5a5f6cda36202a0122c4036a2af8a80efea420393e5efa4810c9cff0616a7df5adbd40fd4a0560f4aa1b4eda60a34794
+"replicate@npm:^0.29.4":
+  version: 0.29.4
+  resolution: "replicate@npm:0.29.4"
+  dependencies:
+    readable-stream: ">=4.0.0"
+  dependenciesMeta:
+    readable-stream:
+      optional: true
+  checksum: 9405e19f619134a312aa77b3c04156549e4c8ba5e0711a494b99358abd0378646c22cd9bf07e6f9c8ab4a2f80b69ba22ed0a5b8ec0610684e9fa5d413e3b5729
   languageName: node
   linkType: hard