Skip to content

Commit 6ade289

Browse files
authored
google-common[minor]: Add stream_usage (#5763)
* google-vertexai[minor]: Add stream usage
* fix tests
* bump min core version
1 parent ed493df commit 6ade289

File tree

7 files changed

+103
-23
lines changed

7 files changed

+103
-23
lines changed

libs/langchain-google-common/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
"author": "LangChain",
4141
"license": "MIT",
4242
"dependencies": {
43-
"@langchain/core": ">0.1.56 <0.3.0",
43+
"@langchain/core": ">=0.2.5 <0.3.0",
4444
"uuid": "^9.0.0",
4545
"zod-to-json-schema": "^3.22.4"
4646
},

libs/langchain-google-common/src/chat_models.ts

+20-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { getEnvironmentVariable } from "@langchain/core/utils/env";
2-
import { type BaseMessage } from "@langchain/core/messages";
2+
import { UsageMetadata, type BaseMessage } from "@langchain/core/messages";
33
import { CallbackManagerForLLMRun } from "@langchain/core/callbacks/manager";
44

55
import {
@@ -150,7 +150,8 @@ export interface ChatGoogleBaseInput<AuthOptions>
150150
extends BaseChatModelParams,
151151
GoogleConnectionParams<AuthOptions>,
152152
GoogleAIModelParams,
153-
GoogleAISafetyParams {}
153+
GoogleAISafetyParams,
154+
Pick<GoogleAIBaseLanguageModelCallOptions, "streamUsage"> {}
154155

155156
function convertToGeminiTools(
156157
structuredTools: (StructuredToolInterface | Record<string, unknown>)[]
@@ -216,6 +217,8 @@ export abstract class ChatGoogleBase<AuthOptions>
216217

217218
safetyHandler: GoogleAISafetyHandler;
218219

220+
streamUsage = true;
221+
219222
protected connection: ChatConnection<AuthOptions>;
220223

221224
protected streamedConnection: ChatConnection<AuthOptions>;
@@ -226,7 +229,7 @@ export abstract class ChatGoogleBase<AuthOptions>
226229
copyAndValidateModelParamsInto(fields, this);
227230
this.safetyHandler =
228231
fields?.safetyHandler ?? new DefaultGeminiSafetyHandler();
229-
232+
this.streamUsage = fields?.streamUsage ?? this.streamUsage;
230233
const client = this.buildClient(fields);
231234
this.buildConnection(fields ?? {}, client);
232235
}
@@ -342,12 +345,24 @@ export abstract class ChatGoogleBase<AuthOptions>
342345

343346
// Get the streaming parser of the response
344347
const stream = response.data as JsonStream;
345-
348+
let usageMetadata: UsageMetadata | undefined;
346349
// Loop until the end of the stream
347350
// During the loop, yield each time we get a chunk from the streaming parser
348351
// that is either available or added to the queue
349352
while (!stream.streamDone) {
350353
const output = await stream.nextChunk();
354+
if (
355+
output &&
356+
output.usageMetadata &&
357+
this.streamUsage !== false &&
358+
options.streamUsage !== false
359+
) {
360+
usageMetadata = {
361+
input_tokens: output.usageMetadata.promptTokenCount,
362+
output_tokens: output.usageMetadata.candidatesTokenCount,
363+
total_tokens: output.usageMetadata.totalTokenCount,
364+
};
365+
}
351366
const chunk =
352367
output !== null
353368
? safeResponseToChatGeneration({ data: output }, this.safetyHandler)
@@ -356,6 +371,7 @@ export abstract class ChatGoogleBase<AuthOptions>
356371
generationInfo: { finishReason: "stop" },
357372
message: new AIMessageChunk({
358373
content: "",
374+
usage_metadata: usageMetadata,
359375
}),
360376
});
361377
yield chunk;

libs/langchain-google-common/src/types.ts

+8-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,14 @@ export interface GoogleAIBaseLLMInput<AuthOptions>
122122
export interface GoogleAIBaseLanguageModelCallOptions
123123
extends BaseLanguageModelCallOptions,
124124
GoogleAIModelRequestParams,
125-
GoogleAISafetyParams {}
125+
GoogleAISafetyParams {
126+
/**
127+
* Whether or not to include usage data, like token counts
128+
* in the streamed response chunks.
129+
* @default true
130+
*/
131+
streamUsage?: boolean;
132+
}
126133

127134
/**
128135
* Input to LLM class.

libs/langchain-google-common/src/utils/gemini.ts

+11
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import {
1212
MessageContentText,
1313
SystemMessage,
1414
ToolMessage,
15+
UsageMetadata,
1516
isAIMessage,
1617
} from "@langchain/core/messages";
1718
import {
@@ -604,12 +605,22 @@ export function responseToChatGenerations(
604605
id: toolCall.id,
605606
index: i,
606607
}));
608+
let usageMetadata: UsageMetadata | undefined;
609+
if ("usageMetadata" in response.data) {
610+
usageMetadata = {
611+
input_tokens: response.data.usageMetadata.promptTokenCount as number,
612+
output_tokens: response.data.usageMetadata
613+
.candidatesTokenCount as number,
614+
total_tokens: response.data.usageMetadata.totalTokenCount as number,
615+
};
616+
}
607617
ret = [
608618
new ChatGenerationChunk({
609619
message: new AIMessageChunk({
610620
content: combinedContent,
611621
additional_kwargs: ret[ret.length - 1]?.message.additional_kwargs,
612622
tool_call_chunks: toolCallChunks,
623+
usage_metadata: usageMetadata,
613624
}),
614625
text: combinedText,
615626
generationInfo: ret[ret.length - 1].generationInfo,

libs/langchain-google-vertexai/src/tests/chat_models.int.test.ts

+62
Original file line numberDiff line numberDiff line change
@@ -233,3 +233,65 @@ describe("GAuth Chat", () => {
233233
expect(result).toHaveProperty("location");
234234
});
235235
});
236+
237+
test("Stream token count usage_metadata", async () => {
238+
const model = new ChatVertexAI({
239+
temperature: 0,
240+
});
241+
let res: AIMessageChunk | null = null;
242+
for await (const chunk of await model.stream(
243+
"Why is the sky blue? Be concise."
244+
)) {
245+
if (!res) {
246+
res = chunk;
247+
} else {
248+
res = res.concat(chunk);
249+
}
250+
}
251+
console.log(res);
252+
expect(res?.usage_metadata).toBeDefined();
253+
if (!res?.usage_metadata) {
254+
return;
255+
}
256+
expect(res.usage_metadata.input_tokens).toBe(9);
257+
expect(res.usage_metadata.output_tokens).toBeGreaterThan(10);
258+
expect(res.usage_metadata.total_tokens).toBe(
259+
res.usage_metadata.input_tokens + res.usage_metadata.output_tokens
260+
);
261+
});
262+
263+
test("streamUsage excludes token usage", async () => {
264+
const model = new ChatVertexAI({
265+
temperature: 0,
266+
streamUsage: false,
267+
});
268+
let res: AIMessageChunk | null = null;
269+
for await (const chunk of await model.stream(
270+
"Why is the sky blue? Be concise."
271+
)) {
272+
if (!res) {
273+
res = chunk;
274+
} else {
275+
res = res.concat(chunk);
276+
}
277+
}
278+
console.log(res);
279+
expect(res?.usage_metadata).not.toBeDefined();
280+
});
281+
282+
test("Invoke token count usage_metadata", async () => {
283+
const model = new ChatVertexAI({
284+
temperature: 0,
285+
});
286+
const res = await model.invoke("Why is the sky blue? Be concise.");
287+
console.log(res);
288+
expect(res?.usage_metadata).toBeDefined();
289+
if (!res?.usage_metadata) {
290+
return;
291+
}
292+
expect(res.usage_metadata.input_tokens).toBe(9);
293+
expect(res.usage_metadata.output_tokens).toBeGreaterThan(10);
294+
expect(res.usage_metadata.total_tokens).toBe(
295+
res.usage_metadata.input_tokens + res.usage_metadata.output_tokens
296+
);
297+
});

libs/langchain-google-vertexai/src/tests/chat_models.standard.int.test.ts

-16
Original file line numberDiff line numberDiff line change
@@ -25,22 +25,6 @@ class ChatVertexAIStandardIntegrationTests extends ChatModelIntegrationTests<
2525
});
2626
}
2727

28-
async testUsageMetadataStreaming() {
29-
this.skipTestMessage(
30-
"testUsageMetadataStreaming",
31-
"ChatVertexAI",
32-
"Streaming tokens is not currently supported."
33-
);
34-
}
35-
36-
async testUsageMetadata() {
37-
this.skipTestMessage(
38-
"testUsageMetadata",
39-
"ChatVertexAI",
40-
"Usage metadata tokens is not currently supported."
41-
);
42-
}
43-
4428
async testToolMessageHistoriesListContent() {
4529
this.skipTestMessage(
4630
"testToolMessageHistoriesListContent",

yarn.lock

+1-1
Original file line numberDiff line numberDiff line change
@@ -10194,7 +10194,7 @@ __metadata:
1019410194
resolution: "@langchain/google-common@workspace:libs/langchain-google-common"
1019510195
dependencies:
1019610196
"@jest/globals": ^29.5.0
10197-
"@langchain/core": ">0.1.56 <0.3.0"
10197+
"@langchain/core": ">=0.2.5 <0.3.0"
1019810198
"@langchain/scripts": ~0.0.14
1019910199
"@swc/core": ^1.3.90
1020010200
"@swc/jest": ^0.2.29

0 commit comments

Comments (0)