Skip to content

core[minor],google-common[minor]: Add support for generic objects in prompts, gemini audio/video docs #5043

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Apr 11, 2024
10 changes: 8 additions & 2 deletions langchain-core/src/prompts/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,13 @@ function _coerceMessagePromptTemplateLike(
const message = coerceMessageLikeToMessage(messagePromptTemplateLike);
let templateData:
| string
| (string | _TextTemplateParam | _ImageTemplateParam)[];
| (
| string
| _TextTemplateParam
| _ImageTemplateParam
// eslint-disable-next-line @typescript-eslint/no-explicit-any
| Record<string, any>
)[];

if (typeof message.content === "string") {
templateData = message.content;
Expand All @@ -718,7 +724,7 @@ function _coerceMessagePromptTemplateLike(
} else if ("image_url" in item) {
return { image_url: item.image_url };
} else {
throw new Error("Invalid message content");
return item;
}
});
}
Expand Down
26 changes: 26 additions & 0 deletions langchain-core/src/prompts/tests/chat.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -568,3 +568,29 @@ test("Multi-modal, multi part chat prompt works with instances of BaseMessage",
});
expect(messages).toMatchSnapshot();
});

// Checks that a HumanMessage carrying a generic `audio` content object can be
// passed through a ChatPromptTemplate via a MessagesPlaceholder.
// NOTE: registered with plain `test` rather than `test.only` so the other
// tests in this file are not skipped.
test("Gemini can understand audio", async () => {
  // Placeholder payload: a literal string standing in for base64 audio data.
  // Nothing is read from disk here.
  const audioBase64 = `fs.readFileSync(audioPath, "base64");`;

  const prompt = ChatPromptTemplate.fromMessages([
    new MessagesPlaceholder("audio"),
  ]);

  const pInvoke = await prompt.invoke({
    audio: new HumanMessage({
      content: [
        {
          type: "audio",
          data: {
            url: `data:audio/mp3;base64,${audioBase64}`,
          },
        },
        {
          type: "text",
          text: "Summarize this audio. Be very concise.",
        },
      ],
    }),
  });
  // The formatted prompt value is only logged; no assertion is made on it.
  console.log(JSON.stringify(pInvoke, null, 2));
});
1 change: 1 addition & 0 deletions libs/langchain-google-common/src/utils/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export function copyAIModelParamsInto(
const model = options?.model ?? params?.model ?? target.model;
ret.modelName =
model ?? options?.modelName ?? params?.modelName ?? target.modelName;
ret.model = model;
ret.temperature =
options?.temperature ?? params?.temperature ?? target.temperature;
ret.maxOutputTokens =
Expand Down
52 changes: 46 additions & 6 deletions libs/langchain-google-common/src/utils/gemini.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@ import type {
} from "../types.js";
import { GoogleAISafetyError } from "./safety.js";

/**
 * Splits a `data:` URL into its media type and payload.
 *
 * The header is everything between `data:` and the FIRST comma; the payload is
 * everything after that comma, so payloads that themselves contain `,` or `:`
 * are returned intact. Header parameters such as `;base64` or `;charset=...`
 * are dropped from the returned media type.
 *
 * @param str - Candidate URL, e.g. `data:audio/mp3;base64,AAAA`.
 * @returns `{ mimeType, data }`, or `null` when `str` does not start with
 *   `data:` or has no comma separating header from payload.
 */
const extractMimeType = (
  str: string
): { mimeType: string; data: string } | null => {
  const scheme = "data:";
  if (!str.startsWith(scheme)) {
    return null;
  }

  // Without a comma there is no payload to extract; report "not parseable"
  // instead of returning `data: undefined` behind a `string` type.
  const separatorIndex = str.indexOf(",", scheme.length);
  if (separatorIndex === -1) {
    return null;
  }

  const header = str.slice(scheme.length, separatorIndex);
  const mimeType = header.split(";", 1)[0] ?? "";

  return {
    mimeType,
    data: str.slice(separatorIndex + 1),
  };
};

function messageContentText(
content: MessageContentText
): GeminiPartText | null {
Expand All @@ -51,17 +63,14 @@ function messageContentImageUrl(
typeof content.image_url === "string"
? content.image_url
: content.image_url.url;

if (!url) {
throw new Error("Missing Image URL");
}

if (url.startsWith("data:")) {
const mineTypeAndData = extractMimeType(url);
if (mineTypeAndData) {
return {
inlineData: {
mimeType: url.split(":")[1].split(";")[0],
data: url.split(",")[1],
},
inlineData: mineTypeAndData,
};
} else {
// FIXME - need some way to get mime type
Expand All @@ -74,6 +83,32 @@ function messageContentImageUrl(
}
}

/**
 * Converts an `audio` message content object into a Gemini part.
 *
 * Reads the URL from `content.data.url`. When `extractMimeType` recognises it
 * as a `data:` URL, the result is returned as `inlineData`; otherwise the URL
 * is passed through as `fileData.fileUri` with a hard-coded `audio/mpeg`
 * mime type.
 *
 * @param content - Content object expected to carry `data.url`.
 * @returns An inline-data part for `data:` URLs, a file-data part otherwise.
 * @throws Error("Missing Audio URL") when `data` or `data.url` is absent/empty.
 */
function messageContentToAudio(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  content: Record<string, any>
): GeminiPartInlineData | GeminiPartFileData {
  // `data` can be present as a key yet hold null/undefined; optional chaining
  // routes that case to the explicit error below instead of a raw TypeError.
  const url = content.data?.url;

  if (!url) {
    throw new Error("Missing Audio URL");
  }

  const mimeTypeAndData = extractMimeType(url);
  if (mimeTypeAndData) {
    return {
      inlineData: mimeTypeAndData,
    };
  }

  // FIXME - need some way to get mime type
  return {
    fileData: {
      mimeType: "audio/mpeg",
      fileUri: url,
    },
  };
}

export function messageContentToParts(content: MessageContent): GeminiPart[] {
// Convert a string to a text type MessageContent if needed
const messageContent: MessageContent =
Expand Down Expand Up @@ -101,6 +136,11 @@ export function messageContentToParts(content: MessageContent): GeminiPart[] {
return messageContentImageUrl(content as MessageContentImageUrl);
}
break;
case "audio":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the plan to change this to a generic "media" or "object" or "blob"? (And have the other methods named similarly)
This way we can support audio, video, and images at once.

if ("data" in content) {
return messageContentToAudio(content);
}
break;
default:
throw new Error(
`Unsupported type received while converting message to message parts`
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import fs from "fs";
import {
ChatPromptTemplate,
MessagesPlaceholder,
} from "@langchain/core/prompts";
import { HumanMessage } from "@langchain/core/messages";
import { ChatGoogle } from "../chat_models.js";

/**
 * Reads the file at `filePath` synchronously and returns its bytes encoded as
 * a base64 string.
 *
 * @param filePath - Path of the file to read.
 * @returns Base64 encoding of the file contents.
 */
function convertMp3ToBase64(filePath: string): string {
  return fs.readFileSync(filePath, { encoding: "base64" });
}

// Sends an mp3 (as a base64 data URL) plus a text instruction to ChatGoogle
// through a prompt that consists of a single "audio" messages placeholder,
// then logs whatever the chain returns. No assertions are made on the output.
test("Gemini can understand audio", async () => {
  const chatModel = new ChatGoogle({
    model: "gemini-1.5-pro-preview-0409",
    temperature: 0,
  });

  // Encode the fixture file so it can be embedded in a data URL.
  const encodedAudio = convertMp3ToBase64("./src/tests/data/audio.mp3");

  const audioPrompt = ChatPromptTemplate.fromMessages([
    new MessagesPlaceholder("audio"),
  ]);
  const pipeline = audioPrompt.pipe(chatModel);

  // One human turn: the audio part first, followed by the instruction text.
  const audioPart = {
    type: "audio",
    data: {
      url: `data:audio/mp3;base64,${encodedAudio}`,
    },
  };
  const instructionPart = {
    type: "text",
    text: "Summarize this audio. Be very concise.",
  };
  const humanTurn = new HumanMessage({
    content: [audioPart, instructionPart],
  });

  const response = await pipeline.invoke({ audio: humanTurn });
  console.log("response", response);
});
Binary file not shown.
Binary file not shown.
Loading