diff --git a/docs/core_docs/.gitignore b/docs/core_docs/.gitignore
index 924a6920834f..11ed6749ec06 100644
--- a/docs/core_docs/.gitignore
+++ b/docs/core_docs/.gitignore
@@ -53,10 +53,6 @@ docs/use_cases/question_answering/citations.md
 docs/use_cases/question_answering/citations.mdx
 docs/use_cases/question_answering/chat_history.md
 docs/use_cases/question_answering/chat_history.mdx
-docs/use_cases/query_analysis/quickstart.md
-docs/use_cases/query_analysis/quickstart.mdx
-docs/use_cases/query_analysis/index.md
-docs/use_cases/query_analysis/index.mdx
 docs/use_cases/graph/semantic.md
 docs/use_cases/graph/semantic.mdx
 docs/use_cases/graph/quickstart.md
@@ -75,6 +71,18 @@ docs/use_cases/extraction/index.md
 docs/use_cases/extraction/index.mdx
 docs/use_cases/extraction/guidelines.md
 docs/use_cases/extraction/guidelines.mdx
+docs/use_cases/query_analysis/quickstart.md
+docs/use_cases/query_analysis/quickstart.mdx
+docs/use_cases/query_analysis/index.md
+docs/use_cases/query_analysis/index.mdx
+docs/use_cases/extraction/how_to/parse.md
+docs/use_cases/extraction/how_to/parse.mdx
+docs/use_cases/extraction/how_to/handle_long_text.md
+docs/use_cases/extraction/how_to/handle_long_text.mdx
+docs/use_cases/extraction/how_to/handle_files.md
+docs/use_cases/extraction/how_to/handle_files.mdx
+docs/use_cases/extraction/how_to/examples.md
+docs/use_cases/extraction/how_to/examples.mdx
 docs/use_cases/query_analysis/how_to/no_queries.md
 docs/use_cases/query_analysis/how_to/no_queries.mdx
 docs/use_cases/query_analysis/how_to/multiple_retrievers.md
@@ -99,14 +107,6 @@ docs/use_cases/query_analysis/techniques/expansion.md
 docs/use_cases/query_analysis/techniques/expansion.mdx
 docs/use_cases/query_analysis/techniques/decomposition.md
 docs/use_cases/query_analysis/techniques/decomposition.mdx
-docs/use_cases/extraction/how_to/parse.md
-docs/use_cases/extraction/how_to/parse.mdx
-docs/use_cases/extraction/how_to/handle_long_text.md
-docs/use_cases/extraction/how_to/handle_long_text.mdx
-docs/use_cases/extraction/how_to/handle_files.md
-docs/use_cases/extraction/how_to/handle_files.mdx
-docs/use_cases/extraction/how_to/examples.md
-docs/use_cases/extraction/how_to/examples.mdx
 docs/modules/model_io/output_parsers/custom.md
 docs/modules/model_io/output_parsers/custom.mdx
 docs/modules/model_io/chat/function_calling.md
diff --git a/docs/core_docs/docs/use_cases/media.mdx b/docs/core_docs/docs/use_cases/media.mdx
new file mode 100644
index 000000000000..261c23d3cbf8
--- /dev/null
+++ b/docs/core_docs/docs/use_cases/media.mdx
@@ -0,0 +1,56 @@
+# Audio/Video Structured Extraction
+
+Google's Gemini API offers support for audio and video input, along with function calling.
+By pairing these two features, we can extract structured data from audio or video input.
+
+In the following examples, we'll demonstrate how to read MP3 and MP4 files, send them to the Gemini API, and receive structured output in response.
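+
+Both examples below rely on the same message shape: a `HumanMessage` whose content pairs a `media` block (a MIME type plus base64-encoded data) with a `text` prompt. Here's a minimal sketch of that shape (the file name is just a placeholder):
+
+```typescript
+import { HumanMessage } from "@langchain/core/messages";
+import fs from "fs";
+
+// Placeholder file name; substitute any local MP3 or MP4 file.
+const data = fs.readFileSync("some_local_file.mp3", "base64");
+
+const message = new HumanMessage({
+  content: [
+    // The raw media, inlined as base64 alongside its MIME type.
+    { type: "media", mimeType: "audio/mp3", data },
+    // The instruction for the model.
+    { type: "text", text: "Describe what you hear." },
+  ],
+});
+```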
+
+## Setup
+
+These examples use the Gemini API, so you'll need a Google Vertex AI credentials file (or a stringified credentials file if you're in a web environment):
+
+```bash
+GOOGLE_APPLICATION_CREDENTIALS="credentials.json"
+```
+
+Next, install the `@langchain/google-vertexai` and `@langchain/core` packages:
+
+import IntegrationInstallTooltip from "@mdx_components/integration_install_tooltip.mdx";
+
+<IntegrationInstallTooltip></IntegrationInstallTooltip>
+
+```bash npm2yarn
+npm install @langchain/google-vertexai @langchain/core
+```
+
+## Video
+
+This example uses a [LangChain YouTube video on datasets and testing in LangSmith](https://www.youtube.com/watch?v=N9hjO-Uy1Vo), sped up to 1.5x speed.
+The video is converted to `base64` and sent to Gemini with a prompt asking for a structured list of tasks I can do to improve my knowledge of datasets and testing in LangSmith.
+
+We create a new tool for this using Zod, and pass it to the model via the `withStructuredOutput` method.
+
+import CodeBlock from "@theme/CodeBlock";
+
+import VideoExample from "@examples/use_cases/media/video.ts";
+
+<CodeBlock language="typescript">{VideoExample}</CodeBlock>
+
+## Audio
+
+The next example loads an audio (MP3) file containing Mozart's Requiem in D Minor, and prompts Gemini to return a single array of strings, with each string being an instrument heard in the piece.
+
+Here, we'll also use the `withStructuredOutput` method to get structured output from the model.
+
+import AudioExample from "@examples/use_cases/media/audio.ts";
+
+<CodeBlock language="typescript">{AudioExample}</CodeBlock>
+
+From a quick Google search, we can see the piece is scored for the following instruments:
+
+```txt
+The Requiem is scored for 2 basset horns in F, 2 bassoons, 2 trumpets in D, 3 trombones (alto, tenor, and bass),
+timpani (2 drums), violins, viola, and basso continuo (cello, double bass, and organ).
+```
+
+Gemini did pretty well here! Even though music isn't its primary focus, it was able to identify a number of the instruments used in the piece, and didn't hallucinate any!
diff --git a/examples/src/use_cases/media/audio.ts b/examples/src/use_cases/media/audio.ts
new file mode 100644
index 000000000000..cca82b428e29
--- /dev/null
+++ b/examples/src/use_cases/media/audio.ts
@@ -0,0 +1,67 @@
+import {
+  ChatPromptTemplate,
+  MessagesPlaceholder,
+} from "@langchain/core/prompts";
+import { ChatVertexAI } from "@langchain/google-vertexai";
+import { HumanMessage } from "@langchain/core/messages";
+import fs from "fs";
+import { z } from "zod";
+
+function fileToBase64(filePath: string): string {
+  return fs.readFileSync(filePath, "base64");
+}
+
+const mozartMp3File = "Mozart_Requiem_D_minor.mp3";
+const mozartInBase64 = fileToBase64(mozartMp3File);
+
+const tool = z.object({
+  instruments: z
+    .array(z.string())
+    .describe("A list of instruments found in the audio."),
+});
+
+const model = new ChatVertexAI({
+  model: "gemini-1.5-pro-preview-0409",
+  temperature: 0,
+}).withStructuredOutput(tool, {
+  name: "instruments_list_tool",
+});
+
+const prompt = ChatPromptTemplate.fromMessages([
+  new MessagesPlaceholder("audio"),
+]);
+
+const chain = prompt.pipe(model);
+const response = await chain.invoke({
+  audio: new HumanMessage({
+    content: [
+      {
+        type: "media",
+        mimeType: "audio/mp3",
+        data: mozartInBase64,
+      },
+      {
+        type: "text",
+        text: `The following audio is a song by Mozart. Respond with a list of instruments you hear in the song.
+
+Rules:
+Use the "instruments_list_tool" to return a list of instruments.`,
+      },
+    ],
+  }),
+});
+
+console.log("response", response);
+/*
+response {
+  instruments: [
+    'violin',   'viola',
+    'cello',    'double bass',
+    'flute',    'oboe',
+    'clarinet', 'bassoon',
+    'horn',     'trumpet',
+    'timpani'
+  ]
+}
+*/
diff --git a/examples/src/use_cases/media/video.ts b/examples/src/use_cases/media/video.ts
new file mode 100644
index 000000000000..275cb5e8a2d1
--- /dev/null
+++ b/examples/src/use_cases/media/video.ts
@@ -0,0 +1,64 @@
+import {
+  ChatPromptTemplate,
+  MessagesPlaceholder,
+} from "@langchain/core/prompts";
+import { ChatVertexAI } from "@langchain/google-vertexai";
+import { HumanMessage } from "@langchain/core/messages";
+import fs from "fs";
+import { z } from "zod";
+
+function fileToBase64(filePath: string): string {
+  return fs.readFileSync(filePath, "base64");
+}
+
+const lanceLsEvalsVideo = "lance_ls_eval_video.mp4";
+const lanceInBase64 = fileToBase64(lanceLsEvalsVideo);
+
+const tool = z.object({
+  tasks: z.array(z.string()).describe("A list of tasks."),
+});
+
+const model = new ChatVertexAI({
+  model: "gemini-1.5-pro-preview-0409",
+  temperature: 0,
+}).withStructuredOutput(tool, {
+  name: "tasks_list_tool",
+});
+
+const prompt = ChatPromptTemplate.fromMessages([
+  new MessagesPlaceholder("video"),
+]);
+
+const chain = prompt.pipe(model);
+const response = await chain.invoke({
+  video: new HumanMessage({
+    content: [
+      {
+        type: "media",
+        mimeType: "video/mp4",
+        data: lanceInBase64,
+      },
+      {
+        type: "text",
+        text: `The following video is an overview of how to build datasets in LangSmith.
+Given the following video, come up with three tasks I should do to further improve my knowledge around using datasets in LangSmith.
+Only reference features that were outlined or described in the video.
+
+Rules:
+Use the "tasks_list_tool" to return a list of tasks.
+Your tasks should be tailored for an engineer who is looking to improve their knowledge around using datasets and evaluations, specifically with LangSmith.`,
+      },
+    ],
+  }),
+});
+
+console.log("response", response);
+/*
+response {
+  tasks: [
+    'Explore the LangSmith SDK documentation for in-depth understanding of dataset creation, manipulation, and versioning functionalities.',
+    'Experiment with different dataset types like Key-Value, Chat, and LLM to understand their structures and use cases.',
+    'Try uploading a CSV file containing question-answer pairs to LangSmith and create a new dataset from it.'
+  ]
+}
+*/
diff --git a/langchain-core/src/prompts/chat.ts b/langchain-core/src/prompts/chat.ts
index 45defd2ce07d..948d88eb12ed 100644
--- a/langchain-core/src/prompts/chat.ts
+++ b/langchain-core/src/prompts/chat.ts
@@ -706,7 +706,13 @@ function _coerceMessagePromptTemplateLike(
   const message = coerceMessageLikeToMessage(messagePromptTemplateLike);
   let templateData:
     | string
-    | (string | _TextTemplateParam | _ImageTemplateParam)[];
+    | (
+        | string
+        | _TextTemplateParam
+        | _ImageTemplateParam
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        | Record<string, any>
+      )[];
 
   if (typeof message.content === "string") {
     templateData = message.content;
@@ -718,7 +724,7 @@ function _coerceMessagePromptTemplateLike(
       } else if ("image_url" in item) {
         return { image_url: item.image_url };
       } else {
-        throw new Error("Invalid message content");
+        return item;
       }
     });
   }
diff --git a/libs/langchain-google-common/src/utils/common.ts b/libs/langchain-google-common/src/utils/common.ts
index 512535987234..f242de83215c 100644
--- a/libs/langchain-google-common/src/utils/common.ts
+++ b/libs/langchain-google-common/src/utils/common.ts
@@ -24,6 +24,7 @@ export function copyAIModelParamsInto(
   const model = options?.model ?? params?.model ?? target.model;
   ret.modelName =
     model ?? options?.modelName ?? params?.modelName ?? target.modelName;
+  ret.model = model;
   ret.temperature =
     options?.temperature ?? params?.temperature ?? target.temperature;
   ret.maxOutputTokens =
diff --git a/libs/langchain-google-common/src/utils/gemini.ts b/libs/langchain-google-common/src/utils/gemini.ts
index e15bdde098d1..8e504408a832 100644
--- a/libs/langchain-google-common/src/utils/gemini.ts
+++ b/libs/langchain-google-common/src/utils/gemini.ts
@@ -35,6 +35,18 @@ import type {
 } from "../types.js";
 import { GoogleAISafetyError } from "./safety.js";
 
+const extractMimeType = (
+  str: string
+): { mimeType: string; data: string } | null => {
+  if (str.startsWith("data:")) {
+    return {
+      mimeType: str.split(":")[1].split(";")[0],
+      data: str.split(",")[1],
+    };
+  }
+  return null;
+};
+
 function messageContentText(
   content: MessageContentText
 ): GeminiPartText | null {
@@ -54,17 +66,14 @@ function messageContentImageUrl(
     typeof content.image_url === "string"
       ? content.image_url
       : content.image_url.url;
-
   if (!url) {
     throw new Error("Missing Image URL");
   }
-
-  if (url.startsWith("data:")) {
+  const mimeTypeAndData = extractMimeType(url);
+  if (mimeTypeAndData) {
     return {
-      inlineData: {
-        mimeType: url.split(":")[1].split(";")[0],
-        data: url.split(",")[1],
-      },
+      inlineData: mimeTypeAndData,
     };
   } else {
     // FIXME - need some way to get mime type
@@ -77,6 +86,29 @@ function messageContentImageUrl(
   }
 }
 
+function messageContentMedia(
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  content: Record<string, any>
+): GeminiPartInlineData | GeminiPartFileData {
+  if ("mimeType" in content && "data" in content) {
+    return {
+      inlineData: {
+        mimeType: content.mimeType,
+        data: content.data,
+      },
+    };
+  } else if ("mimeType" in content && "fileUri" in content) {
+    return {
+      fileData: {
+        mimeType: content.mimeType,
+        fileUri: content.fileUri,
+      },
+    };
+  }
+
+  throw new Error("Invalid media content");
+}
+
 export function messageContentToParts(content: MessageContent): GeminiPart[] {
   // Convert a string to a text type MessageContent if needed
   const messageContent: MessageContent =
@@ -104,6 +136,8 @@ export function messageContentToParts(content: MessageContent): GeminiPart[] {
         return messageContentImageUrl(content as MessageContentImageUrl);
       }
       break;
+    case "media":
+      return messageContentMedia(content);
     default:
       throw new Error(
         `Unsupported type received while converting message to message parts`
diff --git a/libs/langchain-google-gauth/package.json b/libs/langchain-google-gauth/package.json
index cef43ab3f6a9..4fa126edb474 100644
--- a/libs/langchain-google-gauth/package.json
+++ b/libs/langchain-google-gauth/package.json
@@ -65,7 +65,8 @@
     "release-it": "^15.10.1",
     "rollup": "^4.5.2",
     "ts-jest": "^29.1.0",
-    "typescript": "<5.2.0"
+    "typescript": "<5.2.0",
+    "zod": "^3.22.4"
   },
   "publishConfig": {
     "access": "public"
diff --git a/libs/langchain-google-gauth/src/tests/chat_models.audio.int.test.ts b/libs/langchain-google-gauth/src/tests/chat_models.audio.int.test.ts
new file mode 100644
index 000000000000..8cc04b4c61c2
--- /dev/null
+++ b/libs/langchain-google-gauth/src/tests/chat_models.audio.int.test.ts
@@ -0,0 +1,178 @@
+import fs from "fs";
+import {
+  ChatPromptTemplate,
+  MessagesPlaceholder,
+} from "@langchain/core/prompts";
+import { HumanMessage } from "@langchain/core/messages";
+import { z } from "zod";
+import { ChatGoogle } from "../chat_models.js";
+
+function fileToBase64(filePath: string): string {
+  const fileData = fs.readFileSync(filePath);
+  const base64String = Buffer.from(fileData).toString("base64");
+  return base64String;
+}
+
+test.skip("Gemini can understand audio", async () => {
+  const model = new ChatGoogle({
+    model: "gemini-1.5-pro-preview-0409",
+    temperature: 0,
+  });
+
+  const audioPath = "../../examples/Mozart_Requiem_D_minor.mp3";
+  const audioMimeType = "audio/mp3";
+  const audioBase64 = fileToBase64(audioPath);
+
+  const prompt = ChatPromptTemplate.fromMessages([
+    new MessagesPlaceholder("audio"),
+  ]);
+
+  const chain = prompt.pipe(model);
+  const response = await chain.invoke({
+    audio: new HumanMessage({
+      content: [
+        {
+          type: "media",
+          mimeType: audioMimeType,
+          data: audioBase64,
+        },
+        {
+          type: "text",
+          text: "Do you know this song? If so, who is the composer and can you give me a brief overview of the tone/tempo?",
+        },
+      ],
+    }),
+  });
+
+  expect(typeof response.content).toBe("string");
+  expect((response.content as string).length).toBeGreaterThan(15);
+});
+
+test.skip("Gemini can understand video", async () => {
+  const model = new ChatGoogle({
+    model: "gemini-1.5-pro-preview-0409",
+    temperature: 0,
+  });
+
+  const videoPath = "../../examples/lance_ls_eval_video.mp4";
+  const videoMimeType = "video/mp4";
+  const videoBase64 = fileToBase64(videoPath);
+
+  const prompt = ChatPromptTemplate.fromMessages([
+    new MessagesPlaceholder("video"),
+  ]);
+
+  const chain = prompt.pipe(model);
+  const response = await chain.invoke({
+    video: new HumanMessage({
+      content: [
+        {
+          type: "media",
+          mimeType: videoMimeType,
+          data: videoBase64,
+        },
+        {
+          type: "text",
+          text: "Summarize the video in a few sentences.",
+        },
+      ],
+    }),
+  });
+
+  expect(typeof response.content).toBe("string");
+  expect((response.content as string).length).toBeGreaterThan(15);
+});
+
+test.skip("Gemini can use tools with audio", async () => {
+  const audioPath = "../../examples/Mozart_Requiem_D_minor.mp3";
+  const audioMimeType = "audio/mp3";
+  const audioBase64 = fileToBase64(audioPath);
+
+  const tool = z.object({
+    instruments: z
+      .array(z.string())
+      .describe("A list of instruments found in the audio."),
+  });
+
+  const model = new ChatGoogle({
+    model: "gemini-1.5-pro-preview-0409",
+    temperature: 0,
+  }).withStructuredOutput(tool, {
+    name: "instruments_list_tool",
+  });
+
+  const prompt = ChatPromptTemplate.fromMessages([
+    new MessagesPlaceholder("audio"),
+  ]);
+
+  const chain = prompt.pipe(model);
+  const response = await chain.invoke({
+    audio: new HumanMessage({
+      content: [
+        {
+          type: "media",
+          mimeType: audioMimeType,
+          data: audioBase64,
+        },
+        {
+          type: "text",
+          text: `The following audio is a song by Mozart. Respond with a list of instruments you hear in the song.
+
+          Rules:
+          Use the "instruments_list_tool" to return a list of instruments.`,
+        },
+      ],
+    }),
+  });
+
+  expect(response.instruments).toBeTruthy();
+  expect(response.instruments.length).toBeGreaterThan(0);
+});
+
+test.skip("Gemini can use tools with video", async () => {
+  const videoPath = "../../examples/lance_ls_eval_video.mp4";
+  const videoMimeType = "video/mp4";
+  const videoBase64 = fileToBase64(videoPath);
+
+  const tool = z.object({
+    tasks: z.array(z.string()).describe("A list of tasks."),
+  });
+
+  const model = new ChatGoogle({
+    model: "gemini-1.5-pro-preview-0409",
+    temperature: 0,
+  }).withStructuredOutput(tool, {
+    name: "tasks_list_tool",
+  });
+
+  const prompt = ChatPromptTemplate.fromMessages([
+    new MessagesPlaceholder("video"),
+  ]);
+
+  const chain = prompt.pipe(model);
+  const response = await chain.invoke({
+    video: new HumanMessage({
+      content: [
+        {
+          type: "media",
+          mimeType: videoMimeType,
+          data: videoBase64,
+        },
+        {
+          type: "text",
+          text: `The following video is an overview of how to build datasets in LangSmith.
+          Given the following video, come up with three tasks I should do to further improve my knowledge around using datasets in LangSmith.
+          Only reference features that were outlined or described in the video.
+
+          Rules:
+          Use the "tasks_list_tool" to return a list of tasks.
+          Your tasks should be tailored for an engineer who is looking to improve their knowledge around using datasets and evaluations, specifically with LangSmith.`,
+        },
+      ],
+    }),
+  });
+
+  expect(response.tasks).toBeTruthy();
+  expect(response.tasks.length).toBeGreaterThanOrEqual(3);
+});
diff --git a/yarn.lock b/yarn.lock
index 819d35b18d47..ca2c1b40c297 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -9486,6 +9486,7 @@ __metadata:
     rollup: ^4.5.2
     ts-jest: ^29.1.0
     typescript: <5.2.0
+    zod: ^3.22.4
   languageName: unknown
   linkType: soft
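All of the examples and tests above inline media as base64 through the `inlineData` branch of the new `messageContentMedia` handler. That handler also accepts a `fileUri` in place of `data`, mapping onto Gemini's `fileData` part, which avoids embedding large payloads in the request body. A hedged sketch of that variant follows; the `gs://` bucket path is a placeholder, and this code path isn't exercised by the examples or tests in this diff:

```typescript
import {
  ChatPromptTemplate,
  MessagesPlaceholder,
} from "@langchain/core/prompts";
import { ChatVertexAI } from "@langchain/google-vertexai";
import { HumanMessage } from "@langchain/core/messages";

const model = new ChatVertexAI({
  model: "gemini-1.5-pro-preview-0409",
  temperature: 0,
});

const prompt = ChatPromptTemplate.fromMessages([
  new MessagesPlaceholder("audio"),
]);

const chain = prompt.pipe(model);
const response = await chain.invoke({
  audio: new HumanMessage({
    content: [
      {
        // Supplying "fileUri" instead of "data" selects the fileData
        // branch added in messageContentMedia; the bucket path below
        // is a placeholder.
        type: "media",
        mimeType: "audio/mp3",
        fileUri: "gs://my-bucket/Mozart_Requiem_D_minor.mp3",
      },
      {
        type: "text",
        text: "Summarize this recording in one sentence.",
      },
    ],
  }),
});

console.log(response.content);
```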