Skip to content

Added support for google specific arguments for video analysis #2110

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions pydantic_ai_slim/pydantic_ai/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,13 @@ class FileUrl(ABC):
* If False, the URL is sent directly to the model and no download is performed.
"""

vendor_metadata: dict[str, Any] | None = None
"""Vendor-specific metadata for the file.

Supported by:
- `GoogleModel`: `VideoUrl.vendor_metadata` is sent as the part's `video_metadata`; see <https://ai.google.dev/gemini-api/docs/video-understanding#customize-video-processing>.
"""

@property
@abstractmethod
def media_type(self) -> str:
Expand Down Expand Up @@ -263,6 +270,13 @@ class BinaryContent:
media_type: AudioMediaType | ImageMediaType | DocumentMediaType | str
"""The media type of the binary data."""

vendor_metadata: dict[str, Any] | None = None
"""Vendor-specific metadata for the file.

Supported by:
- `GoogleModel`: `BinaryContent.vendor_metadata` is sent as the part's `video_metadata`; see <https://ai.google.dev/gemini-api/docs/video-understanding#customize-video-processing>.
"""

kind: Literal['binary'] = 'binary'
"""Type identifier, this is available on all parts as a discriminator."""

Expand Down
18 changes: 16 additions & 2 deletions pydantic_ai_slim/pydantic_ai/models/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
GenerateContentConfigDict,
GenerateContentResponse,
HttpOptionsDict,
MediaResolution,
Part,
PartDict,
SafetySettingDict,
Expand Down Expand Up @@ -121,6 +122,12 @@ class GoogleModelSettings(ModelSettings, total=False):
See the [Gemini API docs](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/add-labels-to-api-calls) for use cases and limitations.
"""

google_video_resolution: MediaResolution
"""The video resolution to use for the model.

This value is passed to the API as `media_resolution`. See <https://ai.google.dev/api/generate-content#MediaResolution> for the accepted values.
"""


@dataclass(init=False)
class GoogleModel(Model):
Expand Down Expand Up @@ -292,6 +299,7 @@ async def _generate_content(
safety_settings=model_settings.get('google_safety_settings'),
thinking_config=model_settings.get('google_thinking_config'),
labels=model_settings.get('google_labels'),
media_resolution=model_settings.get('google_video_resolution'),
tools=cast(ToolListUnionDict, tools),
tool_config=tool_config,
response_mime_type=response_mime_type,
Expand Down Expand Up @@ -399,9 +407,15 @@ async def _map_user_prompt(self, part: UserPromptPart) -> list[PartDict]:
elif isinstance(item, BinaryContent):
# NOTE: The type from Google GenAI is incorrect, it should be `str`, not `bytes`.
base64_encoded = base64.b64encode(item.data).decode('utf-8')
content.append({'inline_data': {'data': base64_encoded, 'mime_type': item.media_type}}) # type: ignore
inline_data_dict = {'inline_data': {'data': base64_encoded, 'mime_type': item.media_type}}
if item.vendor_metadata:
inline_data_dict['video_metadata'] = item.vendor_metadata
content.append(inline_data_dict) # type: ignore
elif isinstance(item, VideoUrl) and item.is_youtube:
content.append({'file_data': {'file_uri': item.url, 'mime_type': item.media_type}})
file_data_dict = {'file_data': {'file_uri': item.url, 'mime_type': item.media_type}}
if item.vendor_metadata:
file_data_dict['video_metadata'] = item.vendor_metadata
content.append(file_data_dict) # type: ignore
elif isinstance(item, FileUrl):
if self.system == 'google-gla' or item.force_download:
downloaded_item = await download_item(item, data_format='base64')
Expand Down
2 changes: 1 addition & 1 deletion tests/models/cassettes/test_google/test_google_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down Expand Up @@ -86,7 +86,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down Expand Up @@ -173,7 +173,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down Expand Up @@ -92,7 +92,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down Expand Up @@ -124,7 +124,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ interactions:
- '268'
content-type:
- application/x-www-form-urlencoded
method: POST
method: post
uri: https://oauth2.googleapis.com/token
response:
headers:
Expand Down Expand Up @@ -57,7 +57,7 @@ interactions:
- application/json
host:
- us-central1-aiplatform.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ interactions:
- '268'
content-type:
- application/x-www-form-urlencoded
method: POST
method: post
uri: https://oauth2.googleapis.com/token
response:
headers:
Expand Down Expand Up @@ -57,7 +57,7 @@ interactions:
- application/json
host:
- us-central1-aiplatform.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11115,7 +11115,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down Expand Up @@ -90,7 +90,7 @@ interactions:
- application/json
host:
- generativelanguage.googleapis.com
method: POST
method: post
parsed_body:
contents:
- parts:
Expand Down
Loading
Loading