|
16 | 16 | import hashlib
|
17 | 17 | import io
|
18 | 18 | import os
|
19 |
| -import tempfile |
20 | 19 | import shutil
|
| 20 | +import tempfile |
21 | 21 | from typing import List
|
22 | 22 | from unittest import mock
|
23 | 23 | from vertexai.generative_models import Content, Image, Part
|
|
27 | 27 | get_tokenizer_for_model,
|
28 | 28 | )
|
29 | 29 | import pytest
|
30 |
| -from sentencepiece import sentencepiece_model_pb2 |
31 | 30 | import sentencepiece as spm
|
| 31 | +from sentencepiece import sentencepiece_model_pb2 |
| 32 | +from google.cloud.aiplatform_v1beta1.types import ( |
| 33 | + content as gapic_content_types, |
| 34 | +) |
32 | 35 |
|
33 | 36 | _TOKENIZER_NAME = "google/gemma"
|
34 | 37 | _MODEL_NAME = "gemini-1.5-pro"
|
|
63 | 66 | [
|
64 | 67 | Part.from_text(_SENTENCE_1),
|
65 | 68 | Part.from_text(_SENTENCE_2),
|
| 69 | + Part.from_text(_EMPTY_SENTENCE), |
| 70 | + ], |
| 71 | + [_SENTENCE_1, _SENTENCE_2, _EMPTY_SENTENCE], |
| 72 | + [ |
| 73 | + _TOKENS_MAP[_SENTENCE_1]["ids"], |
| 74 | + _TOKENS_MAP[_SENTENCE_2]["ids"], |
| 75 | + _TOKENS_MAP[_EMPTY_SENTENCE]["ids"], |
66 | 76 | ],
|
67 |
| - [_SENTENCE_1, _SENTENCE_2], |
68 |
| - [_TOKENS_MAP[_SENTENCE_1]["ids"], _TOKENS_MAP[_SENTENCE_2]["ids"]], |
69 | 77 | ),
|
70 | 78 | (
|
71 | 79 | Content(role="user", parts=[Part.from_text(_SENTENCE_1)]),
|
|
78 | 86 | parts=[
|
79 | 87 | Part.from_text(_SENTENCE_1),
|
80 | 88 | Part.from_text(_SENTENCE_2),
|
| 89 | + Part.from_text(_EMPTY_SENTENCE), |
81 | 90 | ],
|
82 | 91 | ),
|
83 |
| - [_SENTENCE_1, _SENTENCE_2], |
84 |
| - [_TOKENS_MAP[_SENTENCE_1]["ids"], _TOKENS_MAP[_SENTENCE_2]["ids"]], |
| 92 | + [_SENTENCE_1, _SENTENCE_2, _EMPTY_SENTENCE], |
| 93 | + [ |
| 94 | + _TOKENS_MAP[_SENTENCE_1]["ids"], |
| 95 | + _TOKENS_MAP[_SENTENCE_2]["ids"], |
| 96 | + _TOKENS_MAP[_EMPTY_SENTENCE]["ids"], |
| 97 | + ], |
85 | 98 | ),
|
86 | 99 | (
|
87 | 100 | [
|
|
128 | 141 |
|
129 | 142 |
|
130 | 143 | _LIST_OF_UNSUPPORTED_CONTENTS = [
|
| 144 | + gapic_content_types.Part( |
| 145 | + video_metadata=gapic_content_types.VideoMetadata(start_offset="10s") |
| 146 | + ), |
131 | 147 | Part.from_uri("gs://bucket/object", mime_type="mime_type"),
|
132 | 148 | Part.from_data(b"inline_data_bytes", mime_type="mime_type"),
|
133 | 149 | Part.from_dict({"function_call": {"name": "test_function_call"}}),
|
|
0 commit comments