@@ -53,17 +53,31 @@ class TokensInfo:
53
53
role : str = None
54
54
55
55
56
@dataclasses.dataclass(frozen=True)
class ComputeTokensResult:
    """Immutable result carrying the token information for an input.

    NOTE(review): another class named ``ComputeTokensResult`` is defined
    further down in this file. That later definition rebinds the
    module-level name, while ``PreviewComputeTokensResult`` keeps inheriting
    from this one -- confirm that this is intended.

    Attributes:
        tokens_info: Sequence of ``TokensInfo`` entries.
    """

    tokens_info: Sequence[TokensInfo]
59
+
60
+
61
class PreviewComputeTokensResult(ComputeTokensResult):
    """Preview flavour of ``ComputeTokensResult`` with a deprecated accessor.

    It adds no fields of its own; it only keeps the legacy
    ``token_info_list`` name available as a method that forwards to
    ``tokens_info``.
    """

    def token_info_list(self) -> Sequence[TokensInfo]:
        """Return ``self.tokens_info`` after emitting a ``DeprecationWarning``.

        Returns:
            The same object stored in ``tokens_info``.
        """
        # Imported locally, as in the original, so the block does not rely
        # on a module-level ``warnings`` import.
        import warnings

        # stacklevel=2 attributes the warning to the caller of this method
        # rather than to this line.
        warnings.warn(
            "PreviewComputeTokensResult.token_info_list is deprecated. "
            "Use ComputeTokensResult.tokens_info instead.",
            DeprecationWarning,
            stacklevel=2,
        )
        return self.tokens_info
68
+
69
+
56
70
@dataclasses .dataclass (frozen = True )
57
71
class ComputeTokensResult :
58
72
"""Represents token string pieces and ids output in compute_tokens function.
59
73
60
74
Attributes:
61
75
tokens_info: Lists of tokens_info from the input.
62
- The input `contents: ContentsType` could have
63
- multiple string instances and each tokens_info
64
- item represents each string instance. Each token
65
- info consists tokens list, token_ids list and
66
- a role.
76
+ The input `contents: ContentsType` could have
77
+ multiple string instances and each tokens_info
78
+ item represents each string instance. Each token
79
+ info consists tokens list, token_ids list and
80
+ a role.
67
81
token_info_list: the value in this field equal to tokens_info.
68
82
"""
69
83
@@ -523,6 +537,32 @@ def compute_tokens(self, contents: ContentsType) -> ComputeTokensResult:
523
537
)
524
538
525
539
540
class PreviewTokenizer(Tokenizer):
    """Tokenizer variant whose ``compute_tokens`` returns the preview result type."""

    def compute_tokens(self, contents: ContentsType) -> PreviewComputeTokensResult:
        """Compute tokens via the base ``Tokenizer`` and re-wrap the result.

        Args:
            contents: Input forwarded unchanged to ``Tokenizer.compute_tokens``.

        Returns:
            A ``PreviewComputeTokensResult`` whose ``tokens_info`` is the
            ``tokens_info`` sequence produced by the base implementation.
        """
        base_result = super().compute_tokens(contents)
        # The base method returns a ComputeTokensResult, not a sequence of
        # TokensInfo. Unwrap its ``tokens_info`` field; passing the result
        # object itself would nest one result inside another and make
        # ``token_info_list()`` return the wrong type.
        return PreviewComputeTokensResult(tokens_info=base_result.tokens_info)
543
+
544
+
545
def _get_tokenizer_for_model_preview(model_name: str) -> PreviewTokenizer:
    """Build a ``PreviewTokenizer`` for the given model name.

    Usage (presumably exposed publicly as ``get_tokenizer_for_model`` in the
    preview namespace -- confirm against the package exports):
        ```
        tokenizer = get_tokenizer_for_model("gemini-1.5-pro-001")
        print(tokenizer.count_tokens("Hello world!"))
        ```

    Supported models can be found at
    https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models.

    Args:
        model_name: Name of the model whose tokenizer should be used.

    Returns:
        A ``PreviewTokenizer`` constructed from
        ``get_tokenizer_name(model_name)``.

    Raises:
        ValueError: If ``model_name`` is empty (or otherwise falsy).
    """
    if model_name:
        tokenizer_name = get_tokenizer_name(model_name)
        return PreviewTokenizer(tokenizer_name)

    raise ValueError("model_name must not be empty.")
564
+
565
+
526
566
def get_tokenizer_for_model (model_name : str ) -> Tokenizer :
527
567
"""Returns a tokenizer for the given tokenizer name.
528
568
0 commit comments