@@ -636,6 +636,7 @@ def predict(
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
+        stop_sequences: Optional[List[str]] = None,
     ) -> "TextGenerationResponse":
         """Gets model response for a single prompt.

@@ -645,6 +646,7 @@ def predict(
             temperature: Controls the randomness of predictions. Range: [0, 1]. Default: 0.
             top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering. Range: [1, 40]. Default: 40.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
+            stop_sequences: Customized stop sequences to stop the decoding process.

         Returns:
             A `TextGenerationResponse` object that contains the text produced by the model.
@@ -656,6 +658,7 @@ def predict(
             temperature=temperature,
             top_k=top_k,
             top_p=top_p,
+            stop_sequences=stop_sequences,
         )[0]

     def _batch_predict(
@@ -665,6 +668,7 @@ def _batch_predict(
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
+        stop_sequences: Optional[List[str]] = None,
     ) -> List["TextGenerationResponse"]:
         """Gets model response for a single prompt.

@@ -674,6 +678,7 @@ def _batch_predict(
             temperature: Controls the randomness of predictions. Range: [0, 1]. Default: 0.
             top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering. Range: [1, 40]. Default: 40.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
+            stop_sequences: Customized stop sequences to stop the decoding process.

         Returns:
             A list of `TextGenerationResponse` objects that contain the texts produced by the model.
@@ -693,6 +698,9 @@ def _batch_predict(
         if top_k:
             prediction_parameters["topK"] = top_k

+        if stop_sequences:
+            prediction_parameters["stopSequences"] = stop_sequences
+
         prediction_response = self._endpoint.predict(
             instances=instances,
             parameters=prediction_parameters,
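
Taken together, these hunks thread `stop_sequences` from the public `predict` method through `_batch_predict` into the `stopSequences` prediction parameter. A minimal usage sketch of the new argument follows; the import path and the `text-bison@001` model name are assumptions for illustration and do not appear in this diff:

```python
from vertexai.preview.language_models import TextGenerationModel

# "text-bison@001" is an assumed model name used only for illustration.
model = TextGenerationModel.from_pretrained("text-bison@001")

response = model.predict(
    "List the planets of the solar system, one per line.",
    max_output_tokens=128,
    temperature=0.0,
    # Decoding stops as soon as any of these sequences is produced.
    stop_sequences=["\n\n"],
)
print(response.text)
```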
@@ -1165,6 +1173,7 @@ def start_chat(
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
         message_history: Optional[List[ChatMessage]] = None,
+        stop_sequences: Optional[List[str]] = None,
     ) -> "ChatSession":
         """Starts a chat session with the model.

@@ -1178,6 +1187,7 @@ def start_chat(
             top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering. Range: [1, 40]. Default: 40.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
             message_history: A list of previously sent and received messages.
+            stop_sequences: Customized stop sequences to stop the decoding process.

         Returns:
             A `ChatSession` object.
@@ -1191,6 +1201,7 @@ def start_chat(
             top_k=top_k,
             top_p=top_p,
             message_history=message_history,
+            stop_sequences=stop_sequences,
         )

@@ -1291,6 +1302,7 @@ def __init__(
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
         message_history: Optional[List[ChatMessage]] = None,
+        stop_sequences: Optional[List[str]] = None,
     ):
         self._model = model
         self._context = context
@@ -1300,6 +1312,7 @@ def __init__(
         self._top_k = top_k
         self._top_p = top_p
         self._message_history: List[ChatMessage] = message_history or []
+        self._stop_sequences = stop_sequences

     @property
     def message_history(self) -> List[ChatMessage]:
@@ -1314,6 +1327,7 @@ def _prepare_request(
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
+        stop_sequences: Optional[List[str]] = None,
     ) -> _PredictionRequest:
         """Prepares a request for the language model.

@@ -1327,6 +1341,7 @@ def _prepare_request(
                 Uses the value specified when calling `ChatModel.start_chat` by default.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
                 Uses the value specified when calling `ChatModel.start_chat` by default.
+            stop_sequences: Customized stop sequences to stop the decoding process.

         Returns:
             A `_PredictionRequest` object.
@@ -1350,6 +1365,10 @@ def _prepare_request(
         if top_k:
             prediction_parameters["topK"] = top_k

+        stop_sequences = stop_sequences or self._stop_sequences
+        if stop_sequences:
+            prediction_parameters["stopSequences"] = stop_sequences
+
         message_structs = []
         for past_message in self._message_history:
             message_structs.append(
@@ -1426,6 +1445,7 @@ def send_message(
         temperature: Optional[float] = None,
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
+        stop_sequences: Optional[List[str]] = None,
     ) -> "TextGenerationResponse":
         """Sends message to the language model and gets a response.

@@ -1439,6 +1459,7 @@ def send_message(
                 Uses the value specified when calling `ChatModel.start_chat` by default.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
                 Uses the value specified when calling `ChatModel.start_chat` by default.
+            stop_sequences: Customized stop sequences to stop the decoding process.

         Returns:
             A `TextGenerationResponse` object that contains the text produced by the model.
@@ -1449,6 +1470,7 @@ def send_message(
             temperature=temperature,
             top_k=top_k,
             top_p=top_p,
+            stop_sequences=stop_sequences,
         )

         prediction_response = self._model._endpoint.predict(
@@ -1553,6 +1575,7 @@ def __init__(
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
         message_history: Optional[List[ChatMessage]] = None,
+        stop_sequences: Optional[List[str]] = None,
     ):
         super().__init__(
             model=model,
@@ -1563,6 +1586,7 @@ def __init__(
             top_k=top_k,
             top_p=top_p,
             message_history=message_history,
+            stop_sequences=stop_sequences,
         )

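
On the chat side, the new argument is stored as a session-level default in the session `__init__` and can be overridden per call in `send_message`: `_prepare_request` falls back to `self._stop_sequences` when no per-call value is given. A hedged usage sketch, again assuming the import path and the `chat-bison@001` model name, which are not part of this diff:

```python
from vertexai.preview.language_models import ChatModel

# "chat-bison@001" is an assumed model name used only for illustration.
chat_model = ChatModel.from_pretrained("chat-bison@001")

# Session-level default: applies to every message in this chat.
chat = chat_model.start_chat(stop_sequences=["\n\n"])

# Per-call value: overrides the session default for this message only.
response = chat.send_message(
    "Summarize stop sequences in one paragraph.",
    stop_sequences=["END"],
)
print(response.text)
```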
@@ -1669,6 +1693,7 @@ def _create_prediction_request(
         *,
         max_output_tokens: Optional[int] = _DEFAULT_MAX_OUTPUT_TOKENS,
         temperature: Optional[float] = None,
+        stop_sequences: Optional[List[str]] = None,
     ) -> _PredictionRequest:
         """Creates a code generation prediction request.

@@ -1677,6 +1702,8 @@ def _create_prediction_request(
             suffix: Code after the current point.
             max_output_tokens: Max length of the output text in tokens. Range: [1, 1000].
             temperature: Controls the randomness of predictions. Range: [0, 1].
+            stop_sequences: Customized stop sequences to stop the decoding process.
+

         Returns:
             A `TextGenerationResponse` object that contains the text produced by the model.
@@ -1693,6 +1720,9 @@ def _create_prediction_request(
         if max_output_tokens:
             prediction_parameters["maxOutputTokens"] = max_output_tokens

+        if stop_sequences:
+            prediction_parameters["stopSequences"] = stop_sequences
+
         return _PredictionRequest(instance=instance, parameters=prediction_parameters)

     def predict(
@@ -1702,6 +1732,7 @@ def predict(
         *,
         max_output_tokens: Optional[int] = _DEFAULT_MAX_OUTPUT_TOKENS,
         temperature: Optional[float] = None,
+        stop_sequences: Optional[List[str]] = None,
     ) -> "TextGenerationResponse":
         """Gets model response for a single prompt.

@@ -1710,6 +1741,7 @@ def predict(
             suffix: Code after the current point.
             max_output_tokens: Max length of the output text in tokens. Range: [1, 1000].
             temperature: Controls the randomness of predictions. Range: [0, 1].
+            stop_sequences: Customized stop sequences to stop the decoding process.

         Returns:
             A `TextGenerationResponse` object that contains the text produced by the model.
@@ -1719,6 +1751,7 @@ def predict(
             suffix=suffix,
             max_output_tokens=max_output_tokens,
             temperature=temperature,
+            stop_sequences=stop_sequences,
         )

         prediction_response = self._endpoint.predict(
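
For code generation, `stop_sequences` flows from the public `predict` method into `_create_prediction_request` and ends up as the same `stopSequences` parameter. A minimal sketch of the call; the import path and the `code-bison@001` model name are assumptions for illustration only:

```python
from vertexai.preview.language_models import CodeGenerationModel

# "code-bison@001" is an assumed model name used only for illustration.
code_model = CodeGenerationModel.from_pretrained("code-bison@001")

response = code_model.predict(
    prefix="def reverse_string(s):",
    # Stop once the model starts emitting a second function definition.
    stop_sequences=["\ndef "],
)
print(response.text)
```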