@@ -734,6 +734,7 @@ def predict_streaming(
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
+       stop_sequences: Optional[List[str]] = None,
    ) -> Iterator[TextGenerationResponse]:
        """Gets a streaming model response for a single prompt.

@@ -745,6 +746,7 @@ def predict_streaming(
            temperature: Controls the randomness of predictions. Range: [0, 1]. Default: 0.
            top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering. Range: [1, 40]. Default: 40.
            top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
+           stop_sequences: Customized stop sequences to stop the decoding process.

        Yields:
            A stream of `TextGenerationResponse` objects that contain partial
@@ -771,6 +773,9 @@ def predict_streaming(
        if top_k:
            prediction_parameters["topK"] = top_k

+       if stop_sequences:
+           prediction_parameters["stopSequences"] = stop_sequences
+
        for prediction_dict in _streaming_prediction.predict_stream_of_dicts_from_single_dict(
            prediction_service_client=prediction_service_client,
            endpoint_name=self._endpoint_name,
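For reference, a minimal usage sketch of the new parameter on `TextGenerationModel.predict_streaming` (the project id, model name, prompt, and stop string below are illustrative assumptions, not part of this change):

# Usage sketch (illustrative, not part of the diff).
import vertexai
from vertexai.language_models import TextGenerationModel

vertexai.init(project="my-project", location="us-central1")  # placeholder project
model = TextGenerationModel.from_pretrained("text-bison@001")

# Streaming ends early once the model emits any of the given stop sequences.
for chunk in model.predict_streaming(
    "List the planets of the solar system, one per line.",
    max_output_tokens=128,
    stop_sequences=["\n\n"],
):
    print(chunk.text, end="")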
@@ -1299,12 +1304,14 @@ def start_chat(
        max_output_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        message_history: Optional[List[ChatMessage]] = None,
+       stop_sequences: Optional[List[str]] = None,
    ) -> "CodeChatSession":
        """Starts a chat session with the code chat model.

        Args:
            max_output_tokens: Max length of the output text in tokens. Range: [1, 1000].
            temperature: Controls the randomness of predictions. Range: [0, 1].
+           stop_sequences: Customized stop sequences to stop the decoding process.

        Returns:
            A `ChatSession` object.
@@ -1314,6 +1321,7 @@ def start_chat(
            max_output_tokens=max_output_tokens,
            temperature=temperature,
            message_history=message_history,
+           stop_sequences=stop_sequences,
        )


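A corresponding sketch for `CodeChatModel.start_chat`, where the stop sequences become a session-level default (model name, prompt, and stop string are assumptions):

# Usage sketch (illustrative, not part of the diff).
from vertexai.language_models import CodeChatModel

code_chat_model = CodeChatModel.from_pretrained("codechat-bison@001")
chat = code_chat_model.start_chat(
    max_output_tokens=256,
    temperature=0.2,
    stop_sequences=["\n\n\n"],  # applied to every message in this session
)
print(chat.send_message("Write a Python function that reverses a string.").text)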
@@ -1541,6 +1549,7 @@ def send_message_streaming(
        temperature: Optional[float] = None,
        top_k: Optional[int] = None,
        top_p: Optional[float] = None,
+       stop_sequences: Optional[List[str]] = None,
    ) -> Iterator[TextGenerationResponse]:
        """Sends message to the language model and gets a streamed response.

@@ -1556,6 +1565,8 @@ def send_message_streaming(
                Uses the value specified when calling `ChatModel.start_chat` by default.
            top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
                Uses the value specified when calling `ChatModel.start_chat` by default.
+           stop_sequences: Customized stop sequences to stop the decoding process.
+               Uses the value specified when calling `ChatModel.start_chat` by default.

        Yields:
            A stream of `TextGenerationResponse` objects that contain partial
@@ -1567,6 +1578,7 @@ def send_message_streaming(
            temperature=temperature,
            top_k=top_k,
            top_p=top_p,
+           stop_sequences=stop_sequences,
        )

        prediction_service_client = self._model._endpoint._prediction_client
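And for the chat session's streaming path, where the per-call value overrides the session default set in `start_chat` (model name, prompt, and stop string are assumptions):

# Usage sketch (illustrative, not part of the diff).
from vertexai.language_models import ChatModel

chat = ChatModel.from_pretrained("chat-bison@001").start_chat()
for chunk in chat.send_message_streaming(
    "Name three uses of Cloud Storage.",
    stop_sequences=["4."],  # stop before a fourth list item is generated
):
    print(chunk.text, end="")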
@@ -1644,12 +1656,14 @@ def __init__(
        max_output_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
        message_history: Optional[List[ChatMessage]] = None,
+       stop_sequences: Optional[List[str]] = None,
    ):
        super().__init__(
            model=model,
            max_output_tokens=max_output_tokens,
            temperature=temperature,
            message_history=message_history,
+           stop_sequences=stop_sequences,
        )

    def send_message(
@@ -1658,6 +1672,7 @@ def send_message(
        *,
        max_output_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
+       stop_sequences: Optional[List[str]] = None,
    ) -> "TextGenerationResponse":
        """Sends message to the code chat model and gets a response.

@@ -1667,6 +1682,7 @@ def send_message(
                Uses the value specified when calling `CodeChatModel.start_chat` by default.
            temperature: Controls the randomness of predictions. Range: [0, 1].
                Uses the value specified when calling `CodeChatModel.start_chat` by default.
+           stop_sequences: Customized stop sequences to stop the decoding process.

        Returns:
            A `TextGenerationResponse` object that contains the text produced by the model.
@@ -1675,6 +1691,7 @@ def send_message(
            message=message,
            max_output_tokens=max_output_tokens,
            temperature=temperature,
+           stop_sequences=stop_sequences,
        )

    def send_message_streaming(
@@ -1683,6 +1700,7 @@ def send_message_streaming(
        *,
        max_output_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
+       stop_sequences: Optional[List[str]] = None,
    ) -> Iterator[TextGenerationResponse]:
        """Sends message to the language model and gets a streamed response.

@@ -1694,6 +1712,8 @@ def send_message_streaming(
                Uses the value specified when calling `ChatModel.start_chat` by default.
            temperature: Controls the randomness of predictions. Range: [0, 1]. Default: 0.
                Uses the value specified when calling `ChatModel.start_chat` by default.
+           stop_sequences: Customized stop sequences to stop the decoding process.
+               Uses the value specified when calling `ChatModel.start_chat` by default.

        Returns:
            A stream of `TextGenerationResponse` objects that contain partial
@@ -1703,6 +1723,7 @@ def send_message_streaming(
            message=message,
            max_output_tokens=max_output_tokens,
            temperature=temperature,
+           stop_sequences=stop_sequences,
        )


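The code chat session gets the same treatment; a short sketch of its streaming call (model name, prompt, and stop string are assumptions):

# Usage sketch (illustrative, not part of the diff).
from vertexai.language_models import CodeChatModel

code_chat = CodeChatModel.from_pretrained("codechat-bison@001").start_chat()
for chunk in code_chat.send_message_streaming(
    "Add type hints to: def add(a, b): return a + b",
    stop_sequences=["\n\n\n"],  # per-call value; otherwise the start_chat value is used
):
    print(chunk.text, end="")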
@@ -1811,6 +1832,7 @@ def predict_streaming(
        *,
        max_output_tokens: Optional[int] = None,
        temperature: Optional[float] = None,
+       stop_sequences: Optional[List[str]] = None,
    ) -> Iterator[TextGenerationResponse]:
        """Predicts the code based on previous code.

@@ -1821,6 +1843,7 @@ def predict_streaming(
            suffix: Code after the current point.
            max_output_tokens: Max length of the output text in tokens. Range: [1, 1000].
            temperature: Controls the randomness of predictions. Range: [0, 1].
+           stop_sequences: Customized stop sequences to stop the decoding process.

        Yields:
            A stream of `TextGenerationResponse` objects that contain partial
@@ -1831,6 +1854,7 @@ def predict_streaming(
            suffix=suffix,
            max_output_tokens=max_output_tokens,
            temperature=temperature,
+           stop_sequences=stop_sequences,
        )

        prediction_service_client = self._endpoint._prediction_client
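Finally, a sketch for `CodeGenerationModel.predict_streaming` (model name, prefix, and stop string are assumptions):

# Usage sketch (illustrative, not part of the diff).
from vertexai.language_models import CodeGenerationModel

code_model = CodeGenerationModel.from_pretrained("code-bison@001")
for chunk in code_model.predict_streaming(
    prefix="def fibonacci(n):",
    max_output_tokens=256,
    stop_sequences=["\n\n\n"],
):
    print(chunk.text, end="")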