@@ -1290,6 +1290,7 @@ def deploy(
1290
1290
reservation_affinity_key : Optional [str ] = None ,
1291
1291
reservation_affinity_values : Optional [List [str ]] = None ,
1292
1292
spot : bool = False ,
1293
+ fast_tryout_enabled : bool = False ,
1293
1294
) -> None :
1294
1295
"""Deploys a Model to the Endpoint.
1295
1296
@@ -1397,6 +1398,11 @@ def deploy(
1397
1398
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
1398
1399
spot (bool):
1399
1400
Optional. Whether to schedule the deployment workload on spot VMs.
1401
+ fast_tryout_enabled (bool):
1402
+ Optional. Defaults to False.
1403
+ If True, the model will be deployed using a faster deployment path.
1404
+ Useful for quick experiments. Not for production workloads. Only
1405
+ available for the most popular models with certain machine types.
1400
1406
"""
1401
1407
self ._sync_gca_resource_if_skipped ()
1402
1408
@@ -1440,6 +1446,7 @@ def deploy(
1440
1446
enable_access_logging = enable_access_logging ,
1441
1447
disable_container_logging = disable_container_logging ,
1442
1448
deployment_resource_pool = deployment_resource_pool ,
1449
+ fast_tryout_enabled = fast_tryout_enabled ,
1443
1450
)
1444
1451
1445
1452
@base .optional_sync ()
@@ -1469,6 +1476,7 @@ def _deploy(
1469
1476
enable_access_logging = False ,
1470
1477
disable_container_logging : bool = False ,
1471
1478
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
1479
+ fast_tryout_enabled : bool = False ,
1472
1480
) -> None :
1473
1481
"""Deploys a Model to the Endpoint.
1474
1482
@@ -1570,6 +1578,11 @@ def _deploy(
1570
1578
are deployed to the same DeploymentResourcePool will be hosted in
1571
1579
a shared model server. If provided, will override replica count
1572
1580
arguments.
1581
+ fast_tryout_enabled (bool):
1582
+ Optional. Defaults to False.
1583
+ If True, the model will be deployed using a faster deployment path.
1584
+ Useful for quick experiments. Not for production workloads. Only
1585
+ available for the most popular models with certain machine types.
1573
1586
"""
1574
1587
_LOGGER .log_action_start_against_resource (
1575
1588
f"Deploying Model { model .resource_name } to" , "" , self
@@ -1603,6 +1616,7 @@ def _deploy(
1603
1616
enable_access_logging = enable_access_logging ,
1604
1617
disable_container_logging = disable_container_logging ,
1605
1618
deployment_resource_pool = deployment_resource_pool ,
1619
+ fast_tryout_enabled = fast_tryout_enabled ,
1606
1620
)
1607
1621
1608
1622
_LOGGER .log_action_completed_against_resource ("model" , "deployed" , self )
@@ -1639,6 +1653,7 @@ def _deploy_call(
1639
1653
enable_access_logging = False ,
1640
1654
disable_container_logging : bool = False ,
1641
1655
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
1656
+ fast_tryout_enabled : bool = False ,
1642
1657
) -> None :
1643
1658
"""Helper method to deploy model to endpoint.
1644
1659
@@ -1747,6 +1762,11 @@ def _deploy_call(
1747
1762
are deployed to the same DeploymentResourcePool will be hosted in
1748
1763
a shared model server. If provided, will override replica count
1749
1764
arguments.
1765
+ fast_tryout_enabled (bool):
1766
+ Optional. Defaults to False.
1767
+ If True, the model will be deployed using a faster deployment path.
1768
+ Useful for quick experiments. Not for production workloads. Only
1769
+ available for the most popular models with certain machine types.
1750
1770
1751
1771
Raises:
1752
1772
ValueError: If only `accelerator_type` or `accelerator_count` is specified.
@@ -1907,6 +1927,12 @@ def _deploy_call(
1907
1927
1908
1928
dedicated_resources .machine_spec = machine_spec
1909
1929
deployed_model .dedicated_resources = dedicated_resources
1930
+ if fast_tryout_enabled :
1931
+ deployed_model .faster_deployment_config = (
1932
+ gca_endpoint_compat .FasterDeploymentConfig (
1933
+ fast_tryout_enabled = fast_tryout_enabled
1934
+ )
1935
+ )
1910
1936
1911
1937
elif supports_automatic_resources :
1912
1938
deployed_model .automatic_resources = (
@@ -5090,6 +5116,7 @@ def deploy(
5090
5116
reservation_affinity_key : Optional [str ] = None ,
5091
5117
reservation_affinity_values : Optional [List [str ]] = None ,
5092
5118
spot : bool = False ,
5119
+ fast_tryout_enabled : bool = False ,
5093
5120
) -> Union [Endpoint , PrivateEndpoint ]:
5094
5121
"""Deploys model to endpoint. Endpoint will be created if unspecified.
5095
5122
@@ -5219,6 +5246,11 @@ def deploy(
5219
5246
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
5220
5247
spot (bool):
5221
5248
Optional. Whether to schedule the deployment workload on spot VMs.
5249
+ fast_tryout_enabled (bool):
5250
+ Optional. Defaults to False.
5251
+ If True, the model will be deployed using a faster deployment path.
5252
+ Useful for quick experiments. Not for production workloads. Only
5253
+ available for the most popular models with certain machine types.
5222
5254
5223
5255
Returns:
5224
5256
endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5287,6 +5319,7 @@ def deploy(
5287
5319
disable_container_logging = disable_container_logging ,
5288
5320
private_service_connect_config = private_service_connect_config ,
5289
5321
deployment_resource_pool = deployment_resource_pool ,
5322
+ fast_tryout_enabled = fast_tryout_enabled ,
5290
5323
)
5291
5324
5292
5325
@base .optional_sync (return_input_arg = "endpoint" , bind_future_to_self = False )
@@ -5321,6 +5354,7 @@ def _deploy(
5321
5354
PrivateEndpoint .PrivateServiceConnectConfig
5322
5355
] = None ,
5323
5356
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
5357
+ fast_tryout_enabled : bool = False ,
5324
5358
) -> Union [Endpoint , PrivateEndpoint ]:
5325
5359
"""Deploys model to endpoint. Endpoint will be created if unspecified.
5326
5360
@@ -5443,6 +5477,11 @@ def _deploy(
5443
5477
are deployed to the same DeploymentResourcePool will be hosted in
5444
5478
a shared model server. If provided, will override replica count
5445
5479
arguments.
5480
+ fast_tryout_enabled (bool):
5481
+ Optional. Defaults to False.
5482
+ If True, the model will be deployed using a faster deployment path.
5483
+ Useful for quick experiments. Not for production workloads. Only
5484
+ available for the most popular models with certain machine types.
5446
5485
5447
5486
Returns:
5448
5487
endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5501,6 +5540,7 @@ def _deploy(
5501
5540
enable_access_logging = enable_access_logging ,
5502
5541
disable_container_logging = disable_container_logging ,
5503
5542
deployment_resource_pool = deployment_resource_pool ,
5543
+ fast_tryout_enabled = fast_tryout_enabled ,
5504
5544
)
5505
5545
5506
5546
_LOGGER .log_action_completed_against_resource ("model" , "deployed" , endpoint )
0 commit comments