Skip to content

Commit fde1b96

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Adding fast_tryout_enabled option to Vertex SDK
PiperOrigin-RevId: 696388561
1 parent 9d00424 commit fde1b96

File tree

3 files changed

+141
-0
lines changed

3 files changed

+141
-0
lines changed

google/cloud/aiplatform/models.py

+40
Original file line number | Diff line number | Diff line change
@@ -1290,6 +1290,7 @@ def deploy(
12901290
reservation_affinity_key: Optional[str] = None,
12911291
reservation_affinity_values: Optional[List[str]] = None,
12921292
spot: bool = False,
1293+
fast_tryout_enabled: bool = False,
12931294
) -> None:
12941295
"""Deploys a Model to the Endpoint.
12951296
@@ -1397,6 +1398,11 @@ def deploy(
13971398
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
13981399
spot (bool):
13991400
Optional. Whether to schedule the deployment workload on spot VMs.
1401+
fast_tryout_enabled (bool):
1402+
Optional. Defaults to False.
1403+
If True, model will be deployed using faster deployment path.
1404+
Useful for quick experiments. Not for production workloads. Only
1405+
available for most popular models with certain machine types.
14001406
"""
14011407
self._sync_gca_resource_if_skipped()
14021408

@@ -1440,6 +1446,7 @@ def deploy(
14401446
enable_access_logging=enable_access_logging,
14411447
disable_container_logging=disable_container_logging,
14421448
deployment_resource_pool=deployment_resource_pool,
1449+
fast_tryout_enabled=fast_tryout_enabled,
14431450
)
14441451

14451452
@base.optional_sync()
@@ -1469,6 +1476,7 @@ def _deploy(
14691476
enable_access_logging=False,
14701477
disable_container_logging: bool = False,
14711478
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
1479+
fast_tryout_enabled: bool = False,
14721480
) -> None:
14731481
"""Deploys a Model to the Endpoint.
14741482
@@ -1570,6 +1578,11 @@ def _deploy(
15701578
are deployed to the same DeploymentResourcePool will be hosted in
15711579
a shared model server. If provided, will override replica count
15721580
arguments.
1581+
fast_tryout_enabled (bool):
1582+
Optional. Defaults to False.
1583+
If True, model will be deployed using faster deployment path.
1584+
Useful for quick experiments. Not for production workloads. Only
1585+
available for most popular models with certain machine types.
15731586
"""
15741587
_LOGGER.log_action_start_against_resource(
15751588
f"Deploying Model {model.resource_name} to", "", self
@@ -1603,6 +1616,7 @@ def _deploy(
16031616
enable_access_logging=enable_access_logging,
16041617
disable_container_logging=disable_container_logging,
16051618
deployment_resource_pool=deployment_resource_pool,
1619+
fast_tryout_enabled=fast_tryout_enabled,
16061620
)
16071621

16081622
_LOGGER.log_action_completed_against_resource("model", "deployed", self)
@@ -1639,6 +1653,7 @@ def _deploy_call(
16391653
enable_access_logging=False,
16401654
disable_container_logging: bool = False,
16411655
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
1656+
fast_tryout_enabled: bool = False,
16421657
) -> None:
16431658
"""Helper method to deploy model to endpoint.
16441659
@@ -1747,6 +1762,11 @@ def _deploy_call(
17471762
are deployed to the same DeploymentResourcePool will be hosted in
17481763
a shared model server. If provided, will override replica count
17491764
arguments.
1765+
fast_tryout_enabled (bool):
1766+
Optional. Defaults to False.
1767+
If True, model will be deployed using faster deployment path.
1768+
Useful for quick experiments. Not for production workloads. Only
1769+
available for most popular models with certain machine types.
17501770
17511771
Raises:
17521772
ValueError: If only `accelerator_type` or `accelerator_count` is specified.
@@ -1907,6 +1927,12 @@ def _deploy_call(
19071927

19081928
dedicated_resources.machine_spec = machine_spec
19091929
deployed_model.dedicated_resources = dedicated_resources
1930+
if fast_tryout_enabled:
1931+
deployed_model.faster_deployment_config = (
1932+
gca_endpoint_compat.FasterDeploymentConfig(
1933+
fast_tryout_enabled=fast_tryout_enabled
1934+
)
1935+
)
19101936

19111937
elif supports_automatic_resources:
19121938
deployed_model.automatic_resources = (
@@ -5090,6 +5116,7 @@ def deploy(
50905116
reservation_affinity_key: Optional[str] = None,
50915117
reservation_affinity_values: Optional[List[str]] = None,
50925118
spot: bool = False,
5119+
fast_tryout_enabled: bool = False,
50935120
) -> Union[Endpoint, PrivateEndpoint]:
50945121
"""Deploys model to endpoint. Endpoint will be created if unspecified.
50955122
@@ -5219,6 +5246,11 @@ def deploy(
52195246
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
52205247
spot (bool):
52215248
Optional. Whether to schedule the deployment workload on spot VMs.
5249+
fast_tryout_enabled (bool):
5250+
Optional. Defaults to False.
5251+
If True, model will be deployed using faster deployment path.
5252+
Useful for quick experiments. Not for production workloads. Only
5253+
available for most popular models with certain machine types.
52225254
52235255
Returns:
52245256
endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5287,6 +5319,7 @@ def deploy(
52875319
disable_container_logging=disable_container_logging,
52885320
private_service_connect_config=private_service_connect_config,
52895321
deployment_resource_pool=deployment_resource_pool,
5322+
fast_tryout_enabled=fast_tryout_enabled,
52905323
)
52915324

52925325
@base.optional_sync(return_input_arg="endpoint", bind_future_to_self=False)
@@ -5321,6 +5354,7 @@ def _deploy(
53215354
PrivateEndpoint.PrivateServiceConnectConfig
53225355
] = None,
53235356
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
5357+
fast_tryout_enabled: bool = False,
53245358
) -> Union[Endpoint, PrivateEndpoint]:
53255359
"""Deploys model to endpoint. Endpoint will be created if unspecified.
53265360
@@ -5443,6 +5477,11 @@ def _deploy(
54435477
are deployed to the same DeploymentResourcePool will be hosted in
54445478
a shared model server. If provided, will override replica count
54455479
arguments.
5480+
fast_tryout_enabled (bool):
5481+
Optional. Defaults to False.
5482+
If True, model will be deployed using faster deployment path.
5483+
Useful for quick experiments. Not for production workloads. Only
5484+
available for most popular models with certain machine types.
54465485
54475486
Returns:
54485487
endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5501,6 +5540,7 @@ def _deploy(
55015540
enable_access_logging=enable_access_logging,
55025541
disable_container_logging=disable_container_logging,
55035542
deployment_resource_pool=deployment_resource_pool,
5543+
fast_tryout_enabled=fast_tryout_enabled,
55045544
)
55055545

55065546
_LOGGER.log_action_completed_against_resource("model", "deployed", endpoint)

tests/unit/aiplatform/test_endpoints.py

+49
Original file line number | Diff line number | Diff line change
@@ -2112,6 +2112,55 @@ def test_preview_deploy_with_fast_tryout_enabled(
21122112
timeout=None,
21132113
)
21142114

2115+
@pytest.mark.usefixtures("get_endpoint_mock", "get_model_mock")
2116+
@pytest.mark.parametrize("sync", [True, False])
2117+
def test_deploy_with_fast_tryout_enabled(self, deploy_model_mock, sync):
2118+
test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
2119+
test_model = models.Model(_TEST_ID)
2120+
test_model._gca_resource.supported_deployment_resources_types.append(
2121+
aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES,
2122+
)
2123+
2124+
test_endpoint.deploy(
2125+
model=test_model,
2126+
sync=sync,
2127+
deploy_request_timeout=None,
2128+
machine_type=_TEST_MACHINE_TYPE,
2129+
accelerator_type=_TEST_ACCELERATOR_TYPE,
2130+
accelerator_count=_TEST_ACCELERATOR_COUNT,
2131+
fast_tryout_enabled=True,
2132+
disable_container_logging=True,
2133+
)
2134+
if not sync:
2135+
test_endpoint.wait()
2136+
2137+
expected_machine_spec = gca_machine_resources.MachineSpec(
2138+
machine_type=_TEST_MACHINE_TYPE,
2139+
accelerator_type=_TEST_ACCELERATOR_TYPE,
2140+
accelerator_count=_TEST_ACCELERATOR_COUNT,
2141+
)
2142+
expected_dedicated_resources = gca_machine_resources.DedicatedResources(
2143+
machine_spec=expected_machine_spec,
2144+
min_replica_count=1,
2145+
max_replica_count=1,
2146+
)
2147+
expected_deployed_model = gca_endpoint.DeployedModel(
2148+
dedicated_resources=expected_dedicated_resources,
2149+
model=test_model.resource_name,
2150+
display_name=None,
2151+
disable_container_logging=True,
2152+
faster_deployment_config=gca_endpoint.FasterDeploymentConfig(
2153+
fast_tryout_enabled=True
2154+
),
2155+
)
2156+
deploy_model_mock.assert_called_once_with(
2157+
endpoint=test_endpoint.resource_name,
2158+
deployed_model=expected_deployed_model,
2159+
traffic_split={"0": 100},
2160+
metadata=(),
2161+
timeout=None,
2162+
)
2163+
21152164
@pytest.mark.usefixtures("get_endpoint_mock", "get_model_mock", "get_drp_mock")
21162165
@pytest.mark.parametrize("sync", [True, False])
21172166
def test_deploy_with_deployment_resource_pool(self, deploy_model_mock, sync):

tests/unit/aiplatform/test_models.py

+52
Original file line number | Diff line number | Diff line change
@@ -2527,6 +2527,58 @@ def test_preview_deploy_with_fast_tryout_enabled(
25272527
timeout=None,
25282528
)
25292529

2530+
@pytest.mark.usefixtures(
2531+
"get_model_mock",
2532+
"create_endpoint_mock",
2533+
"get_endpoint_mock",
2534+
)
2535+
@pytest.mark.parametrize("sync", [True, False])
2536+
def test_deploy_with_fast_tryout_enabled(self, deploy_model_mock, sync):
2537+
test_model = models.Model(_TEST_ID)
2538+
test_model._gca_resource.supported_deployment_resources_types.append(
2539+
aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
2540+
)
2541+
2542+
test_endpoint = test_model.deploy(
2543+
machine_type=_TEST_MACHINE_TYPE,
2544+
accelerator_type=_TEST_ACCELERATOR_TYPE,
2545+
accelerator_count=_TEST_ACCELERATOR_COUNT,
2546+
disable_container_logging=True,
2547+
sync=sync,
2548+
deploy_request_timeout=None,
2549+
fast_tryout_enabled=True,
2550+
)
2551+
2552+
if not sync:
2553+
test_endpoint.wait()
2554+
2555+
expected_machine_spec = gca_machine_resources.MachineSpec(
2556+
machine_type=_TEST_MACHINE_TYPE,
2557+
accelerator_type=_TEST_ACCELERATOR_TYPE,
2558+
accelerator_count=_TEST_ACCELERATOR_COUNT,
2559+
)
2560+
expected_dedicated_resources = gca_machine_resources.DedicatedResources(
2561+
machine_spec=expected_machine_spec,
2562+
min_replica_count=1,
2563+
max_replica_count=1,
2564+
)
2565+
expected_deployed_model = gca_endpoint.DeployedModel(
2566+
dedicated_resources=expected_dedicated_resources,
2567+
model=test_model.resource_name,
2568+
display_name=None,
2569+
disable_container_logging=True,
2570+
faster_deployment_config=gca_endpoint.FasterDeploymentConfig(
2571+
fast_tryout_enabled=True
2572+
),
2573+
)
2574+
deploy_model_mock.assert_called_once_with(
2575+
endpoint=test_endpoint.resource_name,
2576+
deployed_model=expected_deployed_model,
2577+
traffic_split={"0": 100},
2578+
metadata=(),
2579+
timeout=None,
2580+
)
2581+
25302582
@pytest.mark.usefixtures(
25312583
"get_model_mock",
25322584
"preview_get_drp_mock",

0 commit comments

Comments
 (0)