feat: Adding fast_tryout_enabled option to Vertex SDK

vertex-sdk-bot · copybara-github · commit fde1b96db006 · 2024-11-13T22:11:43.000-08:00
PiperOrigin-RevId: 696388561
diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py
@@ -1290,6 +1290,7 @@ def deploy(
         reservation_affinity_key: Optional[str] = None,
         reservation_affinity_values: Optional[List[str]] = None,
         spot: bool = False,
+        fast_tryout_enabled: bool = False,
     ) -> None:
         """Deploys a Model to the Endpoint.
 
@@ -1397,6 +1398,11 @@ def deploy(
                 Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
             spot (bool):
                 Optional. Whether to schedule the deployment workload on spot VMs.
+            fast_tryout_enabled (bool):
+                Optional. Defaults to False.
+                If True, the model will be deployed using a faster deployment
+                path. Useful for quick experiments. Not for production workloads.
+                Only available for the most popular models with certain machine types.
         """
         self._sync_gca_resource_if_skipped()
 
@@ -1440,6 +1446,7 @@ def deploy(
             enable_access_logging=enable_access_logging,
             disable_container_logging=disable_container_logging,
             deployment_resource_pool=deployment_resource_pool,
+            fast_tryout_enabled=fast_tryout_enabled,
         )
 
     @base.optional_sync()
@@ -1469,6 +1476,7 @@ def _deploy(
         enable_access_logging=False,
         disable_container_logging: bool = False,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
+        fast_tryout_enabled: bool = False,
     ) -> None:
         """Deploys a Model to the Endpoint.
 
@@ -1570,6 +1578,11 @@ def _deploy(
                 are deployed to the same DeploymentResourcePool will be hosted in
                 a shared model server. If provided, will override replica count
                 arguments.
+            fast_tryout_enabled (bool):
+                Optional. Defaults to False.
+                If True, the model will be deployed using a faster deployment
+                path. Useful for quick experiments. Not for production workloads.
+                Only available for the most popular models with certain machine types.
         """
         _LOGGER.log_action_start_against_resource(
             f"Deploying Model {model.resource_name} to", "", self
@@ -1603,6 +1616,7 @@ def _deploy(
             enable_access_logging=enable_access_logging,
             disable_container_logging=disable_container_logging,
             deployment_resource_pool=deployment_resource_pool,
+            fast_tryout_enabled=fast_tryout_enabled,
         )
 
         _LOGGER.log_action_completed_against_resource("model", "deployed", self)
@@ -1639,6 +1653,7 @@ def _deploy_call(
         enable_access_logging=False,
         disable_container_logging: bool = False,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
+        fast_tryout_enabled: bool = False,
     ) -> None:
         """Helper method to deploy model to endpoint.
 
@@ -1747,6 +1762,11 @@ def _deploy_call(
                 are deployed to the same DeploymentResourcePool will be hosted in
                 a shared model server. If provided, will override replica count
                 arguments.
+            fast_tryout_enabled (bool):
+                Optional. Defaults to False.
+                If True, the model will be deployed using a faster deployment
+                path. Useful for quick experiments. Not for production workloads.
+                Only available for the most popular models with certain machine types.
 
         Raises:
             ValueError: If only `accelerator_type` or `accelerator_count` is specified.
@@ -1907,6 +1927,12 @@ def _deploy_call(
 
                 dedicated_resources.machine_spec = machine_spec
                 deployed_model.dedicated_resources = dedicated_resources
+                if fast_tryout_enabled:
+                    deployed_model.faster_deployment_config = (
+                        gca_endpoint_compat.FasterDeploymentConfig(
+                            fast_tryout_enabled=fast_tryout_enabled
+                        )
+                    )
 
             elif supports_automatic_resources:
                 deployed_model.automatic_resources = (
@@ -5090,6 +5116,7 @@ def deploy(
         reservation_affinity_key: Optional[str] = None,
         reservation_affinity_values: Optional[List[str]] = None,
         spot: bool = False,
+        fast_tryout_enabled: bool = False,
     ) -> Union[Endpoint, PrivateEndpoint]:
         """Deploys model to endpoint. Endpoint will be created if unspecified.
 
@@ -5219,6 +5246,11 @@ def deploy(
                 Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
             spot (bool):
                 Optional. Whether to schedule the deployment workload on spot VMs.
+            fast_tryout_enabled (bool):
+                Optional. Defaults to False.
+                If True, the model will be deployed using a faster deployment
+                path. Useful for quick experiments. Not for production workloads.
+                Only available for the most popular models with certain machine types.
 
         Returns:
             endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5287,6 +5319,7 @@ def deploy(
             disable_container_logging=disable_container_logging,
             private_service_connect_config=private_service_connect_config,
             deployment_resource_pool=deployment_resource_pool,
+            fast_tryout_enabled=fast_tryout_enabled,
         )
 
     @base.optional_sync(return_input_arg="endpoint", bind_future_to_self=False)
@@ -5321,6 +5354,7 @@ def _deploy(
             PrivateEndpoint.PrivateServiceConnectConfig
         ] = None,
         deployment_resource_pool: Optional[DeploymentResourcePool] = None,
+        fast_tryout_enabled: bool = False,
     ) -> Union[Endpoint, PrivateEndpoint]:
         """Deploys model to endpoint. Endpoint will be created if unspecified.
 
@@ -5443,6 +5477,11 @@ def _deploy(
                 are deployed to the same DeploymentResourcePool will be hosted in
                 a shared model server. If provided, will override replica count
                 arguments.
+            fast_tryout_enabled (bool):
+                Optional. Defaults to False.
+                If True, the model will be deployed using a faster deployment
+                path. Useful for quick experiments. Not for production workloads.
+                Only available for the most popular models with certain machine types.
 
         Returns:
             endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5501,6 +5540,7 @@ def _deploy(
             enable_access_logging=enable_access_logging,
             disable_container_logging=disable_container_logging,
             deployment_resource_pool=deployment_resource_pool,
+            fast_tryout_enabled=fast_tryout_enabled,
         )
 
         _LOGGER.log_action_completed_against_resource("model", "deployed", endpoint)
diff --git a/tests/unit/aiplatform/test_endpoints.py b/tests/unit/aiplatform/test_endpoints.py
@@ -2112,6 +2112,55 @@ def test_preview_deploy_with_fast_tryout_enabled(
             timeout=None,
         )
 
+    @pytest.mark.usefixtures("get_endpoint_mock", "get_model_mock")
+    @pytest.mark.parametrize("sync", [True, False])
+    def test_deploy_with_fast_tryout_enabled(self, deploy_model_mock, sync):
+        test_endpoint = models.Endpoint(_TEST_ENDPOINT_NAME)
+        test_model = models.Model(_TEST_ID)
+        test_model._gca_resource.supported_deployment_resources_types.append(
+            aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES,
+        )
+
+        test_endpoint.deploy(
+            model=test_model,
+            sync=sync,
+            deploy_request_timeout=None,
+            machine_type=_TEST_MACHINE_TYPE,
+            accelerator_type=_TEST_ACCELERATOR_TYPE,
+            accelerator_count=_TEST_ACCELERATOR_COUNT,
+            fast_tryout_enabled=True,
+            disable_container_logging=True,
+        )
+        if not sync:
+            test_endpoint.wait()
+
+        expected_machine_spec = gca_machine_resources.MachineSpec(
+            machine_type=_TEST_MACHINE_TYPE,
+            accelerator_type=_TEST_ACCELERATOR_TYPE,
+            accelerator_count=_TEST_ACCELERATOR_COUNT,
+        )
+        expected_dedicated_resources = gca_machine_resources.DedicatedResources(
+            machine_spec=expected_machine_spec,
+            min_replica_count=1,
+            max_replica_count=1,
+        )
+        expected_deployed_model = gca_endpoint.DeployedModel(
+            dedicated_resources=expected_dedicated_resources,
+            model=test_model.resource_name,
+            display_name=None,
+            disable_container_logging=True,
+            faster_deployment_config=gca_endpoint.FasterDeploymentConfig(
+                fast_tryout_enabled=True
+            ),
+        )
+        deploy_model_mock.assert_called_once_with(
+            endpoint=test_endpoint.resource_name,
+            deployed_model=expected_deployed_model,
+            traffic_split={"0": 100},
+            metadata=(),
+            timeout=None,
+        )
+
     @pytest.mark.usefixtures("get_endpoint_mock", "get_model_mock", "get_drp_mock")
     @pytest.mark.parametrize("sync", [True, False])
     def test_deploy_with_deployment_resource_pool(self, deploy_model_mock, sync):
diff --git a/tests/unit/aiplatform/test_models.py b/tests/unit/aiplatform/test_models.py
@@ -2527,6 +2527,58 @@ def test_preview_deploy_with_fast_tryout_enabled(
             timeout=None,
         )
 
+    @pytest.mark.usefixtures(
+        "get_model_mock",
+        "create_endpoint_mock",
+        "get_endpoint_mock",
+    )
+    @pytest.mark.parametrize("sync", [True, False])
+    def test_deploy_with_fast_tryout_enabled(self, deploy_model_mock, sync):
+        test_model = models.Model(_TEST_ID)
+        test_model._gca_resource.supported_deployment_resources_types.append(
+            aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
+        )
+
+        test_endpoint = test_model.deploy(
+            machine_type=_TEST_MACHINE_TYPE,
+            accelerator_type=_TEST_ACCELERATOR_TYPE,
+            accelerator_count=_TEST_ACCELERATOR_COUNT,
+            disable_container_logging=True,
+            sync=sync,
+            deploy_request_timeout=None,
+            fast_tryout_enabled=True,
+        )
+
+        if not sync:
+            test_endpoint.wait()
+
+        expected_machine_spec = gca_machine_resources.MachineSpec(
+            machine_type=_TEST_MACHINE_TYPE,
+            accelerator_type=_TEST_ACCELERATOR_TYPE,
+            accelerator_count=_TEST_ACCELERATOR_COUNT,
+        )
+        expected_dedicated_resources = gca_machine_resources.DedicatedResources(
+            machine_spec=expected_machine_spec,
+            min_replica_count=1,
+            max_replica_count=1,
+        )
+        expected_deployed_model = gca_endpoint.DeployedModel(
+            dedicated_resources=expected_dedicated_resources,
+            model=test_model.resource_name,
+            display_name=None,
+            disable_container_logging=True,
+            faster_deployment_config=gca_endpoint.FasterDeploymentConfig(
+                fast_tryout_enabled=True
+            ),
+        )
+        deploy_model_mock.assert_called_once_with(
+            endpoint=test_endpoint.resource_name,
+            deployed_model=expected_deployed_model,
+            traffic_split={"0": 100},
+            metadata=(),
+            timeout=None,
+        )
+
     @pytest.mark.usefixtures(
         "get_model_mock",
         "preview_get_drp_mock",