
Commit e425ded

vertex-sdk-bot authored and copybara-github committed
feat: Vertex AI Model Garden deploy SDK Support for container specifications and equivalent Model Garden models for Hugging Face
PiperOrigin-RevId: 735581803
1 parent b36a43a commit e425ded
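
In practice, this change lets a caller hand a complete ModelContainerSpec to OpenModel.deploy() instead of the individual serving_container_* arguments, and lets list_deploy_options() surface deployment configurations from the equivalent Model Garden model for a Hugging Face model. A minimal deploy sketch mirroring the new unit test below; the import paths, project, and model name are assumptions, not values taken from this commit:

    # Sketch only: mirrors the new unit test below. Import paths, project and
    # model name are assumptions rather than values taken from this commit.
    from google.cloud import aiplatform
    from google.cloud.aiplatform_v1beta1 import types  # assumed types module
    from vertexai import model_garden  # assumed public import path

    aiplatform.init(project="my-project", location="us-central1")

    container_spec = types.ModelContainerSpec(
        image_uri=(
            "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/"
            "pytorch-vllm-serve:20241202_0916_RC00"
        ),
        predict_route="/predictions/v1/predict",
        health_route="/ping",
        ports=[types.Port(container_port=7080)],
    )

    # The spec is validated client-side (image_uri is required) and copied into
    # the DeployRequest's model_config.container_spec.
    model = model_garden.OpenModel(model_name="google/gemma2@gemma-2-2b-it")  # placeholder
    endpoint = model.deploy(serving_container_spec=container_spec)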

2 files changed (+117, -4 lines)


tests/unit/vertexai/model_garden/test_model_garden.py (+95, -4)
@@ -45,6 +45,28 @@
 
 _TEST_ENDPOINT_NAME = "projects/test-project/locations/us-central1/endpoints/1234567890"
 _TEST_MODEL_NAME = "projects/test-project/locations/us-central1/models/9876543210"
+_TEST_MODEL_CONTAINER_SPEC = types.ModelContainerSpec(
+    image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
+    command=["python", "main.py"],
+    args=["--model-id=gemma-2b"],
+    env=[types.EnvVar(name="MODEL_ID", value="gemma-2b")],
+    ports=[types.Port(container_port=7080)],
+    grpc_ports=[types.Port(container_port=7081)],
+    predict_route="/predictions/v1/predict",
+    health_route="/ping",
+    deployment_timeout=duration_pb2.Duration(seconds=1800),
+    shared_memory_size_mb=256,
+    startup_probe=types.Probe(
+        exec_=types.Probe.ExecAction(command=["python", "main.py"]),
+        period_seconds=10,
+        timeout_seconds=10,
+    ),
+    health_probe=types.Probe(
+        exec_=types.Probe.ExecAction(command=["python", "health_check.py"]),
+        period_seconds=10,
+        timeout_seconds=10,
+    ),
+)
 
 
 @pytest.fixture(scope="module")
@@ -65,7 +87,7 @@ def deploy_mock():
         "deploy",
     ) as deploy:
         mock_lro = mock.Mock(ga_operation.Operation)
-        mock_lro.result.return_value = types.DeployPublisherModelResponse(
+        mock_lro.result.return_value = types.DeployResponse(
             endpoint=_TEST_ENDPOINT_NAME,
             model=_TEST_MODEL_FULL_RESOURCE_NAME,
         )
@@ -588,6 +610,71 @@ def test_deploy_with_serving_container_image_success(self, deploy_mock):
         )
 
     def test_deploy_with_serving_container_spec_success(self, deploy_mock):
+        """Tests deploying a model with a full serving container spec."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        model.deploy(serving_container_spec=_TEST_MODEL_CONTAINER_SPEC)
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                model_config=types.DeployRequest.ModelConfig(
+                    container_spec=_TEST_MODEL_CONTAINER_SPEC
+                ),
+            )
+        )
+
+    def test_deploy_with_serving_container_spec_no_image_uri_raises_error(self):
+        """Tests that a serving container spec without an image uri raises an error."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+
+        expected_message = (
+            "Serving container image uri is required for the serving container" " spec."
+        )
+        with pytest.raises(ValueError) as exception:
+            model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+            model.deploy(
+                serving_container_spec=types.ModelContainerSpec(
+                    predict_route="/predictions/v1/predict",
+                    health_route="/ping",
+                )
+            )
+        assert str(exception.value) == expected_message
+
+    def test_deploy_with_serving_container_spec_with_both_image_uri_raises_error(
+        self,
+    ):
+        """Tests that passing an image uri both in the spec and separately raises an error."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+
+        expected_message = (
+            "Serving container image uri is already set in the serving container"
+            " spec."
+        )
+        with pytest.raises(ValueError) as exception:
+            model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+            model.deploy(
+                serving_container_spec=types.ModelContainerSpec(
+                    image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
+                    predict_route="/predictions/v1/predict",
+                    health_route="/ping",
+                ),
+                serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
+            )
+        assert str(exception.value) == expected_message
+
+    def test_deploy_with_serving_container_spec_individual_fields_success(
+        self, deploy_mock
+    ):
         """Tests deploying a model with serving container spec."""
         aiplatform.init(
             project=_TEST_PROJECT,
@@ -665,7 +752,9 @@ def test_list_deploy_options(self, get_publisher_model_mock):
         model.list_deploy_options()
         get_publisher_model_mock.assert_called_with(
             types.GetPublisherModelRequest(
-                name=_TEST_MODEL_FULL_RESOURCE_NAME, is_hugging_face_model=False
+                name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                is_hugging_face_model=False,
+                include_equivalent_model_garden_model_deployment_configs=True,
             )
         )
 
@@ -697,8 +786,10 @@ def test_list_deployable_models(self, list_publisher_models_mock):
             types.ListPublisherModelsRequest(
                 parent="publishers/*",
                 list_all_versions=True,
-                filter="is_hf_wildcard(true) AND "
-                "labels.VERIFIED_DEPLOYMENT_CONFIG=VERIFIED_DEPLOYMENT_SUCCEED",
+                filter=(
+                    "is_hf_wildcard(true) AND "
+                    "labels.VERIFIED_DEPLOYMENT_CONFIG=VERIFIED_DEPLOYMENT_SUCCEED"
+                ),
             )
         )
         assert hf_models == [

vertexai/model_garden/_model_garden.py (+22)
@@ -326,6 +326,7 @@ def deploy(
         endpoint_display_name: Optional[str] = None,
         model_display_name: Optional[str] = None,
         deploy_request_timeout: Optional[float] = None,
+        serving_container_spec: Optional[types.ModelContainerSpec] = None,
         serving_container_image_uri: Optional[str] = None,
         serving_container_predict_route: Optional[str] = None,
         serving_container_health_route: Optional[str] = None,
@@ -400,6 +401,10 @@ def deploy(
             model_display_name: The display name of the uploaded model.
             deploy_request_timeout: The timeout for the deploy request. Default
                 is 2 hours.
+            serving_container_spec (types.ModelContainerSpec):
+                Optional. The container specification for the model instance.
+                This specification overrides the default container specification
+                and other serving container parameters.
             serving_container_image_uri (str):
                 Optional. The URI of the Model serving container. This parameter is required
                 if the parameter `local_model` is not specified.
@@ -474,6 +479,11 @@
         Returns:
             endpoint (aiplatform.Endpoint):
                 Created endpoint.
+
+        Raises:
+            ValueError: If ``serving_container_spec`` is specified but ``serving_container_spec.image_uri``
+                is ``None``, or if ``serving_container_spec`` is specified but other
+                serving container parameters are specified.
         """
         request = types.DeployRequest(
             destination=f"projects/{self._project}/locations/{self._location}",
@@ -529,6 +539,17 @@
         if fast_tryout_enabled:
             request.deploy_config.fast_tryout_enabled = fast_tryout_enabled
 
+        if serving_container_spec:
+            if not serving_container_spec.image_uri:
+                raise ValueError(
+                    "Serving container image uri is required for the serving container spec."
+                )
+            if serving_container_image_uri:
+                raise ValueError(
+                    "Serving container image uri is already set in the serving container spec."
+                )
+            request.model_config.container_spec = serving_container_spec
+
         if serving_container_image_uri:
             request.model_config.container_spec = _construct_serving_container_spec(
                 serving_container_image_uri,
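
From the caller's side, both new checks fail before any request is sent: a spec without an image URI is rejected, and supplying an image URI both inside the spec and as serving_container_image_uri is rejected. A minimal sketch of the two error paths; the import paths and the model name are assumptions (the model name is a placeholder):

    # Sketch only: demonstrates the two client-side ValueError paths added above.
    # Import paths and init values are assumptions, not taken from this commit.
    from google.cloud import aiplatform
    from google.cloud.aiplatform_v1beta1 import types  # assumed types module
    from vertexai import model_garden  # assumed public import path

    aiplatform.init(project="my-project", location="us-central1")
    model = model_garden.OpenModel(model_name="google/gemma2@gemma-2-2b-it")  # placeholder

    image = (
        "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/"
        "pytorch-vllm-serve:20241202_0916_RC00"
    )

    try:
        # Spec without an image uri -> rejected before the RPC is issued.
        model.deploy(serving_container_spec=types.ModelContainerSpec(health_route="/ping"))
    except ValueError as e:
        print(e)  # Serving container image uri is required for the serving container spec.

    try:
        # Image uri supplied twice (inside the spec and as a separate argument) -> rejected.
        model.deploy(
            serving_container_spec=types.ModelContainerSpec(image_uri=image),
            serving_container_image_uri=image,
        )
    except ValueError as e:
        print(e)  # Serving container image uri is already set in the serving container spec.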
@@ -574,6 +595,7 @@ def list_deploy_options(
         request = types.GetPublisherModelRequest(
             name=self._publisher_model_name,
             is_hugging_face_model="@" not in self._publisher_model_name,
+            include_equivalent_model_garden_model_deployment_configs=True,
         )
         response = self._us_central1_model_garden_client.get_publisher_model(request)
         multi_deploy = (
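
With include_equivalent_model_garden_model_deployment_configs set on the GetPublisherModelRequest, listing deploy options for a Hugging Face model (treated as such here when the model name carries no "@" version suffix) can also return the verified deployment configurations of its equivalent Model Garden model. A hedged sketch; the import path and the Hugging Face model id are illustrative only:

    # Sketch only: the import path and the Hugging Face model id are assumptions.
    from google.cloud import aiplatform
    from vertexai import model_garden  # assumed public import path

    aiplatform.init(project="my-project", location="us-central1")
    hf_model = model_garden.OpenModel(model_name="meta-llama/Llama-3.1-8B-Instruct")
    # The request now also asks for deployment configs of the equivalent
    # Model Garden model, so options may be returned even when the Hugging Face
    # listing itself carries none.
    options = hf_model.list_deploy_options()
    print(options)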
