
Commit e425ded

vertex-sdk-bot authored and copybara-github committed
feat: Vertex AI Model Garden deploy SDK Support for container specifications and equivalent Model Garden models for Hugging Face
PiperOrigin-RevId: 735581803
1 parent b36a43a commit e425ded
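
In practice, this change lets a caller hand a complete ModelContainerSpec to OpenModel.deploy() instead of the individual serving_container_* arguments, and lets list_deploy_options() surface deployment configurations from the equivalent Model Garden model for a Hugging Face model. A minimal deploy sketch mirroring the new unit test below; the import paths, project, and model name are assumptions, not values taken from this commit:

    # Sketch only: mirrors the new unit test below. Import paths, project and
    # model name are assumptions rather than values taken from this commit.
    from google.cloud import aiplatform
    from google.cloud.aiplatform_v1beta1 import types  # assumed types module
    from vertexai import model_garden  # assumed public import path

    aiplatform.init(project="my-project", location="us-central1")

    container_spec = types.ModelContainerSpec(
        image_uri=(
            "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/"
            "pytorch-vllm-serve:20241202_0916_RC00"
        ),
        predict_route="/predictions/v1/predict",
        health_route="/ping",
        ports=[types.Port(container_port=7080)],
    )

    # The spec is validated client-side (image_uri is required) and copied into
    # the DeployRequest's model_config.container_spec.
    model = model_garden.OpenModel(model_name="google/gemma2@gemma-2-2b-it")  # placeholder
    endpoint = model.deploy(serving_container_spec=container_spec)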

2 files changed (+117, -4 lines)


tests/unit/vertexai/model_garden/test_model_garden.py (+95, -4)
@@ -45,6 +45,28 @@
 
 _TEST_ENDPOINT_NAME = "projects/test-project/locations/us-central1/endpoints/1234567890"
 _TEST_MODEL_NAME = "projects/test-project/locations/us-central1/models/9876543210"
+_TEST_MODEL_CONTAINER_SPEC = types.ModelContainerSpec(
+    image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
+    command=["python", "main.py"],
+    args=["--model-id=gemma-2b"],
+    env=[types.EnvVar(name="MODEL_ID", value="gemma-2b")],
+    ports=[types.Port(container_port=7080)],
+    grpc_ports=[types.Port(container_port=7081)],
+    predict_route="/predictions/v1/predict",
+    health_route="/ping",
+    deployment_timeout=duration_pb2.Duration(seconds=1800),
+    shared_memory_size_mb=256,
+    startup_probe=types.Probe(
+        exec_=types.Probe.ExecAction(command=["python", "main.py"]),
+        period_seconds=10,
+        timeout_seconds=10,
+    ),
+    health_probe=types.Probe(
+        exec_=types.Probe.ExecAction(command=["python", "health_check.py"]),
+        period_seconds=10,
+        timeout_seconds=10,
+    ),
+)
 
 
 @pytest.fixture(scope="module")
@@ -65,7 +87,7 @@ def deploy_mock():
         "deploy",
     ) as deploy:
         mock_lro = mock.Mock(ga_operation.Operation)
-        mock_lro.result.return_value = types.DeployPublisherModelResponse(
+        mock_lro.result.return_value = types.DeployResponse(
             endpoint=_TEST_ENDPOINT_NAME,
             model=_TEST_MODEL_FULL_RESOURCE_NAME,
         )
@@ -588,6 +610,71 @@ def test_deploy_with_serving_container_image_success(self, deploy_mock):
         )
 
     def test_deploy_with_serving_container_spec_success(self, deploy_mock):
+        """Tests deploying a model with a full serving container spec."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+        model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+        model.deploy(serving_container_spec=_TEST_MODEL_CONTAINER_SPEC)
+        deploy_mock.assert_called_once_with(
+            types.DeployRequest(
+                publisher_model_name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                destination=f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}",
+                model_config=types.DeployRequest.ModelConfig(
+                    container_spec=_TEST_MODEL_CONTAINER_SPEC
+                ),
+            )
+        )
+
+    def test_deploy_with_serving_container_spec_no_image_uri_raises_error(self):
+        """Tests that a serving container spec without an image uri raises an error."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+
+        expected_message = (
+            "Serving container image uri is required for the serving container" " spec."
+        )
+        with pytest.raises(ValueError) as exception:
+            model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+            model.deploy(
+                serving_container_spec=types.ModelContainerSpec(
+                    predict_route="/predictions/v1/predict",
+                    health_route="/ping",
+                )
+            )
+        assert str(exception.value) == expected_message
+
+    def test_deploy_with_serving_container_spec_with_both_image_uri_raises_error(
+        self,
+    ):
+        """Tests that passing an image uri both in the spec and separately raises an error."""
+        aiplatform.init(
+            project=_TEST_PROJECT,
+            location=_TEST_LOCATION,
+        )
+
+        expected_message = (
+            "Serving container image uri is already set in the serving container"
+            " spec."
+        )
+        with pytest.raises(ValueError) as exception:
+            model = model_garden.OpenModel(model_name=_TEST_MODEL_FULL_RESOURCE_NAME)
+            model.deploy(
+                serving_container_spec=types.ModelContainerSpec(
+                    image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
+                    predict_route="/predictions/v1/predict",
+                    health_route="/ping",
+                ),
+                serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
+            )
+        assert str(exception.value) == expected_message
+
+    def test_deploy_with_serving_container_spec_individual_fields_success(
+        self, deploy_mock
+    ):
         """Tests deploying a model with serving container spec."""
         aiplatform.init(
             project=_TEST_PROJECT,
@@ -665,7 +752,9 @@ def test_list_deploy_options(self, get_publisher_model_mock):
         model.list_deploy_options()
         get_publisher_model_mock.assert_called_with(
             types.GetPublisherModelRequest(
-                name=_TEST_MODEL_FULL_RESOURCE_NAME, is_hugging_face_model=False
+                name=_TEST_MODEL_FULL_RESOURCE_NAME,
+                is_hugging_face_model=False,
+                include_equivalent_model_garden_model_deployment_configs=True,
             )
         )
 
@@ -697,8 +786,10 @@ def test_list_deployable_models(self, list_publisher_models_mock):
             types.ListPublisherModelsRequest(
                 parent="publishers/*",
                 list_all_versions=True,
-                filter="is_hf_wildcard(true) AND "
-                "labels.VERIFIED_DEPLOYMENT_CONFIG=VERIFIED_DEPLOYMENT_SUCCEED",
+                filter=(
+                    "is_hf_wildcard(true) AND "
+                    "labels.VERIFIED_DEPLOYMENT_CONFIG=VERIFIED_DEPLOYMENT_SUCCEED"
+                ),
             )
         )
         assert hf_models == [

vertexai/model_garden/_model_garden.py (+22)
@@ -326,6 +326,7 @@ def deploy(
         endpoint_display_name: Optional[str] = None,
         model_display_name: Optional[str] = None,
         deploy_request_timeout: Optional[float] = None,
+        serving_container_spec: Optional[types.ModelContainerSpec] = None,
         serving_container_image_uri: Optional[str] = None,
         serving_container_predict_route: Optional[str] = None,
         serving_container_health_route: Optional[str] = None,
@@ -400,6 +401,10 @@ def deploy(
             model_display_name: The display name of the uploaded model.
             deploy_request_timeout: The timeout for the deploy request. Default
                 is 2 hours.
+            serving_container_spec (types.ModelContainerSpec):
+                Optional. The container specification for the model instance.
+                This specification overrides the default container specification
+                and other serving container parameters.
             serving_container_image_uri (str):
                 Optional. The URI of the Model serving container. This parameter is required
                 if the parameter `local_model` is not specified.
@@ -474,6 +479,11 @@
         Returns:
             endpoint (aiplatform.Endpoint):
                 Created endpoint.
+
+        Raises:
+            ValueError: If ``serving_container_spec`` is specified but ``serving_container_spec.image_uri``
+                is ``None``, or if ``serving_container_spec`` is specified but other
+                serving container parameters are specified.
         """
         request = types.DeployRequest(
             destination=f"projects/{self._project}/locations/{self._location}",
@@ -529,6 +539,17 @@
         if fast_tryout_enabled:
             request.deploy_config.fast_tryout_enabled = fast_tryout_enabled
 
+        if serving_container_spec:
+            if not serving_container_spec.image_uri:
+                raise ValueError(
+                    "Serving container image uri is required for the serving container spec."
+                )
+            if serving_container_image_uri:
+                raise ValueError(
+                    "Serving container image uri is already set in the serving container spec."
+                )
+            request.model_config.container_spec = serving_container_spec
+
         if serving_container_image_uri:
             request.model_config.container_spec = _construct_serving_container_spec(
                 serving_container_image_uri,
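
From the caller's side, both new checks fail before any request is sent: a spec without an image URI is rejected, and supplying an image URI both inside the spec and as serving_container_image_uri is rejected. A minimal sketch of the two error paths; the import paths and the model name are assumptions (the model name is a placeholder):

    # Sketch only: demonstrates the two client-side ValueError paths added above.
    # Import paths and init values are assumptions, not taken from this commit.
    from google.cloud import aiplatform
    from google.cloud.aiplatform_v1beta1 import types  # assumed types module
    from vertexai import model_garden  # assumed public import path

    aiplatform.init(project="my-project", location="us-central1")
    model = model_garden.OpenModel(model_name="google/gemma2@gemma-2-2b-it")  # placeholder

    image = (
        "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/"
        "pytorch-vllm-serve:20241202_0916_RC00"
    )

    try:
        # Spec without an image uri -> rejected before the RPC is issued.
        model.deploy(serving_container_spec=types.ModelContainerSpec(health_route="/ping"))
    except ValueError as e:
        print(e)  # Serving container image uri is required for the serving container spec.

    try:
        # Image uri supplied twice (inside the spec and as a separate argument) -> rejected.
        model.deploy(
            serving_container_spec=types.ModelContainerSpec(image_uri=image),
            serving_container_image_uri=image,
        )
    except ValueError as e:
        print(e)  # Serving container image uri is already set in the serving container spec.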
@@ -574,6 +595,7 @@ def list_deploy_options(
         request = types.GetPublisherModelRequest(
             name=self._publisher_model_name,
             is_hugging_face_model="@" not in self._publisher_model_name,
+            include_equivalent_model_garden_model_deployment_configs=True,
         )
         response = self._us_central1_model_garden_client.get_publisher_model(request)
         multi_deploy = (
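
With include_equivalent_model_garden_model_deployment_configs set on the GetPublisherModelRequest, listing deploy options for a Hugging Face model (treated as such here when the model name carries no "@" version suffix) can also return the verified deployment configurations of its equivalent Model Garden model. A hedged sketch; the import path and the Hugging Face model id are illustrative only:

    # Sketch only: the import path and the Hugging Face model id are assumptions.
    from google.cloud import aiplatform
    from vertexai import model_garden  # assumed public import path

    aiplatform.init(project="my-project", location="us-central1")
    hf_model = model_garden.OpenModel(model_name="meta-llama/Llama-3.1-8B-Instruct")
    # The request now also asks for deployment configs of the equivalent
    # Model Garden model, so options may be returned even when the Hugging Face
    # listing itself carries none.
    options = hf_model.list_deploy_options()
    print(options)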
