@@ -181,6 +181,7 @@ def get_publisher_model_mock():
181
181
multi_deploy_vertex = types .PublisherModel .CallToAction .DeployVertex (
182
182
multi_deploy_vertex = [
183
183
types .PublisherModel .CallToAction .Deploy (
184
+ deploy_task_name = "vLLM 32K context" ,
184
185
container_spec = types .ModelContainerSpec (
185
186
image_uri = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00" ,
186
187
command = ["python" , "main.py" ],
@@ -198,6 +199,7 @@ def get_publisher_model_mock():
198
199
),
199
200
),
200
201
types .PublisherModel .CallToAction .Deploy (
202
+ deploy_task_name = "vLLM 128K context" ,
201
203
container_spec = types .ModelContainerSpec (
202
204
image_uri = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/text-generation-inference-cu121.2-1.py310:latest" ,
203
205
command = ["python" , "main.py" ],
@@ -1032,17 +1034,17 @@ def test_list_deploy_options_concise(self, get_publisher_model_mock):
1032
1034
result = model .list_deploy_options (concise = True )
1033
1035
expected_result = textwrap .dedent (
1034
1036
"""\
1035
- [Option 1]
1036
- serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
1037
- machine_type="g2-standard-16",
1038
- accelerator_type="NVIDIA_L4",
1039
- accelerator_count=1,
1037
+ [Option 1: vLLM 32K context ]
1038
+ serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
1039
+ machine_type="g2-standard-16",
1040
+ accelerator_type="NVIDIA_L4",
1041
+ accelerator_count=1,
1040
1042
1041
- [Option 2]
1042
- serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/text-generation-inference-cu121.2-1.py310:latest",
1043
- machine_type="g2-standard-32",
1044
- accelerator_type="NVIDIA_L4",
1045
- accelerator_count=4,"""
1043
+ [Option 2: vLLM 128K context ]
1044
+ serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/text-generation-inference-cu121.2-1.py310:latest",
1045
+ machine_type="g2-standard-32",
1046
+ accelerator_type="NVIDIA_L4",
1047
+ accelerator_count=4,"""
1046
1048
)
1047
1049
assert result == expected_result
1048
1050
get_publisher_model_mock .assert_called_with (
@@ -1058,16 +1060,16 @@ def test_list_deploy_options_concise(self, get_publisher_model_mock):
1058
1060
expected_hf_result = textwrap .dedent (
1059
1061
"""\
1060
1062
[Option 1]
1061
- serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
1062
- machine_type="g2-standard-16",
1063
- accelerator_type="NVIDIA_L4",
1064
- accelerator_count=1,
1063
+ serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-vllm-serve:20241202_0916_RC00",
1064
+ machine_type="g2-standard-16",
1065
+ accelerator_type="NVIDIA_L4",
1066
+ accelerator_count=1,
1065
1067
1066
1068
[Option 2]
1067
- serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/text-generation-inference-cu121.2-1.py310:latest",
1068
- machine_type="g2-standard-32",
1069
- accelerator_type="NVIDIA_L4",
1070
- accelerator_count=4,"""
1069
+ serving_container_image_uri="us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/text-generation-inference-cu121.2-1.py310:latest",
1070
+ machine_type="g2-standard-32",
1071
+ accelerator_type="NVIDIA_L4",
1072
+ accelerator_count=4,"""
1071
1073
)
1072
1074
assert hf_result == expected_hf_result
1073
1075
get_publisher_model_mock .assert_called_with (
0 commit comments