
Commit 03d9bf7

vertex-sdk-bot authored and copybara-github committed
feat: Add multihost_gpu_node_count to Vertex SDK.
PiperOrigin-RevId: 739019987
1 parent 827b484 · commit 03d9bf7

File tree

2 files changed, +101 −2 lines changed


google/cloud/aiplatform/preview/models.py

+41 −2
@@ -139,6 +139,7 @@ def create(
        sync=True,
        create_request_timeout: Optional[float] = None,
        required_replica_count: Optional[int] = 0,
+        multihost_gpu_node_count: Optional[int] = None,
    ) -> "DeploymentResourcePool":
        """Creates a new DeploymentResourcePool.

@@ -205,6 +206,9 @@ def create(
                set, the model deploy/mutate operation will succeed once
                available_replica_count reaches required_replica_count, and the
                rest of the replicas will be retried.
+            multihost_gpu_node_count (int):
+                Optional. The number of nodes per replica for multihost GPU
+                deployments. Required for multihost GPU deployments.

        Returns:
            DeploymentResourcePool
@@ -232,6 +236,7 @@ def create(
            sync=sync,
            create_request_timeout=create_request_timeout,
            required_replica_count=required_replica_count,
+            multihost_gpu_node_count=multihost_gpu_node_count,
        )

    @classmethod
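For orientation, here is a minimal usage sketch of the new argument on the preview DeploymentResourcePool.create() surface patched above. It is illustrative only: the project, location, pool ID, and machine shape are placeholders, and every parameter other than multihost_gpu_node_count is assumed from the existing preview API rather than taken from this diff.

from google.cloud import aiplatform
from google.cloud.aiplatform.preview import models as preview_models

# Placeholder project and location; not part of this commit.
aiplatform.init(project="my-project", location="us-central1")

# Create a resource pool whose replicas each span two GPU nodes.
pool = preview_models.DeploymentResourcePool.create(
    deployment_resource_pool_id="my-multihost-pool",  # hypothetical ID
    machine_type="a3-highgpu-8g",         # GPU machine type (mirrors the test constants)
    accelerator_type="NVIDIA_TESLA_A100",
    accelerator_count=8,
    multihost_gpu_node_count=2,           # new parameter introduced by this commit
)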
@@ -254,6 +259,7 @@ def _create(
        sync=True,
        create_request_timeout: Optional[float] = None,
        required_replica_count: Optional[int] = 0,
+        multihost_gpu_node_count: Optional[int] = None,
    ) -> "DeploymentResourcePool":
        """Creates a new DeploymentResourcePool.

@@ -323,6 +329,9 @@ def _create(
                set, the model deploy/mutate operation will succeed once
                available_replica_count reaches required_replica_count, and the
                rest of the replicas will be retried.
+            multihost_gpu_node_count (int):
+                Optional. The number of nodes per replica for multihost GPU
+                deployments. Required for multihost GPU deployments.

        Returns:
            DeploymentResourcePool
@@ -339,7 +348,8 @@ def _create(
        )

        machine_spec = gca_machine_resources_compat.MachineSpec(
-            machine_type=machine_type
+            machine_type=machine_type,
+            multihost_gpu_node_count=multihost_gpu_node_count,
        )

        if autoscaling_target_cpu_utilization:
@@ -369,6 +379,9 @@ def _create(
                [autoscaling_metric_spec]
            )

+        if multihost_gpu_node_count:
+            machine_spec.multihost_gpu_node_count = multihost_gpu_node_count
+
        dedicated_resources.machine_spec = machine_spec

        gapic_drp = gca_deployment_resource_pool_compat.DeploymentResourcePool(
@@ -691,6 +704,7 @@ def deploy(
        system_labels: Optional[Dict[str, str]] = None,
        required_replica_count: Optional[int] = 0,
        rollout_options: Optional[RolloutOptions] = None,
+        multihost_gpu_node_count: Optional[int] = None,
    ) -> None:
        """Deploys a Model to the Endpoint.

@@ -789,6 +803,9 @@ def deploy(
                rest of the replicas will be retried.
            rollout_options (RolloutOptions):
                Optional. Options to configure a rolling deployment.
+            multihost_gpu_node_count (int): Optional. The number of nodes per
+                replica for multihost GPU deployments. Required for multihost GPU
+                deployments.

        """
        self._sync_gca_resource_if_skipped()
@@ -832,6 +849,7 @@ def deploy(
            system_labels=system_labels,
            required_replica_count=required_replica_count,
            rollout_options=rollout_options,
+            multihost_gpu_node_count=multihost_gpu_node_count,
        )

    @base.optional_sync()
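The same argument is threaded through the preview Endpoint.deploy() path shown above. A hedged sketch of a call site follows; the endpoint and model resource names are placeholders, and the surrounding parameters are assumed from the existing preview API, not from this diff.

from google.cloud.aiplatform.preview import models as preview_models

# Placeholder resource names; only multihost_gpu_node_count comes from this commit.
endpoint = preview_models.Endpoint(
    "projects/my-project/locations/us-central1/endpoints/1234567890"
)
model = preview_models.Model(
    "projects/my-project/locations/us-central1/models/0987654321"
)

endpoint.deploy(
    model=model,
    machine_type="a3-highgpu-8g",
    accelerator_type="NVIDIA_TESLA_A100",
    accelerator_count=8,
    multihost_gpu_node_count=2,  # nodes per replica for a multihost GPU deployment
)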
@@ -859,6 +877,7 @@ def _deploy(
        system_labels: Optional[Dict[str, str]] = None,
        required_replica_count: Optional[int] = 0,
        rollout_options: Optional[RolloutOptions] = None,
+        multihost_gpu_node_count: Optional[int] = None,
    ) -> None:
        """Deploys a Model to the Endpoint.

@@ -951,6 +970,10 @@ def _deploy(
                rest of the replicas will be retried.
            rollout_options (RolloutOptions): Optional.
                Options to configure a rolling deployment.
+            multihost_gpu_node_count (int): Optional. The number of nodes per
+                replica for multihost GPU deployments. Required for multihost
+                GPU deployments.
+
        """
        _LOGGER.log_action_start_against_resource(
            f"Deploying Model {model.resource_name} to", "", self
@@ -982,6 +1005,7 @@ def _deploy(
            system_labels=system_labels,
            required_replica_count=required_replica_count,
            rollout_options=rollout_options,
+            multihost_gpu_node_count=multihost_gpu_node_count,
        )

        _LOGGER.log_action_completed_against_resource("model", "deployed", self)
@@ -1016,6 +1040,7 @@ def _deploy_call(
        system_labels: Optional[Dict[str, str]] = None,
        required_replica_count: Optional[int] = 0,
        rollout_options: Optional[RolloutOptions] = None,
+        multihost_gpu_node_count: Optional[int] = None,
    ) -> None:
        """Helper method to deploy model to endpoint.

@@ -1115,6 +1140,9 @@ def _deploy_call(
                rest of the replicas will be retried.
            rollout_options (RolloutOptions): Optional. Options to configure a
                rolling deployment.
+            multihost_gpu_node_count (int):
+                Optional. The number of nodes per replica for multihost GPU
+                deployments. Required for multihost GPU deployments.

        Raises:
            ValueError: If only `accelerator_type` or `accelerator_count` is
@@ -1195,7 +1223,8 @@ def _deploy_call(
            )

            machine_spec = gca_machine_resources_compat.MachineSpec(
-                machine_type=machine_type
+                machine_type=machine_type,
+                multihost_gpu_node_count=multihost_gpu_node_count,
            )

            if autoscaling_target_cpu_utilization:
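Based on the hunk above, the _deploy_call() helper now forwards the argument into the MachineSpec it builds. The sketch below shows roughly the resulting v1beta1 proto, mirroring the expectation asserted in the unit test further down; the import alias is assumed to match the one used in test_models.py.

from google.cloud.aiplatform.compat.types import (
    machine_resources_v1beta1 as gca_machine_resources_v1beta1,
)

# Roughly what the helper assembles when multihost_gpu_node_count is passed.
machine_spec = gca_machine_resources_v1beta1.MachineSpec(
    machine_type="a3-highgpu-8g",
    accelerator_type="NVIDIA_TESLA_A100",
    accelerator_count=8,
    multihost_gpu_node_count=2,  # new field populated from the deploy() argument
)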
@@ -1538,6 +1567,7 @@ def deploy(
        system_labels: Optional[Dict[str, str]] = None,
        required_replica_count: Optional[int] = 0,
        rollout_options: Optional[RolloutOptions] = None,
+        multihost_gpu_node_count: Optional[int] = None,
    ) -> Union[Endpoint, models.PrivateEndpoint]:
        """Deploys model to endpoint.

@@ -1657,6 +1687,9 @@ def deploy(
                rest of the replicas will be retried.
            rollout_options (RolloutOptions):
                Optional. Options to configure a rolling deployment.
+            multihost_gpu_node_count (int):
+                Optional. The number of nodes per replica for multihost GPU
+                deployments. Required for multihost GPU deployments.

        Returns:
            endpoint (Union[Endpoint, models.PrivateEndpoint]):
@@ -1717,6 +1750,7 @@ def deploy(
            system_labels=system_labels,
            required_replica_count=required_replica_count,
            rollout_options=rollout_options,
+            multihost_gpu_node_count=multihost_gpu_node_count,
        )

    def _should_enable_dedicated_endpoint(self, fast_tryout_enabled: bool) -> bool:
@@ -1753,6 +1787,7 @@ def _deploy(
        system_labels: Optional[Dict[str, str]] = None,
        required_replica_count: Optional[int] = 0,
        rollout_options: Optional[RolloutOptions] = None,
+        multihost_gpu_node_count: Optional[int] = None,
    ) -> Union[Endpoint, models.PrivateEndpoint]:
        """Deploys model to endpoint.

@@ -1863,6 +1898,9 @@ def _deploy(
                rest of the replicas will be retried.
            rollout_options (RolloutOptions):
                Optional. Options to configure a rolling deployment.
+            multihost_gpu_node_count (int):
+                Optional. The number of nodes per replica for multihost GPU
+                deployments. Required for multihost GPU deployments.

        Returns:
            endpoint (Union[Endpoint, models.PrivateEndpoint]):
@@ -1928,6 +1966,7 @@ def _deploy(
            fast_tryout_enabled=fast_tryout_enabled,
            system_labels=system_labels,
            required_replica_count=required_replica_count,
+            multihost_gpu_node_count=multihost_gpu_node_count,
            **preview_kwargs,
        )
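Taken together, the user-facing entry point for the new field is Model.deploy() in the preview namespace. A minimal end-to-end sketch follows, mirroring the unit test added below; the model ID is a placeholder and the machine values reuse the literals from the new test constants.

from google.cloud.aiplatform.preview import models as preview_models

model = preview_models.Model("1234")  # placeholder model ID
endpoint = model.deploy(
    machine_type="a3-highgpu-8g",
    accelerator_type="NVIDIA_TESLA_A100",
    accelerator_count=8,
    multihost_gpu_node_count=2,  # two GPU nodes per replica -> multihost deployment
    sync=True,
)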

tests/unit/aiplatform/test_models.py

+60
@@ -146,6 +146,11 @@
_TEST_TPU_MACHINE_TYPE = "ct5lp-hightpu-4t"
_TEST_TPU_TOPOLOGY = "2x2"

+_TEST_GPU_MACHINE_TYPE = "a3-highgpu-8g"
+_TEST_GPU_ACCELERATOR_TYPE = "NVIDIA_TESLA_A100"
+_TEST_GPU_ACCELERATOR_COUNT = 8
+_TEST_MULTIHOST_GPU_NODE_COUNT = 2
+
_TEST_BATCH_SIZE = 16

_TEST_PIPELINE_RESOURCE_NAME = (
@@ -2239,6 +2244,61 @@ def test_deploy_no_endpoint_with_tpu_topology(self, deploy_model_mock, sync):
            timeout=None,
        )

+    @pytest.mark.usefixtures(
+        "get_endpoint_mock",
+        "get_model_mock",
+        "create_endpoint_mock",
+        "preview_deploy_model_mock",
+    )
+    @pytest.mark.parametrize("sync", [True, False])
+    def test_deploy_no_endpoint_with_multihost_gpu_node_count(
+        self, preview_deploy_model_mock, sync
+    ):
+        test_model = preview_models.Model(_TEST_ID)
+        test_model._gca_resource.supported_deployment_resources_types.append(
+            aiplatform.gapic.Model.DeploymentResourcesType.DEDICATED_RESOURCES
+        )
+
+        test_endpoint = test_model.deploy(
+            machine_type=_TEST_GPU_MACHINE_TYPE,
+            accelerator_type=_TEST_GPU_ACCELERATOR_TYPE,
+            accelerator_count=_TEST_GPU_ACCELERATOR_COUNT,
+            multihost_gpu_node_count=_TEST_MULTIHOST_GPU_NODE_COUNT,
+            sync=sync,
+            deploy_request_timeout=None,
+        )
+
+        if not sync:
+            test_endpoint.wait()
+
+        expected_machine_spec = gca_machine_resources_v1beta1.MachineSpec(
+            machine_type=_TEST_GPU_MACHINE_TYPE,
+            accelerator_type=_TEST_GPU_ACCELERATOR_TYPE,
+            accelerator_count=_TEST_GPU_ACCELERATOR_COUNT,
+            multihost_gpu_node_count=_TEST_MULTIHOST_GPU_NODE_COUNT,
+        )
+        expected_dedicated_resources = gca_machine_resources_v1beta1.DedicatedResources(
+            machine_spec=expected_machine_spec,
+            min_replica_count=1,
+            max_replica_count=1,
+            spot=False,
+        )
+        expected_deployed_model = gca_endpoint_v1beta1.DeployedModel(
+            dedicated_resources=expected_dedicated_resources,
+            model=test_model.resource_name,
+            display_name=None,
+            enable_container_logging=True,
+            faster_deployment_config=gca_endpoint_v1beta1.FasterDeploymentConfig(),
+        )
+
+        preview_deploy_model_mock.assert_called_once_with(
+            endpoint=test_endpoint.resource_name,
+            deployed_model=expected_deployed_model,
+            traffic_split={"0": 100},
+            metadata=(),
+            timeout=None,
+        )
+
    @pytest.mark.usefixtures(
        "get_endpoint_mock", "get_model_mock", "create_endpoint_mock"
    )
