@@ -139,6 +139,7 @@ def create(
         sync=True,
         create_request_timeout: Optional[float] = None,
         required_replica_count: Optional[int] = 0,
+        multihost_gpu_node_count: Optional[int] = None,
     ) -> "DeploymentResourcePool":
         """Creates a new DeploymentResourcePool.

@@ -205,6 +206,9 @@ def create(
                 set, the model deploy/mutate operation will succeed once
                 available_replica_count reaches required_replica_count, and the
                 rest of the replicas will be retried.
+            multihost_gpu_node_count (int):
+                Optional. The number of nodes per replica for multihost GPU
+                deployments. Required for multihost GPU deployments.

         Returns:
             DeploymentResourcePool
@@ -232,6 +236,7 @@ def create(
             sync=sync,
             create_request_timeout=create_request_timeout,
             required_replica_count=required_replica_count,
+            multihost_gpu_node_count=multihost_gpu_node_count,
         )

     @classmethod
@@ -254,6 +259,7 @@ def _create(
         sync=True,
         create_request_timeout: Optional[float] = None,
         required_replica_count: Optional[int] = 0,
+        multihost_gpu_node_count: Optional[int] = None,
     ) -> "DeploymentResourcePool":
         """Creates a new DeploymentResourcePool.

@@ -323,6 +329,9 @@ def _create(
                 set, the model deploy/mutate operation will succeed once
                 available_replica_count reaches required_replica_count, and the
                 rest of the replicas will be retried.
+            multihost_gpu_node_count (int):
+                Optional. The number of nodes per replica for multihost GPU
+                deployments. Required for multihost GPU deployments.

         Returns:
             DeploymentResourcePool
@@ -339,7 +348,8 @@ def _create(
         )

         machine_spec = gca_machine_resources_compat.MachineSpec(
-            machine_type=machine_type
+            machine_type=machine_type,
+            multihost_gpu_node_count=multihost_gpu_node_count,
         )

         if autoscaling_target_cpu_utilization:
@@ -369,6 +379,9 @@ def _create(
                 [autoscaling_metric_spec]
             )

+        if multihost_gpu_node_count:
+            machine_spec.multihost_gpu_node_count = multihost_gpu_node_count
+
         dedicated_resources.machine_spec = machine_spec

         gapic_drp = gca_deployment_resource_pool_compat.DeploymentResourcePool(
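For context, a minimal usage sketch of the new parameter on the shared-resources path. Everything except multihost_gpu_node_count is illustrative: the project, resource ID, machine type, accelerator values, and the preview import path are assumptions, not taken from this change.

# Sketch only; assumed values throughout, with the new parameter at the end.
from google.cloud import aiplatform
from google.cloud.aiplatform.preview import models as preview_models

aiplatform.init(project="my-project", location="us-central1")  # placeholder project/region

pool = preview_models.DeploymentResourcePool.create(
    deployment_resource_pool_id="multihost-gpu-pool",  # placeholder ID
    machine_type="a3-ultragpu-8g",        # assumed multihost-capable machine type
    accelerator_type="NVIDIA_H100_80GB",  # assumed accelerator
    accelerator_count=8,
    min_replica_count=1,
    max_replica_count=1,
    multihost_gpu_node_count=2,  # new: nodes per replica for multihost GPU deployments
)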
@@ -691,6 +704,7 @@ def deploy(
         system_labels: Optional[Dict[str, str]] = None,
         required_replica_count: Optional[int] = 0,
         rollout_options: Optional[RolloutOptions] = None,
+        multihost_gpu_node_count: Optional[int] = None,
     ) -> None:
         """Deploys a Model to the Endpoint.

@@ -789,6 +803,9 @@ def deploy(
                 rest of the replicas will be retried.
             rollout_options (RolloutOptions):
                 Optional. Options to configure a rolling deployment.
+            multihost_gpu_node_count (int): Optional. The number of nodes per
+                replica for multihost GPU deployments. Required for multihost GPU
+                deployments.

         """
         self._sync_gca_resource_if_skipped()
@@ -832,6 +849,7 @@ def deploy(
             system_labels=system_labels,
             required_replica_count=required_replica_count,
             rollout_options=rollout_options,
+            multihost_gpu_node_count=multihost_gpu_node_count,
         )

     @base.optional_sync()
@@ -859,6 +877,7 @@ def _deploy(
         system_labels: Optional[Dict[str, str]] = None,
         required_replica_count: Optional[int] = 0,
         rollout_options: Optional[RolloutOptions] = None,
+        multihost_gpu_node_count: Optional[int] = None,
     ) -> None:
         """Deploys a Model to the Endpoint.

@@ -951,6 +970,10 @@ def _deploy(
                 rest of the replicas will be retried.
             rollout_options (RolloutOptions): Optional.
                 Options to configure a rolling deployment.
+            multihost_gpu_node_count (int): Optional. The number of nodes per
+                replica for multihost GPU deployments. Required for multihost
+                GPU deployments.
+
         """
         _LOGGER.log_action_start_against_resource(
             f"Deploying Model {model.resource_name} to", "", self
@@ -982,6 +1005,7 @@ def _deploy(
             system_labels=system_labels,
             required_replica_count=required_replica_count,
             rollout_options=rollout_options,
+            multihost_gpu_node_count=multihost_gpu_node_count,
         )

         _LOGGER.log_action_completed_against_resource("model", "deployed", self)
@@ -1016,6 +1040,7 @@ def _deploy_call(
         system_labels: Optional[Dict[str, str]] = None,
         required_replica_count: Optional[int] = 0,
         rollout_options: Optional[RolloutOptions] = None,
+        multihost_gpu_node_count: Optional[int] = None,
     ) -> None:
         """Helper method to deploy model to endpoint.

@@ -1115,6 +1140,9 @@ def _deploy_call(
                 rest of the replicas will be retried.
             rollout_options (RolloutOptions): Optional. Options to configure a
                 rolling deployment.
+            multihost_gpu_node_count (int):
+                Optional. The number of nodes per replica for multihost GPU
+                deployments. Required for multihost GPU deployments.

         Raises:
             ValueError: If only `accelerator_type` or `accelerator_count` is
@@ -1195,7 +1223,8 @@ def _deploy_call(
             )

             machine_spec = gca_machine_resources_compat.MachineSpec(
-                machine_type=machine_type
+                machine_type=machine_type,
+                multihost_gpu_node_count=multihost_gpu_node_count,
             )

             if autoscaling_target_cpu_utilization:
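Both call sites above end up threading the value into the MachineSpec message. A sketch of the resulting object, assuming the gca_machine_resources_compat alias resolves to the generated machine_resources types and using illustrative hardware values:

# Sketch of the message the helpers construct; hardware values are placeholders.
from google.cloud.aiplatform.compat.types import machine_resources as gca_machine_resources_compat

machine_spec = gca_machine_resources_compat.MachineSpec(
    machine_type="a3-ultragpu-8g",        # assumed
    accelerator_type="NVIDIA_H100_80GB",  # assumed
    accelerator_count=8,
    multihost_gpu_node_count=2,           # the field wired up by this change
)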
@@ -1538,6 +1567,7 @@ def deploy(
         system_labels: Optional[Dict[str, str]] = None,
         required_replica_count: Optional[int] = 0,
         rollout_options: Optional[RolloutOptions] = None,
+        multihost_gpu_node_count: Optional[int] = None,
     ) -> Union[Endpoint, models.PrivateEndpoint]:
         """Deploys model to endpoint.

@@ -1657,6 +1687,9 @@ def deploy(
                 rest of the replicas will be retried.
             rollout_options (RolloutOptions):
                 Optional. Options to configure a rolling deployment.
+            multihost_gpu_node_count (int):
+                Optional. The number of nodes per replica for multihost GPU
+                deployments. Required for multihost GPU deployments.

         Returns:
             endpoint (Union[Endpoint, models.PrivateEndpoint]):
@@ -1717,6 +1750,7 @@ def deploy(
             system_labels=system_labels,
             required_replica_count=required_replica_count,
             rollout_options=rollout_options,
+            multihost_gpu_node_count=multihost_gpu_node_count,
         )

     def _should_enable_dedicated_endpoint(self, fast_tryout_enabled: bool) -> bool:
@@ -1753,6 +1787,7 @@ def _deploy(
         system_labels: Optional[Dict[str, str]] = None,
         required_replica_count: Optional[int] = 0,
         rollout_options: Optional[RolloutOptions] = None,
+        multihost_gpu_node_count: Optional[int] = None,
     ) -> Union[Endpoint, models.PrivateEndpoint]:
         """Deploys model to endpoint.

@@ -1863,6 +1898,9 @@ def _deploy(
                 rest of the replicas will be retried.
             rollout_options (RolloutOptions):
                 Optional. Options to configure a rolling deployment.
+            multihost_gpu_node_count (int):
+                Optional. The number of nodes per replica for multihost GPU
+                deployments. Required for multihost GPU deployments.

         Returns:
             endpoint (Union[Endpoint, models.PrivateEndpoint]):
@@ -1928,6 +1966,7 @@ def _deploy(
             fast_tryout_enabled=fast_tryout_enabled,
             system_labels=system_labels,
             required_replica_count=required_replica_count,
+            multihost_gpu_node_count=multihost_gpu_node_count,
             **preview_kwargs,
         )
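The same parameter is also exposed on the model-centric path. A sketch with a placeholder model resource name and assumed hardware values; whether this lands on the GA aiplatform.Model or the preview namespace is an assumption here, not stated in the diff:

# Sketch only; model ID and hardware values are placeholders.
model = aiplatform.Model("projects/my-project/locations/us-central1/models/1234567890")
endpoint = model.deploy(
    machine_type="a3-ultragpu-8g",        # assumed multihost-capable machine type
    accelerator_type="NVIDIA_H100_80GB",  # assumed accelerator
    accelerator_count=8,
    min_replica_count=1,
    max_replica_count=1,
    multihost_gpu_node_count=2,  # new: nodes per replica; required for multihost GPU deployments
    sync=True,
)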