@@ -251,6 +251,7 @@ def create(
251
251
reservation_affinity_key : Optional [str ] = None ,
252
252
reservation_affinity_values : Optional [List [str ]] = None ,
253
253
spot : bool = False ,
254
+ required_replica_count : Optional [int ] = 0 ,
254
255
) -> "DeploymentResourcePool" :
255
256
"""Creates a new DeploymentResourcePool.
256
257
@@ -323,6 +324,14 @@ def create(
323
324
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
324
325
spot (bool):
325
326
Optional. Whether to schedule the deployment workload on spot VMs.
327
+ required_replica_count (int):
328
+ Optional. Number of required available replicas for the
329
+ deployment to succeed. This field is only needed when partial
330
+ model deployment/mutation is desired, with a value greater than
331
+ or equal to 1 and less than or equal to min_replica_count. If
332
+ set, the model deploy/mutate operation will succeed once
333
+ available_replica_count reaches required_replica_count, and the
334
+ rest of the replicas will be retried.
326
335
327
336
Returns:
328
337
DeploymentResourcePool
@@ -353,6 +362,7 @@ def create(
353
362
spot = spot ,
354
363
sync = sync ,
355
364
create_request_timeout = create_request_timeout ,
365
+ required_replica_count = required_replica_count ,
356
366
)
357
367
358
368
@classmethod
@@ -378,6 +388,7 @@ def _create(
378
388
spot : bool = False ,
379
389
sync = True ,
380
390
create_request_timeout : Optional [float ] = None ,
391
+ required_replica_count : Optional [int ] = 0 ,
381
392
) -> "DeploymentResourcePool" :
382
393
"""Creates a new DeploymentResourcePool.
383
394
@@ -453,6 +464,14 @@ def _create(
453
464
when the Future has completed.
454
465
create_request_timeout (float):
455
466
Optional. The create request timeout in seconds.
467
+ required_replica_count (int):
468
+ Optional. Number of required available replicas for the
469
+ deployment to succeed. This field is only needed when partial
470
+ model deployment/mutation is desired, with a value greater than
471
+ or equal to 1 and less than or equal to min_replica_count. If
472
+ set, the model deploy/mutate operation will succeed once
473
+ available_replica_count reaches required_replica_count, and the
474
+ rest of the replicas will be retried.
456
475
457
476
Returns:
458
477
DeploymentResourcePool
@@ -466,6 +485,7 @@ def _create(
466
485
min_replica_count = min_replica_count ,
467
486
max_replica_count = max_replica_count ,
468
487
spot = spot ,
488
+ required_replica_count = required_replica_count ,
469
489
)
470
490
471
491
machine_spec = gca_machine_resources_compat .MachineSpec (
@@ -1186,6 +1206,7 @@ def _validate_deploy_args(
1186
1206
traffic_split : Optional [Dict [str , int ]],
1187
1207
traffic_percentage : Optional [int ],
1188
1208
deployment_resource_pool : Optional [DeploymentResourcePool ],
1209
+ required_replica_count : Optional [int ],
1189
1210
):
1190
1211
"""Helper method to validate deploy arguments.
1191
1212
@@ -1233,6 +1254,14 @@ def _validate_deploy_args(
1233
1254
are deployed to the same DeploymentResourcePool will be hosted in
1234
1255
a shared model server. If provided, will override replica count
1235
1256
arguments.
1257
+ required_replica_count (int):
1258
+ Optional. Number of required available replicas for the
1259
+ deployment to succeed. This field is only needed when partial
1260
+ model deployment/mutation is desired, with a value greater than
1261
+ or equal to 1 and less than or equal to min_replica_count. If
1262
+ set, the model deploy/mutate operation will succeed once
1263
+ available_replica_count reaches required_replica_count, and the
1264
+ rest of the replicas will be retried.
1236
1265
1237
1266
Raises:
1238
1267
ValueError: if Min or Max replica is negative. Traffic percentage > 100 or
@@ -1246,6 +1275,8 @@ def _validate_deploy_args(
1246
1275
and min_replica_count != 1
1247
1276
or max_replica_count
1248
1277
and max_replica_count != 1
1278
+ or required_replica_count
1279
+ and required_replica_count != 0
1249
1280
):
1250
1281
raise ValueError (
1251
1282
"Ignoring explicitly specified replica counts, "
@@ -1264,6 +1295,8 @@ def _validate_deploy_args(
1264
1295
raise ValueError ("Min replica cannot be negative." )
1265
1296
if max_replica_count < 0 :
1266
1297
raise ValueError ("Max replica cannot be negative." )
1298
+ if required_replica_count and required_replica_count < 0 :
1299
+ raise ValueError ("Required replica cannot be negative." )
1267
1300
if accelerator_type :
1268
1301
utils .validate_accelerator_type (accelerator_type )
1269
1302
@@ -1313,6 +1346,7 @@ def deploy(
1313
1346
spot : bool = False ,
1314
1347
fast_tryout_enabled : bool = False ,
1315
1348
system_labels : Optional [Dict [str , str ]] = None ,
1349
+ required_replica_count : Optional [int ] = 0 ,
1316
1350
) -> None :
1317
1351
"""Deploys a Model to the Endpoint.
1318
1352
@@ -1428,6 +1462,14 @@ def deploy(
1428
1462
system_labels (Dict[str, str]):
1429
1463
Optional. System labels to apply to Model Garden deployments.
1430
1464
System labels are managed by Google for internal use only.
1465
+ required_replica_count (int):
1466
+ Optional. Number of required available replicas for the
1467
+ deployment to succeed. This field is only needed when partial
1468
+ model deployment/mutation is desired, with a value greater than
1469
+ or equal to 1 and less than or equal to min_replica_count. If
1470
+ set, the model deploy/mutate operation will succeed once
1471
+ available_replica_count reaches required_replica_count, and the
1472
+ rest of the replicas will be retried.
1431
1473
"""
1432
1474
self ._sync_gca_resource_if_skipped ()
1433
1475
@@ -1439,6 +1481,7 @@ def deploy(
1439
1481
traffic_split = traffic_split ,
1440
1482
traffic_percentage = traffic_percentage ,
1441
1483
deployment_resource_pool = deployment_resource_pool ,
1484
+ required_replica_count = required_replica_count ,
1442
1485
)
1443
1486
1444
1487
explanation_spec = _explanation_utils .create_and_validate_explanation_spec (
@@ -1473,6 +1516,7 @@ def deploy(
1473
1516
deployment_resource_pool = deployment_resource_pool ,
1474
1517
fast_tryout_enabled = fast_tryout_enabled ,
1475
1518
system_labels = system_labels ,
1519
+ required_replica_count = required_replica_count ,
1476
1520
)
1477
1521
1478
1522
@base .optional_sync ()
@@ -1504,6 +1548,7 @@ def _deploy(
1504
1548
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
1505
1549
fast_tryout_enabled : bool = False ,
1506
1550
system_labels : Optional [Dict [str , str ]] = None ,
1551
+ required_replica_count : Optional [int ] = 0 ,
1507
1552
) -> None :
1508
1553
"""Deploys a Model to the Endpoint.
1509
1554
@@ -1613,6 +1658,14 @@ def _deploy(
1613
1658
system_labels (Dict[str, str]):
1614
1659
Optional. System labels to apply to Model Garden deployments.
1615
1660
System labels are managed by Google for internal use only.
1661
+ required_replica_count (int):
1662
+ Optional. Number of required available replicas for the
1663
+ deployment to succeed. This field is only needed when partial
1664
+ model deployment/mutation is desired, with a value greater than
1665
+ or equal to 1 and less than or equal to min_replica_count. If
1666
+ set, the model deploy/mutate operation will succeed once
1667
+ available_replica_count reaches required_replica_count, and the
1668
+ rest of the replicas will be retried.
1616
1669
"""
1617
1670
_LOGGER .log_action_start_against_resource (
1618
1671
f"Deploying Model { model .resource_name } to" , "" , self
@@ -1648,6 +1701,7 @@ def _deploy(
1648
1701
deployment_resource_pool = deployment_resource_pool ,
1649
1702
fast_tryout_enabled = fast_tryout_enabled ,
1650
1703
system_labels = system_labels ,
1704
+ required_replica_count = required_replica_count ,
1651
1705
)
1652
1706
1653
1707
_LOGGER .log_action_completed_against_resource ("model" , "deployed" , self )
@@ -1686,6 +1740,7 @@ def _deploy_call(
1686
1740
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
1687
1741
fast_tryout_enabled : bool = False ,
1688
1742
system_labels : Optional [Dict [str , str ]] = None ,
1743
+ required_replica_count : Optional [int ] = 0 ,
1689
1744
) -> None :
1690
1745
"""Helper method to deploy model to endpoint.
1691
1746
@@ -1802,6 +1857,14 @@ def _deploy_call(
1802
1857
system_labels (Dict[str, str]):
1803
1858
Optional. System labels to apply to Model Garden deployments.
1804
1859
System labels are managed by Google for internal use only.
1860
+ required_replica_count (int):
1861
+ Optional. Number of required available replicas for the
1862
+ deployment to succeed. This field is only needed when partial
1863
+ model deployment/mutation is desired, with a value greater than
1864
+ or equal to 1 and less than or equal to min_replica_count. If
1865
+ set, the model deploy/mutate operation will succeed once
1866
+ available_replica_count reaches required_replica_count, and the
1867
+ rest of the replicas will be retried.
1805
1868
1806
1869
Raises:
1807
1870
ValueError: If only `accelerator_type` or `accelerator_count` is specified.
@@ -1927,6 +1990,7 @@ def _deploy_call(
1927
1990
min_replica_count = min_replica_count ,
1928
1991
max_replica_count = max_replica_count ,
1929
1992
spot = spot ,
1993
+ required_replica_count = required_replica_count ,
1930
1994
)
1931
1995
1932
1996
machine_spec = gca_machine_resources_compat .MachineSpec (
@@ -3963,6 +4027,7 @@ def deploy(
3963
4027
reservation_affinity_values : Optional [List [str ]] = None ,
3964
4028
spot : bool = False ,
3965
4029
system_labels : Optional [Dict [str , str ]] = None ,
4030
+ required_replica_count : Optional [int ] = 0 ,
3966
4031
) -> None :
3967
4032
"""Deploys a Model to the PrivateEndpoint.
3968
4033
@@ -4081,6 +4146,14 @@ def deploy(
4081
4146
system_labels (Dict[str, str]):
4082
4147
Optional. System labels to apply to Model Garden deployments.
4083
4148
System labels are managed by Google for internal use only.
4149
+ required_replica_count (int):
4150
+ Optional. Number of required available replicas for the
4151
+ deployment to succeed. This field is only needed when partial
4152
+ model deployment/mutation is desired, with a value greater than
4153
+ or equal to 1 and less than or equal to min_replica_count. If
4154
+ set, the model deploy/mutate operation will succeed once
4155
+ available_replica_count reaches required_replica_count, and the
4156
+ rest of the replicas will be retried.
4084
4157
"""
4085
4158
4086
4159
if self .network :
@@ -4098,6 +4171,7 @@ def deploy(
4098
4171
traffic_split = traffic_split ,
4099
4172
traffic_percentage = traffic_percentage ,
4100
4173
deployment_resource_pool = None ,
4174
+ required_replica_count = required_replica_count ,
4101
4175
)
4102
4176
4103
4177
explanation_spec = _explanation_utils .create_and_validate_explanation_spec (
@@ -4126,6 +4200,7 @@ def deploy(
4126
4200
spot = spot ,
4127
4201
disable_container_logging = disable_container_logging ,
4128
4202
system_labels = system_labels ,
4203
+ required_replica_count = required_replica_count ,
4129
4204
)
4130
4205
4131
4206
def update (
@@ -5190,6 +5265,7 @@ def deploy(
5190
5265
spot : bool = False ,
5191
5266
fast_tryout_enabled : bool = False ,
5192
5267
system_labels : Optional [Dict [str , str ]] = None ,
5268
+ required_replica_count : Optional [int ] = 0 ,
5193
5269
) -> Union [Endpoint , PrivateEndpoint ]:
5194
5270
"""Deploys model to endpoint. Endpoint will be created if unspecified.
5195
5271
@@ -5327,6 +5403,14 @@ def deploy(
5327
5403
system_labels (Dict[str, str]):
5328
5404
Optional. System labels to apply to Model Garden deployments.
5329
5405
System labels are managed by Google for internal use only.
5406
+ required_replica_count (int):
5407
+ Optional. Number of required available replicas for the
5408
+ deployment to succeed. This field is only needed when partial
5409
+ model deployment/mutation is desired, with a value greater than
5410
+ or equal to 1 and less than or equal to min_replica_count. If
5411
+ set, the model deploy/mutate operation will succeed once
5412
+ available_replica_count reaches required_replica_count, and the
5413
+ rest of the replicas will be retried.
5330
5414
5331
5415
Returns:
5332
5416
endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5345,6 +5429,7 @@ def deploy(
5345
5429
traffic_split = traffic_split ,
5346
5430
traffic_percentage = traffic_percentage ,
5347
5431
deployment_resource_pool = deployment_resource_pool ,
5432
+ required_replica_count = required_replica_count ,
5348
5433
)
5349
5434
5350
5435
if isinstance (endpoint , PrivateEndpoint ):
@@ -5397,6 +5482,7 @@ def deploy(
5397
5482
deployment_resource_pool = deployment_resource_pool ,
5398
5483
fast_tryout_enabled = fast_tryout_enabled ,
5399
5484
system_labels = system_labels ,
5485
+ required_replica_count = required_replica_count ,
5400
5486
)
5401
5487
5402
5488
def _should_enable_dedicated_endpoint (self , fast_tryout_enabled : bool ) -> bool :
@@ -5440,6 +5526,7 @@ def _deploy(
5440
5526
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
5441
5527
fast_tryout_enabled : bool = False ,
5442
5528
system_labels : Optional [Dict [str , str ]] = None ,
5529
+ required_replica_count : Optional [int ] = 0 ,
5443
5530
) -> Union [Endpoint , PrivateEndpoint ]:
5444
5531
"""Deploys model to endpoint. Endpoint will be created if unspecified.
5445
5532
@@ -5570,6 +5657,14 @@ def _deploy(
5570
5657
system_labels (Dict[str, str]):
5571
5658
Optional. System labels to apply to Model Garden deployments.
5572
5659
System labels are managed by Google for internal use only.
5660
+ required_replica_count (int):
5661
+ Optional. Number of required available replicas for the
5662
+ deployment to succeed. This field is only needed when partial
5663
+ model deployment/mutation is desired, with a value greater than
5664
+ or equal to 1 and less than or equal to min_replica_count. If
5665
+ set, the model deploy/mutate operation will succeed once
5666
+ available_replica_count reaches required_replica_count, and the
5667
+ rest of the replicas will be retried.
5573
5668
5574
5669
Returns:
5575
5670
endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5633,6 +5728,7 @@ def _deploy(
5633
5728
deployment_resource_pool = deployment_resource_pool ,
5634
5729
fast_tryout_enabled = fast_tryout_enabled ,
5635
5730
system_labels = system_labels ,
5731
+ required_replica_count = required_replica_count ,
5636
5732
)
5637
5733
5638
5734
_LOGGER .log_action_completed_against_resource ("model" , "deployed" , endpoint )
0 commit comments