Skip to content

Commit f5ddbb8

Browse files
vertex-sdk-bot and copybara-github
authored and committed
feat: Add required_replica to Vertex SDK
PiperOrigin-RevId: 711934045
1 parent 88d2656 commit f5ddbb8

File tree

3 files changed

+195
-0
lines changed

3 files changed

+195
-0
lines changed

google/cloud/aiplatform/models.py

+96
Original file line number | Diff line number | Diff line change
@@ -251,6 +251,7 @@ def create(
251251
reservation_affinity_key: Optional[str] = None,
252252
reservation_affinity_values: Optional[List[str]] = None,
253253
spot: bool = False,
254+
required_replica_count: Optional[int] = 0,
254255
) -> "DeploymentResourcePool":
255256
"""Creates a new DeploymentResourcePool.
256257
@@ -323,6 +324,14 @@ def create(
323324
Format: 'projects/{project_id_or_number}/zones/{zone}/reservations/{reservation_name}'
324325
spot (bool):
325326
Optional. Whether to schedule the deployment workload on spot VMs.
327+
required_replica_count (int):
328+
Optional. Number of required available replicas for the
329+
deployment to succeed. This field is only needed when partial
330+
model deployment/mutation is desired, with a value greater than
331+
or equal to 1 and fewer than or equal to min_replica_count. If
332+
set, the model deploy/mutate operation will succeed once
333+
available_replica_count reaches required_replica_count, and the
334+
rest of the replicas will be retried.
326335
327336
Returns:
328337
DeploymentResourcePool
@@ -353,6 +362,7 @@ def create(
353362
spot=spot,
354363
sync=sync,
355364
create_request_timeout=create_request_timeout,
365+
required_replica_count=required_replica_count,
356366
)
357367

358368
@classmethod
@@ -378,6 +388,7 @@ def _create(
378388
spot: bool = False,
379389
sync=True,
380390
create_request_timeout: Optional[float] = None,
391+
required_replica_count: Optional[int] = 0,
381392
) -> "DeploymentResourcePool":
382393
"""Creates a new DeploymentResourcePool.
383394
@@ -453,6 +464,14 @@ def _create(
453464
when the Future has completed.
454465
create_request_timeout (float):
455466
Optional. The create request timeout in seconds.
467+
required_replica_count (int):
468+
Optional. Number of required available replicas for the
469+
deployment to succeed. This field is only needed when partial
470+
model deployment/mutation is desired, with a value greater than
471+
or equal to 1 and fewer than or equal to min_replica_count. If
472+
set, the model deploy/mutate operation will succeed once
473+
available_replica_count reaches required_replica_count, and the
474+
rest of the replicas will be retried.
456475
457476
Returns:
458477
DeploymentResourcePool
@@ -466,6 +485,7 @@ def _create(
466485
min_replica_count=min_replica_count,
467486
max_replica_count=max_replica_count,
468487
spot=spot,
488+
required_replica_count=required_replica_count,
469489
)
470490

471491
machine_spec = gca_machine_resources_compat.MachineSpec(
@@ -1186,6 +1206,7 @@ def _validate_deploy_args(
11861206
traffic_split: Optional[Dict[str, int]],
11871207
traffic_percentage: Optional[int],
11881208
deployment_resource_pool: Optional[DeploymentResourcePool],
1209+
required_replica_count: Optional[int],
11891210
):
11901211
"""Helper method to validate deploy arguments.
11911212
@@ -1233,6 +1254,14 @@ def _validate_deploy_args(
12331254
are deployed to the same DeploymentResourcePool will be hosted in
12341255
a shared model server. If provided, will override replica count
12351256
arguments.
1257+
required_replica_count (int):
1258+
Optional. Number of required available replicas for the
1259+
deployment to succeed. This field is only needed when partial
1260+
model deployment/mutation is desired, with a value greater than
1261+
or equal to 1 and fewer than or equal to min_replica_count. If
1262+
set, the model deploy/mutate operation will succeed once
1263+
available_replica_count reaches required_replica_count, and the
1264+
rest of the replicas will be retried.
12361265
12371266
Raises:
12381267
ValueError: if Min or Max replica is negative. Traffic percentage > 100 or
@@ -1246,6 +1275,8 @@ def _validate_deploy_args(
12461275
and min_replica_count != 1
12471276
or max_replica_count
12481277
and max_replica_count != 1
1278+
or required_replica_count
1279+
and required_replica_count != 0
12491280
):
12501281
raise ValueError(
12511282
"Ignoring explicitly specified replica counts, "
@@ -1264,6 +1295,8 @@ def _validate_deploy_args(
12641295
raise ValueError("Min replica cannot be negative.")
12651296
if max_replica_count < 0:
12661297
raise ValueError("Max replica cannot be negative.")
1298+
if required_replica_count and required_replica_count < 0:
1299+
raise ValueError("Required replica cannot be negative.")
12671300
if accelerator_type:
12681301
utils.validate_accelerator_type(accelerator_type)
12691302

@@ -1313,6 +1346,7 @@ def deploy(
13131346
spot: bool = False,
13141347
fast_tryout_enabled: bool = False,
13151348
system_labels: Optional[Dict[str, str]] = None,
1349+
required_replica_count: Optional[int] = 0,
13161350
) -> None:
13171351
"""Deploys a Model to the Endpoint.
13181352
@@ -1428,6 +1462,14 @@ def deploy(
14281462
system_labels (Dict[str, str]):
14291463
Optional. System labels to apply to Model Garden deployments.
14301464
System labels are managed by Google for internal use only.
1465+
required_replica_count (int):
1466+
Optional. Number of required available replicas for the
1467+
deployment to succeed. This field is only needed when partial
1468+
model deployment/mutation is desired, with a value greater than
1469+
or equal to 1 and fewer than or equal to min_replica_count. If
1470+
set, the model deploy/mutate operation will succeed once
1471+
available_replica_count reaches required_replica_count, and the
1472+
rest of the replicas will be retried.
14311473
"""
14321474
self._sync_gca_resource_if_skipped()
14331475

@@ -1439,6 +1481,7 @@ def deploy(
14391481
traffic_split=traffic_split,
14401482
traffic_percentage=traffic_percentage,
14411483
deployment_resource_pool=deployment_resource_pool,
1484+
required_replica_count=required_replica_count,
14421485
)
14431486

14441487
explanation_spec = _explanation_utils.create_and_validate_explanation_spec(
@@ -1473,6 +1516,7 @@ def deploy(
14731516
deployment_resource_pool=deployment_resource_pool,
14741517
fast_tryout_enabled=fast_tryout_enabled,
14751518
system_labels=system_labels,
1519+
required_replica_count=required_replica_count,
14761520
)
14771521

14781522
@base.optional_sync()
@@ -1504,6 +1548,7 @@ def _deploy(
15041548
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
15051549
fast_tryout_enabled: bool = False,
15061550
system_labels: Optional[Dict[str, str]] = None,
1551+
required_replica_count: Optional[int] = 0,
15071552
) -> None:
15081553
"""Deploys a Model to the Endpoint.
15091554
@@ -1613,6 +1658,14 @@ def _deploy(
16131658
system_labels (Dict[str, str]):
16141659
Optional. System labels to apply to Model Garden deployments.
16151660
System labels are managed by Google for internal use only.
1661+
required_replica_count (int):
1662+
Optional. Number of required available replicas for the
1663+
deployment to succeed. This field is only needed when partial
1664+
model deployment/mutation is desired, with a value greater than
1665+
or equal to 1 and fewer than or equal to min_replica_count. If
1666+
set, the model deploy/mutate operation will succeed once
1667+
available_replica_count reaches required_replica_count, and the
1668+
rest of the replicas will be retried.
16161669
"""
16171670
_LOGGER.log_action_start_against_resource(
16181671
f"Deploying Model {model.resource_name} to", "", self
@@ -1648,6 +1701,7 @@ def _deploy(
16481701
deployment_resource_pool=deployment_resource_pool,
16491702
fast_tryout_enabled=fast_tryout_enabled,
16501703
system_labels=system_labels,
1704+
required_replica_count=required_replica_count,
16511705
)
16521706

16531707
_LOGGER.log_action_completed_against_resource("model", "deployed", self)
@@ -1686,6 +1740,7 @@ def _deploy_call(
16861740
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
16871741
fast_tryout_enabled: bool = False,
16881742
system_labels: Optional[Dict[str, str]] = None,
1743+
required_replica_count: Optional[int] = 0,
16891744
) -> None:
16901745
"""Helper method to deploy model to endpoint.
16911746
@@ -1802,6 +1857,14 @@ def _deploy_call(
18021857
system_labels (Dict[str, str]):
18031858
Optional. System labels to apply to Model Garden deployments.
18041859
System labels are managed by Google for internal use only.
1860+
required_replica_count (int):
1861+
Optional. Number of required available replicas for the
1862+
deployment to succeed. This field is only needed when partial
1863+
model deployment/mutation is desired, with a value greater than
1864+
or equal to 1 and fewer than or equal to min_replica_count. If
1865+
set, the model deploy/mutate operation will succeed once
1866+
available_replica_count reaches required_replica_count, and the
1867+
rest of the replicas will be retried.
18051868
18061869
Raises:
18071870
ValueError: If only `accelerator_type` or `accelerator_count` is specified.
@@ -1927,6 +1990,7 @@ def _deploy_call(
19271990
min_replica_count=min_replica_count,
19281991
max_replica_count=max_replica_count,
19291992
spot=spot,
1993+
required_replica_count=required_replica_count,
19301994
)
19311995

19321996
machine_spec = gca_machine_resources_compat.MachineSpec(
@@ -3963,6 +4027,7 @@ def deploy(
39634027
reservation_affinity_values: Optional[List[str]] = None,
39644028
spot: bool = False,
39654029
system_labels: Optional[Dict[str, str]] = None,
4030+
required_replica_count: Optional[int] = 0,
39664031
) -> None:
39674032
"""Deploys a Model to the PrivateEndpoint.
39684033
@@ -4081,6 +4146,14 @@ def deploy(
40814146
system_labels (Dict[str, str]):
40824147
Optional. System labels to apply to Model Garden deployments.
40834148
System labels are managed by Google for internal use only.
4149+
required_replica_count (int):
4150+
Optional. Number of required available replicas for the
4151+
deployment to succeed. This field is only needed when partial
4152+
model deployment/mutation is desired, with a value greater than
4153+
or equal to 1 and fewer than or equal to min_replica_count. If
4154+
set, the model deploy/mutate operation will succeed once
4155+
available_replica_count reaches required_replica_count, and the
4156+
rest of the replicas will be retried.
40844157
"""
40854158

40864159
if self.network:
@@ -4098,6 +4171,7 @@ def deploy(
40984171
traffic_split=traffic_split,
40994172
traffic_percentage=traffic_percentage,
41004173
deployment_resource_pool=None,
4174+
required_replica_count=required_replica_count,
41014175
)
41024176

41034177
explanation_spec = _explanation_utils.create_and_validate_explanation_spec(
@@ -4126,6 +4200,7 @@ def deploy(
41264200
spot=spot,
41274201
disable_container_logging=disable_container_logging,
41284202
system_labels=system_labels,
4203+
required_replica_count=required_replica_count,
41294204
)
41304205

41314206
def update(
@@ -5190,6 +5265,7 @@ def deploy(
51905265
spot: bool = False,
51915266
fast_tryout_enabled: bool = False,
51925267
system_labels: Optional[Dict[str, str]] = None,
5268+
required_replica_count: Optional[int] = 0,
51935269
) -> Union[Endpoint, PrivateEndpoint]:
51945270
"""Deploys model to endpoint. Endpoint will be created if unspecified.
51955271
@@ -5327,6 +5403,14 @@ def deploy(
53275403
system_labels (Dict[str, str]):
53285404
Optional. System labels to apply to Model Garden deployments.
53295405
System labels are managed by Google for internal use only.
5406+
required_replica_count (int):
5407+
Optional. Number of required available replicas for the
5408+
deployment to succeed. This field is only needed when partial
5409+
model deployment/mutation is desired, with a value greater than
5410+
or equal to 1 and fewer than or equal to min_replica_count. If
5411+
set, the model deploy/mutate operation will succeed once
5412+
available_replica_count reaches required_replica_count, and the
5413+
rest of the replicas will be retried.
53305414
53315415
Returns:
53325416
endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5345,6 +5429,7 @@ def deploy(
53455429
traffic_split=traffic_split,
53465430
traffic_percentage=traffic_percentage,
53475431
deployment_resource_pool=deployment_resource_pool,
5432+
required_replica_count=required_replica_count,
53485433
)
53495434

53505435
if isinstance(endpoint, PrivateEndpoint):
@@ -5397,6 +5482,7 @@ def deploy(
53975482
deployment_resource_pool=deployment_resource_pool,
53985483
fast_tryout_enabled=fast_tryout_enabled,
53995484
system_labels=system_labels,
5485+
required_replica_count=required_replica_count,
54005486
)
54015487

54025488
def _should_enable_dedicated_endpoint(self, fast_tryout_enabled: bool) -> bool:
@@ -5440,6 +5526,7 @@ def _deploy(
54405526
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
54415527
fast_tryout_enabled: bool = False,
54425528
system_labels: Optional[Dict[str, str]] = None,
5529+
required_replica_count: Optional[int] = 0,
54435530
) -> Union[Endpoint, PrivateEndpoint]:
54445531
"""Deploys model to endpoint. Endpoint will be created if unspecified.
54455532
@@ -5570,6 +5657,14 @@ def _deploy(
55705657
system_labels (Dict[str, str]):
55715658
Optional. System labels to apply to Model Garden deployments.
55725659
System labels are managed by Google for internal use only.
5660+
required_replica_count (int):
5661+
Optional. Number of required available replicas for the
5662+
deployment to succeed. This field is only needed when partial
5663+
model deployment/mutation is desired, with a value greater than
5664+
or equal to 1 and fewer than or equal to min_replica_count. If
5665+
set, the model deploy/mutate operation will succeed once
5666+
available_replica_count reaches required_replica_count, and the
5667+
rest of the replicas will be retried.
55735668
55745669
Returns:
55755670
endpoint (Union[Endpoint, PrivateEndpoint]):
@@ -5633,6 +5728,7 @@ def _deploy(
56335728
deployment_resource_pool=deployment_resource_pool,
56345729
fast_tryout_enabled=fast_tryout_enabled,
56355730
system_labels=system_labels,
5731+
required_replica_count=required_replica_count,
56365732
)
56375733

56385734
_LOGGER.log_action_completed_against_resource("model", "deployed", endpoint)

0 commit comments

Comments
 (0)