Skip to content

Commit 0f1f10a

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Add autoscaling_target_request_count_per_minute option in Preview model deployment on Endpoint & Model classes.
PiperOrigin-RevId: 745003388
1 parent 6c1569b commit 0f1f10a

File tree

4 files changed

+349
-46
lines changed

4 files changed

+349
-46
lines changed

google/cloud/aiplatform/models.py

+30-7
Original file line numberDiff line numberDiff line change
@@ -1734,6 +1734,7 @@ def _deploy_call(
17341734
deploy_request_timeout: Optional[float] = None,
17351735
autoscaling_target_cpu_utilization: Optional[int] = None,
17361736
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
1737+
autoscaling_target_request_count_per_minute: Optional[int] = None,
17371738
spot: bool = False,
17381739
enable_access_logging=False,
17391740
disable_container_logging: bool = False,
@@ -1837,6 +1838,8 @@ def _deploy_call(
18371838
Optional. Target Accelerator Duty Cycle.
18381839
Must also set accelerator_type and accelerator_count if specified.
18391840
A default value of 60 will be used if not specified.
1841+
autoscaling_target_request_count_per_minute (int):
1842+
Optional. Target request count per minute per instance.
18401843
spot (bool):
18411844
Optional. Whether to schedule the deployment workload on spot VMs.
18421845
enable_access_logging (bool):
@@ -1906,15 +1909,18 @@ def _deploy_call(
19061909
or accelerator_count
19071910
or autoscaling_target_accelerator_duty_cycle
19081911
or autoscaling_target_cpu_utilization
1912+
or autoscaling_target_request_count_per_minute
19091913
)
19101914

19111915
if provided_custom_machine_spec:
19121916
raise ValueError(
19131917
"Conflicting parameters in deployment request. "
1914-
"The machine_type, accelerator_type and accelerator_count,"
1915-
"autoscaling_target_accelerator_duty_cycle,"
1916-
"autoscaling_target_cpu_utilization parameters may not be set "
1917-
"when `deployment_resource_pool` is specified."
1918+
"The machine_type, accelerator_type and accelerator_count, "
1919+
"autoscaling_target_accelerator_duty_cycle, "
1920+
"autoscaling_target_cpu_utilization, "
1921+
"autoscaling_target_request_count_per_minute parameters "
1922+
"may not be set when `deployment_resource_pool` is "
1923+
"specified."
19181924
)
19191925

19201926
deployed_model.shared_resources = deployment_resource_pool.resource_name
@@ -1965,6 +1971,7 @@ def _deploy_call(
19651971
or accelerator_count
19661972
or autoscaling_target_accelerator_duty_cycle
19671973
or autoscaling_target_cpu_utilization
1974+
or autoscaling_target_request_count_per_minute
19681975
)
19691976

19701977
# If the model supports both automatic and dedicated deployment resources,
@@ -1976,9 +1983,11 @@ def _deploy_call(
19761983
if provided_custom_machine_spec and not use_dedicated_resources:
19771984
_LOGGER.info(
19781985
"Model does not support dedicated deployment resources. "
1979-
"The machine_type, accelerator_type and accelerator_count,"
1980-
"autoscaling_target_accelerator_duty_cycle,"
1981-
"autoscaling_target_cpu_utilization parameters are ignored."
1986+
"The machine_type, accelerator_type and accelerator_count, "
1987+
"autoscaling_target_accelerator_duty_cycle, "
1988+
"autoscaling_target_cpu_utilization, "
1989+
"autoscaling_target_request_count_per_minute parameters "
1990+
"are ignored."
19821991
)
19831992

19841993
if use_dedicated_resources and not machine_type:
@@ -2020,6 +2029,20 @@ def _deploy_call(
20202029
[autoscaling_metric_spec]
20212030
)
20222031

2032+
if autoscaling_target_request_count_per_minute:
2033+
autoscaling_metric_spec = (
2034+
gca_machine_resources_compat.AutoscalingMetricSpec(
2035+
metric_name=(
2036+
"aiplatform.googleapis.com/prediction/online/"
2037+
"request_count"
2038+
),
2039+
target=autoscaling_target_request_count_per_minute,
2040+
)
2041+
)
2042+
dedicated_resources.autoscaling_metric_specs.extend(
2043+
[autoscaling_metric_spec]
2044+
)
2045+
20232046
if reservation_affinity_type:
20242047
machine_spec.reservation_affinity = utils.get_reservation_affinity(
20252048
reservation_affinity_type,

google/cloud/aiplatform/preview/models.py

+48-11
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,7 @@ def deploy(
698698
deploy_request_timeout: Optional[float] = None,
699699
autoscaling_target_cpu_utilization: Optional[int] = None,
700700
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
701+
autoscaling_target_request_count_per_minute: Optional[int] = None,
701702
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
702703
disable_container_logging: bool = False,
703704
fast_tryout_enabled: bool = False,
@@ -778,6 +779,8 @@ def deploy(
778779
autoscaling_target_accelerator_duty_cycle (int): Target Accelerator Duty
779780
Cycle. Must also set accelerator_type and accelerator_count if
780781
specified. A default value of 60 will be used if not specified.
782+
autoscaling_target_request_count_per_minute (int): Target request
783+
count per minute per instance.
781784
deployment_resource_pool (DeploymentResourcePool): Optional.
782785
Resource pool where the model will be deployed. All models that
783786
are deployed to the same DeploymentResourcePool will be hosted in
@@ -806,7 +809,6 @@ def deploy(
806809
multihost_gpu_node_count (int): Optional. The number of nodes per
807810
replica for multihost GPU deployments. Required for multihost GPU
808811
deployments.
809-
810812
"""
811813
self._sync_gca_resource_if_skipped()
812814

@@ -843,6 +845,7 @@ def deploy(
843845
deploy_request_timeout=deploy_request_timeout,
844846
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
845847
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
848+
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
846849
deployment_resource_pool=deployment_resource_pool,
847850
disable_container_logging=disable_container_logging,
848851
fast_tryout_enabled=fast_tryout_enabled,
@@ -871,6 +874,7 @@ def _deploy(
871874
deploy_request_timeout: Optional[float] = None,
872875
autoscaling_target_cpu_utilization: Optional[int] = None,
873876
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
877+
autoscaling_target_request_count_per_minute: Optional[int] = None,
874878
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
875879
disable_container_logging: bool = False,
876880
fast_tryout_enabled: bool = False,
@@ -945,6 +949,8 @@ def _deploy(
945949
autoscaling_target_accelerator_duty_cycle (int): Target Accelerator Duty
946950
Cycle. Must also set accelerator_type and accelerator_count if
947951
specified. A default value of 60 will be used if not specified.
952+
autoscaling_target_request_count_per_minute (int): Target request
953+
count per minute per instance.
948954
deployment_resource_pool (DeploymentResourcePool): Optional.
949955
Resource pool where the model will be deployed. All models that
950956
are deployed to the same DeploymentResourcePool will be hosted in
@@ -999,6 +1005,7 @@ def _deploy(
9991005
deploy_request_timeout=deploy_request_timeout,
10001006
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
10011007
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
1008+
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
10021009
deployment_resource_pool=deployment_resource_pool,
10031010
disable_container_logging=disable_container_logging,
10041011
fast_tryout_enabled=fast_tryout_enabled,
@@ -1034,6 +1041,7 @@ def _deploy_call(
10341041
deploy_request_timeout: Optional[float] = None,
10351042
autoscaling_target_cpu_utilization: Optional[int] = None,
10361043
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
1044+
autoscaling_target_request_count_per_minute: Optional[int] = None,
10371045
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
10381046
disable_container_logging: bool = False,
10391047
fast_tryout_enabled: bool = False,
@@ -1115,6 +1123,8 @@ def _deploy_call(
11151123
Accelerator Duty Cycle. Must also set accelerator_type and
11161124
accelerator_count if specified. A default value of 60 will be used if
11171125
not specified.
1126+
autoscaling_target_request_count_per_minute (int): Optional. Target
1127+
request count per minute per instance.
11181128
deployment_resource_pool (DeploymentResourcePool): Optional.
11191129
Resource pool where the model will be deployed. All models that
11201130
are deployed to the same DeploymentResourcePool will be hosted in
@@ -1194,6 +1204,7 @@ def _deploy_call(
11941204
or accelerator_type
11951205
or accelerator_count
11961206
or autoscaling_target_accelerator_duty_cycle
1207+
or autoscaling_target_request_count_per_minute
11971208
or autoscaling_target_cpu_utilization
11981209
)
11991210

@@ -1206,9 +1217,11 @@ def _deploy_call(
12061217
if provided_custom_machine_spec and not use_dedicated_resources:
12071218
_LOGGER.info(
12081219
"Model does not support dedicated deployment resources. "
1209-
"The machine_type, accelerator_type and accelerator_count,"
1210-
"autoscaling_target_accelerator_duty_cycle,"
1211-
"autoscaling_target_cpu_utilization parameters are ignored."
1220+
"The machine_type, accelerator_type and accelerator_count, "
1221+
"autoscaling_target_accelerator_duty_cycle, "
1222+
"autoscaling_target_cpu_utilization, "
1223+
"autoscaling_target_request_count_per_minute parameters "
1224+
"are ignored."
12121225
)
12131226

12141227
if use_dedicated_resources and not machine_type:
@@ -1250,6 +1263,20 @@ def _deploy_call(
12501263
[autoscaling_metric_spec]
12511264
)
12521265

1266+
if autoscaling_target_request_count_per_minute:
1267+
autoscaling_metric_spec = (
1268+
gca_machine_resources_compat.AutoscalingMetricSpec(
1269+
metric_name=(
1270+
"aiplatform.googleapis.com/prediction/online/"
1271+
"request_count"
1272+
),
1273+
target=autoscaling_target_request_count_per_minute,
1274+
)
1275+
)
1276+
dedicated_resources.autoscaling_metric_specs.extend(
1277+
[autoscaling_metric_spec]
1278+
)
1279+
12531280
dedicated_resources.machine_spec = machine_spec
12541281

12551282
# Checking if flag fast_tryout_enabled is set, only in v1beta1
@@ -1296,15 +1323,18 @@ def _deploy_call(
12961323
or accelerator_count
12971324
or autoscaling_target_accelerator_duty_cycle
12981325
or autoscaling_target_cpu_utilization
1326+
or autoscaling_target_request_count_per_minute
12991327
)
13001328

13011329
if provided_custom_machine_spec:
13021330
raise ValueError(
13031331
"Conflicting parameters in deployment request. "
1304-
"The machine_type, accelerator_type and accelerator_count,"
1305-
"autoscaling_target_accelerator_duty_cycle,"
1306-
"autoscaling_target_cpu_utilization parameters may not be set "
1307-
"when `deployment_resource_pool` is specified."
1332+
"The machine_type, accelerator_type and accelerator_count, "
1333+
"autoscaling_target_accelerator_duty_cycle, "
1334+
"autoscaling_target_cpu_utilization, "
1335+
"autoscaling_target_request_count_per_minute parameters "
1336+
"may not be set when `deployment_resource_pool` is "
1337+
"specified."
13081338
)
13091339

13101340
deployed_model.shared_resources = deployment_resource_pool.resource_name
@@ -1561,6 +1591,7 @@ def deploy(
15611591
deploy_request_timeout: Optional[float] = None,
15621592
autoscaling_target_cpu_utilization: Optional[int] = None,
15631593
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
1594+
autoscaling_target_request_count_per_minute: Optional[int] = None,
15641595
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
15651596
disable_container_logging: bool = False,
15661597
fast_tryout_enabled: bool = False,
@@ -1662,6 +1693,8 @@ def deploy(
16621693
Accelerator Duty Cycle. Must also set accelerator_type and
16631694
accelerator_count if specified. A default value of 60 will be used if
16641695
not specified.
1696+
autoscaling_target_request_count_per_minute (int): Optional. Target
1697+
request count per minute per instance.
16651698
deployment_resource_pool (DeploymentResourcePool): Optional.
16661699
Resource pool where the model will be deployed. All models that
16671700
are deployed to the same DeploymentResourcePool will be hosted in
@@ -1688,8 +1721,8 @@ def deploy(
16881721
rollout_options (RolloutOptions):
16891722
Optional. Options to configure a rolling deployment.
16901723
multihost_gpu_node_count (int):
1691-
Optional. The number of nodes per replica for multihost GPU
1692-
deployments. Required for multihost GPU deployments.
1724+
Optional. The number of nodes per replica for multihost GPU
1725+
deployments. Required for multihost GPU deployments.
16931726
16941727
Returns:
16951728
endpoint (Union[Endpoint, models.PrivateEndpoint]):
@@ -1744,6 +1777,7 @@ def deploy(
17441777
deploy_request_timeout=deploy_request_timeout,
17451778
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
17461779
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
1780+
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
17471781
deployment_resource_pool=deployment_resource_pool,
17481782
disable_container_logging=disable_container_logging,
17491783
fast_tryout_enabled=fast_tryout_enabled,
@@ -1781,6 +1815,7 @@ def _deploy(
17811815
deploy_request_timeout: Optional[float] = None,
17821816
autoscaling_target_cpu_utilization: Optional[int] = None,
17831817
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
1818+
autoscaling_target_request_count_per_minute: Optional[int] = None,
17841819
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
17851820
disable_container_logging: bool = False,
17861821
fast_tryout_enabled: bool = False,
@@ -1874,6 +1909,8 @@ def _deploy(
18741909
Accelerator Duty Cycle. Must also set accelerator_type and
18751910
accelerator_count if specified. A default value of 60 will be used if
18761911
not specified.
1912+
autoscaling_target_request_count_per_minute (int): Optional. Target
1913+
request count per minute per instance.
18771914
deployment_resource_pool (DeploymentResourcePool): Optional.
18781915
Resource pool where the model will be deployed. All models that
18791916
are deployed to the same DeploymentResourcePool will be hosted in
@@ -1901,7 +1938,6 @@ def _deploy(
19011938
multihost_gpu_node_count (int):
19021939
Optional. The number of nodes per replica for multihost GPU
19031940
deployments. Required for multihost GPU deployments.
1904-
19051941
Returns:
19061942
endpoint (Union[Endpoint, models.PrivateEndpoint]):
19071943
Endpoint with the deployed model.
@@ -1961,6 +1997,7 @@ def _deploy(
19611997
deploy_request_timeout=deploy_request_timeout,
19621998
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
19631999
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
2000+
autoscaling_target_request_count_per_minute=autoscaling_target_request_count_per_minute,
19642001
deployment_resource_pool=deployment_resource_pool,
19652002
disable_container_logging=disable_container_logging,
19662003
fast_tryout_enabled=fast_tryout_enabled,

0 commit comments

Comments (0)