@@ -698,6 +698,7 @@ def deploy(
698
698
deploy_request_timeout : Optional [float ] = None ,
699
699
autoscaling_target_cpu_utilization : Optional [int ] = None ,
700
700
autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
701
+ autoscaling_target_request_count_per_minute : Optional [int ] = None ,
701
702
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
702
703
disable_container_logging : bool = False ,
703
704
fast_tryout_enabled : bool = False ,
@@ -778,6 +779,8 @@ def deploy(
778
779
autoscaling_target_accelerator_duty_cycle (int): Target Accelerator Duty
779
780
Cycle. Must also set accelerator_type and accelerator_count if
780
781
specified. A default value of 60 will be used if not specified.
782
+ autoscaling_target_request_count_per_minute (int): Optional. Target
783
+ request count per minute per instance.
781
784
deployment_resource_pool (DeploymentResourcePool): Optional.
782
785
Resource pool where the model will be deployed. All models that
783
786
are deployed to the same DeploymentResourcePool will be hosted in
@@ -806,7 +809,6 @@ def deploy(
806
809
multihost_gpu_node_count (int): Optional. The number of nodes per
807
810
replica for multihost GPU deployments. Required for multihost GPU
808
811
deployments.
809
-
810
812
"""
811
813
self ._sync_gca_resource_if_skipped ()
812
814
@@ -843,6 +845,7 @@ def deploy(
843
845
deploy_request_timeout = deploy_request_timeout ,
844
846
autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
845
847
autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
848
+ autoscaling_target_request_count_per_minute = autoscaling_target_request_count_per_minute ,
846
849
deployment_resource_pool = deployment_resource_pool ,
847
850
disable_container_logging = disable_container_logging ,
848
851
fast_tryout_enabled = fast_tryout_enabled ,
@@ -871,6 +874,7 @@ def _deploy(
871
874
deploy_request_timeout : Optional [float ] = None ,
872
875
autoscaling_target_cpu_utilization : Optional [int ] = None ,
873
876
autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
877
+ autoscaling_target_request_count_per_minute : Optional [int ] = None ,
874
878
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
875
879
disable_container_logging : bool = False ,
876
880
fast_tryout_enabled : bool = False ,
@@ -945,6 +949,8 @@ def _deploy(
945
949
autoscaling_target_accelerator_duty_cycle (int): Target Accelerator Duty
946
950
Cycle. Must also set accelerator_type and accelerator_count if
947
951
specified. A default value of 60 will be used if not specified.
952
+ autoscaling_target_request_count_per_minute (int): Optional. Target
953
+ request count per minute per instance.
948
954
deployment_resource_pool (DeploymentResourcePool): Optional.
949
955
Resource pool where the model will be deployed. All models that
950
956
are deployed to the same DeploymentResourcePool will be hosted in
@@ -999,6 +1005,7 @@ def _deploy(
999
1005
deploy_request_timeout = deploy_request_timeout ,
1000
1006
autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
1001
1007
autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
1008
+ autoscaling_target_request_count_per_minute = autoscaling_target_request_count_per_minute ,
1002
1009
deployment_resource_pool = deployment_resource_pool ,
1003
1010
disable_container_logging = disable_container_logging ,
1004
1011
fast_tryout_enabled = fast_tryout_enabled ,
@@ -1034,6 +1041,7 @@ def _deploy_call(
1034
1041
deploy_request_timeout : Optional [float ] = None ,
1035
1042
autoscaling_target_cpu_utilization : Optional [int ] = None ,
1036
1043
autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
1044
+ autoscaling_target_request_count_per_minute : Optional [int ] = None ,
1037
1045
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
1038
1046
disable_container_logging : bool = False ,
1039
1047
fast_tryout_enabled : bool = False ,
@@ -1115,6 +1123,8 @@ def _deploy_call(
1115
1123
Accelerator Duty Cycle. Must also set accelerator_type and
1116
1124
accelerator_count if specified. A default value of 60 will be used if
1117
1125
not specified.
1126
+ autoscaling_target_request_count_per_minute (int): Optional. Target
1127
+ request count per minute per instance.
1118
1128
deployment_resource_pool (DeploymentResourcePool): Optional.
1119
1129
Resource pool where the model will be deployed. All models that
1120
1130
are deployed to the same DeploymentResourcePool will be hosted in
@@ -1194,6 +1204,7 @@ def _deploy_call(
1194
1204
or accelerator_type
1195
1205
or accelerator_count
1196
1206
or autoscaling_target_accelerator_duty_cycle
1207
+ or autoscaling_target_request_count_per_minute
1197
1208
or autoscaling_target_cpu_utilization
1198
1209
)
1199
1210
@@ -1206,9 +1217,11 @@ def _deploy_call(
1206
1217
if provided_custom_machine_spec and not use_dedicated_resources :
1207
1218
_LOGGER .info (
1208
1219
"Model does not support dedicated deployment resources. "
1209
- "The machine_type, accelerator_type and accelerator_count,"
1210
- "autoscaling_target_accelerator_duty_cycle,"
1211
- "autoscaling_target_cpu_utilization parameters are ignored."
1220
+ "The machine_type, accelerator_type and accelerator_count, "
1221
+ "autoscaling_target_accelerator_duty_cycle, "
1222
+ "autoscaling_target_cpu_utilization, "
1223
+ "autoscaling_target_request_count_per_minute parameters "
1224
+ "are ignored."
1212
1225
)
1213
1226
1214
1227
if use_dedicated_resources and not machine_type :
@@ -1250,6 +1263,20 @@ def _deploy_call(
1250
1263
[autoscaling_metric_spec ]
1251
1264
)
1252
1265
1266
+ if autoscaling_target_request_count_per_minute :
1267
+ autoscaling_metric_spec = (
1268
+ gca_machine_resources_compat .AutoscalingMetricSpec (
1269
+ metric_name = (
1270
+ "aiplatform.googleapis.com/prediction/online/"
1271
+ "request_count"
1272
+ ),
1273
+ target = autoscaling_target_request_count_per_minute ,
1274
+ )
1275
+ )
1276
+ dedicated_resources .autoscaling_metric_specs .extend (
1277
+ [autoscaling_metric_spec ]
1278
+ )
1279
+
1253
1280
dedicated_resources .machine_spec = machine_spec
1254
1281
1255
1282
# Checking if flag fast_tryout_enabled is set, only in v1beta1
@@ -1296,15 +1323,18 @@ def _deploy_call(
1296
1323
or accelerator_count
1297
1324
or autoscaling_target_accelerator_duty_cycle
1298
1325
or autoscaling_target_cpu_utilization
1326
+ or autoscaling_target_request_count_per_minute
1299
1327
)
1300
1328
1301
1329
if provided_custom_machine_spec :
1302
1330
raise ValueError (
1303
1331
"Conflicting parameters in deployment request. "
1304
- "The machine_type, accelerator_type and accelerator_count,"
1305
- "autoscaling_target_accelerator_duty_cycle,"
1306
- "autoscaling_target_cpu_utilization parameters may not be set "
1307
- "when `deployment_resource_pool` is specified."
1332
+ "The machine_type, accelerator_type and accelerator_count, "
1333
+ "autoscaling_target_accelerator_duty_cycle, "
1334
+ "autoscaling_target_cpu_utilization, "
1335
+ "autoscaling_target_request_count_per_minute parameters "
1336
+ "may not be set when `deployment_resource_pool` is "
1337
+ "specified."
1308
1338
)
1309
1339
1310
1340
deployed_model .shared_resources = deployment_resource_pool .resource_name
@@ -1561,6 +1591,7 @@ def deploy(
1561
1591
deploy_request_timeout : Optional [float ] = None ,
1562
1592
autoscaling_target_cpu_utilization : Optional [int ] = None ,
1563
1593
autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
1594
+ autoscaling_target_request_count_per_minute : Optional [int ] = None ,
1564
1595
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
1565
1596
disable_container_logging : bool = False ,
1566
1597
fast_tryout_enabled : bool = False ,
@@ -1662,6 +1693,8 @@ def deploy(
1662
1693
Accelerator Duty Cycle. Must also set accelerator_type and
1663
1694
accelerator_count if specified. A default value of 60 will be used if
1664
1695
not specified.
1696
+ autoscaling_target_request_count_per_minute (int): Optional. Target
1697
+ request count per minute per instance.
1665
1698
deployment_resource_pool (DeploymentResourcePool): Optional.
1666
1699
Resource pool where the model will be deployed. All models that
1667
1700
are deployed to the same DeploymentResourcePool will be hosted in
@@ -1688,8 +1721,8 @@ def deploy(
1688
1721
rollout_options (RolloutOptions):
1689
1722
Optional. Options to configure a rolling deployment.
1690
1723
multihost_gpu_node_count (int):
1691
- Optional. The number of nodes per replica for multihost GPU
1692
- deployments. Required for multihost GPU deployments.
1724
+ Optional. The number of nodes per replica for multihost GPU
1725
+ deployments. Required for multihost GPU deployments.
1693
1726
1694
1727
Returns:
1695
1728
endpoint (Union[Endpoint, models.PrivateEndpoint]):
@@ -1744,6 +1777,7 @@ def deploy(
1744
1777
deploy_request_timeout = deploy_request_timeout ,
1745
1778
autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
1746
1779
autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
1780
+ autoscaling_target_request_count_per_minute = autoscaling_target_request_count_per_minute ,
1747
1781
deployment_resource_pool = deployment_resource_pool ,
1748
1782
disable_container_logging = disable_container_logging ,
1749
1783
fast_tryout_enabled = fast_tryout_enabled ,
@@ -1781,6 +1815,7 @@ def _deploy(
1781
1815
deploy_request_timeout : Optional [float ] = None ,
1782
1816
autoscaling_target_cpu_utilization : Optional [int ] = None ,
1783
1817
autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
1818
+ autoscaling_target_request_count_per_minute : Optional [int ] = None ,
1784
1819
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
1785
1820
disable_container_logging : bool = False ,
1786
1821
fast_tryout_enabled : bool = False ,
@@ -1874,6 +1909,8 @@ def _deploy(
1874
1909
Accelerator Duty Cycle. Must also set accelerator_type and
1875
1910
accelerator_count if specified. A default value of 60 will be used if
1876
1911
not specified.
1912
+ autoscaling_target_request_count_per_minute (int): Optional. Target
1913
+ request count per minute per instance.
1877
1914
deployment_resource_pool (DeploymentResourcePool): Optional.
1878
1915
Resource pool where the model will be deployed. All models that
1879
1916
are deployed to the same DeploymentResourcePool will be hosted in
@@ -1901,7 +1938,6 @@ def _deploy(
1901
1938
multihost_gpu_node_count (int):
1902
1939
Optional. The number of nodes per replica for multihost GPU
1903
1940
deployments. Required for multihost GPU deployments.
1904
-
1905
1941
Returns:
1906
1942
endpoint (Union[Endpoint, models.PrivateEndpoint]):
1907
1943
Endpoint with the deployed model.
@@ -1961,6 +1997,7 @@ def _deploy(
1961
1997
deploy_request_timeout = deploy_request_timeout ,
1962
1998
autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
1963
1999
autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
2000
+ autoscaling_target_request_count_per_minute = autoscaling_target_request_count_per_minute ,
1964
2001
deployment_resource_pool = deployment_resource_pool ,
1965
2002
disable_container_logging = disable_container_logging ,
1966
2003
fast_tryout_enabled = fast_tryout_enabled ,
0 commit comments