@@ -643,6 +643,8 @@ def deploy(
643
643
metadata : Optional [Sequence [Tuple [str , str ]]] = (),
644
644
sync = True ,
645
645
deploy_request_timeout : Optional [float ] = None ,
646
+ autoscaling_target_cpu_utilization : Optional [int ] = None ,
647
+ autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
646
648
) -> None :
647
649
"""Deploys a Model to the Endpoint.
648
650
@@ -716,6 +718,13 @@ def deploy(
716
718
be immediately returned and synced when the Future has completed.
717
719
deploy_request_timeout (float):
718
720
Optional. The timeout for the deploy request in seconds.
721
+ autoscaling_target_cpu_utilization (int):
722
+ Optional. Target CPU Utilization to use for Autoscaling Replicas.
723
+ A default value of 60 will be used if not specified.
724
+ autoscaling_target_accelerator_duty_cycle (int):
725
+ Optional. Target Accelerator Duty Cycle.
726
+ Must also set accelerator_type and accelerator_count if specified.
727
+ A default value of 60 will be used if not specified.
719
728
"""
720
729
self ._sync_gca_resource_if_skipped ()
721
730
@@ -746,6 +755,8 @@ def deploy(
746
755
metadata = metadata ,
747
756
sync = sync ,
748
757
deploy_request_timeout = deploy_request_timeout ,
758
+ autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
759
+ autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
749
760
)
750
761
751
762
@base .optional_sync ()
@@ -766,6 +777,8 @@ def _deploy(
766
777
metadata : Optional [Sequence [Tuple [str , str ]]] = (),
767
778
sync = True ,
768
779
deploy_request_timeout : Optional [float ] = None ,
780
+ autoscaling_target_cpu_utilization : Optional [int ] = None ,
781
+ autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
769
782
) -> None :
770
783
"""Deploys a Model to the Endpoint.
771
784
@@ -839,6 +852,13 @@ def _deploy(
839
852
be immediately returned and synced when the Future has completed.
840
853
deploy_request_timeout (float):
841
854
Optional. The timeout for the deploy request in seconds.
855
+ autoscaling_target_cpu_utilization (int):
856
+ Optional. Target CPU Utilization to use for Autoscaling Replicas.
857
+ A default value of 60 will be used if not specified.
858
+ autoscaling_target_accelerator_duty_cycle (int):
859
+ Optional. Target Accelerator Duty Cycle.
860
+ Must also set accelerator_type and accelerator_count if specified.
861
+ A default value of 60 will be used if not specified.
842
862
Raises:
843
863
ValueError: If there is not current traffic split and traffic percentage
844
864
is not 0 or 100.
@@ -865,6 +885,8 @@ def _deploy(
865
885
explanation_parameters = explanation_parameters ,
866
886
metadata = metadata ,
867
887
deploy_request_timeout = deploy_request_timeout ,
888
+ autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
889
+ autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
868
890
)
869
891
870
892
_LOGGER .log_action_completed_against_resource ("model" , "deployed" , self )
@@ -891,6 +913,8 @@ def _deploy_call(
891
913
explanation_parameters : Optional [explain .ExplanationParameters ] = None ,
892
914
metadata : Optional [Sequence [Tuple [str , str ]]] = (),
893
915
deploy_request_timeout : Optional [float ] = None ,
916
+ autoscaling_target_cpu_utilization : Optional [int ] = None ,
917
+ autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
894
918
):
895
919
"""Helper method to deploy model to endpoint.
896
920
@@ -964,6 +988,13 @@ def _deploy_call(
964
988
be immediately returned and synced when the Future has completed.
965
989
deploy_request_timeout (float):
966
990
Optional. The timeout for the deploy request in seconds.
991
+ autoscaling_target_cpu_utilization (int):
992
+ Optional. Target CPU Utilization to use for Autoscaling Replicas.
993
+ A default value of 60 will be used if not specified.
994
+ autoscaling_target_accelerator_duty_cycle (int):
995
+ Optional. Target Accelerator Duty Cycle.
996
+ Must also set accelerator_type and accelerator_count if specified.
997
+ A default value of 60 will be used if not specified.
967
998
Raises:
968
999
ValueError: If there is not current traffic split and traffic percentage
969
1000
is not 0 or 100.
@@ -979,6 +1010,14 @@ def _deploy_call(
979
1010
"Both `accelerator_type` and `accelerator_count` should be specified or None."
980
1011
)
981
1012
1013
+ if autoscaling_target_accelerator_duty_cycle is not None and (
1014
+ not accelerator_type or not accelerator_count
1015
+ ):
1016
+ raise ValueError (
1017
+ "Both `accelerator_type` and `accelerator_count` should be set "
1018
+ "when specifying autoscaling_target_accelerator_duty_cycle`"
1019
+ )
1020
+
982
1021
deployed_model = gca_endpoint_compat .DeployedModel (
983
1022
model = model .resource_name ,
984
1023
display_name = deployed_model_display_name ,
@@ -994,7 +1033,11 @@ def _deploy_call(
994
1033
in model .supported_deployment_resources_types
995
1034
)
996
1035
provided_custom_machine_spec = (
997
- machine_type or accelerator_type or accelerator_count
1036
+ machine_type
1037
+ or accelerator_type
1038
+ or accelerator_count
1039
+ or autoscaling_target_accelerator_duty_cycle
1040
+ or autoscaling_target_cpu_utilization
998
1041
)
999
1042
1000
1043
# If the model supports both automatic and dedicated deployment resources,
@@ -1006,30 +1049,51 @@ def _deploy_call(
1006
1049
if provided_custom_machine_spec and not use_dedicated_resources :
1007
1050
_LOGGER .info (
1008
1051
"Model does not support dedicated deployment resources. "
1009
- "The machine_type, accelerator_type and accelerator_count parameters are ignored."
1052
+ "The machine_type, accelerator_type and accelerator_count,"
1053
+ "autoscaling_target_accelerator_duty_cycle,"
1054
+ "autoscaling_target_cpu_utilization parameters are ignored."
1010
1055
)
1011
1056
1012
1057
if use_dedicated_resources and not machine_type :
1013
1058
machine_type = _DEFAULT_MACHINE_TYPE
1014
1059
_LOGGER .info (f"Using default machine_type: { machine_type } " )
1015
1060
1016
1061
if use_dedicated_resources :
1062
+
1063
+ dedicated_resources = gca_machine_resources_compat .DedicatedResources (
1064
+ min_replica_count = min_replica_count ,
1065
+ max_replica_count = max_replica_count ,
1066
+ )
1067
+
1017
1068
machine_spec = gca_machine_resources_compat .MachineSpec (
1018
1069
machine_type = machine_type
1019
1070
)
1020
1071
1072
+ if autoscaling_target_cpu_utilization :
1073
+ autoscaling_metric_spec = gca_machine_resources_compat .AutoscalingMetricSpec (
1074
+ metric_name = "aiplatform.googleapis.com/prediction/online/cpu/utilization" ,
1075
+ target = autoscaling_target_cpu_utilization ,
1076
+ )
1077
+ dedicated_resources .autoscaling_metric_specs .extend (
1078
+ [autoscaling_metric_spec ]
1079
+ )
1080
+
1021
1081
if accelerator_type and accelerator_count :
1022
1082
utils .validate_accelerator_type (accelerator_type )
1023
1083
machine_spec .accelerator_type = accelerator_type
1024
1084
machine_spec .accelerator_count = accelerator_count
1025
1085
1026
- deployed_model .dedicated_resources = (
1027
- gca_machine_resources_compat .DedicatedResources (
1028
- machine_spec = machine_spec ,
1029
- min_replica_count = min_replica_count ,
1030
- max_replica_count = max_replica_count ,
1031
- )
1032
- )
1086
+ if autoscaling_target_accelerator_duty_cycle :
1087
+ autoscaling_metric_spec = gca_machine_resources_compat .AutoscalingMetricSpec (
1088
+ metric_name = "aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle" ,
1089
+ target = autoscaling_target_accelerator_duty_cycle ,
1090
+ )
1091
+ dedicated_resources .autoscaling_metric_specs .extend (
1092
+ [autoscaling_metric_spec ]
1093
+ )
1094
+
1095
+ dedicated_resources .machine_spec = machine_spec
1096
+ deployed_model .dedicated_resources = dedicated_resources
1033
1097
1034
1098
elif supports_automatic_resources :
1035
1099
deployed_model .automatic_resources = (
@@ -1994,6 +2058,8 @@ def deploy(
1994
2058
encryption_spec_key_name : Optional [str ] = None ,
1995
2059
sync = True ,
1996
2060
deploy_request_timeout : Optional [float ] = None ,
2061
+ autoscaling_target_cpu_utilization : Optional [int ] = None ,
2062
+ autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
1997
2063
) -> Endpoint :
1998
2064
"""Deploys model to endpoint. Endpoint will be created if unspecified.
1999
2065
@@ -2078,6 +2144,13 @@ def deploy(
2078
2144
be immediately returned and synced when the Future has completed.
2079
2145
deploy_request_timeout (float):
2080
2146
Optional. The timeout for the deploy request in seconds.
2147
+ autoscaling_target_cpu_utilization (int):
2148
+ Optional. Target CPU Utilization to use for Autoscaling Replicas.
2149
+ A default value of 60 will be used if not specified.
2150
+ autoscaling_target_accelerator_duty_cycle (int):
2151
+ Optional. Target Accelerator Duty Cycle.
2152
+ Must also set accelerator_type and accelerator_count if specified.
2153
+ A default value of 60 will be used if not specified.
2081
2154
Returns:
2082
2155
endpoint ("Endpoint"):
2083
2156
Endpoint with the deployed model.
@@ -2112,6 +2185,8 @@ def deploy(
2112
2185
or initializer .global_config .encryption_spec_key_name ,
2113
2186
sync = sync ,
2114
2187
deploy_request_timeout = deploy_request_timeout ,
2188
+ autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
2189
+ autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
2115
2190
)
2116
2191
2117
2192
@base .optional_sync (return_input_arg = "endpoint" , bind_future_to_self = False )
@@ -2133,6 +2208,8 @@ def _deploy(
2133
2208
encryption_spec_key_name : Optional [str ] = None ,
2134
2209
sync : bool = True ,
2135
2210
deploy_request_timeout : Optional [float ] = None ,
2211
+ autoscaling_target_cpu_utilization : Optional [int ] = None ,
2212
+ autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
2136
2213
) -> Endpoint :
2137
2214
"""Deploys model to endpoint. Endpoint will be created if unspecified.
2138
2215
@@ -2217,6 +2294,13 @@ def _deploy(
2217
2294
be immediately returned and synced when the Future has completed.
2218
2295
deploy_request_timeout (float):
2219
2296
Optional. The timeout for the deploy request in seconds.
2297
+ autoscaling_target_cpu_utilization (int):
2298
+ Optional. Target CPU Utilization to use for Autoscaling Replicas.
2299
+ A default value of 60 will be used if not specified.
2300
+ autoscaling_target_accelerator_duty_cycle (int):
2301
+ Optional. Target Accelerator Duty Cycle.
2302
+ Must also set accelerator_type and accelerator_count if specified.
2303
+ A default value of 60 will be used if not specified.
2220
2304
Returns:
2221
2305
endpoint ("Endpoint"):
2222
2306
Endpoint with the deployed model.
@@ -2252,6 +2336,8 @@ def _deploy(
2252
2336
explanation_parameters = explanation_parameters ,
2253
2337
metadata = metadata ,
2254
2338
deploy_request_timeout = deploy_request_timeout ,
2339
+ autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
2340
+ autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
2255
2341
)
2256
2342
2257
2343
_LOGGER .log_action_completed_against_resource ("model" , "deployed" , endpoint )
0 commit comments