@@ -518,13 +518,14 @@ def _validate_deploy_args(
518
518
"Minimum and maximum replica counts must not be "
519
519
"if not using a shared resource pool."
520
520
)
521
- return super () ._validate_deploy_args (
521
+ return aiplatform . Endpoint ._validate_deploy_args (
522
522
min_replica_count = min_replica_count ,
523
523
max_replica_count = max_replica_count ,
524
524
accelerator_type = accelerator_type ,
525
525
deployed_model_display_name = deployed_model_display_name ,
526
526
traffic_split = traffic_split ,
527
527
traffic_percentage = traffic_percentage ,
528
+ deployment_resource_pool = deployment_resource_pool ,
528
529
)
529
530
530
531
if (
@@ -580,6 +581,7 @@ def deploy(
580
581
autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
581
582
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
582
583
disable_container_logging : bool = False ,
584
+ fast_tryout_enabled : bool = False ,
583
585
) -> None :
584
586
"""Deploys a Model to the Endpoint.
585
587
@@ -661,6 +663,10 @@ def deploy(
661
663
disable_container_logging (bool):
662
664
If True, container logs from the deployed model will not be
663
665
written to Cloud Logging. Defaults to False.
666
+ fast_tryout_enabled (bool): Optional.
667
+ If True, the model will be deployed using a faster deployment path.
668
+ Useful for quick experiments. Not for production workloads. Only
669
+ available for the most popular models and machine types. Defaults to False.
664
670
665
671
"""
666
672
self ._sync_gca_resource_if_skipped ()
@@ -699,6 +705,7 @@ def deploy(
699
705
autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
700
706
deployment_resource_pool = deployment_resource_pool ,
701
707
disable_container_logging = disable_container_logging ,
708
+ fast_tryout_enabled = fast_tryout_enabled ,
702
709
)
703
710
704
711
@base .optional_sync ()
@@ -722,6 +729,7 @@ def _deploy(
722
729
autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
723
730
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
724
731
disable_container_logging : bool = False ,
732
+ fast_tryout_enabled : bool = False ,
725
733
) -> None :
726
734
"""Deploys a Model to the Endpoint.
727
735
@@ -797,6 +805,10 @@ def _deploy(
797
805
disable_container_logging (bool):
798
806
If True, container logs from the deployed model will not be
799
807
written to Cloud Logging. Defaults to False.
808
+ fast_tryout_enabled (bool): Optional.
809
+ If True, model will be deployed using faster deployment path.
810
+ Useful for quick experiments. Not for production workloads. Only
811
+ available for most popular models and machine types. Defaults to False.
800
812
801
813
"""
802
814
_LOGGER .log_action_start_against_resource (
@@ -825,6 +837,7 @@ def _deploy(
825
837
autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
826
838
deployment_resource_pool = deployment_resource_pool ,
827
839
disable_container_logging = disable_container_logging ,
840
+ fast_tryout_enabled = fast_tryout_enabled ,
828
841
)
829
842
830
843
_LOGGER .log_action_completed_against_resource ("model" , "deployed" , self )
@@ -855,6 +868,7 @@ def _deploy_call(
855
868
autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
856
869
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
857
870
disable_container_logging : bool = False ,
871
+ fast_tryout_enabled : bool = False ,
858
872
) -> None :
859
873
"""Helper method to deploy model to endpoint.
860
874
@@ -937,6 +951,10 @@ def _deploy_call(
937
951
disable_container_logging (bool):
938
952
If True, container logs from the deployed model will not be
939
953
written to Cloud Logging. Defaults to False.
954
+ fast_tryout_enabled (bool): Optional.
955
+ If True, the model will be deployed using a faster deployment path.
956
+ Useful for quick experiments. Not for production workloads. Only
957
+ available for the most popular models and machine types. Defaults to False.
940
958
941
959
Raises:
942
960
ValueError: If only `accelerator_type` or `accelerator_count` is
@@ -950,72 +968,148 @@ def _deploy_call(
950
968
are present.
951
969
"""
952
970
if not deployment_resource_pool :
953
- return super ()._deploy_call (
954
- api_client = api_client ,
955
- endpoint_resource_name = endpoint_resource_name ,
956
- model = model ,
957
- endpoint_resource_traffic_split = endpoint_resource_traffic_split ,
958
- network = network ,
959
- deployed_model_display_name = deployed_model_display_name ,
960
- traffic_percentage = traffic_percentage ,
961
- traffic_split = traffic_split ,
962
- machine_type = machine_type ,
963
- min_replica_count = min_replica_count ,
964
- max_replica_count = max_replica_count ,
965
- accelerator_type = accelerator_type ,
966
- accelerator_count = accelerator_count ,
971
+ max_replica_count = max (min_replica_count , max_replica_count )
972
+
973
+ if bool (accelerator_type ) != bool (accelerator_count ):
974
+ raise ValueError (
975
+ "Both `accelerator_type` and `accelerator_count` should be specified or None."
976
+ )
977
+
978
+ if autoscaling_target_accelerator_duty_cycle is not None and (
979
+ not accelerator_type or not accelerator_count
980
+ ):
981
+ raise ValueError (
982
+ "Both `accelerator_type` and `accelerator_count` should be set "
983
+ "when specifying autoscaling_target_accelerator_duty_cycle`"
984
+ )
985
+
986
+ deployed_model = gca_endpoint_compat .DeployedModel (
987
+ model = model .versioned_resource_name ,
988
+ display_name = deployed_model_display_name ,
967
989
service_account = service_account ,
968
- explanation_spec = explanation_spec ,
969
- metadata = metadata ,
970
- deploy_request_timeout = deploy_request_timeout ,
971
- autoscaling_target_cpu_utilization = autoscaling_target_cpu_utilization ,
972
- autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
973
- disable_container_logging = disable_container_logging ,
990
+ enable_container_logging = not disable_container_logging ,
974
991
)
975
992
976
- deployed_model = gca_endpoint_compat .DeployedModel (
977
- model = model .versioned_resource_name ,
978
- display_name = deployed_model_display_name ,
979
- service_account = service_account ,
980
- enable_container_logging = not disable_container_logging ,
981
- )
982
-
983
- supports_shared_resources = (
984
- gca_model_compat .Model .DeploymentResourcesType .SHARED_RESOURCES
985
- in model .supported_deployment_resources_types
986
- )
993
+ supports_automatic_resources = (
994
+ gca_model_compat .Model .DeploymentResourcesType .AUTOMATIC_RESOURCES
995
+ in model .supported_deployment_resources_types
996
+ )
997
+ supports_dedicated_resources = (
998
+ gca_model_compat .Model .DeploymentResourcesType .DEDICATED_RESOURCES
999
+ in model .supported_deployment_resources_types
1000
+ )
1001
+ provided_custom_machine_spec = (
1002
+ machine_type
1003
+ or accelerator_type
1004
+ or accelerator_count
1005
+ or autoscaling_target_accelerator_duty_cycle
1006
+ or autoscaling_target_cpu_utilization
1007
+ )
987
1008
988
- if not supports_shared_resources :
989
- raise ValueError (
990
- "`deployment_resource_pool` may only be specified for models "
991
- " which support shared resources."
1009
+ # If the model supports both automatic and dedicated deployment resources,
1010
+ # decide based on the presence of machine spec customizations
1011
+ use_dedicated_resources = supports_dedicated_resources and (
1012
+ not supports_automatic_resources or provided_custom_machine_spec
992
1013
)
993
1014
994
- provided_custom_machine_spec = (
995
- machine_type
996
- or accelerator_type
997
- or accelerator_count
998
- or autoscaling_target_accelerator_duty_cycle
999
- or autoscaling_target_cpu_utilization
1000
- )
1015
+ if provided_custom_machine_spec and not use_dedicated_resources :
1016
+ _LOGGER . info (
1017
+ "Model does not support dedicated deployment resources. "
1018
+ "The machine_type, accelerator_type and accelerator_count,"
1019
+ " autoscaling_target_accelerator_duty_cycle,"
1020
+ " autoscaling_target_cpu_utilization parameters are ignored."
1021
+ )
1001
1022
1002
- if provided_custom_machine_spec :
1003
- raise ValueError (
1004
- "Conflicting parameters in deployment request. "
1005
- "The machine_type, accelerator_type and accelerator_count,"
1006
- "autoscaling_target_accelerator_duty_cycle,"
1007
- "autoscaling_target_cpu_utilization parameters may not be set "
1008
- "when `deployment_resource_pool` is specified."
1023
+ if use_dedicated_resources and not machine_type :
1024
+ machine_type = _DEFAULT_MACHINE_TYPE
1025
+ _LOGGER .info (f"Using default machine_type: { machine_type } " )
1026
+
1027
+ if use_dedicated_resources :
1028
+ dedicated_resources = gca_machine_resources_compat .DedicatedResources (
1029
+ min_replica_count = min_replica_count ,
1030
+ max_replica_count = max_replica_count ,
1031
+ )
1032
+
1033
+ machine_spec = gca_machine_resources_compat .MachineSpec (
1034
+ machine_type = machine_type
1035
+ )
1036
+
1037
+ if autoscaling_target_cpu_utilization :
1038
+ autoscaling_metric_spec = gca_machine_resources_compat .AutoscalingMetricSpec (
1039
+ metric_name = "aiplatform.googleapis.com/prediction/online/cpu/utilization" ,
1040
+ target = autoscaling_target_cpu_utilization ,
1041
+ )
1042
+ dedicated_resources .autoscaling_metric_specs .extend (
1043
+ [autoscaling_metric_spec ]
1044
+ )
1045
+
1046
+ if accelerator_type and accelerator_count :
1047
+ utils .validate_accelerator_type (accelerator_type )
1048
+ machine_spec .accelerator_type = accelerator_type
1049
+ machine_spec .accelerator_count = accelerator_count
1050
+
1051
+ if autoscaling_target_accelerator_duty_cycle :
1052
+ autoscaling_metric_spec = gca_machine_resources_compat .AutoscalingMetricSpec (
1053
+ metric_name = "aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle" ,
1054
+ target = autoscaling_target_accelerator_duty_cycle ,
1055
+ )
1056
+ dedicated_resources .autoscaling_metric_specs .extend (
1057
+ [autoscaling_metric_spec ]
1058
+ )
1059
+
1060
+ dedicated_resources .machine_spec = machine_spec
1061
+
1062
+ # Attach the fast_tryout_enabled flag to the deployed model (only in v1beta1)
1063
+ deployed_model .faster_deployment_config = (
1064
+ gca_endpoint_compat .FasterDeploymentConfig (
1065
+ fast_tryout_enabled = fast_tryout_enabled
1066
+ )
1067
+ )
1068
+ deployed_model .dedicated_resources = dedicated_resources
1069
+ else :
1070
+ deployed_model = gca_endpoint_compat .DeployedModel (
1071
+ model = model .versioned_resource_name ,
1072
+ display_name = deployed_model_display_name ,
1073
+ service_account = service_account ,
1074
+ enable_container_logging = not disable_container_logging ,
1009
1075
)
1010
1076
1011
- deployed_model .shared_resources = deployment_resource_pool .resource_name
1077
+ supports_shared_resources = (
1078
+ gca_model_compat .Model .DeploymentResourcesType .SHARED_RESOURCES
1079
+ in model .supported_deployment_resources_types
1080
+ )
1012
1081
1013
- if explanation_spec :
1014
- raise ValueError (
1015
- "Model explanation is not supported for deployments using "
1016
- "shared resources."
1082
+ if not supports_shared_resources :
1083
+ raise ValueError (
1084
+ "`deployment_resource_pool` may only be specified for models "
1085
+ " which support shared resources."
1086
+ )
1087
+
1088
+ provided_custom_machine_spec = (
1089
+ machine_type
1090
+ or accelerator_type
1091
+ or accelerator_count
1092
+ or autoscaling_target_accelerator_duty_cycle
1093
+ or autoscaling_target_cpu_utilization
1017
1094
)
1018
1095
1096
+ if provided_custom_machine_spec :
1097
+ raise ValueError (
1098
+ "Conflicting parameters in deployment request. "
1099
+ "The machine_type, accelerator_type and accelerator_count,"
1100
+ "autoscaling_target_accelerator_duty_cycle,"
1101
+ "autoscaling_target_cpu_utilization parameters may not be set "
1102
+ "when `deployment_resource_pool` is specified."
1103
+ )
1104
+
1105
+ deployed_model .shared_resources = deployment_resource_pool .resource_name
1106
+
1107
+ if explanation_spec :
1108
+ raise ValueError (
1109
+ "Model explanation is not supported for deployments using "
1110
+ "shared resources."
1111
+ )
1112
+
1019
1113
# Checking if traffic percentage is valid
1020
1114
# TODO(b/221059294) PrivateEndpoint should support traffic split
1021
1115
if traffic_split is None and not network :
@@ -1264,6 +1358,7 @@ def deploy(
1264
1358
autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
1265
1359
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
1266
1360
disable_container_logging : bool = False ,
1361
+ fast_tryout_enabled : bool = False ,
1267
1362
) -> Union [Endpoint , models .PrivateEndpoint ]:
1268
1363
"""Deploys model to endpoint.
1269
1364
@@ -1366,6 +1461,10 @@ def deploy(
1366
1461
disable_container_logging (bool):
1367
1462
If True, container logs from the deployed model will not be
1368
1463
written to Cloud Logging. Defaults to False.
1464
+ fast_tryout_enabled (bool): Optional.
1465
+ If True, the model will be deployed using a faster deployment path.
1466
+ Useful for quick experiments. Not for production workloads. Only
1467
+ available for the most popular models and machine types. Defaults to False.
1369
1468
1370
1469
Returns:
1371
1470
endpoint (Union[Endpoint, models.PrivateEndpoint]):
@@ -1421,6 +1520,7 @@ def deploy(
1421
1520
autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
1422
1521
deployment_resource_pool = deployment_resource_pool ,
1423
1522
disable_container_logging = disable_container_logging ,
1523
+ fast_tryout_enabled = fast_tryout_enabled ,
1424
1524
)
1425
1525
1426
1526
@base .optional_sync (return_input_arg = "endpoint" , bind_future_to_self = False )
@@ -1446,6 +1546,7 @@ def _deploy(
1446
1546
autoscaling_target_accelerator_duty_cycle : Optional [int ] = None ,
1447
1547
deployment_resource_pool : Optional [DeploymentResourcePool ] = None ,
1448
1548
disable_container_logging : bool = False ,
1549
+ fast_tryout_enabled : bool = False ,
1449
1550
) -> Union [Endpoint , models .PrivateEndpoint ]:
1450
1551
"""Deploys model to endpoint.
1451
1552
@@ -1540,6 +1641,9 @@ def _deploy(
1540
1641
disable_container_logging (bool):
1541
1642
If True, container logs from the deployed model will not be
1542
1643
written to Cloud Logging. Defaults to False.
1644
+ fast_tryout_enabled (bool):
1645
+ Optional. Whether to enable fast deployment. Defaults to False.
1646
+ Useful for quick experiments. Not for production workloads.
1543
1647
1544
1648
Returns:
1545
1649
endpoint (Union[Endpoint, models.PrivateEndpoint]):
@@ -1591,6 +1695,7 @@ def _deploy(
1591
1695
autoscaling_target_accelerator_duty_cycle = autoscaling_target_accelerator_duty_cycle ,
1592
1696
deployment_resource_pool = deployment_resource_pool ,
1593
1697
disable_container_logging = disable_container_logging ,
1698
+ fast_tryout_enabled = fast_tryout_enabled ,
1594
1699
)
1595
1700
1596
1701
_LOGGER .log_action_completed_against_resource ("model" , "deployed" , endpoint )
0 commit comments