Skip to content

Commit 98288b8

Browse files
vertex-sdk-bot authored and copybara-github committed
feat: Adding fast_tryout_enabled option to Vertex public preview SDK
PiperOrigin-RevId: 694234300
1 parent 16f7beb commit 98288b8

File tree

3 files changed

+263
-55
lines changed

3 files changed

+263
-55
lines changed

google/cloud/aiplatform/preview/models.py

+160-55
Original file line number | Diff line number | Diff line change
@@ -518,13 +518,14 @@ def _validate_deploy_args(
518518
"Minimum and maximum replica counts must not be "
519519
"if not using a shared resource pool."
520520
)
521-
return super()._validate_deploy_args(
521+
return aiplatform.Endpoint._validate_deploy_args(
522522
min_replica_count=min_replica_count,
523523
max_replica_count=max_replica_count,
524524
accelerator_type=accelerator_type,
525525
deployed_model_display_name=deployed_model_display_name,
526526
traffic_split=traffic_split,
527527
traffic_percentage=traffic_percentage,
528+
deployment_resource_pool=deployment_resource_pool,
528529
)
529530

530531
if (
@@ -580,6 +581,7 @@ def deploy(
580581
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
581582
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
582583
disable_container_logging: bool = False,
584+
fast_tryout_enabled: bool = False,
583585
) -> None:
584586
"""Deploys a Model to the Endpoint.
585587
@@ -661,6 +663,10 @@ def deploy(
661663
disable_container_logging (bool):
662664
If True, container logs from the deployed model will not be
663665
written to Cloud Logging. Defaults to False.
666+
fast_tryout_enabled (bool): Optional.
667+
If True, model will be deployed using faster deployment path.
668+
Useful for quick experiments. Not for production workloads. Only
669+
available for most popular models and machine types. Defaults to False.
664670
665671
"""
666672
self._sync_gca_resource_if_skipped()
@@ -699,6 +705,7 @@ def deploy(
699705
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
700706
deployment_resource_pool=deployment_resource_pool,
701707
disable_container_logging=disable_container_logging,
708+
fast_tryout_enabled=fast_tryout_enabled,
702709
)
703710

704711
@base.optional_sync()
@@ -722,6 +729,7 @@ def _deploy(
722729
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
723730
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
724731
disable_container_logging: bool = False,
732+
fast_tryout_enabled: bool = False,
725733
) -> None:
726734
"""Deploys a Model to the Endpoint.
727735
@@ -797,6 +805,10 @@ def _deploy(
797805
disable_container_logging (bool):
798806
If True, container logs from the deployed model will not be
799807
written to Cloud Logging. Defaults to False.
808+
fast_tryout_enabled (bool): Optional.
809+
If True, model will be deployed using faster deployment path.
810+
Useful for quick experiments. Not for production workloads. Only
811+
available for most popular models and machine types. Defaults to False.
800812
801813
"""
802814
_LOGGER.log_action_start_against_resource(
@@ -825,6 +837,7 @@ def _deploy(
825837
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
826838
deployment_resource_pool=deployment_resource_pool,
827839
disable_container_logging=disable_container_logging,
840+
fast_tryout_enabled=fast_tryout_enabled,
828841
)
829842

830843
_LOGGER.log_action_completed_against_resource("model", "deployed", self)
@@ -855,6 +868,7 @@ def _deploy_call(
855868
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
856869
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
857870
disable_container_logging: bool = False,
871+
fast_tryout_enabled: bool = False,
858872
) -> None:
859873
"""Helper method to deploy model to endpoint.
860874
@@ -937,6 +951,10 @@ def _deploy_call(
937951
disable_container_logging (bool):
938952
If True, container logs from the deployed model will not be
939953
written to Cloud Logging. Defaults to False.
954+
fast_tryout_enabled (bool): Optional.
955+
If True, model will be deployed using faster deployment path.
956+
Useful for quick experiments. Not for production workloads. Only
957+
available for most popular models and machine types. Defaults to False.
940958
941959
Raises:
942960
ValueError: If only `accelerator_type` or `accelerator_count` is
@@ -950,72 +968,148 @@ def _deploy_call(
950968
are present.
951969
"""
952970
if not deployment_resource_pool:
953-
return super()._deploy_call(
954-
api_client=api_client,
955-
endpoint_resource_name=endpoint_resource_name,
956-
model=model,
957-
endpoint_resource_traffic_split=endpoint_resource_traffic_split,
958-
network=network,
959-
deployed_model_display_name=deployed_model_display_name,
960-
traffic_percentage=traffic_percentage,
961-
traffic_split=traffic_split,
962-
machine_type=machine_type,
963-
min_replica_count=min_replica_count,
964-
max_replica_count=max_replica_count,
965-
accelerator_type=accelerator_type,
966-
accelerator_count=accelerator_count,
971+
max_replica_count = max(min_replica_count, max_replica_count)
972+
973+
if bool(accelerator_type) != bool(accelerator_count):
974+
raise ValueError(
975+
"Both `accelerator_type` and `accelerator_count` should be specified or None."
976+
)
977+
978+
if autoscaling_target_accelerator_duty_cycle is not None and (
979+
not accelerator_type or not accelerator_count
980+
):
981+
raise ValueError(
982+
"Both `accelerator_type` and `accelerator_count` should be set "
983+
"when specifying autoscaling_target_accelerator_duty_cycle`"
984+
)
985+
986+
deployed_model = gca_endpoint_compat.DeployedModel(
987+
model=model.versioned_resource_name,
988+
display_name=deployed_model_display_name,
967989
service_account=service_account,
968-
explanation_spec=explanation_spec,
969-
metadata=metadata,
970-
deploy_request_timeout=deploy_request_timeout,
971-
autoscaling_target_cpu_utilization=autoscaling_target_cpu_utilization,
972-
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
973-
disable_container_logging=disable_container_logging,
990+
enable_container_logging=not disable_container_logging,
974991
)
975992

976-
deployed_model = gca_endpoint_compat.DeployedModel(
977-
model=model.versioned_resource_name,
978-
display_name=deployed_model_display_name,
979-
service_account=service_account,
980-
enable_container_logging=not disable_container_logging,
981-
)
982-
983-
supports_shared_resources = (
984-
gca_model_compat.Model.DeploymentResourcesType.SHARED_RESOURCES
985-
in model.supported_deployment_resources_types
986-
)
993+
supports_automatic_resources = (
994+
gca_model_compat.Model.DeploymentResourcesType.AUTOMATIC_RESOURCES
995+
in model.supported_deployment_resources_types
996+
)
997+
supports_dedicated_resources = (
998+
gca_model_compat.Model.DeploymentResourcesType.DEDICATED_RESOURCES
999+
in model.supported_deployment_resources_types
1000+
)
1001+
provided_custom_machine_spec = (
1002+
machine_type
1003+
or accelerator_type
1004+
or accelerator_count
1005+
or autoscaling_target_accelerator_duty_cycle
1006+
or autoscaling_target_cpu_utilization
1007+
)
9871008

988-
if not supports_shared_resources:
989-
raise ValueError(
990-
"`deployment_resource_pool` may only be specified for models "
991-
" which support shared resources."
1009+
# If the model supports both automatic and dedicated deployment resources,
1010+
# decide based on the presence of machine spec customizations
1011+
use_dedicated_resources = supports_dedicated_resources and (
1012+
not supports_automatic_resources or provided_custom_machine_spec
9921013
)
9931014

994-
provided_custom_machine_spec = (
995-
machine_type
996-
or accelerator_type
997-
or accelerator_count
998-
or autoscaling_target_accelerator_duty_cycle
999-
or autoscaling_target_cpu_utilization
1000-
)
1015+
if provided_custom_machine_spec and not use_dedicated_resources:
1016+
_LOGGER.info(
1017+
"Model does not support dedicated deployment resources. "
1018+
"The machine_type, accelerator_type and accelerator_count,"
1019+
"autoscaling_target_accelerator_duty_cycle,"
1020+
"autoscaling_target_cpu_utilization parameters are ignored."
1021+
)
10011022

1002-
if provided_custom_machine_spec:
1003-
raise ValueError(
1004-
"Conflicting parameters in deployment request. "
1005-
"The machine_type, accelerator_type and accelerator_count,"
1006-
"autoscaling_target_accelerator_duty_cycle,"
1007-
"autoscaling_target_cpu_utilization parameters may not be set "
1008-
"when `deployment_resource_pool` is specified."
1023+
if use_dedicated_resources and not machine_type:
1024+
machine_type = _DEFAULT_MACHINE_TYPE
1025+
_LOGGER.info(f"Using default machine_type: {machine_type}")
1026+
1027+
if use_dedicated_resources:
1028+
dedicated_resources = gca_machine_resources_compat.DedicatedResources(
1029+
min_replica_count=min_replica_count,
1030+
max_replica_count=max_replica_count,
1031+
)
1032+
1033+
machine_spec = gca_machine_resources_compat.MachineSpec(
1034+
machine_type=machine_type
1035+
)
1036+
1037+
if autoscaling_target_cpu_utilization:
1038+
autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec(
1039+
metric_name="aiplatform.googleapis.com/prediction/online/cpu/utilization",
1040+
target=autoscaling_target_cpu_utilization,
1041+
)
1042+
dedicated_resources.autoscaling_metric_specs.extend(
1043+
[autoscaling_metric_spec]
1044+
)
1045+
1046+
if accelerator_type and accelerator_count:
1047+
utils.validate_accelerator_type(accelerator_type)
1048+
machine_spec.accelerator_type = accelerator_type
1049+
machine_spec.accelerator_count = accelerator_count
1050+
1051+
if autoscaling_target_accelerator_duty_cycle:
1052+
autoscaling_metric_spec = gca_machine_resources_compat.AutoscalingMetricSpec(
1053+
metric_name="aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle",
1054+
target=autoscaling_target_accelerator_duty_cycle,
1055+
)
1056+
dedicated_resources.autoscaling_metric_specs.extend(
1057+
[autoscaling_metric_spec]
1058+
)
1059+
1060+
dedicated_resources.machine_spec = machine_spec
1061+
1062+
# Checking if flag fast_tryout_enabled is set, only in v1beta1
1063+
deployed_model.faster_deployment_config = (
1064+
gca_endpoint_compat.FasterDeploymentConfig(
1065+
fast_tryout_enabled=fast_tryout_enabled
1066+
)
1067+
)
1068+
deployed_model.dedicated_resources = dedicated_resources
1069+
else:
1070+
deployed_model = gca_endpoint_compat.DeployedModel(
1071+
model=model.versioned_resource_name,
1072+
display_name=deployed_model_display_name,
1073+
service_account=service_account,
1074+
enable_container_logging=not disable_container_logging,
10091075
)
10101076

1011-
deployed_model.shared_resources = deployment_resource_pool.resource_name
1077+
supports_shared_resources = (
1078+
gca_model_compat.Model.DeploymentResourcesType.SHARED_RESOURCES
1079+
in model.supported_deployment_resources_types
1080+
)
10121081

1013-
if explanation_spec:
1014-
raise ValueError(
1015-
"Model explanation is not supported for deployments using "
1016-
"shared resources."
1082+
if not supports_shared_resources:
1083+
raise ValueError(
1084+
"`deployment_resource_pool` may only be specified for models "
1085+
" which support shared resources."
1086+
)
1087+
1088+
provided_custom_machine_spec = (
1089+
machine_type
1090+
or accelerator_type
1091+
or accelerator_count
1092+
or autoscaling_target_accelerator_duty_cycle
1093+
or autoscaling_target_cpu_utilization
10171094
)
10181095

1096+
if provided_custom_machine_spec:
1097+
raise ValueError(
1098+
"Conflicting parameters in deployment request. "
1099+
"The machine_type, accelerator_type and accelerator_count,"
1100+
"autoscaling_target_accelerator_duty_cycle,"
1101+
"autoscaling_target_cpu_utilization parameters may not be set "
1102+
"when `deployment_resource_pool` is specified."
1103+
)
1104+
1105+
deployed_model.shared_resources = deployment_resource_pool.resource_name
1106+
1107+
if explanation_spec:
1108+
raise ValueError(
1109+
"Model explanation is not supported for deployments using "
1110+
"shared resources."
1111+
)
1112+
10191113
# Checking if traffic percentage is valid
10201114
# TODO(b/221059294) PrivateEndpoint should support traffic split
10211115
if traffic_split is None and not network:
@@ -1264,6 +1358,7 @@ def deploy(
12641358
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
12651359
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
12661360
disable_container_logging: bool = False,
1361+
fast_tryout_enabled: bool = False,
12671362
) -> Union[Endpoint, models.PrivateEndpoint]:
12681363
"""Deploys model to endpoint.
12691364
@@ -1366,6 +1461,10 @@ def deploy(
13661461
disable_container_logging (bool):
13671462
If True, container logs from the deployed model will not be
13681463
written to Cloud Logging. Defaults to False.
1464+
fast_tryout_enabled (bool): Optional.
1465+
If True, model will be deployed using faster deployment path.
1466+
Useful for quick experiments. Not for production workloads. Only
1467+
available for most popular models and machine types. Defaults to False.
13691468
13701469
Returns:
13711470
endpoint (Union[Endpoint, models.PrivateEndpoint]):
@@ -1421,6 +1520,7 @@ def deploy(
14211520
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
14221521
deployment_resource_pool=deployment_resource_pool,
14231522
disable_container_logging=disable_container_logging,
1523+
fast_tryout_enabled=fast_tryout_enabled,
14241524
)
14251525

14261526
@base.optional_sync(return_input_arg="endpoint", bind_future_to_self=False)
@@ -1446,6 +1546,7 @@ def _deploy(
14461546
autoscaling_target_accelerator_duty_cycle: Optional[int] = None,
14471547
deployment_resource_pool: Optional[DeploymentResourcePool] = None,
14481548
disable_container_logging: bool = False,
1549+
fast_tryout_enabled: bool = False,
14491550
) -> Union[Endpoint, models.PrivateEndpoint]:
14501551
"""Deploys model to endpoint.
14511552
@@ -1540,6 +1641,9 @@ def _deploy(
15401641
disable_container_logging (bool):
15411642
If True, container logs from the deployed model will not be
15421643
written to Cloud Logging. Defaults to False.
1644+
fast_tryout_enabled (bool):
1645+
Optional. Whether to enable fast deployment. Defaults to False.
1646+
Useful for quick experiments. Not for production workloads.
15431647
15441648
Returns:
15451649
endpoint (Union[Endpoint, models.PrivateEndpoint]):
@@ -1591,6 +1695,7 @@ def _deploy(
15911695
autoscaling_target_accelerator_duty_cycle=autoscaling_target_accelerator_duty_cycle,
15921696
deployment_resource_pool=deployment_resource_pool,
15931697
disable_container_logging=disable_container_logging,
1698+
fast_tryout_enabled=fast_tryout_enabled,
15941699
)
15951700

15961701
_LOGGER.log_action_completed_against_resource("model", "deployed", endpoint)

0 commit comments

Comments
 (0)