Skip to content

Commit b63b3ba

Browse files
halio-g and rosiezou
authored
feat: Vertex Vizier support in SDK. (#1434)
* Added the Vizier client in the aiplatform folder. * Copied the pyvizier from the open source vizier. * Added the Trial in the aiplatform folder. * Forked the pyvizier from the pyVizier. * Imported the objects from the pyvizier. * Added the Vizier client and types in the aiplatform init file. * Made the pyvizier converters compatible with the Vertex SDK proto. * Forked the framework for the Vertex Vizier from the Open Source Vizier. * Implemented the interfaces for Study and Trial. Added the unit tests and system tests for them. * Set up the dependencies for the Vizier. * Fix the lint error by running the nox -s blacken. * Fixed the lint errors for the Vizier. * Made the unit test import the google credentials. * Disable the coverage check for the pyvizier. It will be imported from the open source vizier. * Remove the coverage dependency to avoid the conflicts. * Fixing the py-3.9 issue in the sample/module-builder * Convert the lambda function to avoid importing numpy in the conftest.py test * Revert the requirements file. * Fix the lint error by running the nox -s blacken. * Fixed the syntax issue in the setup.py * Set up the local package * Scrub the TODO since it's the documentation ticket. * Addresses the comments. * Ran blacken on the test_vizier file. * Import the OSS in the Vertex SDK. * Already imported the package from open source vizier. Removing the code copied from oss. * Import the google-vizier and fix the dependencies for Vertex Vizier. * Configured the dependency of the google-vizier. * Ran the nox -s blacken. * Fixed the lint issue. * Clean the debugging logs. * Decouple the Study, Trial and the aiplatform to make the sample test pass. * Fixed the issue where the system test got an unexpected keyword argument 'credentials' * Ran the nox -s blacken to format the python file. * Fixed the unit test failure. * Add the wrapper to give more error information about the vizier import error. Co-authored-by: Rosie Zou <[email protected]>
1 parent 152563b commit b63b3ba

File tree

17 files changed

+2585
-1
lines changed

17 files changed

+2585
-1
lines changed

.coveragerc

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ branch = True
55
show_missing = True
66
omit =
77
google/cloud/aiplatform/v1/schema/trainingjob/definition/__init__.py
8+
google/cloud/aiplatform/vizier/pyvizier/*
89
exclude_lines =
910
# Re-enable the standard pragma
1011
pragma: NO COVER

google/cloud/aiplatform/compat/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
services.index_endpoint_service_client = (
4545
services.index_endpoint_service_client_v1beta1
4646
)
47+
services.vizier_service_client = services.vizier_service_client_v1beta1
4748

4849
types.accelerator_type = types.accelerator_type_v1beta1
4950
types.annotation = types.annotation_v1beta1
@@ -117,6 +118,7 @@
117118
types.tensorboard_time_series = types.tensorboard_time_series_v1beta1
118119
types.training_pipeline = types.training_pipeline_v1beta1
119120
types.types = types.types_v1beta1
121+
types.vizier_service = types.vizier_service_v1beta1
120122

121123
if DEFAULT_VERSION == V1:
122124

@@ -134,6 +136,7 @@
134136
services.tensorboard_service_client = services.tensorboard_service_client_v1
135137
services.index_service_client = services.index_service_client_v1
136138
services.index_endpoint_service_client = services.index_endpoint_service_client_v1
139+
services.vizier_service_client = services.vizier_service_client_v1
137140

138141
types.accelerator_type = types.accelerator_type_v1
139142
types.annotation = types.annotation_v1
@@ -204,6 +207,7 @@
204207
types.tensorboard_time_series = types.tensorboard_time_series_v1
205208
types.training_pipeline = types.training_pipeline_v1
206209
types.types = types.types_v1
210+
types.vizier_service = types.vizier_service_v1
207211

208212
__all__ = (
209213
DEFAULT_VERSION,

google/cloud/aiplatform/compat/services/__init__.py

+8
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@
5454
from google.cloud.aiplatform_v1beta1.services.tensorboard_service import (
5555
client as tensorboard_service_client_v1beta1,
5656
)
57+
from google.cloud.aiplatform_v1beta1.services.vizier_service import (
58+
client as vizier_service_client_v1beta1,
59+
)
5760

5861
from google.cloud.aiplatform_v1.services.dataset_service import (
5962
client as dataset_service_client_v1,
@@ -94,6 +97,9 @@
9497
from google.cloud.aiplatform_v1.services.tensorboard_service import (
9598
client as tensorboard_service_client_v1,
9699
)
100+
from google.cloud.aiplatform_v1.services.vizier_service import (
101+
client as vizier_service_client_v1,
102+
)
97103

98104
__all__ = (
99105
# v1
@@ -110,6 +116,7 @@
110116
prediction_service_client_v1,
111117
specialist_pool_service_client_v1,
112118
tensorboard_service_client_v1,
119+
vizier_service_client_v1,
113120
# v1beta1
114121
dataset_service_client_v1beta1,
115122
endpoint_service_client_v1beta1,
@@ -124,4 +131,5 @@
124131
specialist_pool_service_client_v1beta1,
125132
metadata_service_client_v1beta1,
126133
tensorboard_service_client_v1beta1,
134+
vizier_service_client_v1beta1,
127135
)

google/cloud/aiplatform/compat/types/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@
8181
tensorboard_time_series as tensorboard_time_series_v1beta1,
8282
training_pipeline as training_pipeline_v1beta1,
8383
types as types_v1beta1,
84+
vizier_service as vizier_service_v1beta1,
8485
)
8586
from google.cloud.aiplatform_v1.types import (
8687
accelerator_type as accelerator_type_v1,
@@ -147,6 +148,7 @@
147148
tensorboard_time_series as tensorboard_time_series_v1,
148149
training_pipeline as training_pipeline_v1,
149150
types as types_v1,
151+
vizier_service as vizier_service_v1,
150152
)
151153

152154
__all__ = (

google/cloud/aiplatform/utils/__init__.py

+14
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
pipeline_service_client_v1beta1,
4848
prediction_service_client_v1beta1,
4949
tensorboard_service_client_v1beta1,
50+
vizier_service_client_v1beta1,
5051
)
5152
from google.cloud.aiplatform.compat.services import (
5253
dataset_service_client_v1,
@@ -61,6 +62,7 @@
6162
pipeline_service_client_v1,
6263
prediction_service_client_v1,
6364
tensorboard_service_client_v1,
65+
vizier_service_client_v1,
6466
)
6567

6668
from google.cloud.aiplatform.compat.types import (
@@ -82,6 +84,7 @@
8284
job_service_client_v1beta1.JobServiceClient,
8385
metadata_service_client_v1beta1.MetadataServiceClient,
8486
tensorboard_service_client_v1beta1.TensorboardServiceClient,
87+
vizier_service_client_v1beta1.VizierServiceClient,
8588
# v1
8689
dataset_service_client_v1.DatasetServiceClient,
8790
endpoint_service_client_v1.EndpointServiceClient,
@@ -93,6 +96,7 @@
9396
pipeline_service_client_v1.PipelineServiceClient,
9497
job_service_client_v1.JobServiceClient,
9598
tensorboard_service_client_v1.TensorboardServiceClient,
99+
vizier_service_client_v1.VizierServiceClient,
96100
)
97101

98102

@@ -570,6 +574,15 @@ class TensorboardClientWithOverride(ClientWithOverride):
570574
)
571575

572576

577+
class VizierClientWithOverride(ClientWithOverride):
578+
_is_temporary = True
579+
_default_version = compat.DEFAULT_VERSION
580+
_version_map = (
581+
(compat.V1, vizier_service_client_v1.VizierServiceClient),
582+
(compat.V1BETA1, vizier_service_client_v1beta1.VizierServiceClient),
583+
)
584+
585+
573586
VertexAiServiceClientWithOverride = TypeVar(
574587
"VertexAiServiceClientWithOverride",
575588
DatasetClientWithOverride,
@@ -582,6 +595,7 @@ class TensorboardClientWithOverride(ClientWithOverride):
582595
PredictionClientWithOverride,
583596
MetadataClientWithOverride,
584597
TensorboardClientWithOverride,
598+
VizierClientWithOverride,
585599
)
586600

587601

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright 2022 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
from google.cloud.aiplatform.vizier.study import Study
17+
from google.cloud.aiplatform.vizier.trial import Trial
18+
19+
__all__ = (
20+
"Study",
21+
"Trial",
22+
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
# -*- coding: utf-8 -*-
2+
3+
# Copyright 2022 Google LLC
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
"""Cross-platform Vizier client interfaces.
17+
18+
Aside from "materialize_" methods, code written using these interfaces is
19+
compatible with OSS and Cloud Vertex Vizier. Note importantly that subclasses
20+
may have more methods than what is required by interfaces, and such methods
21+
are not cross-compatible. Our recommendation is to explicitly type your objects
22+
to be `StudyInterface` or `TrialInterface` when you want to guarantee that
23+
a code block is cross-platform.
24+
25+
Keywords:
26+
27+
#Materialize: The method returns a deep copy of the underlying pyvizier object.
28+
Modifying the returned object does not update the Vizier service.
29+
"""
30+
31+
from __future__ import annotations
32+
33+
from typing import Optional, Collection, Type, TypeVar, Mapping, Any
34+
import abc
35+
36+
from google.cloud.aiplatform.vizier import pyvizier as vz
37+
38+
_T = TypeVar("_T")
39+
40+
41+
class ResourceNotFoundError(LookupError):
42+
"""Error raised by Vizier clients when resource is not found."""
43+
44+
pass
45+
46+
47+
class TrialInterface(abc.ABC):
48+
"""Responsible for trial-level operations."""
49+
50+
@property
51+
@abc.abstractmethod
52+
def uid(self) -> int:
53+
"""Unique identifier of the trial."""
54+
55+
@property
56+
@abc.abstractmethod
57+
def parameters(self) -> Mapping[str, Any]:
58+
"""Parameters of the trial."""
59+
60+
@property
61+
@abc.abstractmethod
62+
def status(self) -> vz.TrialStatus:
63+
"""Trial's status."""
64+
65+
@abc.abstractmethod
66+
def delete(self) -> None:
67+
"""Delete the Trial in Vizier service.
68+
69+
There is currently no promise on how this object behaves after `delete()`.
70+
If you are sharing a Trial object in parallel processes, proceed with
71+
caution.
72+
"""
73+
74+
@abc.abstractmethod
75+
def complete(
76+
self,
77+
measurement: Optional[vz.Measurement] = None,
78+
*,
79+
infeasible_reason: Optional[str] = None,
80+
) -> Optional[vz.Measurement]:
81+
"""Completes the trial and #materializes the measurement.
82+
83+
* If `measurement` is provided, then Vizier writes it as the trial's final
84+
measurement and returns it.
85+
* If `infeasible_reason` is provided, `measurement` is not needed.
86+
* If neither is provided, then Vizier selects an existing (intermediate)
87+
measurement to be the final measurement and returns it.
88+
89+
Args:
90+
measurement: Final measurement.
91+
infeasible_reason: Infeasible reason for missing final measurement.
92+
93+
Returns:
94+
The final measurement of the trial, or None if the trial is marked
95+
infeasible.
96+
97+
Raises:
98+
ValueError: If neither `measurement` nor `infeasible_reason` is provided
99+
but the trial does not contain any intermediate measurements.
100+
"""
101+
102+
@abc.abstractmethod
103+
def should_stop(self) -> bool:
104+
"""Returns true if the trial should stop."""
105+
106+
@abc.abstractmethod
107+
def add_measurement(self, measurement: vz.Measurement) -> None:
108+
"""Adds an intermediate measurement."""
109+
110+
@abc.abstractmethod
111+
def materialize(self, *, include_all_measurements: bool = True) -> vz.Trial:
112+
"""#Materializes the Trial.
113+
114+
Args:
115+
include_all_measurements: If True, returned Trial includes all
116+
intermediate measurements. The final measurement is always provided.
117+
118+
Returns:
119+
Trial object.
120+
"""
121+
122+
123+
class StudyInterface(abc.ABC):
124+
"""Responsible for study-level operations."""
125+
126+
@abc.abstractmethod
127+
def create_or_load(
128+
self, display_name: str, problem: vz.ProblemStatement
129+
) -> StudyInterface:
130+
""" """
131+
132+
@abc.abstractmethod
133+
def suggest(
134+
self, *, count: Optional[int] = None, worker: str = ""
135+
) -> Collection[TrialInterface]:
136+
"""Returns Trials to be evaluated by worker.
137+
138+
Args:
139+
count: Number of suggestions.
140+
worker: When new Trials are generated, their `assigned_worker` field is
141+
populated with this worker. suggest() first looks for existing Trials
142+
that are assigned to `worker`, before generating new ones.
143+
144+
Returns:
145+
Trials.
146+
"""
147+
148+
@abc.abstractmethod
149+
def delete(self) -> None:
150+
"""Deletes the study."""
151+
152+
@abc.abstractmethod
153+
def trials(
154+
self, trial_filter: Optional[vz.TrialFilter] = None
155+
) -> Collection[TrialInterface]:
156+
"""Fetches a collection of trials."""
157+
158+
@abc.abstractmethod
159+
def get_trial(self, uid: int) -> TrialInterface:
160+
"""Fetches a single trial.
161+
162+
Args:
163+
uid: Unique identifier of the trial within study.
164+
165+
Returns:
166+
Trial.
167+
168+
Raises:
169+
ResourceNotFoundError: If trial does not exist.
170+
"""
171+
172+
@abc.abstractmethod
173+
def optimal_trials(self) -> Collection[TrialInterface]:
174+
"""Returns optimal trial(s)."""
175+
176+
@abc.abstractmethod
177+
def materialize_study_config(self) -> vz.StudyConfig:
178+
"""#Materializes the study config."""
179+
180+
@abc.abstractclassmethod
181+
def from_uid(cls: Type[_T], uid: str) -> _T:
182+
"""Fetches an existing study from the Vizier service.
183+
184+
Args:
185+
uid: Unique identifier of the study.
186+
187+
Returns:
188+
Study.
189+
190+
Raises:
191+
ResourceNotFoundError: If study does not exist.
192+
"""

0 commit comments

Comments
 (0)