
Commit 85a8c41

Ark-kun authored and copybara-github committed
chore: Load bigquery lazily
This avoids multiple issues:

* bigquery modules are auto-loaded even when they are not needed (e.g. `import vertexai`)
* Failures when the bigquery package is not installed
* Failures when the bigquery module fails to import (there were multiple cases)
* The bigquery module can take a long time to load (some tests showed ~10 seconds)

PiperOrigin-RevId: 682064304
1 parent 44df243 commit 85a8c41

File tree

4 files changed: +36 -12 lines changed
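Every file gets the same two-part treatment: the module-level `from google.cloud import bigquery` moves under a `typing.TYPE_CHECKING` guard (evaluated only by static type checkers, never at runtime), annotations that mention bigquery types become strings, and the import itself is deferred into the functions that actually build a BigQuery client. A minimal, self-contained sketch of the pattern; `_load_table_schema` is a hypothetical example function, not code from this commit:

    from typing import TYPE_CHECKING, List

    if TYPE_CHECKING:
        # Seen by type checkers only; costs nothing at runtime and cannot
        # fail when the bigquery package is missing or broken.
        from google.cloud import bigquery


    def _load_table_schema(table_uri: str) -> "List[bigquery.SchemaField]":
        """Fetch a table's schema, importing bigquery only on first use."""
        # Deferred import: the module's load time is paid only by callers
        # that actually need BigQuery, not by `import vertexai`.
        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top

        client = bigquery.Client()
        return list(client.get_table(table_uri).schema)

Because the guarded import never runs, any annotation that references `bigquery` must be a string (e.g. "bigquery.SchemaField"), which is why the diffs below quote those types.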

google/cloud/aiplatform/datasets/column_names_dataset.py

+8 -3

@@ -18,15 +18,17 @@
 
 import csv
 import logging
-from typing import List, Optional, Set
+from typing import List, Optional, Set, TYPE_CHECKING
 
 from google.auth import credentials as auth_credentials
 
-from google.cloud import bigquery
 from google.cloud import storage
 
 from google.cloud.aiplatform import utils
 from google.cloud.aiplatform import datasets
 
+if TYPE_CHECKING:
+    from google.cloud import bigquery
+
 
 class _ColumnNamesDataset(datasets._Dataset):
     @property
@@ -165,7 +167,7 @@ def _retrieve_gcs_source_columns(
 
     @staticmethod
     def _get_bq_schema_field_names_recursively(
-        schema_field: bigquery.SchemaField,
+        schema_field: "bigquery.SchemaField",
     ) -> Set[str]:
         """Retrieve the name for a schema field along with ancestor fields.
         Nested schema fields are flattened and concatenated with a ".".
@@ -243,6 +245,9 @@ def _retrieve_bq_source_columns(
         # Using dot-based "project.dataset.table" format instead.
         bq_table_uri = bq_table_uri.replace(":", ".")
 
+        # Loading bigquery lazily to avoid auto-loading it when importing vertexai
+        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top
+
         client = bigquery.Client(project=project, credentials=credentials)
         table = client.get_table(bq_table_uri)
         schema = table.schema

google/cloud/aiplatform/datasets/tabular_dataset.py

+8 -3

@@ -15,18 +15,20 @@
 # limitations under the License.
 #
 
-from typing import Dict, Optional, Sequence, Tuple, Union
+from typing import Dict, Optional, Sequence, Tuple, Union, TYPE_CHECKING
 
 from google.auth import credentials as auth_credentials
 
-from google.cloud import bigquery
 from google.cloud.aiplatform import base
 from google.cloud.aiplatform import datasets
 from google.cloud.aiplatform.datasets import _datasources
 from google.cloud.aiplatform import initializer
 from google.cloud.aiplatform import schema
 from google.cloud.aiplatform import utils
 
+if TYPE_CHECKING:
+    from google.cloud import bigquery
+
 _AUTOML_TRAINING_MIN_ROWS = 1000
 
 _LOGGER = base.Logger(__name__)
@@ -200,7 +202,7 @@ def create_from_dataframe(
         cls,
         df_source: "pd.DataFrame",  # noqa: F821 - skip check for undefined name 'pd'
         staging_path: str,
-        bq_schema: Optional[Union[str, bigquery.SchemaField]] = None,
+        bq_schema: Optional[Union[str, "bigquery.SchemaField"]] = None,
         display_name: Optional[str] = None,
         project: Optional[str] = None,
         location: Optional[str] = None,
@@ -283,6 +285,9 @@ def create_from_dataframe(
             % (len(df_source), _AUTOML_TRAINING_MIN_ROWS),
         )
 
+        # Loading bigquery lazily to avoid auto-loading it when importing vertexai
+        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top
+
         bigquery_client = bigquery.Client(
             project=project or initializer.global_config.project,
             credentials=credentials or initializer.global_config.credentials,

google/cloud/aiplatform/featurestore/_entity_type.py

+10 -3

@@ -16,7 +16,7 @@
 #
 
 import datetime
-from typing import Dict, List, Optional, Sequence, Tuple, Union
+from typing import Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union
 import uuid
 from google.protobuf import timestamp_pb2
 
@@ -38,7 +38,8 @@
 from google.cloud.aiplatform.utils import featurestore_utils
 from google.cloud.aiplatform.utils import resource_manager_utils
 
-from google.cloud import bigquery
+if TYPE_CHECKING:
+    from google.cloud import bigquery
 
 _LOGGER = base.Logger(__name__)
 _ALL_FEATURE_IDS = "*"
@@ -1295,6 +1296,9 @@ def ingest_from_df(
                 "Received datetime-like column in the dataframe. Please note that the column could be interpreted differently in BigQuery depending on which major version you are using. For more information, please reference the BigQuery v3 release notes here: https://github.com/googleapis/python-bigquery/releases/tag/v3.0.0"
             )
 
+        # Loading bigquery lazily to avoid auto-loading it when importing vertexai
+        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top
+
         bigquery_client = bigquery.Client(
             project=self.project, credentials=self.credentials
         )
@@ -1372,7 +1376,7 @@ def ingest_from_df(
     @staticmethod
     def _get_bq_schema_field(
         name: str, feature_value_type: str
-    ) -> bigquery.SchemaField:
+    ) -> "bigquery.SchemaField":
         """Helper method to get BigQuery Schema Field.
 
         Args:
@@ -1385,6 +1389,9 @@ def _get_bq_schema_field(
         Returns:
             bigquery.SchemaField: bigquery.SchemaField
         """
+        # Loading bigquery lazily to avoid auto-loading it when importing vertexai
+        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top
+
         bq_data_type = (
             utils.featurestore_utils.FEATURE_STORE_VALUE_TYPE_TO_BQ_DATA_TYPE_MAP[
                 feature_value_type

google/cloud/aiplatform/featurestore/featurestore.py

+10 -3

@@ -15,7 +15,7 @@
 # limitations under the License.
 #
 
-from typing import Dict, List, Optional, Sequence, Tuple, Union
+from typing import Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union
 import uuid
 
 from google.auth import credentials as auth_credentials
@@ -37,7 +37,8 @@
     resource_manager_utils,
 )
 
-from google.cloud import bigquery
+if TYPE_CHECKING:
+    from google.cloud import bigquery
 
 _LOGGER = base.Logger(__name__)
 
@@ -1241,6 +1242,9 @@ def batch_serve_to_df(
             f"{self.batch_serve_to_df.__name__}"
         )
 
+        # Loading bigquery lazily to avoid auto-loading it when importing vertexai
+        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top
+
         bigquery_client = bigquery.Client(
             project=self.project, credentials=self.credentials
         )
@@ -1360,7 +1364,7 @@ def _get_ephemeral_bq_full_dataset_id(
         return f"{project_id}.{temp_bq_dataset_name}"[:1024]
 
     def _create_ephemeral_bq_dataset(
-        self, bigquery_client: bigquery.Client, dataset_id: str
+        self, bigquery_client: "bigquery.Client", dataset_id: str
     ) -> "bigquery.Dataset":
         """Helper method to create an ephemeral dataset in BigQuery used to
         temporarily stage data.
@@ -1373,6 +1377,9 @@ def _create_ephemeral_bq_dataset(
         Returns:
             bigquery.Dataset - new BigQuery dataset used to temporarily stage data
         """
+        # Loading bigquery lazily to avoid auto-loading it when importing vertexai
+        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top
+
         temp_bq_dataset = bigquery.Dataset(dataset_ref=dataset_id)
         temp_bq_dataset.location = self.location
 
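As a sanity check that the laziness holds, one could assert that importing the SDK no longer drags bigquery in. A hypothetical verification snippet, not part of the commit, assuming no other installed package imports bigquery eagerly:

    import sys

    import vertexai  # noqa: F401

    # With the lazy imports in place, merely importing the SDK should not
    # have loaded the BigQuery client library.
    assert "google.cloud.bigquery" not in sys.modules, "bigquery was imported eagerly"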