
Commit 85a8c41

Ark-kun authored and copybara-github committed
chore: Load bigquery lazily
This avoids multiple issues:

* bigquery modules are auto-loaded even when they are not needed (e.g. `import vertexai`)
* Failures when the bigquery package is not installed
* Failures when the bigquery module fails to import (there were multiple cases)
* The bigquery module can take a long time to load (some tests showed ~10 seconds)

PiperOrigin-RevId: 682064304
1 parent 44df243 commit 85a8c41

File tree

4 files changed: +36 -12 lines changed
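Every file gets the same two-part treatment: the module-level `from google.cloud import bigquery` moves under a `typing.TYPE_CHECKING` guard (evaluated only by static type checkers, never at runtime), annotations that mention bigquery types become strings, and the import itself is deferred into the functions that actually build a BigQuery client. A minimal, self-contained sketch of the pattern; `_load_table_schema` is a hypothetical example function, not code from this commit:

    from typing import TYPE_CHECKING, List

    if TYPE_CHECKING:
        # Seen by type checkers only; costs nothing at runtime and cannot
        # fail when the bigquery package is missing or broken.
        from google.cloud import bigquery


    def _load_table_schema(table_uri: str) -> "List[bigquery.SchemaField]":
        """Fetch a table's schema, importing bigquery only on first use."""
        # Deferred import: the module's load time is paid only by callers
        # that actually need BigQuery, not by `import vertexai`.
        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top

        client = bigquery.Client()
        return list(client.get_table(table_uri).schema)

Because the guarded import never runs, any annotation that references `bigquery` must be a string (e.g. "bigquery.SchemaField"), which is why the diffs below quote those types.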

google/cloud/aiplatform/datasets/column_names_dataset.py

+8 -3

@@ -18,15 +18,17 @@
 
 import csv
 import logging
-from typing import List, Optional, Set
+from typing import List, Optional, Set, TYPE_CHECKING
 
 from google.auth import credentials as auth_credentials
 
-from google.cloud import bigquery
 from google.cloud import storage
 
 from google.cloud.aiplatform import utils
 from google.cloud.aiplatform import datasets
 
+if TYPE_CHECKING:
+    from google.cloud import bigquery
+
 
 class _ColumnNamesDataset(datasets._Dataset):
     @property
@@ -165,7 +167,7 @@ def _retrieve_gcs_source_columns(
 
     @staticmethod
     def _get_bq_schema_field_names_recursively(
-        schema_field: bigquery.SchemaField,
+        schema_field: "bigquery.SchemaField",
     ) -> Set[str]:
         """Retrieve the name for a schema field along with ancestor fields.
         Nested schema fields are flattened and concatenated with a ".".
@@ -243,6 +245,9 @@ def _retrieve_bq_source_columns(
         # Using dot-based "project.dataset.table" format instead.
         bq_table_uri = bq_table_uri.replace(":", ".")
 
+        # Loading bigquery lazily to avoid auto-loading it when importing vertexai
+        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top
+
         client = bigquery.Client(project=project, credentials=credentials)
         table = client.get_table(bq_table_uri)
         schema = table.schema

google/cloud/aiplatform/datasets/tabular_dataset.py

+8 -3

@@ -15,18 +15,20 @@
 # limitations under the License.
 #
 
-from typing import Dict, Optional, Sequence, Tuple, Union
+from typing import Dict, Optional, Sequence, Tuple, Union, TYPE_CHECKING
 
 from google.auth import credentials as auth_credentials
 
-from google.cloud import bigquery
 from google.cloud.aiplatform import base
 from google.cloud.aiplatform import datasets
 from google.cloud.aiplatform.datasets import _datasources
 from google.cloud.aiplatform import initializer
 from google.cloud.aiplatform import schema
 from google.cloud.aiplatform import utils
 
+if TYPE_CHECKING:
+    from google.cloud import bigquery
+
 _AUTOML_TRAINING_MIN_ROWS = 1000
 
 _LOGGER = base.Logger(__name__)
@@ -200,7 +202,7 @@ def create_from_dataframe(
         cls,
         df_source: "pd.DataFrame",  # noqa: F821 - skip check for undefined name 'pd'
         staging_path: str,
-        bq_schema: Optional[Union[str, bigquery.SchemaField]] = None,
+        bq_schema: Optional[Union[str, "bigquery.SchemaField"]] = None,
         display_name: Optional[str] = None,
         project: Optional[str] = None,
         location: Optional[str] = None,
@@ -283,6 +285,9 @@ def create_from_dataframe(
             % (len(df_source), _AUTOML_TRAINING_MIN_ROWS),
         )
 
+        # Loading bigquery lazily to avoid auto-loading it when importing vertexai
+        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top
+
         bigquery_client = bigquery.Client(
             project=project or initializer.global_config.project,
             credentials=credentials or initializer.global_config.credentials,

google/cloud/aiplatform/featurestore/_entity_type.py

+10 -3

@@ -16,7 +16,7 @@
 #
 
 import datetime
-from typing import Dict, List, Optional, Sequence, Tuple, Union
+from typing import Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union
 import uuid
 from google.protobuf import timestamp_pb2
 
@@ -38,7 +38,8 @@
 from google.cloud.aiplatform.utils import featurestore_utils
 from google.cloud.aiplatform.utils import resource_manager_utils
 
-from google.cloud import bigquery
+if TYPE_CHECKING:
+    from google.cloud import bigquery
 
 _LOGGER = base.Logger(__name__)
 _ALL_FEATURE_IDS = "*"
@@ -1295,6 +1296,9 @@ def ingest_from_df(
                 "Received datetime-like column in the dataframe. Please note that the column could be interpreted differently in BigQuery depending on which major version you are using. For more information, please reference the BigQuery v3 release notes here: https://github.com/googleapis/python-bigquery/releases/tag/v3.0.0"
             )
 
+        # Loading bigquery lazily to avoid auto-loading it when importing vertexai
+        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top
+
         bigquery_client = bigquery.Client(
             project=self.project, credentials=self.credentials
         )
@@ -1372,7 +1376,7 @@ def ingest_from_df(
     @staticmethod
     def _get_bq_schema_field(
         name: str, feature_value_type: str
-    ) -> bigquery.SchemaField:
+    ) -> "bigquery.SchemaField":
         """Helper method to get BigQuery Schema Field.
 
         Args:
@@ -1385,6 +1389,9 @@ def _get_bq_schema_field(
         Returns:
             bigquery.SchemaField: bigquery.SchemaField
         """
+        # Loading bigquery lazily to avoid auto-loading it when importing vertexai
+        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top
+
         bq_data_type = (
             utils.featurestore_utils.FEATURE_STORE_VALUE_TYPE_TO_BQ_DATA_TYPE_MAP[
                 feature_value_type

google/cloud/aiplatform/featurestore/featurestore.py

+10 -3

@@ -15,7 +15,7 @@
 # limitations under the License.
 #
 
-from typing import Dict, List, Optional, Sequence, Tuple, Union
+from typing import Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union
 import uuid
 
 from google.auth import credentials as auth_credentials
@@ -37,7 +37,8 @@
     resource_manager_utils,
 )
 
-from google.cloud import bigquery
+if TYPE_CHECKING:
+    from google.cloud import bigquery
 
 _LOGGER = base.Logger(__name__)
 
@@ -1241,6 +1242,9 @@ def batch_serve_to_df(
             f"{self.batch_serve_to_df.__name__}"
         )
 
+        # Loading bigquery lazily to avoid auto-loading it when importing vertexai
+        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top
+
         bigquery_client = bigquery.Client(
             project=self.project, credentials=self.credentials
         )
@@ -1360,7 +1364,7 @@ def _get_ephemeral_bq_full_dataset_id(
         return f"{project_id}.{temp_bq_dataset_name}"[:1024]
 
     def _create_ephemeral_bq_dataset(
-        self, bigquery_client: bigquery.Client, dataset_id: str
+        self, bigquery_client: "bigquery.Client", dataset_id: str
     ) -> "bigquery.Dataset":
         """Helper method to create an ephemeral dataset in BigQuery used to
         temporarily stage data.
@@ -1373,6 +1377,9 @@ def _create_ephemeral_bq_dataset(
         Returns:
             bigquery.Dataset - new BigQuery dataset used to temporarily stage data
         """
+        # Loading bigquery lazily to avoid auto-loading it when importing vertexai
+        from google.cloud import bigquery  # pylint: disable=g-import-not-at-top
+
         temp_bq_dataset = bigquery.Dataset(dataset_ref=dataset_id)
         temp_bq_dataset.location = self.location
 
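As a sanity check that the laziness holds, one could assert that importing the SDK no longer drags bigquery in. A hypothetical verification snippet, not part of the commit, assuming no other installed package imports bigquery eagerly:

    import sys

    import vertexai  # noqa: F401

    # With the lazy imports in place, merely importing the SDK should not
    # have loaded the BigQuery client library.
    assert "google.cloud.bigquery" not in sys.modules, "bigquery was imported eagerly"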