
Commit 6fa93a4

vertex-sdk-bot authored and copybara-github committed
chore: support bigquery v3
PiperOrigin-RevId: 501424651
1 parent 30ae60a

File tree: 6 files changed (+131, -40 lines)


google/cloud/aiplatform/datasets/tabular_dataset.py (11 additions, 0 deletions)

@@ -216,6 +216,17 @@ def create_from_dataframe(
                 "Pyarrow is not installed, and is required to use the BigQuery client."
                 'Please install the SDK using "pip install google-cloud-aiplatform[datasets]"'
             )
+        import pandas.api.types as pd_types
+
+        if any(
+            [
+                pd_types.is_datetime64_any_dtype(df_source[column])
+                for column in df_source.columns
+            ]
+        ):
+            _LOGGER.info(
+                "Received datetime-like column in the dataframe. Please note that the column could be interpreted differently in BigQuery depending on which major version you are using. For more information, please reference the BigQuery v3 release notes here: https://github.com/googleapis/python-bigquery/releases/tag/v3.0.0"
+            )
 
         if len(df_source) < _AUTOML_TRAINING_MIN_ROWS:
             _LOGGER.info(
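
Note on the new guard: pandas.api.types.is_datetime64_any_dtype matches both tz-naive and tz-aware columns, so the new log line fires for either kind. A minimal sketch (column names hypothetical, not part of this change):

    import pandas as pd
    import pandas.api.types as pd_types

    df = pd.DataFrame(
        {
            "naive": pd.to_datetime(["2023-01-11 12:00:00"]),            # datetime64[ns]
            "aware": pd.to_datetime(["2023-01-11 12:00:00"], utc=True),  # datetime64[ns, UTC]
            "plain": ["not a datetime"],                                  # object
        }
    )

    # Mirrors the check added above: any datetime-like column triggers the log.
    flagged = [col for col in df.columns if pd_types.is_datetime64_any_dtype(df[col])]
    print(flagged)  # ['naive', 'aware']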

google/cloud/aiplatform/featurestore/_entity_type.py (12 additions, 0 deletions)

@@ -1277,6 +1277,8 @@ def ingest_from_df(
             EntityType - The entityType resource object with feature values imported.
 
         """
+        import pandas.api.types as pd_types
+
         try:
             import pyarrow  # noqa: F401 - skip check for 'pyarrow' which is required when using 'google.cloud.bigquery'
         except ImportError:
@@ -1285,6 +1287,16 @@ def ingest_from_df(
                 f"{self.ingest_from_df.__name__}"
             )
 
+        if any(
+            [
+                pd_types.is_datetime64_any_dtype(df_source[column])
+                for column in df_source.columns
+            ]
+        ):
+            _LOGGER.info(
+                "Received datetime-like column in the dataframe. Please note that the column could be interpreted differently in BigQuery depending on which major version you are using. For more information, please reference the BigQuery v3 release notes here: https://github.com/googleapis/python-bigquery/releases/tag/v3.0.0"
+            )
+
         bigquery_client = bigquery.Client(
             project=self.project, credentials=self.credentials
         )
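
The same guard now runs before featurestore ingestion. One way callers can sidestep the ambiguity entirely (a sketch with hypothetical project and resource names, not from this commit) is to make the feature-time column explicitly tz-aware before calling ingest_from_df, since tz-aware values map to BigQuery TIMESTAMP under both client major lines:

    import pandas as pd
    from google.cloud import aiplatform

    aiplatform.init(project="my-project", location="us-central1")  # hypothetical project

    df = pd.DataFrame(
        {
            "movie_id": ["movie_01", "movie_02"],
            "average_rating": [4.9, 4.2],
            "update_time": ["2023-01-11 09:30:00", "2023-01-11 09:35:00"],
        }
    )
    # Tz-aware dtype (datetime64[ns, UTC]) is interpreted the same way by
    # google-cloud-bigquery v2 and v3.
    df["update_time"] = pd.to_datetime(df["update_time"], utc=True)

    # Assumes the featurestore, entity type, and feature already exist.
    entity_type = aiplatform.featurestore.EntityType(
        entity_type_name="movies", featurestore_id="my_featurestore"
    )
    entity_type.ingest_from_df(
        feature_ids=["average_rating"],
        feature_time="update_time",
        df_source=df,
        entity_id_field="movie_id",
    )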

setup.py (1 addition, 1 deletion)

@@ -125,7 +125,7 @@
         "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5",
         "packaging >= 14.3, <22.0.0dev",
         "google-cloud-storage >= 1.32.0, < 3.0.0dev",
-        "google-cloud-bigquery >= 1.15.0, < 3.0.0dev",
+        "google-cloud-bigquery >= 1.15.0, < 4.0.0dev",
         "google-cloud-resource-manager >= 1.3.3, < 3.0.0dev",
         "shapely < 2.0.0",
     ),
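
Raising the ceiling to < 4.0.0dev means the SDK can now run against either google-cloud-bigquery 2.x or 3.x, so code that depends on version-specific behavior has to branch at runtime. A hedged sketch of one way to do that (the system test below uses the same pkg_resources lookup; Version.major assumes a reasonably recent packaging release):

    import pkg_resources
    from packaging import version

    bq_version = pkg_resources.get_distribution("google-cloud-bigquery").version
    if version.parse(bq_version).major >= 3:
        # v3: tz-naive datetime64 columns load as DATETIME
        # (see the release notes linked in the log message above).
        pass
    else:
        # v2: tz-naive datetime64 columns load as TIMESTAMP.
        pass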

tests/system/aiplatform/test_dataset.py (39 additions, 2 deletions)

@@ -21,6 +21,10 @@
 import importlib
 
 import pandas as pd
+import pkg_resources
+import re
+
+from datetime import datetime
 
 from google.api_core import exceptions
 from google.api_core import client_options
@@ -73,6 +77,8 @@
 _TEST_STR_COL = "string_col"
 _TEST_STR_ARR_COL = "string_array_col"
 _TEST_BYTES_COL = "bytes_col"
+_TEST_TIMESTAMP_COL = "timestamp_col"
+_TEST_DATETIME_COL = "datetime_col"
 _TEST_DF_COLUMN_NAMES = [
     _TEST_BOOL_COL,
     _TEST_BOOL_ARR_COL,
@@ -83,7 +89,14 @@
     _TEST_STR_COL,
     _TEST_STR_ARR_COL,
     _TEST_BYTES_COL,
+    _TEST_TIMESTAMP_COL,
+    _TEST_DATETIME_COL,
 ]
+
+_TEST_TIME_NOW = datetime.now()
+_TEST_TIMESTAMP_WITH_TIMEZONE = pd.Timestamp(_TEST_TIME_NOW, tz="US/Pacific")
+_TEST_TIMESTAMP_WITHOUT_TIMEZONE = pd.Timestamp(_TEST_TIME_NOW)
+
 _TEST_DATAFRAME = pd.DataFrame(
     data=[
         [
@@ -96,6 +109,8 @@
             "test",
             ["test1", "test2"],
             b"1",
+            _TEST_TIMESTAMP_WITH_TIMEZONE,
+            _TEST_TIMESTAMP_WITHOUT_TIMEZONE,
         ],
         [
             True,
@@ -107,6 +122,8 @@
             "test1",
             ["test2", "test3"],
             b"0",
+            _TEST_TIMESTAMP_WITH_TIMEZONE,
+            _TEST_TIMESTAMP_WITHOUT_TIMEZONE,
         ],
     ],
     columns=_TEST_DF_COLUMN_NAMES,
@@ -121,6 +138,8 @@
     bigquery.SchemaField(name="string_col", field_type="STRING"),
     bigquery.SchemaField(name="string_array_col", field_type="STRING", mode="REPEATED"),
     bigquery.SchemaField(name="bytes_col", field_type="STRING"),
+    bigquery.SchemaField(name="timestamp_col", field_type="TIMESTAMP"),
+    bigquery.SchemaField(name="datetime_col", field_type="DATETIME"),
 ]
 
 
@@ -248,8 +267,10 @@ def test_create_tabular_dataset(self):
         tabular_dataset.delete()
 
     def test_create_tabular_dataset_from_dataframe(self, bigquery_dataset):
-        bq_staging_table = f"bq://{_TEST_PROJECT}.{bigquery_dataset.dataset_id}.test_table{uuid.uuid4()}"
-
+        table_id = f"test_table{uuid.uuid4()}"
+        bq_staging_table = (
+            f"bq://{_TEST_PROJECT}.{bigquery_dataset.dataset_id}.{table_id}"
+        )
         try:
             tabular_dataset = aiplatform.TabularDataset.create_from_dataframe(
                 df_source=_TEST_DATAFRAME,
@@ -269,6 +290,22 @@ def test_create_tabular_dataset_from_dataframe(self, bigquery_dataset):
                 tabular_dataset.metadata_schema_uri
                 == aiplatform.schema.dataset.metadata.tabular
             )
+            bigquery_client = bigquery.Client(
+                project=_TEST_PROJECT,
+                credentials=initializer.global_config.credentials,
+            )
+            table = bigquery_client.get_table(
+                f"{_TEST_PROJECT}.{bigquery_dataset.dataset_id}.{table_id}"
+            )
+            assert (
+                table.schema[-1]
+                == bigquery.SchemaField(name="datetime_col", field_type="DATETIME")
+                if re.match(
+                    r"3.*",
+                    pkg_resources.get_distribution("google-cloud-bigquery").version,
+                )
+                else bigquery.SchemaField(name="datetime_col", field_type="TIMESTAMP")
+            )
         finally:
             if tabular_dataset is not None:
                 tabular_dataset.delete()
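
The new assertion pins down exactly what changed between the major versions: the trailing tz-naive column comes back as DATETIME under google-cloud-bigquery 3.x and as TIMESTAMP under 2.x. The dtype distinction that drives this can be seen locally without a GCP project (a sketch, not part of the test):

    import pandas as pd

    now = pd.Timestamp("2023-01-11 12:00:00")
    df = pd.DataFrame(
        {
            "timestamp_col": [pd.Timestamp(now, tz="US/Pacific")],  # datetime64[ns, US/Pacific]
            "datetime_col": [now],                                  # datetime64[ns], tz-naive
        }
    )
    print(df.dtypes)
    # load_table_from_dataframe infers TIMESTAMP for the tz-aware column in both
    # major lines; the tz-naive column becomes DATETIME in v3 but TIMESTAMP in v2,
    # which is exactly the branch the assert above takes.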

tests/system/aiplatform/test_featurestore.py (4 additions, 2 deletions)

@@ -323,7 +323,7 @@ def test_ingest_feature_values_from_df_using_feature_time_column_and_online_read
             ],
             columns=["movie_id", "average_rating", "title", "genres", "update_time"],
         )
-        movies_df = movies_df.astype({"update_time": "datetime64"})
+        movies_df["update_time"] = pd.to_datetime(movies_df["update_time"], utc=True)
         feature_time_column = "update_time"
 
         movie_entity_type.ingest_from_df(
@@ -539,7 +539,9 @@ def test_batch_serve_to_df(self, shared_state, caplog):
             ],
             columns=["users", "movies", "timestamp"],
         )
-        read_instances_df = read_instances_df.astype({"timestamp": "datetime64"})
+        read_instances_df["timestamp"] = pd.to_datetime(
+            read_instances_df["timestamp"], utc=True
+        )
 
         df = featurestore.batch_serve_to_df(
             serving_feature_ids={
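
The conversion swap is not just stylistic: .astype({...: "datetime64"}) produces a tz-naive column, and newer pandas releases reject the unit-less "datetime64" string outright, whereas pd.to_datetime(..., utc=True) yields a tz-aware column that both BigQuery client lines map to TIMESTAMP. A quick comparison (a sketch, assuming a recent pandas):

    import pandas as pd

    df = pd.DataFrame({"update_time": ["2023-01-11 09:30:00"]})

    # Old approach: tz-naive, and pandas 2.x raises on the unit-less dtype string.
    # df = df.astype({"update_time": "datetime64"})  # -> datetime64[ns], no tz

    # New approach: explicit and tz-aware.
    df["update_time"] = pd.to_datetime(df["update_time"], utc=True)
    print(df["update_time"].dtype)  # datetime64[ns, UTC]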

tests/unit/aiplatform/test_datasets.py (64 additions, 35 deletions)

@@ -484,60 +484,89 @@ def bigquery_table_schema_mock():
         bigquery.Table, "schema", new_callable=mock.PropertyMock
     ) as bigquery_table_schema_mock:
         bigquery_table_schema_mock.return_value = [
-            bigquery.SchemaField("column_1", "FLOAT", "NULLABLE", "", (), None),
-            bigquery.SchemaField("column_2", "FLOAT", "NULLABLE", "", (), None),
             bigquery.SchemaField(
-                "column_3",
-                "RECORD",
-                "NULLABLE",
-                "",
-                (
+                name="column_1",
+                field_type="FLOAT",
+                mode="NULLABLE",
+                description="",
+                fields=(),
+                policy_tags=None,
+            ),
+            bigquery.SchemaField(
+                name="column_2",
+                field_type="FLOAT",
+                mode="NULLABLE",
+                description="",
+                fields=(),
+                policy_tags=None,
+            ),
+            bigquery.SchemaField(
+                name="column_3",
+                field_type="RECORD",
+                mode="NULLABLE",
+                description="",
+                fields=(
                     bigquery.SchemaField(
-                        "nested_3_1",
-                        "RECORD",
-                        "NULLABLE",
-                        "",
-                        (
+                        name="nested_3_1",
+                        field_type="RECORD",
+                        mode="NULLABLE",
+                        description="",
+                        fields=(
                             bigquery.SchemaField(
-                                "nested_3_1_1", "FLOAT", "NULLABLE", "", (), None
+                                name="nested_3_1_1",
+                                field_type="FLOAT",
+                                mode="NULLABLE",
+                                description="",
+                                fields=(),
+                                policy_tags=None,
                             ),
                             bigquery.SchemaField(
-                                "nested_3_1_2", "FLOAT", "NULLABLE", "", (), None
+                                name="nested_3_1_2",
+                                field_type="FLOAT",
+                                mode="NULLABLE",
+                                description="",
+                                fields=(),
+                                policy_tags=None,
                             ),
                         ),
-                        None,
+                        policy_tags=None,
                     ),
                     bigquery.SchemaField(
-                        "nested_3_2", "FLOAT", "NULLABLE", "", (), None
+                        name="nested_3_2",
+                        field_type="FLOAT",
+                        mode="NULLABLE",
+                        description="",
+                        fields=(),
+                        policy_tags=None,
                    ),
                    bigquery.SchemaField(
-                        "nested_3_3",
-                        "RECORD",
-                        "NULLABLE",
-                        "",
-                        (
+                        name="nested_3_3",
+                        field_type="RECORD",
+                        mode="NULLABLE",
+                        description="",
+                        fields=(
                             bigquery.SchemaField(
-                                "nested_3_3_1",
-                                "RECORD",
-                                "NULLABLE",
-                                "",
-                                (
+                                name="nested_3_3_1",
+                                field_type="RECORD",
+                                mode="NULLABLE",
+                                description="",
+                                fields=(
                                     bigquery.SchemaField(
-                                        "nested_3_3_1_1",
-                                        "FLOAT",
-                                        "NULLABLE",
-                                        "",
-                                        (),
-                                        None,
+                                        name="nested_3_3_1_1",
+                                        field_type="FLOAT",
+                                        mode="NULLABLE",
+                                        description="",
+                                        fields=(),
+                                        policy_tags=None,
                                     ),
                                 ),
-                                None,
+                                policy_tags=None,
                             ),
                         ),
-                        None,
+                        policy_tags=None,
                     ),
                 ),
-                None,
+                policy_tags=None,
             ),
         ]
         yield bigquery_table_schema_mock
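
The mock rewrite is behavior-preserving: the same schema is built, but with keyword arguments instead of positionals, which keeps the fixture readable at this nesting depth and insulates it from optional parameters being appended to the SchemaField signature across client versions. Both forms construct equal objects:

    from google.cloud import bigquery

    positional = bigquery.SchemaField("column_1", "FLOAT", "NULLABLE", "", (), None)
    keyword = bigquery.SchemaField(
        name="column_1",
        field_type="FLOAT",
        mode="NULLABLE",
        description="",
        fields=(),
        policy_tags=None,
    )
    assert positional == keyword  # SchemaField equality compares the field contents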
