
Commit 6fa93a4

vertex-sdk-bot authored and copybara-github committed
chore: support bigquery v3
PiperOrigin-RevId: 501424651
1 parent 30ae60a

File tree: 6 files changed (+131, -40 lines)


google/cloud/aiplatform/datasets/tabular_dataset.py (11 additions, 0 deletions)

@@ -216,6 +216,17 @@ def create_from_dataframe(
                 "Pyarrow is not installed, and is required to use the BigQuery client."
                 'Please install the SDK using "pip install google-cloud-aiplatform[datasets]"'
             )
+        import pandas.api.types as pd_types
+
+        if any(
+            [
+                pd_types.is_datetime64_any_dtype(df_source[column])
+                for column in df_source.columns
+            ]
+        ):
+            _LOGGER.info(
+                "Received datetime-like column in the dataframe. Please note that the column could be interpreted differently in BigQuery depending on which major version you are using. For more information, please reference the BigQuery v3 release notes here: https://github.com/googleapis/python-bigquery/releases/tag/v3.0.0"
+            )
 
         if len(df_source) < _AUTOML_TRAINING_MIN_ROWS:
             _LOGGER.info(
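
Note on the new guard: pandas.api.types.is_datetime64_any_dtype matches both tz-naive and tz-aware columns, so the new log line fires for either kind. A minimal sketch (column names hypothetical, not part of this change):

    import pandas as pd
    import pandas.api.types as pd_types

    df = pd.DataFrame(
        {
            "naive": pd.to_datetime(["2023-01-11 12:00:00"]),            # datetime64[ns]
            "aware": pd.to_datetime(["2023-01-11 12:00:00"], utc=True),  # datetime64[ns, UTC]
            "plain": ["not a datetime"],                                  # object
        }
    )

    # Mirrors the check added above: any datetime-like column triggers the log.
    flagged = [col for col in df.columns if pd_types.is_datetime64_any_dtype(df[col])]
    print(flagged)  # ['naive', 'aware']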

google/cloud/aiplatform/featurestore/_entity_type.py (12 additions, 0 deletions)

@@ -1277,6 +1277,8 @@ def ingest_from_df(
             EntityType - The entityType resource object with feature values imported.
 
         """
+        import pandas.api.types as pd_types
+
         try:
             import pyarrow  # noqa: F401 - skip check for 'pyarrow' which is required when using 'google.cloud.bigquery'
         except ImportError:
@@ -1285,6 +1287,16 @@ def ingest_from_df(
                 f"{self.ingest_from_df.__name__}"
             )
 
+        if any(
+            [
+                pd_types.is_datetime64_any_dtype(df_source[column])
+                for column in df_source.columns
+            ]
+        ):
+            _LOGGER.info(
+                "Received datetime-like column in the dataframe. Please note that the column could be interpreted differently in BigQuery depending on which major version you are using. For more information, please reference the BigQuery v3 release notes here: https://github.com/googleapis/python-bigquery/releases/tag/v3.0.0"
+            )
+
         bigquery_client = bigquery.Client(
             project=self.project, credentials=self.credentials
         )
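
The same guard now runs before featurestore ingestion. One way callers can sidestep the ambiguity entirely (a sketch with hypothetical project and resource names, not from this commit) is to make the feature-time column explicitly tz-aware before calling ingest_from_df, since tz-aware values map to BigQuery TIMESTAMP under both client major lines:

    import pandas as pd
    from google.cloud import aiplatform

    aiplatform.init(project="my-project", location="us-central1")  # hypothetical project

    df = pd.DataFrame(
        {
            "movie_id": ["movie_01", "movie_02"],
            "average_rating": [4.9, 4.2],
            "update_time": ["2023-01-11 09:30:00", "2023-01-11 09:35:00"],
        }
    )
    # Tz-aware dtype (datetime64[ns, UTC]) is interpreted the same way by
    # google-cloud-bigquery v2 and v3.
    df["update_time"] = pd.to_datetime(df["update_time"], utc=True)

    # Assumes the featurestore, entity type, and feature already exist.
    entity_type = aiplatform.featurestore.EntityType(
        entity_type_name="movies", featurestore_id="my_featurestore"
    )
    entity_type.ingest_from_df(
        feature_ids=["average_rating"],
        feature_time="update_time",
        df_source=df,
        entity_id_field="movie_id",
    )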

setup.py (1 addition, 1 deletion)

@@ -125,7 +125,7 @@
         "protobuf>=3.19.5,<5.0.0dev,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5",
         "packaging >= 14.3, <22.0.0dev",
         "google-cloud-storage >= 1.32.0, < 3.0.0dev",
-        "google-cloud-bigquery >= 1.15.0, < 3.0.0dev",
+        "google-cloud-bigquery >= 1.15.0, < 4.0.0dev",
         "google-cloud-resource-manager >= 1.3.3, < 3.0.0dev",
         "shapely < 2.0.0",
     ),
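
Raising the ceiling to < 4.0.0dev means the SDK can now run against either google-cloud-bigquery 2.x or 3.x, so code that depends on version-specific behavior has to branch at runtime. A hedged sketch of one way to do that (the system test below uses the same pkg_resources lookup; Version.major assumes a reasonably recent packaging release):

    import pkg_resources
    from packaging import version

    bq_version = pkg_resources.get_distribution("google-cloud-bigquery").version
    if version.parse(bq_version).major >= 3:
        # v3: tz-naive datetime64 columns load as DATETIME
        # (see the release notes linked in the log message above).
        pass
    else:
        # v2: tz-naive datetime64 columns load as TIMESTAMP.
        pass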

tests/system/aiplatform/test_dataset.py (39 additions, 2 deletions)

@@ -21,6 +21,10 @@
 import importlib
 
 import pandas as pd
+import pkg_resources
+import re
+
+from datetime import datetime
 
 from google.api_core import exceptions
 from google.api_core import client_options
@@ -73,6 +77,8 @@
 _TEST_STR_COL = "string_col"
 _TEST_STR_ARR_COL = "string_array_col"
 _TEST_BYTES_COL = "bytes_col"
+_TEST_TIMESTAMP_COL = "timestamp_col"
+_TEST_DATETIME_COL = "datetime_col"
 _TEST_DF_COLUMN_NAMES = [
     _TEST_BOOL_COL,
     _TEST_BOOL_ARR_COL,
@@ -83,7 +89,14 @@
     _TEST_STR_COL,
     _TEST_STR_ARR_COL,
     _TEST_BYTES_COL,
+    _TEST_TIMESTAMP_COL,
+    _TEST_DATETIME_COL,
 ]
+
+_TEST_TIME_NOW = datetime.now()
+_TEST_TIMESTAMP_WITH_TIMEZONE = pd.Timestamp(_TEST_TIME_NOW, tz="US/Pacific")
+_TEST_TIMESTAMP_WITHOUT_TIMEZONE = pd.Timestamp(_TEST_TIME_NOW)
+
 _TEST_DATAFRAME = pd.DataFrame(
     data=[
         [
@@ -96,6 +109,8 @@
             "test",
             ["test1", "test2"],
             b"1",
+            _TEST_TIMESTAMP_WITH_TIMEZONE,
+            _TEST_TIMESTAMP_WITHOUT_TIMEZONE,
         ],
         [
             True,
@@ -107,6 +122,8 @@
             "test1",
             ["test2", "test3"],
             b"0",
+            _TEST_TIMESTAMP_WITH_TIMEZONE,
+            _TEST_TIMESTAMP_WITHOUT_TIMEZONE,
         ],
     ],
     columns=_TEST_DF_COLUMN_NAMES,
@@ -121,6 +138,8 @@
     bigquery.SchemaField(name="string_col", field_type="STRING"),
     bigquery.SchemaField(name="string_array_col", field_type="STRING", mode="REPEATED"),
     bigquery.SchemaField(name="bytes_col", field_type="STRING"),
+    bigquery.SchemaField(name="timestamp_col", field_type="TIMESTAMP"),
+    bigquery.SchemaField(name="datetime_col", field_type="DATETIME"),
 ]
 
 
@@ -248,8 +267,10 @@ def test_create_tabular_dataset(self):
         tabular_dataset.delete()
 
     def test_create_tabular_dataset_from_dataframe(self, bigquery_dataset):
-        bq_staging_table = f"bq://{_TEST_PROJECT}.{bigquery_dataset.dataset_id}.test_table{uuid.uuid4()}"
-
+        table_id = f"test_table{uuid.uuid4()}"
+        bq_staging_table = (
+            f"bq://{_TEST_PROJECT}.{bigquery_dataset.dataset_id}.{table_id}"
+        )
         try:
             tabular_dataset = aiplatform.TabularDataset.create_from_dataframe(
                 df_source=_TEST_DATAFRAME,
@@ -269,6 +290,22 @@ def test_create_tabular_dataset_from_dataframe(self, bigquery_dataset):
                 tabular_dataset.metadata_schema_uri
                 == aiplatform.schema.dataset.metadata.tabular
             )
+            bigquery_client = bigquery.Client(
+                project=_TEST_PROJECT,
+                credentials=initializer.global_config.credentials,
+            )
+            table = bigquery_client.get_table(
+                f"{_TEST_PROJECT}.{bigquery_dataset.dataset_id}.{table_id}"
+            )
+            assert (
+                table.schema[-1]
+                == bigquery.SchemaField(name="datetime_col", field_type="DATETIME")
+                if re.match(
+                    r"3.*",
+                    pkg_resources.get_distribution("google-cloud-bigquery").version,
+                )
+                else bigquery.SchemaField(name="datetime_col", field_type="TIMESTAMP")
+            )
         finally:
             if tabular_dataset is not None:
                 tabular_dataset.delete()
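
The new assertion pins down exactly what changed between the major versions: the trailing tz-naive column comes back as DATETIME under google-cloud-bigquery 3.x and as TIMESTAMP under 2.x. The dtype distinction that drives this can be seen locally without a GCP project (a sketch, not part of the test):

    import pandas as pd

    now = pd.Timestamp("2023-01-11 12:00:00")
    df = pd.DataFrame(
        {
            "timestamp_col": [pd.Timestamp(now, tz="US/Pacific")],  # datetime64[ns, US/Pacific]
            "datetime_col": [now],                                  # datetime64[ns], tz-naive
        }
    )
    print(df.dtypes)
    # load_table_from_dataframe infers TIMESTAMP for the tz-aware column in both
    # major lines; the tz-naive column becomes DATETIME in v3 but TIMESTAMP in v2,
    # which is exactly the branch the assert above takes.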

tests/system/aiplatform/test_featurestore.py (4 additions, 2 deletions)

@@ -323,7 +323,7 @@ def test_ingest_feature_values_from_df_using_feature_time_column_and_online_read
             ],
             columns=["movie_id", "average_rating", "title", "genres", "update_time"],
         )
-        movies_df = movies_df.astype({"update_time": "datetime64"})
+        movies_df["update_time"] = pd.to_datetime(movies_df["update_time"], utc=True)
         feature_time_column = "update_time"
 
         movie_entity_type.ingest_from_df(
@@ -539,7 +539,9 @@ def test_batch_serve_to_df(self, shared_state, caplog):
             ],
             columns=["users", "movies", "timestamp"],
         )
-        read_instances_df = read_instances_df.astype({"timestamp": "datetime64"})
+        read_instances_df["timestamp"] = pd.to_datetime(
+            read_instances_df["timestamp"], utc=True
+        )
 
         df = featurestore.batch_serve_to_df(
             serving_feature_ids={
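
The conversion swap is not just stylistic: .astype({...: "datetime64"}) produces a tz-naive column, and newer pandas releases reject the unit-less "datetime64" string outright, whereas pd.to_datetime(..., utc=True) yields a tz-aware column that both BigQuery client lines map to TIMESTAMP. A quick comparison (a sketch, assuming a recent pandas):

    import pandas as pd

    df = pd.DataFrame({"update_time": ["2023-01-11 09:30:00"]})

    # Old approach: tz-naive, and pandas 2.x raises on the unit-less dtype string.
    # df = df.astype({"update_time": "datetime64"})  # -> datetime64[ns], no tz

    # New approach: explicit and tz-aware.
    df["update_time"] = pd.to_datetime(df["update_time"], utc=True)
    print(df["update_time"].dtype)  # datetime64[ns, UTC]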

tests/unit/aiplatform/test_datasets.py (64 additions, 35 deletions)

@@ -484,60 +484,89 @@ def bigquery_table_schema_mock():
         bigquery.Table, "schema", new_callable=mock.PropertyMock
     ) as bigquery_table_schema_mock:
         bigquery_table_schema_mock.return_value = [
-            bigquery.SchemaField("column_1", "FLOAT", "NULLABLE", "", (), None),
-            bigquery.SchemaField("column_2", "FLOAT", "NULLABLE", "", (), None),
             bigquery.SchemaField(
-                "column_3",
-                "RECORD",
-                "NULLABLE",
-                "",
-                (
+                name="column_1",
+                field_type="FLOAT",
+                mode="NULLABLE",
+                description="",
+                fields=(),
+                policy_tags=None,
+            ),
+            bigquery.SchemaField(
+                name="column_2",
+                field_type="FLOAT",
+                mode="NULLABLE",
+                description="",
+                fields=(),
+                policy_tags=None,
+            ),
+            bigquery.SchemaField(
+                name="column_3",
+                field_type="RECORD",
+                mode="NULLABLE",
+                description="",
+                fields=(
                     bigquery.SchemaField(
-                        "nested_3_1",
-                        "RECORD",
-                        "NULLABLE",
-                        "",
-                        (
+                        name="nested_3_1",
+                        field_type="RECORD",
+                        mode="NULLABLE",
+                        description="",
+                        fields=(
                             bigquery.SchemaField(
-                                "nested_3_1_1", "FLOAT", "NULLABLE", "", (), None
+                                name="nested_3_1_1",
+                                field_type="FLOAT",
+                                mode="NULLABLE",
+                                description="",
+                                fields=(),
+                                policy_tags=None,
                             ),
                             bigquery.SchemaField(
-                                "nested_3_1_2", "FLOAT", "NULLABLE", "", (), None
+                                name="nested_3_1_2",
+                                field_type="FLOAT",
+                                mode="NULLABLE",
+                                description="",
+                                fields=(),
+                                policy_tags=None,
                             ),
                         ),
-                        None,
+                        policy_tags=None,
                     ),
                     bigquery.SchemaField(
-                        "nested_3_2", "FLOAT", "NULLABLE", "", (), None
+                        name="nested_3_2",
+                        field_type="FLOAT",
+                        mode="NULLABLE",
+                        description="",
+                        fields=(),
+                        policy_tags=None,
                    ),
                    bigquery.SchemaField(
-                        "nested_3_3",
-                        "RECORD",
-                        "NULLABLE",
-                        "",
-                        (
+                        name="nested_3_3",
+                        field_type="RECORD",
+                        mode="NULLABLE",
+                        description="",
+                        fields=(
                             bigquery.SchemaField(
-                                "nested_3_3_1",
-                                "RECORD",
-                                "NULLABLE",
-                                "",
-                                (
+                                name="nested_3_3_1",
+                                field_type="RECORD",
+                                mode="NULLABLE",
+                                description="",
+                                fields=(
                                     bigquery.SchemaField(
-                                        "nested_3_3_1_1",
-                                        "FLOAT",
-                                        "NULLABLE",
-                                        "",
-                                        (),
-                                        None,
+                                        name="nested_3_3_1_1",
+                                        field_type="FLOAT",
+                                        mode="NULLABLE",
+                                        description="",
+                                        fields=(),
+                                        policy_tags=None,
                                     ),
                                 ),
-                                None,
+                                policy_tags=None,
                             ),
                         ),
-                        None,
+                        policy_tags=None,
                     ),
                 ),
-                None,
+                policy_tags=None,
             ),
         ]
         yield bigquery_table_schema_mock
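
The mock rewrite is behavior-preserving: the same schema is built, but with keyword arguments instead of positionals, which keeps the fixture readable at this nesting depth and insulates it from optional parameters being appended to the SchemaField signature across client versions. Both forms construct equal objects:

    from google.cloud import bigquery

    positional = bigquery.SchemaField("column_1", "FLOAT", "NULLABLE", "", (), None)
    keyword = bigquery.SchemaField(
        name="column_1",
        field_type="FLOAT",
        mode="NULLABLE",
        description="",
        fields=(),
        policy_tags=None,
    )
    assert positional == keyword  # SchemaField equality compares the field contents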
