Skip to content

Commit 8f5a41d

Browse files
suzmueLinchin
andauthored
fix: add warning when encountering unknown field types (#1989)
* fix: add warning when encountering unknown field types The types returned for currently unsupported field types may change in the future, when support is added. Warn users that the types they are using are not yet supported. * fix: add warning for unknown subfield types as well * fix: remove unused warnings * fix: remove leftover debugging code * move test case closer to related test * add comments * fix formatting * fix test_table and use warnings.warn instead of pytest.warn * add explicit warning about behavior subject to change in the future add warning for write and warn about future behavior changes * add default converter for _SCALAR_VALUE_TO_JSON_PARAM * factor out shared warning * fix test case and make coverage happy * add unit test to StructQueryParameter class --------- Co-authored-by: Lingqing Gan <[email protected]>
1 parent d0bb87a commit 8f5a41d

File tree

6 files changed

+123
-26
lines changed

6 files changed

+123
-26
lines changed

google/cloud/bigquery/_helpers.py

+28-10
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import math
2222
import re
2323
import os
24+
import warnings
2425
from typing import Optional, Union
2526

2627
from dateutil import relativedelta
@@ -297,12 +298,7 @@ def _record_from_json(value, field):
297298
record = {}
298299
record_iter = zip(field.fields, value["f"])
299300
for subfield, cell in record_iter:
300-
converter = _CELLDATA_FROM_JSON[subfield.field_type]
301-
if subfield.mode == "REPEATED":
302-
value = [converter(item["v"], subfield) for item in cell["v"]]
303-
else:
304-
value = converter(cell["v"], subfield)
305-
record[subfield.name] = value
301+
record[subfield.name] = _field_from_json(cell["v"], subfield)
306302
return record
307303

308304

@@ -382,7 +378,11 @@ def _field_to_index_mapping(schema):
382378

383379

384380
def _field_from_json(resource, field):
385-
converter = _CELLDATA_FROM_JSON.get(field.field_type, lambda value, _: value)
381+
def default_converter(value, field):
382+
_warn_unknown_field_type(field)
383+
return value
384+
385+
converter = _CELLDATA_FROM_JSON.get(field.field_type, default_converter)
386386
if field.mode == "REPEATED":
387387
return [converter(item["v"], field) for item in resource]
388388
else:
@@ -484,6 +484,11 @@ def _json_to_json(value):
484484
return json.dumps(value)
485485

486486

487+
def _string_to_json(value):
488+
"""NOOP string -> string coercion"""
489+
return value
490+
491+
487492
def _timestamp_to_json_parameter(value):
488493
"""Coerce 'value' to an JSON-compatible representation.
489494
@@ -596,6 +601,7 @@ def _range_field_to_json(range_element_type, value):
596601
"DATE": _date_to_json,
597602
"TIME": _time_to_json,
598603
"JSON": _json_to_json,
604+
"STRING": _string_to_json,
599605
# Make sure DECIMAL and BIGDECIMAL are handled, even though
600606
# requests for them should be converted to NUMERIC. Better safe
601607
# than sorry.
@@ -609,6 +615,15 @@ def _range_field_to_json(range_element_type, value):
609615
_SCALAR_VALUE_TO_JSON_PARAM["TIMESTAMP"] = _timestamp_to_json_parameter
610616

611617

618+
def _warn_unknown_field_type(field):
619+
warnings.warn(
620+
"Unknown type '{}' for field '{}'. Behavior reading and writing this type is not officially supported and may change in the future.".format(
621+
field.field_type, field.name
622+
),
623+
FutureWarning,
624+
)
625+
626+
612627
def _scalar_field_to_json(field, row_value):
613628
"""Maps a field and value to a JSON-safe value.
614629
@@ -621,9 +636,12 @@ def _scalar_field_to_json(field, row_value):
621636
Returns:
622637
Any: A JSON-serializable object.
623638
"""
624-
converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type)
625-
if converter is None: # STRING doesn't need converting
626-
return row_value
639+
640+
def default_converter(value):
641+
_warn_unknown_field_type(field)
642+
return value
643+
644+
converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type, default_converter)
627645
return converter(row_value)
628646

629647

google/cloud/bigquery/_pandas_helpers.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,9 @@ def bq_to_arrow_field(bq_field, array_type=None):
204204
metadata=metadata,
205205
)
206206

207-
warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name))
207+
warnings.warn(
208+
"Unable to determine Arrow type for field '{}'.".format(bq_field.name)
209+
)
208210
return None
209211

210212

google/cloud/bigquery/query.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -591,9 +591,8 @@ def to_api_repr(self) -> dict:
591591
Dict: JSON mapping
592592
"""
593593
value = self.value
594-
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_)
595-
if converter is not None:
596-
value = converter(value) # type: ignore
594+
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_, lambda value: value)
595+
value = converter(value) # type: ignore
597596
resource: Dict[str, Any] = {
598597
"parameterType": {"type": self.type_},
599598
"parameterValue": {"value": value},
@@ -748,9 +747,10 @@ def to_api_repr(self) -> dict:
748747
else:
749748
a_type = self.array_type.to_api_repr()
750749

751-
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"])
752-
if converter is not None:
753-
values = [converter(value) for value in values] # type: ignore
750+
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(
751+
a_type["type"], lambda value: value
752+
)
753+
values = [converter(value) for value in values] # type: ignore
754754
a_values = [{"value": value} for value in values]
755755

756756
resource = {
@@ -792,7 +792,7 @@ def __repr__(self):
792792

793793

794794
class StructQueryParameter(_AbstractQueryParameter):
795-
"""Named / positional query parameters for struct values.
795+
"""Name / positional query parameters for struct values.
796796
797797
Args:
798798
name (Optional[str]):
@@ -897,10 +897,8 @@ def to_api_repr(self) -> dict:
897897
values[name] = repr_["parameterValue"]
898898
else:
899899
s_types[name] = {"name": name, "type": {"type": type_}}
900-
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_)
901-
if converter is not None:
902-
value = converter(value) # type: ignore
903-
values[name] = {"value": value}
900+
converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_, lambda value: value)
901+
values[name] = {"value": converter(value)}
904902

905903
resource = {
906904
"parameterType": {

tests/unit/test__helpers.py

+61-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import decimal
1818
import json
1919
import os
20+
import warnings
2021
import pytest
2122
import packaging
2223
import unittest
@@ -640,6 +641,17 @@ def test_w_single_scalar_column(self):
640641
row = {"f": [{"v": "1"}]}
641642
self.assertEqual(self._call_fut(row, schema=[col]), (1,))
642643

644+
def test_w_unknown_type(self):
645+
# SELECT 1 AS col
646+
col = _Field("REQUIRED", "col", "UNKNOWN")
647+
row = {"f": [{"v": "1"}]}
648+
with warnings.catch_warnings(record=True) as warned:
649+
self.assertEqual(self._call_fut(row, schema=[col]), ("1",))
650+
self.assertEqual(len(warned), 1)
651+
warning = warned[0]
652+
self.assertTrue("UNKNOWN" in str(warning))
653+
self.assertTrue("col" in str(warning))
654+
643655
def test_w_single_scalar_geography_column(self):
644656
# SELECT 1 AS col
645657
col = _Field("REQUIRED", "geo", "GEOGRAPHY")
@@ -660,6 +672,17 @@ def test_w_single_array_column(self):
660672
row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]}
661673
self.assertEqual(self._call_fut(row, schema=[col]), ([1, 2, 3],))
662674

675+
def test_w_unknown_type_repeated(self):
676+
# SELECT 1 AS col
677+
col = _Field("REPEATED", "col", "UNKNOWN")
678+
row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]}
679+
with warnings.catch_warnings(record=True) as warned:
680+
self.assertEqual(self._call_fut(row, schema=[col]), (["1", "2", "3"],))
681+
self.assertEqual(len(warned), 1)
682+
warning = warned[0]
683+
self.assertTrue("UNKNOWN" in str(warning))
684+
self.assertTrue("col" in str(warning))
685+
663686
def test_w_struct_w_nested_array_column(self):
664687
# SELECT ([1, 2], 3, [4, 5]) as col
665688
first = _Field("REPEATED", "first", "INTEGER")
@@ -684,6 +707,39 @@ def test_w_struct_w_nested_array_column(self):
684707
({"first": [1, 2], "second": 3, "third": [4, 5]},),
685708
)
686709

710+
def test_w_unknown_type_subfield(self):
711+
# SELECT [(1, 2, 3), (4, 5, 6)] as col
712+
first = _Field("REPEATED", "first", "UNKNOWN1")
713+
second = _Field("REQUIRED", "second", "UNKNOWN2")
714+
third = _Field("REPEATED", "third", "INTEGER")
715+
col = _Field("REQUIRED", "col", "RECORD", fields=[first, second, third])
716+
row = {
717+
"f": [
718+
{
719+
"v": {
720+
"f": [
721+
{"v": [{"v": "1"}, {"v": "2"}]},
722+
{"v": "3"},
723+
{"v": [{"v": "4"}, {"v": "5"}]},
724+
]
725+
}
726+
}
727+
]
728+
}
729+
with warnings.catch_warnings(record=True) as warned:
730+
self.assertEqual(
731+
self._call_fut(row, schema=[col]),
732+
({"first": ["1", "2"], "second": "3", "third": [4, 5]},),
733+
)
734+
self.assertEqual(len(warned), 2) # 1 warning per unknown field.
735+
warned = [str(warning) for warning in warned]
736+
self.assertTrue(
737+
any(["first" in warning and "UNKNOWN1" in warning for warning in warned])
738+
)
739+
self.assertTrue(
740+
any(["second" in warning and "UNKNOWN2" in warning for warning in warned])
741+
)
742+
687743
def test_w_array_of_struct(self):
688744
# SELECT [(1, 2, 3), (4, 5, 6)] as col
689745
first = _Field("REQUIRED", "first", "INTEGER")
@@ -1076,8 +1132,12 @@ def _call_fut(self, field, value):
10761132
def test_w_unknown_field_type(self):
10771133
field = _make_field("UNKNOWN")
10781134
original = object()
1079-
converted = self._call_fut(field, original)
1135+
with warnings.catch_warnings(record=True) as warned:
1136+
converted = self._call_fut(field, original)
10801137
self.assertIs(converted, original)
1138+
self.assertEqual(len(warned), 1)
1139+
warning = warned[0]
1140+
self.assertTrue("UNKNOWN" in str(warning))
10811141

10821142
def test_w_known_field_type(self):
10831143
field = _make_field("INT64")

tests/unit/test_query.py

+19
Original file line numberDiff line numberDiff line change
@@ -1780,6 +1780,25 @@ def test_to_api_repr_w_nested_struct(self):
17801780
param = self._make_one("foo", scalar_1, sub)
17811781
self.assertEqual(param.to_api_repr(), EXPECTED)
17821782

1783+
def test_to_api_repr_w_unknown_type(self):
1784+
EXPECTED = {
1785+
"name": "foo",
1786+
"parameterType": {
1787+
"type": "STRUCT",
1788+
"structTypes": [
1789+
{"name": "bar", "type": {"type": "INT64"}},
1790+
{"name": "baz", "type": {"type": "UNKNOWN_TYPE"}},
1791+
],
1792+
},
1793+
"parameterValue": {
1794+
"structValues": {"bar": {"value": "123"}, "baz": {"value": "abc"}}
1795+
},
1796+
}
1797+
sub_1 = _make_subparam("bar", "INT64", 123)
1798+
sub_2 = _make_subparam("baz", "UNKNOWN_TYPE", "abc")
1799+
param = self._make_one("foo", sub_1, sub_2)
1800+
self.assertEqual(param.to_api_repr(), EXPECTED)
1801+
17831802
def test___eq___wrong_type(self):
17841803
field = self._make_one("test", _make_subparam("bar", "STRING", "abc"))
17851804
other = object()

tests/unit/test_table.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -2751,9 +2751,9 @@ def test_to_arrow_w_unknown_type(self):
27512751
self.assertEqual(ages, [33, 29])
27522752
self.assertEqual(sports, ["volleyball", "basketball"])
27532753

2754-
self.assertEqual(len(warned), 1)
2755-
warning = warned[0]
2756-
self.assertTrue("sport" in str(warning))
2754+
# Expect warning from both the arrow conversion, and the json deserialization.
2755+
self.assertEqual(len(warned), 2)
2756+
self.assertTrue(all("sport" in str(warning) for warning in warned))
27572757

27582758
def test_to_arrow_w_empty_table(self):
27592759
pyarrow = pytest.importorskip(

0 commit comments

Comments
 (0)