Skip to content

Commit 0ce1ca5

Browse files
authored
BigQuery: Add to_standard_sql() method to SchemaField (#8880)
* Add to_standard_sql() method to SchemaField
* Support standard SQL names in to_standard_sql()
* Add support for ARRAY type in to_standard_sql()
1 parent 8c8e360 commit 0ce1ca5

File tree

2 files changed

+228
-0
lines changed

2 files changed

+228
-0
lines changed

bigquery/google/cloud/bigquery/schema.py

+62
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,33 @@
1414

1515
"""Schemas for BigQuery tables / queries."""
1616

17+
from google.cloud.bigquery_v2 import types
18+
19+
20+
# SQL types reference:
21+
# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
22+
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
23+
LEGACY_TO_STANDARD_TYPES = {
    # Text and binary data.
    "STRING": types.StandardSqlDataType.STRING,
    "BYTES": types.StandardSqlDataType.BYTES,
    # Numeric types; both the legacy and the standard SQL spellings are accepted.
    "INTEGER": types.StandardSqlDataType.INT64,
    "INT64": types.StandardSqlDataType.INT64,
    "FLOAT": types.StandardSqlDataType.FLOAT64,
    "FLOAT64": types.StandardSqlDataType.FLOAT64,
    "NUMERIC": types.StandardSqlDataType.NUMERIC,
    # Booleans, again under both spellings.
    "BOOLEAN": types.StandardSqlDataType.BOOL,
    "BOOL": types.StandardSqlDataType.BOOL,
    "GEOGRAPHY": types.StandardSqlDataType.GEOGRAPHY,
    # Nested fields: legacy "RECORD" and standard "STRUCT" share one type kind.
    "RECORD": types.StandardSqlDataType.STRUCT,
    "STRUCT": types.StandardSqlDataType.STRUCT,
    # Date and time types.
    "TIMESTAMP": types.StandardSqlDataType.TIMESTAMP,
    "DATE": types.StandardSqlDataType.DATE,
    "TIME": types.StandardSqlDataType.TIME,
    "DATETIME": types.StandardSqlDataType.DATETIME,
    # ARRAY is deliberately absent: there is no direct conversion for it,
    # since an array is expressed through mode="REPEATED" on the field.
}
"""String names of the legacy SQL types to integer codes of Standard SQL types."""
43+
1744

1845
class SchemaField(object):
1946
"""Describe a single field within a table schema.
@@ -146,6 +173,41 @@ def _key(self):
146173
self._fields,
147174
)
148175

176+
def to_standard_sql(self):
    """Return the field as the standard SQL field representation object.

    A field whose mode is ``"REPEATED"`` becomes an ``ARRAY`` whose element
    type is looked up from the field type; any other field is converted
    directly. Field types that are missing from ``LEGACY_TO_STANDARD_TYPES``
    map to ``TYPE_KIND_UNSPECIFIED``. When the looked-up kind is ``STRUCT``,
    the sub-fields are converted recursively.

    Returns:
        An instance of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`.
    """
    kinds = types.StandardSqlDataType
    own_kind = LEGACY_TO_STANDARD_TYPES.get(
        self.field_type, kinds.TYPE_KIND_UNSPECIFIED
    )
    sql_type = kinds()

    if self.mode == "REPEATED":
        # ARRAY cannot directly contain other arrays, only scalar types and STRUCTs
        # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type
        sql_type.type_kind = kinds.ARRAY
        kind_holder = sql_type.array_element_type
    else:
        kind_holder = sql_type

    # The looked-up kind describes either the field itself or, for a repeated
    # field, the elements of the array.
    kind_holder.type_kind = own_kind

    if own_kind == kinds.STRUCT:
        kind_holder.struct_type.fields.extend(
            sub_field.to_standard_sql() for sub_field in self.fields
        )

    return types.StandardSqlField(name=self.name, type=sql_type)
210+
149211
def __eq__(self, other):
150212
if not isinstance(other, SchemaField):
151213
return NotImplemented

bigquery/tests/unit/test_schema.py

+166
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@ def _get_target_class():
2424

2525
return SchemaField
2626

27+
@staticmethod
def _get_standard_sql_data_type_class():
    """Return the ``StandardSqlDataType`` class, imported lazily."""
    from google.cloud.bigquery_v2 import types as bq_types

    data_type_class = bq_types.StandardSqlDataType
    return data_type_class
32+
2733
def _make_one(self, *args, **kw):
2834
return self._get_target_class()(*args, **kw)
2935

@@ -151,6 +157,166 @@ def test_fields_property(self):
151157
schema_field = self._make_one("boat", "RECORD", fields=fields)
152158
self.assertIs(schema_field.fields, fields)
153159

160+
def test_to_standard_sql_simple_type(self):
    """Scalar types convert to the matching kind and carry no sub-type."""
    kinds = self._get_standard_sql_data_type_class()

    legacy_cases = [
        ("INTEGER", kinds.INT64),
        ("FLOAT", kinds.FLOAT64),
        ("BOOLEAN", kinds.BOOL),
        ("DATETIME", kinds.DATETIME),
    ]
    standard_cases = [
        ("INT64", kinds.INT64),
        ("FLOAT64", kinds.FLOAT64),
        ("BOOL", kinds.BOOL),
        ("GEOGRAPHY", kinds.GEOGRAPHY),
    ]

    for type_name, expected_kind in legacy_cases + standard_cases:
        converted = self._make_one("some_field", type_name).to_standard_sql()

        self.assertEqual(converted.name, "some_field")
        self.assertEqual(converted.type.type_kind, expected_kind)
        # A scalar type must not populate the array / struct sub-type.
        self.assertFalse(converted.type.HasField("sub_type"))
180+
181+
def test_to_standard_sql_struct_type(self):
    """Nested RECORD / STRUCT fields convert to nested STRUCT types."""
    from google.cloud.bigquery_v2 import types

    # Expected result object:
    #
    # name: "image_usage"
    # type {
    #     type_kind: STRUCT
    #     struct_type {
    #         fields {
    #             name: "image_content"
    #             type {type_kind: BYTES}
    #         }
    #         fields {
    #             name: "last_used"
    #             type {
    #                 type_kind: STRUCT
    #                 struct_type {
    #                     fields {
    #                         name: "date_field"
    #                         type {type_kind: DATE}
    #                     }
    #                     fields {
    #                         name: "time_field"
    #                         type {type_kind: TIME}
    #                     }
    #                 }
    #             }
    #         }
    #     }
    # }

    kinds = self._get_standard_sql_data_type_class()

    def expected_field(name, kind, members=None):
        """Build an expected StandardSqlField, optionally with STRUCT members."""
        built = types.StandardSqlField(name=name, type=kinds(type_kind=kind))
        # Touch struct_type only when members are given, so that scalar
        # fields keep their sub-type unset.
        if members:
            built.type.struct_type.fields.extend(members)
        return built

    # Build the expectation bottom-up: innermost fields first.
    expected_last_used = expected_field(
        "last_used",
        kinds.STRUCT,
        [
            expected_field("date_field", kinds.DATE),
            expected_field("time_field", kinds.TIME),
        ],
    )
    expected_image_content = expected_field("image_content", kinds.BYTES)
    expected_result = expected_field(
        "image_usage", kinds.STRUCT, [expected_image_content, expected_last_used]
    )

    # The equivalent legacy SchemaField hierarchy.
    legacy_last_used = self._make_one(
        "last_used",
        "RECORD",
        fields=(
            self._make_one("date_field", "DATE"),
            self._make_one("time_field", "TIME"),
        ),
    )
    legacy_image_content = self._make_one("image_content", "BYTES")

    # Both spellings of the nested type must produce the same result.
    for type_name in ("RECORD", "STRUCT"):
        schema_field = self._make_one(
            "image_usage", type_name, fields=(legacy_image_content, legacy_last_used)
        )
        self.assertEqual(schema_field.to_standard_sql(), expected_result)
256+
257+
def test_to_standard_sql_array_type_simple(self):
    """A REPEATED scalar field converts to an ARRAY of that scalar kind."""
    from google.cloud.bigquery_v2 import types

    kinds = self._get_standard_sql_data_type_class()

    # Expected: ARRAY<INT64> named "valid_numbers".
    expected_result = types.StandardSqlField(
        name="valid_numbers",
        type=kinds(
            type_kind=kinds.ARRAY,
            array_element_type=kinds(type_kind=kinds.INT64),
        ),
    )

    # A "repeated" SchemaField is the legacy way of expressing an array.
    repeated_field = self._make_one("valid_numbers", "INT64", mode="REPEATED")

    self.assertEqual(repeated_field.to_standard_sql(), expected_result)
274+
275+
def test_to_standard_sql_array_type_struct(self):
    """A REPEATED RECORD field converts to an ARRAY of STRUCTs."""
    from google.cloud.bigquery_v2 import types

    kinds = self._get_standard_sql_data_type_class()

    # The person STRUCT type: (name STRING, age INT64).
    person_type = kinds(type_kind=kinds.STRUCT)
    person_type.struct_type.fields.extend(
        [
            types.StandardSqlField(name="name", type=kinds(type_kind=kinds.STRING)),
            types.StandardSqlField(name="age", type=kinds(type_kind=kinds.INT64)),
        ]
    )

    # Expected result: an ARRAY of person structs.
    expected_result = types.StandardSqlField(
        name="known_people",
        type=kinds(type_kind=kinds.ARRAY, array_element_type=person_type),
    )

    # The equivalent legacy repeated SchemaField.
    legacy_members = (
        self._make_one("name", "STRING"),
        self._make_one("age", "INTEGER"),
    )
    repeated_record = self._make_one(
        "known_people", "RECORD", fields=legacy_members, mode="REPEATED"
    )

    self.assertEqual(repeated_record.to_standard_sql(), expected_result)
309+
310+
def test_to_standard_sql_unknown_type(self):
    """An unrecognized field type converts to TYPE_KIND_UNSPECIFIED."""
    kinds = self._get_standard_sql_data_type_class()

    converted = self._make_one("weird_field", "TROOLEAN").to_standard_sql()

    self.assertEqual(converted.name, "weird_field")
    self.assertEqual(converted.type.type_kind, kinds.TYPE_KIND_UNSPECIFIED)
    # No array / struct sub-type may be set for an unknown type.
    self.assertFalse(converted.type.HasField("sub_type"))
319+
154320
def test___eq___wrong_type(self):
155321
field = self._make_one("test", "STRING")
156322
other = object()

0 commit comments

Comments
 (0)