Skip to content

Commit e0bbc8e

Browse files
committed
Add to_standard_sql() method to SchemaField
1 parent 6fc0de5 commit e0bbc8e

File tree

2 files changed

+148
-0
lines changed

2 files changed

+148
-0
lines changed

bigquery/google/cloud/bigquery/schema.py

+41
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,27 @@
1414

1515
"""Schemas for BigQuery tables / queries."""
1616

17+
from google.cloud.bigquery_v2 import types
18+
19+
20+
# SQL types reference:
21+
# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
22+
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
23+
# Lookup table used when converting a schema field to its standard SQL
# representation. Besides the legacy SQL names, the alias names that the
# BigQuery API also accepts for the same types (INT64, FLOAT64, BOOL, STRUCT)
# are included, so that fields declared with either spelling resolve to the
# same standard SQL type code.
LEGACY_TO_STANDARD_TYPES = {
    "STRING": types.StandardSqlDataType.STRING,
    "BYTES": types.StandardSqlDataType.BYTES,
    "INTEGER": types.StandardSqlDataType.INT64,
    "INT64": types.StandardSqlDataType.INT64,
    "FLOAT": types.StandardSqlDataType.FLOAT64,
    "FLOAT64": types.StandardSqlDataType.FLOAT64,
    "NUMERIC": types.StandardSqlDataType.NUMERIC,
    "BOOLEAN": types.StandardSqlDataType.BOOL,
    "BOOL": types.StandardSqlDataType.BOOL,
    "RECORD": types.StandardSqlDataType.STRUCT,
    "STRUCT": types.StandardSqlDataType.STRUCT,
    "TIMESTAMP": types.StandardSqlDataType.TIMESTAMP,
    "DATE": types.StandardSqlDataType.DATE,
    "TIME": types.StandardSqlDataType.TIME,
    "DATETIME": types.StandardSqlDataType.DATETIME,
}
"""String names of the legacy SQL types (and their aliases) to integer codes of Standard SQL types."""
37+
1738

1839
class SchemaField(object):
1940
"""Describe a single field within a table schema.
@@ -146,6 +167,26 @@ def _key(self):
146167
self._fields,
147168
)
148169

170+
def to_standard_sql(self):
    """Return the field as the standard SQL field representation object.

    The field type is looked up in ``LEGACY_TO_STANDARD_TYPES``; a type
    name missing from that table yields ``TYPE_KIND_UNSPECIFIED``.

    A field whose mode is ``"REPEATED"`` is represented as an ``ARRAY``
    whose element type is the converted field type. Sub-fields of a
    ``STRUCT`` (either the field itself or the array element) are
    converted recursively.

    Returns:
        An instance of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`.
    """
    sql_type = types.StandardSqlDataType()
    base_kind = LEGACY_TO_STANDARD_TYPES.get(
        self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED
    )

    # Legacy SQL has no ARRAY type name, but a field with REPEATED mode is
    # an array of its declared type in standard SQL.
    if self.mode == "REPEATED":
        sql_type.type_kind = types.StandardSqlDataType.ARRAY
        sql_type.array_element_type.type_kind = base_kind
        # The sub-fields describe the array *element* in this case.
        struct_holder = sql_type.array_element_type
    else:
        sql_type.type_kind = base_kind
        struct_holder = sql_type

    if base_kind == types.StandardSqlDataType.STRUCT:  # noqa: E721
        struct_holder.struct_type.fields.extend(
            field.to_standard_sql() for field in self.fields
        )

    return types.StandardSqlField(name=self.name, type=sql_type)
189+
149190
def __eq__(self, other):
150191
if not isinstance(other, SchemaField):
151192
return NotImplemented

bigquery/tests/unit/test_schema.py

+107
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@ def _get_target_class():
2424

2525
return SchemaField
2626

27+
@staticmethod
def _get_standard_sql_data_type_class():
    """Return the ``StandardSqlDataType`` class, imported lazily."""
    from google.cloud.bigquery_v2 import types as bigquery_types

    data_type_class = bigquery_types.StandardSqlDataType
    return data_type_class
32+
2733
def _make_one(self, *args, **kw):
2834
return self._get_target_class()(*args, **kw)
2935

@@ -151,6 +157,107 @@ def test_fields_property(self):
151157
schema_field = self._make_one("boat", "RECORD", fields=fields)
152158
self.assertIs(schema_field.fields, fields)
153159

160+
def test_to_standard_sql_simple_type(self):
    """Scalar legacy types convert to the matching standard SQL type kind."""
    type_cls = self._get_standard_sql_data_type_class()
    # Pairs of (legacy type name, expected standard SQL type code).
    cases = [
        ("INTEGER", type_cls.INT64),
        ("FLOAT", type_cls.FLOAT64),
        ("BOOLEAN", type_cls.BOOL),
        ("DATETIME", type_cls.DATETIME),
    ]

    for legacy_name, expected_kind in cases:
        converted = self._make_one("some_field", legacy_name).to_standard_sql()

        self.assertEqual(converted.name, "some_field")
        self.assertEqual(converted.type.type_kind, expected_kind)
        # A scalar type must not carry array/struct details.
        self.assertFalse(converted.type.HasField("sub_type"))
174+
175+
def test_to_standard_sql_complex_type(self):
    """A nested RECORD field converts to an equal nested STRUCT field."""
    from google.cloud.bigquery_v2 import types

    type_cls = self._get_standard_sql_data_type_class()

    def expected_field(name, kind):
        # Build a standard SQL field with the given name and type code.
        return types.StandardSqlField(name=name, type=type_cls(type_kind=kind))

    # Legacy schema under test: a RECORD holding a BYTES field and a
    # nested RECORD that in turn holds a DATE and a TIME field.
    legacy_date = self._make_one("date_field", "DATE")
    legacy_time = self._make_one("time_field", "TIME")
    legacy_last_used = self._make_one(
        "last_used", "RECORD", fields=(legacy_date, legacy_time)
    )
    legacy_bytes = self._make_one("image_content", "BYTES")
    schema_field = self._make_one(
        "image_usage", "RECORD", fields=(legacy_bytes, legacy_last_used)
    )

    # Expected result object:
    #
    # name: "image_usage"
    # type {
    #     type_kind: STRUCT
    #     struct_type {
    #         fields {
    #             name: "image_content"
    #             type {type_kind: BYTES}
    #         }
    #         fields {
    #             name: "last_used"
    #             type {
    #                 type_kind: STRUCT
    #                 struct_type {
    #                     fields {
    #                         name: "date_field"
    #                         type {type_kind: DATE}
    #                     }
    #                     fields {
    #                         name: "time_field"
    #                         type {type_kind: TIME}
    #                     }
    #                 }
    #             }
    #         }
    #     }
    # }

    # innermost level
    expected_date = expected_field("date_field", type_cls.DATE)
    expected_time = expected_field("time_field", type_cls.TIME)

    # middle level
    expected_last_used = expected_field("last_used", type_cls.STRUCT)
    expected_last_used.type.struct_type.fields.extend(
        [expected_date, expected_time]
    )
    expected_bytes = expected_field("image_content", type_cls.BYTES)

    # top level
    expected_result = expected_field("image_usage", type_cls.STRUCT)
    expected_result.type.struct_type.fields.extend(
        [expected_bytes, expected_last_used]
    )

    self.assertEqual(schema_field.to_standard_sql(), expected_result)
250+
251+
def test_to_standard_sql_unknown_type(self):
    """An unrecognized legacy type name converts to TYPE_KIND_UNSPECIFIED."""
    type_cls = self._get_standard_sql_data_type_class()

    converted = self._make_one("weird_field", "TROOLEAN").to_standard_sql()

    self.assertEqual(converted.name, "weird_field")
    self.assertEqual(converted.type.type_kind, type_cls.TYPE_KIND_UNSPECIFIED)
    # No array/struct details are expected for an unknown type.
    self.assertFalse(converted.type.HasField("sub_type"))
260+
154261
def test___eq___wrong_type(self):
155262
field = self._make_one("test", "STRING")
156263
other = object()

0 commit comments

Comments
 (0)