Skip to content

Commit 27a4fc3

Browse files
committed
Add support for ARRAY type in to_standard_sql()
1 parent 3f03054 commit 27a4fc3

File tree

2 files changed

+75
-6
lines changed

2 files changed

+75
-6
lines changed

bigquery/google/cloud/bigquery/schema.py

+22-6
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
"DATE": types.StandardSqlDataType.DATE,
3838
"TIME": types.StandardSqlDataType.TIME,
3939
"DATETIME": types.StandardSqlDataType.DATETIME,
40+
# NOTE: There is no legacy type named "ARRAY"; an array is expressed as a field with mode="REPEATED".
4041
}
4142
"""String names of the legacy SQL types to integer codes of Standard SQL types."""
4243

@@ -179,13 +180,28 @@ def to_standard_sql(self):
179180
An instance of :class:`~google.cloud.bigquery_v2.types.StandardSqlField`.
180181
"""
181182
sql_type = types.StandardSqlDataType()
182-
sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get(
183-
self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED
184-
)
185183

186-
# NOTE: No need to also handle the "ARRAY" composed type, the latter
187-
# does not exist in legacy SQL types.
188-
if sql_type.type_kind == types.StandardSqlDataType.STRUCT: # noqa: E721
184+
if self.mode == "REPEATED":
185+
sql_type.type_kind = types.StandardSqlDataType.ARRAY
186+
else:
187+
sql_type.type_kind = LEGACY_TO_STANDARD_TYPES.get(
188+
self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED
189+
)
190+
191+
if sql_type.type_kind == types.StandardSqlDataType.ARRAY: # noqa: E721
192+
array_element_type = LEGACY_TO_STANDARD_TYPES.get(
193+
self.field_type, types.StandardSqlDataType.TYPE_KIND_UNSPECIFIED
194+
)
195+
sql_type.array_element_type.type_kind = array_element_type
196+
197+
# ARRAY cannot directly contain other arrays, only scalar types and STRUCTs
198+
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#array-type
199+
if array_element_type == types.StandardSqlDataType.STRUCT: # noqa: E721
200+
sql_type.array_element_type.struct_type.fields.extend(
201+
field.to_standard_sql() for field in self.fields
202+
)
203+
204+
elif sql_type.type_kind == types.StandardSqlDataType.STRUCT: # noqa: E721
189205
sql_type.struct_type.fields.extend(
190206
field.to_standard_sql() for field in self.fields
191207
)

bigquery/tests/unit/test_schema.py

+53
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,59 @@ def test_to_standard_sql_struct_type(self):
254254
standard_field = schema_field.to_standard_sql()
255255
self.assertEqual(standard_field, expected_result)
256256

257+
def test_to_standard_sql_array_type_simple(self):
    """A REPEATED scalar field converts to an ARRAY of that scalar type."""
    from google.cloud.bigquery_v2 import types

    data_type_cls = self._get_standard_sql_data_type_class()

    # Expected outcome: an ARRAY type whose elements are INT64 values.
    array_of_ints = data_type_cls(type_kind=data_type_cls.ARRAY)
    array_of_ints.array_element_type.type_kind = data_type_cls.INT64
    expected = types.StandardSqlField(name="valid_numbers", type=array_of_ints)

    # Convert a "repeated" SchemaField and compare it with the expectation.
    repeated_field = self._make_one("valid_numbers", "INT64", mode="REPEATED")
    self.assertEqual(repeated_field.to_standard_sql(), expected)
274+
275+
def test_to_standard_sql_array_type_struct(self):
    """A REPEATED RECORD field converts to an ARRAY of STRUCT elements."""
    from google.cloud.bigquery_v2 import types

    data_type_cls = self._get_standard_sql_data_type_class()

    # Members of the person STRUCT, in the order they are expected to appear.
    member_kinds = (
        ("name", data_type_cls.STRING),
        ("age", data_type_cls.INT64),
    )
    struct_members = [
        types.StandardSqlField(
            name=member_name, type=data_type_cls(type_kind=kind)
        )
        for member_name, kind in member_kinds
    ]

    # The array's element type: a STRUCT carrying the members above.
    person_type = data_type_cls(type_kind=data_type_cls.STRUCT)
    person_type.struct_type.fields.extend(struct_members)

    # Expected outcome: an ARRAY of person STRUCTs named "known_people".
    expected = types.StandardSqlField(
        name="known_people",
        type=data_type_cls(
            type_kind=data_type_cls.ARRAY, array_element_type=person_type
        ),
    )

    # Legacy counterpart: a repeated RECORD with the matching sub-fields.
    legacy_members = (
        self._make_one("name", "STRING"),
        self._make_one("age", "INTEGER"),
    )
    repeated_record = self._make_one(
        "known_people", "RECORD", fields=legacy_members, mode="REPEATED"
    )

    actual = repeated_record.to_standard_sql()
    self.assertEqual(actual, expected)
309+
257310
def test_to_standard_sql_unknown_type(self):
258311
sql_type = self._get_standard_sql_data_type_class()
259312
field = self._make_one("weird_field", "TROOLEAN")

0 commit comments

Comments
 (0)