Skip to content

Commit a2a6efb

Browse files
authored
fix(bigquery)!: Canonicalize struct & array inline constructor (#3751)
* fix(bigquery): Fix parsing of struct & array inline constructor * Add more comments on _parse_type
1 parent 4a843e6 commit a2a6efb

File tree

5 files changed

+76
-23
lines changed

5 files changed

+76
-23
lines changed

sqlglot/dialects/duckdb.py

+14-7
Original file line numberDiff line numberDiff line change
@@ -116,17 +116,24 @@ def _build_make_timestamp(args: t.List) -> exp.Expression:
116116

117117
def _struct_sql(self: DuckDB.Generator, expression: exp.Struct) -> str:
118118
args: t.List[str] = []
119+
120+
# BigQuery allows inline construction such as "STRUCT<a STRING, b INTEGER>('str', 1)" which is
121+
# canonicalized to "ROW('str', 1) AS STRUCT(a TEXT, b INT)" in DuckDB
122+
is_struct_cast = expression.find_ancestor(exp.Cast)
123+
119124
for i, expr in enumerate(expression.expressions):
120-
if isinstance(expr, exp.PropertyEQ):
121-
key = expr.name
122-
value = expr.expression
125+
is_property_eq = isinstance(expr, exp.PropertyEQ)
126+
value = expr.expression if is_property_eq else expr
127+
128+
if is_struct_cast:
129+
args.append(self.sql(value))
123130
else:
124-
key = f"_{i}"
125-
value = expr
131+
key = expr.name if is_property_eq else f"_{i}"
132+
args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")
126133

127-
args.append(f"{self.sql(exp.Literal.string(key))}: {self.sql(value)}")
134+
csv_args = ", ".join(args)
128135

129-
return f"{{{', '.join(args)}}}"
136+
return f"ROW({csv_args})" if is_struct_cast else f"{{{csv_args}}}"
130137

131138

132139
def _datatype_sql(self: DuckDB.Generator, expression: exp.DataType) -> str:

sqlglot/parser.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -4264,6 +4264,13 @@ def _parse_type(
42644264
index = self._index
42654265
data_type = self._parse_types(check_func=True, allow_identifiers=False)
42664266

4267+
# parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g.
4268+
# STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>)
4269+
if isinstance(data_type, exp.Cast):
4270+
# This constructor can contain ops directly after it, for instance struct unnesting:
4271+
# STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a iNT, b STRING).*
4272+
return self._parse_column_ops(data_type)
4273+
42674274
if data_type:
42684275
index2 = self._index
42694276
this = self._parse_primary()
@@ -4483,9 +4490,14 @@ def _parse_types(
44834490
this=exp.DataType.Type[type_token.value],
44844491
expressions=expressions,
44854492
nested=nested,
4486-
values=values,
44874493
prefix=prefix,
44884494
)
4495+
4496+
# Empty arrays/structs are allowed
4497+
if values is not None:
4498+
cls = exp.Struct if is_struct else exp.Array
4499+
this = exp.cast(cls(expressions=values), this, copy=False)
4500+
44894501
elif expressions:
44904502
this.set("expressions", expressions)
44914503

tests/dialects/test_bigquery.py

+47-5
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,6 @@ def test_bigquery(self):
138138
self.validate_identity("SELECT CAST(CURRENT_DATE AS STRING FORMAT 'DAY') AS current_day")
139139
self.validate_identity("SAFE_CAST(encrypted_value AS STRING FORMAT 'BASE64')")
140140
self.validate_identity("CAST(encrypted_value AS STRING FORMAT 'BASE64')")
141-
self.validate_identity("CAST(STRUCT<a INT64>(1) AS STRUCT<a INT64>)")
142141
self.validate_identity("STRING_AGG(a)")
143142
self.validate_identity("STRING_AGG(a, ' & ')")
144143
self.validate_identity("STRING_AGG(DISTINCT a, ' & ')")
@@ -162,12 +161,9 @@ def test_bigquery(self):
162161
self.validate_identity("x <> ''")
163162
self.validate_identity("DATE_TRUNC(col, WEEK(MONDAY))")
164163
self.validate_identity("SELECT b'abc'")
165-
self.validate_identity("""SELECT * FROM UNNEST(ARRAY<STRUCT<x INT64>>[])""")
166164
self.validate_identity("SELECT AS STRUCT 1 AS a, 2 AS b")
167165
self.validate_identity("SELECT DISTINCT AS STRUCT 1 AS a, 2 AS b")
168166
self.validate_identity("SELECT AS VALUE STRUCT(1 AS a, 2 AS b)")
169-
self.validate_identity("SELECT STRUCT<ARRAY<STRING>>(['2023-01-17'])")
170-
self.validate_identity("SELECT STRUCT<STRING>((SELECT a FROM b.c LIMIT 1)).*")
171167
self.validate_identity("SELECT * FROM q UNPIVOT(values FOR quarter IN (b, c))")
172168
self.validate_identity("""CREATE TABLE x (a STRUCT<values ARRAY<INT64>>)""")
173169
self.validate_identity("""CREATE TABLE x (a STRUCT<b STRING OPTIONS (description='b')>)""")
@@ -1613,7 +1609,7 @@ def test_gap_fill(self):
16131609
"SELECT * FROM GAP_FILL(TABLE device_data, ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'null')], origin => CAST('2023-11-01 09:30:01' AS DATETIME)) ORDER BY time"
16141610
)
16151611
self.validate_identity(
1616-
"SELECT * FROM GAP_FILL(TABLE (SELECT * FROM UNNEST(ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>[STRUCT(1, CAST('2023-11-01 09:34:01' AS DATETIME), 74, 'INACTIVE'), STRUCT(2, CAST('2023-11-01 09:36:00' AS DATETIME), 77, 'ACTIVE'), STRUCT(3, CAST('2023-11-01 09:37:00' AS DATETIME), 78, 'ACTIVE'), STRUCT(4, CAST('2023-11-01 09:38:01' AS DATETIME), 80, 'ACTIVE')])), ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'linear')]) ORDER BY time"
1612+
"SELECT * FROM GAP_FILL(TABLE device_data, ts_column => 'time', bucket_width => INTERVAL '1' MINUTE, value_columns => [('signal', 'locf')]) ORDER BY time"
16171613
)
16181614

16191615
def test_models(self):
@@ -1763,3 +1759,49 @@ def test_mod(self):
17631759
"MOD((a + 1), b)",
17641760
"MOD(a + 1, b)",
17651761
)
1762+
1763+
def test_inline_constructor(self):
1764+
self.validate_identity(
1765+
"""SELECT STRUCT<ARRAY<STRING>>(["2023-01-17"])""",
1766+
"""SELECT CAST(STRUCT(['2023-01-17']) AS STRUCT<ARRAY<STRING>>)""",
1767+
)
1768+
self.validate_identity(
1769+
"""SELECT STRUCT<STRING>((SELECT 'foo')).*""",
1770+
"""SELECT CAST(STRUCT((SELECT 'foo')) AS STRUCT<STRING>).*""",
1771+
)
1772+
1773+
self.validate_all(
1774+
"SELECT ARRAY<INT>[1, 2, 3]",
1775+
write={
1776+
"bigquery": "SELECT CAST([1, 2, 3] AS ARRAY<INT64>)",
1777+
"duckdb": "SELECT CAST([1, 2, 3] AS INT[])",
1778+
},
1779+
)
1780+
self.validate_all(
1781+
"CAST(STRUCT<a INT64>(1) AS STRUCT<a INT64>)",
1782+
write={
1783+
"bigquery": "CAST(CAST(STRUCT(1) AS STRUCT<a INT64>) AS STRUCT<a INT64>)",
1784+
"duckdb": "CAST(CAST(ROW(1) AS STRUCT(a BIGINT)) AS STRUCT(a BIGINT))",
1785+
},
1786+
)
1787+
self.validate_all(
1788+
"SELECT * FROM UNNEST(ARRAY<STRUCT<x INT64>>[])",
1789+
write={
1790+
"bigquery": "SELECT * FROM UNNEST(CAST([] AS ARRAY<STRUCT<x INT64>>))",
1791+
"duckdb": "SELECT * FROM UNNEST(CAST([] AS STRUCT(x BIGINT)[]))",
1792+
},
1793+
)
1794+
self.validate_all(
1795+
"SELECT * FROM UNNEST(ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>[STRUCT(1, DATETIME '2023-11-01 09:34:01', 74, 'INACTIVE'),STRUCT(4, DATETIME '2023-11-01 09:38:01', 80, 'ACTIVE')])",
1796+
write={
1797+
"bigquery": "SELECT * FROM UNNEST(CAST([STRUCT(1, CAST('2023-11-01 09:34:01' AS DATETIME), 74, 'INACTIVE'), STRUCT(4, CAST('2023-11-01 09:38:01' AS DATETIME), 80, 'ACTIVE')] AS ARRAY<STRUCT<device_id INT64, time DATETIME, signal INT64, state STRING>>))",
1798+
"duckdb": "SELECT * FROM UNNEST(CAST([ROW(1, CAST('2023-11-01 09:34:01' AS TIMESTAMP), 74, 'INACTIVE'), ROW(4, CAST('2023-11-01 09:38:01' AS TIMESTAMP), 80, 'ACTIVE')] AS STRUCT(device_id BIGINT, time TIMESTAMP, signal BIGINT, state TEXT)[]))",
1799+
},
1800+
)
1801+
self.validate_all(
1802+
"SELECT STRUCT<a INT64, b STRUCT<c STRING>>(1, STRUCT('c_str'))",
1803+
write={
1804+
"bigquery": "SELECT CAST(STRUCT(1, STRUCT('c_str')) AS STRUCT<a INT64, b STRUCT<c STRING>>)",
1805+
"duckdb": "SELECT CAST(ROW(1, ROW('c_str')) AS STRUCT(a BIGINT, b STRUCT(c TEXT)))",
1806+
},
1807+
)

tests/dialects/test_duckdb.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1042,11 +1042,11 @@ def test_cast(self):
10421042
)
10431043
self.validate_identity(
10441044
"CAST([[STRUCT_PACK(a := 1)]] AS STRUCT(a BIGINT)[][])",
1045-
"CAST([[{'a': 1}]] AS STRUCT(a BIGINT)[][])",
1045+
"CAST([[ROW(1)]] AS STRUCT(a BIGINT)[][])",
10461046
)
10471047
self.validate_identity(
10481048
"CAST([STRUCT_PACK(a := 1)] AS STRUCT(a BIGINT)[])",
1049-
"CAST([{'a': 1}] AS STRUCT(a BIGINT)[])",
1049+
"CAST([ROW(1)] AS STRUCT(a BIGINT)[])",
10501050
)
10511051

10521052
self.validate_all(

tests/fixtures/identity.sql

-8
Original file line numberDiff line numberDiff line change
@@ -777,14 +777,6 @@ ALTER TABLE table1 RENAME COLUMN IF EXISTS c1 TO c2
777777
SELECT div.a FROM test_table AS div
778778
WITH view AS (SELECT 1 AS x) SELECT * FROM view
779779
ARRAY<STRUCT<INT, DOUBLE, ARRAY<INT>>>
780-
ARRAY<INT>[1, 2, 3]
781-
ARRAY<INT>[]
782-
STRUCT<x VARCHAR(10)>
783-
STRUCT<x VARCHAR(10)>("bla")
784-
STRUCT<VARCHAR(10)>("bla")
785-
STRUCT<INT>(5)
786-
STRUCT<DATE>("2011-05-05")
787-
STRUCT<x INT, y TEXT>(1, t.str_col)
788780
STRUCT<int INT>
789781
SELECT CAST(NULL AS ARRAY<INT>) IS NULL AS array_is_null
790782
ALTER TABLE "schema"."tablename" ADD CONSTRAINT "CHK_Name" CHECK (NOT "IdDwh" IS NULL AND "IdDwh" <> (0))

0 commit comments

Comments
 (0)