Skip to content

Commit 6e3219f

Browse files
committed
refactor(bigquery): move BigQueryType to use sqlglot for type parsing and generation
1 parent f5a0a5a commit 6e3219f

File tree

7 files changed

+138
-106
lines changed

7 files changed

+138
-106
lines changed

ibis/backends/base/sqlglot/datatypes.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -257,21 +257,23 @@ def _from_sqlglot_DECIMAL(
257257
@classmethod
258258
def _from_ibis_Array(cls, dtype: dt.Array) -> sge.DataType:
259259
value_type = cls.from_ibis(dtype.value_type)
260-
return sge.DataType(this=typecode.ARRAY, expressions=[value_type])
260+
return sge.DataType(this=typecode.ARRAY, expressions=[value_type], nested=True)
261261

262262
@classmethod
263263
def _from_ibis_Map(cls, dtype: dt.Map) -> sge.DataType:
264264
key_type = cls.from_ibis(dtype.key_type)
265265
value_type = cls.from_ibis(dtype.value_type)
266-
return sge.DataType(this=typecode.MAP, expressions=[key_type, value_type])
266+
return sge.DataType(
267+
this=typecode.MAP, expressions=[key_type, value_type], nested=True
268+
)
267269

268270
@classmethod
269271
def _from_ibis_Struct(cls, dtype: dt.Struct) -> sge.DataType:
270272
fields = [
271273
sge.ColumnDef(this=str(name), kind=cls.from_ibis(field))
272274
for name, field in dtype.items()
273275
]
274-
return sge.DataType(this=typecode.STRUCT, expressions=fields)
276+
return sge.DataType(this=typecode.STRUCT, expressions=fields, nested=True)
275277

276278
@classmethod
277279
def _from_ibis_Decimal(cls, dtype: dt.Decimal) -> sge.DataType:

ibis/backends/bigquery/__init__.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@
3636
)
3737
from ibis.backends.bigquery.compiler import BigQueryCompiler
3838
from ibis.backends.bigquery.datatypes import BigQuerySchema, BigQueryType
39-
from ibis.formats.pandas import PandasData
4039

4140
with contextlib.suppress(ImportError):
4241
from ibis.backends.bigquery.udf import udf # noqa: F401
@@ -709,6 +708,8 @@ def execute(self, expr, params=None, limit="default", **kwargs):
709708
return expr.__pandas_result__(result)
710709

711710
def fetch_from_cursor(self, cursor, schema):
    """Turn the results behind ``cursor`` into a schema-converted pandas table.

    The cursor is first materialized through ``self._cursor_to_arrow``, the
    Arrow table is converted to pandas with ``timestamp_as_object=True``, and
    the resulting frame is handed to ``PandasData.convert_table`` together
    with ``schema``.
    """
    # Imported at call time rather than at module import.
    from ibis.formats.pandas import PandasData

    frame = self._cursor_to_arrow(cursor).to_pandas(timestamp_as_object=True)
    return PandasData.convert_table(frame, schema)
@@ -988,11 +989,7 @@ def create_table(
988989
column_defs = [
989990
sg.exp.ColumnDef(
990991
this=name,
991-
kind=sg.parse_one(
992-
BigQueryType.from_ibis(typ),
993-
into=sg.exp.DataType,
994-
read=self.name,
995-
),
992+
kind=BigQueryType.from_ibis(typ),
996993
constraints=(
997994
None
998995
if typ.nullable or typ.is_array()

ibis/backends/bigquery/client.py

Lines changed: 5 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -83,22 +83,21 @@ def bq_param_array(dtype: dt.Array, value, name):
8383
value_type = dtype.value_type
8484

8585
try:
86-
bigquery_type = BigQueryType.from_ibis(value_type)
86+
bigquery_type = BigQueryType.to_string(value_type)
8787
except NotImplementedError:
8888
raise com.UnsupportedBackendType(dtype)
8989
else:
90-
if isinstance(value_type, dt.Struct):
90+
if isinstance(value_type, dt.Array):
91+
raise TypeError("ARRAY<ARRAY<T>> is not supported in BigQuery")
92+
elif isinstance(value_type, dt.Struct):
9193
query_value = [
9294
bigquery_param(dtype.value_type, struct, f"element_{i:d}")
9395
for i, struct in enumerate(value)
9496
]
9597
bigquery_type = "STRUCT"
96-
elif isinstance(value_type, dt.Array):
97-
raise TypeError("ARRAY<ARRAY<T>> is not supported in BigQuery")
9898
else:
9999
query_value = value
100-
result = bq.ArrayQueryParameter(name, bigquery_type, query_value)
101-
return result
100+
return bq.ArrayQueryParameter(name, bigquery_type, query_value)
102101

103102

104103
@bigquery_param.register

ibis/backends/bigquery/datatypes.py

Lines changed: 104 additions & 81 deletions
Original file line number | Diff line number | Diff line change
@@ -1,101 +1,124 @@
11
from __future__ import annotations
22

33
import google.cloud.bigquery as bq
4-
import sqlglot as sg
4+
import sqlglot.expressions as sge
55

66
import ibis
77
import ibis.expr.datatypes as dt
88
import ibis.expr.schema as sch
9-
from ibis.formats import SchemaMapper, TypeMapper
10-
11-
_from_bigquery_types = {
12-
"INT64": dt.Int64,
13-
"INTEGER": dt.Int64,
14-
"FLOAT": dt.Float64,
15-
"FLOAT64": dt.Float64,
16-
"BOOL": dt.Boolean,
17-
"BOOLEAN": dt.Boolean,
18-
"STRING": dt.String,
19-
"DATE": dt.Date,
20-
"TIME": dt.Time,
21-
"BYTES": dt.Binary,
22-
"JSON": dt.JSON,
23-
}
24-
25-
26-
class BigQueryType(TypeMapper):
27-
@classmethod
28-
def to_ibis(cls, typ: str, nullable: bool = True) -> dt.DataType:
29-
if typ == "DATETIME":
30-
return dt.Timestamp(timezone=None, nullable=nullable)
31-
elif typ == "TIMESTAMP":
32-
return dt.Timestamp(timezone="UTC", nullable=nullable)
33-
elif typ == "NUMERIC":
34-
return dt.Decimal(38, 9, nullable=nullable)
35-
elif typ == "BIGNUMERIC":
36-
return dt.Decimal(76, 38, nullable=nullable)
37-
elif typ == "GEOGRAPHY":
38-
return dt.GeoSpatial(geotype="geography", srid=4326, nullable=nullable)
39-
else:
40-
try:
41-
return _from_bigquery_types[typ](nullable=nullable)
42-
except KeyError:
43-
raise TypeError(f"Unable to convert BigQuery type to ibis: {typ}")
9+
from ibis.backends.base.sqlglot.datatypes import SqlglotType
10+
from ibis.formats import SchemaMapper
11+
12+
13+
class BigQueryType(SqlglotType):
14+
dialect = "bigquery"
15+
16+
default_decimal_precision = 38
17+
default_decimal_scale = 9
18+
19+
@classmethod
def _from_sqlglot_NUMERIC(cls) -> dt.Decimal:
    """Build the ibis decimal for NUMERIC from the class-level defaults.

    Precision, scale and nullability are read from
    ``default_decimal_precision``, ``default_decimal_scale`` and
    ``default_nullable`` respectively.
    """
    precision = cls.default_decimal_precision
    scale = cls.default_decimal_scale
    return dt.Decimal(precision, scale, nullable=cls.default_nullable)
26+
27+
@classmethod
def _from_sqlglot_BIGNUMERIC(cls) -> dt.Decimal:
    """Return the ibis decimal for BIGNUMERIC: precision 76, scale 38.

    Nullability is taken from ``cls.default_nullable``.
    """
    nullable = cls.default_nullable
    return dt.Decimal(76, 38, nullable=nullable)
30+
31+
@classmethod
def _from_sqlglot_DATETIME(cls) -> dt.Timestamp:
    """Return the ibis type for DATETIME: a timestamp with ``timezone=None``.

    Nullability is taken from ``cls.default_nullable``.
    """
    return dt.Timestamp(timezone=None, nullable=cls.default_nullable)
34+
35+
@classmethod
def _from_sqlglot_TIMESTAMP(cls) -> dt.Timestamp:
    """Return the ibis type for TIMESTAMP: a timestamp with ``timezone="UTC"``.

    Nullability is taken from ``cls.default_nullable``.
    """
    return dt.Timestamp(timezone="UTC", nullable=cls.default_nullable)
38+
39+
@classmethod
def _from_sqlglot_GEOGRAPHY(cls) -> dt.GeoSpatial:
    """Return the ibis type for GEOGRAPHY.

    The result is a ``dt.GeoSpatial`` with ``geotype="geography"`` and
    ``srid=4326``; nullability is taken from ``cls.default_nullable``.
    """
    return dt.GeoSpatial(
        geotype="geography", srid=4326, nullable=cls.default_nullable
    )
44+
45+
@classmethod
def _from_sqlglot_TINYINT(cls) -> dt.Int64:
    """Return ibis ``Int64`` for TINYINT, nullable per ``cls.default_nullable``."""
    nullable = cls.default_nullable
    return dt.Int64(nullable=nullable)
48+
49+
# These integer typecodes all reuse the TINYINT converter, i.e. each of them
# is bound to the very same classmethod object.
_from_sqlglot_UINT = _from_sqlglot_TINYINT
_from_sqlglot_USMALLINT = _from_sqlglot_TINYINT
_from_sqlglot_UTINYINT = _from_sqlglot_TINYINT
_from_sqlglot_INT = _from_sqlglot_TINYINT
_from_sqlglot_SMALLINT = _from_sqlglot_TINYINT
54+
55+
@classmethod
def _from_sqlglot_UBIGINT(cls) -> dt.Int64:
    """Reject UBIGINT.

    Raises
    ------
    TypeError
        Always.
    """
    message = "Unsigned BIGINT isn't representable in BigQuery INT64"
    raise TypeError(message)
58+
59+
@classmethod
def _from_sqlglot_FLOAT(cls) -> dt.Float64:
    """Return ibis ``Float64`` for FLOAT, nullable per ``cls.default_nullable``."""
    return dt.Float64(nullable=cls.default_nullable)
4462

4563
@classmethod
46-
def from_ibis(cls, dtype: dt.DataType) -> str:
47-
if dtype.is_floating():
48-
return "FLOAT64"
49-
elif dtype.is_uint64():
64+
def _from_sqlglot_MAP(cls) -> dt.Map:
65+
raise NotImplementedError(
66+
"Cannot convert sqlglot Map type to ibis type: maps are not supported in BigQuery"
67+
)
68+
69+
@classmethod
def _from_ibis_Map(cls, dtype: dt.Map) -> sge.DataType:
    """Reject ibis map types.

    Raises
    ------
    NotImplementedError
        Always, regardless of ``dtype``.
    """
    message = (
        "Cannot convert Ibis Map type to BigQuery type: "
        "maps are not supported in BigQuery"
    )
    raise NotImplementedError(message)
74+
75+
@classmethod
76+
def _from_ibis_Timestamp(cls, dtype: dt.Timestamp) -> sge.DataType:
77+
if dtype.timezone is None:
78+
return sge.DataType(this=sge.DataType.Type.DATETIME)
79+
elif dtype.timezone == "UTC":
80+
return sge.DataType(this=sge.DataType.Type.TIMESTAMPTZ)
81+
else:
5082
raise TypeError(
51-
"Conversion from uint64 to BigQuery integer type (int64) is lossy"
83+
"BigQuery does not support timestamps with timezones other than 'UTC'"
5284
)
53-
elif dtype.is_integer():
54-
return "INT64"
55-
elif dtype.is_binary():
56-
return "BYTES"
57-
elif dtype.is_date():
58-
return "DATE"
59-
elif dtype.is_timestamp():
60-
if dtype.timezone is None:
61-
return "DATETIME"
62-
elif dtype.timezone == "UTC":
63-
return "TIMESTAMP"
64-
else:
65-
raise TypeError(
66-
"BigQuery does not support timestamps with timezones other than 'UTC'"
67-
)
68-
elif dtype.is_decimal():
69-
if (dtype.precision, dtype.scale) == (76, 38):
70-
return "BIGNUMERIC"
71-
if (dtype.precision, dtype.scale) in [(38, 9), (None, None)]:
72-
return "NUMERIC"
85+
86+
@classmethod
def _from_ibis_Decimal(cls, dtype: dt.Decimal) -> sge.DataType:
    """Map an ibis decimal onto the matching sqlglot decimal typecode.

    ``(76, 38)`` becomes ``BIGDECIMAL``; ``(38, 9)`` and an unspecified
    ``(None, None)`` become ``DECIMAL``.

    Raises
    ------
    TypeError
        For any other precision/scale combination.
    """
    spec = (dtype.precision, dtype.scale)

    if spec == (76, 38):
        kind = sge.DataType.Type.BIGDECIMAL
    elif spec in ((38, 9), (None, None)):
        kind = sge.DataType.Type.DECIMAL
    else:
        message = (
            "BigQuery only supports decimal types with precision of 38 and "
            "scale of 9 (NUMERIC) or precision of 76 and scale of 38 (BIGNUMERIC). "
            f"Current precision: {dtype.precision}. Current scale: {dtype.scale}"
        )
        raise TypeError(message)

    return sge.DataType(this=kind)
78-
elif dtype.is_array():
79-
return f"ARRAY<{cls.from_ibis(dtype.value_type)}>"
80-
elif dtype.is_struct():
81-
fields = (
82-
f"{sg.to_identifier(k).sql('bigquery')} {cls.from_ibis(v)}"
83-
for k, v in dtype.fields.items()
84-
)
85-
return "STRUCT<{}>".format(", ".join(fields))
86-
elif dtype.is_json():
87-
return "JSON"
88-
elif dtype.is_geospatial():
89-
if (dtype.geotype, dtype.srid) == ("geography", 4326):
90-
return "GEOGRAPHY"
100+
101+
@classmethod
def _from_ibis_UInt64(cls, dtype: dt.UInt64) -> sge.DataType:
    """Reject ibis ``UInt64``.

    Raises
    ------
    TypeError
        Always; the message includes ``dtype``.
    """
    message = f"Conversion from {dtype} to BigQuery integer type (Int64) is lossy"
    raise TypeError(message)
106+
107+
@classmethod
def _from_ibis_UInt32(cls, dtype: dt.UInt32) -> sge.DataType:
    """Return a sqlglot ``BIGINT`` data type; ``dtype`` itself is not inspected."""
    bigint = sge.DataType.Type.BIGINT
    return sge.DataType(this=bigint)
110+
111+
# UInt8 and UInt16 reuse the UInt32 converter (same classmethod object).
_from_ibis_UInt8 = _from_ibis_UInt32
_from_ibis_UInt16 = _from_ibis_UInt32
112+
113+
@classmethod
def _from_ibis_GeoSpatial(cls, dtype: dt.GeoSpatial) -> sge.DataType:
    """Map an ibis geospatial type onto sqlglot's ``GEOGRAPHY`` typecode.

    Only ``geotype == "geography"`` combined with ``srid == 4326`` is
    accepted.

    Raises
    ------
    TypeError
        For any other geotype/srid pair; the message reports both values.
    """
    if (dtype.geotype, dtype.srid) == ("geography", 4326):
        return sge.DataType(this=sge.DataType.Type.GEOGRAPHY)

    # The two literals are concatenated, so the first one must end with a
    # space; otherwise the message reads "...ellipsoid.Current geotype...".
    raise TypeError(
        "BigQuery geography uses points on WGS84 reference ellipsoid. "
        f"Current geotype: {dtype.geotype}, Current srid: {dtype.srid}"
    )
95-
elif dtype.is_map():
96-
raise NotImplementedError("Maps are not supported in BigQuery")
97-
else:
98-
return str(dtype).upper()
99122

100123

101124
class BigQuerySchema(SchemaMapper):
@@ -112,7 +135,7 @@ def from_ibis(cls, schema: sch.Schema) -> list[bq.SchemaField]:
112135
is_struct = value_type.is_struct()
113136

114137
field_type = (
115-
"RECORD" if is_struct else BigQueryType.from_ibis(typ.value_type)
138+
"RECORD" if is_struct else BigQueryType.to_string(typ.value_type)
116139
)
117140
mode = "REPEATED"
118141
fields = cls.from_ibis(ibis.schema(getattr(value_type, "fields", {})))
@@ -121,7 +144,7 @@ def from_ibis(cls, schema: sch.Schema) -> list[bq.SchemaField]:
121144
mode = "NULLABLE" if typ.nullable else "REQUIRED"
122145
fields = cls.from_ibis(ibis.schema(typ.fields))
123146
else:
124-
field_type = BigQueryType.from_ibis(typ)
147+
field_type = BigQueryType.to_string(typ)
125148
mode = "NULLABLE" if typ.nullable else "REQUIRED"
126149
fields = ()
127150

@@ -138,7 +161,7 @@ def _dtype_from_bigquery_field(cls, field: bq.SchemaField) -> dt.DataType:
138161
fields = {f.name: cls._dtype_from_bigquery_field(f) for f in field.fields}
139162
dtype = dt.Struct(fields)
140163
else:
141-
dtype = BigQueryType.to_ibis(typ)
164+
dtype = BigQueryType.from_string(typ)
142165

143166
mode = field.mode
144167
if mode == "NULLABLE":

ibis/backends/bigquery/registry.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -75,7 +75,7 @@ def bigquery_cast_floating_to_integer(compiled_arg, from_, to):
7575
@bigquery_cast.register(str, dt.DataType, dt.DataType)
7676
def bigquery_cast_generate(compiled_arg, from_, to):
7777
"""Cast to desired type."""
78-
sql_type = BigQueryType.from_ibis(to)
78+
sql_type = BigQueryType.to_string(to)
7979
return f"CAST({compiled_arg} AS {sql_type})"
8080

8181

@@ -337,7 +337,7 @@ def _literal(t, op):
337337

338338
if value is None:
339339
if not dtype.is_null():
340-
return f"CAST(NULL AS {BigQueryType.from_ibis(dtype)})"
340+
return f"CAST(NULL AS {BigQueryType.to_string(dtype)})"
341341
return "NULL"
342342
elif dtype.is_boolean():
343343
return str(value).upper()
@@ -350,7 +350,7 @@ def _literal(t, op):
350350
prefix = "-" * value.is_signed()
351351
return f"CAST('{prefix}inf' AS FLOAT64)"
352352
else:
353-
return f"{BigQueryType.from_ibis(dtype)} '{value}'"
353+
return f"{BigQueryType.to_string(dtype)} '{value}'"
354354
elif dtype.is_uuid():
355355
return _sg_literal(str(value))
356356
elif dtype.is_numeric():
@@ -564,7 +564,7 @@ def compiles_string_to_timestamp(translator, op):
564564

565565

566566
def compiles_floor(t, op):
567-
bigquery_type = BigQueryType.from_ibis(op.dtype)
567+
bigquery_type = BigQueryType.to_string(op.dtype)
568568
arg = op.arg
569569
return f"CAST(FLOOR({t.translate(arg)}) AS {bigquery_type})"
570570

ibis/backends/bigquery/tests/unit/test_datatypes.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import pytest
4+
import sqlglot as sg
45
from pytest import param
56

67
import ibis.expr.datatypes as dt
@@ -69,13 +70,13 @@
6970
],
7071
)
7172
def test_simple(datatype, expected):
72-
assert BigQueryType.from_ibis(datatype) == expected
73+
assert BigQueryType.to_string(datatype) == expected
7374

7475

7576
@pytest.mark.parametrize("datatype", [dt.uint64, dt.Decimal(8, 3)])
7677
def test_simple_failure_mode(datatype):
7778
with pytest.raises(TypeError):
78-
BigQueryType.from_ibis(datatype)
79+
BigQueryType.to_string(datatype)
7980

8081

8182
@pytest.mark.parametrize(
@@ -101,3 +102,13 @@ def test_simple_failure_mode(datatype):
101102
)
102103
def test_spread_type(type_, expected):
103104
assert list(spread_type(type_)) == expected
105+
106+
107+
def test_struct_type():
    """Check that an int64 array renders as ``ARRAY<INT64>`` for BigQuery.

    Despite the function name, the type under test is an array: once parsed
    from DuckDB's ``BIGINT[]`` spelling by sqlglot and once generated from
    the ibis ``Array(int64)`` type via ``BigQueryType.to_string``.
    """
    expected = "ARRAY<INT64>"

    parsed_type = sg.parse_one("BIGINT[]", into=sg.exp.DataType, read="duckdb")
    assert parsed_type.sql(dialect="bigquery") == expected

    assert BigQueryType.to_string(dt.Array(dt.int64)) == expected

0 commit comments

Comments
 (0)