1
1
from __future__ import annotations
2
2
3
3
import google .cloud .bigquery as bq
4
- import sqlglot as sg
4
+ import sqlglot . expressions as sge
5
5
6
6
import ibis
7
7
import ibis .expr .datatypes as dt
8
8
import ibis .expr .schema as sch
9
- from ibis .formats import SchemaMapper , TypeMapper
10
-
11
- _from_bigquery_types = {
12
- "INT64" : dt .Int64 ,
13
- "INTEGER" : dt .Int64 ,
14
- "FLOAT" : dt .Float64 ,
15
- "FLOAT64" : dt .Float64 ,
16
- "BOOL" : dt .Boolean ,
17
- "BOOLEAN" : dt .Boolean ,
18
- "STRING" : dt .String ,
19
- "DATE" : dt .Date ,
20
- "TIME" : dt .Time ,
21
- "BYTES" : dt .Binary ,
22
- "JSON" : dt .JSON ,
23
- }
24
-
25
-
26
- class BigQueryType (TypeMapper ):
27
- @classmethod
28
- def to_ibis (cls , typ : str , nullable : bool = True ) -> dt .DataType :
29
- if typ == "DATETIME" :
30
- return dt .Timestamp (timezone = None , nullable = nullable )
31
- elif typ == "TIMESTAMP" :
32
- return dt .Timestamp (timezone = "UTC" , nullable = nullable )
33
- elif typ == "NUMERIC" :
34
- return dt .Decimal (38 , 9 , nullable = nullable )
35
- elif typ == "BIGNUMERIC" :
36
- return dt .Decimal (76 , 38 , nullable = nullable )
37
- elif typ == "GEOGRAPHY" :
38
- return dt .GeoSpatial (geotype = "geography" , srid = 4326 , nullable = nullable )
39
- else :
40
- try :
41
- return _from_bigquery_types [typ ](nullable = nullable )
42
- except KeyError :
43
- raise TypeError (f"Unable to convert BigQuery type to ibis: { typ } " )
9
+ from ibis .backends .base .sqlglot .datatypes import SqlglotType
10
+ from ibis .formats import SchemaMapper
11
+
12
+
13
class BigQueryType(SqlglotType):
    """Convert between ibis data types and BigQuery types through sqlglot.

    The ``_from_sqlglot_<TYPE>`` hooks build the ibis type for a BigQuery
    type name, and the ``_from_ibis_<Type>`` hooks build the sqlglot
    ``DataType`` node for an ibis type.  The hooks are presumably looked up
    by name by ``SqlglotType`` -- confirm against the base class.

    ``cls.default_nullable`` is not defined here; it is assumed to be
    provided by ``SqlglotType``.
    """

    dialect = "bigquery"

    # Precision and scale used when converting BigQuery NUMERIC.
    default_decimal_precision = 38
    default_decimal_scale = 9

    @classmethod
    def _from_sqlglot_NUMERIC(cls) -> dt.Decimal:
        """Return the ibis decimal for NUMERIC (class default precision/scale)."""
        return dt.Decimal(
            cls.default_decimal_precision,
            cls.default_decimal_scale,
            nullable=cls.default_nullable,
        )

    @classmethod
    def _from_sqlglot_BIGNUMERIC(cls) -> dt.Decimal:
        """Return the ibis decimal for BIGNUMERIC: precision 76, scale 38."""
        return dt.Decimal(76, 38, nullable=cls.default_nullable)

    @classmethod
    def _from_sqlglot_DATETIME(cls) -> dt.Timestamp:
        """Return a timezone-naive ibis timestamp for DATETIME."""
        return dt.Timestamp(timezone=None, nullable=cls.default_nullable)

    @classmethod
    def _from_sqlglot_TIMESTAMP(cls) -> dt.Timestamp:
        """Return a UTC ibis timestamp for TIMESTAMP."""
        return dt.Timestamp(timezone="UTC", nullable=cls.default_nullable)

    @classmethod
    def _from_sqlglot_GEOGRAPHY(cls) -> dt.GeoSpatial:
        """Return the ibis geospatial type for GEOGRAPHY (srid 4326)."""
        return dt.GeoSpatial(
            geotype="geography", srid=4326, nullable=cls.default_nullable
        )

    @classmethod
    def _from_sqlglot_TINYINT(cls) -> dt.Int64:
        """Return ibis Int64; every narrower integer name shares this hook."""
        return dt.Int64(nullable=cls.default_nullable)

    # All of these integer type names convert to Int64 as well.
    _from_sqlglot_SMALLINT = _from_sqlglot_TINYINT
    _from_sqlglot_INT = _from_sqlglot_TINYINT
    _from_sqlglot_UTINYINT = _from_sqlglot_TINYINT
    _from_sqlglot_USMALLINT = _from_sqlglot_TINYINT
    _from_sqlglot_UINT = _from_sqlglot_TINYINT

    @classmethod
    def _from_sqlglot_UBIGINT(cls) -> dt.Int64:
        """Reject unsigned 64-bit integers.

        Raises:
            TypeError: always.
        """
        raise TypeError("Unsigned BIGINT isn't representable in BigQuery INT64")

    @classmethod
    def _from_sqlglot_FLOAT(cls) -> dt.Float64:
        """Return ibis Float64 for FLOAT."""
        return dt.Float64(nullable=cls.default_nullable)

    @classmethod
    def _from_sqlglot_MAP(cls) -> dt.Map:
        """Reject map types.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError(
            "Cannot convert sqlglot Map type to ibis type: maps are not supported in BigQuery"
        )

    @classmethod
    def _from_ibis_Map(cls, dtype: dt.Map) -> sge.DataType:
        """Reject ibis map types.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError(
            "Cannot convert Ibis Map type to BigQuery type: maps are not supported in BigQuery"
        )

    @classmethod
    def _from_ibis_Timestamp(cls, dtype: dt.Timestamp) -> sge.DataType:
        """Convert an ibis timestamp to a sqlglot DATETIME or TIMESTAMPTZ node.

        Raises:
            TypeError: if ``dtype.timezone`` is neither ``None`` nor ``"UTC"``.
        """
        timezone = dtype.timezone
        if timezone is None:
            return sge.DataType(this=sge.DataType.Type.DATETIME)
        if timezone == "UTC":
            # NOTE(review): presumably rendered as BigQuery TIMESTAMP by the
            # sqlglot "bigquery" dialect -- confirm.
            return sge.DataType(this=sge.DataType.Type.TIMESTAMPTZ)
        raise TypeError(
            "BigQuery does not support timestamps with timezones other than 'UTC'"
        )

    @classmethod
    def _from_ibis_Decimal(cls, dtype: dt.Decimal) -> sge.DataType:
        """Convert an ibis decimal to a sqlglot DECIMAL or BIGDECIMAL node.

        Only (76, 38), (38, 9) and an unspecified (None, None)
        precision/scale pair are accepted.

        Raises:
            TypeError: for any other precision/scale combination.
        """
        precision = dtype.precision
        scale = dtype.scale
        if (precision, scale) == (76, 38):
            return sge.DataType(this=sge.DataType.Type.BIGDECIMAL)
        if (precision, scale) in ((38, 9), (None, None)):
            return sge.DataType(this=sge.DataType.Type.DECIMAL)
        raise TypeError(
            "BigQuery only supports decimal types with precision of 38 and "
            "scale of 9 (NUMERIC) or precision of 76 and scale of 38 (BIGNUMERIC). "
            f"Current precision: {precision}. Current scale: {scale}"
        )

    @classmethod
    def _from_ibis_UInt64(cls, dtype: dt.UInt64) -> sge.DataType:
        """Reject ibis UInt64.

        Raises:
            TypeError: always.
        """
        raise TypeError(
            f"Conversion from {dtype} to BigQuery integer type (Int64) is lossy"
        )

    @classmethod
    def _from_ibis_UInt32(cls, dtype: dt.UInt32) -> sge.DataType:
        """Convert a narrower unsigned ibis integer to a sqlglot BIGINT node."""
        return sge.DataType(this=sge.DataType.Type.BIGINT)

    # UInt8 and UInt16 share the UInt32 conversion.
    _from_ibis_UInt16 = _from_ibis_UInt32
    _from_ibis_UInt8 = _from_ibis_UInt32

    @classmethod
    def _from_ibis_GeoSpatial(cls, dtype: dt.GeoSpatial) -> sge.DataType:
        """Convert an ibis geography (srid 4326) to a sqlglot GEOGRAPHY node.

        Raises:
            TypeError: if the geotype/srid pair is not ("geography", 4326).
        """
        if (dtype.geotype, dtype.srid) == ("geography", 4326):
            return sge.DataType(this=sge.DataType.Type.GEOGRAPHY)
        # The trailing space keeps the two sentences from running together.
        raise TypeError(
            "BigQuery geography uses points on WGS84 reference ellipsoid. "
            f"Current geotype: {dtype.geotype}, Current srid: {dtype.srid}"
        )
95
- elif dtype .is_map ():
96
- raise NotImplementedError ("Maps are not supported in BigQuery" )
97
- else :
98
- return str (dtype ).upper ()
99
122
100
123
101
124
class BigQuerySchema (SchemaMapper ):
@@ -112,7 +135,7 @@ def from_ibis(cls, schema: sch.Schema) -> list[bq.SchemaField]:
112
135
is_struct = value_type .is_struct ()
113
136
114
137
field_type = (
115
- "RECORD" if is_struct else BigQueryType .from_ibis (typ .value_type )
138
+ "RECORD" if is_struct else BigQueryType .to_string (typ .value_type )
116
139
)
117
140
mode = "REPEATED"
118
141
fields = cls .from_ibis (ibis .schema (getattr (value_type , "fields" , {})))
@@ -121,7 +144,7 @@ def from_ibis(cls, schema: sch.Schema) -> list[bq.SchemaField]:
121
144
mode = "NULLABLE" if typ .nullable else "REQUIRED"
122
145
fields = cls .from_ibis (ibis .schema (typ .fields ))
123
146
else :
124
- field_type = BigQueryType .from_ibis (typ )
147
+ field_type = BigQueryType .to_string (typ )
125
148
mode = "NULLABLE" if typ .nullable else "REQUIRED"
126
149
fields = ()
127
150
@@ -138,7 +161,7 @@ def _dtype_from_bigquery_field(cls, field: bq.SchemaField) -> dt.DataType:
138
161
fields = {f .name : cls ._dtype_from_bigquery_field (f ) for f in field .fields }
139
162
dtype = dt .Struct (fields )
140
163
else :
141
- dtype = BigQueryType .to_ibis (typ )
164
+ dtype = BigQueryType .from_string (typ )
142
165
143
166
mode = field .mode
144
167
if mode == "NULLABLE" :
0 commit comments