Skip to content

Commit 1c91d65

Browse files
authored
fix(datatype-parsing): ensure that geospatial types are round-trippable through the data type parser (#10171)
Fixes #10170.
1 parent 54472b4 commit 1c91d65

File tree

2 files changed

+21
-8
lines changed

2 files changed

+21
-8
lines changed

ibis/expr/datatypes/parse.py

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
from __future__ import annotations
22

33
import ast
4-
import functools
54
import re
5+
from functools import lru_cache, partial
66
from operator import methodcaller
77

88
import parsy
@@ -50,7 +50,7 @@ def spaceless_string(*strings: str):
5050

5151

5252
@public
53-
@functools.lru_cache(maxsize=100)
53+
@lru_cache(maxsize=100)
5454
def parse(
5555
text: str, default_decimal_parameters: tuple[int | None, int | None] = (None, None)
5656
) -> dt.DataType:
@@ -88,12 +88,15 @@ def parse(
8888
geotype = spaceless_string("geography", "geometry")
8989

9090
srid_geotype = SEMICOLON.then(parsy.seq(srid=NUMBER.skip(COLON), geotype=geotype))
91+
geotype_srid = COLON.then(parsy.seq(geotype=geotype, srid=SEMICOLON.then(NUMBER)))
9192
geotype_part = COLON.then(parsy.seq(geotype=geotype))
9293
srid_part = SEMICOLON.then(parsy.seq(srid=NUMBER))
9394

9495
def geotype_parser(typ: type[dt.DataType]) -> dt.DataType:
9596
return spaceless_string(typ.__name__.lower()).then(
96-
(srid_geotype | geotype_part | srid_part).optional(dict()).combine_dict(typ)
97+
(srid_geotype | geotype_srid | geotype_part | srid_part)
98+
.optional(dict())
99+
.combine_dict(typ)
97100
)
98101

99102
primitive = (
@@ -116,15 +119,25 @@ def geotype_parser(typ: type[dt.DataType]) -> dt.DataType:
116119
"time",
117120
"date",
118121
"null",
119-
).map(functools.partial(getattr, dt))
120-
| spaceless_string("bytes").result(dt.binary)
121-
| geotype.map(dt.GeoSpatial)
122+
).map(partial(getattr, dt))
122123
| geotype_parser(dt.LineString)
123124
| geotype_parser(dt.Polygon)
124125
| geotype_parser(dt.Point)
125126
| geotype_parser(dt.MultiLineString)
126127
| geotype_parser(dt.MultiPolygon)
127128
| geotype_parser(dt.MultiPoint)
129+
| spaceless_string("bytes").result(dt.binary)
130+
| spaceless_string("geospatial:geography").then(
131+
srid_part.optional(dict()).combine_dict(
132+
partial(dt.GeoSpatial, geotype="geography")
133+
)
134+
)
135+
| spaceless_string("geospatial:geometry").then(
136+
srid_part.optional(dict()).combine_dict(
137+
partial(dt.GeoSpatial, geotype="geometry")
138+
)
139+
)
140+
| geotype.map(dt.GeoSpatial)
128141
)
129142

130143
varchar_or_char = (
@@ -201,7 +214,7 @@ def geotype_parser(typ: type[dt.DataType]) -> dt.DataType:
201214
| map
202215
| struct
203216
| spaceless_string("jsonb", "json", "uuid", "macaddr", "inet").map(
204-
functools.partial(getattr, dt)
217+
partial(getattr, dt)
205218
)
206219
| spaceless_string("int").result(dt.int64)
207220
| spaceless_string("str").result(dt.string)

ibis/expr/datatypes/tests/test_parse.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,6 @@ def test_parse_null():
266266

267267

268268
# corresponds to its.all_dtypes() but without:
269-
# - geospacial types, the string representation is different from what the parser expects
270269
# - struct types, the generated struct field names contain special characters
271270

272271
field_names = st.text(
@@ -286,6 +285,7 @@ def test_parse_null():
286285
| its.struct_dtypes(names=field_names)
287286
| its.array_dtypes(roundtrippable_dtypes)
288287
| its.map_dtypes(roundtrippable_dtypes, roundtrippable_dtypes)
288+
| its.geospatial_dtypes()
289289
)
290290
)
291291

0 commit comments

Comments
 (0)