Skip to content

Commit 33327dc

Browse files
committed
feat(duckdb): enforce aswkb for projections, coerce to geopandas
Two things are happening here: 1. We intercept the call to `get_sqla_type` to enforce the use of `ST_AsWKB`, overriding the behavior of `geoalchemy2`, which is very PostGIS-focused. 2. We add a DuckDB-specific `_to_geodataframe`, since we get WKB out of DuckDB and can pass it directly to `geopandas` without first going through Shapely (as the base alchemy method does).
1 parent cc16715 commit 33327dc

File tree

3 files changed

+50
-3
lines changed

3 files changed

+50
-3
lines changed

ibis/backends/duckdb/__init__.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from ibis import util
2525
from ibis.backends.base import CanCreateSchema
2626
from ibis.backends.base.sql.alchemy import AlchemyCrossSchemaBackend
27+
from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported
2728
from ibis.backends.base.sqlglot import C, F
2829
from ibis.backends.duckdb.compiler import DuckDBSQLCompiler
2930
from ibis.backends.duckdb.datatypes import DuckDBType
@@ -1199,7 +1200,30 @@ def fetch_from_cursor(
11991200
for name, col in zip(table.column_names, table.columns)
12001201
}
12011202
)
1202-
return PandasData.convert_table(df, schema)
1203+
df = PandasData.convert_table(df, schema)
1204+
if not df.empty and geospatial_supported:
1205+
return self._to_geodataframe(df, schema)
1206+
return df
1207+
1208+
# TODO(gforsyth): this may not need to be specialized in the future
1209+
@staticmethod
def _to_geodataframe(df, schema):
    """Convert `df` to a `GeoDataFrame` when it has geospatial columns.

    Every column whose dtype in `schema` is geospatial is decoded from WKB
    into a `geopandas.GeoSeries`. The first such column, in schema order,
    becomes the geometry column of the returned `GeoDataFrame`.

    The libraries required for geospatial support only need to be
    installed when `schema` actually contains a geospatial column.

    Parameters
    ----------
    df
        DataFrame whose geospatial columns hold WKB-encoded values. The
        geospatial columns are replaced in place.
    schema
        Mapping of column name to dtype; each dtype must provide an
        `is_geospatial()` method.

    Returns
    -------
    A `GeoDataFrame` if `schema` has at least one geospatial column,
    otherwise `df` itself, unchanged.
    """
    # Collect names explicitly rather than using the truthiness of a single
    # name as a sentinel, so a falsy column name (e.g. "") is still handled.
    geo_cols = [name for name, dtype in schema.items() if dtype.is_geospatial()]
    if not geo_cols:
        return df

    # Imported lazily: geopandas is an optional dependency and is only
    # needed once we know there is geometry to decode.
    import geopandas as gpd

    for name in geo_cols:
        df[name] = gpd.GeoSeries.from_wkb(df[name])
    return gpd.GeoDataFrame(df, geometry=geo_cols[0])
12031227

12041228
def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]:
12051229
with self.begin() as con:

ibis/backends/duckdb/datatypes.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,19 @@
77
from ibis.backends.base.sql.alchemy.datatypes import AlchemyType
88
from ibis.backends.base.sqlglot.datatypes import DuckDBType as SqlglotDuckdbType
99

10+
11+
# geoalchemy2 is an optional dependency: define the real WKB-emitting type
# when it is importable, and a same-named placeholder otherwise so that
# references to `Geometry_WKB` in this module still resolve.
try:
    from geoalchemy2 import Geometry
except ImportError:

    class Geometry_WKB:
        """Placeholder used when `geoalchemy2` is not installed."""

else:

    class Geometry_WKB(Geometry):
        """`Geometry` subclass whose `as_binary` function is `ST_AsWKB`."""

        as_binary = "ST_AsWKB"
21+
22+
1023
_from_duckdb_types = {
1124
psql.BYTEA: dt.Binary,
1225
psql.UUID: dt.UUID,
@@ -35,6 +48,8 @@
3548
dt.UInt16: ducktypes.USmallInteger,
3649
dt.UInt32: ducktypes.UInteger,
3750
dt.UInt64: ducktypes.UBigInteger,
51+
# Handle projections with geometry columns
52+
dt.Geometry: Geometry_WKB,
3853
}
3954

4055

ibis/backends/duckdb/tests/test_register.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,22 @@ def test_read_parquet(data_dir):
4646
assert t.count().execute()
4747

4848

49-
@pytest.mark.xfail(raises=duckdb.duckdb.CatalogException, reason="ST_AsEWKB")
50-
def test_read_geo_fail(con, data_dir):
49+
@pytest.mark.xfail(raises=NotImplementedError)
def test_read_geo_to_pyarrow(con, data_dir):
    """Check pyarrow conversion of a table read through `read_geo`."""
    pytest.importorskip("geopandas")
    zones_path = data_dir / "geojson" / "zones.geojson"
    zones = con.read_geo(zones_path)
    # Geometry cannot be converted to an arrow type yet, so the conversion
    # below is expected to raise NotImplementedError (hence the xfail).
    preview = zones.head()
    assert preview.to_pyarrow()
5555

5656

57+
def test_read_geo_to_geopandas(con, data_dir):
    """Check that a table read through `read_geo` executes to a GeoDataFrame."""
    geopandas = pytest.importorskip("geopandas")
    zones_path = data_dir / "geojson" / "zones.geojson"
    zones = con.read_geo(zones_path)
    # The pandas result should be a GeoDataFrame rather than a plain
    # DataFrame.
    result = zones.head().to_pandas()
    assert isinstance(result, geopandas.GeoDataFrame)
63+
64+
5765
def test_read_geo(con, data_dir):
5866
pytest.importorskip("geopandas")
5967
t = con.read_geo(data_dir / "geojson" / "zones.geojson")

0 commit comments

Comments
 (0)