Skip to content

Commit 83bed74

Browse files
authored
fix(duckdb): ensure that duckdb columns argument to read_csv accepts duckdb syntax not ibis syntax (#10696)
BREAKING CHANGE: The duckdb backend's `read_csv` method accepts only DuckDB types for the value components of the `columns` and `types` arguments. You may need to adjust existing code. For example, the string `"float64"` should be replaced with the string `"double"`.
1 parent b9bd2a8 commit 83bed74

File tree

3 files changed

+22
-10
lines changed

3 files changed

+22
-10
lines changed

ibis/backends/duckdb/__init__.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -667,14 +667,18 @@ def read_csv(
667667
def make_struct_argument(obj: Mapping[str, str | dt.DataType]) -> sge.Struct:
668668
expressions = []
669669
geospatial = False
670-
type_mapper = self.compiler.type_mapper
670+
dialect = self.compiler.dialect
671+
possible_geospatial_types = (
672+
sge.DataType.Type.GEOGRAPHY,
673+
sge.DataType.Type.GEOMETRY,
674+
)
671675

672676
for name, typ in obj.items():
673-
typ = dt.dtype(typ)
674-
geospatial |= typ.is_geospatial()
675-
sgtype = type_mapper.from_ibis(typ)
677+
sgtype = sg.parse_one(typ, read=dialect, into=sge.DataType)
678+
geospatial |= sgtype.this in possible_geospatial_types
676679
prop = sge.PropertyEQ(
677-
this=sge.to_identifier(name), expression=sge.convert(sgtype)
680+
this=sge.to_identifier(name),
681+
expression=sge.convert(sgtype.sql(dialect)),
678682
)
679683
expressions.append(prop)
680684

ibis/backends/duckdb/tests/test_client.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import sys
66

77
import duckdb
8+
import numpy as np
89
import pandas as pd
910
import pyarrow as pa
1011
import pytest
@@ -391,11 +392,12 @@ def test_multiple_tables_with_the_same_name(tmp_path):
391392
@pytest.mark.parametrize(
392393
"input",
393394
[
394-
{"columns": {"lat": "float64", "lon": "float64", "geom": "geometry"}},
395-
{"types": {"geom": "geometry"}},
395+
{"columns": {"lat": "double", "lon": "float", "geom": "geometry"}},
396+
{"types": {"geom": "geometry", "lon": "float"}},
396397
],
398+
ids=["columns", "types"],
397399
)
398-
@pytest.mark.parametrize("all_varchar", [True, False])
400+
@pytest.mark.parametrize("all_varchar", [True, False], ids=["varchar", "not_varchar"])
399401
@pytest.mark.xfail(
400402
LINUX and SANDBOXED,
401403
reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
@@ -423,8 +425,6 @@ def test_memtable_doesnt_leak(con):
423425

424426

425427
def test_pyarrow_batches_chunk_size(con): # 10443
426-
import numpy as np
427-
428428
t = ibis.memtable(
429429
{
430430
"id": np.arange(10_000),

ibis/backends/duckdb/tests/test_io.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import ibis
1616
import ibis.expr.datatypes as dt
1717
from ibis.conftest import ARM64, LINUX, MACOS, SANDBOXED
18+
from ibis.util import gen_name
1819

1920

2021
def test_read_csv(con, data_dir):
@@ -461,3 +462,10 @@ def test_read_json_no_auto_detection(con, tmp_path):
461462

462463
t = con.read_json(path, auto_detect=False, columns={"year": "varchar"})
463464
assert t.year.type() == dt.string
465+
466+
467+
def test_read_csv_with_duckdb_specific_types(con):
468+
path = gen_name("duckdb")
469+
columns = {"a": "STRUCT(a INTEGER)"}
470+
with pytest.raises(duckdb.IOException, match="No files found"):
471+
con.read_csv(path, columns=columns)

0 commit comments

Comments
 (0)