Skip to content

Commit 883d2d3

Browse files
authored
fix(duckdb): allow setting auto_detect to False by fixing translation of columns argument (#10065)
1 parent a121ab3 commit 883d2d3

File tree

2 files changed

+35
-1
lines changed

2 files changed

+35
-1
lines changed

ibis/backends/duckdb/__init__.py

+22 -1
Original file line numberDiff line numberDiff line change
@@ -600,6 +600,7 @@ def read_json(
600600
self,
601601
source_list: str | list[str] | tuple[str],
602602
table_name: str | None = None,
603+
columns: Mapping[str, str] | None = None,
603604
**kwargs,
604605
) -> ir.Table:
605606
"""Read newline-delimited JSON into an ibis table.
@@ -614,8 +615,13 @@ def read_json(
614615
File or list of files
615616
table_name
616617
Optional table name
618+
columns
619+
Optional mapping from column name to DuckDB type string, e.g. `{"year": "varchar"}`.
617620
**kwargs
618-
Additional keyword arguments passed to DuckDB's `read_json_auto` function
621+
Additional keyword arguments passed to DuckDB's `read_json_auto` function.
622+
623+
See https://duckdb.org/docs/data/json/overview.html#json-loading
624+
for parameters and more information about reading JSON.
619625
620626
Returns
621627
-------
@@ -630,6 +636,21 @@ def read_json(
630636
sg.to_identifier(key).eq(sge.convert(val)) for key, val in kwargs.items()
631637
]
632638

639+
if columns:
640+
options.append(
641+
sg.to_identifier("columns").eq(
642+
sge.Struct.from_arg_list(
643+
[
644+
sge.PropertyEQ(
645+
this=sg.to_identifier(key),
646+
expression=sge.convert(value),
647+
)
648+
for key, value in columns.items()
649+
]
650+
)
651+
)
652+
)
653+
633654
self._create_temp_view(
634655
table_name,
635656
sg.select(STAR).from_(

ibis/backends/duckdb/tests/test_register.py

+13
Original file line numberDiff line numberDiff line change
@@ -505,3 +505,16 @@ def test_memtable_null_column_parquet_dtype_roundtrip(con, tmp_path):
505505
after = con.read_parquet(tmp_path / "tmp.parquet")
506506

507507
assert before.a.type() == after.a.type()
508+
509+
510+
def test_read_json_no_auto_detection(con, tmp_path):
511+
ndjson_data = """
512+
{"year": 2007}
513+
{"year": 2008}
514+
{"year": 2009}
515+
"""
516+
path = tmp_path.joinpath("test.ndjson")
517+
path.write_text(ndjson_data)
518+
519+
t = con.read_json(path, auto_detect=False, columns={"year": "varchar"})
520+
assert t.year.type() == dt.string

0 commit comments

Comments
 (0)