Skip to content

Commit b2c0989

Browse files
authored
fix(duckdb): return null typed pyarrow arrays and disable creating tables with all null columns in duckdb (#9810)
1 parent 15f075e commit b2c0989

File tree

20 files changed

+163
-37
lines changed

20 files changed

+163
-37
lines changed

ibis/backends/clickhouse/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,12 @@ def _normalize_external_tables(self, external_tables=None) -> ExternalData | Non
253253
n += 1
254254
if not (schema := obj.schema):
255255
raise TypeError(f"Schema is empty for external table {name}")
256+
if null_fields := schema.null_fields:
257+
raise com.IbisTypeError(
258+
"ClickHouse doesn't support NULL-typed fields. "
259+
"Consider assigning a type through casting or on construction. "
260+
f"Got null typed fields: {null_fields}"
261+
)
256262

257263
structure = [
258264
f"{name} {type_mapper.to_string(typ.copy(nullable=not typ.is_nested()))}"

ibis/backends/duckdb/__init__.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
import ibis.expr.types as ir
2727
from ibis import util
2828
from ibis.backends import CanCreateDatabase, UrlFromPath
29-
from ibis.backends.duckdb.converter import DuckDBPandasData
29+
from ibis.backends.duckdb.converter import DuckDBPandasData, DuckDBPyArrowData
3030
from ibis.backends.sql import SQLBackend
3131
from ibis.backends.sql.compilers.base import STAR, AlterTable, C, RenameTable
3232
from ibis.common.dispatch import lazy_singledispatch
@@ -148,8 +148,6 @@ def create_table(
148148

149149
if obj is None and schema is None:
150150
raise ValueError("Either `obj` or `schema` must be specified")
151-
if schema is not None:
152-
schema = ibis.schema(schema)
153151

154152
quoted = self.compiler.quoted
155153
dialect = self.dialect
@@ -172,16 +170,25 @@ def create_table(
172170
else:
173171
query = None
174172

173+
if schema is None:
174+
schema = table.schema()
175+
else:
176+
schema = ibis.schema(schema)
177+
178+
if null_fields := schema.null_fields:
179+
raise exc.IbisTypeError(
180+
"DuckDB does not support creating tables with NULL typed columns. "
181+
"Ensure that every column has non-NULL type. "
182+
f"NULL columns: {null_fields}"
183+
)
184+
175185
if overwrite:
176186
temp_name = util.gen_name("duckdb_table")
177187
else:
178188
temp_name = name
179189

180190
initial_table = sg.table(temp_name, catalog=catalog, db=database, quoted=quoted)
181-
target = sge.Schema(
182-
this=initial_table,
183-
expressions=(schema or table.schema()).to_sqlglot(dialect),
184-
)
191+
target = sge.Schema(this=initial_table, expressions=schema.to_sqlglot(dialect))
185192

186193
create_stmt = sge.Create(
187194
kind="TABLE",
@@ -252,7 +259,7 @@ def table(self, name: str, database: str | None = None) -> ir.Table:
252259

253260
table_schema = self.get_schema(name, catalog=catalog, database=database)
254261
# load geospatial only if geo columns
255-
if any(typ.is_geospatial() for typ in table_schema.types):
262+
if table_schema.geospatial:
256263
self.load_extension("spatial")
257264
return ops.DatabaseTable(
258265
name,
@@ -1302,7 +1309,7 @@ def to_pyarrow(
13021309
**_: Any,
13031310
) -> pa.Table:
13041311
table = self._to_duckdb_relation(expr, params=params, limit=limit).arrow()
1305-
return expr.__pyarrow_result__(table)
1312+
return expr.__pyarrow_result__(table, data_mapper=DuckDBPyArrowData)
13061313

13071314
def execute(
13081315
self,

ibis/backends/duckdb/converter.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,31 @@
11
from __future__ import annotations
22

3+
from typing import TYPE_CHECKING
4+
5+
import pyarrow as pa
6+
37
from ibis.formats.pandas import PandasData
8+
from ibis.formats.pyarrow import PyArrowData
9+
10+
if TYPE_CHECKING:
11+
import ibis.expr.datatypes as dt
412

513

614
class DuckDBPandasData(PandasData):
715
@staticmethod
816
def convert_Array(s, dtype, pandas_type):
917
return s.replace(float("nan"), None)
18+
19+
20+
class DuckDBPyArrowData(PyArrowData):
21+
@classmethod
22+
def convert_scalar(cls, scalar: pa.Scalar, dtype: dt.DataType) -> pa.Scalar:
23+
if dtype.is_null():
24+
return pa.scalar(None)
25+
return super().convert_scalar(scalar, dtype)
26+
27+
@classmethod
28+
def convert_column(cls, column: pa.Array, dtype: dt.DataType) -> pa.Array:
29+
if dtype.is_null():
30+
return pa.nulls(len(column))
31+
return super().convert_column(column, dtype)

ibis/backends/duckdb/tests/test_client.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from pytest import param
1212

1313
import ibis
14+
import ibis.common.exceptions as com
1415
import ibis.expr.datatypes as dt
1516
from ibis.conftest import LINUX, SANDBOXED, not_windows
1617
from ibis.util import gen_name
@@ -442,3 +443,25 @@ def test_pyarrow_batches_chunk_size(con): # 10443
442443
batches = con.to_pyarrow_batches(t, chunk_size=-1)
443444
with pytest.raises(TypeError):
444445
next(batches)
446+
447+
448+
@pytest.mark.parametrize(
449+
"kwargs",
450+
[
451+
dict(obj=ibis.memtable({"a": [None]})),
452+
dict(obj=ibis.memtable({"a": [None]}), schema=ibis.schema({"a": "null"})),
453+
dict(schema=ibis.schema({"a": "null"})),
454+
],
455+
ids=["obj", "obj-schema", "schema"],
456+
)
457+
def test_create_table_with_nulls(con, kwargs):
458+
t = ibis.memtable({"a": [None]})
459+
schema = t.schema()
460+
461+
assert schema == ibis.schema({"a": "null"})
462+
assert schema.null_fields == ("a",)
463+
464+
name = gen_name("duckdb_all_nulls")
465+
466+
with pytest.raises(com.IbisTypeError, match="NULL typed columns"):
467+
con.create_table(name, **kwargs)

ibis/backends/exasol/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ def _get_schema_using_query(self, query: str) -> sch.Schema:
278278

279279
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
280280
schema = op.schema
281-
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
281+
if null_columns := schema.null_fields:
282282
raise com.IbisTypeError(
283283
"Exasol cannot yet reliably handle `null` typed columns; "
284284
f"got null typed columns: {null_columns}"

ibis/backends/flink/__init__.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -375,13 +375,22 @@ def compile(
375375
expr, params=params, pretty=pretty
376376
) # Discard `limit` and other kwargs.
377377

378+
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
379+
if null_columns := op.schema.null_fields:
380+
raise exc.IbisTypeError(
381+
f"{self.name} cannot yet reliably handle `null` typed columns; "
382+
f"got null typed columns: {null_columns}"
383+
)
384+
self.create_view(op.name, op.data.to_frame(), schema=op.schema, temp=True)
385+
386+
def _finalize_memtable(self, name: str) -> None:
387+
self.drop_view(name, temp=True, force=True)
388+
378389
def execute(self, expr: ir.Expr, **kwargs: Any) -> Any:
379390
"""Execute an expression."""
380-
self._verify_in_memory_tables_are_unique(expr)
381-
self._register_udfs(expr)
391+
self._run_pre_execute_hooks(expr)
382392

383-
table_expr = expr.as_table()
384-
sql = self.compile(table_expr, **kwargs)
393+
sql = self.compile(expr.as_table(), **kwargs)
385394
df = self._table_env.sql_query(sql).to_pandas()
386395

387396
return expr.__pandas_result__(df)

ibis/backends/impala/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1395,7 +1395,7 @@ def explain(
13951395

13961396
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
13971397
schema = op.schema
1398-
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
1398+
if null_columns := schema.null_fields:
13991399
raise com.IbisTypeError(
14001400
"Impala cannot yet reliably handle `null` typed columns; "
14011401
f"got null typed columns: {null_columns}"

ibis/backends/mssql/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -755,7 +755,7 @@ def create_table(
755755

756756
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
757757
schema = op.schema
758-
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
758+
if null_columns := schema.null_fields:
759759
raise com.IbisTypeError(
760760
"MS SQL cannot yet reliably handle `null` typed columns; "
761761
f"got null typed columns: {null_columns}"

ibis/backends/mysql/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,7 @@ def create_table(
463463

464464
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
465465
schema = op.schema
466-
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
466+
if null_columns := schema.null_fields:
467467
raise com.IbisTypeError(
468468
"MySQL cannot yet reliably handle `null` typed columns; "
469469
f"got null typed columns: {null_columns}"

ibis/backends/oracle/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,11 @@ def drop_table(
509509

510510
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
511511
schema = op.schema
512+
if null_columns := schema.null_fields:
513+
raise exc.IbisTypeError(
514+
f"{self.name} cannot yet reliably handle `null` typed columns; "
515+
f"got null typed columns: {null_columns}"
516+
)
512517

513518
name = op.name
514519
quoted = self.compiler.quoted

ibis/backends/postgres/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ def _from_url(self, url: ParseResult, **kwargs):
9191

9292
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
9393
schema = op.schema
94-
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
94+
if null_columns := schema.null_fields:
9595
raise exc.IbisTypeError(
9696
f"{self.name} cannot yet reliably handle `null` typed columns; "
9797
f"got null typed columns: {null_columns}"

ibis/backends/risingwave/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,7 @@ def create_table(
642642

643643
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
644644
schema = op.schema
645-
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
645+
if null_columns := schema.null_fields:
646646
raise com.IbisTypeError(
647647
f"{self.name} cannot yet reliably handle `null` typed columns; "
648648
f"got null typed columns: {null_columns}"

ibis/backends/tests/test_array.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1101,6 +1101,9 @@ def test_array_intersect(con, data):
11011101
["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError
11021102
)
11031103
@pytest.mark.notyet(["athena"], raises=PyAthenaDatabaseError)
1104+
@pytest.mark.notyet(
1105+
["flink"], raises=ValueError, reason="array of struct is not supported"
1106+
)
11041107
def test_unnest_struct(con):
11051108
data = {"value": [[{"a": 1}, {"a": 2}], [{"a": 3}, {"a": 4}]]}
11061109
t = ibis.memtable(data, schema=ibis.schema({"value": "!array<!struct<a: !int>>"}))
@@ -1120,8 +1123,8 @@ def test_unnest_struct(con):
11201123
@pytest.mark.notimpl(
11211124
["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError
11221125
)
1123-
@pytest.mark.notimpl(
1124-
["flink"], reason="flink unnests a and b as separate columns", raises=Py4JJavaError
1126+
@pytest.mark.notyet(
1127+
["flink"], raises=ValueError, reason="array of struct is not supported"
11251128
)
11261129
@pytest.mark.notyet(["athena"], raises=PyAthenaDatabaseError)
11271130
def test_unnest_struct_with_multiple_fields(con):
@@ -1229,9 +1232,7 @@ def test_zip_null(con, fn):
12291232
["trino"], reason="inserting maps into structs doesn't work", raises=TrinoUserError
12301233
)
12311234
@pytest.mark.notyet(
1232-
["flink"],
1233-
raises=Py4JJavaError,
1234-
reason="does not seem to support field selection on unnest",
1235+
["flink"], raises=ValueError, reason="array of struct is not supported"
12351236
)
12361237
@pytest.mark.notyet(["athena"], raises=PyAthenaOperationalError)
12371238
def test_array_of_struct_unnest(con):
@@ -1765,16 +1766,17 @@ def test_table_unnest_column_expr(backend):
17651766
assert set(result.values) == set(expected.replace({np.nan: None}).values)
17661767

17671768

1768-
@pytest.mark.notimpl(
1769-
["datafusion", "polars", "flink"], raises=com.OperationNotDefinedError
1770-
)
1769+
@pytest.mark.notimpl(["datafusion", "polars"], raises=com.OperationNotDefinedError)
17711770
@pytest.mark.notimpl(["trino"], raises=TrinoUserError)
17721771
@pytest.mark.notimpl(["athena"], raises=PyAthenaOperationalError)
17731772
@pytest.mark.notimpl(["postgres"], raises=PsycoPg2SyntaxError)
17741773
@pytest.mark.notimpl(["risingwave"], raises=PsycoPg2ProgrammingError)
17751774
@pytest.mark.notyet(
17761775
["risingwave"], raises=PsycoPg2InternalError, reason="not supported in risingwave"
17771776
)
1777+
@pytest.mark.notyet(
1778+
["flink"], raises=ValueError, reason="array of struct is not supported"
1779+
)
17781780
def test_table_unnest_array_of_struct_of_array(con):
17791781
t = ibis.memtable(
17801782
{

ibis/backends/tests/test_client.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1773,9 +1773,7 @@ def test_insert_into_table_missing_columns(con, temp_table):
17731773

17741774
@pytest.mark.notyet(["druid"], raises=AssertionError, reason="can't drop tables")
17751775
@pytest.mark.notyet(
1776-
["clickhouse", "flink"],
1777-
raises=AssertionError,
1778-
reason="memtables are assembled every time",
1776+
["clickhouse"], raises=AssertionError, reason="memtables are assembled every time"
17791777
)
17801778
@pytest.mark.notyet(
17811779
["bigquery"], raises=AssertionError, reason="test is flaky", strict=False
@@ -1821,7 +1819,7 @@ def test_same_name_memtable_is_overwritten(con):
18211819

18221820

18231821
@pytest.mark.notimpl(
1824-
["clickhouse", "flink"],
1822+
["clickhouse"],
18251823
raises=AssertionError,
18261824
reason="backend doesn't use _register_in_memory_table",
18271825
)

ibis/backends/tests/test_export.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from pytest import param
88

99
import ibis
10+
import ibis.common.exceptions as com
1011
import ibis.expr.datatypes as dt
1112
from ibis import util
1213
from ibis.backends.tests.errors import (
@@ -17,6 +18,7 @@
1718
ExaQueryError,
1819
MySQLOperationalError,
1920
OracleDatabaseError,
21+
Py4JJavaError,
2022
PyAthenaOperationalError,
2123
PyDeltaTableError,
2224
PyDruidProgrammingError,
@@ -30,6 +32,7 @@
3032

3133
pd = pytest.importorskip("pandas")
3234
pa = pytest.importorskip("pyarrow")
35+
pat = pytest.importorskip("pyarrow.types")
3336

3437
limit = [param(42, id="limit")]
3538

@@ -661,4 +664,44 @@ def test_scalar_to_memory(limit, awards_players, output_format, converter):
661664

662665
expr = awards_players.filter(awards_players.awardID == "DEADBEEF").yearID.min()
663666
res = method(expr)
667+
664668
assert converter(res) is None
669+
670+
671+
mark_notyet_nulls = pytest.mark.notyet(
672+
[
673+
"clickhouse",
674+
"exasol",
675+
"flink",
676+
"impala",
677+
"mssql",
678+
"mysql",
679+
"oracle",
680+
"postgres",
681+
"risingwave",
682+
"trino",
683+
],
684+
raises=com.IbisTypeError,
685+
reason="unable to handle null types as input",
686+
)
687+
688+
689+
@mark_notyet_nulls
690+
def test_all_null_table(con):
691+
t = ibis.memtable({"a": [None]})
692+
result = con.to_pyarrow(t)
693+
assert pat.is_null(result["a"].type)
694+
695+
696+
@mark_notyet_nulls
697+
def test_all_null_column(con):
698+
t = ibis.memtable({"a": [None]})
699+
result = con.to_pyarrow(t.a)
700+
assert pat.is_null(result.type)
701+
702+
703+
@pytest.mark.notyet(["flink"], raises=Py4JJavaError)
704+
def test_all_null_scalar(con):
705+
e = ibis.literal(None)
706+
result = con.to_pyarrow(e)
707+
assert pat.is_null(result.type)

ibis/backends/tests/test_numeric.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1629,6 +1629,7 @@ def test_scalar_round_is_integer(con):
16291629
],
16301630
)
16311631
@pytest.mark.notyet(["exasol"], raises=ExaQueryError)
1632+
@pytest.mark.notimpl(["flink"], raises=NotImplementedError)
16321633
def test_memtable_decimal(con, numbers):
16331634
schema = ibis.schema(dict(numbers=dt.Decimal(38, 9)))
16341635
t = ibis.memtable({"numbers": numbers}, schema=schema)

ibis/backends/trino/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,7 @@ def _fetch_from_cursor(self, cursor, schema: sch.Schema) -> pd.DataFrame:
550550

551551
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
552552
schema = op.schema
553-
if null_columns := [col for col, dtype in schema.items() if dtype.is_null()]:
553+
if null_columns := schema.null_fields:
554554
raise com.IbisTypeError(
555555
"Trino cannot yet reliably handle `null` typed columns; "
556556
f"got null typed columns: {null_columns}"

0 commit comments

Comments
 (0)