Skip to content

Commit 43e5f12

Browse files
authored
refactor(datafusion): avoid reinitializing memtables on every execute call (#10057)
1 parent 9488115 commit 43e5f12

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

ibis/backends/datafusion/__init__.py

+9 -8
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99

1010
import datafusion as df
1111
import pyarrow as pa
12-
import pyarrow.dataset as ds
1312
import pyarrow_hotfix # noqa: F401
1413
import sqlglot as sg
1514
import sqlglot.expressions as sge
@@ -418,14 +417,16 @@ def _register_failure(self):
418417

419418
def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
420419
name = op.name
421-
schema = op.schema
422420

423-
self.con.deregister_table(name)
424-
if batches := op.data.to_pyarrow(schema).to_batches():
425-
self.con.register_record_batches(name, [batches])
426-
else:
427-
empty_dataset = ds.dataset([], schema=schema.to_pyarrow())
428-
self.con.register_dataset(name=name, dataset=empty_dataset)
421+
db = self.con.catalog().database()
422+
423+
try:
424+
db.table(name)
425+
except Exception: # noqa: BLE001 because datafusion doesn't have anything better
426+
# self.con.register_table is broken, so we do this roundabout thing
427+
# of constructing a datafusion DataFrame, which has a side effect
428+
# of registering the table
429+
self.con.from_arrow_table(op.data.to_pyarrow(op.schema), name)
429430

430431
def read_csv(
431432
self, path: str | Path, table_name: str | None = None, **kwargs: Any

0 commit comments

Comments
 (0)