
Commit aa60584

refactor(register): remove deprecated register method (#10545)
BREAKING CHANGE: The deprecated `register` method has been removed. Please use the file-specific `read_*` methods instead. For in-memory objects, pass them to `ibis.memtable` or `create_table`.
Parent commit: 0c57e8b
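
For code that still calls `register`, the migration is mechanical. A rough sketch of the before/after (not part of the commit; the DuckDB backend, file path, and table names below are illustrative):

```python
import pandas as pd

import ibis

con = ibis.duckdb.connect()  # any backend that previously exposed `register`

df = pd.DataFrame({"a": [1, 2, 3]})
df.to_parquet("events.parquet")  # stand-in for a data file you already have

# Before (removed by this commit):
#   con.register("events.parquet", table_name="events")
#   con.register(df, table_name="frame")

# After: file-specific readers for data on disk ...
events = con.read_parquet("events.parquet", table_name="events")

# ... and ibis.memtable / create_table for in-memory objects.
frame = ibis.memtable(df)                  # expression over the in-memory frame
persisted = con.create_table("frame", df)  # or materialize it in the backend

print(con.list_tables())
```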

8 files changed: +215 / -708 lines

ibis/backends/datafusion/__init__.py

Lines changed: 64 additions & 83 deletions
@@ -9,7 +9,6 @@
 
 import datafusion as df
 import pyarrow as pa
-import pyarrow.dataset as ds
 import pyarrow_hotfix  # noqa: F401
 import sqlglot as sg
 import sqlglot.expressions as sge
@@ -28,7 +27,7 @@
 from ibis.common.dispatch import lazy_singledispatch
 from ibis.expr.operations.udf import InputType
 from ibis.formats.pyarrow import PyArrowSchema, PyArrowType
-from ibis.util import deprecated, gen_name, normalize_filename, normalize_filenames
+from ibis.util import gen_name, normalize_filename, normalize_filenames, warn_deprecated
 
 try:
     from datafusion import ExecutionContext as SessionContext
@@ -88,37 +87,30 @@ def do_connect(
         Parameters
         ----------
         config
-            Mapping of table names to files or a `SessionContext`
+            Mapping of table names to files (deprecated in 10.0) or a `SessionContext`
             instance.
 
         Examples
         --------
+        >>> from datafusion import SessionContext
+        >>> ctx = SessionContext()
+        >>> _ = ctx.from_pydict({"a": [1, 2, 3]}, "mytable")
         >>> import ibis
-        >>> config = {
-        ...     "astronauts": "ci/ibis-testing-data/parquet/astronauts.parquet",
-        ...     "diamonds": "ci/ibis-testing-data/csv/diamonds.csv",
-        ... }
-        >>> con = ibis.datafusion.connect(config)
+        >>> con = ibis.datafusion.connect(ctx)
        >>> con.list_tables()
-        ['astronauts', 'diamonds']
-        >>> con.table("diamonds")
-        DatabaseTable: diamonds
-          carat   float64
-          cut     string
-          color   string
-          clarity string
-          depth   float64
-          table   float64
-          price   int64
-          x       float64
-          y       float64
-          z       float64
+        ['mytable']
         """
         if isinstance(config, SessionContext):
             (self.con, config) = (config, None)
         else:
             if config is not None and not isinstance(config, Mapping):
                 raise TypeError("Input to ibis.datafusion.connect must be a mapping")
+            elif config is not None and config:  # warn if dict is not empty
+                warn_deprecated(
+                    "Passing a mapping of tables names to files",
+                    as_of="10.0",
+                    instead="Please use the explicit `read_*` methods for the files you would like to load instead.",
+                )
             if SessionConfig is not None:
                 df_config = SessionConfig(
                     {"datafusion.sql_parser.dialect": "PostgreSQL"}
@@ -178,6 +170,57 @@ def _get_schema_using_query(self, query: str) -> sch.Schema:
 
         return PyArrowSchema.to_ibis(df.schema())
 
+    def _register(
+        self,
+        source: str | Path | pa.Table | pa.RecordBatch | pa.Dataset | pd.DataFrame,
+        table_name: str | None = None,
+        **kwargs: Any,
+    ) -> ir.Table:
+        import pandas as pd
+        import pyarrow.dataset as ds
+
+        if isinstance(source, (str, Path)):
+            first = str(source)
+        elif isinstance(source, pa.Table):
+            self.con.deregister_table(table_name)
+            self.con.register_record_batches(table_name, [source.to_batches()])
+            return self.table(table_name)
+        elif isinstance(source, pa.RecordBatch):
+            self.con.deregister_table(table_name)
+            self.con.register_record_batches(table_name, [[source]])
+            return self.table(table_name)
+        elif isinstance(source, ds.Dataset):
+            self.con.deregister_table(table_name)
+            self.con.register_dataset(table_name, source)
+            return self.table(table_name)
+        elif isinstance(source, pd.DataFrame):
+            return self.register(pa.Table.from_pandas(source), table_name, **kwargs)
+        else:
+            raise ValueError("`source` must be either a string or a pathlib.Path")
+
+        if first.startswith(("parquet://", "parq://")) or first.endswith(
+            ("parq", "parquet")
+        ):
+            return self.read_parquet(source, table_name=table_name, **kwargs)
+        elif first.startswith(("csv://", "txt://")) or first.endswith(
+            ("csv", "tsv", "txt")
+        ):
+            return self.read_csv(source, table_name=table_name, **kwargs)
+        else:
+            self._register_failure()
+            return None
+
+    def _register_failure(self):
+        import inspect
+
+        msg = ", ".join(
+            m[0] for m in inspect.getmembers(self) if m[0].startswith("read_")
+        )
+        raise ValueError(
+            f"Cannot infer appropriate read function for input, "
+            f"please call one of {msg} directly"
+        )
+
     def _register_builtin_udfs(self):
         from ibis.backends.datafusion import udfs
 
@@ -345,68 +388,6 @@ def get_schema(
         table = database.table(table_name)
         return sch.schema(table.schema)
 
-    @deprecated(
-        as_of="9.1",
-        instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
-    )
-    def register(
-        self,
-        source: str | Path | pa.Table | pa.RecordBatch | pa.Dataset | pd.DataFrame,
-        table_name: str | None = None,
-        **kwargs: Any,
-    ) -> ir.Table:
-        return self._register(source, table_name, **kwargs)
-
-    def _register(
-        self,
-        source: str | Path | pa.Table | pa.RecordBatch | pa.Dataset | pd.DataFrame,
-        table_name: str | None = None,
-        **kwargs: Any,
-    ) -> ir.Table:
-        import pandas as pd
-
-        if isinstance(source, (str, Path)):
-            first = str(source)
-        elif isinstance(source, pa.Table):
-            self.con.deregister_table(table_name)
-            self.con.register_record_batches(table_name, [source.to_batches()])
-            return self.table(table_name)
-        elif isinstance(source, pa.RecordBatch):
-            self.con.deregister_table(table_name)
-            self.con.register_record_batches(table_name, [[source]])
-            return self.table(table_name)
-        elif isinstance(source, ds.Dataset):
-            self.con.deregister_table(table_name)
-            self.con.register_dataset(table_name, source)
-            return self.table(table_name)
-        elif isinstance(source, pd.DataFrame):
-            return self.register(pa.Table.from_pandas(source), table_name, **kwargs)
-        else:
-            raise ValueError("`source` must be either a string or a pathlib.Path")
-
-        if first.startswith(("parquet://", "parq://")) or first.endswith(
-            ("parq", "parquet")
-        ):
-            return self.read_parquet(source, table_name=table_name, **kwargs)
-        elif first.startswith(("csv://", "txt://")) or first.endswith(
-            ("csv", "tsv", "txt")
-        ):
-            return self.read_csv(source, table_name=table_name, **kwargs)
-        else:
-            self._register_failure()
-            return None
-
-    def _register_failure(self):
-        import inspect
-
-        msg = ", ".join(
-            m[0] for m in inspect.getmembers(self) if m[0].startswith("read_")
-        )
-        raise ValueError(
-            f"Cannot infer appropriate read function for input, "
-            f"please call one of {msg} directly"
-        )
-
     def _register_in_memory_table(self, op: ops.InMemoryTable) -> None:
         # self.con.register_table is broken, so we do this roundabout thing
         # of constructing a datafusion DataFrame, which has a side effect
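
With the public `register` removed from this backend, `_register` survives only as the private helper shown above. For user code, in-memory pyarrow or pandas objects that used to be handed to `register` now go through `ibis.memtable` or `create_table`, per the commit message. An illustrative sketch (table names are hypothetical):

```python
import pyarrow as pa

import ibis

con = ibis.datafusion.connect()

# In-memory pyarrow (or pandas) data becomes an ibis expression via memtable ...
arrow_table = pa.table({"x": [1, 2, 3], "y": ["a", "b", "c"]})
mem = ibis.memtable(arrow_table, name="mem")

# ... or is materialized in the backend with create_table.
persisted = con.create_table("mem_persisted", arrow_table)
print(persisted.schema())
```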

ibis/backends/datafusion/tests/test_connect.py

Lines changed: 4 additions & 2 deletions
@@ -25,13 +25,15 @@ def test_none_config():
 
 def test_str_config(name_to_path):
     config = {name: str(path) for name, path in name_to_path.items()}
-    conn = ibis.datafusion.connect(config)
+    with pytest.warns(FutureWarning):
+        conn = ibis.datafusion.connect(config)
     assert sorted(conn.list_tables()) == sorted(name_to_path)
 
 
 def test_path_config(name_to_path):
     config = name_to_path
-    conn = ibis.datafusion.connect(config)
+    with pytest.warns(FutureWarning):
+        conn = ibis.datafusion.connect(config)
     assert sorted(conn.list_tables()) == sorted(name_to_path)
 

ibis/backends/datafusion/tests/test_register.py

Lines changed: 0 additions & 72 deletions
This file was deleted.

ibis/backends/duckdb/__init__.py

Lines changed: 0 additions & 72 deletions
@@ -31,7 +31,6 @@
 from ibis.backends.sql.compilers.base import STAR, AlterTable, C, RenameTable
 from ibis.common.dispatch import lazy_singledispatch
 from ibis.expr.operations.udf import InputType
-from ibis.util import deprecated
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Mapping, MutableMapping, Sequence
@@ -483,77 +482,6 @@ def drop_database(
         with self._safe_raw_sql(sge.Drop(this=name, kind="SCHEMA", replace=force)):
             pass
 
-    @deprecated(
-        as_of="9.1",
-        instead="use the explicit `read_*` method for the filetype you are trying to read, e.g., read_parquet, read_csv, etc.",
-    )
-    def register(
-        self,
-        source: str | Path | Any,
-        table_name: str | None = None,
-        **kwargs: Any,
-    ) -> ir.Table:
-        """Register a data source as a table in the current database.
-
-        Parameters
-        ----------
-        source
-            The data source(s). May be a path to a file or directory of
-            parquet/csv files, an iterable of parquet or CSV files, a pandas
-            dataframe, a pyarrow table or dataset, or a postgres URI.
-        table_name
-            An optional name to use for the created table. This defaults to a
-            sequentially generated name.
-        **kwargs
-            Additional keyword arguments passed to DuckDB loading functions for
-            CSV or parquet. See https://duckdb.org/docs/data/csv and
-            https://duckdb.org/docs/data/parquet for more information.
-
-        Returns
-        -------
-        ir.Table
-            The just-registered table
-
-        """
-
-        if isinstance(source, (str, Path)):
-            first = str(source)
-        elif isinstance(source, (list, tuple)):
-            first = source[0]
-        else:
-            try:
-                return self.read_in_memory(source, table_name=table_name, **kwargs)
-            except (duckdb.InvalidInputException, NameError):
-                self._register_failure()
-
-        if first.startswith(("parquet://", "parq://")) or first.endswith(
-            ("parq", "parquet")
-        ):
-            return self.read_parquet(source, table_name=table_name, **kwargs)
-        elif first.startswith(
-            ("csv://", "csv.gz://", "txt://", "txt.gz://")
-        ) or first.endswith(("csv", "csv.gz", "tsv", "tsv.gz", "txt", "txt.gz")):
-            return self.read_csv(source, table_name=table_name, **kwargs)
-        elif first.startswith(("postgres://", "postgresql://")):
-            return self.read_postgres(source, table_name=table_name, **kwargs)
-        elif first.startswith("sqlite://"):
-            return self.read_sqlite(
-                first[len("sqlite://") :], table_name=table_name, **kwargs
-            )
-        else:
-            self._register_failure()  # noqa: RET503
-
-    def _register_failure(self):
-        import inspect
-
-        msg = ", ".join(
-            name for name, _ in inspect.getmembers(self) if name.startswith("read_")
-        )
-        raise ValueError(
-            f"Cannot infer appropriate read function for input, "
-            f"please call one of {msg} directly"
-        )
-
     @util.experimental
     def read_json(
         self,
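
The removed DuckDB `register` also dispatched on URI prefixes (`postgres://`, `sqlite://`) before handing off to a reader. Those readers remain public, so the replacement is a direct call; a sketch with placeholder connection strings and paths that will not resolve as written:

```python
import ibis

con = ibis.duckdb.connect()

# Formerly: con.register("postgres://user:pass@host:5432/db", table_name="t")
pg_table = con.read_postgres("postgres://user:pass@host:5432/db", table_name="t")

# Formerly: con.register(f"sqlite://{path}", "t"); the prefix is no longer needed.
sqlite_table = con.read_sqlite("/path/to/test.db", table_name="t")

# Plain file paths keep going through read_csv / read_parquet.
rows = con.read_csv("data.csv.gz", table_name="rows")
```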

ibis/backends/duckdb/tests/test_register.py renamed to ibis/backends/duckdb/tests/test_io.py

Lines changed: 0 additions & 14 deletions
@@ -229,20 +229,6 @@ def test_read_sqlite_no_table_name(con, tmp_path):
     con.read_sqlite(path)
 
 
-@pytest.mark.xfail(
-    LINUX and SANDBOXED,
-    reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
-    raises=duckdb.IOException,
-)
-def test_register_sqlite(con, tmp_path):
-    path = tmp_path / "test.db"
-    sqlite_con = sqlite3.connect(str(path))
-    sqlite_con.execute("CREATE TABLE t AS SELECT 1 a UNION SELECT 2 UNION SELECT 3")
-    with pytest.warns(FutureWarning, match="v9.1"):
-        ft = con.register(f"sqlite://{path}", "t")
-    assert ft.count().execute()
-
-
 # Because we create a new connection and the test requires loading/installing a
 # DuckDB extension, we need to xfail these on Nix.
 @pytest.mark.xfail(
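
The deleted `test_register_sqlite` has a close equivalent through the public reader. A hypothetical rewrite of the same check without `register` (not part of this commit) could look like:

```python
import sqlite3


def test_read_sqlite_roundtrip(con, tmp_path):
    # Same setup as the removed test, but going through con.read_sqlite directly.
    path = tmp_path / "test.db"
    sqlite_con = sqlite3.connect(str(path))
    sqlite_con.execute("CREATE TABLE t AS SELECT 1 a UNION SELECT 2 UNION SELECT 3")
    sqlite_con.close()

    ft = con.read_sqlite(path, table_name="t")
    assert ft.count().execute()
```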
