Skip to content

Commit 8166717

Browse files
authored
refactor(joins): require explicit abstract table as RHS of joins (#9661)
## Description of changes

We previously had limited support for passing in-memory objects as the RHS of a join: we would create a memtable on the user's behalf and then join against it. For backends where memtable creation is expensive, or for queries that may reference the same in-memory data multiple times, it is better to be explicit and first register the in-memory data with the backend using either `memtable` or `create_table`.

BREAKING CHANGE: Passing a `pyarrow.Table` or a `pandas.DataFrame` as the right-hand side of a join is no longer supported. To join against in-memory data, pass the in-memory object to `ibis.memtable` or `con.create_table` and use the resulting table object instead.

## Issues closed

* Resolves #9571
1 parent 10b38ee commit 8166717

File tree

3 files changed

+11
-27
lines changed

3 files changed

+11
-27
lines changed

ibis/backends/tests/test_join.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,8 @@ def test_join_with_pandas(batting, awards_players):
188188
batting_filt = batting.filter(lambda t: t.yearID < 1900)
189189
awards_players_filt = awards_players.filter(lambda t: t.yearID < 1900).execute()
190190
assert isinstance(awards_players_filt, pd.DataFrame)
191-
expr = batting_filt.join(awards_players_filt, "yearID")
191+
t = ibis.memtable(awards_players_filt)
192+
expr = batting_filt.join(t, "yearID")
192193
df = expr.execute()
193194
assert df.yearID.nunique() == 7
194195

@@ -206,7 +207,9 @@ def test_join_with_pandas_non_null_typed_columns(batting, awards_players):
206207

207208
assert sch.infer(awards_players_filt) == sch.Schema(dict(yearID="int"))
208209
assert isinstance(awards_players_filt, pd.DataFrame)
209-
expr = batting_filt.join(awards_players_filt, "yearID")
210+
211+
t = ibis.memtable(awards_players_filt)
212+
expr = batting_filt.join(t, "yearID")
210213
df = expr.execute()
211214
assert df.yearID.nunique() == 7
212215

ibis/expr/types/joins.py

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,14 @@
55

66
from public import public
77

8-
import ibis
98
import ibis.expr.operations as ops
109
from ibis import util
1110
from ibis.common.deferred import Deferred
1211
from ibis.common.egraph import DisjointSet
1312
from ibis.common.exceptions import (
1413
ExpressionError,
1514
IbisInputError,
15+
IbisTypeError,
1616
InputTypeError,
1717
IntegrityError,
1818
)
@@ -31,28 +31,6 @@
3131
from ibis.expr.operations.relations import JoinKind
3232

3333

34-
def coerce_to_table(data):
35-
try:
36-
import pandas as pd
37-
except ImportError:
38-
pass
39-
else:
40-
if isinstance(data, pd.DataFrame):
41-
return ibis.memtable(data)
42-
43-
try:
44-
import pyarrow as pa
45-
except ImportError:
46-
pass
47-
else:
48-
if isinstance(data, pa.Table):
49-
return ibis.memtable(data)
50-
51-
if not isinstance(data, Table):
52-
raise TypeError(f"right operand must be a Table, got {type(data).__name__}")
53-
return data
54-
55-
5634
def disambiguate_fields(
5735
how,
5836
predicates,
@@ -254,7 +232,10 @@ def join(
254232
lname: str = "",
255233
rname: str = "{name}_right",
256234
):
257-
right = coerce_to_table(right)
235+
if not isinstance(right, Table):
236+
raise IbisTypeError(
237+
f"Right side of join must be an Ibis table, got {type(right)}."
238+
)
258239

259240
if how == "left_semi":
260241
how = "semi"

ibis/tests/expr/test_table.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1315,7 +1315,7 @@ def test_join_invalid_expr_type(con):
13151315
invalid_right = left.foo_id
13161316
join_key = ["bar_id"]
13171317

1318-
with pytest.raises(TypeError):
1318+
with pytest.raises(com.IbisTypeError):
13191319
left.inner_join(invalid_right, join_key)
13201320

13211321

0 commit comments

Comments
 (0)