Skip to content

Commit cd9a34c

Browse files
committed
feat(api): add distinct to Intersection and Difference operations
1 parent 772f56e commit cd9a34c

File tree

2 files changed

+13
-9
lines changed

2 files changed

+13
-9
lines changed

ibis/expr/operations/relations.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -254,13 +254,14 @@ def __init__(self, left, right, by, predicates, **kwargs):
254254
class SetOp(TableNode, sch.HasSchema):
255255
left = rlz.table
256256
right = rlz.table
257+
distinct = rlz.optional(rlz.instance_of(bool), default=False)
257258

258-
def __init__(self, left, right, **kwargs):
259+
def __init__(self, left, right, distinct: bool, **kwargs):
259260
if not left.schema().equals(right.schema()):
260261
raise com.RelationError(
261262
'Table schemas must be equal for set operations'
262263
)
263-
super().__init__(left=left, right=right, **kwargs)
264+
super().__init__(left=left, right=right, distinct=distinct, **kwargs)
264265

265266
@property
266267
def schema(self):
@@ -272,7 +273,7 @@ def blocks(self):
272273

273274
@public
274275
class Union(SetOp):
275-
distinct = rlz.optional(rlz.instance_of(bool), default=False)
276+
pass
276277

277278

278279
@public

ibis/expr/types/relations.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ def view(self) -> Table:
292292

293293
return ops.SelfReference(self).to_expr()
294294

295-
def difference(self, right: Table) -> Table:
295+
def difference(self, right: Table, distinct: bool = True) -> Table:
296296
"""Compute the set difference of two table expressions.
297297
298298
The input tables must have identical schemas.
@@ -301,6 +301,8 @@ def difference(self, right: Table) -> Table:
301301
----------
302302
right
303303
Table expression
304+
distinct
305+
Only return distinct rows of the calling table that do not occur in `right`
304306
305307
Returns
306308
-------
@@ -309,7 +311,7 @@ def difference(self, right: Table) -> Table:
309311
"""
310312
from ibis.expr import operations as ops
311313

312-
return ops.Difference(self, right).to_expr()
314+
return ops.Difference(self, right, distinct=distinct).to_expr()
313315

314316
def aggregate(
315317
self,
@@ -455,8 +457,7 @@ def union(
455457
right
456458
Table expression
457459
distinct
458-
Only union distinct rows not occurring in the calling table (this
459-
can be very expensive, be careful)
460+
Only return distinct rows from the union of the calling table and `right`
460461
461462
Returns
462463
-------
@@ -467,7 +468,7 @@ def union(
467468

468469
return ops.Union(self, right, distinct=distinct).to_expr()
469470

470-
def intersect(self, right: Table) -> Table:
471+
def intersect(self, right: Table, distinct: bool = True) -> Table:
471472
"""Compute the set intersection of two table expressions.
472473
473474
The input tables must have identical schemas.
@@ -476,6 +477,8 @@ def intersect(self, right: Table) -> Table:
476477
----------
477478
right
478479
Table expression
480+
distinct
481+
Only return distinct rows that occur in both the calling table and `right`
479482
480483
Returns
481484
-------
@@ -484,7 +487,7 @@ def intersect(self, right: Table) -> Table:
484487
"""
485488
from ibis.expr import operations as ops
486489

487-
return ops.Intersection(self, right).to_expr()
490+
return ops.Intersection(self, right, distinct=distinct).to_expr()
488491

489492
def to_array(self) -> ir.Column:
490493
"""View a single column table as an array.

0 commit comments

Comments
 (0)