Skip to content

Commit db29e10

Browse files
committed
feat(pyspark): implement count distinct
1 parent aea4ccd commit db29e10

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

ibis/backends/pyspark/compiler.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -579,6 +579,11 @@ def compile_count(t, op, **kwargs):
     return compile_aggregator(t, op, fn=F.count, **kwargs)
 
 
+@compiles(ops.CountDistinct)
+def compile_count_distinct(t, op, **kwargs):
+    return compile_aggregator(t, op, fn=F.count_distinct, **kwargs)
+
+
 @compiles(ops.CountStar)
 def compile_count_star(t, op, aggcontext=None, **kwargs):
     src_table = t.translate(op.arg, **kwargs)

ibis/backends/tests/test_aggregation.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ def mean_and_std(v):
             lambda t, where: t.bool_col[where].dropna().nunique(),
             id='nunique',
             marks=pytest.mark.notimpl(
-                ["pyspark", "datafusion"], raises=com.OperationNotDefinedError
+                ["datafusion"], raises=com.OperationNotDefinedError
             ),
         ),
         param(

0 commit comments

Comments
 (0)