Skip to content

Commit c9120e1

Browse files
authored
fix(bigquery): ensure that count distinct can be used in window functions without a specified window (#10911)
1 parent 8336483 commit c9120e1

File tree

2 files changed

+50
-2
lines changed

2 files changed

+50
-2
lines changed

ibis/backends/bigquery/tests/system/test_client.py

+42
Original file line number | Diff line number | Diff line change
@@ -505,3 +505,45 @@ def test_raw_sql_params_with_alias(con):
505 505
query_parameters = {cutoff: value}
506 506
result = con.raw_sql(f"SELECT @{name} AS {name}", params=query_parameters)
507 507
assert list(map(dict, result)) == [{name: value}]
508+
509+
510+
@pytest.fixture(scope="module")
511+
def tmp_table(con):
512+
data = pd.DataFrame(
513+
{"foo": [1, 1, 2, 2, 3, 3], "bar": ["a", "b", "a", "a", "b", "b"]}
514+
)
515+
name = gen_name("test_window_with_count_distinct")
516+
test_table = con.create_table(name, data)
517+
yield test_table
518+
con.drop_table(name, force=True)
519+
520+
521+
@pytest.mark.parametrize(
522+
("expr", "query"),
523+
[
524+
(
525+
lambda t: t.group_by("foo").mutate(bar=lambda t: t.bar.nunique()),
526+
"SELECT foo, COUNT(DISTINCT bar) OVER (PARTITION BY foo) AS bar FROM {}".format,
527+
),
528+
(
529+
lambda t: t.filter(
530+
lambda t: t.bar.nunique().over(ibis.window(group_by="foo")) > 1
531+
),
532+
"SELECT * FROM {} QUALIFY COUNT(DISTINCT bar) OVER (PARTITION BY foo) > 1".format,
533+
),
534+
],
535+
ids=["project", "qualify"],
536+
)
537+
def test_window_with_count_distinct(tmp_table, expr, query):
538+
identifier = tmp_table.get_name()
539+
sql = query(identifier)
540+
result = (
541+
expr(tmp_table).to_pandas().sort_values(["foo", "bar"]).reset_index(drop=True)
542+
)
543+
expected = (
544+
tmp_table.sql(sql)
545+
.to_pandas()
546+
.sort_values(["foo", "bar"])
547+
.reset_index(drop=True)
548+
)
549+
tm.assert_frame_equal(result, expected)

ibis/backends/sql/compilers/bigquery/__init__.py

+8 -2
Original file line number | Diff line number | Diff line change
@@ -327,8 +327,14 @@ def _compile_python_udf(self, udf_node: ops.ScalarUDF) -> sge.Create:
327 327
@staticmethod
328 328
def _minimize_spec(op, spec):
329 329
# bigquery doesn't allow certain window functions to specify a window frame
330-
if isinstance(func := op.func, ops.Analytic) and not isinstance(
331-
func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue)
330+
if (
331+
isinstance(func := op.func, ops.CountDistinct)
332+
and (spec.args["start"], spec.args["end"]) == ("UNBOUNDED", "UNBOUNDED")
333+
) or (
334+
isinstance(func, ops.Analytic)
335+
and not isinstance(
336+
func, (ops.First, ops.Last, FirstValue, LastValue, ops.NthValue)
337+
)
332 338
):
333 339
return None
334 340
return spec

0 commit comments

Comments
 (0)