From ce7c13a8108c41efc30f5e5732b0b837f5975e06 Mon Sep 17 00:00:00 2001 From: Trevor Bergeron Date: Thu, 13 Feb 2025 21:19:22 +0000 Subject: [PATCH] perf: Simplify sum aggregate SQL text --- bigframes/core/compile/aggregate_compiler.py | 4 +--- bigframes/core/compile/compiled.py | 5 ----- bigframes/core/compile/compiler.py | 9 +++++++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/bigframes/core/compile/aggregate_compiler.py b/bigframes/core/compile/aggregate_compiler.py index 02c7ae128b..91a96febe0 100644 --- a/bigframes/core/compile/aggregate_compiler.py +++ b/bigframes/core/compile/aggregate_compiler.py @@ -164,9 +164,7 @@ def _( ) -> ibis_types.NumericValue: # Will be null if all inputs are null. Pandas defaults to zero sum though. bq_sum = _apply_window_if_present(column.sum(), window) - return ( - ibis_api.case().when(bq_sum.isnull(), ibis_types.literal(0)).else_(bq_sum).end() # type: ignore - ) + return bq_sum.fillna(ibis_types.literal(0)) @compile_unary_agg.register diff --git a/bigframes/core/compile/compiled.py b/bigframes/core/compile/compiled.py index 6a2b583b28..b0cf30269e 100644 --- a/bigframes/core/compile/compiled.py +++ b/bigframes/core/compile/compiled.py @@ -205,7 +205,6 @@ def aggregate( self, aggregations: typing.Sequence[tuple[ex.Aggregation, str]], by_column_ids: typing.Sequence[ex.DerefOp] = (), - dropna: bool = True, order_by: typing.Sequence[OrderingExpression] = (), ) -> UnorderedIR: """ @@ -230,10 +229,6 @@ def aggregate( for aggregate, col_out in aggregations } if by_column_ids: - if dropna: - table = table.filter( - [table[ref.id.sql].notnull() for ref in by_column_ids] - ) result = table.group_by((ref.id.sql for ref in by_column_ids)).aggregate( **stats ) diff --git a/bigframes/core/compile/compiler.py b/bigframes/core/compile/compiler.py index ed95dacf74..9b271bf67b 100644 --- a/bigframes/core/compile/compiler.py +++ b/bigframes/core/compile/compiler.py @@ -25,7 +25,7 @@ import google.cloud.bigquery import pandas as pd -from bigframes import dtypes +from bigframes import dtypes, operations from bigframes.core import utils import bigframes.core.compile.compiled as compiled import bigframes.core.compile.concat as concat_impl @@ -278,8 +278,13 @@ def compile_rowcount(self, node: nodes.RowCountNode): def compile_aggregate(self, node: nodes.AggregateNode): aggs = tuple((agg, id.sql) for agg, id in node.aggregations) result = self.compile_node(node.child).aggregate( - aggs, node.by_column_ids, node.dropna, order_by=node.order_by + aggs, node.by_column_ids, order_by=node.order_by ) + # TODO: Remove dropna field and use filter node instead + if node.dropna: + for key in node.by_column_ids: + if node.child.field_by_id[key.id].nullable: + result = result.filter(operations.notnull_op.as_expr(key)) return result @_compile_node.register