Skip to content

Commit 5239077

Browse files
committed
differentiate range bound and unbounded window
1 parent 850ea41 commit 5239077

File tree

6 files changed

+64
-28
lines changed

6 files changed

+64
-28
lines changed

bigframes/core/array_value.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ def project_window_op(
412412
skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection
413413
"""
414414
# TODO: Support non-deterministic windowing
415-
if window_spec.row_bounded or not op.order_independent:
415+
if window_spec.is_row_bounded or not op.order_independent:
416416
if self.node.order_ambiguous and not self.session._strictly_ordered:
417417
if not self.session._allows_ambiguity:
418418
raise ValueError(

bigframes/core/compile/compiled.py

+20-9
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,9 @@ def project_window_op(
463463
never_skip_nulls=never_skip_nulls,
464464
)
465465

466+
if expression.op.order_independent and window_spec.is_unbounded:
467+
# notably percentile_cont does not support ordering clause
468+
window_spec = window_spec.without_order()
466469
window = self._ibis_window_from_spec(window_spec)
467470
bindings = {col: self._get_ibis_column(col) for col in self.column_ids}
468471

@@ -538,21 +541,29 @@ def _ibis_window_from_spec(self, window_spec: WindowSpec):
538541
# 1. Order-independent op (aggregation, cut, rank) with unbound window - no ordering clause needed
539542
# 2. Order-independent op (aggregation, cut, rank) with range window - use ordering clause, ties allowed
540543
# 3. Order-depedenpent op (navigation functions, array_agg) or rows bounds - use total row order to break ties.
541-
if not window_spec.ordering:
542-
# If window spec has following or preceding bounds, we need to apply an unambiguous ordering.
543-
raise ValueError("No ordering provided for ordered analytic function")
544+
if window_spec.is_row_bounded:
545+
if not window_spec.ordering:
546+
# If window spec has following or preceding bounds, we need to apply an unambiguous ordering.
547+
raise ValueError("No ordering provided for ordered analytic function")
548+
order_by = _convert_row_ordering_to_table_values(
549+
self._column_names,
550+
window_spec.ordering,
551+
)
544552

545-
if window_spec.row_bounded:
553+
elif window_spec.is_range_bounded:
554+
order_by = _convert_range_ordering_to_table_value(
555+
self._column_names,
556+
window_spec.ordering[0],
557+
)
558+
# The rest if branches are for unbounded windows
559+
elif window_spec.ordering:
560+
# Unbound grouping window. Suitable for aggregations but not for analytic function application.
546561
order_by = _convert_row_ordering_to_table_values(
547562
self._column_names,
548563
window_spec.ordering,
549564
)
550565
else:
551-
order_by = [
552-
_convert_range_ordering_to_table_value(
553-
self._column_names, window_spec.ordering[0]
554-
)
555-
]
566+
order_by = None
556567

557568
window = bigframes_vendored.ibis.window(order_by=order_by, group_by=group_by)
558569
if window_spec.bounds is not None:

bigframes/core/nodes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1358,7 +1358,7 @@ def _validate(self):
13581358
"""Validate the local data in the node."""
13591359
# Since inner order and row bounds are coupled, rank ops can't be row bounded
13601360
assert (
1361-
not self.window_spec.row_bounded
1361+
not self.window_spec.is_row_bounded
13621362
) or self.expression.op.implicitly_inherits_order
13631363
assert all(ref in self.child.ids for ref in self.expression.column_references)
13641364

@@ -1420,7 +1420,7 @@ def inherits_order(self) -> bool:
14201420
op_inherits_order = (
14211421
not self.expression.op.order_independent
14221422
) and self.expression.op.implicitly_inherits_order
1423-
return op_inherits_order or self.window_spec.row_bounded
1423+
return op_inherits_order or self.window_spec.is_row_bounded
14241424

14251425
@property
14261426
def additive_base(self) -> BigFrameNode:

bigframes/core/rewrite/windows.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def rewrite_range_rolling(node: nodes.BigFrameNode) -> nodes.BigFrameNode:
2424
if not isinstance(node, nodes.WindowOpNode):
2525
return node
2626

27-
if node.window_spec.row_bounded:
27+
if not node.window_spec.is_range_bounded:
2828
return node
2929

3030
if len(node.window_spec.ordering) != 1:

bigframes/core/window/rolling.py

+18-13
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def create_range_window(
140140
if index_dtypes[0] != dtypes.TIMESTAMP_DTYPE:
141141
raise ValueError("Index type should be timestamps with timezones")
142142

143-
order_direction = _find_ordering(block.expr.node, block.index_columns[0])
143+
order_direction = _find_order_direction(block.expr.node, block.index_columns[0])
144144
if order_direction is None:
145145
raise ValueError(
146146
"The index might not be in a monotonic order. Please sort the index before rolling."
@@ -160,7 +160,7 @@ def create_range_window(
160160

161161

162162
@singledispatch
163-
def _find_ordering(
163+
def _find_order_direction(
164164
root: nodes.BigFrameNode, column_id: str
165165
) -> ordering.OrderingDirection | None:
166166
"""Returns the order of the given column with tree traversal. If the column cannot be found,
@@ -169,32 +169,37 @@ def _find_ordering(
169169
return None
170170

171171

172-
@_find_ordering.register
172+
@_find_order_direction.register
173173
def _(root: nodes.OrderByNode, column_id: str):
174-
for order_expr in root.by:
175-
scalar_expr = order_expr.scalar_expression
176-
if isinstance(scalar_expr, ex.DerefOp) and scalar_expr.id.name == column_id:
177-
return order_expr.direction
174+
if len(root.by) == 0:
175+
return None
176+
177+
# Only when the column is used as the first ordering key
178+
# does it guarantee that its values are in a monotonic order.
179+
order_expr = root.by[0]
180+
scalar_expr = order_expr.scalar_expression
181+
if isinstance(scalar_expr, ex.DerefOp) and scalar_expr.id.name == column_id:
182+
return order_expr.direction
178183

179184
return None
180185

181186

182-
@_find_ordering.register
187+
@_find_order_direction.register
183188
def _(root: nodes.ReversedNode, column_id: str):
184-
direction = _find_ordering(root.child, column_id)
189+
direction = _find_order_direction(root.child, column_id)
185190

186191
if direction is None:
187192
return None
188193
return direction.reverse()
189194

190195

191-
@_find_ordering.register
196+
@_find_order_direction.register
192197
def _(root: nodes.SelectionNode, column_id: str):
193198
for alias_ref in root.input_output_pairs:
194199
if alias_ref.id.name == column_id:
195-
return _find_ordering(root.child, alias_ref.ref.id.name)
200+
return _find_order_direction(root.child, alias_ref.ref.id.name)
196201

197202

198-
@_find_ordering.register
203+
@_find_order_direction.register
199204
def _(root: nodes.FilterNode, column_id: str):
200-
return _find_ordering(root.child, column_id)
205+
return _find_order_direction(root.child, column_id)

bigframes/core/window_spec.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ class WindowSpec:
240240
min_periods: int = 0
241241

242242
@property
243-
def row_bounded(self):
243+
def is_row_bounded(self):
244244
"""
245245
Whether the window is bounded by row offsets.
246246
@@ -249,6 +249,26 @@ def row_bounded(self):
249249
"""
250250
return isinstance(self.bounds, RowsWindowBounds)
251251

252+
@property
253+
def is_range_bounded(self):
254+
"""
255+
Whether the window is bounded by range offsets.
256+
257+
This is relevant for determining whether the window requires a total order
258+
to calculate deterministically.
259+
"""
260+
return isinstance(self.bounds, RangeWindowBounds)
261+
262+
@property
263+
def is_unbounded(self):
264+
"""
265+
Whether the window is unbounded.
266+
267+
This is relevant for determining whether the window requires a total order
268+
to calculate deterministically.
269+
"""
270+
return self.bounds is None
271+
252272
@property
253273
def all_referenced_columns(self) -> Set[ids.ColumnId]:
254274
"""
@@ -261,7 +281,7 @@ def all_referenced_columns(self) -> Set[ids.ColumnId]:
261281

262282
def without_order(self) -> WindowSpec:
263283
"""Removes ordering clause if ordering isn't required to define bounds."""
264-
if self.row_bounded:
284+
if self.is_row_bounded:
265285
raise ValueError("Cannot remove order from row-bounded window")
266286
return replace(self, ordering=())
267287

0 commit comments

Comments
 (0)