Skip to content

[BugFix] frame_start/frame_end is modified unexpectedly when the window contains lead/lag and other agg #58437

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions be/src/exec/analytor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -956,24 +956,26 @@ void Analytor::_update_window_batch(int64_t partition_start, int64_t partition_e
data_columns[j] = _agg_intput_columns[i][j].get();
}

auto current_frame_start = frame_start;
auto current_frame_end = frame_end;
// For lead/lag function, it uses the relationship between the frame_start and frame_end to determine
// whether NULL value should be generated, so the frame should not be normalized.
if (!_is_lead_lag_functions[i]) {
frame_start = std::max<int64_t>(frame_start, _partition.start);
current_frame_start = std::max<int64_t>(current_frame_start, _partition.start);
// For half unounded window, we have not found the partition end, _found_partition_end.second refers to the next position.
// And for others, _found_partition_end.second is identical to _partition.end, so we can always use _found_partition_end
// instead of _partition.end to refer to the current right boundary.
frame_end = std::min<int64_t>(frame_end, _partition.end);
current_frame_end = std::min<int64_t>(current_frame_end, _partition.end);
}
if (_is_merge_funcs) {
for (size_t j = frame_start; j < frame_end; j++) {
for (size_t j = current_frame_start; j < current_frame_end; j++) {
_agg_functions[i]->merge(_agg_fn_ctxs[i], data_columns[0],
_managed_fn_states[0]->mutable_data() + _agg_states_offsets[i], j);
}
} else {
_agg_functions[i]->update_batch_single_state_with_frame(
_agg_fn_ctxs[i], _managed_fn_states[0]->mutable_data() + _agg_states_offsets[i], data_columns,
partition_start, partition_end, frame_start, frame_end);
partition_start, partition_end, current_frame_start, current_frame_end);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
-- name: test_window_contains_lead_lag_and_aggs
create table t0 (c0 int, c1 int,c2 int) properties("replication_num"="1");
-- result:
-- !result
insert into t0 values (1,2,3),(1,3,5),(1,4,3),(2,1,1),(2,3,4),(2,5,6);
-- result:
-- !result
create table result_tab(r bigint) properties("replication_num"="1");
-- result:
-- !result
insert into result_tab
with cte as(
SELECT c0,c1,c2,
sum(c2) OVER(PARTITION BY c0 ORDER BY c1 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) c3,
max(c2) OVER(PARTITION BY c0 ORDER BY c1 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) c4,
lead(c2,1,0) OVER(PARTITION BY c0 ORDER BY c1) c5
FROM t0)
select sum(murmur_hash3_32(c0,c1,c2,c3,c4,c5)) from cte;
-- result:
-- !result
insert into result_tab
with cte as(
SELECT c0,c1,c2,
lead(c2,1,0) OVER(PARTITION BY c0 ORDER BY c1) c3,
sum(c2) OVER(PARTITION BY c0 ORDER BY c1 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) c4,
max(c2) OVER(PARTITION BY c0 ORDER BY c1 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) c5
FROM t0)
select sum(murmur_hash3_32(c0,c1,c2,c4,c5,c3)) from cte;
-- result:
-- !result
select assert_true(count(distinct r)=1), assert_true(count(1)=2) from result_tab;
-- result:
1 1
-- !result
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- name: test_window_contains_lead_lag_and_aggs

create table t0 (c0 int, c1 int,c2 int) properties("replication_num"="1");

insert into t0 values (1,2,3),(1,3,5),(1,4,3),(2,1,1),(2,3,4),(2,5,6);

create table result_tab(r bigint) properties("replication_num"="1");

insert into result_tab
with cte as(
SELECT c0,c1,c2,
sum(c2) OVER(PARTITION BY c0 ORDER BY c1 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) c3,
max(c2) OVER(PARTITION BY c0 ORDER BY c1 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) c4,
lead(c2,1,0) OVER(PARTITION BY c0 ORDER BY c1) c5
FROM t0)
select sum(murmur_hash3_32(c0,c1,c2,c3,c4,c5)) from cte;

insert into result_tab
with cte as(
SELECT c0,c1,c2,
lead(c2,1,0) OVER(PARTITION BY c0 ORDER BY c1) c3,
sum(c2) OVER(PARTITION BY c0 ORDER BY c1 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) c4,
max(c2) OVER(PARTITION BY c0 ORDER BY c1 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) c5
FROM t0)
select sum(murmur_hash3_32(c0,c1,c2,c4,c5,c3)) from cte;

select assert_true(count(distinct r)=1), assert_true(count(1)=2) from result_tab;
Loading