Commit b27d252: fix(stream): arrangement backfill background ddl (#14563)

1 parent: 76656e0

File tree: 8 files changed, +159 −40 lines
(new file) Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+if [ $BUILDKITE_COMMAND_EXIT_STATUS -ne 0 ]; then
+  mv .risingwave/log risedev-logs
+  zip -q -r risedev-logs.zip risedev-logs/
+  buildkite-agent artifact upload risedev-logs.zip
+  REGRESS_TEST_DIR="$PWD/src/tests/regress/output/results/"
+  if [ -d "$REGRESS_TEST_DIR" ]; then
+    mkdir regress-test-logs && cp src/tests/regress/output/results/* regress-test-logs/
+    zip -q -r regress-test-logs.zip regress-test-logs/
+    buildkite-agent artifact upload regress-test-logs.zip
+  fi
+  if [ -e "$PWD/connector-node.log" ]; then
+    buildkite-agent artifact upload "$PWD/connector-node.log"
+  fi
+fi
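This new script is presumably the post-command hook of the `upload-failure-logs-zipped` plugin referenced in the workflow files below; Buildkite exposes the step command's exit code in `BUILDKITE_COMMAND_EXIT_STATUS`, so logs are zipped and uploaded only when the step failed. A minimal sketch of exercising it outside CI, assuming the standard Buildkite plugin layout (`hooks/post-command`):

  # Hypothetical local dry run: simulate a failed step, then invoke the hook.
  export BUILDKITE_COMMAND_EXIT_STATUS=1
  bash ./ci/plugins/upload-failure-logs-zipped/hooks/post-command
  # Expect risedev-logs.zip (plus regress-test-logs.zip if regress results exist);
  # the `buildkite-agent artifact upload` calls will fail outside a real CI job.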

ci/scripts/common.sh

Lines changed: 13 additions & 0 deletions

@@ -87,3 +87,16 @@ function download_and_prepare_rw() {
   cargo make pre-start-dev
   cargo make --allow-private link-all-in-one-binaries
 }
+
+function filter_stack_trace() {
+  # Only keep first 3 lines of backtrace: 0-2.
+  echo "filtering stack trace for $1"
+  touch tmp
+  cat "$1" \
+    | sed -E '/ [1-9][0-9]+:/d' \
+    | sed -E '/ [3-9]+:/d' \
+    | sed -E '/ at .rustc/d' \
+    | sed -E '/ at ...cargo/d' > tmp
+  cp tmp "$1"
+  rm tmp
+}
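The four `sed` expressions drop backtrace frames numbered 10 and above, frames 3-9, and the source-location lines under `/rustc` and `.cargo` checkouts, keeping only frames 0-2. A small demonstration of the same pipeline on a made-up backtrace (the log content here is hypothetical):

  cat > demo.log <<'EOF'
  thread 'main' panicked at 'oops'
     0: rust_begin_unwind
     1: core::panicking::panic_fmt
     2: risingwave_stream::executor::backfill::do_backfill
               at ~/.cargo/registry/src/some-crate/lib.rs:1:1
     3: some::intermediate::frame
               at /rustc/abcdef/library/std/src/panicking.rs:593:5
    12: tokio::runtime::task::harness::poll
  EOF
  sed -E '/ [1-9][0-9]+:/d' demo.log \
    | sed -E '/ [3-9]+:/d' \
    | sed -E '/ at .rustc/d' \
    | sed -E '/ at ...cargo/d'
  # Prints the panic message and frames 0-2 only, with source locations stripped.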

ci/scripts/deterministic-recovery-test.sh

Lines changed: 54 additions & 5 deletions

@@ -14,21 +14,70 @@ risingwave_meta::manager::catalog=debug,\
 risingwave_meta::rpc::ddl_controller=debug,\
 risingwave_meta::barrier::mod=debug,\
 risingwave_simulation=debug"
+
+# Extra logs you can enable if the existing trace does not give enough info:
+#risingwave_stream::executor::backfill=trace,
+#risingwave_meta::barrier::progress=debug,
+
+# ========= Some tips for debugging recovery tests =========
+# 1. If a materialized view fails to create after multiple retries,
+#    check the logs to see where its creation got stuck.
+#    1. Is it stuck waiting for a backfill executor response?
+#       Then perhaps some backfill logic is flawed; add more trace in backfill to debug.
+#    2. Is it stuck waiting for the backfill executor to finish?
+#       Then perhaps some part of the backfill loop is slow or stuck.
+#    3. Is it stuck waiting for some executor to report progress?
+#       Then perhaps the tracking of backfill's progress in meta is flawed.
+
 export LOGDIR=.risingwave/log

 mkdir -p $LOGDIR

+filter_stack_trace_for_all_logs() {
+  # `filter_stack_trace` is defined in `common.sh`.
+  for log in "${LOGDIR}"/*.log; do
+    filter_stack_trace $log
+  done
+}
+
+trap filter_stack_trace_for_all_logs ERR
+
 echo "--- deterministic simulation e2e, ci-3cn-2fe-3meta, recovery, background_ddl"
-seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --kill --kill-rate=${KILL_RATE} ./e2e_test/background_ddl/sim/basic.slt 2> $LOGDIR/recovery-ddl-{}.log && rm $LOGDIR/recovery-ddl-{}.log'
+seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation \
+--kill \
+--kill-rate=${KILL_RATE} \
+${USE_ARRANGEMENT_BACKFILL:-} \
+./e2e_test/background_ddl/sim/basic.slt \
+2> $LOGDIR/recovery-background-ddl-{}.log && rm $LOGDIR/recovery-background-ddl-{}.log'

 echo "--- deterministic simulation e2e, ci-3cn-2fe-3meta, recovery, ddl"
-seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --kill --kill-rate=${KILL_RATE} --background-ddl-rate=${BACKGROUND_DDL_RATE} ./e2e_test/ddl/\*\*/\*.slt 2> $LOGDIR/recovery-ddl-{}.log && rm $LOGDIR/recovery-ddl-{}.log'
+seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation \
+--kill \
+--kill-rate=${KILL_RATE} \
+--background-ddl-rate=${BACKGROUND_DDL_RATE} \
+${USE_ARRANGEMENT_BACKFILL:-} \
+./e2e_test/ddl/\*\*/\*.slt 2> $LOGDIR/recovery-ddl-{}.log && rm $LOGDIR/recovery-ddl-{}.log'

 echo "--- deterministic simulation e2e, ci-3cn-2fe-3meta, recovery, streaming"
-seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --kill --kill-rate=${KILL_RATE} --background-ddl-rate=${BACKGROUND_DDL_RATE} ./e2e_test/streaming/\*\*/\*.slt 2> $LOGDIR/recovery-streaming-{}.log && rm $LOGDIR/recovery-streaming-{}.log'
+seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation \
+--kill \
+--kill-rate=${KILL_RATE} \
+--background-ddl-rate=${BACKGROUND_DDL_RATE} \
+${USE_ARRANGEMENT_BACKFILL:-} \
+./e2e_test/streaming/\*\*/\*.slt 2> $LOGDIR/recovery-streaming-{}.log && rm $LOGDIR/recovery-streaming-{}.log'

 echo "--- deterministic simulation e2e, ci-3cn-2fe-3meta, recovery, batch"
-seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --kill --kill-rate=${KILL_RATE} --background-ddl-rate=${BACKGROUND_DDL_RATE} ./e2e_test/batch/\*\*/\*.slt 2> $LOGDIR/recovery-batch-{}.log && rm $LOGDIR/recovery-batch-{}.log'
+seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation \
+--kill \
+--kill-rate=${KILL_RATE} \
+--background-ddl-rate=${BACKGROUND_DDL_RATE} \
+${USE_ARRANGEMENT_BACKFILL:-} \
+./e2e_test/batch/\*\*/\*.slt 2> $LOGDIR/recovery-batch-{}.log && rm $LOGDIR/recovery-batch-{}.log'

 echo "--- deterministic simulation e2e, ci-3cn-2fe-3meta, recovery, kafka source,sink"
-seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation --kill --kill-rate=${KILL_RATE} --kafka-datadir=./scripts/source/test_data ./e2e_test/source/basic/kafka\*.slt 2> $LOGDIR/recovery-source-{}.log && rm $LOGDIR/recovery-source-{}.log'
+seq $TEST_NUM | parallel MADSIM_TEST_SEED={} './risingwave_simulation \
+--kill \
+--kill-rate=${KILL_RATE} \
+--kafka-datadir=./scripts/source/test_data \
+${USE_ARRANGEMENT_BACKFILL:-} \
+./e2e_test/source/basic/kafka\*.slt 2> $LOGDIR/recovery-source-{}.log && rm $LOGDIR/recovery-source-{}.log'
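With `USE_ARRANGEMENT_BACKFILL` unset, `${USE_ARRANGEMENT_BACKFILL:-}` expands to nothing and the invocations behave as before; when CI sets it to `--use-arrangement-backfill`, every simulation run forces arrangement backfill. Because each run is seeded via `MADSIM_TEST_SEED`, a failing seed can be replayed deterministically. A sketch of replaying one seed locally (the seed value 7 is arbitrary; flags and paths are taken from the script above):

  export LOGDIR=.risingwave/log && mkdir -p $LOGDIR
  MADSIM_TEST_SEED=7 ./risingwave_simulation \
    --kill \
    --kill-rate=1.0 \
    --use-arrangement-backfill \
    ./e2e_test/background_ddl/sim/basic.slt \
    2> $LOGDIR/recovery-background-ddl-7.log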

ci/workflows/main-cron.yml

Lines changed: 23 additions & 2 deletions

@@ -315,7 +315,27 @@ steps:
           run: rw-build-env
           config: ci/docker-compose.yml
           mount-buildkite-agent: true
-      - ./ci/plugins/upload-failure-logs
+      # Only upload zipped files, otherwise the logs are too large.
+      - ./ci/plugins/upload-failure-logs-zipped
+    timeout_in_minutes: 60
+    retry: *auto-retry
+
+  # DDL statements will randomly run with background_ddl.
+  - label: "background_ddl, arrangement_backfill recovery test (deterministic simulation)"
+    key: "background-ddl-arrangement-backfill-recovery-test-deterministic"
+    command: "TEST_NUM=12 KILL_RATE=1.0 BACKGROUND_DDL_RATE=0.8 USE_ARRANGEMENT_BACKFILL=--use-arrangement-backfill timeout 55m ci/scripts/deterministic-recovery-test.sh"
+    if: |
+      !(build.pull_request.labels includes "ci/main-cron/skip-ci") && build.env("CI_STEPS") == null
+      || build.pull_request.labels includes "ci/run-recovery-test-deterministic-simulation"
+      || build.env("CI_STEPS") =~ /(^|,)recovery-tests?-deterministic-simulation(,|$$)/
+    depends_on: "build-simulation"
+    plugins:
+      - docker-compose#v4.9.0:
+          run: rw-build-env
+          config: ci/docker-compose.yml
+          mount-buildkite-agent: true
+      # Only upload zipped files, otherwise the logs are too large.
+      - ./ci/plugins/upload-failure-logs-zipped
     timeout_in_minutes: 60
     retry: *auto-retry

@@ -333,7 +353,8 @@ steps:
           run: rw-build-env
           config: ci/docker-compose.yml
           mount-buildkite-agent: true
-      - ./ci/plugins/upload-failure-logs
+      # Only upload zipped files, otherwise the logs are too large.
+      - ./ci/plugins/upload-failure-logs-zipped
     timeout_in_minutes: 60
     retry: *auto-retry
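The new cron step drives the recovery script entirely through environment variables, so the same configuration can be reproduced outside Buildkite (assuming a simulator binary built as in the `build-simulation` step):

  TEST_NUM=12 KILL_RATE=1.0 BACKGROUND_DDL_RATE=0.8 \
    USE_ARRANGEMENT_BACKFILL=--use-arrangement-backfill \
    timeout 55m ci/scripts/deterministic-recovery-test.sh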

ci/workflows/pull-request.yml

Lines changed: 2 additions & 1 deletion

@@ -463,10 +463,11 @@ steps:
           run: rw-build-env
           config: ci/docker-compose.yml
           mount-buildkite-agent: true
+      # Only upload zipped files, otherwise the logs are too large.
+      - ./ci/plugins/upload-failure-logs-zipped
       # - test-collector#v1.0.0:
       #     files: "*-junit.xml"
       #     format: "junit"
-      - ./ci/plugins/upload-failure-logs
   timeout_in_minutes: 25
   cancel_on_build_failing: true
   retry: *auto-retry

src/stream/src/executor/backfill/arrangement_backfill.rs

Lines changed: 37 additions & 32 deletions

@@ -110,6 +110,7 @@ where

     #[try_stream(ok = Message, error = StreamExecutorError)]
     async fn execute_inner(mut self) {
+        tracing::debug!("Arrangement Backfill Executor started");
         // The primary key columns, in the output columns of the upstream_table scan.
         // Table scan scans a subset of the columns of the upstream table.
         let pk_in_output_indices = self.upstream_table.pk_in_output_indices().unwrap();

@@ -451,54 +452,58 @@ where
                     "backfill_finished_wait_for_barrier"
                 );
                 // If not finished then we need to update state, otherwise no need.
-                if let Message::Barrier(barrier) = &msg
-                    && !is_completely_finished
-                {
-                    // If snapshot was empty, we do not need to backfill,
-                    // but we still need to persist the finished state.
-                    // We currently persist it on the second barrier here rather than first.
-                    // This is because we can't update state table in first epoch,
-                    // since it expects to have been initialized in previous epoch
-                    // (there's no epoch before the first epoch).
-                    for vnode in upstream_table.vnodes().iter_vnodes() {
-                        backfill_state.finish_progress(vnode, upstream_table.pk_indices().len());
-                    }
+                if let Message::Barrier(barrier) = &msg {
+                    if is_completely_finished {
+                        // If already finished, no need to persist any state.
+                    } else {
+                        // If the snapshot was empty, we do not need to backfill,
+                        // but we still need to persist the finished state.
+                        // We currently persist it on the second barrier here rather than the first.
+                        // This is because we can't update the state table in the first epoch,
+                        // since it expects to have been initialized in a previous epoch
+                        // (there's no epoch before the first epoch).
+                        for vnode in upstream_table.vnodes().iter_vnodes() {
+                            backfill_state
+                                .finish_progress(vnode, upstream_table.pk_indices().len());
+                        }

-                    persist_state_per_vnode(
-                        barrier.epoch,
-                        &mut self.state_table,
-                        &mut backfill_state,
-                        #[cfg(debug_assertions)]
-                        state_len,
-                        vnodes.iter_vnodes(),
-                    )
-                    .await?;
+                        persist_state_per_vnode(
+                            barrier.epoch,
+                            &mut self.state_table,
+                            &mut backfill_state,
+                            #[cfg(debug_assertions)]
+                            state_len,
+                            vnodes.iter_vnodes(),
+                        )
+                        .await?;
+                    }

                     self.progress
                         .finish(barrier.epoch.curr, total_snapshot_processed_rows);
+                    tracing::trace!(
+                        epoch = ?barrier.epoch,
+                        "Updated CreateMaterializedTracker"
+                    );
                     yield msg;
                     break;
-                } else {
-                    // Allow other messages to pass through.
-                    yield msg;
                 }
+                // Allow other messages to pass through.
+                // We won't yield twice here, since if there's a barrier,
+                // we will always break out of the loop.
+                yield msg;
             }
         }

+        tracing::trace!(
+            "Arrangement Backfill has finished; forwarding messages directly to the downstream"
+        );
+
         // After progress finished + state persisted,
         // we can forward messages directly to the downstream,
         // as backfill is finished.
         #[for_await]
         for msg in upstream {
             if let Some(msg) = mapping_message(msg?, &self.output_indices) {
-                tracing::trace!(
-                    actor = self.actor_id,
-                    message = ?msg,
-                    "backfill_finished_after_barrier"
-                );
-                if let Message::Barrier(barrier) = &msg {
-                    self.state_table.commit_no_data_expected(barrier.epoch);
-                }
                 yield msg;
             }
         }
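The new `tracing` calls live under the `risingwave_stream::executor::backfill` target, the same module the commented-out filter line in `deterministic-recovery-test.sh` enables. A sketch of surfacing them in a one-off simulation run, assuming the binary honors the usual `RUST_LOG` filter syntax:

  # Hypothetical one-off run that prints the new backfill trace logs:
  export RUST_LOG="risingwave_stream::executor::backfill=trace,info"
  MADSIM_TEST_SEED=1 ./risingwave_simulation --use-arrangement-backfill \
    ./e2e_test/background_ddl/sim/basic.slt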

src/stream/src/executor/backfill/no_shuffle_backfill.rs

Lines changed: 3 additions & 0 deletions

@@ -533,6 +533,9 @@ where
                 yield msg;
                 break;
             }
+            // Allow other messages to pass through.
+            // We won't yield twice here, since if there's a barrier,
+            // we will always break out of the loop.
             yield msg;
         }
     }

src/tests/simulation/src/main.rs

Lines changed: 9 additions & 0 deletions

@@ -144,6 +144,10 @@ pub struct Args {
     /// The probability of background ddl for a ddl query.
     #[clap(long, default_value = "0.0")]
     background_ddl_rate: f64,
+
+    /// Use arrangement backfill by default.
+    #[clap(long, default_value = "false")]
+    use_arrangement_backfill: bool,
 }

 #[tokio::main]

@@ -173,6 +177,11 @@ async fn main() {
         meta_nodes: args.meta_nodes,
         etcd_timeout_rate: args.etcd_timeout_rate,
         etcd_data_path: args.etcd_data,
+        per_session_queries: if args.use_arrangement_backfill {
+            vec!["SET enable_arrangement_backfill = true;".to_string()].into()
+        } else {
+            vec![].into()
+        },
         ..Default::default()
     };
     let kill_opts = KillOpts {
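`per_session_queries` makes the simulator run `SET enable_arrangement_backfill = true;` at the start of every session, so each `CREATE MATERIALIZED VIEW` in the test takes the arrangement-backfill path. The same session variable can be toggled by hand against a running cluster; a sketch assuming RisingWave's default frontend endpoint (`localhost:4566`, user `root`, database `dev`), with `mv_example` and table `t` as hypothetical names:

  psql -h localhost -p 4566 -U root -d dev \
    -c 'SET enable_arrangement_backfill = true;' \
    -c 'CREATE MATERIALIZED VIEW mv_example AS SELECT count(*) FROM t;'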
