Skip to content

Commit a5c0c71

Browse files
authored
refactor(state-table): introduce watermark buffer strategy to state table (#7869)
Add an option to control the buffering strategy of the watermarks in the state table. Will be used in #7752. Approved-By: st1page Approved-By: soundOfDestiny
1 parent b262f21 commit a5c0c71

File tree

3 files changed

+84
-19
lines changed

3 files changed

+84
-19
lines changed

src/stream/src/common/table/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
pub mod state_table;
16+
mod watermark;
1617

1718
#[cfg(test)]
1819
pub mod test_state_table;

src/stream/src/common/table/state_table.rs

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ use risingwave_storage::table::{compute_chunk_vnode, compute_vnode, Distribution
5151
use risingwave_storage::StateStore;
5252
use tracing::trace;
5353

54+
use super::watermark::{WatermarkBufferByEpoch, WatermarkBufferStrategy};
5455
use crate::executor::{StreamExecutorError, StreamExecutorResult};
5556

5657
/// This num is arbitrary and we may want to improve this choice in the future.
@@ -59,7 +60,10 @@ const STATE_CLEANING_PERIOD_EPOCH: usize = 5;
5960
/// `StateTable` is the interface accessing relational data in KV(`StateStore`) with
6061
/// row-based encoding.
6162
#[derive(Clone)]
62-
pub struct StateTable<S: StateStore> {
63+
pub struct StateTable<
64+
S: StateStore,
65+
W: WatermarkBufferStrategy = WatermarkBufferByEpoch<STATE_CLEANING_PERIOD_EPOCH>,
66+
> {
6367
/// Id for this table.
6468
table_id: TableId,
6569

@@ -119,12 +123,11 @@ pub struct StateTable<S: StateStore> {
119123
/// latest watermark
120124
cur_watermark: Option<ScalarImpl>,
121125

122-
/// number of commits with watermark since the last time we did state cleaning by watermark.
123-
num_wmked_commits_since_last_clean: usize,
126+
watermark_buffer_strategy: W,
124127
}
125128

126129
// initialize
127-
impl<S: StateStore> StateTable<S> {
130+
impl<S: StateStore, W: WatermarkBufferStrategy> StateTable<S, W> {
128131
/// Create state table from table catalog and store.
129132
pub async fn from_table_catalog(
130133
table_catalog: &Table,
@@ -243,7 +246,7 @@ impl<S: StateStore> StateTable<S> {
243246
epoch: None,
244247
last_watermark: None,
245248
cur_watermark: None,
246-
num_wmked_commits_since_last_clean: 0,
249+
watermark_buffer_strategy: W::default(),
247250
}
248251
}
249252

@@ -424,7 +427,7 @@ impl<S: StateStore> StateTable<S> {
424427
epoch: None,
425428
last_watermark: None,
426429
cur_watermark: None,
427-
num_wmked_commits_since_last_clean: 0,
430+
watermark_buffer_strategy: W::default(),
428431
}
429432
}
430433

@@ -595,6 +598,7 @@ impl<S: StateStore> StateTable<S> {
595598
std::mem::replace(&mut self.vnodes, new_vnodes)
596599
}
597600
}
601+
598602
// write
599603
impl<S: StateStore> StateTable<S> {
600604
#[expect(clippy::boxed_local)]
@@ -751,7 +755,7 @@ impl<S: StateStore> StateTable<S> {
751755
assert_eq!(self.epoch(), new_epoch.prev);
752756
assert!(!self.is_dirty());
753757
if self.cur_watermark.is_some() {
754-
self.num_wmked_commits_since_last_clean += 1;
758+
self.watermark_buffer_strategy.tick();
755759
}
756760
self.update_epoch(new_epoch);
757761
}
@@ -762,15 +766,19 @@ impl<S: StateStore> StateTable<S> {
762766
buffer: BTreeMap<Bytes, KeyOp>,
763767
epoch: u64,
764768
) -> StreamExecutorResult<()> {
765-
let watermark = self.cur_watermark.as_ref().and_then(|cur_watermark_ref| {
766-
self.num_wmked_commits_since_last_clean += 1;
767-
768-
if self.num_wmked_commits_since_last_clean >= STATE_CLEANING_PERIOD_EPOCH {
769-
Some(cur_watermark_ref)
769+
let watermark = {
770+
if let Some(watermark) = self.cur_watermark.take() {
771+
self.watermark_buffer_strategy.tick();
772+
if !self.watermark_buffer_strategy.apply() {
773+
self.cur_watermark = Some(watermark);
774+
None
775+
} else {
776+
Some(watermark)
777+
}
770778
} else {
771779
None
772780
}
773-
});
781+
};
774782

775783
let mut write_batch = self.local_store.start_write_batch(WriteOptions {
776784
epoch,
@@ -784,7 +792,7 @@ impl<S: StateStore> StateTable<S> {
784792
};
785793
let range_end_suffix = watermark.map(|watermark| {
786794
serialize_pk(
787-
row::once(Some(watermark.clone())),
795+
row::once(Some(watermark)),
788796
prefix_serializer.as_ref().unwrap(),
789797
)
790798
});
@@ -835,10 +843,6 @@ impl<S: StateStore> StateTable<S> {
835843
}
836844
}
837845
write_batch.ingest().await?;
838-
if watermark.is_some() {
839-
self.last_watermark = self.cur_watermark.take();
840-
self.num_wmked_commits_since_last_clean = 0;
841-
}
842846
Ok(())
843847
}
844848

@@ -950,7 +954,7 @@ fn get_second<T, U>(arg: StreamExecutorResult<(T, U)>) -> StreamExecutorResult<U
950954
}
951955

952956
// Iterator functions
953-
impl<S: StateStore> StateTable<S> {
957+
impl<S: StateStore, W: WatermarkBufferStrategy> StateTable<S, W> {
954958
/// This function scans rows from the relational table.
955959
pub async fn iter(&self) -> StreamExecutorResult<RowStream<'_, S>> {
956960
self.iter_with_pk_prefix(row::empty()).await
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Copyright 2023 RisingWave Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
/// Strategy that decides how watermarks are buffered before being used for state cleaning.
16+
pub trait WatermarkBufferStrategy: Default {
17+
/// Notify the strategy that an epoch has been committed.
18+
fn tick(&mut self);
19+
20+
/// Whether to clear the buffer, i.e. whether the buffered watermark should be applied now.
21+
///
22+
/// Returns true to indicate that the buffer should be cleared and the strategy state reset.
23+
fn apply(&mut self) -> bool;
24+
}
25+
26+
/// No buffering: `apply` always returns true, so the watermark is never held back.
27+
/// Use this strategy when you want to apply the watermark immediately.
28+
#[derive(Default, Debug)]
29+
pub struct WatermarkNoBuffer;
30+
31+
impl WatermarkBufferStrategy for WatermarkNoBuffer {
32+
fn tick(&mut self) {} // stateless: there is nothing to count
33+
34+
fn apply(&mut self) -> bool {
35+
true
36+
}
37+
}
38+
39+
/// Buffer the watermark for a period of `PERIOD` epochs (counted via `tick`).
40+
/// This strategy reduces the number of delete-range calls to storage.
41+
#[derive(Default, Debug)]
42+
pub struct WatermarkBufferByEpoch<const PERIOD: usize> {
43+
/// Number of epochs counted by `tick` since `apply` last returned true, i.e. since the last state cleaning by watermark.
44+
buffered_epochs_cnt: usize,
45+
}
46+
47+
impl<const PERIOD: usize> WatermarkBufferStrategy for WatermarkBufferByEpoch<PERIOD> {
48+
fn tick(&mut self) {
49+
self.buffered_epochs_cnt += 1;
50+
}
51+
52+
fn apply(&mut self) -> bool {
53+
if self.buffered_epochs_cnt >= PERIOD { // at least `PERIOD` ticks have been buffered
54+
self.buffered_epochs_cnt = 0; // reset so the next period starts counting from zero
55+
true
56+
} else {
57+
false
58+
}
59+
}
60+
}

0 commit comments

Comments
 (0)