Skip to content

Commit 3fbe1ab

Browse files
author
Eric Fu
authored
feat: improve sink mview throughput metrics (#12622)
1 parent c847095 commit 3fbe1ab

15 files changed

+137
-118
lines changed

docker/dashboards/risingwave-dev-dashboard.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

docker/dashboards/risingwave-user-dashboard.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

grafana/risingwave-dev-dashboard.dashboard.py

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -641,11 +641,6 @@ def section_object_storage(outer_panels):
641641

642642
def section_streaming(outer_panels):
643643
panels = outer_panels.sub_panel()
644-
sink_filter = "executor_identity=~\".*SinkExecutor.*\""
645-
mv_filter = "executor_identity=~\".*MaterializeExecutor.*\""
646-
table_type_filter = "table_type=~\"MATERIALIZED_VIEW\""
647-
mv_throughput_query = f'sum(rate({metric("stream_executor_row_count", filter=mv_filter)}[$__rate_interval]) * on(actor_id) group_left(materialized_view_id, table_name) (group({metric("table_info", filter=table_type_filter)}) by (actor_id, materialized_view_id, table_name))) by (materialized_view_id, table_name)'
648-
sink_throughput_query = f'sum(rate({metric("stream_executor_row_count", filter=sink_filter)}[$__rate_interval]) * on(actor_id) group_left(sink_name) (group({metric("sink_info")}) by (actor_id, sink_name))) by (sink_name)'
649644
return [
650645
outer_panels.row_collapsed(
651646
"Streaming",
@@ -740,22 +735,41 @@ def section_streaming(outer_panels):
740735
),
741736
panels.timeseries_rowsps(
742737
"Sink Throughput(rows/s)",
743-
"The figure shows the number of rows output by each sink per second.",
738+
"The number of rows streamed into each sink per second.",
744739
[
745740
panels.target(
746-
sink_throughput_query,
747-
"sink {{sink_name}}",
741+
f"sum(rate({metric('stream_sink_input_row_count')}[$__rate_interval])) by (sink_id) * on(sink_id) group_left(sink_name) group({metric('sink_info')}) by (sink_id, sink_name)",
742+
"sink {{sink_id}} {{sink_name}}",
743+
),
744+
],
745+
),
746+
panels.timeseries_rowsps(
747+
"Sink Throughput(rows/s) per Partition",
748+
"The number of rows streamed into each sink per second.",
749+
[
750+
panels.target(
751+
f"sum(rate({metric('stream_sink_input_row_count')}[$__rate_interval])) by (sink_id, actor_id) * on(actor_id) group_left(sink_name) {metric('sink_info')}",
752+
"sink {{sink_id}} {{sink_name}} - actor {{actor_id}}",
748753
),
749754
],
750755
),
751-
752756
panels.timeseries_rowsps(
753757
"Materialized View Throughput(rows/s)",
754758
"The figure shows the number of rows written into each materialized view per second.",
755759
[
756760
panels.target(
757-
mv_throughput_query,
758-
"materialized view {{table_name}} table_id {{materialized_view_id}}",
761+
f"sum(rate({metric('stream_mview_input_row_count')}[$__rate_interval])) by (table_id) * on(table_id) group_left(table_name) group({metric('table_info')}) by (table_id, table_name)",
762+
"mview {{table_id}} {{table_name}}",
763+
),
764+
],
765+
),
766+
panels.timeseries_rowsps(
767+
"Materialized View Throughput(rows/s) per Partition",
768+
"The figure shows the number of rows written into each materialized view per second.",
769+
[
770+
panels.target(
771+
f"sum(rate({metric('stream_mview_input_row_count')}[$__rate_interval])) by (actor_id, table_id) * on(actor_id, table_id) group_left(table_name) {metric('table_info')}",
772+
"mview {{table_id}} {{table_name}} - actor {{actor_id}}",
759773
),
760774
],
761775
),
@@ -899,8 +913,8 @@ def section_streaming_actors(outer_panels):
899913
"When enabled, this metric shows the input throughput of each executor.",
900914
[
901915
panels.target(
902-
f"rate({metric('stream_executor_row_count')}[$__rate_interval]) > 0",
903-
"actor {{actor_id}}->{{executor_identity}}",
916+
f"rate({metric('stream_executor_row_count')}[$__rate_interval])",
917+
"{{executor_identity}} actor {{actor_id}}",
904918
),
905919
],
906920
),

grafana/risingwave-dev-dashboard.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

grafana/risingwave-user-dashboard.dashboard.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -73,22 +73,22 @@ def section_overview(panels):
7373
],
7474
),
7575
panels.timeseries_rowsps(
76-
"Aggregated Sink Throughput(rows/s)",
77-
"The figure shows the number of rows output by each sink per second.",
76+
"Sink Throughput(rows/s)",
77+
"The number of rows streamed into each sink per second.",
7878
[
7979
panels.target(
80-
f"sum(rate({metric('stream_executor_row_count', filter=sink_filter)}[$__rate_interval])) by (executor_identity)",
81-
"{{executor_identity}}",
80+
f"sum(rate({metric('stream_sink_input_row_count')}[$__rate_interval])) by (sink_id) * on(sink_id) group_left(sink_name) group({metric('sink_info')}) by (sink_id, sink_name)",
81+
"sink {{sink_id}} {{sink_name}}",
8282
),
8383
],
8484
),
8585
panels.timeseries_rowsps(
86-
"Aggregated Materialized View Throughput(rows/s)",
87-
"The figure shows the number of rows output by each materialized view per second.",
86+
"Materialized View Throughput(rows/s)",
87+
"The figure shows the number of rows written into each materialized view per second.",
8888
[
8989
panels.target(
90-
f"sum(rate({metric('stream_executor_row_count', filter=mv_filter)}[$__rate_interval])) by (executor_identity)",
91-
"{{executor_identity}}",
90+
f"sum(rate({metric('stream_mview_input_row_count')}[$__rate_interval])) by (table_id) * on(table_id) group_left(table_name) group({metric('table_info')}) by (table_id, table_name)",
91+
"mview {{table_id}} {{table_name}}",
9292
),
9393
],
9494
),

grafana/risingwave-user-dashboard.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/connector/src/sink/catalog/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ impl SinkId {
5252
}
5353
}
5454

55+
impl std::fmt::Display for SinkId {
56+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
57+
write!(f, "{}", self.sink_id)
58+
}
59+
}
60+
5561
impl From<u32> for SinkId {
5662
fn from(id: u32) -> Self {
5763
Self::new(id)

src/meta/src/rpc/metrics.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ impl MetaMetrics {
536536
let sink_info = register_int_gauge_vec_with_registry!(
537537
"sink_info",
538538
"Mapping from actor id to (actor id, sink name)",
539-
&["actor_id", "sink_name",],
539+
&["actor_id", "sink_id", "sink_name",],
540540
registry
541541
)
542542
.unwrap();
@@ -810,13 +810,14 @@ pub async fn start_fragment_info_monitor(
810810

811811
if let Some(stream_node) = &actor.nodes {
812812
if let Some(Sink(sink_node)) = &stream_node.node_body {
813-
let sink_name = match &sink_node.sink_desc {
814-
Some(sink_desc) => &sink_desc.name,
815-
_ => "unknown",
813+
let (sink_id, sink_name) = match &sink_node.sink_desc {
814+
Some(sink_desc) => (sink_desc.id, sink_desc.name.as_str()),
815+
_ => (0, "unknown"), // unreachable
816816
};
817+
let sink_id_str = sink_id.to_string();
817818
meta_metrics
818819
.sink_info
819-
.with_label_values(&[&actor_id_str, sink_name])
820+
.with_label_values(&[&actor_id_str, &sink_id_str, sink_name])
820821
.set(1);
821822
}
822823
}

src/stream/src/executor/monitor/streaming_stats.rs

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,10 @@ use risingwave_common::monitor::GLOBAL_METRICS_REGISTRY;
3030
pub struct StreamingMetrics {
3131
pub level: MetricLevel,
3232

33+
// Executor metrics (disabled by default)
3334
pub executor_row_count: GenericCounterVec<AtomicU64>,
35+
36+
// Actor metrics
3437
pub actor_execution_time: GenericGaugeVec<AtomicF64>,
3538
pub actor_output_buffer_blocking_duration_ns: GenericCounterVec<AtomicU64>,
3639
pub actor_input_buffer_blocking_duration_ns: GenericCounterVec<AtomicU64>,
@@ -48,10 +51,16 @@ pub struct StreamingMetrics {
4851
pub actor_in_record_cnt: GenericCounterVec<AtomicU64>,
4952
pub actor_out_record_cnt: GenericCounterVec<AtomicU64>,
5053
pub actor_sampled_deserialize_duration_ns: GenericCounterVec<AtomicU64>,
54+
55+
// Source
5156
pub source_output_row_count: GenericCounterVec<AtomicU64>,
5257
pub source_row_per_barrier: GenericCounterVec<AtomicU64>,
5358
pub source_split_change_count: GenericCounterVec<AtomicU64>,
5459

60+
// Sink & materialized view
61+
pub sink_input_row_count: GenericCounterVec<AtomicU64>,
62+
pub mview_input_row_count: GenericCounterVec<AtomicU64>,
63+
5564
// Exchange (see also `compute::ExchangeServiceMetrics`)
5665
pub exchange_frag_recv_size: GenericCounterVec<AtomicU64>,
5766

@@ -159,7 +168,7 @@ impl StreamingMetrics {
159168
let executor_row_count = register_int_counter_vec_with_registry!(
160169
"stream_executor_row_count",
161170
"Total number of rows that have been output from each executor",
162-
&["actor_id", "executor_identity"],
171+
&["actor_id", "fragment_id", "executor_identity"],
163172
registry
164173
)
165174
.unwrap();
@@ -188,6 +197,22 @@ impl StreamingMetrics {
188197
)
189198
.unwrap();
190199

200+
let sink_input_row_count = register_int_counter_vec_with_registry!(
201+
"stream_sink_input_row_count",
202+
"Total number of rows streamed into sink executors",
203+
&["sink_id", "actor_id", "fragment_id"],
204+
registry
205+
)
206+
.unwrap();
207+
208+
let mview_input_row_count = register_int_counter_vec_with_registry!(
209+
"stream_mview_input_row_count",
210+
"Total number of rows streamed into materialize executors",
211+
&["table_id", "actor_id", "fragment_id"],
212+
registry
213+
)
214+
.unwrap();
215+
191216
let actor_execution_time = register_gauge_vec_with_registry!(
192217
"stream_actor_actor_execution_time",
193218
"Total execution time (s) of an actor",
@@ -825,6 +850,8 @@ impl StreamingMetrics {
825850
source_output_row_count,
826851
source_row_per_barrier,
827852
source_split_change_count,
853+
sink_input_row_count,
854+
mview_input_row_count,
828855
exchange_frag_recv_size,
829856
join_lookup_miss_count,
830857
join_total_lookup_count,

src/stream/src/executor/mview/materialize.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,11 @@ impl<S: StateStore, SD: ValueRowSerde> MaterializeExecutor<S, SD> {
119119

120120
#[try_stream(ok = Message, error = StreamExecutorError)]
121121
async fn execute_inner(mut self) {
122+
// for metrics
123+
let table_id_str = self.state_table.table_id().to_string();
124+
let actor_id_str = self.actor_context.id.to_string();
125+
let fragment_id_str = self.actor_context.fragment_id.to_string();
126+
122127
let data_types = self.schema().data_types().clone();
123128
let mut input = self.input.execute();
124129

@@ -136,6 +141,12 @@ impl<S: StateStore, SD: ValueRowSerde> MaterializeExecutor<S, SD> {
136141
yield match msg {
137142
Message::Watermark(w) => Message::Watermark(w),
138143
Message::Chunk(chunk) => {
144+
self.actor_context
145+
.streaming_metrics
146+
.mview_input_row_count
147+
.with_label_values(&[&table_id_str, &actor_id_str, &fragment_id_str])
148+
.inc_by(chunk.cardinality() as u64);
149+
139150
match self.conflict_behavior {
140151
ConflictBehavior::Overwrite | ConflictBehavior::IgnoreConflict => {
141152
// create MaterializeBuffer from chunk

src/stream/src/executor/sink.rs

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
// limitations under the License.
1414

1515
use std::mem;
16-
use std::sync::Arc;
1716

1817
use anyhow::anyhow;
1918
use futures::stream::select;
@@ -25,7 +24,7 @@ use risingwave_common::array::{merge_chunk_row, Op, StreamChunk, StreamChunkComp
2524
use risingwave_common::catalog::{ColumnCatalog, Field, Schema};
2625
use risingwave_common::util::epoch::EpochPair;
2726
use risingwave_connector::dispatch_sink;
28-
use risingwave_connector::sink::catalog::SinkType;
27+
use risingwave_connector::sink::catalog::{SinkId, SinkType};
2928
use risingwave_connector::sink::log_store::{
3029
LogReader, LogStoreFactory, LogStoreTransformChunkLogReader, LogWriter,
3130
};
@@ -35,12 +34,10 @@ use risingwave_connector::sink::{
3534

3635
use super::error::{StreamExecutorError, StreamExecutorResult};
3736
use super::{BoxedExecutor, Executor, Message, PkIndices};
38-
use crate::executor::monitor::StreamingMetrics;
3937
use crate::executor::{expect_first_barrier, ActorContextRef, BoxedMessageStream};
4038

4139
pub struct SinkExecutor<F: LogStoreFactory> {
4240
input: BoxedExecutor,
43-
_metrics: Arc<StreamingMetrics>,
4441
sink: SinkImpl,
4542
identity: String,
4643
pk_indices: PkIndices,
@@ -83,7 +80,6 @@ impl<F: LogStoreFactory> SinkExecutor<F> {
8380
#[allow(clippy::too_many_arguments)]
8481
pub async fn new(
8582
input: BoxedExecutor,
86-
metrics: Arc<StreamingMetrics>,
8783
sink_writer_param: SinkWriterParam,
8884
sink_param: SinkParam,
8985
columns: Vec<ColumnCatalog>,
@@ -100,7 +96,6 @@ impl<F: LogStoreFactory> SinkExecutor<F> {
10096
.collect();
10197
Ok(Self {
10298
input,
103-
_metrics: metrics,
10499
sink,
105100
identity: format!("SinkExecutor {:X?}", sink_writer_param.executor_id),
106101
pk_indices,
@@ -127,6 +122,7 @@ impl<F: LogStoreFactory> SinkExecutor<F> {
127122
self.input,
128123
stream_key,
129124
self.log_writer,
125+
self.sink_param.sink_id,
130126
self.sink_param.sink_type,
131127
self.actor_context,
132128
stream_key_sink_pk_mismatch,
@@ -148,6 +144,7 @@ impl<F: LogStoreFactory> SinkExecutor<F> {
148144
input: BoxedExecutor,
149145
stream_key: PkIndices,
150146
mut log_writer: impl LogWriter,
147+
sink_id: SinkId,
151148
sink_type: SinkType,
152149
actor_context: ActorContextRef,
153150
stream_key_sink_pk_mismatch: bool,
@@ -165,6 +162,11 @@ impl<F: LogStoreFactory> SinkExecutor<F> {
165162
// Propagate the first barrier
166163
yield Message::Barrier(barrier);
167164

165+
// for metrics
166+
let sink_id_str = sink_id.to_string();
167+
let actor_id_str = actor_context.id.to_string();
168+
let fragment_id_str = actor_context.fragment_id.to_string();
169+
168170
// When stream key is different from the user defined primary key columns for sinks. The operations could be out of order
169171
// stream key: a,b
170172
// sink pk: a
@@ -203,6 +205,12 @@ impl<F: LogStoreFactory> SinkExecutor<F> {
203205
match msg? {
204206
Message::Watermark(w) => watermark = Some(w),
205207
Message::Chunk(c) => {
208+
actor_context
209+
.streaming_metrics
210+
.sink_input_row_count
211+
.with_label_values(&[&sink_id_str, &actor_id_str, &fragment_id_str])
212+
.inc_by(c.capacity() as u64);
213+
206214
chunk_buffer.push_chunk(c);
207215
}
208216
Message::Barrier(barrier) => {
@@ -415,7 +423,6 @@ mod test {
415423

416424
let sink_executor = SinkExecutor::new(
417425
Box::new(mock),
418-
Arc::new(StreamingMetrics::unused()),
419426
SinkWriterParam::for_test(),
420427
sink_param,
421428
columns.clone(),
@@ -537,7 +544,6 @@ mod test {
537544

538545
let sink_executor = SinkExecutor::new(
539546
Box::new(mock),
540-
Arc::new(StreamingMetrics::unused()),
541547
SinkWriterParam::for_test(),
542548
sink_param,
543549
columns.clone(),
@@ -656,7 +662,6 @@ mod test {
656662

657663
let sink_executor = SinkExecutor::new(
658664
Box::new(mock),
659-
Arc::new(StreamingMetrics::unused()),
660665
SinkWriterParam::for_test(),
661666
sink_param,
662667
columns,

0 commit comments

Comments
 (0)