
Commit 3513fc8

improve more
Signed-off-by: Bugen Zhao <[email protected]>
1 parent 606bb71 commit 3513fc8

File tree

6 files changed: 45 additions, 57 deletions

docker/dashboards/risingwave-dev-dashboard.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

grafana/risingwave-dev-dashboard.dashboard.py

Lines changed: 21 additions & 29 deletions
@@ -1134,6 +1134,9 @@ def section_streaming_actors(outer_panels: Panels):
 "much time it takes an actor to process a message, i.e. a barrier, a watermark or rows of data, "
 "on average. Then we divide this duration by 1 second and show it as a percentage.",
 [
+# The metrics might be pre-aggregated locally on each compute node when `actor_id` is masked due to metrics level settings.
+# Thus to calculate the average, we need to manually divide the actor count.
+#
 # Note: actor_count is equal to the number of dispatchers for a given downstream fragment,
 # this holds true as long as we don't support multiple edges between two fragments.
 panels.target(
@@ -1188,11 +1191,11 @@ def section_streaming_actors(outer_panels: Panels):
 [
 panels.target(
 f"sum({metric('stream_memory_usage')}) by (table_id, desc)",
-"table {{table_id}} desc: {{desc}}",
+"table total {{table_id}}: {{desc}}",
 ),
 panels.target(
 f"{metric('stream_memory_usage', actor_level_filter)}",
-"actor {{actor_id}} table {{table_id}} desc: {{desc}}",
+"actor {{actor_id}} table {{table_id}}: {{desc}}",
 ),
 ],
 ),
@@ -1232,13 +1235,13 @@ def section_streaming_actors(outer_panels: Panels):
 f"sum(rate({table_metric('stream_materialize_cache_total_count')}[$__rate_interval])) by (table_id, fragment_id)",
 "total cached count - table {{table_id}} fragment {{fragment_id}}",
 ),
-panels.target_hidden(
-f"rate({table_metric('stream_materialize_cache_hit_count')}[$__rate_interval])",
-"cache hit count - table {{table_id}} actor {{actor_id}}",
+panels.target(
+f"rate({table_metric('stream_materialize_cache_hit_count', actor_level_filter)}[$__rate_interval])",
+"cache hit count - actor {{actor_id}} table {{table_id}} fragment {{fragment_id}}",
 ),
-panels.target_hidden(
-f"rate({table_metric('stream_materialize_cache_total_count')}[$__rate_interval])",
-"total cached count - table {{table_id}} actor {{actor_id}}",
+panels.target(
+f"rate({table_metric('stream_materialize_cache_total_count', actor_level_filter)}[$__rate_interval])",
+"total cached count - actor {{actor_id}} table {{table_id}} fragment {{fragment_id}}",
 ),
 ],
 ),
@@ -1331,12 +1334,15 @@ def section_streaming_actors(outer_panels: Panels):
 "Executor Barrier Align Per Second",
 "",
 [
+# The metrics might be pre-aggregated locally on each compute node when `actor_id` is masked due to metrics level settings.
+# Thus to calculate the average, we need to manually divide the actor count.
 panels.target(
-f"avg(rate({metric('stream_barrier_align_duration_ns')}[$__rate_interval]) / 1000000000) by (fragment_id,wait_side, executor)",
-"fragment {{fragment_id}} {{wait_side}} {{executor}}",
+f"sum(rate({metric('stream_barrier_align_duration_ns')}[$__rate_interval]) / 1000000000) by (fragment_id, wait_side, executor) \
+/ ignoring (wait_side, executor) group_left sum({metric('stream_actor_count')}) by (fragment_id)",
+"fragment avg {{fragment_id}} {{wait_side}} {{executor}}",
 ),
-panels.target_hidden(
-f"rate({metric('stream_barrier_align_duration_ns')}[$__rate_interval]) / 1000000000",
+panels.target(
+f"rate({metric('stream_barrier_align_duration_ns', actor_level_filter)}[$__rate_interval]) / 1000000000",
 "actor {{actor_id}} fragment {{fragment_id}} {{wait_side}} {{executor}}",
 ),
 ],
@@ -1566,25 +1572,11 @@ def section_streaming_actors(outer_panels: Panels):
 [
 panels.target(
 f"sum(rate({metric('stream_executor_row_count')}[$__rate_interval])) by (executor_identity, fragment_id)",
-"{{executor_identity}} fragment {{fragment_id}}",
-),
-panels.target_hidden(
-f"rate({metric('stream_executor_row_count')}[$__rate_interval])",
-"{{executor_identity}} actor {{actor_id}}",
+"{{executor_identity}} fragment total {{fragment_id}}",
 ),
-],
-),
-panels.timeseries_bytes(
-"Actor Memory Usage (TaskLocalAlloc)",
-"The actor-level memory usage statistics reported by TaskLocalAlloc. (Disabled by default)",
-[
 panels.target(
-f"sum({metric('actor_memory_usage')}) by (fragment_id)",
-"fragment {{fragment_id}}",
-),
-panels.target_hidden(
-f"{metric('actor_memory_usage')}",
-"actor {{actor_id}}",
+f"rate({metric('stream_executor_row_count', actor_level_filter)}[$__rate_interval])",
+"{{executor_identity}} actor {{actor_id}}",
 ),
 ],
 ),
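
The comments added in this file spell out the averaging trick: when `actor_id` is masked at lower metrics levels, each compute node may export one pre-aggregated series per fragment, so the per-actor average has to be recovered by dividing the per-fragment sum by the actor count, which is what the `ignoring (...) group_left` division above does. A minimal sketch of that arithmetic (hypothetical node names, fragment IDs, and values, not data from any real deployment):

# Two compute nodes each report a locally pre-aggregated barrier-align time
# for fragment-7 because per-actor series are masked at this metrics level.
per_node_align_seconds = {
    ("compute-1", "fragment-7"): 0.9,  # sum over its 3 local actors
    ("compute-2", "fragment-7"): 0.6,  # sum over its 2 local actors
}
actor_count = {"fragment-7": 5}

# The PromQL `sum(...) by (fragment_id)` step:
fragment_sum = sum(
    v for (_node, frag), v in per_node_align_seconds.items() if frag == "fragment-7"
)
# The `/ ... group_left sum(stream_actor_count) by (fragment_id)` step:
print(fragment_sum / actor_count["fragment-7"])  # 0.3: average align time per actor per second

Since Grafana only sees whatever series Prometheus scraped, the division has to live in the query rather than in the dashboard code.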

grafana/risingwave-dev-dashboard.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/common/src/jemalloc.rs

Lines changed: 0 additions & 9 deletions
@@ -21,12 +21,3 @@ macro_rules! enable_jemalloc {
 static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
 };
 }
-
-#[macro_export]
-macro_rules! enable_task_local_jemalloc {
-() => {
-#[global_allocator]
-static GLOBAL: task_stats_alloc::TaskLocalAlloc<tikv_jemallocator::Jemalloc> =
-task_stats_alloc::TaskLocalAlloc(tikv_jemallocator::Jemalloc);
-};
-}

src/stream/src/cache/managed_lru.rs

Lines changed: 1 addition & 1 deletion
@@ -67,8 +67,8 @@ where
 .metrics
 .stream_memory_usage
 .with_guarded_label_values(&[
-&metrics_info.table_id,
 &metrics_info.actor_id,
+&metrics_info.table_id,
 &metrics_info.desc,
 ]);
 memory_usage_metrics.set(0.into());

src/stream/src/executor/monitor/streaming_stats.rs

Lines changed: 21 additions & 16 deletions
@@ -24,7 +24,7 @@ use risingwave_common::config::MetricLevel;
 use risingwave_common::metrics::{
 LabelGuardedGauge, LabelGuardedGaugeVec, LabelGuardedHistogramVec, LabelGuardedIntCounter,
 LabelGuardedIntCounterVec, LabelGuardedIntGauge, LabelGuardedIntGaugeVec, MetricVecRelabelExt,
-RelabeledGuardedHistogramVec, RelabeledGuardedIntCounterVec,
+RelabeledGuardedHistogramVec, RelabeledGuardedIntCounterVec, RelabeledGuardedIntGaugeVec,
 };
 use risingwave_common::monitor::GLOBAL_METRICS_REGISTRY;
 use risingwave_common::util::epoch::Epoch;
@@ -46,7 +46,7 @@ pub struct StreamingMetrics {
 pub level: MetricLevel,

 // Executor metrics (disabled by default)
-pub executor_row_count: LabelGuardedIntCounterVec<3>,
+pub executor_row_count: RelabeledGuardedIntCounterVec<3>,

 // Streaming actor metrics from tokio (disabled by default)
 actor_execution_time: LabelGuardedGaugeVec<1>,
@@ -199,12 +199,12 @@ pub struct StreamingMetrics {
 pub jemalloc_metadata_bytes: IntGauge,
 pub jvm_allocated_bytes: IntGauge,
 pub jvm_active_bytes: IntGauge,
-pub stream_memory_usage: LabelGuardedIntGaugeVec<3>,
+pub stream_memory_usage: RelabeledGuardedIntGaugeVec<3>,

 // Materialized view
-materialize_cache_hit_count: LabelGuardedIntCounterVec<3>,
-materialize_cache_total_count: LabelGuardedIntCounterVec<3>,
-materialize_input_row_count: LabelGuardedIntCounterVec<3>,
+materialize_cache_hit_count: RelabeledGuardedIntCounterVec<3>,
+materialize_cache_total_count: RelabeledGuardedIntCounterVec<3>,
+materialize_input_row_count: RelabeledGuardedIntCounterVec<3>,
 }

 pub static GLOBAL_STREAMING_METRICS: OnceLock<StreamingMetrics> = OnceLock::new();
@@ -223,7 +223,8 @@ impl StreamingMetrics {
 &["actor_id", "fragment_id", "executor_identity"],
 registry
 )
-.unwrap();
+.unwrap()
+.relabel_debug_1(level);

 let source_output_row_count = register_guarded_int_counter_vec_with_registry!(
 "stream_source_output_rows_counts",
@@ -260,10 +261,11 @@
 let materialize_input_row_count = register_guarded_int_counter_vec_with_registry!(
 "stream_mview_input_row_count",
 "Total number of rows streamed into materialize executors",
-&["table_id", "actor_id", "fragment_id"],
+&["actor_id", "table_id", "fragment_id"],
 registry
 )
-.unwrap();
+.unwrap()
+.relabel_debug_1(level);

 let sink_chunk_buffer_size = register_guarded_int_gauge_vec_with_registry!(
 "stream_sink_chunk_buffer_size",
@@ -1057,26 +1059,29 @@
 let materialize_cache_hit_count = register_guarded_int_counter_vec_with_registry!(
 "stream_materialize_cache_hit_count",
 "Materialize executor cache hit count",
-&["table_id", "actor_id", "fragment_id"],
+&["actor_id", "table_id", "fragment_id"],
 registry
 )
-.unwrap();
+.unwrap()
+.relabel_debug_1(level);

 let materialize_cache_total_count = register_guarded_int_counter_vec_with_registry!(
 "stream_materialize_cache_total_count",
 "Materialize executor cache total operation",
-&["table_id", "actor_id", "fragment_id"],
+&["actor_id", "table_id", "fragment_id"],
 registry
 )
-.unwrap();
+.unwrap()
+.relabel_debug_1(level);

 let stream_memory_usage = register_guarded_int_gauge_vec_with_registry!(
 "stream_memory_usage",
 "Memory usage for stream executors",
-&["table_id", "actor_id", "desc"],
+&["actor_id", "table_id", "desc"],
 registry
 )
-.unwrap();
+.unwrap()
+.relabel_debug_1(level);

 let iceberg_write_qps = register_guarded_int_counter_vec_with_registry!(
 "iceberg_write_qps",
@@ -1634,8 +1639,8 @@
 fragment_id: FragmentId,
 ) -> MaterializeMetrics {
 let label_list: &[&str; 3] = &[
-&table_id.to_string(),
 &actor_id.to_string(),
+&table_id.to_string(),
 &fragment_id.to_string(),
 ];
 MaterializeMetrics {
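
On the Rust side, the labels are reordered so that `actor_id` sits in the first slot and every registration now chains `.relabel_debug_1(level)`. The sketch below is only a conceptual model of what that appears to do, not RisingWave's actual `MetricVecRelabelExt` implementation: below the debug metric level, the first label is blanked out and series that then share the remaining labels are merged, producing the pre-aggregated per-fragment series that the dashboard comments account for.

# Conceptual model only; the level ordering and merging strategy are assumptions,
# not RisingWave's real MetricVecRelabelExt code.
from collections import defaultdict

METRIC_LEVEL_DEBUG = 2  # hypothetical numeric ordering of metric levels

def relabel_debug_1(samples: dict, level: int) -> dict:
    """samples maps (actor_id, table_id, fragment_id) label tuples to counter values."""
    if level >= METRIC_LEVEL_DEBUG:
        return dict(samples)  # debug level: keep per-actor series
    merged = defaultdict(int)
    for (_actor_id, *rest), value in samples.items():
        merged[("", *rest)] += value  # mask label slot 0 (actor_id) and pre-aggregate
    return dict(merged)

raw = {("1", "t1", "f1"): 3, ("2", "t1", "f1"): 5}
print(relabel_debug_1(raw, level=1))  # {('', 't1', 'f1'): 8}: one series per (table, fragment)

This is also why managed_lru.rs and the MaterializeMetrics label list swap `table_id` and `actor_id`: the label order at call sites has to match the new registration order.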
