Skip to content

Commit beabcb2

Browse files
author
Eric Fu
authored
feat(source): suppress parser log (#14005)
1 parent 69f24de commit beabcb2

File tree

12 files changed

+166
-55
lines changed

12 files changed

+166
-55
lines changed

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/common/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ ethnum = { version = "1", features = ["serde"] }
4747
fixedbitset = { version = "0.4", features = ["std"] }
4848
fs-err = "2"
4949
futures = { version = "0.3", default-features = false, features = ["alloc"] }
50+
governor = { version = "0.6", default-features = false, features = ["std"] }
5051
hex = "0.4.3"
5152
http = "0.2"
5253
humantime = "2.1"

src/common/src/lib.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ pub mod error;
5656
pub mod array;
5757
#[macro_use]
5858
pub mod util;
59+
pub mod acl;
5960
pub mod buffer;
6061
pub mod cache;
6162
pub mod cast;
@@ -65,17 +66,16 @@ pub mod constants;
6566
pub mod estimate_size;
6667
pub mod field_generator;
6768
pub mod hash;
69+
pub mod log;
6870
pub mod memory;
71+
pub mod metrics;
6972
pub mod monitor;
7073
pub mod row;
7174
pub mod session_config;
7275
pub mod system_param;
7376
pub mod telemetry;
74-
pub mod transaction;
75-
76-
pub mod acl;
77-
pub mod metrics;
7877
pub mod test_utils;
78+
pub mod transaction;
7979
pub mod types;
8080
pub mod vnode_mapping;
8181

src/common/src/log.rs

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
// Copyright 2023 RisingWave Labs
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::num::NonZeroU32;
16+
use std::sync::atomic::{AtomicUsize, Ordering};
17+
18+
use governor::Quota;
19+
20+
/// Concrete `governor` rate limiter type used throughout this module: not keyed, with
/// in-memory state, and driven by `governor`'s monotonic clock.
type RateLimiter = governor::RateLimiter<
21+
governor::state::NotKeyed,
22+
governor::state::InMemoryState,
23+
governor::clock::MonotonicClock,
24+
>;
25+
26+
/// `LogSuppresser` is a helper to suppress log spamming.
27+
pub struct LogSuppresser {
28+
/// The number of times the log has been suppressed. Will be returned and cleared when the
29+
/// rate limiter allows next log to be printed.
30+
suppressed_count: AtomicUsize,
31+
32+
/// Inner rate limiter.
33+
rate_limiter: RateLimiter,
34+
}
35+
36+
/// Marker error returned when a log attempt is rejected by the rate limiter.
///
/// It carries no data; it only signals that the caller should skip emitting its log.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LogSuppressed;

impl std::fmt::Display for LogSuppressed {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("log suppressed by rate limiter")
    }
}

// Implementing `Error` lets the marker be boxed or propagated like any other error value.
impl std::error::Error for LogSuppressed {}
38+
39+
impl LogSuppresser {
40+
pub fn new(rate_limiter: RateLimiter) -> Self {
41+
Self {
42+
suppressed_count: AtomicUsize::new(0),
43+
rate_limiter,
44+
}
45+
}
46+
47+
/// Check if the log should be suppressed.
48+
/// If the log should be suppressed, return `Err(LogSuppressed)`.
49+
/// Otherwise, return `Ok(usize)` with count of suppressed messages before.
50+
pub fn check(&self) -> core::result::Result<usize, LogSuppressed> {
51+
match self.rate_limiter.check() {
52+
Ok(()) => Ok(self.suppressed_count.swap(0, Ordering::Relaxed)),
53+
Err(_) => {
54+
self.suppressed_count.fetch_add(1, Ordering::Relaxed);
55+
Err(LogSuppressed)
56+
}
57+
}
58+
}
59+
}
60+
61+
impl Default for LogSuppresser {
62+
/// Default rate limiter allows 1 log per second.
63+
fn default() -> Self {
64+
Self::new(RateLimiter::direct(Quota::per_second(
65+
NonZeroU32::new(1).unwrap(),
66+
)))
67+
}
68+
}
69+
70+
#[cfg(test)]
mod tests {
    use std::sync::LazyLock;
    use std::time::Duration;

    use super::*;

    /// Shows the intended call-site pattern: a lazily-initialised static suppresser that is
    /// consulted before every log line. Ticks 100 times at a 100 ms interval against a quota
    /// of 5 per second and prints only when the check passes.
    #[tokio::test]
    async fn demo() {
        static RATE_LIMITER: LazyLock<LogSuppresser> = LazyLock::new(|| {
            let quota = Quota::per_second(NonZeroU32::new(5).unwrap());
            LogSuppresser::new(RateLimiter::direct(quota))
        });

        let mut ticker = tokio::time::interval(Duration::from_millis(100));
        for _ in 0..100 {
            ticker.tick().await;

            let Ok(suppressed_count) = RATE_LIMITER.check() else {
                continue;
            };
            println!("failed to foo bar. suppressed_count = {}", suppressed_count);
        }
    }
}

src/connector/src/parser/avro/util.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use std::sync::LazyLock;
16+
1517
use apache_avro::schema::{DecimalSchema, RecordSchema, Schema};
1618
use itertools::Itertools;
19+
use risingwave_common::log::LogSuppresser;
1720
use risingwave_common::types::{DataType, Decimal};
1821
use risingwave_pb::plan_common::ColumnDesc;
1922

@@ -82,11 +85,16 @@ fn avro_type_mapping(schema: &Schema) -> anyhow::Result<DataType> {
8285
Schema::Double => DataType::Float64,
8386
Schema::Decimal(DecimalSchema { precision, .. }) => {
8487
if *precision > Decimal::MAX_PRECISION.into() {
85-
tracing::warn!(
86-
"RisingWave supports decimal precision up to {}, but got {}. Will truncate.",
88+
static LOG_SUPPERSSER: LazyLock<LogSuppresser> =
89+
LazyLock::new(LogSuppresser::default);
90+
if let Ok(suppressed_count) = LOG_SUPPERSSER.check() {
91+
tracing::warn!(
92+
"RisingWave supports decimal precision up to {}, but got {}. Will truncate. ({} suppressed)",
8793
Decimal::MAX_PRECISION,
94+
suppressed_count,
8895
precision
8996
);
97+
}
9098
}
9199
DataType::Decimal
92100
}

src/connector/src/parser/mod.rs

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
use std::collections::HashMap;
1616
use std::fmt::Debug;
17+
use std::sync::LazyLock;
1718

1819
use auto_enums::auto_enum;
1920
pub use avro::AvroParserConfig;
@@ -28,6 +29,7 @@ use risingwave_common::array::{ArrayBuilderImpl, Op, StreamChunk};
2829
use risingwave_common::catalog::{KAFKA_TIMESTAMP_COLUMN_NAME, TABLE_NAME_COLUMN_NAME};
2930
use risingwave_common::error::ErrorCode::ProtocolError;
3031
use risingwave_common::error::{Result, RwError};
32+
use risingwave_common::log::LogSuppresser;
3133
use risingwave_common::types::{Datum, Scalar};
3234
use risingwave_common::util::iter_util::ZipEqFast;
3335
use risingwave_pb::catalog::{
@@ -328,13 +330,18 @@ impl SourceStreamChunkRowWriter<'_> {
328330
// TODO: decide whether the error should not be ignored (e.g., even not a valid Debezium message)
329331
// TODO: not using tracing span to provide `split_id` and `offset` due to performance concern,
330332
// see #13105
331-
tracing::warn!(
332-
%error,
333-
split_id = self.row_meta.as_ref().map(|m| m.split_id),
334-
offset = self.row_meta.as_ref().map(|m| m.offset),
335-
column = desc.name,
336-
"failed to parse non-pk column, padding with `NULL`"
337-
);
333+
static LOG_SUPPERSSER: LazyLock<LogSuppresser> =
334+
LazyLock::new(LogSuppresser::default);
335+
if let Ok(suppressed_count) = LOG_SUPPERSSER.check() {
336+
tracing::warn!(
337+
%error,
338+
split_id = self.row_meta.as_ref().map(|m| m.split_id),
339+
offset = self.row_meta.as_ref().map(|m| m.offset),
340+
column = desc.name,
341+
suppressed_count,
342+
"failed to parse non-pk column, padding with `NULL`"
343+
);
344+
}
338345
Ok(A::output_for(Datum::None))
339346
}
340347
}
@@ -598,12 +605,17 @@ async fn into_chunk_stream<P: ByteStreamSourceParser>(mut parser: P, data_stream
598605
if let Err(error) = res {
599606
// TODO: not using tracing span to provide `split_id` and `offset` due to performance concern,
600607
// see #13105
601-
tracing::error!(
602-
%error,
603-
split_id = &*msg.split_id,
604-
offset = msg.offset,
605-
"failed to parse message, skipping"
606-
);
608+
static LOG_SUPPERSSER: LazyLock<LogSuppresser> =
609+
LazyLock::new(LogSuppresser::default);
610+
if let Ok(suppressed_count) = LOG_SUPPERSSER.check() {
611+
tracing::error!(
612+
%error,
613+
split_id = &*msg.split_id,
614+
offset = msg.offset,
615+
suppressed_count,
616+
"failed to parse message, skipping"
617+
);
618+
}
607619
parser.source_ctx().report_user_source_error(error);
608620
}
609621
}

src/connector/src/parser/mysql.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,12 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
use std::sync::LazyLock;
16+
1517
use chrono::NaiveDate;
1618
use mysql_async::Row as MysqlRow;
1719
use risingwave_common::catalog::Schema;
20+
use risingwave_common::log::LogSuppresser;
1821
use risingwave_common::types::{
1922
DataType, Date, Datum, Decimal, JsonbVal, ScalarImpl, Time, Timestamp, Timestamptz,
2023
};
@@ -88,7 +91,11 @@ pub fn mysql_row_to_datums(mysql_row: &mut MysqlRow, schema: &Schema) -> Vec<Dat
8891
| DataType::Int256
8992
| DataType::Serial => {
9093
// Interval, Struct, List, Int256 are not supported
91-
tracing::warn!(rw_field.name, ?rw_field.data_type, "unsupported data type, set to null");
94+
static LOG_SUPPERSSER: LazyLock<LogSuppresser> =
95+
LazyLock::new(LogSuppresser::default);
96+
if let Ok(suppressed_count) = LOG_SUPPERSSER.check() {
97+
tracing::warn!(rw_field.name, ?rw_field.data_type, suppressed_count, "unsupported data type, set to null");
98+
}
9299
None
93100
}
94101
}

src/connector/src/parser/unified/avro.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
// limitations under the License.
1414

1515
use std::str::FromStr;
16+
use std::sync::LazyLock;
1617

1718
use anyhow::anyhow;
1819
use apache_avro::schema::{DecimalSchema, RecordSchema};
@@ -24,6 +25,7 @@ use num_bigint::{BigInt, Sign};
2425
use risingwave_common::array::{ListValue, StructValue};
2526
use risingwave_common::cast::{i64_to_timestamp, i64_to_timestamptz};
2627
use risingwave_common::error::Result as RwResult;
28+
use risingwave_common::log::LogSuppresser;
2729
use risingwave_common::types::{DataType, Date, Datum, Interval, JsonbVal, ScalarImpl, Time};
2830
use risingwave_common::util::iter_util::ZipEqFast;
2931

@@ -56,7 +58,13 @@ impl<'a> AvroParseOptions<'a> {
5658
self.schema
5759
.map(|schema| avro_extract_field_schema(schema, key))
5860
.transpose()
59-
.map_err(|_err| tracing::error!("extract sub-schema"))
61+
.map_err(|_err| {
62+
static LOG_SUPPERSSER: LazyLock<LogSuppresser> =
63+
LazyLock::new(LogSuppresser::default);
64+
if let Ok(suppressed_count) = LOG_SUPPERSSER.check() {
65+
tracing::error!(suppressed_count, "extract sub-schema");
66+
}
67+
})
6068
.ok()
6169
.flatten()
6270
}

src/connector/src/parser/unified/json.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@
1313
// limitations under the License.
1414

1515
use std::str::FromStr;
16+
use std::sync::LazyLock;
1617

1718
use base64::Engine;
1819
use itertools::Itertools;
1920
use num_bigint::{BigInt, Sign};
2021
use risingwave_common::array::{ListValue, StructValue};
2122
use risingwave_common::cast::{i64_to_timestamp, i64_to_timestamptz, str_to_bytea};
23+
use risingwave_common::log::LogSuppresser;
2224
use risingwave_common::types::{
2325
DataType, Date, Decimal, Int256, Interval, JsonbVal, ScalarImpl, Time, Timestamp, Timestamptz,
2426
};
@@ -464,7 +466,10 @@ impl JsonParseOptions {
464466
path: struct_type_info.to_string(), // TODO: this is not good, we should maintain a path stack
465467
};
466468
// TODO: is it possible to unify the logging with the one in `do_action`?
467-
tracing::warn!(%error, "undefined nested field, padding with `NULL`");
469+
static LOG_SUPPERSSER: LazyLock<LogSuppresser> = LazyLock::new(LogSuppresser::default);
470+
if let Ok(suppressed_count) = LOG_SUPPERSSER.check() {
471+
tracing::warn!(%error, suppressed_count, "undefined nested field, padding with `NULL`");
472+
}
468473
&BorrowedValue::Static(simd_json::StaticNode::Null)
469474
});
470475
self.parse(field_value, Some(field_type))

src/connector/src/parser/unified/protobuf.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,13 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
use std::sync::Arc;
15+
use std::sync::{Arc, LazyLock};
1616

1717
use anyhow::anyhow;
1818
use prost_reflect::{DescriptorPool, DynamicMessage, ReflectMessage};
1919
use risingwave_common::error::ErrorCode::ProtocolError;
2020
use risingwave_common::error::RwError;
21+
use risingwave_common::log::LogSuppresser;
2122
use risingwave_common::types::DataType;
2223

2324
use super::{Access, AccessResult};
@@ -47,7 +48,11 @@ impl Access for ProtobufAccess {
4748
.get_field_by_name(path[0])
4849
.ok_or_else(|| {
4950
let err_msg = format!("protobuf schema don't have field {}", path[0]);
50-
tracing::error!(err_msg);
51+
static LOG_SUPPERSSER: LazyLock<LogSuppresser> =
52+
LazyLock::new(LogSuppresser::default);
53+
if let Ok(suppressed_count) = LOG_SUPPERSSER.check() {
54+
tracing::error!(suppressed_count, err_msg);
55+
}
5156
RwError::from(ProtocolError(err_msg))
5257
})
5358
.map_err(|e| AccessError::Other(anyhow!(e)))?;

0 commit comments

Comments
 (0)