Skip to content

Commit c382f05

Browse files
djchds
authored and committed
subscriber: use state machine to parse EnvFilter directives (#3243)
Issue #3174 reports that, even in release mode, building the regex used to parse `EnvFilter` directives can take a relatively long time (600 µs). This change replaces the `regex`-based parsing of the directives with a state-machine implementation that is faster and also easier to reason about. Fixes: #3174
1 parent 6017d2c commit c382f05

File tree

2 files changed

+114
-92
lines changed

2 files changed

+114
-92
lines changed

tracing-subscriber/Cargo.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ rust-version = "1.63.0"
2727
default = ["smallvec", "fmt", "ansi", "tracing-log", "std"]
2828
alloc = []
2929
std = ["alloc", "tracing-core/std"]
30-
env-filter = ["matchers", "regex", "once_cell", "tracing", "std", "thread_local"]
30+
env-filter = ["matchers", "once_cell", "tracing", "std", "thread_local"]
3131
fmt = ["registry", "std"]
3232
ansi = ["fmt", "nu-ansi-term"]
3333
registry = ["sharded-slab", "thread_local", "std"]
@@ -37,14 +37,15 @@ valuable = ["tracing-core/valuable", "valuable_crate", "valuable-serde", "tracin
3737
# formatters.
3838
local-time = ["time/local-offset"]
3939
nu-ansi-term = ["dep:nu-ansi-term"]
40+
# For backwards compatibility only
41+
regex = []
4042

4143
[dependencies]
4244
tracing-core = { path = "../tracing-core", version = "0.1.33", default-features = false }
4345

4446
# only required by the filter feature
4547
tracing = { optional = true, path = "../tracing", version = "0.1.41", default-features = false }
4648
matchers = { optional = true, version = "0.1.0" }
47-
regex = { optional = true, version = "1.6.0", default-features = false, features = ["std", "unicode-case", "unicode-perl"] }
4849
smallvec = { optional = true, version = "1.9.0" }
4950
once_cell = { optional = true, version = "1.13.0" }
5051

tracing-subscriber/src/filter/env/directive.rs

Lines changed: 111 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@ use crate::filter::{
44
env::{field, FieldMap},
55
level::LevelFilter,
66
};
7-
use once_cell::sync::Lazy;
8-
use regex::Regex;
97
use std::{cmp::Ordering, fmt, iter::FromIterator, str::FromStr};
108
use tracing_core::{span, Level, Metadata};
119

@@ -120,99 +118,122 @@ impl Directive {
120118
}
121119

122120
pub(super) fn parse(from: &str, regex: bool) -> Result<Self, ParseError> {
123-
static DIRECTIVE_RE: Lazy<Regex> = Lazy::new(|| {
124-
Regex::new(
125-
r"(?x)
126-
^(?P<global_level>(?i:trace|debug|info|warn|error|off|[0-5]))$ |
127-
# ^^^.
128-
# `note: we match log level names case-insensitively
129-
^
130-
(?: # target name or span name
131-
(?P<target>[\w:-]+)|(?P<span>\[[^\]]*\])
132-
){1,2}
133-
(?: # level or nothing
134-
=(?P<level>(?i:trace|debug|info|warn|error|off|[0-5]))?
135-
# ^^^.
136-
# `note: we match log level names case-insensitively
137-
)?
138-
$
139-
",
140-
)
141-
.unwrap()
142-
});
143-
static SPAN_PART_RE: Lazy<Regex> =
144-
Lazy::new(|| Regex::new(r"(?P<name>[^\]\{]+)?(?:\{(?P<fields>[^\}]*)\})?").unwrap());
145-
static FIELD_FILTER_RE: Lazy<Regex> =
146-
// TODO(eliza): this doesn't _currently_ handle value matchers that include comma
147-
// characters. We should fix that.
148-
Lazy::new(|| {
149-
Regex::new(
150-
r"(?x)
151-
(
152-
# field name
153-
[[:word:]][[[:word:]]\.]*
154-
# value part (optional)
155-
(?:=[^,]+)?
156-
)
157-
# trailing comma or EOS
158-
(?:,\s?|$)
159-
",
160-
)
161-
.unwrap()
162-
});
163-
164-
let caps = DIRECTIVE_RE.captures(from).ok_or_else(ParseError::new)?;
121+
let mut cur = Self {
122+
level: LevelFilter::TRACE,
123+
target: None,
124+
in_span: None,
125+
fields: Vec::new(),
126+
};
127+
128+
#[derive(Debug)]
129+
enum ParseState {
130+
Start,
131+
LevelOrTarget { start: usize },
132+
Span { span_start: usize },
133+
Field { field_start: usize },
134+
Fields,
135+
Target,
136+
Level { level_start: usize },
137+
Complete,
138+
}
165139

166-
if let Some(level) = caps
167-
.name("global_level")
168-
.and_then(|s| s.as_str().parse().ok())
169-
{
170-
return Ok(Directive {
171-
level,
172-
..Default::default()
173-
});
140+
use ParseState::*;
141+
let mut state = Start;
142+
for (i, c) in from.trim().char_indices() {
143+
state = match (state, c) {
144+
(Start, '[') => Span { span_start: i + 1 },
145+
(Start, c) if !['-', ':', '_'].contains(&c) && !c.is_alphanumeric() => {
146+
return Err(ParseError::new())
147+
}
148+
(Start, _) => LevelOrTarget { start: i },
149+
(LevelOrTarget { start }, '=') => {
150+
cur.target = Some(from[start..i].to_owned());
151+
Level { level_start: i + 1 }
152+
}
153+
(LevelOrTarget { start }, '[') => {
154+
cur.target = Some(from[start..i].to_owned());
155+
Span { span_start: i + 1 }
156+
}
157+
(LevelOrTarget { start }, ',') => {
158+
let (level, target) = match &from[start..] {
159+
"" => (LevelFilter::TRACE, None),
160+
level_or_target => match LevelFilter::from_str(level_or_target) {
161+
Ok(level) => (level, None),
162+
Err(_) => (LevelFilter::TRACE, Some(level_or_target.to_owned())),
163+
},
164+
};
165+
166+
cur.level = level;
167+
cur.target = target;
168+
Complete
169+
}
170+
(state @ LevelOrTarget { .. }, _) => state,
171+
(Target, '=') => Level { level_start: i + 1 },
172+
(Span { span_start }, ']') => {
173+
cur.in_span = Some(from[span_start..i].to_owned());
174+
Target
175+
}
176+
(Span { span_start }, '{') => {
177+
cur.in_span = match &from[span_start..i] {
178+
"" => None,
179+
_ => Some(from[span_start..i].to_owned()),
180+
};
181+
Field { field_start: i + 1 }
182+
}
183+
(state @ Span { .. }, _) => state,
184+
(Field { field_start }, '}') => {
185+
cur.fields.push(match &from[field_start..i] {
186+
"" => return Err(ParseError::new()),
187+
field => field::Match::parse(field, regex)?,
188+
});
189+
Fields
190+
}
191+
(Field { field_start }, ',') => {
192+
cur.fields.push(match &from[field_start..i] {
193+
"" => return Err(ParseError::new()),
194+
field => field::Match::parse(field, regex)?,
195+
});
196+
Field { field_start: i + 1 }
197+
}
198+
(state @ Field { .. }, _) => state,
199+
(Fields, ']') => Target,
200+
(Level { level_start }, ',') => {
201+
cur.level = match &from[level_start..i] {
202+
"" => LevelFilter::TRACE,
203+
level => LevelFilter::from_str(level)?,
204+
};
205+
Complete
206+
}
207+
(state @ Level { .. }, _) => state,
208+
_ => return Err(ParseError::new()),
209+
};
174210
}
175211

176-
let target = caps.name("target").and_then(|c| {
177-
let s = c.as_str();
178-
if s.parse::<LevelFilter>().is_ok() {
179-
None
180-
} else {
181-
Some(s.to_owned())
212+
match state {
213+
LevelOrTarget { start } => {
214+
let (level, target) = match &from[start..] {
215+
"" => (LevelFilter::TRACE, None),
216+
level_or_target => match LevelFilter::from_str(level_or_target) {
217+
Ok(level) => (level, None),
218+
// Setting the target without the level enables every level for that target
219+
Err(_) => (LevelFilter::TRACE, Some(level_or_target.to_owned())),
220+
},
221+
};
222+
223+
cur.level = level;
224+
cur.target = target;
182225
}
183-
});
184-
185-
let (in_span, fields) = caps
186-
.name("span")
187-
.and_then(|cap| {
188-
let cap = cap.as_str().trim_matches(|c| c == '[' || c == ']');
189-
let caps = SPAN_PART_RE.captures(cap)?;
190-
let span = caps.name("name").map(|c| c.as_str().to_owned());
191-
let fields = caps
192-
.name("fields")
193-
.map(|c| {
194-
FIELD_FILTER_RE
195-
.find_iter(c.as_str())
196-
.map(|c| field::Match::parse(c.as_str(), regex))
197-
.collect::<Result<Vec<_>, _>>()
198-
})
199-
.unwrap_or_else(|| Ok(Vec::new()));
200-
Some((span, fields))
201-
})
202-
.unwrap_or_else(|| (None, Ok(Vec::new())));
203-
204-
let level = caps
205-
.name("level")
206-
.and_then(|l| l.as_str().parse().ok())
207-
// Setting the target without the level enables every level for that target
208-
.unwrap_or(LevelFilter::TRACE);
226+
Level { level_start } => {
227+
cur.level = match &from[level_start..] {
228+
"" => LevelFilter::TRACE,
229+
level => LevelFilter::from_str(level)?,
230+
};
231+
}
232+
Target | Complete => {}
233+
_ => return Err(ParseError::new()),
234+
};
209235

210-
Ok(Self {
211-
level,
212-
target,
213-
in_span,
214-
fields: fields?,
215-
})
236+
Ok(cur)
216237
}
217238
}
218239

0 commit comments

Comments
 (0)