Skip to content

Commit ac69a47

Browse files
authored
feat(vrl): Add decode_mime_q function (#14813)
1 parent af18849 commit ac69a47

File tree

7 files changed

+354
-0
lines changed

7 files changed

+354
-0
lines changed

Cargo.lock

+19
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/vrl/stdlib/Cargo.toml

+5
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ vector-common = { path = "../../vector-common", default-features = false, option
1818

1919
base64 = { version = "0.13", optional = true }
2020
bytes = { version = "1.2.1", optional = true }
21+
charset = { version = "0.1.3", optional = true }
2122
chrono = { version = "0.4", optional = true }
2223
cidr-utils = { version = "0.5", optional = true }
2324
csv = { version = "1.1", optional = true }
25+
data-encoding = { version = "2.3.2", optional = true }
2426
dns-lookup = { version = "1.0.8", optional = true }
2527
grok = { version = "2", optional = true }
2628
hex = { version = "0.4", optional = true }
@@ -30,6 +32,7 @@ md-5 = { version = "0.10", optional = true }
3032
nom = { version = "7", optional = true }
3133
percent-encoding = { version = "2.2", optional = true }
3234
once_cell = { version = "1.15", optional = true }
35+
quoted_printable = {version = "0.4.5", optional = true }
3336
rand = { version = "0.8.5", optional = true }
3437
regex = { version = "1", optional = true }
3538
rust_decimal = { version = "1", optional = true }
@@ -81,6 +84,7 @@ default = [
8184
"contains",
8285
"decode_base64",
8386
"decode_percent",
87+
"decode_mime_q",
8488
"decrypt",
8589
"del",
8690
"downcase",
@@ -224,6 +228,7 @@ contains = []
224228
cryptography = ["dep:aes", "dep:ctr", "dep:cbc", "dep:cfb-mode", "dep:ofb"]
225229
decode_base64 = ["dep:base64"]
226230
decode_percent = ["dep:percent-encoding"]
231+
decode_mime_q = ["dep:data-encoding","dep:charset","dep:quoted_printable"]
227232
decrypt = ["cryptography", "random_bytes", "encrypt"]
228233
del = []
229234
downcase = []

lib/vrl/stdlib/benches/benches.rs

+14
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,20 @@ bench_function! {
278278
}
279279
}
280280

281+
// Benchmarks `decode_mime_q` on two inputs: a lone base64 (`b`) encoded-word, and a
// quoted-printable (`Q`) encoded-word in the iso-8859-1 charset embedded after a plain
// "Subject: " prefix.
bench_function! {
282+
decode_mime_q => vrl_stdlib::DecodeMimeQ;
283+
284+
base_64 {
285+
args: func_args![value: "=?utf-8?b?SGVsbG8sIFdvcmxkIQ==?="],
286+
want: Ok("Hello, World!"),
287+
}
288+
289+
quoted_printable {
290+
args: func_args![value: "Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?="],
291+
want: Ok("Subject: ¡Hola, señor!"),
292+
}
293+
}
294+
281295
bench_function! {
282296
downcase => vrl_stdlib::Downcase;
283297

lib/vrl/stdlib/src/decode_mime_q.rs

+264
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
use ::value::Value;
2+
use charset::Charset;
3+
use data_encoding::BASE64_MIME;
4+
use nom::{
5+
branch::alt,
6+
bytes::complete::{tag, take_until, take_until1},
7+
combinator::{map, map_opt, opt, success},
8+
error::{ContextError, ParseError},
9+
multi::fold_many1,
10+
sequence::{delimited, pair, separated_pair},
11+
IResult,
12+
};
13+
use vrl::prelude::expression::FunctionExpression;
14+
use vrl::prelude::*;
15+
16+
#[derive(Clone, Copy, Debug)]
17+
pub struct DecodeMimeQ;
18+
19+
impl Function for DecodeMimeQ {
20+
fn identifier(&self) -> &'static str {
21+
"decode_mime_q"
22+
}
23+
24+
fn parameters(&self) -> &'static [Parameter] {
25+
&[Parameter {
26+
keyword: "value",
27+
kind: kind::BYTES,
28+
required: true,
29+
}]
30+
}
31+
32+
fn compile(
33+
&self,
34+
_state: &state::TypeState,
35+
_ctx: &mut FunctionCompileContext,
36+
arguments: ArgumentList,
37+
) -> Compiled {
38+
let value = arguments.required("value");
39+
40+
Ok(DecodeMimeQFn { value }.as_expr())
41+
}
42+
43+
fn examples(&self) -> &'static [Example] {
44+
&[
45+
Example {
46+
title: "Single",
47+
source: r#"decode_mime_q!("=?utf-8?b?SGVsbG8sIFdvcmxkIQ==?=")"#,
48+
result: Ok(r#"Hello, World!"#),
49+
},
50+
Example {
51+
title: "Embedded",
52+
source: r#"decode_mime_q!("From: =?utf-8?b?SGVsbG8sIFdvcmxkIQ==?= <=?utf-8?q?hello=5Fworld=40example=2ecom?=>")"#,
53+
result: Ok(r#"From: Hello, World! <[email protected]>"#),
54+
},
55+
Example {
56+
title: "Without charset",
57+
source: r#"decode_mime_q!("?b?SGVsbG8sIFdvcmxkIQ==")"#,
58+
result: Ok(r#"Hello, World!"#),
59+
},
60+
]
61+
}
62+
}
63+
64+
#[derive(Clone, Debug)]
65+
struct DecodeMimeQFn {
66+
value: Box<dyn Expression>,
67+
}
68+
69+
impl FunctionExpression for DecodeMimeQFn {
70+
fn resolve(&self, ctx: &mut Context) -> Resolved {
71+
let value = self.value.resolve(ctx)?;
72+
73+
decode_mime_q(value)
74+
}
75+
76+
fn type_def(&self, _: &state::TypeState) -> TypeDef {
77+
TypeDef::bytes().fallible()
78+
}
79+
}
80+
81+
fn decode_mime_q(bytes: Value) -> Resolved {
82+
// Parse
83+
let input = bytes.try_bytes_utf8_lossy()?;
84+
let input: &str = &input;
85+
let (remaining, decoded) = alt((
86+
fold_many1(
87+
parse_delimited_q,
88+
|| Result::<String>::Ok(String::new()),
89+
|result, (head, word)| {
90+
let mut result = result?;
91+
92+
result.push_str(head);
93+
result.push_str(&word.decode_word()?);
94+
95+
Ok(result)
96+
},
97+
),
98+
alt((
99+
map_opt(parse_internal_q, |word| word.decode_word().map(Ok).ok()),
100+
success(Ok(String::new())),
101+
)),
102+
))(input)
103+
.map_err(|e| match e {
104+
nom::Err::Error(e) | nom::Err::Failure(e) => {
105+
// Create a descriptive error message if possible.
106+
nom::error::convert_error(input, e)
107+
}
108+
nom::Err::Incomplete(_) => e.to_string(),
109+
})?;
110+
111+
let mut decoded = decoded?;
112+
113+
// Add remaining input to the decoded string.
114+
decoded.push_str(remaining);
115+
116+
Ok(decoded.into())
117+
}
118+
119+
/// Parses input into (head, (charset, encoding, encoded text))
120+
fn parse_delimited_q<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
121+
input: &'a str,
122+
) -> IResult<&'a str, (&'a str, EncodedWord<'a>), E> {
123+
pair(
124+
take_until("=?"),
125+
delimited(tag("=?"), parse_internal_q, tag("?=")),
126+
)(input)
127+
}
128+
129+
/// Parses inside of encoded word into (charset, encoding, encoded text)
130+
fn parse_internal_q<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
131+
input: &'a str,
132+
) -> IResult<&'a str, EncodedWord<'a>, E> {
133+
map(
134+
separated_pair(
135+
opt(take_until1("?")),
136+
tag("?"),
137+
separated_pair(
138+
take_until("?"),
139+
tag("?"),
140+
alt((take_until("?="), |input| Ok(("", input)))),
141+
),
142+
),
143+
|(charset, (encoding, input))| EncodedWord {
144+
charset,
145+
encoding,
146+
input,
147+
},
148+
)(input)
149+
}
150+
151+
struct EncodedWord<'a> {
152+
charset: Option<&'a str>,
153+
encoding: &'a str,
154+
input: &'a str,
155+
}
156+
157+
impl<'a> EncodedWord<'a> {
158+
fn decode_word(&self) -> Result<String> {
159+
// Modified version from https://github.com/staktrace/mailparse/blob/a83d961fe53fd6504d75ee951a0e91dfea03c830/src/header.rs#L39
160+
161+
// Decode
162+
let decoded = match self.encoding {
163+
"B" | "b" => BASE64_MIME
164+
.decode(self.input.as_bytes())
165+
.map_err(|_| "Unable to decode base64 value")?,
166+
"Q" | "q" => {
167+
// The quoted_printable module does a trim_end on the input, so if
168+
// that affects the output we should save and restore the trailing
169+
// whitespace
170+
let to_decode = self.input.replace('_', " ");
171+
let trimmed = to_decode.trim_end();
172+
let mut d = quoted_printable::decode(&trimmed, quoted_printable::ParseMode::Robust);
173+
if d.is_ok() && to_decode.len() != trimmed.len() {
174+
d.as_mut()
175+
.unwrap()
176+
.extend_from_slice(to_decode[trimmed.len()..].as_bytes());
177+
}
178+
d.map_err(|_| "Unable to decode quoted_printable value")?
179+
}
180+
_ => return Err(format!("Invalid encoding: {:?}", self.encoding).into()),
181+
};
182+
183+
// Convert to UTF-8
184+
let charset = self.charset.unwrap_or("utf-8");
185+
let charset = Charset::for_label_no_replacement(charset.as_bytes())
186+
.ok_or_else(|| format!("Unable to decode {:?} value", charset))?;
187+
let (cow, _) = charset.decode_without_bom_handling(&decoded);
188+
Ok(cow.into_owned())
189+
}
190+
}
191+
192+
#[cfg(test)]
mod test {
    use super::*;
    use nom::error::VerboseError;

    /// A word with a charset is split into charset, encoding and text, consuming all input.
    #[test]
    fn internal() {
        let parsed =
            parse_internal_q::<VerboseError<&str>>("utf-8?Q?hello=5Fworld=40example=2ecom");
        let (rest, parsed_word) = parsed.unwrap();

        assert_eq!(rest, "");
        assert_eq!(parsed_word.charset, Some("utf-8"));
        assert_eq!(parsed_word.encoding, "Q");
        assert_eq!(parsed_word.input, "hello=5Fworld=40example=2ecom");
    }

    /// A word starting with `?` has no charset.
    #[test]
    fn internal_no_charset() {
        let parsed = parse_internal_q::<VerboseError<&str>>("?Q?hello=5Fworld=40example=2ecom");
        let (rest, parsed_word) = parsed.unwrap();

        assert_eq!(rest, "");
        assert_eq!(parsed_word.charset, None);
        assert_eq!(parsed_word.encoding, "Q");
        assert_eq!(parsed_word.input, "hello=5Fworld=40example=2ecom");
    }

    test_function![
        decode_mime_q => DecodeMimeQ;

        // A non-UTF-8 charset is converted to UTF-8.
        non_utf8_charset {
            args: func_args![value: value!("Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=")],
            want: Ok(value!("Subject: ¡Hola, señor!")),
            tdef: TypeDef::bytes().fallible(),
        }

        // An empty encoding marker is rejected.
        missing_encoding {
            args: func_args![value: value!("Subject: =?iso-8859-1??=A1Hola,_se=F1or!?=")],
            want: Err("Invalid encoding: \"\""),
            tdef: TypeDef::bytes().fallible(),
        }

        // An unrecognised charset label is rejected.
        unknown_charset {
            args: func_args![value: value!("Subject: =?iso-9001?Q?hello=5Fworld=40example=2ecom?=")],
            want: Err("Unable to decode \"iso-9001\" value"),
            tdef: TypeDef::bytes().fallible(),
        }

        // Text without any encoded-word is returned unchanged.
        no_start {
            args: func_args![value: value!("Hello world.")],
            want: Ok(value!("Hello world.")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Delimiters that do not enclose a valid encoded-word are left alone.
        not_encoded {
            args: func_args![value: value!("Is =? equal to ?= or not?")],
            want: Ok(value!("Is =? equal to ?= or not?")),
            tdef: TypeDef::bytes().fallible(),
        }

        // An opening delimiter without a closing one is left alone.
        partial {
            args: func_args![value: value!("Is =? equal or not?")],
            want: Ok(value!("Is =? equal or not?")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Empty input yields empty output.
        empty {
            args: func_args![value: value!("")],
            want: Ok(value!("")),
            tdef: TypeDef::bytes().fallible(),
        }
    ];
}

0 commit comments

Comments
 (0)