|
| 1 | +use ::value::Value; |
| 2 | +use charset::Charset; |
| 3 | +use data_encoding::BASE64_MIME; |
| 4 | +use nom::{ |
| 5 | + branch::alt, |
| 6 | + bytes::complete::{tag, take_until, take_until1}, |
| 7 | + combinator::{map, map_opt, opt, success}, |
| 8 | + error::{ContextError, ParseError}, |
| 9 | + multi::fold_many1, |
| 10 | + sequence::{delimited, pair, separated_pair}, |
| 11 | + IResult, |
| 12 | +}; |
| 13 | +use vrl::prelude::expression::FunctionExpression; |
| 14 | +use vrl::prelude::*; |
| 15 | + |
| 16 | +#[derive(Clone, Copy, Debug)] |
| 17 | +pub struct DecodeMimeQ; |
| 18 | + |
| 19 | +impl Function for DecodeMimeQ { |
| 20 | + fn identifier(&self) -> &'static str { |
| 21 | + "decode_mime_q" |
| 22 | + } |
| 23 | + |
| 24 | + fn parameters(&self) -> &'static [Parameter] { |
| 25 | + &[Parameter { |
| 26 | + keyword: "value", |
| 27 | + kind: kind::BYTES, |
| 28 | + required: true, |
| 29 | + }] |
| 30 | + } |
| 31 | + |
| 32 | + fn compile( |
| 33 | + &self, |
| 34 | + _state: &state::TypeState, |
| 35 | + _ctx: &mut FunctionCompileContext, |
| 36 | + arguments: ArgumentList, |
| 37 | + ) -> Compiled { |
| 38 | + let value = arguments.required("value"); |
| 39 | + |
| 40 | + Ok(DecodeMimeQFn { value }.as_expr()) |
| 41 | + } |
| 42 | + |
| 43 | + fn examples(&self) -> &'static [Example] { |
| 44 | + &[ |
| 45 | + Example { |
| 46 | + title: "Single", |
| 47 | + source: r#"decode_mime_q!("=?utf-8?b?SGVsbG8sIFdvcmxkIQ==?=")"#, |
| 48 | + result: Ok(r#"Hello, World!"#), |
| 49 | + }, |
| 50 | + Example { |
| 51 | + title: "Embedded", |
| 52 | + source: r#"decode_mime_q!("From: =?utf-8?b?SGVsbG8sIFdvcmxkIQ==?= <=?utf-8?q?hello=5Fworld=40example=2ecom?=>")"#, |
| 53 | + result: Ok(r#"From: Hello, World! <[email protected]>"#), |
| 54 | + }, |
| 55 | + Example { |
| 56 | + title: "Without charset", |
| 57 | + source: r#"decode_mime_q!("?b?SGVsbG8sIFdvcmxkIQ==")"#, |
| 58 | + result: Ok(r#"Hello, World!"#), |
| 59 | + }, |
| 60 | + ] |
| 61 | + } |
| 62 | +} |
| 63 | + |
| 64 | +#[derive(Clone, Debug)] |
| 65 | +struct DecodeMimeQFn { |
| 66 | + value: Box<dyn Expression>, |
| 67 | +} |
| 68 | + |
| 69 | +impl FunctionExpression for DecodeMimeQFn { |
| 70 | + fn resolve(&self, ctx: &mut Context) -> Resolved { |
| 71 | + let value = self.value.resolve(ctx)?; |
| 72 | + |
| 73 | + decode_mime_q(value) |
| 74 | + } |
| 75 | + |
| 76 | + fn type_def(&self, _: &state::TypeState) -> TypeDef { |
| 77 | + TypeDef::bytes().fallible() |
| 78 | + } |
| 79 | +} |
| 80 | + |
| 81 | +fn decode_mime_q(bytes: Value) -> Resolved { |
| 82 | + // Parse |
| 83 | + let input = bytes.try_bytes_utf8_lossy()?; |
| 84 | + let input: &str = &input; |
| 85 | + let (remaining, decoded) = alt(( |
| 86 | + fold_many1( |
| 87 | + parse_delimited_q, |
| 88 | + || Result::<String>::Ok(String::new()), |
| 89 | + |result, (head, word)| { |
| 90 | + let mut result = result?; |
| 91 | + |
| 92 | + result.push_str(head); |
| 93 | + result.push_str(&word.decode_word()?); |
| 94 | + |
| 95 | + Ok(result) |
| 96 | + }, |
| 97 | + ), |
| 98 | + alt(( |
| 99 | + map_opt(parse_internal_q, |word| word.decode_word().map(Ok).ok()), |
| 100 | + success(Ok(String::new())), |
| 101 | + )), |
| 102 | + ))(input) |
| 103 | + .map_err(|e| match e { |
| 104 | + nom::Err::Error(e) | nom::Err::Failure(e) => { |
| 105 | + // Create a descriptive error message if possible. |
| 106 | + nom::error::convert_error(input, e) |
| 107 | + } |
| 108 | + nom::Err::Incomplete(_) => e.to_string(), |
| 109 | + })?; |
| 110 | + |
| 111 | + let mut decoded = decoded?; |
| 112 | + |
| 113 | + // Add remaining input to the decoded string. |
| 114 | + decoded.push_str(remaining); |
| 115 | + |
| 116 | + Ok(decoded.into()) |
| 117 | +} |
| 118 | + |
| 119 | +/// Parses input into (head, (charset, encoding, encoded text)) |
| 120 | +fn parse_delimited_q<'a, E: ParseError<&'a str> + ContextError<&'a str>>( |
| 121 | + input: &'a str, |
| 122 | +) -> IResult<&'a str, (&'a str, EncodedWord<'a>), E> { |
| 123 | + pair( |
| 124 | + take_until("=?"), |
| 125 | + delimited(tag("=?"), parse_internal_q, tag("?=")), |
| 126 | + )(input) |
| 127 | +} |
| 128 | + |
| 129 | +/// Parses inside of encoded word into (charset, encoding, encoded text) |
| 130 | +fn parse_internal_q<'a, E: ParseError<&'a str> + ContextError<&'a str>>( |
| 131 | + input: &'a str, |
| 132 | +) -> IResult<&'a str, EncodedWord<'a>, E> { |
| 133 | + map( |
| 134 | + separated_pair( |
| 135 | + opt(take_until1("?")), |
| 136 | + tag("?"), |
| 137 | + separated_pair( |
| 138 | + take_until("?"), |
| 139 | + tag("?"), |
| 140 | + alt((take_until("?="), |input| Ok(("", input)))), |
| 141 | + ), |
| 142 | + ), |
| 143 | + |(charset, (encoding, input))| EncodedWord { |
| 144 | + charset, |
| 145 | + encoding, |
| 146 | + input, |
| 147 | + }, |
| 148 | + )(input) |
| 149 | +} |
| 150 | + |
| 151 | +struct EncodedWord<'a> { |
| 152 | + charset: Option<&'a str>, |
| 153 | + encoding: &'a str, |
| 154 | + input: &'a str, |
| 155 | +} |
| 156 | + |
| 157 | +impl<'a> EncodedWord<'a> { |
| 158 | + fn decode_word(&self) -> Result<String> { |
| 159 | + // Modified version from https://github.com/staktrace/mailparse/blob/a83d961fe53fd6504d75ee951a0e91dfea03c830/src/header.rs#L39 |
| 160 | + |
| 161 | + // Decode |
| 162 | + let decoded = match self.encoding { |
| 163 | + "B" | "b" => BASE64_MIME |
| 164 | + .decode(self.input.as_bytes()) |
| 165 | + .map_err(|_| "Unable to decode base64 value")?, |
| 166 | + "Q" | "q" => { |
| 167 | + // The quoted_printable module does a trim_end on the input, so if |
| 168 | + // that affects the output we should save and restore the trailing |
| 169 | + // whitespace |
| 170 | + let to_decode = self.input.replace('_', " "); |
| 171 | + let trimmed = to_decode.trim_end(); |
| 172 | + let mut d = quoted_printable::decode(&trimmed, quoted_printable::ParseMode::Robust); |
| 173 | + if d.is_ok() && to_decode.len() != trimmed.len() { |
| 174 | + d.as_mut() |
| 175 | + .unwrap() |
| 176 | + .extend_from_slice(to_decode[trimmed.len()..].as_bytes()); |
| 177 | + } |
| 178 | + d.map_err(|_| "Unable to decode quoted_printable value")? |
| 179 | + } |
| 180 | + _ => return Err(format!("Invalid encoding: {:?}", self.encoding).into()), |
| 181 | + }; |
| 182 | + |
| 183 | + // Convert to UTF-8 |
| 184 | + let charset = self.charset.unwrap_or("utf-8"); |
| 185 | + let charset = Charset::for_label_no_replacement(charset.as_bytes()) |
| 186 | + .ok_or_else(|| format!("Unable to decode {:?} value", charset))?; |
| 187 | + let (cow, _) = charset.decode_without_bom_handling(&decoded); |
| 188 | + Ok(cow.into_owned()) |
| 189 | + } |
| 190 | +} |
| 191 | + |
#[cfg(test)]
mod test {
    use super::*;
    use nom::error::VerboseError;

    // A bare word with an explicit charset is split into its three components.
    #[test]
    fn internal() {
        let (remaining, word) =
            parse_internal_q::<VerboseError<&str>>("utf-8?Q?hello=5Fworld=40example=2ecom")
                .unwrap();
        assert_eq!(remaining, "");
        assert_eq!(word.charset, Some("utf-8"));
        assert_eq!(word.encoding, "Q");
        assert_eq!(word.input, "hello=5Fworld=40example=2ecom");
    }

    // A leading `?` means the charset was omitted.
    #[test]
    fn internal_no_charset() {
        let (remaining, word) =
            parse_internal_q::<VerboseError<&str>>("?Q?hello=5Fworld=40example=2ecom").unwrap();
        assert_eq!(remaining, "");
        assert_eq!(word.charset, None);
        assert_eq!(word.encoding, "Q");
        assert_eq!(word.input, "hello=5Fworld=40example=2ecom");
    }

    test_function![
        decode_mime_q=> DecodeMimeQ;

        non_utf8_charset {
            args: func_args![value: value!("Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=")],
            want: Ok(value!("Subject: ¡Hola, señor!")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Base64 ("B") encoded word on its own.
        base64_single {
            args: func_args![value: value!("=?utf-8?b?SGVsbG8sIFdvcmxkIQ==?=")],
            want: Ok(value!("Hello, World!")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Several encoded words mixed with plain text, using both encodings.
        embedded_multiple {
            args: func_args![value: value!("From: =?utf-8?b?SGVsbG8sIFdvcmxkIQ==?= <=?utf-8?q?hello=5Fworld=40example=2ecom?=>")],
            want: Ok(value!("From: Hello, World! <hello_world@example.com>")),
            tdef: TypeDef::bytes().fallible(),
        }

        // Bare word without charset or `=?` / `?=` delimiters.
        bare_without_charset {
            args: func_args![value: value!("?b?SGVsbG8sIFdvcmxkIQ==")],
            want: Ok(value!("Hello, World!")),
            tdef: TypeDef::bytes().fallible(),
        }

        missing_encoding{
            args: func_args![value: value!("Subject: =?iso-8859-1??=A1Hola,_se=F1or!?=")],
            want: Err("Invalid encoding: \"\""),
            tdef: TypeDef::bytes().fallible(),
        }

        unknown_charset{
            args: func_args![value: value!("Subject: =?iso-9001?Q?hello=5Fworld=40example=2ecom?=")],
            want: Err("Unable to decode \"iso-9001\" value"),
            tdef: TypeDef::bytes().fallible(),
        }

        no_start{
            args: func_args![value: value!("Hello world.")],
            want: Ok(value!("Hello world.")),
            tdef: TypeDef::bytes().fallible(),
        }

        not_encoded{
            args: func_args![value: value!("Is =? equal to ?= or not?")],
            want: Ok(value!("Is =? equal to ?= or not?")),
            tdef: TypeDef::bytes().fallible(),
        }

        partial{
            args: func_args![value: value!("Is =? equal or not?")],
            want: Ok(value!("Is =? equal or not?")),
            tdef: TypeDef::bytes().fallible(),
        }

        empty{
            args: func_args![value: value!("")],
            want: Ok(value!("")),
            tdef: TypeDef::bytes().fallible(),
        }

    ];
}
0 commit comments