@@ -2,9 +2,11 @@ use std::convert::TryInto;
2
2
3
3
use bytes:: Bytes ;
4
4
use chrono:: Utc ;
5
+ use derivative:: Derivative ;
5
6
use lookup:: PathPrefix ;
6
7
use serde:: { Deserialize , Serialize } ;
7
8
use smallvec:: { smallvec, SmallVec } ;
9
+ use vector_config:: configurable_component;
8
10
use vector_core:: {
9
11
config:: { log_schema, DataType , LogNamespace } ,
10
12
event:: Event ,
@@ -16,7 +18,36 @@ use super::Deserializer;
16
18
17
19
/// Config used to build a `JsonDeserializer`.
18
20
#[ derive( Debug , Clone , Default , Deserialize , Serialize ) ]
19
- pub struct JsonDeserializerConfig ;
21
+ pub struct JsonDeserializerConfig {
22
+ #[ serde(
23
+ default ,
24
+ skip_serializing_if = "vector_core::serde::skip_serializing_if_default"
25
+ ) ]
26
+ /// Options for the JSON deserializer.
27
+ pub json : JsonDeserializerOptions ,
28
+ }
29
+
30
+ /// JSON-specific decoding options.
31
+ #[ configurable_component]
32
+ #[ derive( Debug , Clone , PartialEq , Eq , Derivative ) ]
33
+ #[ derivative( Default ) ]
34
+ pub struct JsonDeserializerOptions {
35
+ /// Determines whether or not to replace invalid UTF-8 sequences instead of returning an error.
36
+ ///
37
+ /// When true, invalid UTF-8 sequences are replaced with the [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD].
38
+ ///
39
+ /// [U+FFFD]: https://en.wikipedia.org/wiki/Specials_(Unicode_block)#Replacement_character
40
+ #[ serde(
41
+ default = "default_lossy" ,
42
+ skip_serializing_if = "vector_core::serde::skip_serializing_if_default"
43
+ ) ]
44
+ #[ derivative( Default ( value = "default_lossy()" ) ) ]
45
+ lossy : bool ,
46
+ }
47
+
48
/// Default for the `lossy` option: invalid UTF-8 is replaced rather than rejected.
const fn default_lossy() -> bool {
    true
}
20
51
21
52
impl JsonDeserializerConfig {
22
53
/// Build the `JsonDeserializer` from this configuration.
@@ -56,19 +87,23 @@ impl JsonDeserializerConfig {
56
87
57
88
impl JsonDeserializerConfig {
58
89
/// Creates a new `JsonDeserializerConfig`.
59
- pub fn new ( ) -> Self {
60
- Default :: default ( )
90
+ pub fn new ( options : JsonDeserializerOptions ) -> Self {
91
+ Self { json : options }
61
92
}
62
93
}
63
94
64
95
/// Deserializer that builds `Event`s from a byte frame containing JSON.
65
- #[ derive( Debug , Clone , Default ) ]
66
- pub struct JsonDeserializer ;
96
+ #[ derive( Debug , Clone , Derivative ) ]
97
+ #[ derivative( Default ) ]
98
+ pub struct JsonDeserializer {
99
+ #[ derivative( Default ( value = "default_lossy()" ) ) ]
100
+ lossy : bool ,
101
+ }
67
102
68
103
impl JsonDeserializer {
69
104
/// Creates a new `JsonDeserializer`.
70
- pub fn new ( ) -> Self {
71
- Default :: default ( )
105
+ pub fn new ( lossy : bool ) -> Self {
106
+ Self { lossy }
72
107
}
73
108
}
74
109
@@ -84,8 +119,11 @@ impl Deserializer for JsonDeserializer {
84
119
return Ok ( smallvec ! [ ] ) ;
85
120
}
86
121
87
- let json: serde_json:: Value = serde_json:: from_slice ( & bytes)
88
- . map_err ( |error| format ! ( "Error parsing JSON: {:?}" , error) ) ?;
122
+ let json: serde_json:: Value = match self . lossy {
123
+ true => serde_json:: from_str ( & String :: from_utf8_lossy ( & bytes) ) ,
124
+ false => serde_json:: from_slice ( & bytes) ,
125
+ }
126
+ . map_err ( |error| format ! ( "Error parsing JSON: {:?}" , error) ) ?;
89
127
90
128
// If the root is an Array, split it into multiple events
91
129
let mut events = match json {
@@ -119,8 +157,10 @@ impl Deserializer for JsonDeserializer {
119
157
}
120
158
121
159
impl From < & JsonDeserializerConfig > for JsonDeserializer {
122
- fn from ( _: & JsonDeserializerConfig ) -> Self {
123
- Self
160
+ fn from ( config : & JsonDeserializerConfig ) -> Self {
161
+ Self {
162
+ lossy : config. json . lossy ,
163
+ }
124
164
}
125
165
}
126
166
@@ -133,7 +173,7 @@ mod tests {
133
173
#[ test]
134
174
fn deserialize_json ( ) {
135
175
let input = Bytes :: from ( r#"{ "foo": 123 }"# ) ;
136
- let deserializer = JsonDeserializer :: new ( ) ;
176
+ let deserializer = JsonDeserializer :: default ( ) ;
137
177
138
178
for namespace in [ LogNamespace :: Legacy , LogNamespace :: Vector ] {
139
179
let events = deserializer. parse ( input. clone ( ) , namespace) . unwrap ( ) ;
@@ -160,7 +200,7 @@ mod tests {
160
200
#[ test]
161
201
fn deserialize_json_array ( ) {
162
202
let input = Bytes :: from ( r#"[{ "foo": 123 }, { "bar": 456 }]"# ) ;
163
- let deserializer = JsonDeserializer :: new ( ) ;
203
+ let deserializer = JsonDeserializer :: default ( ) ;
164
204
for namespace in [ LogNamespace :: Legacy , LogNamespace :: Vector ] {
165
205
let events = deserializer. parse ( input. clone ( ) , namespace) . unwrap ( ) ;
166
206
let mut events = events. into_iter ( ) ;
@@ -197,7 +237,7 @@ mod tests {
197
237
#[ test]
198
238
fn deserialize_skip_empty ( ) {
199
239
let input = Bytes :: from ( "" ) ;
200
- let deserializer = JsonDeserializer :: new ( ) ;
240
+ let deserializer = JsonDeserializer :: default ( ) ;
201
241
202
242
for namespace in [ LogNamespace :: Legacy , LogNamespace :: Vector ] {
203
243
let events = deserializer. parse ( input. clone ( ) , namespace) . unwrap ( ) ;
@@ -208,7 +248,44 @@ mod tests {
208
248
#[test]
fn deserialize_error_invalid_json() {
    // Malformed JSON must be rejected under both namespaces, using the default deserializer.
    let input = Bytes::from("{ foo");
    let deserializer = JsonDeserializer::default();

    for namespace in [LogNamespace::Legacy, LogNamespace::Vector] {
        assert!(deserializer.parse(input.clone(), namespace).is_err());
    }
}
257
+
258
#[test]
fn deserialize_lossy_replace_invalid_utf8() {
    // `\xF0\x90\x80` is a truncated 4-byte UTF-8 sequence. In lossy mode it is expected to
    // collapse into a single U+FFFD (`\xEF\xBF\xBD`) rather than fail the whole frame.
    let input = Bytes::from(b"{ \"foo\": \"Hello \xF0\x90\x80World\" }".as_slice());
    let deserializer = JsonDeserializer::new(true);

    for namespace in [LogNamespace::Legacy, LogNamespace::Vector] {
        let events = deserializer.parse(input.clone(), namespace).unwrap();
        let mut events = events.into_iter();

        {
            let event = events.next().unwrap();
            let log = event.as_log();
            assert_eq!(log["foo"], b"Hello \xEF\xBF\xBDWorld".into());
            // The test expects a timestamp field only under the legacy namespace.
            assert_eq!(
                log.get((
                    lookup::PathPrefix::Event,
                    log_schema().timestamp_key().unwrap()
                ))
                .is_some(),
                namespace == LogNamespace::Legacy
            );
        }

        // Exactly one event is produced for a single JSON object.
        assert_eq!(events.next(), None);
    }
}
284
+
285
+ #[ test]
286
+ fn deserialize_non_lossy_error_invalid_utf8 ( ) {
287
+ let input = Bytes :: from ( b"{ \" foo\" : \" Hello \xF0 \x90 \x80 World\" }" . as_slice ( ) ) ;
288
+ let deserializer = JsonDeserializer :: new ( false ) ;
212
289
213
290
for namespace in [ LogNamespace :: Legacy , LogNamespace :: Vector ] {
214
291
assert ! ( deserializer. parse( input. clone( ) , namespace) . is_err( ) ) ;
0 commit comments