12
12
// See the License for the specific language governing permissions and
13
13
// limitations under the License.
14
14
15
- use bytes:: Bytes ;
15
+ use bytes:: { BufMut , Bytes } ;
16
16
use prost:: Message ;
17
17
use prost_reflect:: {
18
18
DynamicMessage , FieldDescriptor , Kind , MessageDescriptor , ReflectMessage , Value ,
@@ -30,13 +30,25 @@ pub struct ProtoEncoder {
30
30
schema : Schema ,
31
31
col_indices : Option < Vec < usize > > ,
32
32
descriptor : MessageDescriptor ,
33
+ header : ProtoHeader ,
34
+ }
35
+
36
/// Framing bytes written in front of every serialized protobuf message.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProtoHeader {
    /// No framing: the output is exactly the protobuf-encoded message.
    None,
    /// Confluent Schema Registry wire format, carrying the schema ID.
    ///
    /// <https://docs.confluent.io/platform/7.5/schema-registry/fundamentals/serdes-develop/index.html#messages-wire-format>
    ///
    /// The serialized payload is prefixed with:
    ///
    /// * 1 magic byte, always `0x00`
    /// * 4-byte big-endian schema ID
    /// * message indexes locating the message type inside its schema
    ///   (zig-zag varints; a single `0x00` byte when the indexes are `[0]`)
    ConfluentSchemaRegistry(i32),
}
34
45
35
46
impl ProtoEncoder {
36
47
pub fn new (
37
48
schema : Schema ,
38
49
col_indices : Option < Vec < usize > > ,
39
50
descriptor : MessageDescriptor ,
51
+ header : ProtoHeader ,
40
52
) -> SinkResult < Self > {
41
53
match & col_indices {
42
54
Some ( col_indices) => validate_fields (
@@ -59,12 +71,18 @@ impl ProtoEncoder {
59
71
schema,
60
72
col_indices,
61
73
descriptor,
74
+ header,
62
75
} )
63
76
}
64
77
}
65
78
79
+ pub struct ProtoEncoded {
80
+ message : DynamicMessage ,
81
+ header : ProtoHeader ,
82
+ }
83
+
66
84
impl RowEncoder for ProtoEncoder {
67
- type Output = DynamicMessage ;
85
+ type Output = ProtoEncoded ;
68
86
69
87
fn schema ( & self ) -> & Schema {
70
88
& self . schema
@@ -87,12 +105,68 @@ impl RowEncoder for ProtoEncoder {
87
105
& self . descriptor ,
88
106
)
89
107
. map_err ( Into :: into)
108
+ . map ( |m| ProtoEncoded {
109
+ message : m,
110
+ header : self . header ,
111
+ } )
90
112
}
91
113
}
92
114
93
- impl SerTo < Vec < u8 > > for DynamicMessage {
115
+ impl SerTo < Vec < u8 > > for ProtoEncoded {
94
116
fn ser_to ( self ) -> SinkResult < Vec < u8 > > {
95
- Ok ( self . encode_to_vec ( ) )
117
+ let mut buf = Vec :: new ( ) ;
118
+ match self . header {
119
+ ProtoHeader :: None => { /* noop */ }
120
+ ProtoHeader :: ConfluentSchemaRegistry ( schema_id) => {
121
+ buf. reserve ( 1 + 4 ) ;
122
+ buf. put_u8 ( 0 ) ;
123
+ buf. put_i32 ( schema_id) ;
124
+ MessageIndexes :: from ( self . message . descriptor ( ) ) . encode ( & mut buf) ;
125
+ }
126
+ }
127
+ self . message . encode ( & mut buf) . unwrap ( ) ;
128
+ Ok ( buf)
129
+ }
130
+ }
131
+
132
+ struct MessageIndexes ( Vec < i32 > ) ;
133
+
134
+ impl MessageIndexes {
135
+ fn from ( desc : MessageDescriptor ) -> Self {
136
+ // https://github.com/protocolbuffers/protobuf/blob/v25.1/src/google/protobuf/descriptor.proto
137
+ // https://docs.rs/prost-reflect/0.12.0/src/prost_reflect/descriptor/tag.rs.html
138
+ // https://docs.rs/prost-reflect/0.12.0/src/prost_reflect/descriptor/build/visit.rs.html#125
139
+ // `FileDescriptorProto` field #4 is `repeated DescriptorProto message_type`
140
+ const TAG_FILE_MESSAGE : i32 = 4 ;
141
+ // `DescriptorProto` field #3 is `repeated DescriptorProto nested_type`
142
+ const TAG_MESSAGE_NESTED : i32 = 3 ;
143
+
144
+ let mut indexes = vec ! [ ] ;
145
+ let mut path = desc. path ( ) . array_chunks ( ) ;
146
+ let & [ tag, idx] = path. next ( ) . unwrap ( ) ;
147
+ assert_eq ! ( tag, TAG_FILE_MESSAGE ) ;
148
+ indexes. push ( idx) ;
149
+ for & [ tag, idx] in path {
150
+ assert_eq ! ( tag, TAG_MESSAGE_NESTED ) ;
151
+ indexes. push ( idx) ;
152
+ }
153
+ Self ( indexes)
154
+ }
155
+
156
+ fn zig_i32 ( value : i32 , buf : & mut impl BufMut ) {
157
+ let unsigned = ( ( value << 1 ) ^ ( value >> 31 ) ) as u32 as u64 ;
158
+ prost:: encoding:: encode_varint ( unsigned, buf) ;
159
+ }
160
+
161
+ fn encode ( & self , buf : & mut impl BufMut ) {
162
+ if self . 0 == [ 0 ] {
163
+ buf. put_u8 ( 0 ) ;
164
+ return ;
165
+ }
166
+ Self :: zig_i32 ( self . 0 . len ( ) . try_into ( ) . unwrap ( ) , buf) ;
167
+ for & idx in & self . 0 {
168
+ Self :: zig_i32 ( idx, buf) ;
169
+ }
96
170
}
97
171
}
98
172
@@ -367,7 +441,8 @@ mod tests {
367
441
Some ( ScalarImpl :: Timestamptz ( Timestamptz :: from_micros( 3 ) ) ) ,
368
442
] ) ;
369
443
370
- let encoder = ProtoEncoder :: new ( schema, None , descriptor. clone ( ) ) . unwrap ( ) ;
444
+ let encoder =
445
+ ProtoEncoder :: new ( schema, None , descriptor. clone ( ) , ProtoHeader :: None ) . unwrap ( ) ;
371
446
let m = encoder. encode ( row) . unwrap ( ) ;
372
447
let encoded: Vec < u8 > = m. ser_to ( ) . unwrap ( ) ;
373
448
assert_eq ! (
0 commit comments