3
3
// (found in the LICENSE-* files in the repository)
4
4
5
5
mod builder;
6
- use super :: { bit_array:: BitArrayReader , CACHE_LINE_BYTES } ;
6
+ use super :: { bit_array:: BitArrayReader , AMQFilter , CACHE_LINE_BYTES } ;
7
+ use crate :: {
8
+ coding:: { DecodeError , Encode , EncodeError } ,
9
+ file:: MAGIC_BYTES ,
10
+ } ;
7
11
pub use builder:: Builder ;
12
+ use byteorder:: { LittleEndian , ReadBytesExt , WriteBytesExt } ;
13
+ use std:: io:: { Read , Write } ;
8
14
9
15
/// Two hashes that are used for double hashing
10
16
pub type CompositeHash = ( u64 , u64 ) ;
@@ -20,29 +26,22 @@ pub struct BlockedBloomFilter {
20
26
num_blocks : usize ,
21
27
}
22
28
23
- // TODO: Implement Encode and Decode for BlockedBloomFilter
29
+ impl AMQFilter for BlockedBloomFilter {
30
+ fn bytes ( & self ) -> & [ u8 ] {
31
+ self . inner . bytes ( )
32
+ }
24
33
25
- impl BlockedBloomFilter {
26
34
/// Size of bloom filter in bytes
27
35
#[ must_use]
28
- pub fn len ( & self ) -> usize {
36
+ fn len ( & self ) -> usize {
29
37
self . inner . bytes ( ) . len ( )
30
38
}
31
39
32
- fn from_raw ( m : usize , k : usize , slice : crate :: Slice ) -> Self {
33
- let num_blocks = m. div_ceil ( CACHE_LINE_BYTES ) ;
34
- Self {
35
- inner : BitArrayReader :: new ( slice) ,
36
- k,
37
- num_blocks,
38
- }
39
- }
40
-
41
40
/// Returns `true` if the hash may be contained.
42
41
///
43
42
/// Will never have a false negative.
44
43
#[ must_use]
45
- pub fn contains_hash ( & self , ( mut h1, mut h2) : CompositeHash ) -> bool {
44
+ fn contains_hash ( & self , ( mut h1, mut h2) : CompositeHash ) -> bool {
46
45
let block_idx = h1 % ( self . num_blocks as u64 ) ;
47
46
48
47
for i in 1 ..( self . k as u64 ) {
@@ -65,10 +64,56 @@ impl BlockedBloomFilter {
65
64
///
66
65
/// Will never have a false negative.
67
66
#[ must_use]
68
- pub fn contains ( & self , key : & [ u8 ] ) -> bool {
67
+ fn contains ( & self , key : & [ u8 ] ) -> bool {
69
68
self . contains_hash ( Self :: get_hash ( key) )
70
69
}
71
70
71
+ fn filter_type ( & self ) -> super :: FilterType {
72
+ super :: FilterType :: BlockedBloom
73
+ }
74
+ }
75
+
76
+ impl Encode for BlockedBloomFilter {
77
+ fn encode_into < W : Write > ( & self , writer : & mut W ) -> Result < ( ) , EncodeError > {
78
+ // Write header
79
+ writer. write_all ( & MAGIC_BYTES ) ?;
80
+
81
+ writer. write_u8 ( super :: FilterType :: BlockedBloom as u8 ) ?;
82
+
83
+ // NOTE: Hash type (unused)
84
+ writer. write_u8 ( 0 ) ?;
85
+
86
+ writer. write_u64 :: < LittleEndian > ( self . num_blocks as u64 ) ?;
87
+ writer. write_u64 :: < LittleEndian > ( self . k as u64 ) ?;
88
+ writer. write_all ( self . inner . bytes ( ) ) ?;
89
+
90
+ Ok ( ( ) )
91
+ }
92
+ }
93
+
94
+ impl BlockedBloomFilter {
95
+ // To be used by AMQFilter after magic bytes and filter type have been read and parsed
96
+ pub ( super ) fn decode_from < R : Read > ( reader : & mut R ) -> Result < Self , DecodeError > {
97
+ // NOTE: Hash type (unused)
98
+ let hash_type = reader. read_u8 ( ) ?;
99
+ assert_eq ! ( 0 , hash_type, "Invalid bloom hash type" ) ;
100
+
101
+ let num_blocks = reader. read_u64 :: < LittleEndian > ( ) ? as usize ;
102
+ let k = reader. read_u64 :: < LittleEndian > ( ) ? as usize ;
103
+
104
+ let mut bytes = vec ! [ 0 ; num_blocks * CACHE_LINE_BYTES ] ;
105
+ reader. read_exact ( & mut bytes) ?;
106
+
107
+ Ok ( Self :: from_raw ( num_blocks, k, bytes. into ( ) ) )
108
+ }
109
+
110
+ fn from_raw ( num_blocks : usize , k : usize , slice : crate :: Slice ) -> Self {
111
+ Self {
112
+ inner : BitArrayReader :: new ( slice) ,
113
+ k,
114
+ num_blocks,
115
+ }
116
+ }
72
117
/// Returns `true` if the bit at `idx` is `1`.
73
118
fn has_bit ( & self , block_idx : usize , idx_in_block : usize ) -> bool {
74
119
self . inner
@@ -84,6 +129,57 @@ impl BlockedBloomFilter {
84
129
#[ cfg( test) ]
85
130
mod tests {
86
131
use super :: * ;
132
+ use crate :: segment:: filter:: { AMQFilterBuilder , FilterType } ;
133
+
134
+ use std:: fs:: File ;
135
+ use test_log:: test;
136
+
137
// Builds a filter, writes it to a temporary file, decodes it again and checks
// that the decoded copy has the same bytes, type and membership answers.
#[test]
fn blocked_bloom_serde_round_trip() -> crate::Result<()> {
    let dir = tempfile::tempdir()?;

    let path = dir.path().join("bf");
    let mut file = File::create(&path)?;

    let mut filter = Builder::with_fp_rate(10, 0.0001);

    let keys = &[
        b"item0", b"item1", b"item2", b"item3", b"item4", b"item5", b"item6", b"item7",
        b"item8", b"item9",
    ];

    for key in keys {
        filter.set_with_hash(BlockedBloomFilter::get_hash(*key));
    }

    let filter = filter.build();

    for key in keys {
        assert!(filter.contains(&**key));
    }
    assert!(!filter.contains(b"asdasads"));
    assert!(!filter.contains(b"item10"));
    assert!(!filter.contains(b"cxycxycxy"));

    filter.encode_into(&mut file)?;
    file.sync_all()?;
    drop(file);

    let mut file = File::open(&path)?;
    let filter_copy = AMQFilterBuilder::decode_from(&mut file)?;

    assert_eq!(filter.inner.bytes(), filter_copy.bytes());
    assert_eq!(FilterType::BlockedBloom, filter_copy.filter_type());

    // Check the decoded copy (not the original) so the round trip is actually
    // verified to preserve every inserted key.
    for key in keys {
        assert!(filter_copy.contains(&**key));
    }
    assert!(!filter_copy.contains(b"asdasads"));
    assert!(!filter_copy.contains(b"item10"));
    assert!(!filter_copy.contains(b"cxycxycxy"));

    Ok(())
}
87
183
88
184
#[ test]
89
185
fn blocked_bloom_basic ( ) {
0 commit comments