-
-
Notifications
You must be signed in to change notification settings - Fork 2.4k
/
Copy pathveb_tree.rs
342 lines (294 loc) · 10.1 KB
/
veb_tree.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
// This struct implements Van Emde Boas tree (VEB tree). It stores integers in range [0, U), where
// O is any integer that is a power of 2. It supports operations such as insert, search,
// predecessor, and successor in O(log(log(U))) time. The structure takes O(U) space.
pub struct VebTree {
size: u32,
child_size: u32, // Set to square root of size. Cache here to avoid recomputation.
min: u32,
max: u32,
summary: Option<Box<VebTree>>,
cluster: Vec<VebTree>,
}
impl VebTree {
/// Create a new, empty VEB tree. The tree will contain number of elements equal to size
/// rounded up to the nearest power of two.
pub fn new(size: u32) -> VebTree {
let rounded_size = size.next_power_of_two();
let child_size = (size as f64).sqrt().ceil() as u32;
let mut cluster = Vec::new();
if rounded_size > 2 {
for _ in 0..rounded_size {
cluster.push(VebTree::new(child_size));
}
}
VebTree {
size: rounded_size,
child_size,
min: u32::MAX,
max: u32::MIN,
cluster,
summary: if rounded_size <= 2 {
None
} else {
Some(Box::new(VebTree::new(child_size)))
},
}
}
fn high(&self, value: u32) -> u32 {
value / self.child_size
}
fn low(&self, value: u32) -> u32 {
value % self.child_size
}
fn index(&self, cluster: u32, offset: u32) -> u32 {
cluster * self.child_size + offset
}
pub fn min(&self) -> u32 {
self.min
}
pub fn max(&self) -> u32 {
self.max
}
pub fn iter(&self) -> VebTreeIter {
VebTreeIter::new(self)
}
// A VEB tree is empty if the min is greater than the max.
pub fn empty(&self) -> bool {
self.min > self.max
}
// Returns true if value is in the tree, false otherwise.
pub fn search(&self, value: u32) -> bool {
if self.empty() {
return false;
} else if value == self.min || value == self.max {
return true;
} else if value < self.min || value > self.max {
return false;
}
self.cluster[self.high(value) as usize].search(self.low(value))
}
fn insert_empty(&mut self, value: u32) {
assert!(self.empty(), "tree should be empty");
self.min = value;
self.max = value;
}
// Inserts value into the tree.
pub fn insert(&mut self, mut value: u32) {
assert!(value < self.size);
if self.empty() {
self.insert_empty(value);
return;
}
if value < self.min {
// If the new value is less than the current tree's min, set the min to the new value
// and insert the old min.
(value, self.min) = (self.min, value);
}
if self.size > 2 {
// Non base case. The checks for min/max will handle trees of size 2.
let high = self.high(value);
let low = self.low(value);
if self.cluster[high as usize].empty() {
// If the cluster tree for the value is empty, we set the min/max of the tree to
// value and record that the cluster tree has an elements in the summary.
self.cluster[high as usize].insert_empty(low);
if let Some(summary) = self.summary.as_mut() {
summary.insert(high);
}
} else {
// If the cluster tree already has a value, the summary does not need to be
// updated. Recursively insert the value into the cluster tree.
self.cluster[high as usize].insert(low);
}
}
if value > self.max {
self.max = value;
}
}
// Returns the next greatest value(successor) in the tree after pred. Returns
// `None` if there is no successor.
pub fn succ(&self, pred: u32) -> Option<u32> {
if self.empty() {
return None;
}
if self.size == 2 {
// Base case. If pred is 0, and 1 exists in the tree (max is set to 1), the successor
// is 1.
return if pred == 0 && self.max == 1 {
Some(1)
} else {
None
};
}
if pred < self.min {
// If the predecessor is less than the minimum of this tree, the successor is the min.
return Some(self.min);
}
let low = self.low(pred);
let high = self.high(pred);
if !self.cluster[high as usize].empty() && low < self.cluster[high as usize].max {
// The successor is within the same cluster as the predecessor
return Some(self.index(high, self.cluster[high as usize].succ(low).unwrap()));
};
// If we reach this point, the successor exists in a different cluster. We use the summary
// to efficiently query which cluster the successor lives in. If there is no successor
// cluster, return None.
let succ_cluster = self.summary.as_ref().unwrap().succ(high);
succ_cluster
.map(|succ_cluster| self.index(succ_cluster, self.cluster[succ_cluster as usize].min))
}
// Returns the next smallest value(predecessor) in the tree after succ. Returns
// `None` if there is no predecessor. pred() is almost a mirror of succ().
// Differences are noted in comments.
pub fn pred(&self, succ: u32) -> Option<u32> {
if self.empty() {
return None;
}
// base case.
if self.size == 2 {
return if succ == 1 && self.min == 0 {
Some(0)
} else {
None
};
}
if succ > self.max {
return Some(self.max);
}
let low = self.low(succ);
let high = self.high(succ);
if !self.cluster[high as usize].empty() && low > self.cluster[high as usize].min {
return Some(self.index(high, self.cluster[high as usize].pred(low).unwrap()));
};
// Find the cluster that has the predecessor. The successor will be that cluster's max.
let succ_cluster = self.summary.as_ref().unwrap().pred(high);
match succ_cluster {
Some(succ_cluster) => {
Some(self.index(succ_cluster, self.cluster[succ_cluster as usize].max))
}
// Special case for pred() that does not exist in succ(). The current tree's min
// does not exist in a cluster. So if we cannot find a cluster that could have the
// predecessor, the predecessor could be the min of the current tree.
None => {
if succ > self.min {
Some(self.min)
} else {
None
}
}
}
}
}
pub struct VebTreeIter<'a> {
tree: &'a VebTree,
curr: Option<u32>,
}
impl<'a> VebTreeIter<'a> {
pub fn new(tree: &'a VebTree) -> VebTreeIter<'a> {
let curr = if tree.empty() { None } else { Some(tree.min) };
VebTreeIter { tree, curr }
}
}
impl<'a> Iterator for VebTreeIter<'a> {
type Item = u32;
fn next(&mut self) -> Option<u32> {
let curr = self.curr;
curr?;
self.curr = self.tree.succ(curr.unwrap());
curr
}
}
#[cfg(test)]
mod test {
use super::VebTree;
use rand::{rngs::StdRng, Rng, SeedableRng};
fn test_veb_tree(size: u32, mut elements: Vec<u32>, exclude: Vec<u32>) {
// Insert elements
let mut tree = VebTree::new(size);
for element in elements.iter() {
tree.insert(*element);
}
// Test search
for element in elements.iter() {
assert!(tree.search(*element));
}
for element in exclude {
assert!(!tree.search(element));
}
// Test iterator and successor, and predecessor
elements.sort();
elements.dedup();
for (i, element) in tree.iter().enumerate() {
assert!(elements[i] == element);
}
for i in 1..elements.len() {
assert!(tree.succ(elements[i - 1]) == Some(elements[i]));
assert!(tree.pred(elements[i]) == Some(elements[i - 1]));
}
}
#[test]
fn test_empty() {
test_veb_tree(16, Vec::new(), (0..16).collect());
}
#[test]
fn test_single() {
test_veb_tree(16, Vec::from([5]), (0..16).filter(|x| *x != 5).collect());
}
#[test]
fn test_two() {
test_veb_tree(
16,
Vec::from([4, 9]),
(0..16).filter(|x| *x != 4 && *x != 9).collect(),
);
}
#[test]
fn test_repeat_insert() {
let mut tree = VebTree::new(16);
for _ in 0..5 {
tree.insert(10);
}
assert!(tree.search(10));
let elements: Vec<u32> = (0..16).filter(|x| *x != 10).collect();
for element in elements {
assert!(!tree.search(element));
}
}
#[test]
fn test_linear() {
test_veb_tree(16, (0..10).collect(), (10..16).collect());
}
fn test_full(size: u32) {
test_veb_tree(size, (0..size).collect(), Vec::new());
}
#[test]
fn test_full_small() {
test_full(8);
test_full(10);
test_full(16);
test_full(20);
test_full(32);
}
#[test]
fn test_full_256() {
test_full(256);
}
#[test]
fn test_10_256() {
let mut rng = StdRng::seed_from_u64(0);
let elements: Vec<u32> = (0..10).map(|_| rng.random_range(0..255)).collect();
test_veb_tree(256, elements, Vec::new());
}
#[test]
fn test_100_256() {
let mut rng = StdRng::seed_from_u64(0);
let elements: Vec<u32> = (0..100).map(|_| rng.random_range(0..255)).collect();
test_veb_tree(256, elements, Vec::new());
}
#[test]
fn test_100_300() {
let mut rng = StdRng::seed_from_u64(0);
let elements: Vec<u32> = (0..100).map(|_| rng.random_range(0..255)).collect();
test_veb_tree(300, elements, Vec::new());
}
}