Skip to content

Commit c6c53c5

Browse files
committed
fix #117
1 parent f867c78 commit c6c53c5

File tree

4 files changed

+62
-26
lines changed

4 files changed

+62
-26
lines changed

src/parallel.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use roaring::{RoaringBitmap, RoaringTreemap};
1515

1616
use crate::internals::{KeyCodec, Leaf, NodeCodec};
1717
use crate::key::{Key, Prefix, PrefixCodec};
18-
use crate::node::{Node, SplitPlaneNormal, LEAF_TAG};
18+
use crate::node::{Node, SplitPlaneNormal};
1919
use crate::node_id::NodeMode;
2020
use crate::{Database, Distance, Error, ItemId, Result};
2121

@@ -84,7 +84,7 @@ impl<'a, DE: BytesEncode<'a>> TmpNodes<DE> {
8484
/// Delete the tmp_nodes and the node in the database.
8585
pub fn remove(&mut self, item: ItemId) {
8686
let deleted = self.deleted.insert(item);
87-
debug_assert!(deleted);
87+
debug_assert!(deleted, "Removed the same item with id {item} twice");
8888
}
8989

9090
/// Converts it into a readers to read the nodes.

src/reader.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,11 @@ impl<'t, D: Distance> Reader<'t, D> {
500500
rtxn: &RoTxn,
501501
node_id: NodeId,
502502
) -> Result<(RoaringBitmap, RoaringBitmap)> {
503-
match self.database.get(rtxn, &Key::new(self.index, node_id))?.unwrap() {
503+
match self
504+
.database
505+
.get(rtxn, &Key::new(self.index, node_id))?
506+
.unwrap_or_else(|| panic!("Could not find {node_id:?} in index {}", self.index))
507+
{
504508
Node::Leaf(_) => Ok((
505509
RoaringBitmap::new(),
506510
RoaringBitmap::from_sorted_iter(Some(node_id.item)).unwrap(),

src/tests/writer.rs

+33-18
Original file line numberDiff line numberDiff line change
@@ -332,9 +332,9 @@ fn overwrite_one_item_incremental() {
332332
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [0], distance: "euclidean" }
333333
Tree 0: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Item(0), right: Tree(4), normal: [1.0000, 0.0000] })
334334
Tree 1: Descendants(Descendants { descendants: [1, 5] })
335-
Tree 2: Descendants(Descendants { descendants: [3, 4] })
336-
Tree 3: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(2), right: Item(2), normal: [0.0000, 0.0000] })
335+
Tree 3: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(5), right: Item(2), normal: [0.0000, 0.0000] })
337336
Tree 4: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(1), right: Tree(3), normal: [0.0000, 0.0000] })
337+
Tree 5: Descendants(Descendants { descendants: [3, 4] })
338338
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
339339
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] })
340340
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 0.0000] })
@@ -907,7 +907,6 @@ fn delete_extraneous_tree() {
907907
"###);
908908
}
909909

910-
// If we have multiple split node and empty all the branch of the top split node (root node)
911910
// See https://github.com/meilisearch/arroy/issues/117
912911
#[test]
913912
fn create_root_split_node_with_empty_child() {
@@ -952,16 +951,33 @@ fn create_root_split_node_with_empty_child() {
952951
==================
953952
Dumping index 0
954953
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 2, 3, 4]>, roots: [0], distance: "euclidean" }
955-
Tree 0: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Item(0), right: Tree(4), normal: [1.0000, 0.0000] })
956-
Tree 1: Descendants(Descendants { descendants: [] })
954+
Tree 0: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Item(0), right: Tree(3), normal: [1.0000, 0.0000] })
957955
Tree 2: Descendants(Descendants { descendants: [3, 4] })
958956
Tree 3: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(2), right: Item(2), normal: [0.0000, 0.0000] })
959-
Tree 4: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(1), right: Tree(3), normal: [0.0000, 0.0000] })
960957
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
961958
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 0.0000] })
962959
Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [3.0000, 0.0000] })
963960
Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000] })
964961
"###);
962+
963+
let mut wtxn = handle.env.write_txn().unwrap();
964+
let writer = Writer::new(handle.database, 0, 2);
965+
966+
// if we remove 0, then the root node must update itself as well
967+
writer.del_item(&mut wtxn, 0).unwrap();
968+
writer.builder(&mut rng).n_trees(1).build(&mut wtxn).unwrap();
969+
wtxn.commit().unwrap();
970+
971+
insta::assert_snapshot!(handle, @r###"
972+
==================
973+
Dumping index 0
974+
Root: Metadata { dimensions: 2, items: RoaringBitmap<[2, 3, 4]>, roots: [3], distance: "euclidean" }
975+
Tree 2: Descendants(Descendants { descendants: [3, 4] })
976+
Tree 3: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(2), right: Item(2), normal: [0.0000, 0.0000] })
977+
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 0.0000] })
978+
Item 3: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [3.0000, 0.0000] })
979+
Item 4: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [4.0000, 0.0000] })
980+
"###);
965981
}
966982

967983
#[test]
@@ -1053,20 +1069,19 @@ fn reuse_node_id() {
10531069
insta::assert_snapshot!(handle, @r###"
10541070
==================
10551071
Dumping index 0
1056-
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [0, 6], distance: "euclidean" }
1072+
Root: Metadata { dimensions: 2, items: RoaringBitmap<[0, 1, 2, 3, 4, 5]>, roots: [0, 5], distance: "euclidean" }
10571073
Tree 0: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Item(0), right: Tree(4), normal: [1.0000, 0.0000] })
1058-
Tree 1: Descendants(Descendants { descendants: [5] })
1059-
Tree 2: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(5), right: Item(1), normal: [0.0000, 0.0000] })
1060-
Tree 3: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(2), right: Item(2), normal: [0.0000, 0.0000] })
1074+
Tree 1: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(2), right: Item(1), normal: [0.0000, 0.0000] })
1075+
Tree 2: Descendants(Descendants { descendants: [4, 5] })
1076+
Tree 3: Descendants(Descendants { descendants: [2, 3] })
10611077
Tree 4: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(1), right: Tree(3), normal: [0.0000, 0.0000] })
1062-
Tree 5: Descendants(Descendants { descendants: [3, 4] })
1063-
Tree 6: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(12), right: Item(0), normal: [-1.0000, 0.0000] })
1064-
Tree 7: Descendants(Descendants { descendants: [4, 5] })
1065-
Tree 8: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Item(2), right: Tree(7), normal: [0.0000, 0.0000] })
1066-
Tree 9: Descendants(Descendants { descendants: [] })
1067-
Tree 10: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(8), right: Tree(9), normal: [0.0000, 0.0000] })
1068-
Tree 11: Descendants(Descendants { descendants: [1, 3] })
1069-
Tree 12: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(10), right: Tree(11), normal: [0.0000, 0.0000] })
1078+
Tree 5: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(11), right: Item(0), normal: [-1.0000, 0.0000] })
1079+
Tree 6: Descendants(Descendants { descendants: [4, 5] })
1080+
Tree 7: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Item(2), right: Tree(6), normal: [0.0000, 0.0000] })
1081+
Tree 8: Descendants(Descendants { descendants: [] })
1082+
Tree 9: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(7), right: Tree(8), normal: [0.0000, 0.0000] })
1083+
Tree 10: Descendants(Descendants { descendants: [1, 3] })
1084+
Tree 11: SplitPlaneNormal(SplitPlaneNormal<euclidean> { left: Tree(9), right: Tree(10), normal: [0.0000, 0.0000] })
10701085
Item 0: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [0.0000, 0.0000] })
10711086
Item 1: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [1.0000, 0.0000] })
10721087
Item 2: Leaf(Leaf { header: NodeHeaderEuclidean { bias: 0.0 }, vector: [2.0000, 0.0000] })

src/writer.rs

+22-5
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ impl<D: Distance> Writer<D> {
496496
let concurrent_node_ids = ConcurrentNodeIds::new(used_node_ids);
497497

498498
// Before taking any references on the DB, remove all the items we must remove.
499-
self.scan_trees_and_delete_items(wtxn, options, &roots, &to_delete)?;
499+
self.scan_trees_and_delete_items(wtxn, options, &mut roots, &to_delete)?;
500500

501501
let mut descendants_too_big = self.insert_items_in_current_trees(
502502
wtxn,
@@ -812,7 +812,7 @@ impl<D: Distance> Writer<D> {
812812
&self,
813813
wtxn: &mut RwTxn,
814814
options: &BuildOption,
815-
roots: &[u32],
815+
roots: &mut [u32],
816816
to_delete: &RoaringBitmap,
817817
) -> Result<()> {
818818
(options.progress)(WriterProgress { main: MainStep::RemoveItems, sub: None });
@@ -823,9 +823,12 @@ impl<D: Distance> Writer<D> {
823823
};
824824

825825
// TODO: Parallelize
826-
for root in roots.iter().copied() {
827-
self.delete_items_in_file(options, wtxn, root, &mut tmp_nodes, to_delete)?;
826+
for root in roots.iter_mut() {
827+
let (new_root, _) =
828+
self.delete_items_in_file(options, wtxn, *root, &mut tmp_nodes, to_delete)?;
829+
*root = new_root;
828830
}
831+
roots.sort_unstable();
829832

830833
let tmp_nodes = tmp_nodes.into_bytes_reader()?;
831834
for item_id in tmp_nodes.to_delete() {
@@ -902,7 +905,7 @@ impl<D: Distance> Writer<D> {
902905
NodeMode::Metadata | NodeMode::Updated => unreachable!(),
903906
};
904907

905-
let total_items = left_items | right_items;
908+
let total_items = &left_items | &right_items;
906909

907910
if self.fit_in_descendant(opt, total_items.len()) {
908911
// Since we're shrinking we KNOW that new_left and new_right are descendants
@@ -923,6 +926,20 @@ impl<D: Distance> Writer<D> {
923926

924927
// we should merge both branch and update ourselves to be a single descendant node
925928
Ok((current_node, total_items))
929+
930+
// If we don't have any items in either the left or right we can delete ourselves and point directly to our child
931+
} else if left_items.is_empty() {
932+
if new_left.mode == NodeMode::Tree {
933+
tmp_nodes.remove(new_left.item);
934+
}
935+
tmp_nodes.remove(current_node);
936+
Ok((new_right.item, total_items))
937+
} else if right_items.is_empty() {
938+
if new_right.mode == NodeMode::Tree {
939+
tmp_nodes.remove(new_right.item);
940+
}
941+
tmp_nodes.remove(current_node);
942+
Ok((new_left.item, total_items))
926943
} else {
927944
// if either the left or the right changed we must update ourselves inplace
928945
if new_left != left || new_right != right {

0 commit comments

Comments
 (0)