Skip to content

Commit 01954d3

Browse files
Switching out flurry for papaya for better read-heavy performance (#191)
* Chore: Initial commit for issue progress tracking
* Chore: Removed flurry use from db crate
* Chore: Replaced all flurry usage with papaya
* Swap flurry for papaya
* Upgrading cargo to 1.85
* Fix conflicts with main
* Papaya stores are lighter?

---------

Co-authored-by: Diretnan Domnan <[email protected]>
1 parent edc59b5 commit 01954d3

File tree

23 files changed

+101
-132
lines changed

23 files changed

+101
-132
lines changed

.github/workflows/release.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ jobs:
4141
- name: Get Cargo toolchain
4242
uses: actions-rs/toolchain@v1
4343
with:
44-
toolchain: 1.81.0
44+
toolchain: 1.85.0
4545

4646
- name: Install Protoc
4747
uses: arduino/setup-protoc@v3
@@ -68,7 +68,7 @@ jobs:
6868
- name: Get Cargo toolchain
6969
uses: actions-rs/toolchain@v1
7070
with:
71-
toolchain: 1.81.0
71+
toolchain: 1.85.0
7272

7373
- name: Install Protoc
7474
uses: arduino/setup-protoc@v3
@@ -127,7 +127,7 @@ jobs:
127127
- name: Get Cargo toolchain
128128
uses: actions-rs/toolchain@v1
129129
with:
130-
toolchain: 1.81.0
130+
toolchain: 1.85.0
131131

132132
- name: Install Protoc
133133
uses: arduino/setup-protoc@v3

.github/workflows/rust_tag_and_deploy.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,7 @@ jobs:
6666
- name: Get Cargo toolchain
6767
uses: actions-rs/toolchain@v1
6868
with:
69-
toolchain: 1.81.0
69+
toolchain: 1.85.0
7070

7171
- name: Install Protoc
7272
uses: arduino/setup-protoc@v3

ahnlich/Cargo.lock

+20-48
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ahnlich/Cargo.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,6 @@ pretty_assertions = "1.4.0"
2929
tracing = "0.1"
3030
thiserror = "1.0"
3131
blake3 = "1.5.1"
32-
flurry = { version = "0.5.1", features = ["serde"] }
3332
tokio = { version = "1.37.0", features = [
3433
"net",
3534
"macros",
@@ -48,6 +47,7 @@ log = "0.4"
4847
fallible_collections = "0.4.9"
4948
dirs = "5.0.1"
5049
strum = { version = "0.26", features = ["derive"] }
50+
papaya = { version = "0.2.0", features = ["serde"] }
5151
tonic = "0.12.3"
5252
tokio-stream = "0.1.17"
5353

ahnlich/ai/Cargo.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,6 @@ name = "ahnlich-ai"
1414
path = "src/main.rs"
1515

1616
[dependencies]
17-
flurry.workspace = true
1817
tokio.workspace = true
1918
serde.workspace = true
2019
ndarray.workspace = true
@@ -51,6 +50,7 @@ futures.workspace = true
5150
tiktoken-rs = "0.5.9"
5251
itertools.workspace = true
5352
tokenizers = { version = "0.20.1", features = ["hf-hub"] }
53+
papaya.workspace = true
5454

5555
[features]
5656
# ORT Execution providers

ahnlich/ai/src/engine/store.rs

+1-1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ use ahnlich_types::keyval::StoreName;
1212
use ahnlich_types::keyval::StoreValue;
1313
use ahnlich_types::metadata::MetadataValue;
1414
use fallible_collections::FallibleVec;
15-
use flurry::HashMap as ConcurrentHashMap;
15+
use papaya::HashMap as ConcurrentHashMap;
1616
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
1717
use serde::Deserialize;
1818
use serde::Serialize;

ahnlich/ai/src/tests/aiproxy_test.rs

-1
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ use ahnlich_types::{
1111
predicate::{Predicate, PredicateCondition},
1212
similarity::Algorithm,
1313
};
14-
// use flurry::HashMap;
1514
use utils::server::AhnlichServerUtils;
1615

1716
use once_cell::sync::Lazy;

ahnlich/client/src/db.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -547,7 +547,7 @@ mod tests {
547547
StoreInfo {
548548
name: StoreName("Main".to_string()),
549549
len: 0,
550-
size_in_bytes: 1720,
550+
size_in_bytes: 1056,
551551
},
552552
]))));
553553
let res = pipeline.exec().await.expect("Could not execute pipeline");
@@ -603,7 +603,7 @@ mod tests {
603603
ServerResponse::StoreList(HashSet::from_iter([StoreInfo {
604604
name: StoreName("Main".to_string()),
605605
len: 2,
606-
size_in_bytes: 2016,
606+
size_in_bytes: 1352,
607607
},]))
608608
);
609609
// error as different dimensions
@@ -628,7 +628,7 @@ mod tests {
628628
ServerResponse::StoreList(HashSet::from_iter([StoreInfo {
629629
name: StoreName("Main".to_string()),
630630
len: 1,
631-
size_in_bytes: 1904,
631+
size_in_bytes: 1240,
632632
},]))
633633
);
634634
}

ahnlich/db/Cargo.toml

+1-1
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,6 @@ bench = false
1616
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
1717

1818
[dependencies]
19-
flurry.workspace = true
2019
serde.workspace = true
2120
blake3.workspace = true
2221
ndarray.workspace = true
@@ -39,6 +38,7 @@ rayon.workspace = true
3938
log.workspace = true
4039
fallible_collections.workspace = true
4140
pulp = "0.21.4"
41+
papaya.workspace = true
4242
grpc_types = { path = "../grpc_types", version = "*" }
4343
tonic.workspace = true
4444
tokio-stream.workspace = true

ahnlich/db/src/algorithm/mod.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -54,13 +54,13 @@ impl<'a> From<(&'a StoreKey, f32)> for SimilarityVector<'a> {
5454
}
5555
}
5656

57-
impl<'a> PartialEq for SimilarityVector<'a> {
57+
impl PartialEq for SimilarityVector<'_> {
5858
fn eq(&self, other: &Self) -> bool {
5959
*((self.0).0) == *((other.0).0)
6060
}
6161
}
6262

63-
impl<'a> Eq for SimilarityVector<'a> {}
63+
impl Eq for SimilarityVector<'_> {}
6464

6565
impl PartialOrd for SimilarityVector<'_> {
6666
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {

ahnlich/db/src/algorithm/non_linear.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ use ahnlich_similarity::utils::VecF32Ordered;
55
use ahnlich_similarity::NonLinearAlgorithmWithIndexImpl;
66
use ahnlich_types::keyval::StoreKey;
77
use ahnlich_types::similarity::NonLinearAlgorithm;
8-
use flurry::HashMap as ConcurrentHashMap;
8+
use papaya::HashMap as ConcurrentHashMap;
99
use rayon::iter::IntoParallelIterator;
1010
use rayon::iter::ParallelIterator;
1111
use serde::Deserialize;
@@ -136,7 +136,7 @@ impl NonLinearAlgorithmIndices {
136136
let pinned = self.algorithm_to_index.pin();
137137
if let (true, Some(non_existing_index)) = (
138138
error_if_not_exists,
139-
indices.iter().find(|a| !pinned.contains_key(a)),
139+
indices.iter().find(|a| !pinned.contains_key(*a)),
140140
) {
141141
return Err(ServerError::NonLinearIndexNotFound(*non_existing_index));
142142
}

ahnlich/db/src/algorithm/similarity.rs

+3-6
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ struct Magnitude<'a> {
3232
second: &'a [f32],
3333
}
3434

35-
impl<'a> WithSimd for Magnitude<'a> {
35+
impl WithSimd for Magnitude<'_> {
3636
type Output = f32;
3737

3838
#[inline(always)]
@@ -134,13 +134,12 @@ fn cosine_similarity(first: &StoreKey, second: &StoreKey) -> f32 {
134134
///
135135
/// An Implementation for most similar items would be a MaxHeap.
136136
/// The larger the dot product between two vectors, the more similar
137-
138137
struct DotProduct<'a> {
139138
first: &'a [f32],
140139
second: &'a [f32],
141140
}
142141

143-
impl<'a> WithSimd for DotProduct<'a> {
142+
impl WithSimd for DotProduct<'_> {
144143
type Output = f32;
145144

146145
#[inline(always)]
@@ -199,17 +198,15 @@ fn dot_product(first: &StoreKey, second: &StoreKey) -> f32 {
199198
/// An Implementation for most similar items would be a MinHeap, The smaller the distance between
200199
/// two points, denotes higher similarity
201200
///
202-
203201
struct EuclideanDistance<'a> {
204202
first: &'a [f32],
205203
second: &'a [f32],
206204
}
207205

208-
impl<'a> WithSimd for EuclideanDistance<'a> {
206+
impl WithSimd for EuclideanDistance<'_> {
209207
type Output = f32;
210208

211209
#[inline(always)]
212-
213210
fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
214211
let (first_head, first_tail) = S::as_simd_f32s(self.first);
215212
let (second_head, second_tail) = S::as_simd_f32s(self.second);

ahnlich/db/src/engine/predicate.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,9 @@ use ahnlich_types::metadata::MetadataKey;
66
use ahnlich_types::metadata::MetadataValue;
77
use ahnlich_types::predicate::Predicate;
88
use ahnlich_types::predicate::PredicateCondition;
9-
use flurry::HashMap as ConcurrentHashMap;
10-
use flurry::HashSet as ConcurrentHashSet;
119
use itertools::Itertools;
10+
use papaya::HashMap as ConcurrentHashMap;
11+
use papaya::HashSet as ConcurrentHashSet;
1212
use rayon::iter::IndexedParallelIterator;
1313
use rayon::iter::IntoParallelIterator;
1414
use rayon::iter::ParallelIterator;
@@ -114,7 +114,7 @@ impl PredicateIndices {
114114
// first check all predicates
115115
if let (true, Some(non_existing_index)) = (
116116
error_if_not_exists,
117-
predicates.iter().find(|a| !pinned_keys.contains(a)),
117+
predicates.iter().find(|a| !pinned_keys.contains(*a)),
118118
) {
119119
return Err(ServerError::PredicateNotFound(non_existing_index.clone()));
120120
}

ahnlich/db/src/engine/store.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ use ahnlich_types::predicate::PredicateCondition;
1515
use ahnlich_types::similarity::Algorithm;
1616
use ahnlich_types::similarity::NonLinearAlgorithm;
1717
use ahnlich_types::similarity::Similarity;
18-
use flurry::HashMap as ConcurrentHashMap;
18+
use papaya::HashMap as ConcurrentHashMap;
1919
use serde::Deserialize;
2020
use serde::Serialize;
2121
use std::collections::HashMap as StdHashMap;
@@ -1130,12 +1130,12 @@ mod tests {
11301130
StoreInfo {
11311131
name: odd_store,
11321132
len: 2,
1133-
size_in_bytes: 2096,
1133+
size_in_bytes: 1432,
11341134
},
11351135
StoreInfo {
11361136
name: even_store,
11371137
len: 0,
1138-
size_in_bytes: 1744,
1138+
size_in_bytes: 1080,
11391139
},
11401140
])
11411141
)

0 commit comments

Comments
 (0)