Skip to content

Commit 558d881

Browse files
SIMD Implementation of Linear Algorithms (#187)
* WIP: implementing SIMD for magnitudes * Fixing broken SIMD variant and implementing comparison test for cosine * Implement SIMD variant of Euclidean distance * Implement a SIMD variant of dot product * Use rayon to speed up array creation * Format with clippy * Fix typo and some ergonomics --------- Co-authored-by: Diretnan Domnan <[email protected]>
1 parent 30dea19 commit 558d881

File tree

4 files changed

+402
-22
lines changed

4 files changed

+402
-22
lines changed

ahnlich/Cargo.lock

+48-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ahnlich/db/Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -38,14 +38,15 @@ async-trait.workspace = true
3838
rayon.workspace = true
3939
log.workspace = true
4040
fallible_collections.workspace = true
41-
41+
pulp = "0.21.4"
4242

4343
[dev-dependencies]
4444
futures.workspace = true
4545
once_cell.workspace = true
4646
pretty_assertions.workspace = true
4747
criterion = "0.4"
4848
rand.workspace = true
49+
fastrand = "1.9"
4950

5051
[[bench]]
5152
name = "database"

ahnlich/db/benches/database.rs

+20-8
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,31 @@
11
use ahnlich_db::engine::store::StoreHandler;
22
use ahnlich_types::keyval::StoreKey;
33
use ahnlich_types::keyval::StoreName;
4+
use ahnlich_types::keyval::StoreValue;
45
use ahnlich_types::similarity::Algorithm;
56
use ahnlich_types::similarity::NonLinearAlgorithm;
67
use criterion::{criterion_group, criterion_main, Criterion};
78
use ndarray::Array;
89
use ndarray::Array1;
10+
use rayon::iter::ParallelIterator;
11+
use rayon::slice::ParallelSlice;
912
use std::collections::HashMap;
1013
use std::collections::HashSet;
1114
use std::num::NonZeroUsize;
1215
use std::sync::atomic::AtomicBool;
1316
use std::sync::Arc;
1417

18+
/// Builds benchmark input: a list of `(StoreKey, StoreValue)` pairs with random keys.
///
/// A single flat buffer of `size * dimension` random `f32` values is filled first,
/// then cut into `dimension`-long chunks; each chunk becomes one `StoreKey` array
/// paired with an empty `HashMap` as its value.
///
/// * `size` - number of entries the flat buffer is sized for.
/// * `dimension` - length of each key vector (the chunk size).
///
/// NOTE(review): `dimension == 0` is not guarded here; `par_chunks_exact` presumably
/// rejects a zero chunk size — confirm against the rayon documentation.
fn generate_storekey_store_value(size: usize, dimension: usize) -> Vec<(StoreKey, StoreValue)> {
19+
// Reserve the whole flat buffer up front so filling it does not reallocate.
let mut buffer: Vec<f32> = Vec::with_capacity(size * dimension);
20+
buffer.extend((0..size * dimension).map(|_| fastrand::f32()));
21+
22+
// Split the flat buffer into `dimension`-sized chunks with Rayon and build
// one (key, empty value map) entry per chunk in parallel.
23+
buffer
24+
.par_chunks_exact(dimension)
25+
.map(|chunk| (StoreKey(Array::from(chunk.to_owned())), HashMap::new()))
26+
.collect()
27+
}
28+
1529
fn initialize_store_handler() -> Arc<StoreHandler> {
1630
let write_flag = Arc::new(AtomicBool::new(false));
1731
let handler = Arc::new(StoreHandler::new(write_flag));
@@ -130,7 +144,9 @@ fn bench_insertion(c: &mut Criterion) {
130144
let dimension = dimension.clone();
131145
let random_array = vec![(
132146
StoreKey(Array::from(
133-
(0..dimension).map(|_| rand::random()).collect::<Vec<f32>>(),
147+
(0..dimension)
148+
.map(|_| fastrand::f32())
149+
.collect::<Vec<f32>>(),
134150
)),
135151
HashMap::new(),
136152
)];
@@ -151,13 +167,9 @@ fn bench_insertion(c: &mut Criterion) {
151167
for size in sizes {
152168
let handler = initialize_store_handler();
153169
let dimension = 1024;
154-
let bulk_insert: Vec<_> = (0..size)
155-
.map(|_| {
156-
let random_array: Array1<f32> =
157-
Array::from((0..dimension).map(|_| rand::random()).collect::<Vec<f32>>());
158-
(StoreKey(random_array), HashMap::new())
159-
})
160-
.collect();
170+
171+
let bulk_insert = generate_storekey_store_value(size, dimension);
172+
161173
handler
162174
.create_store(
163175
StoreName(store_name.to_string()),

0 commit comments

Comments
 (0)