From 3bcf500bf28a22eff11e2f60e7ef711b7e3968ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 15 Aug 2021 17:43:58 +0200 Subject: [PATCH 1/4] Do not allocate in the get_random_age function --- src/bin/common.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/bin/common.rs b/src/bin/common.rs index f1fe7f7..40454c9 100644 --- a/src/bin/common.rs +++ b/src/bin/common.rs @@ -1,8 +1,7 @@ use fastrand; pub fn get_random_age() -> i8 { - let vs: Vec = vec![5, 10, 15]; - vs[ fastrand::usize(..vs.len()) ] + [5, 10, 15][fastrand::usize(0..3)] } pub fn get_random_active() -> i8 { From 5a4a7973e4076df1ea069914c70ac860bd6cf245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 15 Aug 2021 18:02:21 +0200 Subject: [PATCH 2/4] Move the common module into the lib.rs file as it doesn't contain a main --- src/bin/basic.rs | 3 +-- src/bin/basic_async.rs | 2 +- src/bin/basic_batched.rs | 2 +- src/bin/basic_batched_wp.rs | 2 +- src/bin/basic_prep.rs | 2 +- src/bin/busy.rs | 2 +- src/bin/threaded_batched.rs | 2 +- src/bin/threaded_busy.rs | 4 ++-- src/bin/threaded_str_batched.rs | 2 +- src/{bin/common.rs => lib.rs} | 0 10 files changed, 10 insertions(+), 11 deletions(-) rename src/{bin/common.rs => lib.rs} (100%) diff --git a/src/bin/basic.rs b/src/bin/basic.rs index 7e4d1de..6463eb3 100644 --- a/src/bin/basic.rs +++ b/src/bin/basic.rs @@ -6,10 +6,9 @@ //! //! 
next: basic_async.rs +use fast_sqlite3_inserts as common; use rusqlite::{params, Connection}; -mod common; - fn faker(mut conn: Connection, count: i64) { let tx = conn.transaction().unwrap(); for _ in 0..count { diff --git a/src/bin/basic_async.rs b/src/bin/basic_async.rs index 7b9ecf1..8b62026 100644 --- a/src/bin/basic_async.rs +++ b/src/bin/basic_async.rs @@ -11,7 +11,7 @@ use std::str::FromStr; use sqlx::sqlite::{SqliteConnectOptions, SqliteJournalMode, SqliteSynchronous}; use sqlx::{ConnectOptions, Connection, Executor, SqliteConnection, Statement}; -mod common; +use fast_sqlite3_inserts as common; async fn faker(mut conn: SqliteConnection, count: i64) -> Result<(), sqlx::Error> { let mut tx = conn.begin().await?; diff --git a/src/bin/basic_batched.rs b/src/bin/basic_batched.rs index 21700bc..f2ade42 100644 --- a/src/bin/basic_batched.rs +++ b/src/bin/basic_batched.rs @@ -9,7 +9,7 @@ use rusqlite::{Connection, ToSql, Transaction}; -mod common; +use fast_sqlite3_inserts as common; fn faker_wrapper(mut conn: Connection, count: i64) { let tx = conn.transaction().unwrap(); diff --git a/src/bin/basic_batched_wp.rs b/src/bin/basic_batched_wp.rs index 821ce67..ba37b7e 100644 --- a/src/bin/basic_batched_wp.rs +++ b/src/bin/basic_batched_wp.rs @@ -10,7 +10,7 @@ use rusqlite::Connection; -mod common; +use fast_sqlite3_inserts as common; fn faker(mut conn: Connection, count: i64) { let tx = conn.transaction().unwrap(); diff --git a/src/bin/basic_prep.rs b/src/bin/basic_prep.rs index 02d1697..b74a6a7 100644 --- a/src/bin/basic_prep.rs +++ b/src/bin/basic_prep.rs @@ -8,7 +8,7 @@ use rusqlite::{params, Connection, Transaction}; -mod common; +use fast_sqlite3_inserts as common; fn faker_wrapper(mut conn: Connection, count: i64) { let tx = conn.transaction().unwrap(); diff --git a/src/bin/busy.rs b/src/bin/busy.rs index 2c370b0..921cc48 100644 --- a/src/bin/busy.rs +++ b/src/bin/busy.rs @@ -5,7 +5,7 @@ //! //! 
next: threaded_busy.rs -mod common; +use fast_sqlite3_inserts as common; fn faker(count: i64) { let min_batch_size = 1_000_000; diff --git a/src/bin/threaded_batched.rs b/src/bin/threaded_batched.rs index d76b379..f8c9067 100644 --- a/src/bin/threaded_batched.rs +++ b/src/bin/threaded_batched.rs @@ -13,7 +13,7 @@ use std::sync::mpsc; use std::sync::mpsc::{Receiver, Sender}; use std::thread; -mod common; +use fast_sqlite3_inserts as common; static MIN_BATCH_SIZE: i64 = 50; diff --git a/src/bin/threaded_busy.rs b/src/bin/threaded_busy.rs index dda2362..fd2dd51 100644 --- a/src/bin/threaded_busy.rs +++ b/src/bin/threaded_busy.rs @@ -1,6 +1,6 @@ //! busy loop but threaded. //! -//! This code does not really do anything, just runs two for loops. It has no SQL code. The idea was to measure how much +//! This code does not really do anything, just runs two for loops. It has no SQL code. The idea was to measure how much //! time rust spending just to run a for loop, generating data. This builds upon busy.rs and uses multiple threads. //! //! 
previous: busy.rs @@ -8,7 +8,7 @@ use std::thread; extern crate num_cpus; -mod common; +use fast_sqlite3_inserts as common; fn faker(count: i64) { let min_batch_size = 1_000_000; diff --git a/src/bin/threaded_str_batched.rs b/src/bin/threaded_str_batched.rs index c8aa73c..9294e17 100644 --- a/src/bin/threaded_str_batched.rs +++ b/src/bin/threaded_str_batched.rs @@ -11,7 +11,7 @@ use std::sync::mpsc; use std::sync::mpsc::{Receiver, Sender}; use std::thread; -mod common; +use fast_sqlite3_inserts as common; fn consumer(rx: Receiver) { let mut conn = Connection::open("threaded_str_batched.db").unwrap(); diff --git a/src/bin/common.rs b/src/lib.rs similarity index 100% rename from src/bin/common.rs rename to src/lib.rs From 452a25a1109012896d99f719e12a38b2a045eeb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 15 Aug 2021 19:05:36 +0200 Subject: [PATCH 3/4] Remove the allocations in the get_random_area_code function --- Cargo.lock | 31 ++++++++++++++++++++++++++-- Cargo.toml | 2 ++ src/bin/basic_batched.rs | 2 +- src/bin/busy.rs | 10 +++------- src/bin/threaded_batched.rs | 4 ++-- src/bin/threaded_busy.rs | 8 ++------ src/lib.rs | 40 ++++++++++++++++++++++++++++++++++--- 7 files changed, 76 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cf4c688..63e6707 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -216,9 +216,11 @@ version = "0.1.0" dependencies = [ "fastrand", "num_cpus", + "once_cell", "rand", "rusqlite", "sqlx", + "tinystr", "tokio", ] @@ -625,9 +627,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.7.2" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3" +checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" [[package]] name = "opaque-debug" @@ -1089,6 +1091,31 @@ dependencies = [ "syn", ] +[[package]] +name = "tinystr" +version = "0.4.10" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "01e8700bce17d12d9653596139d85039dd63b8728839a9f2e0c540d5301ea0cd" +dependencies = [ + "tinystr-macros", + "tinystr-raw", +] + +[[package]] +name = "tinystr-macros" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f65be51117c325c2b58eec529be7a0857d11527a9029973b58810a4c63e77a6" +dependencies = [ + "tinystr-raw", +] + +[[package]] +name = "tinystr-raw" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f87ef8b0485e4efff5cac95608adc3251e412fef6039ecd56c5618c8003895" + [[package]] name = "tinyvec" version = "1.2.0" diff --git a/Cargo.toml b/Cargo.toml index a0b4104..d5f1ed9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,3 +12,5 @@ rand = "0.8.3" fastrand = "1" num_cpus = "1.0" rusqlite = "0.25.3" +tinystr = "0.4.10" +once_cell = "1.8.0" diff --git a/src/bin/basic_batched.rs b/src/bin/basic_batched.rs index f2ade42..29b740e 100644 --- a/src/bin/basic_batched.rs +++ b/src/bin/basic_batched.rs @@ -53,7 +53,7 @@ fn faker(tx: &Transaction, count: i64) { let mut param_values: Vec<_> = Vec::new(); if with_area { // lets prepare the batch - let mut vector = Vec::<(String, i8, i8)>::new(); + let mut vector = Vec::<(&str, i8, i8)>::new(); for _ in 0..min_batch_size { let area_code = common::get_random_area_code(); vector.push((area_code, age, is_active)); diff --git a/src/bin/busy.rs b/src/bin/busy.rs index 921cc48..0547a31 100644 --- a/src/bin/busy.rs +++ b/src/bin/busy.rs @@ -1,6 +1,6 @@ //! busy loop //! -//! This code does not really do anything, just runs two for loops. It has no SQL code. The idea was to measure how much +//! This code does not really do anything, just runs two for loops. It has no SQL code. The idea was to measure how much //! time rust spending just to run a for loop, generating data. //! //! 
next: threaded_busy.rs @@ -11,7 +11,7 @@ fn faker(count: i64) { let min_batch_size = 1_000_000; for _ in 0..(count / min_batch_size) { let with_area = common::get_random_bool(); - let mut current_batch = Vec::<(String, i8, i8)>::new(); + let mut current_batch = Vec::<(&str, i8, i8)>::new(); for _ in 0..min_batch_size { if with_area { current_batch.push(( @@ -20,11 +20,7 @@ fn faker(count: i64) { common::get_random_active(), )); } else { - current_batch.push(( - "".parse().unwrap(), - common::get_random_age(), - common::get_random_active(), - )); + current_batch.push(("", common::get_random_age(), common::get_random_active())); } } } diff --git a/src/bin/threaded_batched.rs b/src/bin/threaded_batched.rs index f8c9067..053afd2 100644 --- a/src/bin/threaded_batched.rs +++ b/src/bin/threaded_batched.rs @@ -18,7 +18,7 @@ use fast_sqlite3_inserts as common; static MIN_BATCH_SIZE: i64 = 50; enum ParamValues { - WithArea(Vec<(String, i8, i8)>), + WithArea(Vec<(&'static str, i8, i8)>), WithoutArea(Vec<(i8, i8)>), } @@ -93,7 +93,7 @@ fn producer(tx: Sender, count: i64) { let mut param_values: Vec<_> = Vec::new(); if with_area { // lets prepare the batch - let mut vector = Vec::<(String, i8, i8)>::new(); + let mut vector = Vec::<(&str, i8, i8)>::new(); for _ in 0..MIN_BATCH_SIZE { let area_code = common::get_random_area_code(); vector.push((area_code, age, is_active)); diff --git a/src/bin/threaded_busy.rs b/src/bin/threaded_busy.rs index fd2dd51..42cdd88 100644 --- a/src/bin/threaded_busy.rs +++ b/src/bin/threaded_busy.rs @@ -14,7 +14,7 @@ fn faker(count: i64) { let min_batch_size = 1_000_000; for _ in 0..(count / min_batch_size) { let with_area = common::get_random_bool(); - let mut current_batch = Vec::<(String, i8, i8)>::new(); + let mut current_batch = Vec::<(&str, i8, i8)>::new(); for _ in 0..min_batch_size { if with_area { current_batch.push(( @@ -23,11 +23,7 @@ fn faker(count: i64) { common::get_random_active(), )); } else { - current_batch.push(( - 
"".parse().unwrap(), - common::get_random_age(), - common::get_random_active(), - )); + current_batch.push(("", common::get_random_age(), common::get_random_active())); } } } diff --git a/src/lib.rs b/src/lib.rs index 40454c9..c4acff3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,5 @@ -use fastrand; +use once_cell::sync::Lazy; +use tinystr::TinyStr8; pub fn get_random_age() -> i8 { [5, 10, 15][fastrand::usize(0..3)] @@ -12,6 +13,39 @@ pub fn get_random_bool() -> bool { fastrand::bool() } -pub fn get_random_area_code() -> String { - format!("{:06}", fastrand::u32(0..999_999)) +pub fn get_random_area_code() -> &'static str { + static AREA_CODES: Lazy> = Lazy::new(|| { + (0..=999_999) + .map(|i| TinyStr8::from_bytes(&format_6digits_number(i)).unwrap()) + .collect() + }); + + &AREA_CODES[fastrand::usize(0..=999_999)] +} + +/// Formats a number that is between 0 and 999_999, +/// the number will be padded with `0`s. +pub fn format_6digits_number(mut n: u32) -> [u8; 6] { + let mut buffer = [b'0'; 6]; + let mut i = buffer.len() - 1; + while i < buffer.len() { + buffer[i] = (n % 10) as u8 + b'0'; + n = n / 10; + i = i.wrapping_sub(1); + } + buffer +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn formatting() { + for n in 0..=999_999 { + let output = format_6digits_number(n); + let expected = format!("{:06}", n); + assert_eq!(output, expected.as_bytes()); + } + } } From ece15a09d08bc1bcf0632b0240bf4cf5a7bf2b88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 15 Aug 2021 19:42:52 +0200 Subject: [PATCH 4/4] Replace the once_cell by directly returning the AreaCode newtype --- Cargo.lock | 1 - Cargo.toml | 1 - src/bin/basic_async.rs | 2 +- src/bin/basic_batched.rs | 3 ++- src/bin/basic_batched_wp.rs | 2 +- src/bin/busy.rs | 7 ++++--- src/bin/threaded_batched.rs | 5 +++-- src/bin/threaded_busy.rs | 7 ++++--- src/bin/threaded_str_batched.rs | 2 +- src/lib.rs | 28 +++++++++++++++++++--------- 10 files changed, 35 insertions(+), 23 
deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 63e6707..20b974d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -216,7 +216,6 @@ version = "0.1.0" dependencies = [ "fastrand", "num_cpus", - "once_cell", "rand", "rusqlite", "sqlx", diff --git a/Cargo.toml b/Cargo.toml index d5f1ed9..5c46d3d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,4 +13,3 @@ fastrand = "1" num_cpus = "1.0" rusqlite = "0.25.3" tinystr = "0.4.10" -once_cell = "1.8.0" diff --git a/src/bin/basic_async.rs b/src/bin/basic_async.rs index 8b62026..265815d 100644 --- a/src/bin/basic_async.rs +++ b/src/bin/basic_async.rs @@ -29,7 +29,7 @@ async fn faker(mut conn: SqliteConnection, count: i64) -> Result<(), sqlx::Error let area_code = common::get_random_area_code(); stmt_with_area .query() - .bind(area_code) + .bind(area_code.as_str()) .bind(age) .bind(is_active) .execute(&mut tx) diff --git a/src/bin/basic_batched.rs b/src/bin/basic_batched.rs index 29b740e..d2ad202 100644 --- a/src/bin/basic_batched.rs +++ b/src/bin/basic_batched.rs @@ -9,6 +9,7 @@ use rusqlite::{Connection, ToSql, Transaction}; +use crate::common::AreaCode; use fast_sqlite3_inserts as common; fn faker_wrapper(mut conn: Connection, count: i64) { @@ -53,7 +54,7 @@ fn faker(tx: &Transaction, count: i64) { let mut param_values: Vec<_> = Vec::new(); if with_area { // lets prepare the batch - let mut vector = Vec::<(&str, i8, i8)>::new(); + let mut vector = Vec::<(AreaCode, i8, i8)>::new(); for _ in 0..min_batch_size { let area_code = common::get_random_area_code(); vector.push((area_code, age, is_active)); diff --git a/src/bin/basic_batched_wp.rs b/src/bin/basic_batched_wp.rs index ba37b7e..5c5e90a 100644 --- a/src/bin/basic_batched_wp.rs +++ b/src/bin/basic_batched_wp.rs @@ -23,7 +23,7 @@ fn faker(mut conn: Connection, count: i64) { for _ in 0..min_batch_size { if with_area { let area_code = common::get_random_area_code(); - let params = format!(" (NULL, {}, {}, {}),", area_code, age, is_active); + let params = format!(" (NULL, {}, 
{}, {}),", area_code.as_str(), age, is_active); stmt.push_str(&params); } else { let params = format!(" (NULL, NULL, {}, {}),", age, is_active); diff --git a/src/bin/busy.rs b/src/bin/busy.rs index 0547a31..3297a57 100644 --- a/src/bin/busy.rs +++ b/src/bin/busy.rs @@ -5,22 +5,23 @@ //! //! next: threaded_busy.rs +use crate::common::AreaCode; use fast_sqlite3_inserts as common; fn faker(count: i64) { let min_batch_size = 1_000_000; for _ in 0..(count / min_batch_size) { let with_area = common::get_random_bool(); - let mut current_batch = Vec::<(&str, i8, i8)>::new(); + let mut current_batch = Vec::<(Option<AreaCode>, i8, i8)>::new(); for _ in 0..min_batch_size { if with_area { current_batch.push(( - common::get_random_area_code(), + Some(common::get_random_area_code()), common::get_random_age(), common::get_random_active(), )); } else { - current_batch.push(("", common::get_random_age(), common::get_random_active())); + current_batch.push((None, common::get_random_age(), common::get_random_active())); } } } diff --git a/src/bin/threaded_batched.rs b/src/bin/threaded_batched.rs index 053afd2..f70d90b 100644 --- a/src/bin/threaded_batched.rs +++ b/src/bin/threaded_batched.rs @@ -13,12 +13,13 @@ use std::sync::mpsc; use std::sync::mpsc::{Receiver, Sender}; use std::thread; +use crate::common::AreaCode; use fast_sqlite3_inserts as common; static MIN_BATCH_SIZE: i64 = 50; enum ParamValues { - WithArea(Vec<(&'static str, i8, i8)>), + WithArea(Vec<(AreaCode, i8, i8)>), WithoutArea(Vec<(i8, i8)>), } @@ -93,7 +94,7 @@ fn producer(tx: Sender, count: i64) { let mut param_values: Vec<_> = Vec::new(); if with_area { // lets prepare the batch - let mut vector = Vec::<(&str, i8, i8)>::new(); + let mut vector = Vec::<(AreaCode, i8, i8)>::new(); for _ in 0..MIN_BATCH_SIZE { let area_code = common::get_random_area_code(); vector.push((area_code, age, is_active)); diff --git a/src/bin/threaded_busy.rs b/src/bin/threaded_busy.rs index 42cdd88..cc00056 100644 --- a/src/bin/threaded_busy.rs +++ 
b/src/bin/threaded_busy.rs @@ -8,22 +8,23 @@ use std::thread; extern crate num_cpus; +use crate::common::AreaCode; use fast_sqlite3_inserts as common; fn faker(count: i64) { let min_batch_size = 1_000_000; for _ in 0..(count / min_batch_size) { let with_area = common::get_random_bool(); - let mut current_batch = Vec::<(&str, i8, i8)>::new(); + let mut current_batch = Vec::<(Option<AreaCode>, i8, i8)>::new(); for _ in 0..min_batch_size { if with_area { current_batch.push(( - common::get_random_area_code(), + Some(common::get_random_area_code()), common::get_random_age(), common::get_random_active(), )); } else { - current_batch.push(("", common::get_random_age(), common::get_random_active())); + current_batch.push((None, common::get_random_age(), common::get_random_active())); } } } diff --git a/src/bin/threaded_str_batched.rs b/src/bin/threaded_str_batched.rs index 9294e17..8b2839e 100644 --- a/src/bin/threaded_str_batched.rs +++ b/src/bin/threaded_str_batched.rs @@ -49,7 +49,7 @@ fn producer(tx: Sender, count: i64) { for _ in 0..min_batch_size { if with_area { let area_code = common::get_random_area_code(); - let params = format!(" (NULL, {}, {}, {}),", area_code, age, is_active); + let params = format!(" (NULL, {}, {}, {}),", area_code.as_str(), age, is_active); stmt.push_str(&params); } else { let params = format!(" (NULL, NULL, {}, {}),", age, is_active); diff --git a/src/lib.rs b/src/lib.rs index c4acff3..28810ec 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,4 @@ -use once_cell::sync::Lazy; +use rusqlite::{types::ToSqlOutput, ToSql}; use tinystr::TinyStr8; pub fn get_random_age() -> i8 { @@ -13,14 +13,10 @@ pub fn get_random_bool() -> bool { fastrand::bool() } -pub fn get_random_area_code() -> &'static str { - static AREA_CODES: Lazy> = Lazy::new(|| { - (0..=999_999) - .map(|i| TinyStr8::from_bytes(&format_6digits_number(i)).unwrap()) - .collect() - }); - - &AREA_CODES[fastrand::usize(0..=999_999)] +pub fn get_random_area_code() -> AreaCode { + let n = 
fastrand::u32(0..=999_999); + let buffer = format_6digits_number(n); + TinyStr8::from_bytes(&buffer).map(AreaCode).unwrap() } /// Formats a number that is between 0 and 999_999, @@ -49,3 +45,17 @@ mod tests { } } } + +pub struct AreaCode(TinyStr8); + +impl AreaCode { + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl ToSql for AreaCode { + fn to_sql(&self) -> rusqlite::Result<ToSqlOutput<'_>> { + Ok(ToSqlOutput::from(self.0.as_str())) + } +}