Skip to content

Commit 0097cd2

Browse files
author
Artem Ryzhov
committed
Last updates
1 parent 4783e48 commit 0097cd2

File tree

16 files changed

+669
-1
lines changed

16 files changed

+669
-1
lines changed

--concurrent-mode

Whitespace-only changes.

--num-workers

Whitespace-only changes.

--prompt

Whitespace-only changes.

--which

Whitespace-only changes.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -43,7 +43,7 @@ candle-onnx = { path = "./candle-onnx", version = "0.9.1" }
4343
candle-transformers = { path = "./candle-transformers", version = "0.9.1" }
4444
clap = { version = "4.2.4", features = ["derive"] }
4545
criterion = { version = "0.5.1", default-features=false }
46-
cudarc = { version = "0.16.3", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false }
46+
cudarc = { version = "0.16.1", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false }
4747
fancy-regex = "0.13.0"
4848
gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] }
4949
hf-hub = "0.4.1"

Cargo.toml.backup

Lines changed: 72 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,72 @@
1+
[workspace]
2+
members = [
3+
"candle-lora",
4+
"candle-lora-transformers",
5+
"candle-lora-examples",
6+
"candle-lora-macro",
7+
]
8+
exclude = []
9+
resolver = "2"
10+
11+
[workspace.package]
12+
version = "0.9.1"
13+
edition = "2021"
14+
description = "Minimalist ML framework."
15+
repository = "https://github.com/huggingface/candle"
16+
homepage = "https://github.com/EricLBuehler/candle-lora"
17+
keywords = ["blas", "tensor", "machine-learning"]
18+
categories = ["science"]
19+
license = "MIT OR Apache-2.0"
20+
21+
[workspace.dependencies]
22+
ab_glyph = "0.2.23"
23+
accelerate-src = { version = "0.3.2" }
24+
anyhow = { version = "1", features = ["backtrace"] }
25+
byteorder = "1.4.3"
26+
candle = { path = "../candle-core", package = "candle-core", version = "0.9.1" }
27+
candle-datasets = { path = "../candle-datasets", version = "0.9.1" }
28+
candle-nn = { path = "../candle-nn", version = "0.9.1" }
29+
candle-transformers = { path = "../candle-transformers", version = "0.9.1" }
30+
candle-flash-attn = { path = "../candle-flash-attn", version = "0.9.1" }
31+
candle-kernels = { path = "../candle-kernels", version = "0.9.1" }
32+
candle-metal-kernels = { path = "../candle-metal-kernels", version = "0.9.1" }
33+
candle-onnx = { path = "../candle-onnx", version = "0.9.1" }
34+
clap = { version = "4.2.4", features = ["derive"] }
35+
criterion = { version = "0.5.1", default-features = false }
36+
cudarc = { version = "0.16.3", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features = false }
37+
fancy-regex = "0.13.0"
38+
gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] }
39+
hf-hub = "0.4.1"
40+
half = { version = "2.5.0", features = ["num-traits", "use-intrinsics", "rand_distr"] }
41+
hound = "3.5.1"
42+
image = { version = "0.25.2", default-features = false, features = ["jpeg", "png"] }
43+
imageproc = { version = "0.24.0", default-features = false }
44+
intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"] }
45+
libc = { version = "0.2.147" }
46+
log = "0.4"
47+
memmap2 = { version = "0.9.3", features = ["stable_deref_trait"] }
48+
num_cpus = "1.15.0"
49+
num-traits = "0.2.15"
50+
parquet = { version = "51.0.0" }
51+
rand = "0.9.0"
52+
rand_distr = "0.5.1"
53+
rayon = "1.7.0"
54+
safetensors = "0.4.1"
55+
serde = { version = "1.0.171", features = ["derive"] }
56+
serde_plain = "1.0.2"
57+
serde_json = "1.0.99"
58+
thiserror = "1"
59+
tokenizers = { version = "0.21.0", default-features = false }
60+
tracing = "0.1.37"
61+
tracing-chrome = "0.7.1"
62+
tracing-subscriber = "0.3.7"
63+
ug = "0.4.0"
64+
ug-cuda = "0.4.0"
65+
ug-metal = "0.4.0"
66+
yoke = { version = "0.7.2", features = ["derive"] }
67+
zip = { version = "1.1.1", default-features = false }
68+
metal = { version = "0.27.0", features = ["mps"] }
69+
70+
[profile.release-with-debug]
71+
inherits = "release"
72+
debug = true

bert_lora

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1 @@
1+
Subproject commit 725b8056ecf78182ad5b8fae7e2e4aa0a614da95

candle-examples/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -17,6 +17,7 @@ candle-nn = { workspace = true }
1717
candle-transformers = { workspace = true }
1818
candle-flash-attn = { workspace = true, optional = true }
1919
candle-onnx = { workspace = true, optional = true }
20+
tokio = { version = "1.0", features = ["rt", "rt-multi-thread", "macros"] }
2021

2122
csv = "1.3.0"
2223
cudarc = { workspace = true, optional = true }
@@ -72,6 +73,7 @@ mimi = ["cpal", "symphonia", "rubato"]
7273
snac = ["cpal", "symphonia", "rubato"]
7374
depth_anything_v2 = ["palette", "enterpolation"]
7475

76+
7577
[[example]]
7678
name = "llama_multiprocess"
7779
required-features = ["cuda", "nccl", "flash-attn"]
@@ -80,6 +82,9 @@ required-features = ["cuda", "nccl", "flash-attn"]
8082
name = "reinforcement-learning"
8183
required-features = ["pyo3"]
8284

85+
[[example]]
86+
name = "simple-lora"
87+
8388
[[example]]
8489
name = "onnx"
8590
required-features = ["onnx"]
Lines changed: 42 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,42 @@
1+
use candle::quantized::gguf_file;
2+
use candle::{Device, Result, Tensor};
3+
use std::collections::HashMap;
4+
use std::sync::Arc;
5+
6+
// For now, let's create a simple concurrent wrapper
7+
#[derive(Clone)]
8+
pub struct ConcurrentPhi3Model {
9+
// We'll use the existing Model enum from main.rs
10+
inner: Arc<std::sync::Mutex<super::Model>>,
11+
}
12+
13+
impl ConcurrentPhi3Model {
14+
pub fn new(model: super::Model) -> Self {
15+
Self {
16+
inner: Arc::new(std::sync::Mutex::new(model)),
17+
}
18+
}
19+
20+
/// Create a new inference context (thread-safe)
21+
pub fn create_context(&self) -> InferenceContext {
22+
InferenceContext {
23+
model: self.clone(),
24+
local_state: HashMap::new(),
25+
}
26+
}
27+
}
28+
29+
pub struct InferenceContext {
30+
model: ConcurrentPhi3Model,
31+
local_state: HashMap<String, Tensor>, // Thread-local mutable state
32+
}
33+
34+
impl InferenceContext {
35+
/// Thread-safe forward pass
36+
pub fn forward(&mut self, xs: &Tensor, index_pos: usize) -> Result<Tensor> {
37+
let mut model_guard = self.model.inner.lock().unwrap();
38+
let result = model_guard.forward(xs, index_pos);
39+
drop(model_guard); // Release lock immediately
40+
result
41+
}
42+
}
Lines changed: 22 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,22 @@
1+
[package]
2+
name = "simple-lora-example"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
# Fix paths - remove one level of "../candle/"
8+
candle-core = { path = "../../candle-core" }
9+
candle-nn = { path = "../../candle-nn" }
10+
candle-transformers = { path = "../../candle-transformers" }
11+
candle-lora = { path = "../../candle_lora_examples/candle-lora" }
12+
candle-lora-transformers = { path = "../../candle_lora_examples/candle-lora-transformers" }
13+
candle-lora-macro = { path = "../../candle_lora_examples/candle-lora-macro" }
14+
candle-examples = { path = "../../candle-examples" }
15+
16+
# External dependencies
17+
anyhow = "1.0"
18+
hf-hub = "0.3"
19+
tokenizers = "0.19"
20+
clap = { version = "4.0", features = ["derive"] }
21+
serde = "1.0"
22+
serde_json = "1.0"

0 commit comments

Comments (0)