Skip to content

Commit 0097cd2

Browse files
author
Artem Ryzhov
committed
Last updates
1 parent 4783e48 commit 0097cd2

File tree

16 files changed

+669
-1
lines changed

16 files changed

+669
-1
lines changed

--concurrent-mode

Whitespace-only changes.

--num-workers

Whitespace-only changes.

--prompt

Whitespace-only changes.

--which

Whitespace-only changes.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number · Diff line number · Diff line change
@@ -43,7 +43,7 @@ candle-onnx = { path = "./candle-onnx", version = "0.9.1" }
4343
candle-transformers = { path = "./candle-transformers", version = "0.9.1" }
4444
clap = { version = "4.2.4", features = ["derive"] }
4545
criterion = { version = "0.5.1", default-features=false }
46-
cudarc = { version = "0.16.3", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false }
46+
cudarc = { version = "0.16.1", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features=false }
4747
fancy-regex = "0.13.0"
4848
gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] }
4949
hf-hub = "0.4.1"

Cargo.toml.backup

Lines changed: 72 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,72 @@
1+
[workspace]
2+
members = [
3+
"candle-lora",
4+
"candle-lora-transformers",
5+
"candle-lora-examples",
6+
"candle-lora-macro",
7+
]
8+
exclude = []
9+
resolver = "2"
10+
11+
[workspace.package]
12+
version = "0.9.1"
13+
edition = "2021"
14+
description = "Minimalist ML framework."
15+
repository = "https://github.com/huggingface/candle"
16+
homepage = "https://github.com/EricLBuehler/candle-lora"
17+
keywords = ["blas", "tensor", "machine-learning"]
18+
categories = ["science"]
19+
license = "MIT OR Apache-2.0"
20+
21+
[workspace.dependencies]
22+
ab_glyph = "0.2.23"
23+
accelerate-src = { version = "0.3.2" }
24+
anyhow = { version = "1", features = ["backtrace"] }
25+
byteorder = "1.4.3"
26+
candle = { path = "../candle-core", package = "candle-core", version = "0.9.1" }
27+
candle-datasets = { path = "../candle-datasets", version = "0.9.1" }
28+
candle-nn = { path = "../candle-nn", version = "0.9.1" }
29+
candle-transformers = { path = "../candle-transformers", version = "0.9.1" }
30+
candle-flash-attn = { path = "../candle-flash-attn", version = "0.9.1" }
31+
candle-kernels = { path = "../candle-kernels", version = "0.9.1" }
32+
candle-metal-kernels = { path = "../candle-metal-kernels", version = "0.9.1" }
33+
candle-onnx = { path = "../candle-onnx", version = "0.9.1" }
34+
clap = { version = "4.2.4", features = ["derive"] }
35+
criterion = { version = "0.5.1", default-features = false }
36+
cudarc = { version = "0.16.3", features = ["std", "cublas", "cublaslt", "curand", "driver", "nvrtc", "f16", "cuda-version-from-build-system", "dynamic-linking"], default-features = false }
37+
fancy-regex = "0.13.0"
38+
gemm = { version = "0.17.0", features = ["wasm-simd128-enable"] }
39+
hf-hub = "0.4.1"
40+
half = { version = "2.5.0", features = ["num-traits", "use-intrinsics", "rand_distr"] }
41+
hound = "3.5.1"
42+
image = { version = "0.25.2", default-features = false, features = ["jpeg", "png"] }
43+
imageproc = { version = "0.24.0", default-features = false }
44+
intel-mkl-src = { version = "0.8.1", features = ["mkl-static-lp64-iomp"] }
45+
libc = { version = "0.2.147" }
46+
log = "0.4"
47+
memmap2 = { version = "0.9.3", features = ["stable_deref_trait"] }
48+
num_cpus = "1.15.0"
49+
num-traits = "0.2.15"
50+
parquet = { version = "51.0.0" }
51+
rand = "0.9.0"
52+
rand_distr = "0.5.1"
53+
rayon = "1.7.0"
54+
safetensors = "0.4.1"
55+
serde = { version = "1.0.171", features = ["derive"] }
56+
serde_plain = "1.0.2"
57+
serde_json = "1.0.99"
58+
thiserror = "1"
59+
tokenizers = { version = "0.21.0", default-features = false }
60+
tracing = "0.1.37"
61+
tracing-chrome = "0.7.1"
62+
tracing-subscriber = "0.3.7"
63+
ug = "0.4.0"
64+
ug-cuda = "0.4.0"
65+
ug-metal = "0.4.0"
66+
yoke = { version = "0.7.2", features = ["derive"] }
67+
zip = { version = "1.1.1", default-features = false }
68+
metal = { version = "0.27.0", features = ["mps"] }
69+
70+
[profile.release-with-debug]
71+
inherits = "release"
72+
debug = true

bert_lora

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1 @@
1+
Subproject commit 725b8056ecf78182ad5b8fae7e2e4aa0a614da95

candle-examples/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -17,6 +17,7 @@ candle-nn = { workspace = true }
1717
candle-transformers = { workspace = true }
1818
candle-flash-attn = { workspace = true, optional = true }
1919
candle-onnx = { workspace = true, optional = true }
20+
tokio = { version = "1.0", features = ["rt", "rt-multi-thread", "macros"] }
2021

2122
csv = "1.3.0"
2223
cudarc = { workspace = true, optional = true }
@@ -72,6 +73,7 @@ mimi = ["cpal", "symphonia", "rubato"]
7273
snac = ["cpal", "symphonia", "rubato"]
7374
depth_anything_v2 = ["palette", "enterpolation"]
7475

76+
7577
[[example]]
7678
name = "llama_multiprocess"
7779
required-features = ["cuda", "nccl", "flash-attn"]
@@ -80,6 +82,9 @@ required-features = ["cuda", "nccl", "flash-attn"]
8082
name = "reinforcement-learning"
8183
required-features = ["pyo3"]
8284

85+
[[example]]
86+
name = "simple-lora"
87+
8388
[[example]]
8489
name = "onnx"
8590
required-features = ["onnx"]
Lines changed: 42 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,42 @@
1+
use candle::quantized::gguf_file;
2+
use candle::{Device, Result, Tensor};
3+
use std::collections::HashMap;
4+
use std::sync::Arc;
5+
6+
// For now, let's create a simple concurrent wrapper
7+
#[derive(Clone)]
8+
pub struct ConcurrentPhi3Model {
9+
// We'll use the existing Model enum from main.rs
10+
inner: Arc<std::sync::Mutex<super::Model>>,
11+
}
12+
13+
impl ConcurrentPhi3Model {
14+
pub fn new(model: super::Model) -> Self {
15+
Self {
16+
inner: Arc::new(std::sync::Mutex::new(model)),
17+
}
18+
}
19+
20+
/// Create a new inference context (thread-safe)
21+
pub fn create_context(&self) -> InferenceContext {
22+
InferenceContext {
23+
model: self.clone(),
24+
local_state: HashMap::new(),
25+
}
26+
}
27+
}
28+
29+
pub struct InferenceContext {
30+
model: ConcurrentPhi3Model,
31+
local_state: HashMap<String, Tensor>, // Thread-local mutable state
32+
}
33+
34+
impl InferenceContext {
35+
/// Thread-safe forward pass
36+
pub fn forward(&mut self, xs: &Tensor, index_pos: usize) -> Result<Tensor> {
37+
let mut model_guard = self.model.inner.lock().unwrap();
38+
let result = model_guard.forward(xs, index_pos);
39+
drop(model_guard); // Release lock immediately
40+
result
41+
}
42+
}
Lines changed: 22 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -0,0 +1,22 @@
1+
[package]
2+
name = "simple-lora-example"
3+
version = "0.1.0"
4+
edition = "2021"
5+
6+
[dependencies]
7+
# Fix paths - remove one level of "../candle/"
8+
candle-core = { path = "../../candle-core" }
9+
candle-nn = { path = "../../candle-nn" }
10+
candle-transformers = { path = "../../candle-transformers" }
11+
candle-lora = { path = "../../candle_lora_examples/candle-lora" }
12+
candle-lora-transformers = { path = "../../candle_lora_examples/candle-lora-transformers" }
13+
candle-lora-macro = { path = "../../candle_lora_examples/candle-lora-macro" }
14+
candle-examples = { path = "../../candle-examples" }
15+
16+
# External dependencies
17+
anyhow = "1.0"
18+
hf-hub = "0.3"
19+
tokenizers = "0.19"
20+
clap = { version = "4.0", features = ["derive"] }
21+
serde = "1.0"
22+
serde_json = "1.0"

0 commit comments

Comments (0)