Commit 702810f
llamamodel: fix setting of n_threads
We weren't setting n_threads_batch, and setThreadCount was a no-op, because we're using llama_decode, which doesn't take an n_threads argument.
1 parent c3b8c95 commit 702810f

1 file changed: +5 -1

gpt4all-backend/llamamodel.cpp

Lines changed: 5 additions & 1 deletion
@@ -162,6 +162,10 @@ bool LLamaModel::loadModel(const std::string &modelPath)
     d_ptr->ctx_params.seed = params.seed;
     d_ptr->ctx_params.f16_kv = params.memory_f16;
 
+    d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
+    d_ptr->ctx_params.n_threads = d_ptr->n_threads;
+    d_ptr->ctx_params.n_threads_batch = d_ptr->n_threads;
+
 #ifdef GGML_USE_METAL
     if (llama_verbose()) {
         std::cerr << "llama.cpp: using Metal" << std::endl;
@@ -206,14 +210,14 @@ bool LLamaModel::loadModel(const std::string &modelPath)
     }
 #endif
 
-    d_ptr->n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
     d_ptr->modelLoaded = true;
     fflush(stderr);
     return true;
 }
 
 void LLamaModel::setThreadCount(int32_t n_threads) {
     d_ptr->n_threads = n_threads;
+    llama_set_n_threads(d_ptr->ctx, n_threads, n_threads);
 }
 
 int32_t LLamaModel::threadCount() const {
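Why the fix looks like this: llama_decode reads its thread counts from the llama_context, so they must be set on the context at creation time and, for later changes, pushed in with llama_set_n_threads. A minimal sketch of that flow, assuming a loaded `model` handle and illustrative thread counts (not the project's actual code):

    // Sketch: thread counts live on the llama_context, not on llama_decode().
    llama_context_params cparams = llama_context_default_params();
    cparams.n_threads       = 4;  // threads for single-token (generation) decode
    cparams.n_threads_batch = 4;  // threads for prompt/batch decode
    llama_context *ctx = llama_new_context_with_model(model, cparams);

    // llama_decode(ctx, batch) takes no n_threads argument, so updating a
    // cached member alone is a no-op; push new values into the live context:
    llama_set_n_threads(ctx, /*n_threads=*/8, /*n_threads_batch=*/8);

This mirrors the diff above: loadModel now seeds both n_threads and n_threads_batch in ctx_params, and setThreadCount forwards the new value to the live context instead of only caching it.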
