
Commit 6984c5e

Add CLI arg to llama-run to adjust the number of threads used
We default to 4; sometimes we want to adjust this manually.

Signed-off-by: Eric Curtin <[email protected]>
1 parent be7c303 commit 6984c5e
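
As a quick usage sketch (the thread count and model filename are placeholders, not from the commit), the new option would be invoked in the same style as the existing examples in the help text:

    llama-run -t 8 --ngl 999 some-model.gguf "Hello World"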

1 file changed, +12 -4 lines

examples/run/run.cpp

Lines changed: 12 additions & 4 deletions
@@ -79,6 +79,7 @@ class Opt {
         ctx_params = llama_context_default_params();
         model_params = llama_model_default_params();
         context_size_default = ctx_params.n_batch;
+        n_threads_default = ctx_params.n_threads;
         ngl_default = model_params.n_gpu_layers;
         common_params_sampling sampling;
         temperature_default = sampling.temp;
@@ -104,6 +105,7 @@ class Opt {

         ctx_params.n_batch = context_size >= 0 ? context_size : context_size_default;
         ctx_params.n_ctx = ctx_params.n_batch;
+        ctx_params.n_threads = ctx_params.n_threads_batch = n_threads >= 0 ? n_threads : n_threads_default;
         model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
         temperature = temperature >= 0 ? temperature : temperature_default;

@@ -116,12 +118,12 @@ class Opt {
     std::string chat_template_file;
     std::string user;
     bool use_jinja = false;
-    int context_size = -1, ngl = -1;
+    int context_size = -1, ngl = -1, n_threads = -1;
     float temperature = -1;
     bool verbose = false;

   private:
-    int context_size_default = -1, ngl_default = -1;
+    int context_size_default = -1, ngl_default = -1, n_threads_default = -1;
     float temperature_default = -1;
     bool help = false;

@@ -171,6 +173,10 @@ class Opt {
             if (handle_option_with_value(argc, argv, i, ngl) == 1) {
                 return 1;
             }
+        } else if (options_parsing && (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--threads") == 0)) {
+            if (handle_option_with_value(argc, argv, i, n_threads) == 1) {
+                return 1;
+            }
         } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
             if (handle_option_with_value(argc, argv, i, temperature) == 1) {
                 return 1;
@@ -180,7 +186,7 @@ class Opt {
             verbose = true;
         } else if (options_parsing && strcmp(argv[i], "--jinja") == 0) {
             use_jinja = true;
-        } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0){
+        } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0) {
             if (handle_option_with_value(argc, argv, i, chat_template_file) == 1) {
                 return 1;
             }
@@ -232,6 +238,8 @@ class Opt {
             " Number of GPU layers (default: %d)\n"
             " --temp <value>\n"
             " Temperature (default: %.1f)\n"
+            " -t, --threads N\n"
+            " Number of threads to use during generation (default: %d)\n"
             " -v, --verbose, --log-verbose\n"
             " Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n"
             " -h, --help\n"
@@ -260,7 +268,7 @@ class Opt {
             " llama-run file://some-file3.gguf\n"
             " llama-run --ngl 999 some-file4.gguf\n"
             " llama-run --ngl 999 some-file5.gguf Hello World\n",
-            context_size_default, ngl_default, temperature_default);
+            context_size_default, ngl_default, temperature_default, n_threads_default);
     }
 };

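For context, here is a minimal standalone sketch (not the run.cpp code itself) of how a thread count parsed from the command line maps onto llama.cpp context parameters, using only the llama.h API already shown in the diff and the same "negative means unset, fall back to the default" convention the commit uses:

    #include "llama.h"

    // Hypothetical helper for illustration; run.cpp does this inline in class Opt.
    static llama_context_params make_ctx_params(int n_threads_cli) {
        // llama_context_default_params() supplies the library default
        // (4 threads, per the commit message) when the user passed nothing.
        llama_context_params params = llama_context_default_params();
        if (n_threads_cli >= 0) {
            params.n_threads       = n_threads_cli; // threads for generation
            params.n_threads_batch = n_threads_cli; // threads for batch/prompt processing
        }
        return params;
    }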