@@ -79,6 +79,7 @@ class Opt {
         ctx_params           = llama_context_default_params();
         model_params         = llama_model_default_params();
         context_size_default = ctx_params.n_batch;
+        n_threads_default    = ctx_params.n_threads;
         ngl_default          = model_params.n_gpu_layers;
         common_params_sampling sampling;
         temperature_default  = sampling.temp;
@@ -104,6 +105,7 @@ class Opt {
 
         ctx_params.n_batch = context_size >= 0 ? context_size : context_size_default;
         ctx_params.n_ctx   = ctx_params.n_batch;
+        ctx_params.n_threads = ctx_params.n_threads_batch = n_threads >= 0 ? n_threads : n_threads_default;
         model_params.n_gpu_layers = ngl >= 0 ? ngl : ngl_default;
         temperature = temperature >= 0 ? temperature : temperature_default;
 
@@ -116,12 +118,12 @@ class Opt {
     std::string chat_template_file;
     std::string user;
     bool        use_jinja    = false;
-    int         context_size = -1, ngl = -1;
+    int         context_size = -1, ngl = -1, n_threads = -1;
     float       temperature  = -1;
     bool        verbose      = false;
 
   private:
-    int   context_size_default = -1, ngl_default = -1;
+    int   context_size_default = -1, ngl_default = -1, n_threads_default = -1;
     float temperature_default  = -1;
     bool  help                 = false;
 
@@ -171,6 +173,10 @@ class Opt {
             if (handle_option_with_value(argc, argv, i, ngl) == 1) {
                 return 1;
             }
+        } else if (options_parsing && (strcmp(argv[i], "-t") == 0 || strcmp(argv[i], "--threads") == 0)) {
+            if (handle_option_with_value(argc, argv, i, n_threads) == 1) {
+                return 1;
+            }
         } else if (options_parsing && strcmp(argv[i], "--temp") == 0) {
             if (handle_option_with_value(argc, argv, i, temperature) == 1) {
                 return 1;
@@ -180,7 +186,7 @@ class Opt {
             verbose = true;
         } else if (options_parsing && strcmp(argv[i], "--jinja") == 0) {
             use_jinja = true;
-        } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0){
+        } else if (options_parsing && strcmp(argv[i], "--chat-template-file") == 0) {
             if (handle_option_with_value(argc, argv, i, chat_template_file) == 1) {
                 return 1;
             }
@@ -232,6 +238,8 @@ class Opt {
             "      Number of GPU layers (default: %d)\n"
             "  --temp <value>\n"
             "      Temperature (default: %.1f)\n"
+            "  -t, --threads N\n"
+            "      Number of threads to use during generation (default: %d)\n"
             "  -v, --verbose, --log-verbose\n"
             "      Set verbosity level to infinity (i.e. log all messages, useful for debugging)\n"
             "  -h, --help\n"
@@ -260,7 +268,7 @@ class Opt {
             "  llama-run file://some-file3.gguf\n"
             "  llama-run --ngl 999 some-file4.gguf\n"
             "  llama-run --ngl 999 some-file5.gguf Hello World\n",
-            context_size_default, ngl_default, temperature_default);
+            context_size_default, ngl_default, temperature_default, n_threads_default);
     }
 };

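With this change, llama-run picks up the generation thread count from the command line: the new assignment in the hunk at line 105 writes the parsed value (or the llama_context_default_params() default when -t is absent) into both ctx_params.n_threads and ctx_params.n_threads_batch, so prompt processing and generation use the same number of threads. A quick usage sketch, reusing the model names from the help-text examples (file names and thread counts are illustrative only):

    llama-run -t 4 some-file4.gguf
    llama-run --threads 8 --ngl 999 some-file5.gguf Hello World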