Skip to content

Commit 37c9717

Browse files
krasserm authored and YellowRoseCx committed
Fix grammar-based sampling issue in server (ggml-org#2566)
1 parent d18ecd5 commit 37c9717

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

examples/server/server.cpp

+6 -4
Original file line number | Diff line number | Diff line change
@@ -196,6 +196,7 @@ struct llama_server_context
196196
llama_context *ctx = nullptr;
197197
gpt_params params;
198198

199+
grammar_parser::parse_state parsed_grammar;
199200
llama_grammar *grammar = nullptr;
200201

201202
bool truncated = false;
@@ -241,10 +242,13 @@ struct llama_server_context
241242
stopped_limit = false;
242243
stopping_word = "";
243244
multibyte_pending = 0;
244-
grammar = nullptr;
245-
246245
n_remain = 0;
247246
n_past = 0;
247+
248+
if (grammar != nullptr) {
249+
llama_grammar_free(grammar);
250+
grammar = nullptr;
251+
}
248252
}
249253

250254
bool loadModel(const gpt_params &params_)
@@ -265,8 +269,6 @@ struct llama_server_context
265269
bool loadGrammar()
266270
{
267271
if (!params.grammar.empty()) {
268-
grammar_parser::parse_state parsed_grammar;
269-
270272
parsed_grammar = grammar_parser::parse(params.grammar.c_str());
271273
// will be empty (default) if there are parse errors
272274
if (parsed_grammar.rules.empty()) {

0 commit comments

Comments (0)