Skip to content

Commit d9d398f

Browse files
authored
sampling : when top-k <= 0 -> noop (ggml-org#13173)
ggml-ci
1 parent 5a63980 commit d9d398f

File tree

2 files changed: +3 −1 lines changed

include/llama.h

+1
@@ -1232,6 +1232,7 @@ extern "C" {
                  "will be removed in the future (see https://github.com/ggml-org/llama.cpp/pull/9896#discussion_r1800920915)");

     /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
+    /// Setting k <= 0 makes this a noop
     LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k);

     /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751

src/llama-sampling.cpp

+2-1
@@ -232,7 +232,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
     // }

     if (k <= 0) {
-        k = cur_p->size;
+        return;
     }

     k = std::min(k, (int) cur_p->size);
@@ -298,6 +298,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
         }
         cur_p->sorted = true;
     }
+
     cur_p->size = k;
 }


0 commit comments

Comments
 (0)