Skip to content

Commit d9d398f

Browse files
authored
sampling : when top-k <= 0 -> noop (ggml-org#13173)
ggml-ci
1 parent 5a63980 commit d9d398f

File tree

2 files changed: +3 −1 lines changed

include/llama.h

+1
@@ -1232,6 +1232,7 @@ extern "C" {
                  "will be removed in the future (see https://github.com/ggml-org/llama.cpp/pull/9896#discussion_r1800920915)");

     /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751
+    /// Setting k <= 0 makes this a noop
     LLAMA_API struct llama_sampler * llama_sampler_init_top_k (int32_t k);

     /// @details Nucleus sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751

src/llama-sampling.cpp

+2-1
@@ -232,7 +232,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
     // }

     if (k <= 0) {
-        k = cur_p->size;
+        return;
     }

     k = std::min(k, (int) cur_p->size);
@@ -298,6 +298,7 @@ static void llama_sampler_top_k_impl(llama_token_data_array * cur_p, int32_t k)
         }
         cur_p->sorted = true;
     }
+
     cur_p->size = k;
 }


0 commit comments

Comments
 (0)