Commit ed8942a

llama : remove reference of memory during encode

ggml-ci

Parent: 6438eb5

1 file changed: 4 insertions(+), 6 deletions(-)

src/llama-context.cpp (+4 −6)

@@ -179,8 +179,8 @@ llama_context::llama_context(
    // init the memory module
    if (!hparams.vocab_only) {
        llama_memory_params params_mem = {
-           /*.type_k =*/ params.type_k,
-           /*.type_v =*/ params.type_v,
+           /*.type_k =*/ params.type_k,
+           /*.type_v =*/ params.type_v,
        };

        memory.reset(model.create_memory(cparams, params_mem));

@@ -1008,11 +1008,9 @@ int llama_context::encode(llama_batch & inp_batch) {
        return -1;
    }

-   llama_kv_cache * kv_self = static_cast<llama_kv_cache *>(memory.get());
-
    // temporary allocate memory for the input batch if needed
-   // TODO: this is incorrect for multiple sequences because get_pos_max() is the maximum across all sequences
-   llama_batch_allocr batch_allocr(inp_batch, inp_batch.pos ? -1 : kv_self->get_pos_max() + 1);
+   // note: during encode, we always pass the full sequence starting from pos = 0
+   llama_batch_allocr batch_allocr(inp_batch, inp_batch.pos ? -1 : 0);

    const llama_batch & batch = batch_allocr.batch;
    const int32_t n_tokens = batch.n_tokens;
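The removed TODO explains why the old fallback was problematic: kv_self->get_pos_max() is the maximum position across all sequences, so seeding default positions from it is incorrect for batches that mix sequences. Since an encoder pass always receives the full sequence from position 0, the fallback start position can simply be 0, which also removes the cast to llama_kv_cache (the last use of the memory module in encode). Below is a minimal, hypothetical sketch of the assumed fallback behaviour: when the caller supplies no positions, the batch allocator is assumed to fill pos[i] = p0 + i, so passing p0 = 0 positions the encoder batch from the start of the sequence. The helper name fill_positions is illustrative, not the actual llama_batch_allocr code.

// Hypothetical sketch, not the llama.cpp implementation: assumed default
// position fill used by the batch allocator when inp_batch.pos is null.
#include <cstdint>
#include <vector>

using llama_pos = int32_t;

// If the caller provided positions, copy them; otherwise count up from p0.
static std::vector<llama_pos> fill_positions(int32_t n_tokens, const llama_pos * pos, llama_pos p0) {
    std::vector<llama_pos> out(n_tokens);
    for (int32_t i = 0; i < n_tokens; ++i) {
        out[i] = pos ? pos[i] : p0 + i;
    }
    return out;
}

int main() {
    // encode path after this commit: no explicit positions -> p0 = 0
    const auto enc = fill_positions(4, nullptr, /*p0 =*/ 0); // yields {0, 1, 2, 3}
    return enc.back() == 3 ? 0 : 1;
}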
