1 file changed: +4 -6 lines changed

@@ -179,8 +179,8 @@ llama_context::llama_context(
     // init the memory module
     if (!hparams.vocab_only) {
         llama_memory_params params_mem = {
-            /*.type_k =*/ params.type_k,
-            /*.type_v =*/ params.type_v,
+            /*.type_k =*/ params.type_k,
+            /*.type_v =*/ params.type_v,
         };
 
         memory.reset(model.create_memory(cparams, params_mem));
@@ -1008,11 +1008,9 @@ int llama_context::encode(llama_batch & inp_batch) {
         return -1;
     }
 
-    llama_kv_cache * kv_self = static_cast<llama_kv_cache *>(memory.get());
-
     // temporary allocate memory for the input batch if needed
-    // TODO: this is incorrect for multiple sequences because get_pos_max() is the maximum across all sequences
-    llama_batch_allocr batch_allocr(inp_batch, inp_batch.pos ? -1 : kv_self->get_pos_max() + 1);
+    // note: during encode, we always pass the full sequence starting from pos = 0
+    llama_batch_allocr batch_allocr(inp_batch, inp_batch.pos ? -1 : 0);
 
     const llama_batch & batch = batch_allocr.batch;
     const int32_t n_tokens = batch.n_tokens;
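Context for the second hunk: the old encode path cast `memory` to `llama_kv_cache` only to seed default positions from `get_pos_max() + 1`, which the removed TODO already flagged as wrong for multi-sequence batches (it is the maximum across all sequences). Since encode always receives the full sequence, the allocator can simply start at position 0 whenever the caller does not supply `inp_batch.pos`. The sketch below is a simplified, hypothetical stand-in for that defaulting logic; the types and names are not the real `llama_batch_allocr` implementation.

    // Minimal sketch of position defaulting, assuming a batch where `pos`
    // may be null. Names are illustrative only, not llama.cpp's API.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    using sketch_pos = int32_t;

    struct sketch_batch {
        int32_t           n_tokens = 0;
        const sketch_pos *pos      = nullptr; // optional caller-provided positions
    };

    struct sketch_batch_allocr {
        std::vector<sketch_pos> pos;

        // p0 == -1 means "the caller supplied positions, keep them";
        // otherwise fill pos with p0, p0+1, ..., p0 + n_tokens - 1.
        sketch_batch_allocr(const sketch_batch &batch, sketch_pos p0) {
            if (p0 >= 0) {
                pos.resize(batch.n_tokens);
                for (int32_t i = 0; i < batch.n_tokens; ++i) {
                    pos[i] = p0 + i;
                }
            }
        }
    };

    int main() {
        sketch_batch batch;
        batch.n_tokens = 4;

        // encode path after this change: default positions start from 0,
        // with no dependency on the KV cache state
        sketch_batch_allocr allocr(batch, batch.pos ? -1 : 0);

        for (sketch_pos p : allocr.pos) {
            std::printf("%d ", p); // prints: 0 1 2 3
        }
        return 0;
    }

Dropping the `get_pos_max()` dependency also removes the only reason encode needed a direct handle to the KV cache, which is why the `kv_self` cast disappears entirely in this hunk.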