Skip to content

Commit 01c623e

Browse files
committed
fix ctx_shift
1 parent 036f682 commit 01c623e

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

tools/server/server.cpp

+4-4
Original file line number | Diff line number | Diff line change
@@ -2960,6 +2960,7 @@ struct server_context {
29602960
new_tokens[i - n_discard] = new_tokens[i];
29612961
}
29622962

2963+
new_tokens.resize(slot.cache_tokens.size() - n_discard);
29632964
slot.cache_tokens.clear();
29642965
slot.cache_tokens.insert(new_tokens);
29652966
}
@@ -3095,12 +3096,12 @@ struct server_context {
30953096
// we should never reach this
30963097
GGML_ABORT("not supported by multimodal");
30973098
}
3098-
llama_tokens curr_tokens = slot.prompt_tokens.get_text_tokens(); // copy
30993099
const int n_left = slot.n_ctx - slot.params.n_keep;
31003100

31013101
const int n_block_size = n_left / 2;
31023102
const int erased_blocks = (slot.n_prompt_tokens - slot.params.n_keep - n_block_size) / n_block_size;
31033103

3104+
const llama_tokens & curr_tokens = slot.prompt_tokens.get_text_tokens();
31043105
llama_tokens new_tokens(
31053106
curr_tokens.begin(),
31063107
curr_tokens.begin() + slot.params.n_keep);
@@ -3208,10 +3209,9 @@ struct server_context {
32083209
// remove the non-common part from the cache
32093210
slot.cache_tokens.resize(slot.n_past);
32103211

3211-
llama_token cur_tok = slot.prompt_tokens[slot.n_past];
3212-
32133212
// check if we should process the image
3214-
if (cur_tok == LLAMA_TOKEN_NULL) {
3213+
if (slot.n_past < slot.n_prompt_tokens
3214+
&& slot.prompt_tokens[slot.n_past] == LLAMA_TOKEN_NULL) {
32153215
// process the image
32163216
int32_t new_n_past;
32173217
int32_t res = slot.prompt_tokens.process_chunk(ctx, mctx, slot.n_past, slot.id, new_n_past);

tools/server/tests/utils.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
import wget
2727

2828

29-
DEFAULT_HTTP_TIMEOUT = 12
29+
DEFAULT_HTTP_TIMEOUT = 120
3030

3131
if "LLAMA_SANITIZE" in os.environ or "GITHUB_ACTION" in os.environ:
3232
DEFAULT_HTTP_TIMEOUT = 30

0 commit comments

Comments
 (0)