File tree Expand file tree Collapse file tree 2 files changed +9
-2
lines changed
mistralrs-core/src/kv_cache Expand file tree Collapse file tree 2 files changed +9
-2
lines changed Original file line number Diff line number Diff line change @@ -70,7 +70,7 @@ impl RotatingCache {
70
70
71
71
pub fn set_len ( & mut self , len : usize ) -> candle_core:: Result < ( ) > {
72
72
// If trying to roll it back past the boundary of max_seq_len, fail early.
73
- if self . current_seq_len - len > self . max_seq_len {
73
+ if self . current_seq_len . saturating_sub ( len) > self . max_seq_len {
74
74
candle_core:: bail!(
75
75
"Rotating KV cache (usually for sliding window) tried to reset to len {len} while current is {} and max retained is {}" ,
76
76
self . current_seq_len,
@@ -98,7 +98,7 @@ impl RotatingCache {
98
98
&& self . current_seq_len + seq_len < self . max_seq_len )
99
99
|| self . current_seq_len == 0
100
100
{
101
- let diff = self . current_seq_len + seq_len - self . capacity_seq_len ;
101
+ let diff = (self.current_seq_len + seq_len).saturating_sub(self.capacity_seq_len);
102
102
let n_blocks_needed = diff. div_ceil ( NormalCache :: CACHE_GROW_SIZE ) ;
103
103
self . capacity_seq_len += n_blocks_needed * NormalCache :: CACHE_GROW_SIZE ;
104
104
self . capacity_seq_len = self . capacity_seq_len . min ( self . max_seq_len ) ;
Original file line number Diff line number Diff line change @@ -56,6 +56,13 @@ impl SingleCache {
56
56
}
57
57
58
58
/// Sets the cache's current sequence length to `len`.
///
/// # Errors
///
/// Returns an error, leaving `current_seq_len` untouched, when `len` is
/// greater than `capacity_seq_len`.
pub fn set_len(&mut self, len: usize) -> candle_core::Result<()> {
    let capacity = self.capacity_seq_len;
    if len <= capacity {
        // Within the allocated capacity: just record the new length.
        self.current_seq_len = len;
        return Ok(());
    }
    candle_core::bail!(
        "kv-cache: requested length ({}) exceeds current capacity ({})",
        len,
        capacity
    );
}
You can’t perform that action at this time.
0 commit comments