Skip to content

Commit dfaad36

Browse files
authored
zstd: Use individual reset threshold (#703)
* zstd: Use individual reset threshold

  Instead of setting the limit to the largest window size, set it to the chosen one.

  ```
  λ benchcmp before.txt after.txt
  benchmark                                         old ns/op     new ns/op     delta
  BenchmarkEncoder_EncodeAllSimple4K/fastest-32     3145          3133          -0.38%
  BenchmarkEncoder_EncodeAllSimple4K/default-32     41485         40624         -2.08%
  BenchmarkEncoder_EncodeAllSimple4K/better-32      49352         49197         -0.31%
  BenchmarkEncoder_EncodeAllSimple4K/best-32        421522        407392        -3.35%

  benchmark                                         old MB/s     new MB/s     speedup
  BenchmarkEncoder_EncodeAllSimple4K/fastest-32     1302.48      1307.39      1.00x
  BenchmarkEncoder_EncodeAllSimple4K/default-32     98.74        100.83       1.02x
  BenchmarkEncoder_EncodeAllSimple4K/better-32      83.00        83.26        1.00x
  BenchmarkEncoder_EncodeAllSimple4K/best-32        9.72         10.05        1.03x
  ```
1 parent d3349be commit dfaad36

8 files changed

+25
-26
lines changed

zstd/enc_base.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ type fastBase struct {
1616
cur int32
1717
// maximum offset. Should be at least 2x block size.
1818
maxMatchOff int32
19+
bufferReset int32
1920
hist []byte
2021
crc *xxhash.Digest
2122
tmp [8]byte
@@ -56,8 +57,8 @@ func (e *fastBase) Block() *blockEnc {
5657
}
5758

5859
func (e *fastBase) addBlock(src []byte) int32 {
59-
if debugAsserts && e.cur > bufferReset {
60-
panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
60+
if debugAsserts && e.cur > e.bufferReset {
61+
panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset))
6162
}
6263
// check if we have space already
6364
if len(e.hist)+len(src) > cap(e.hist) {
@@ -154,7 +155,7 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
154155

155156
// We offset current position so everything will be out of reach.
156157
// If above reset line, history will be purged.
157-
if e.cur < bufferReset {
158+
if e.cur < e.bufferReset {
158159
e.cur += e.maxMatchOff + int32(len(e.hist))
159160
}
160161
e.hist = e.hist[:0]

zstd/enc_best.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
8585
)
8686

8787
// Protect against e.cur wraparound.
88-
for e.cur >= bufferReset {
88+
for e.cur >= e.bufferReset-int32(len(e.hist)) {
8989
if len(e.hist) == 0 {
9090
e.table = [bestShortTableSize]prevEntry{}
9191
e.longTable = [bestLongTableSize]prevEntry{}

zstd/enc_better.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
6262
)
6363

6464
// Protect against e.cur wraparound.
65-
for e.cur >= bufferReset {
65+
for e.cur >= e.bufferReset-int32(len(e.hist)) {
6666
if len(e.hist) == 0 {
6767
e.table = [betterShortTableSize]tableEntry{}
6868
e.longTable = [betterLongTableSize]prevEntry{}
@@ -583,7 +583,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
583583
)
584584

585585
// Protect against e.cur wraparound.
586-
for e.cur >= bufferReset {
586+
for e.cur >= e.bufferReset-int32(len(e.hist)) {
587587
if len(e.hist) == 0 {
588588
for i := range e.table[:] {
589589
e.table[i] = tableEntry{}

zstd/enc_dfast.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
4444
)
4545

4646
// Protect against e.cur wraparound.
47-
for e.cur >= bufferReset {
47+
for e.cur >= e.bufferReset-int32(len(e.hist)) {
4848
if len(e.hist) == 0 {
4949
e.table = [dFastShortTableSize]tableEntry{}
5050
e.longTable = [dFastLongTableSize]tableEntry{}
@@ -384,7 +384,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
384384
)
385385

386386
// Protect against e.cur wraparound.
387-
if e.cur >= bufferReset {
387+
if e.cur >= e.bufferReset {
388388
for i := range e.table[:] {
389389
e.table[i] = tableEntry{}
390390
}
@@ -681,7 +681,7 @@ encodeLoop:
681681
}
682682

683683
// We do not store history, so we must offset e.cur to avoid false matches for next user.
684-
if e.cur < bufferReset {
684+
if e.cur < e.bufferReset {
685685
e.cur += int32(len(src))
686686
}
687687
}
@@ -696,7 +696,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
696696
)
697697

698698
// Protect against e.cur wraparound.
699-
for e.cur >= bufferReset {
699+
for e.cur >= e.bufferReset-int32(len(e.hist)) {
700700
if len(e.hist) == 0 {
701701
for i := range e.table[:] {
702702
e.table[i] = tableEntry{}

zstd/enc_fast.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
4343
)
4444

4545
// Protect against e.cur wraparound.
46-
for e.cur >= bufferReset {
46+
for e.cur >= e.bufferReset-int32(len(e.hist)) {
4747
if len(e.hist) == 0 {
4848
for i := range e.table[:] {
4949
e.table[i] = tableEntry{}
@@ -310,7 +310,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
310310
}
311311

312312
// Protect against e.cur wraparound.
313-
if e.cur >= bufferReset {
313+
if e.cur >= e.bufferReset {
314314
for i := range e.table[:] {
315315
e.table[i] = tableEntry{}
316316
}
@@ -538,7 +538,7 @@ encodeLoop:
538538
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
539539
}
540540
// We do not store history, so we must offset e.cur to avoid false matches for next user.
541-
if e.cur < bufferReset {
541+
if e.cur < e.bufferReset {
542542
e.cur += int32(len(src))
543543
}
544544
}
@@ -555,7 +555,7 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
555555
return
556556
}
557557
// Protect against e.cur wraparound.
558-
for e.cur >= bufferReset {
558+
for e.cur >= e.bufferReset-int32(len(e.hist)) {
559559
if len(e.hist) == 0 {
560560
e.table = [tableSize]tableEntry{}
561561
e.cur = e.maxMatchOff

zstd/encoder_options.go

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package zstd
33
import (
44
"errors"
55
"fmt"
6+
"math"
67
"runtime"
78
"strings"
89
)
@@ -47,22 +48,22 @@ func (o encoderOptions) encoder() encoder {
4748
switch o.level {
4849
case SpeedFastest:
4950
if o.dict != nil {
50-
return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
51+
return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
5152
}
52-
return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
53+
return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
5354

5455
case SpeedDefault:
5556
if o.dict != nil {
56-
return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
57+
return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
5758
}
58-
return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
59+
return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
5960
case SpeedBetterCompression:
6061
if o.dict != nil {
61-
return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
62+
return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
6263
}
63-
return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
64+
return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
6465
case SpeedBestCompression:
65-
return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
66+
return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
6667
}
6768
panic("unknown compression level")
6869
}

zstd/fuzz_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,8 @@ func FuzzEncoding(f *testing.F) {
181181
// Just test if we crash...
182182
defer func() {
183183
if r := recover(); r != nil {
184-
rdebug.PrintStack()
185-
t.Fatal(r)
184+
stack := rdebug.Stack()
185+
t.Fatalf("%v:\n%v", r, string(stack))
186186
}
187187
}()
188188
if len(data) > maxSize {

zstd/zstd.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,6 @@ const forcePreDef = false
3636
// zstdMinMatch is the minimum zstd match length.
3737
const zstdMinMatch = 3
3838

39-
// Reset the buffer offset when reaching this.
40-
const bufferReset = math.MaxInt32 - MaxWindowSize
41-
4239
// fcsUnknown is used for unknown frame content size.
4340
const fcsUnknown = math.MaxUint64
4441

0 commit comments

Comments
 (0)