Skip to content

Commit b5a73a1

Browse files
authored
Improve incompressible data speed (#491)
Improve speed for zstd fastest and huffman-only compression of random data. flate: 590.71MB/s -> 1512.99MB/s; zstd l1: 1840.07MB/s -> 2489.23MB/s.
1 parent 43829fc commit b5a73a1

File tree

2 files changed

+22
-3
lines changed

2 files changed

+22
-3
lines changed

flate/huffman_bit_writer.go

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1008,6 +1008,26 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
10081008
// https://stackoverflow.com/a/25454430
10091009
const guessHeaderSizeBits = 70 * 8
10101010
histogram(input, w.literalFreq[:numLiterals], fill)
1011+
ssize, storable := w.storedSize(input)
1012+
if storable && len(input) > 1024 {
1013+
// Quick check for incompressible content.
1014+
abs := float64(0)
1015+
avg := float64(len(input)) / 256
1016+
max := float64(len(input) * 2)
1017+
for _, v := range w.literalFreq[:256] {
1018+
diff := float64(v) - avg
1019+
abs += diff * diff
1020+
if abs > max {
1021+
break
1022+
}
1023+
}
1024+
if abs < max {
1025+
// No chance we can compress this...
1026+
w.writeStoredHeader(len(input), eof)
1027+
w.writeBytes(input)
1028+
return
1029+
}
1030+
}
10111031
w.literalFreq[endBlockMarker] = 1
10121032
w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
10131033
if fill {
@@ -1025,7 +1045,6 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
10251045
estBits += estBits >> w.logNewTablePenalty
10261046

10271047
// Store bytes, if we don't get a reasonable improvement.
1028-
ssize, storable := w.storedSize(input)
10291048
if storable && ssize <= estBits {
10301049
w.writeStoredHeader(len(input), eof)
10311050
w.writeBytes(input)

zstd/enc_fast.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
8585
// TEMPLATE
8686
const hashLog = tableBits
8787
// seems global, but would be nice to tweak.
88-
const kSearchStrength = 7
88+
const kSearchStrength = 6
8989

9090
// nextEmit is where in src the next emitLiteral should start from.
9191
nextEmit := s
@@ -334,7 +334,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
334334
// TEMPLATE
335335
const hashLog = tableBits
336336
// seems global, but would be nice to tweak.
337-
const kSearchStrength = 8
337+
const kSearchStrength = 6
338338

339339
// nextEmit is where in src the next emitLiteral should start from.
340340
nextEmit := s

0 commit comments

Comments
 (0)