Skip to content

Commit 6810f22

Browse files
authored
zstd: Import xxhash v2.2.0 (#708)
Unsafe parts omitted, noasm build tag added back in. writeBlocks argument renamed to s to please go vet.
1 parent 0d2a371 commit 6810f22

File tree

7 files changed

+299
-298
lines changed

7 files changed

+299
-298
lines changed

zstd/internal/xxhash/README.md

+31-18
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,7 @@
22

33
VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package.
44

5-
6-
[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash)
7-
[![Build Status](https://travis-ci.org/cespare/xxhash.svg?branch=master)](https://travis-ci.org/cespare/xxhash)
8-
9-
xxhash is a Go implementation of the 64-bit
10-
[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a
5+
xxhash is a Go implementation of the 64-bit [xxHash] algorithm, XXH64. This is a
116
high-quality hashing algorithm that is much faster than anything in the Go
127
standard library.
138

@@ -28,31 +23,49 @@ func (*Digest) WriteString(string) (int, error)
2823
func (*Digest) Sum64() uint64
2924
```
3025

31-
This implementation provides a fast pure-Go implementation and an even faster
32-
assembly implementation for amd64.
26+
The package is written with optimized pure Go and also contains even faster
27+
assembly implementations for amd64 and arm64. If desired, the `purego` build tag
28+
opts into using the Go code even on those architectures.
29+
30+
[xxHash]: http://cyan4973.github.io/xxHash/
31+
32+
## Compatibility
33+
34+
This package is in a module and the latest code is in version 2 of the module.
35+
You need a version of Go with at least "minimal module compatibility" to use
36+
github.com/cespare/xxhash/v2:
37+
38+
* 1.9.7+ for Go 1.9
39+
* 1.10.3+ for Go 1.10
40+
* Go 1.11 or later
41+
42+
I recommend using the latest release of Go.
3343

3444
## Benchmarks
3545

3646
Here are some quick benchmarks comparing the pure-Go and assembly
3747
implementations of Sum64.
3848

39-
| input size | purego | asm |
40-
| --- | --- | --- |
41-
| 5 B | 979.66 MB/s | 1291.17 MB/s |
42-
| 100 B | 7475.26 MB/s | 7973.40 MB/s |
43-
| 4 KB | 17573.46 MB/s | 17602.65 MB/s |
44-
| 10 MB | 17131.46 MB/s | 17142.16 MB/s |
49+
| input size | purego | asm |
50+
| ---------- | --------- | --------- |
51+
| 4 B | 1.3 GB/s | 1.2 GB/s |
52+
| 16 B | 2.9 GB/s | 3.5 GB/s |
53+
| 100 B | 6.9 GB/s | 8.1 GB/s |
54+
| 4 KB | 11.7 GB/s | 16.7 GB/s |
55+
| 10 MB | 12.0 GB/s | 17.3 GB/s |
4556

46-
These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using
47-
the following commands under Go 1.11.2:
57+
These numbers were generated on Ubuntu 20.04 with an Intel Xeon Platinum 8252C
58+
CPU using the following commands under Go 1.19.2:
4859

4960
```
50-
$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes'
51-
$ go test -benchtime 10s -bench '/xxhash,direct,bytes'
61+
benchstat <(go test -tags purego -benchtime 500ms -count 15 -bench 'Sum64$')
62+
benchstat <(go test -benchtime 500ms -count 15 -bench 'Sum64$')
5263
```
5364

5465
## Projects using this package
5566

5667
- [InfluxDB](https://github.com/influxdata/influxdb)
5768
- [Prometheus](https://github.com/prometheus/prometheus)
69+
- [VictoriaMetrics](https://github.com/VictoriaMetrics/VictoriaMetrics)
5870
- [FreeCache](https://github.com/coocood/freecache)
71+
- [FastCache](https://github.com/VictoriaMetrics/fastcache)

zstd/internal/xxhash/xxhash.go

+20-27
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,11 @@ const (
1818
prime5 uint64 = 2870177450012600261
1919
)
2020

21-
// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
22-
// possible in the Go code is worth a small (but measurable) performance boost
23-
// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
24-
// convenience in the Go code in a few places where we need to intentionally
25-
// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
26-
// result overflows a uint64).
27-
var (
28-
prime1v = prime1
29-
prime2v = prime2
30-
prime3v = prime3
31-
prime4v = prime4
32-
prime5v = prime5
33-
)
21+
// Store the primes in an array as well.
22+
//
23+
// The consts are used when possible in Go code to avoid MOVs but we need a
24+
// contiguous array for the assembly code.
25+
var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
3426

3527
// Digest implements hash.Hash64.
3628
type Digest struct {
@@ -52,10 +44,10 @@ func New() *Digest {
5244

5345
// Reset clears the Digest's state so that it can be reused.
5446
func (d *Digest) Reset() {
55-
d.v1 = prime1v + prime2
47+
d.v1 = primes[0] + prime2
5648
d.v2 = prime2
5749
d.v3 = 0
58-
d.v4 = -prime1v
50+
d.v4 = -primes[0]
5951
d.total = 0
6052
d.n = 0
6153
}
@@ -71,21 +63,23 @@ func (d *Digest) Write(b []byte) (n int, err error) {
7163
n = len(b)
7264
d.total += uint64(n)
7365

66+
memleft := d.mem[d.n&(len(d.mem)-1):]
67+
7468
if d.n+n < 32 {
7569
// This new data doesn't even fill the current block.
76-
copy(d.mem[d.n:], b)
70+
copy(memleft, b)
7771
d.n += n
7872
return
7973
}
8074

8175
if d.n > 0 {
8276
// Finish off the partial block.
83-
copy(d.mem[d.n:], b)
77+
c := copy(memleft, b)
8478
d.v1 = round(d.v1, u64(d.mem[0:8]))
8579
d.v2 = round(d.v2, u64(d.mem[8:16]))
8680
d.v3 = round(d.v3, u64(d.mem[16:24]))
8781
d.v4 = round(d.v4, u64(d.mem[24:32]))
88-
b = b[32-d.n:]
82+
b = b[c:]
8983
d.n = 0
9084
}
9185

@@ -135,21 +129,20 @@ func (d *Digest) Sum64() uint64 {
135129

136130
h += d.total
137131

138-
i, end := 0, d.n
139-
for ; i+8 <= end; i += 8 {
140-
k1 := round(0, u64(d.mem[i:i+8]))
132+
b := d.mem[:d.n&(len(d.mem)-1)]
133+
for ; len(b) >= 8; b = b[8:] {
134+
k1 := round(0, u64(b[:8]))
141135
h ^= k1
142136
h = rol27(h)*prime1 + prime4
143137
}
144-
if i+4 <= end {
145-
h ^= uint64(u32(d.mem[i:i+4])) * prime1
138+
if len(b) >= 4 {
139+
h ^= uint64(u32(b[:4])) * prime1
146140
h = rol23(h)*prime2 + prime3
147-
i += 4
141+
b = b[4:]
148142
}
149-
for i < end {
150-
h ^= uint64(d.mem[i]) * prime5
143+
for ; len(b) > 0; b = b[1:] {
144+
h ^= uint64(b[0]) * prime5
151145
h = rol11(h) * prime1
152-
i++
153146
}
154147

155148
h ^= h >> 33

0 commit comments

Comments
 (0)