From 3d8e96a22e2e15120729e4470fb12f5e032e142e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristoffer=20Str=C3=B6m?= Date: Mon, 6 Apr 2015 16:45:23 +0200 Subject: [PATCH] Make bloom filters simpler These did not work before, and had some unnecessary complexity. Now the filters use only one hashing function and no bignum arithmetic, and get the additional bit positions by repeatedly hashing the result of the prior hash. Since we're not concerned about crypto hashing here, this should be a win. External interfaces unchanged. --- Godeps/Godeps.json | 12 ++- .../github.com/mtchavez/jenkins/.gitignore | 23 ++++ .../github.com/mtchavez/jenkins/.travis.yml | 8 ++ .../src/github.com/mtchavez/jenkins/Makefile | 11 ++ .../src/github.com/mtchavez/jenkins/README.md | 45 ++++++++ .../github.com/mtchavez/jenkins/jenkins.go | 48 +++++++++ .../mtchavez/jenkins/jenkins_suite_test.go | 13 +++ .../mtchavez/jenkins/jenkins_test.go | 101 ++++++++++++++++++ blocks/bloom/filter.go | 86 ++++++++------- blocks/bloom/filter_test.go | 54 +++++++++- 10 files changed, 355 insertions(+), 46 deletions(-) create mode 100644 Godeps/_workspace/src/github.com/mtchavez/jenkins/.gitignore create mode 100644 Godeps/_workspace/src/github.com/mtchavez/jenkins/.travis.yml create mode 100644 Godeps/_workspace/src/github.com/mtchavez/jenkins/Makefile create mode 100644 Godeps/_workspace/src/github.com/mtchavez/jenkins/README.md create mode 100644 Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins.go create mode 100644 Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_suite_test.go create mode 100644 Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_test.go diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json index 263f6f0a95a..617096eab38 100644 --- a/Godeps/Godeps.json +++ b/Godeps/Godeps.json @@ -213,6 +213,10 @@ "ImportPath": "github.com/mitchellh/go-homedir", "Rev": "7d2d8c8a4e078ce3c58736ab521a40b37a504c52" }, + { + "ImportPath": "github.com/mtchavez/jenkins", + "Rev": 
"5a816af6ef21ef401bff5e4b7dd255d63400f497" + }, { "ImportPath": "github.com/syndtr/goleveldb/leveldb", "Rev": "87e4e645d80ae9c537e8f2dee52b28036a5dd75e" @@ -221,6 +225,10 @@ "ImportPath": "github.com/syndtr/gosnappy/snappy", "Rev": "156a073208e131d7d2e212cb749feae7c339e846" }, + { + "ImportPath": "github.com/whyrusleeping/go-metrics", + "Rev": "1cd8009604ec2238b5a71305a0ecd974066e0e16" + }, { "ImportPath": "golang.org/x/crypto/blowfish", "Rev": "b7d6bf2c61544745a02f83dec90393985fc3a065" @@ -233,10 +241,6 @@ "ImportPath": "golang.org/x/net/context", "Rev": "7dbad50ab5b31073856416cdcfeb2796d682f844" }, - { - "ImportPath": "github.com/whyrusleeping/go-metrics", - "Rev": "1cd8009604ec2238b5a71305a0ecd974066e0e16" - }, { "ImportPath": "gopkg.in/fsnotify.v1", "Comment": "v1.2.0", diff --git a/Godeps/_workspace/src/github.com/mtchavez/jenkins/.gitignore b/Godeps/_workspace/src/github.com/mtchavez/jenkins/.gitignore new file mode 100644 index 00000000000..836562412fe --- /dev/null +++ b/Godeps/_workspace/src/github.com/mtchavez/jenkins/.gitignore @@ -0,0 +1,23 @@ +# Compiled Object files, Static and Dynamic libs (Shared Objects) +*.o +*.a +*.so + +# Folders +_obj +_test + +# Architecture specific extensions/prefixes +*.[568vq] +[568vq].out + +*.cgo1.go +*.cgo2.c +_cgo_defun.c +_cgo_gotypes.go +_cgo_export.* + +_testmain.go + +*.exe +*.test diff --git a/Godeps/_workspace/src/github.com/mtchavez/jenkins/.travis.yml b/Godeps/_workspace/src/github.com/mtchavez/jenkins/.travis.yml new file mode 100644 index 00000000000..25a4cf32346 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mtchavez/jenkins/.travis.yml @@ -0,0 +1,8 @@ +go: + - 1.1 + - tip +install: + - go get github.com/onsi/ginkgo + - go get github.com/onsi/gomega +before_script: go test -i ./... +script: go test ./... 
diff --git a/Godeps/_workspace/src/github.com/mtchavez/jenkins/Makefile b/Godeps/_workspace/src/github.com/mtchavez/jenkins/Makefile new file mode 100644 index 00000000000..f05d467e031 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mtchavez/jenkins/Makefile @@ -0,0 +1,11 @@ +build: + go build jenkins.go + +run: + go run jenkins.go + +test: + go test -cover + +default: + go run jenkins.go diff --git a/Godeps/_workspace/src/github.com/mtchavez/jenkins/README.md b/Godeps/_workspace/src/github.com/mtchavez/jenkins/README.md new file mode 100644 index 00000000000..409f9961235 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mtchavez/jenkins/README.md @@ -0,0 +1,45 @@ +Jenkins +================= + +Golang Jenkins hash + +[![Build Status](https://travis-ci.org/mtchavez/go-jenkins-hashes.png?branch=master)](https://travis-ci.org/mtchavez/go-jenkins-hashes) + +## Install + +`go get -u github.com/mtchavez/jenkins` + +## Usage + +Jenkins follows the [Hash32](http://golang.org/pkg/hash/#Hash32) interface from the Go standard library + +```go +// Create a new hash +jenkhash := New() + +// Write a string of bytes to hash +key := []byte("my-random-key") +length, err := jenkhash(key) + +// Get uint32 sum of hash +sum := jenkhash.Sum32() + +// Sum hash with byte string +sumbytes := jenkhash.Sum(key) +``` + +## Testing + +Uses [Ginkgo](http://onsi.github.io/ginkgo/) for testing. 
+ +Run via `make test` which will run `go test -cover` + +## Documentation + +Docs on [godoc](http://godoc.org/github.com/mtchavez/jenkins) + +## License + +Written by Chavez + +Released under the MIT License: http://www.opensource.org/licenses/mit-license.php diff --git a/Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins.go b/Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins.go new file mode 100644 index 00000000000..79667623cd7 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins.go @@ -0,0 +1,48 @@ +package jenkins + +import "hash" + +type jenkhash uint32 + +func New() hash.Hash32 { + var j jenkhash = 0 + return &j +} + +func (j *jenkhash) Write(key []byte) (int, error) { + hash := *j + + for _, b := range key { + hash += jenkhash(b) + hash += (hash << 10) + hash ^= (hash >> 6) + } + + hash += (hash << 3) + hash ^= (hash >> 11) + hash += (hash << 15) + + *j = hash + return len(key), nil +} + +func (j *jenkhash) Reset() { + *j = 0 +} + +func (j *jenkhash) Size() int { + return 4 +} + +func (j *jenkhash) BlockSize() int { + return 1 +} + +func (j *jenkhash) Sum32() uint32 { + return uint32(*j) +} + +func (j *jenkhash) Sum(in []byte) []byte { + v := j.Sum32() + return append(in, byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) +} diff --git a/Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_suite_test.go b/Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_suite_test.go new file mode 100644 index 00000000000..ec3911ba30e --- /dev/null +++ b/Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_suite_test.go @@ -0,0 +1,13 @@ +package jenkins + +import ( + . "github.com/onsi/ginkgo" + . 
"github.com/onsi/gomega" + + "testing" +) + +func TestJenkins(t *testing.T) { + RegisterFailHandler(Fail) + RunSpecs(t, "Jenkins Suite") +} diff --git a/Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_test.go b/Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_test.go new file mode 100644 index 00000000000..1cc4484ef44 --- /dev/null +++ b/Godeps/_workspace/src/github.com/mtchavez/jenkins/jenkins_test.go @@ -0,0 +1,101 @@ +package jenkins + +import ( + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" + "hash" +) + +var _ = Describe("Jenkins", func() { + + var jhash hash.Hash32 + var key []byte + + BeforeEach(func() { + jhash = New() + key = []byte("Apple") + }) + + Describe("New", func() { + + It("returns jenkhash", func() { + var h *jenkhash + Expect(jhash).To(BeAssignableToTypeOf(h)) + }) + + It("initializes offset to 0", func() { + Expect(jhash.Sum32()).To(Equal(uint32(0))) + }) + }) + + Describe("Write", func() { + + It("returns key length", func() { + length, _ := jhash.Write(key) + Expect(length).To(Equal(5)) + }) + + It("has no error", func() { + _, err := jhash.Write(key) + Expect(err).To(BeNil()) + }) + + }) + + Describe("Reset", func() { + + It("sets back to 0", func() { + Expect(jhash.Sum32()).To(Equal(uint32(0))) + jhash.Write(key) + Expect(jhash.Sum32()).NotTo(Equal(uint32(0))) + jhash.Reset() + Expect(jhash.Sum32()).To(Equal(uint32(0))) + }) + + }) + + Describe("Size", func() { + + It("is 4", func() { + Expect(jhash.Size()).To(Equal(4)) + }) + + }) + + Describe("BlockSize", func() { + + It("is 1", func() { + Expect(jhash.BlockSize()).To(Equal(1)) + }) + + }) + + Describe("Sum32", func() { + + It("defaults to 0", func() { + Expect(jhash.Sum32()).To(Equal(uint32(0))) + }) + + It("sums hash", func() { + jhash.Write(key) + Expect(jhash.Sum32()).To(Equal(uint32(884782484))) + }) + + }) + + Describe("Sum", func() { + + It("default 0 hash byte returned", func() { + expected := []byte{0x41, 0x70, 0x70, 0x6c, 0x65, 0x0, 0x0, 0x0, 0x0} + 
Expect(jhash.Sum(key)).To(Equal(expected)) + }) + + It("returns sum byte array", func() { + jhash.Write(key) + expected := []byte{0x41, 0x70, 0x70, 0x6c, 0x65, 0x34, 0xbc, 0xb5, 0x94} + Expect(jhash.Sum(key)).To(Equal(expected)) + }) + + }) + +}) diff --git a/blocks/bloom/filter.go b/blocks/bloom/filter.go index ea539f77ac6..2697c3eab77 100644 --- a/blocks/bloom/filter.go +++ b/blocks/bloom/filter.go @@ -2,13 +2,11 @@ package bloom import ( + "encoding/binary" "errors" - "fmt" + // Non crypto hash, because speed + "github.com/ipfs/go-ipfs/Godeps/_workspace/src/github.com/mtchavez/jenkins" "hash" - "hash/adler32" - "hash/crc32" - "hash/fnv" - "math/big" ) type Filter interface { @@ -17,61 +15,66 @@ type Filter interface { Merge(Filter) (Filter, error) } -func BasicFilter() Filter { - // Non crypto hashes, because speed - return NewFilter(2048, adler32.New(), fnv.New32(), crc32.NewIEEE()) -} - -func NewFilter(size int, hashes ...hash.Hash) Filter { +func NewFilter(size int) Filter { return &filter{ + hash: jenkins.New(), filter: make([]byte, size), - hashes: hashes, + k: 3, } } type filter struct { filter []byte - hashes []hash.Hash + hash hash.Hash32 + k int +} + +func BasicFilter() Filter { + return NewFilter(2048) } -func (f *filter) Add(k []byte) { - for _, h := range f.hashes { - i := bytesMod(h.Sum(k), int64(len(f.filter)*8)) - f.setBit(i) +func (f *filter) Add(bytes []byte) { + for _, bit := range f.getBitIndicies(bytes) { + f.setBit(bit) } } -func (f *filter) Find(k []byte) bool { - for _, h := range f.hashes { - i := bytesMod(h.Sum(k), int64(len(f.filter)*8)) - if !f.getBit(i) { +func (f *filter) getBitIndicies(bytes []byte) []uint32 { + indicies := make([]uint32, f.k) + + f.hash.Write(bytes) + b := make([]byte, 4) + + for i := 0; i < f.k; i++ { + res := f.hash.Sum32() + indicies[i] = res % (uint32(len(f.filter)) * 8) + + binary.LittleEndian.PutUint32(b, res) + f.hash.Write(b) + } + + f.hash.Reset() + + return indicies +} + +func (f *filter) Find(bytes 
[]byte) bool { + for _, bit := range f.getBitIndicies(bytes) { + if !f.getBit(bit) { return false } } return true } -func (f *filter) setBit(i int64) { - fmt.Printf("setting bit %d\n", i) +func (f *filter) setBit(i uint32) { f.filter[i/8] |= (1 << byte(i%8)) } -func (f *filter) getBit(i int64) bool { - fmt.Printf("getting bit %d\n", i) +func (f *filter) getBit(i uint32) bool { return f.filter[i/8]&(1<