This repository was archived by the owner on Nov 9, 2017. It is now read-only.
This repository was archived by the owner on Nov 9, 2017. It is now read-only.
NewHistogram vs Snapshot deadlock #15
Closed
Description
We observed deadlock in a program which uses codahale/metrics. Here are the relevant backtraces:
goroutine 1549 [semacquire, 828 minutes]:
sync.runtime_Semacquire(0xc878bc)
/usr/local/go/src/runtime/sema.go:47 +0x30
sync.(*Mutex).Lock(0xc878b8)
/usr/local/go/src/sync/mutex.go:85 +0xd0
github.com/codahale/metrics.Gauge.SetBatchFunc(0xc421b78940, 0x17, 0x91b5c0, 0xc4263433f0, 0xc4263433e0, 0xc421b78900)
/home/meng/prod/ghostback/src/github.com/codahale/metrics/metrics.go:122 +0x31
github.com/codahale/metrics.NewHistogram(0xc420c18080, 0x13, 0x0, 0xf4240, 0x3, 0x0)
/home/meng/prod/ghostback/src/github.com/codahale/metrics/metrics.go:212 +0x6d2
common.(*TelemetryInstance).RecordTime(0xc4203f0dc0, 0xc420c18080, 0x13, 0x11f03447)
/home/meng/prod/ghostback/src/common/telemetry.go:158 +0xfa
common.TelemetryLag(0xc420c18080, 0x13, 0x11f03447)
/home/meng/prod/ghostback/src/common/telemetry.go:182 +0x10a
common.TelemetryProcessResponse(0xc4219c2410, 0xc422b41770, 0x1478abbbaeef0ea3)
/home/meng/prod/ghostback/src/common/telemetry.go:76 +0x81
httpworker.(*Instance).incomingHandler(0xc420077680, 0xc4239587e0, 0x0, 0x0)
/home/meng/prod/ghostback/src/httpworker/worker.go:172 +0x22b
httpworker.(*Instance).(httpworker.incomingHandler)-fm(0xc4239587e0, 0xc42514f201, 0x101)
/home/meng/prod/ghostback/src/httpworker/worker.go:91 +0x34
github.com/nsqio/go-nsq.HandlerFunc.HandleMessage(0xc420888040, 0xc4239587e0, 0x94c200, 0xc420888040)
/home/meng/prod/ghostback/src/github.com/nsqio/go-nsq/consumer.go:42 +0x30
github.com/nsqio/go-nsq.(*Consumer).handlerLoop(0xc420904000, 0xc40160, 0xc420888040)
/home/meng/prod/ghostback/src/github.com/nsqio/go-nsq/consumer.go:1105 +0x14a
created by github.com/nsqio/go-nsq.(*Consumer).AddConcurrentHandlers
/home/meng/prod/ghostback/src/github.com/nsqio/go-nsq/consumer.go:1087 +0xaa
goroutine 1606087 [semacquire, 828 minutes]:
sync.runtime_Semacquire(0xc878c4)
/usr/local/go/src/runtime/sema.go:47 +0x30
sync.(*Mutex).Lock(0xc878c0)
/usr/local/go/src/sync/mutex.go:85 +0xd0
github.com/codahale/metrics.Snapshot(0x0, 0x0)
/home/meng/prod/ghostback/src/github.com/codahale/metrics/metrics.go:166 +0xd2
github.com/codahale/metrics.init.1.func1(0xc423aa1800, 0x455d50)
/home/meng/prod/ghostback/src/github.com/codahale/metrics/metrics.go:313 +0x26
expvar.Func.String(0xa36258, 0xa36210, 0xc426c8aa80)
/usr/local/go/src/expvar/expvar.go:241 +0x27
fmt.(*pp).handleMethods(0xc426c8aa80, 0x73, 0x1)
/usr/local/go/src/fmt/print.go:590 +0x2f6
fmt.(*pp).printArg(0xc426c8aa80, 0x946560, 0xa36258, 0x73)
/usr/local/go/src/fmt/print.go:665 +0x17b
fmt.(*pp).doPrintf(0xc426c8aa80, 0x9f4e7a, 0x6, 0xc42c6b5b60, 0x2, 0x2)
/usr/local/go/src/fmt/print.go:985 +0x123d
fmt.Fprintf(0xc3f560, 0xc4215b5e10, 0x9f4e7a, 0x6, 0xc42c6b5b60, 0x2, 0x2, 0x2, 0x0, 0x0)
/usr/local/go/src/fmt/print.go:181 +0x76
expvar.expvarHandler.func1(0x9f6a34, 0x7, 0xc40060, 0xa36258)
/usr/local/go/src/expvar/expvar.go:320 +0x15b
expvar.Do(0xc42c6b5c48)
/usr/local/go/src/expvar/expvar.go:307 +0xef
expvar.expvarHandler(0xc43980, 0xc4215b5e10, 0xc420a2be00)
/usr/local/go/src/expvar/expvar.go:321 +0x12b
net/http.HandlerFunc.ServeHTTP(0xa361f8, 0xc43980, 0xc4215b5e10, 0xc420a2be00)
/usr/local/go/src/net/http/server.go:1726 +0x44
net/http.(*ServeMux).ServeHTTP(0xc683e0, 0xc43980, 0xc4215b5e10, 0xc420a2be00)
/usr/local/go/src/net/http/server.go:2022 +0x7f
net/http.serverHandler.ServeHTTP(0xc42085e000, 0xc43980, 0xc4215b5e10, 0xc420a2be00)
/usr/local/go/src/net/http/server.go:2202 +0x7d
net/http.(*conn).serve(0xc421063380, 0xc44580, 0xc423ebb600)
/usr/local/go/src/net/http/server.go:1579 +0x4b7
created by net/http.(*Server).Serve
/usr/local/go/src/net/http/server.go:2293 +0x44d
One thread is creating a NewHistogram, acquires hm and tries for gm:
func NewHistogram(name string, minValue, maxValue int64, sigfigs int) *Histogram {
hm.Lock()
defer hm.Unlock()
if _, ok := histograms[name]; ok {
panic(name + " already exists")
}
hist := &Histogram{
name: name,
hist: hdrhistogram.NewWindowed(5, minValue, maxValue, sigfigs),
}
histograms[name] = hist
Gauge(name+".P50").SetBatchFunc(hname(name), hist.merge, hist.valueAt(50))
Gauge(name+".P75").SetBatchFunc(hname(name), hist.merge, hist.valueAt(75))
Gauge(name+".P90").SetBatchFunc(hname(name), hist.merge, hist.valueAt(90))
Gauge(name+".P95").SetBatchFunc(hname(name), hist.merge, hist.valueAt(95))
Gauge(name+".P99").SetBatchFunc(hname(name), hist.merge, hist.valueAt(99))
Gauge(name+".P999").SetBatchFunc(hname(name), hist.merge, hist.valueAt(99.9))
return hist
}
func (g Gauge) SetBatchFunc(key interface{}, init func(), f func() int64) {
gm.Lock()
defer gm.Unlock()
gauges[string(g)] = f
if _, ok := inits[key]; !ok {
inits[key] = init
}
}
The other thread is sampling the data, acquires gm and tries for hm:
func Snapshot() (c map[string]uint64, g map[string]int64) {
cm.Lock()
defer cm.Unlock()
gm.Lock()
defer gm.Unlock()
hm.Lock()
defer hm.Unlock()
....
Metadata
Metadata
Assignees
Labels
No labels