Skip to content

Commit 85c099f

Browse files
committed
runtime: fix missing pprof labels
Use gp.m.curg instead of the gp when recording cpu profiler stack traces. This ensures profiler labels are captured when systemstack or similar is executing on behalf of the current goroutine. After this there are still rare cases of samples containing the labelHog function, so more work might be needed. This patch should fix ~99% of the problem. Fixes golang#48577. Change-Id: I27132110e3d09721ec3b3ef417122bc70d8f3279
1 parent 8ac5cbe commit 85c099f

File tree

3 files changed

+76
-11
lines changed

3 files changed

+76
-11
lines changed

src/runtime/cpuprof.go

+1-10
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ func SetCPUProfileRate(hz int) {
8989
// held at the time of the signal, nor can it use substantial amounts
9090
// of stack.
9191
//go:nowritebarrierrec
92-
func (p *cpuProfile) add(gp *g, stk []uintptr) {
92+
func (p *cpuProfile) add(tagPtr *unsafe.Pointer, stk []uintptr) {
9393
// Simple cas-lock to coordinate with setcpuprofilerate.
9494
for !atomic.Cas(&prof.signalLock, 0, 1) {
9595
osyield()
@@ -104,15 +104,6 @@ func (p *cpuProfile) add(gp *g, stk []uintptr) {
104104
// because otherwise its write barrier behavior may not
105105
// be correct. See the long comment there before
106106
// changing the argument here.
107-
//
108-
// Note: it can happen on Windows, where we are calling
109-
// p.add with a gp that is not the current g, that gp is nil,
110-
// meaning we interrupted a system thread with no g.
111-
// Avoid faulting in that case.
112-
var tagPtr *unsafe.Pointer
113-
if gp != nil {
114-
tagPtr = &gp.labels
115-
}
116107
cpuprof.log.write(tagPtr, nanotime(), hdr[:], stk)
117108
}
118109

src/runtime/pprof/pprof_test.go

+67
Original file line numberDiff line numberDiff line change
@@ -1363,6 +1363,73 @@ func TestLabelRace(t *testing.T) {
13631363
})
13641364
}
13651365

1366+
func TestLabelSystemstack(t *testing.T) {
1367+
// See http://golang.org/cl/351751.
1368+
prof := testCPUProfile(t, stackContainsLabeled, []string{"runtime.systemstack;key=value"}, avoidFunctions(), func(dur time.Duration) {
1369+
Do(context.Background(), Labels("key", "value"), func(context.Context) {
1370+
var wg sync.WaitGroup
1371+
stop := make(chan struct{})
1372+
for i := 0; i < runtime.GOMAXPROCS(0); i++ {
1373+
wg.Add(1)
1374+
go func() {
1375+
defer wg.Done()
1376+
labelHog(stop)
1377+
}()
1378+
}
1379+
1380+
time.Sleep(dur)
1381+
close(stop)
1382+
wg.Wait()
1383+
})
1384+
})
1385+
1386+
var withLabel, withoutLabel int64
1387+
for _, s := range prof.Sample {
1388+
var systemstack, labelHog bool
1389+
for _, loc := range s.Location {
1390+
for _, l := range loc.Line {
1391+
switch l.Function.Name {
1392+
case "runtime.systemstack":
1393+
systemstack = true
1394+
case "runtime/pprof.labelHog":
1395+
labelHog = true
1396+
}
1397+
}
1398+
}
1399+
1400+
if systemstack && labelHog {
1401+
if s.Label != nil && contains(s.Label["key"], "value") {
1402+
withLabel += s.Value[0]
1403+
} else {
1404+
withoutLabel += s.Value[0]
1405+
}
1406+
}
1407+
}
1408+
1409+
// ratio on 2019 Intel MBP before/after CL 351751 for n=30 runs:
1410+
// before: mean=0.013 stddev=0.013 min=0.000 max=0.039
1411+
// after : mean=0.996 stddev=0.007 min=0.967 max=1.000
1412+
//
1413+
// TODO: Figure out why some samples still contain labelHog without labels.
1414+
// Once fixed this test case can be simplified to just check that all samples
1415+
// containing labelHog() have the label, and no other samples do.
1416+
ratio := float64(withLabel) / float64((withLabel + withoutLabel))
1417+
if ratio < 0.9 {
1418+
t.Fatalf("only %.1f%% of labelHog(systemstack()) samples have label", ratio*100)
1419+
}
1420+
}
1421+
1422+
func labelHog(stop chan struct{}) {
1423+
for i := 0; ; i++ {
1424+
select {
1425+
case <-stop:
1426+
return
1427+
default:
1428+
fmt.Fprintf(io.Discard, "%d", i)
1429+
}
1430+
}
1431+
}
1432+
13661433
// Check that there is no deadlock when the program receives SIGPROF while in
13671434
// 64bit atomics' critical section. Used to happen on mips{,le}. See #20146.
13681435
func TestAtomicLoadStore64(t *testing.T) {

src/runtime/proc.go

+8-1
Original file line numberDiff line numberDiff line change
@@ -4700,7 +4700,14 @@ func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
47004700
}
47014701

47024702
if prof.hz != 0 {
4703-
cpuprof.add(gp, stk[:n])
4703+
// Note: it can happen on Windows that we interrupted a system thread
4704+
// with no g, so gp could nil. The other nil checks are done out of
4705+
// caution, but not expected to be nil in practice.
4706+
var tagPtr *unsafe.Pointer
4707+
if gp != nil && gp.m != nil && gp.m.curg != nil {
4708+
tagPtr = &gp.m.curg.labels
4709+
}
4710+
cpuprof.add(tagPtr, stk[:n])
47044711
}
47054712
getg().m.mallocing--
47064713
}

0 commit comments

Comments
 (0)