Skip to content

Commit 75d884d

Browse files
committed
runtime,runtime/metrics: track on-cpu time per goroutine
Fixes golang#41554. This commit introduces a /sched/goroutine/running:nanoseconds metric, defined as the total time spent by a goroutine in the running state. This measurement is useful for systems that would benefit from fine-grained CPU attribution. An alternative to scheduler-backed CPU attribution would be the use of profiler labels. Given it's common to spawn multiple goroutines for the same task, goroutine-backed statistics can easily become misleading. Profiler labels instead let you measure CPU performance across a set of cooperating goroutines. That said, it comes with overhead that makes it unfeasible to always enable. For high-performance systems that care about fine-grained CPU attribution (databases for e.g. that want to measure total CPU time spent processing each request), profiler labels are too cost-prohibitive, especially given the Go runtime has a much cheaper view of the data needed. It's worth noting that we already export /sched/latencies:seconds to track scheduling latencies, i.e. time spent by a goroutine in the runnable state (go-review.googlesource.com/c/go/+/308933). This commit does effectively the same except for the running state. Users are free to use this metric to power histograms or tracking on-CPU time across a set of goroutines. Change-Id: Ie78336a3ddeca0521ae29cce57bc7a5ea67da297
1 parent 0261fa6 commit 75d884d

File tree

6 files changed

+38
-2
lines changed

6 files changed

+38
-2
lines changed

src/runtime/metrics.go

+6
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,12 @@ func initMetrics() {
274274
in.sysStats.gcMiscSys + in.sysStats.otherSys
275275
},
276276
},
277+
"/sched/goroutine/running:nanoseconds": {
278+
compute: func(_ *statAggregate, out *metricValue) {
279+
out.kind = metricKindUint64
280+
out.scalar = uint64(grunningnanos())
281+
},
282+
},
277283
"/sched/goroutines:goroutines": {
278284
compute: func(_ *statAggregate, out *metricValue) {
279285
out.kind = metricKindUint64

src/runtime/metrics/description.go

+5
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,11 @@ var allDesc = []Description{
214214
Description: "All memory mapped by the Go runtime into the current process as read-write. Note that this does not include memory mapped by code called via cgo or via the syscall package. Sum of all metrics in /memory/classes.",
215215
Kind: KindUint64,
216216
},
217+
{
218+
Name: "/sched/goroutine/running:nanoseconds",
219+
Description: "Time spent by the current goroutine in the running state.",
220+
Kind: KindUint64,
221+
},
217222
{
218223
Name: "/sched/goroutines:goroutines",
219224
Description: "Count of live goroutines.",

src/runtime/metrics/doc.go

+3
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,9 @@ Below is the full list of supported metrics, ordered lexicographically.
164164
by code called via cgo or via the syscall package.
165165
Sum of all metrics in /memory/classes.
166166
167+
/sched/goroutine/running:nanoseconds
168+
Wall time spent by the current goroutine in the running state.
169+
167170
/sched/goroutines:goroutines
168171
Count of live goroutines.
169172

src/runtime/proc.go

+21-1
Original file line numberDiff line numberDiff line change
@@ -990,8 +990,18 @@ func casgstatus(gp *g, oldval, newval uint32) {
990990
}
991991
}
992992

993-
// Handle tracking for scheduling latencies.
993+
// Handle tracking for scheduling and running latencies.
994+
now := nanotime()
995+
if newval == _Grunning {
996+
// We're transitioning into the running state, record the timestamp for
997+
// subsequent use.
998+
gp.lastsched = now
999+
}
9941000
if oldval == _Grunning {
1001+
// We're transitioning out of running, record how long we were in the
1002+
// state.
1003+
gp.runningnanos += now - gp.lastsched
1004+
9951005
// Track every 8th time a goroutine transitions out of running.
9961006
if gp.trackingSeq%gTrackingPeriod == 0 {
9971007
gp.tracking = true
@@ -3240,6 +3250,14 @@ func dropg() {
32403250
setGNoWB(&_g_.m.curg, nil)
32413251
}
32423252

3253+
// grunningnanos returns the wall time spent by current g in the running state.
3254+
// A goroutine may be running on an OS thread that's descheduled by the OS
3255+
// scheduler, this time still counts towards the metric.
3256+
func grunningnanos() int64 {
3257+
gp := getg()
3258+
return gp.runningnanos + nanotime() - gp.lastsched
3259+
}
3260+
32433261
// checkTimers runs any timers for the P that are ready.
32443262
// If now is not 0 it is the current time.
32453263
// It returns the passed time or the current time if now was passed as 0.
@@ -3472,6 +3490,8 @@ func goexit0(gp *g) {
34723490
gp.param = nil
34733491
gp.labels = nil
34743492
gp.timer = nil
3493+
gp.lastsched = 0
3494+
gp.runningnanos = 0
34753495

34763496
if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
34773497
// Flush assist credit to the global pool. This gives

src/runtime/runtime2.go

+2
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,8 @@ type g struct {
486486
labels unsafe.Pointer // profiler labels
487487
timer *timer // cached timer for time.Sleep
488488
selectDone uint32 // are we participating in a select and did someone win the race?
489+
lastsched int64 // timestamp when the G last started running
490+
runningnanos int64 // wall time spent in the running state
489491

490492
// Per-G GC state
491493

src/runtime/sizeof_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
2121
_32bit uintptr // size on 32bit platforms
2222
_64bit uintptr // size on 64bit platforms
2323
}{
24-
{runtime.G{}, 236, 392}, // g, but exported for testing
24+
{runtime.G{}, 252, 408}, // g, but exported for testing
2525
{runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
2626
}
2727

0 commit comments

Comments
 (0)