Skip to content

Commit 2b9dc3c

Browse files
committed
runtime,runtime/metrics: track running time per goroutine
Fixes golang#41554. This commit introduces a /sched/goroutine/running:nanoseconds metric, defined as the total time spent by a goroutine in the running state. This measurement is useful for systems that would benefit from fine-grained CPU attribution. An alternative to scheduler-backed CPU attribution would be the use of profiler labels. Given it's common to spawn multiple goroutines for the same task, goroutine-backed statistics can easily become misleading. Profiler labels instead let you measure CPU performance across a set of cooperating goroutines. That said, it has two downsides: - performance overhead; for high-performance systems that care about fine-grained CPU attribution (databases for e.g. that want to measure total CPU time spent processing each request), profiler labels are too cost-prohibitive, especially given the Go runtime has a much cheaper and more granular view of the data needed - inaccuracy and imprecision, as evaluated in golang#36821 It's worth noting that we already export /sched/latencies:seconds to track scheduling latencies, i.e. time spent by a goroutine in the runnable state (go-review.googlesource.com/c/go/+/308933). This commit does effectively the same except for the running state on the requesting goroutine. Users are free to use this metric to power histograms or tracking on-CPU time across a set of goroutines. Change-Id: Ie78336a3ddeca0521ae29cce57bc7a5ea67da297
1 parent 7e33e9e commit 2b9dc3c

File tree

6 files changed

+39
-5
lines changed

6 files changed

+39
-5
lines changed

src/runtime/metrics.go

+6
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,12 @@ func initMetrics() {
286286
out.scalar = uint64(gomaxprocs)
287287
},
288288
},
289+
"/sched/goroutine/running:nanoseconds": {
290+
compute: func(_ *statAggregate, out *metricValue) {
291+
out.kind = metricKindUint64
292+
out.scalar = uint64(grunningnanos())
293+
},
294+
},
289295
"/sched/goroutines:goroutines": {
290296
compute: func(_ *statAggregate, out *metricValue) {
291297
out.kind = metricKindUint64

src/runtime/metrics/description.go

+5
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,11 @@ var allDesc = []Description{
225225
Description: "The current runtime.GOMAXPROCS setting, or the number of operating system threads that can execute user-level Go code simultaneously.",
226226
Kind: KindUint64,
227227
},
228+
{
229+
Name: "/sched/goroutine/running:nanoseconds",
230+
Description: "Wall time spent by the current goroutine in the running state.",
231+
Kind: KindUint64,
232+
},
228233
{
229234
Name: "/sched/goroutines:goroutines",
230235
Description: "Count of live goroutines.",

src/runtime/metrics/doc.go

+3
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,9 @@ Below is the full list of supported metrics, ordered lexicographically.
172172
operating system threads that can execute user-level Go code
173173
simultaneously.
174174
175+
/sched/goroutine/running:nanoseconds
176+
Wall time spent by the current goroutine in the running state.
177+
175178
/sched/goroutines:goroutines
176179
Count of live goroutines.
177180

src/runtime/proc.go

+21-3
Original file line numberDiff line numberDiff line change
@@ -994,8 +994,18 @@ func casgstatus(gp *g, oldval, newval uint32) {
994994
}
995995
}
996996

997-
// Handle tracking for scheduling latencies.
997+
// Handle tracking for scheduling and running latencies.
998+
now := nanotime()
999+
if newval == _Grunning {
1000+
// We're transitioning into the running state, record the timestamp for
1001+
// subsequent use.
1002+
gp.lastsched = now
1003+
}
9981004
if oldval == _Grunning {
1005+
// We're transitioning out of running, record how long we were in the
1006+
// state.
1007+
gp.runningnanos += now - gp.lastsched
1008+
9991009
// Track every 8th time a goroutine transitions out of running.
10001010
if gp.trackingSeq%gTrackingPeriod == 0 {
10011011
gp.tracking = true
@@ -1007,14 +1017,12 @@ func casgstatus(gp *g, oldval, newval uint32) {
10071017
// We transitioned out of runnable, so measure how much
10081018
// time we spent in this state and add it to
10091019
// runnableTime.
1010-
now := nanotime()
10111020
gp.runnableTime += now - gp.runnableStamp
10121021
gp.runnableStamp = 0
10131022
}
10141023
if newval == _Grunnable {
10151024
// We just transitioned into runnable, so record what
10161025
// time that happened.
1017-
now := nanotime()
10181026
gp.runnableStamp = now
10191027
} else if newval == _Grunning {
10201028
// We're transitioning into running, so turn off
@@ -3258,6 +3266,14 @@ func dropg() {
32583266
setGNoWB(&_g_.m.curg, nil)
32593267
}
32603268

3269+
// grunningnanos returns the wall time spent by current g in the running state.
3270+
// A goroutine may be running on an OS thread that's descheduled by the OS
3271+
// scheduler, this time still counts towards the metric.
3272+
func grunningnanos() int64 {
3273+
gp := getg()
3274+
return gp.runningnanos + nanotime() - gp.lastsched
3275+
}
3276+
32613277
// checkTimers runs any timers for the P that are ready.
32623278
// If now is not 0 it is the current time.
32633279
// It returns the passed time or the current time if now was passed as 0.
@@ -3491,6 +3507,8 @@ func goexit0(gp *g) {
34913507
gp.param = nil
34923508
gp.labels = nil
34933509
gp.timer = nil
3510+
gp.lastsched = 0
3511+
gp.runningnanos = 0
34943512

34953513
if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
34963514
// Flush assist credit to the global pool. This gives

src/runtime/runtime2.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -474,7 +474,6 @@ type g struct {
474474
traceseq uint64 // trace event sequencer
475475
tracelastp puintptr // last P emitted an event for this goroutine
476476
lockedm muintptr
477-
sig uint32
478477
writebuf []byte
479478
sigcode0 uintptr
480479
sigcode1 uintptr
@@ -488,6 +487,9 @@ type g struct {
488487
labels unsafe.Pointer // profiler labels
489488
timer *timer // cached timer for time.Sleep
490489
selectDone uint32 // are we participating in a select and did someone win the race?
490+
sig uint32
491+
lastsched int64 // timestamp when the G last started running
492+
runningnanos int64 // wall time spent in the running state
491493

492494
// goroutineProfiled indicates the status of this goroutine's stack for the
493495
// current in-progress goroutine profile

src/runtime/sizeof_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
2121
_32bit uintptr // size on 32bit platforms
2222
_64bit uintptr // size on 64bit platforms
2323
}{
24-
{runtime.G{}, 240, 392}, // g, but exported for testing
24+
{runtime.G{}, 256, 408}, // g, but exported for testing
2525
{runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
2626
}
2727

0 commit comments

Comments
 (0)