Skip to content

Commit 231b71c

Browse files
committed
runtime,runtime/metrics: track on-cpu time per goroutine
Fixes golang#41554. This commit introduces a /sched/goroutine/running:nanoseconds metric, defined as the total time spent by a goroutine in the running state. This measurement is useful for systems that would benefit from fine-grained CPU attribution. An alternative to scheduler-backed CPU attribution would be the use of profiler labels. Given it's common to spawn multiple goroutines for the same task, goroutine-backed statistics can easily become misleading. Profiler labels instead let you measure CPU performance across a set of cooperating goroutines. That said, it comes with overhead that makes it unfeasible to always enable. For high-performance systems that care about fine-grained CPU attribution (databases for e.g. that want to measure total CPU time spent processing each request), profiler labels are too cost-prohibitive, especially given the Go runtime has a much cheaper view of the data needed. It's worth noting that we already export /sched/latencies:seconds to track scheduling latencies, i.e. time spent by a goroutine in the runnable state (go-review.googlesource.com/c/go/+/308933). This commit does effectively the same except for the running state. Users are free to use this metric to power histograms or tracking on-CPU time across a set of goroutines. Change-Id: Ie78336a3ddeca0521ae29cce57bc7a5ea67da297
1 parent 0261fa6 commit 231b71c

File tree

7 files changed

+40
-2
lines changed

7 files changed

+40
-2
lines changed

src/runtime/metrics.go

+6
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,12 @@ func initMetrics() {
274274
in.sysStats.gcMiscSys + in.sysStats.otherSys
275275
},
276276
},
277+
"/sched/goroutine/running:nanoseconds": {
278+
compute: func(_ *statAggregate, out *metricValue) {
279+
out.kind = metricKindUint64
280+
out.scalar = uint64(grunningnanos())
281+
},
282+
},
277283
"/sched/goroutines:goroutines": {
278284
compute: func(_ *statAggregate, out *metricValue) {
279285
out.kind = metricKindUint64

src/runtime/metrics/description.go

+5
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,11 @@ var allDesc = []Description{
214214
Description: "All memory mapped by the Go runtime into the current process as read-write. Note that this does not include memory mapped by code called via cgo or via the syscall package. Sum of all metrics in /memory/classes.",
215215
Kind: KindUint64,
216216
},
217+
{
218+
Name: "/sched/goroutine/running:nanoseconds",
219+
Description: "Time spent by the current goroutine in the running state.",
220+
Kind: KindUint64,
221+
},
217222
{
218223
Name: "/sched/goroutines:goroutines",
219224
Description: "Count of live goroutines.",

src/runtime/metrics/doc.go

+3
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,9 @@ Below is the full list of supported metrics, ordered lexicographically.
164164
by code called via cgo or via the syscall package.
165165
Sum of all metrics in /memory/classes.
166166
167+
/sched/goroutine/running:nanoseconds
168+
Time spent by the current goroutine in the running state.
169+
167170
/sched/goroutines:goroutines
168171
Count of live goroutines.
169172

src/runtime/metrics_test.go

+4
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,10 @@ func TestReadMetricsConsistency(t *testing.T) {
222222
for i := range h.Counts {
223223
gc.pauses += h.Counts[i]
224224
}
225+
case "/sched/goroutine/running:nanoseconds":
226+
if samples[i].Value.Uint64() < 1 {
227+
t.Error("goroutine running nanoseconds is less than one")
228+
}
225229
case "/sched/goroutines:goroutines":
226230
if samples[i].Value.Uint64() < 1 {
227231
t.Error("number of goroutines is less than one")

src/runtime/proc.go

+19-1
Original file line numberDiff line numberDiff line change
@@ -990,8 +990,18 @@ func casgstatus(gp *g, oldval, newval uint32) {
990990
}
991991
}
992992

993-
// Handle tracking for scheduling latencies.
993+
// Handle tracking for scheduling and running latencies.
994+
now := nanotime()
995+
if newval == _Grunning {
996+
// We're transitioning into the running state, record the timestamp for
997+
// subsequent use.
998+
gp.lastsched = now
999+
}
9941000
if oldval == _Grunning {
1001+
// We're transitioning out of running, record how long we were in the
1002+
// state.
1003+
gp.runningnanos += now - gp.lastsched
1004+
9951005
// Track every 8th time a goroutine transitions out of running.
9961006
if gp.trackingSeq%gTrackingPeriod == 0 {
9971007
gp.tracking = true
@@ -3240,6 +3250,12 @@ func dropg() {
32403250
setGNoWB(&_g_.m.curg, nil)
32413251
}
32423252

3253+
// grunningnanos returns the running time observed by the current g.
3254+
func grunningnanos() int64 {
3255+
gp := getg()
3256+
return gp.runningnanos + nanotime() - gp.lastsched
3257+
}
3258+
32433259
// checkTimers runs any timers for the P that are ready.
32443260
// If now is not 0 it is the current time.
32453261
// It returns the passed time or the current time if now was passed as 0.
@@ -3472,6 +3488,8 @@ func goexit0(gp *g) {
34723488
gp.param = nil
34733489
gp.labels = nil
34743490
gp.timer = nil
3491+
gp.lastsched = 0
3492+
gp.runningnanos = 0
34753493

34763494
if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
34773495
// Flush assist credit to the global pool. This gives

src/runtime/runtime2.go

+2
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,8 @@ type g struct {
486486
labels unsafe.Pointer // profiler labels
487487
timer *timer // cached timer for time.Sleep
488488
selectDone uint32 // are we participating in a select and did someone win the race?
489+
lastsched int64 // timestamp when the G last started running
490+
runningnanos int64 // total time spent in the running state
489491

490492
// Per-G GC state
491493

src/runtime/sizeof_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ func TestSizeof(t *testing.T) {
2121
_32bit uintptr // size on 32bit platforms
2222
_64bit uintptr // size on 64bit platforms
2323
}{
24-
{runtime.G{}, 236, 392}, // g, but exported for testing
24+
{runtime.G{}, 236, 408}, // g, but exported for testing
2525
{runtime.Sudog{}, 56, 88}, // sudog, but exported for testing
2626
}
2727

0 commit comments

Comments
 (0)