Skip to content

Commit a035d2e

Browse files
craig[bot]DrewKimball
craig[bot]
andcommitted
Merge #89256
89256: sql: output RU estimate for EXPLAIN ANALYZE on tenants r=DrewKimball a=DrewKimball **sql: surface query request units consumed by network egress** This commit adds a top-level field to the output of `EXPLAIN ANALYZE` that shows the estimated number of RUs that would be consumed due to network egress to the client. The estimate is obtained by measuring the in-memory size of the query result, and passing that to the tenant cost config's `PGWireEgressCost` method. **sql: surface query request units consumed due to cpu usage** This commit adds the ability for clients to estimate the number of RUs consumed by a query due to CPU usage. This is accomplished by keeping a moving average of the CPU usage for the entire tenant process, then using that to obtain an estimate for what the CPU usage *would* be if the query wasn't running. This is then compared against the actual measured CPU usage during the query's execution to get the estimate. For local flows this is done at the `connExecutor` level; for remote flows this is handled by the last outbox on the node (which gathers and sends the flow's metadata). The resulting RU estimate is added to the existing estimate from network egress and displayed in the output of `EXPLAIN ANALYZE`. **sql: surface query request units consumed by IO** This commit adds tracking for request units consumed by IO operations for all execution operators that perform KV operations. The corresponding RU count is recorded in the span and later aggregated with the RU consumption due to network egress and CPU usage. The resulting query RU consumption estimate is visible in the output of `EXPLAIN ANALYZE`. **multitenantccl: add sanity testing for ru estimation** This commit adds a sanity test for the RU estimates produced by running queries with `EXPLAIN ANALYZE` on a tenant. The test runs each test query several times with `EXPLAIN ANALYZE`, then runs all test queries without `EXPLAIN ANALYZE` and compares the resulting actual RU measurement to the aggregated estimates. Informs #74441 Release note (sql change): Added an estimate for the number of request units consumed by a query to the output of `EXPLAIN ANALYZE` for tenant sessions. Co-authored-by: Drew Kimball <[email protected]>
2 parents 8814521 + f95b046 commit a035d2e

37 files changed

+678
-56
lines changed

pkg/BUILD.bazel

+2
Original file line numberDiff line numberDiff line change
@@ -1236,6 +1236,7 @@ GO_TARGETS = [
12361236
"//pkg/kv/kvserver:kvserver_test",
12371237
"//pkg/kv:kv",
12381238
"//pkg/kv:kv_test",
1239+
"//pkg/multitenant/multitenantcpu:multitenantcpu",
12391240
"//pkg/multitenant/multitenantio:multitenantio",
12401241
"//pkg/multitenant/tenantcostmodel:tenantcostmodel",
12411242
"//pkg/multitenant:multitenant",
@@ -2535,6 +2536,7 @@ GET_X_DATA_TARGETS = [
25352536
"//pkg/kv/kvserver/txnwait:get_x_data",
25362537
"//pkg/kv/kvserver/uncertainty:get_x_data",
25372538
"//pkg/multitenant:get_x_data",
2539+
"//pkg/multitenant/multitenantcpu:get_x_data",
25382540
"//pkg/multitenant/multitenantio:get_x_data",
25392541
"//pkg/multitenant/tenantcostmodel:get_x_data",
25402542
"//pkg/obs:get_x_data",

pkg/ccl/multitenantccl/tenantcostclient/BUILD.bazel

+7
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ go_library(
2626
"//pkg/util/stop",
2727
"//pkg/util/syncutil",
2828
"//pkg/util/timeutil",
29+
"//pkg/util/tracing",
30+
"//pkg/util/tracing/tracingpb",
2931
"@com_github_cockroachdb_errors//:errors",
3032
"@com_github_cockroachdb_errors//errorspb",
3133
],
@@ -36,6 +38,7 @@ go_test(
3638
srcs = [
3739
"limiter_test.go",
3840
"main_test.go",
41+
"query_ru_estimate_test.go",
3942
"tenant_side_test.go",
4043
"token_bucket_test.go",
4144
],
@@ -47,6 +50,8 @@ go_test(
4750
"//pkg/blobs",
4851
"//pkg/ccl",
4952
"//pkg/ccl/changefeedccl",
53+
"//pkg/ccl/kvccl/kvtenantccl",
54+
"//pkg/ccl/multitenantccl/tenantcostserver",
5055
"//pkg/ccl/utilccl",
5156
"//pkg/cloud",
5257
"//pkg/cloud/nodelocal",
@@ -72,12 +77,14 @@ go_test(
7277
"//pkg/sql/stats",
7378
"//pkg/testutils",
7479
"//pkg/testutils/serverutils",
80+
"//pkg/testutils/skip",
7581
"//pkg/testutils/sqlutils",
7682
"//pkg/testutils/testcluster",
7783
"//pkg/util/ctxgroup",
7884
"//pkg/util/ioctx",
7985
"//pkg/util/leaktest",
8086
"//pkg/util/log",
87+
"//pkg/util/protoutil",
8188
"//pkg/util/randutil",
8289
"//pkg/util/stop",
8390
"//pkg/util/syncutil",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
// Copyright 2022 The Cockroach Authors.
2+
//
3+
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
4+
// License (the "License"); you may not use this file except in compliance with
5+
// the License. You may obtain a copy of the License at
6+
//
7+
// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt
8+
9+
package tenantcostclient_test
10+
11+
import (
12+
"context"
13+
"fmt"
14+
"strconv"
15+
"strings"
16+
"testing"
17+
"time"
18+
19+
"github.com/cockroachdb/cockroach/pkg/base"
20+
_ "github.com/cockroachdb/cockroach/pkg/ccl" // ccl init hooks
21+
_ "github.com/cockroachdb/cockroach/pkg/ccl/kvccl/kvtenantccl"
22+
"github.com/cockroachdb/cockroach/pkg/ccl/multitenantccl/tenantcostclient"
23+
_ "github.com/cockroachdb/cockroach/pkg/ccl/multitenantccl/tenantcostserver"
24+
"github.com/cockroachdb/cockroach/pkg/roachpb"
25+
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
26+
"github.com/cockroachdb/cockroach/pkg/sql/stats"
27+
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
28+
"github.com/cockroachdb/cockroach/pkg/testutils/skip"
29+
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
30+
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
31+
"github.com/cockroachdb/cockroach/pkg/util/log"
32+
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
33+
"github.com/stretchr/testify/require"
34+
)
35+
36+
// TestEstimateQueryRUConsumption is a sanity check for the RU estimates
37+
// produced for queries that are run by a tenant under EXPLAIN ANALYZE. The RU
38+
// consumption of a query is not deterministic, since it depends on inexact
39+
// quantities like the (already estimated) CPU usage. Therefore, the test runs
40+
// each query multiple times and then checks that the total estimated RU
41+
// consumption is within reasonable distance from the actual measured RUs for
42+
// the tenant.
43+
func TestEstimateQueryRUConsumption(t *testing.T) {
44+
defer leaktest.AfterTest(t)()
45+
defer log.Scope(t).Close(t)
46+
47+
// This test becomes flaky when the machine/cluster is under significant
48+
// background load, so it should only be run manually.
49+
skip.IgnoreLint(t, "intended to be manually run as a sanity test")
50+
51+
ctx := context.Background()
52+
53+
st := cluster.MakeTestingClusterSettings()
54+
stats.AutomaticStatisticsClusterMode.Override(ctx, &st.SV, false)
55+
stats.UseStatisticsOnSystemTables.Override(ctx, &st.SV, false)
56+
stats.AutomaticStatisticsOnSystemTables.Override(ctx, &st.SV, false)
57+
58+
// Lower the target duration for reporting tenant usage so that it can be
59+
// measured accurately. Avoid decreasing too far, since doing so can add
60+
// measurable overhead.
61+
tenantcostclient.TargetPeriodSetting.Override(ctx, &st.SV, time.Millisecond*500)
62+
63+
params := base.TestServerArgs{
64+
Settings: st,
65+
DisableDefaultTestTenant: true,
66+
}
67+
68+
s, mainDB, _ := serverutils.StartServer(t, params)
69+
defer s.Stopper().Stop(ctx)
70+
sysDB := sqlutils.MakeSQLRunner(mainDB)
71+
72+
tenantID := serverutils.TestTenantID()
73+
tenant1, tenantDB1 := serverutils.StartTenant(t, s, base.TestTenantArgs{
74+
TenantID: tenantID,
75+
Settings: st,
76+
})
77+
defer tenant1.Stopper().Stop(ctx)
78+
defer tenantDB1.Close()
79+
tdb := sqlutils.MakeSQLRunner(tenantDB1)
80+
tdb.Exec(t, "SET CLUSTER SETTING sql.stats.automatic_collection.enabled=false")
81+
tdb.Exec(t, "CREATE TABLE abcd (a INT, b INT, c INT, d INT, INDEX (a, b, c))")
82+
83+
type testCase struct {
84+
sql string
85+
count int
86+
}
87+
testCases := []testCase{
88+
{ // Insert statement
89+
sql: "INSERT INTO abcd (SELECT t%2, t%3, t, -t FROM generate_series(1,50000) g(t))",
90+
count: 1,
91+
},
92+
{ // Point query
93+
sql: "SELECT a FROM abcd WHERE (a, b) = (1, 1)",
94+
count: 10,
95+
},
96+
{ // Range query
97+
sql: "SELECT a FROM abcd WHERE (a, b) = (1, 1) AND c > 0 AND c < 10000",
98+
count: 10,
99+
},
100+
{ // Aggregate
101+
sql: "SELECT count(*) FROM abcd",
102+
count: 10,
103+
},
104+
{ // Distinct
105+
sql: "SELECT DISTINCT ON (a, b) * FROM abcd",
106+
count: 10,
107+
},
108+
{ // Full table scan
109+
sql: "SELECT a FROM abcd",
110+
count: 10,
111+
},
112+
{ // Lookup join
113+
sql: "SELECT a FROM (VALUES (1, 1), (0, 2)) v(x, y) INNER LOOKUP JOIN abcd ON (a, b) = (x, y)",
114+
count: 10,
115+
},
116+
{ // Index join
117+
sql: "SELECT * FROM abcd WHERE (a, b) = (0, 0)",
118+
count: 10,
119+
},
120+
{ // No kv IO, lots of network egress.
121+
sql: "SELECT 'deadbeef' FROM generate_series(1, 50000)",
122+
count: 10,
123+
},
124+
}
125+
126+
var err error
127+
var tenantEstimatedRUs int
128+
for _, tc := range testCases {
129+
for i := 0; i < tc.count; i++ {
130+
output := tdb.QueryStr(t, "EXPLAIN ANALYZE "+tc.sql)
131+
var estimatedRU int
132+
for _, row := range output {
133+
if len(row) != 1 {
134+
t.Fatalf("expected one column")
135+
}
136+
val := row[0]
137+
if strings.Contains(val, "estimated RUs consumed") {
138+
substr := strings.Split(val, " ")
139+
require.Equalf(t, 4, len(substr), "expected RU consumption message to have four words")
140+
ruCountStr := strings.Replace(strings.TrimSpace(substr[3]), ",", "", -1)
141+
estimatedRU, err = strconv.Atoi(ruCountStr)
142+
require.NoError(t, err, "failed to retrieve estimated RUs")
143+
break
144+
}
145+
}
146+
tenantEstimatedRUs += estimatedRU
147+
}
148+
}
149+
150+
getTenantRUs := func() float64 {
151+
// Sleep to ensure the measured RU consumption gets recorded in the
152+
// tenant_usage table.
153+
time.Sleep(time.Second)
154+
var consumptionBytes []byte
155+
var consumption roachpb.TenantConsumption
156+
var tenantRUs float64
157+
rows := sysDB.Query(t,
158+
fmt.Sprintf(
159+
"SELECT total_consumption FROM system.tenant_usage WHERE tenant_id = %d AND instance_id = 0",
160+
tenantID.ToUint64(),
161+
),
162+
)
163+
for rows.Next() {
164+
require.NoError(t, rows.Scan(&consumptionBytes))
165+
if len(consumptionBytes) == 0 {
166+
continue
167+
}
168+
require.NoError(t, protoutil.Unmarshal(consumptionBytes, &consumption))
169+
tenantRUs += consumption.RU
170+
}
171+
return tenantRUs
172+
}
173+
tenantStartRUs := getTenantRUs()
174+
175+
var tenantMeasuredRUs float64
176+
for _, tc := range testCases {
177+
for i := 0; i < tc.count; i++ {
178+
tdb.QueryStr(t, tc.sql)
179+
}
180+
}
181+
182+
// Check the estimated RU aggregate for all the queries against the actual
183+
// measured RU consumption for the tenant.
184+
tenantMeasuredRUs = getTenantRUs() - tenantStartRUs
185+
const deltaFraction = 0.25
186+
allowedDelta := tenantMeasuredRUs * deltaFraction
187+
require.InDeltaf(t, tenantMeasuredRUs, tenantEstimatedRUs, allowedDelta,
188+
"estimated RUs (%d) were not within %f RUs of the expected value (%f)",
189+
tenantEstimatedRUs,
190+
allowedDelta,
191+
tenantMeasuredRUs,
192+
)
193+
}

pkg/ccl/multitenantccl/tenantcostclient/tenant_side.go

+40-4
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ import (
2525
"github.com/cockroachdb/cockroach/pkg/util/stop"
2626
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
2727
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
28+
"github.com/cockroachdb/cockroach/pkg/util/tracing"
29+
"github.com/cockroachdb/cockroach/pkg/util/tracing/tracingpb"
2830
"github.com/cockroachdb/errors"
2931
"github.com/cockroachdb/errors/errorspb"
3032
)
@@ -129,6 +131,9 @@ const defaultTickInterval = time.Second
129131
// 0.5^(1 second / tickInterval)
130132
const movingAvgRUPerSecFactor = 0.5
131133

134+
// movingAvgCPUPerSecFactor is the weight applied to a new sample of CPU usage.
135+
const movingAvgCPUPerSecFactor = 0.5
136+
132137
// We request more tokens when the available RUs go below a threshold. The
133138
// threshold is a fraction of the last granted RUs.
134139
const notifyFraction = 0.1
@@ -260,6 +265,11 @@ type tenantSideCostController struct {
260265
// It is read and written on multiple goroutines and so must be protected
261266
// by a mutex.
262267
consumption roachpb.TenantConsumption
268+
269+
// avgCPUPerSec is an exponentially-weighted moving average of the CPU usage
270+
// per second; used to estimate the CPU usage of a query. It is only written
271+
// in the main loop, but can be read by multiple goroutines so is protected.
272+
avgCPUPerSec float64
263273
}
264274

265275
// lowRUNotifyChan is used when the number of available RUs is running low and
@@ -389,23 +399,28 @@ func (c *tenantSideCostController) onTick(ctx context.Context, newTime time.Time
389399
// Update CPU consumption.
390400
deltaCPU := newExternalUsage.CPUSecs - c.run.externalUsage.CPUSecs
391401

392-
// Subtract any allowance that we consider free background usage.
393402
deltaTime := newTime.Sub(c.run.lastTick)
394403
if deltaTime > 0 {
404+
// Subtract any allowance that we consider free background usage.
395405
allowance := CPUUsageAllowance.Get(&c.settings.SV).Seconds() * deltaTime.Seconds()
396406
deltaCPU -= allowance
397407

408+
avgCPU := deltaCPU / deltaTime.Seconds()
409+
410+
c.mu.Lock()
398411
// If total CPU usage is small (less than 3% of a single CPU by default)
399412
// and there have been no recent read/write operations, then ignore the
400413
// recent usage altogether. This is intended to minimize RU usage when the
401414
// cluster is idle.
402-
c.mu.Lock()
403415
if deltaCPU < allowance*2 {
404416
if c.mu.consumption.ReadBatches == c.run.consumption.ReadBatches &&
405417
c.mu.consumption.WriteBatches == c.run.consumption.WriteBatches {
406418
deltaCPU = 0
407419
}
408420
}
421+
// Keep track of an exponential moving average of CPU usage.
422+
c.mu.avgCPUPerSec *= 1 - movingAvgCPUPerSecFactor
423+
c.mu.avgCPUPerSec += avgCPU * movingAvgCPUPerSecFactor
409424
c.mu.Unlock()
410425
}
411426
if deltaCPU < 0 {
@@ -753,7 +768,8 @@ func (c *tenantSideCostController) OnRequestWait(ctx context.Context) error {
753768
return c.limiter.Wait(ctx, 0)
754769
}
755770

756-
// OnResponse is part of the multitenant.TenantSideBatchInterceptor interface.
771+
// OnResponseWait is part of the multitenant.TenantSideBatchInterceptor
772+
// interface.
757773
func (c *tenantSideCostController) OnResponseWait(
758774
ctx context.Context, req tenantcostmodel.RequestInfo, resp tenantcostmodel.ResponseInfo,
759775
) error {
@@ -773,6 +789,13 @@ func (c *tenantSideCostController) OnResponseWait(
773789
return err
774790
}
775791

792+
// Record the number of RUs consumed by the IO request.
793+
if sp := tracing.SpanFromContext(ctx); sp != nil && sp.RecordingType() != tracingpb.RecordingOff {
794+
sp.RecordStructured(&roachpb.TenantConsumption{
795+
RU: float64(totalRU),
796+
})
797+
}
798+
776799
c.mu.Lock()
777800
defer c.mu.Unlock()
778801

@@ -814,7 +837,7 @@ func (c *tenantSideCostController) OnExternalIOWait(
814837
}
815838

816839
// OnExternalIO is part of the multitenant.TenantSideExternalIORecorder
817-
// interface.
840+
// interface. TODO(drewk): collect this for queries.
818841
func (c *tenantSideCostController) OnExternalIO(
819842
ctx context.Context, usage multitenant.ExternalIOUsage,
820843
) {
@@ -853,3 +876,16 @@ func (c *tenantSideCostController) onExternalIO(
853876

854877
return nil
855878
}
879+
880+
// GetCPUMovingAvg is used to obtain an exponential moving average estimate
881+
// for the CPU usage in seconds per each second of wall-clock time.
882+
func (c *tenantSideCostController) GetCPUMovingAvg() float64 {
883+
c.mu.Lock()
884+
defer c.mu.Unlock()
885+
return c.mu.avgCPUPerSec
886+
}
887+
888+
// GetCostConfig is part of the multitenant.TenantSideCostController interface.
889+
func (c *tenantSideCostController) GetCostConfig() *tenantcostmodel.Config {
890+
return &c.costCfg
891+
}

pkg/multitenant/cost_controller.go

+8
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,14 @@ type TenantSideCostController interface {
3232
nextLiveInstanceIDFn NextLiveInstanceIDFn,
3333
) error
3434

35+
// GetCPUMovingAvg returns an exponential moving average used for estimating
36+
// the CPU usage (in CPU secs) per wall-clock second.
37+
GetCPUMovingAvg() float64
38+
39+
// GetCostConfig returns the cost model config this TenantSideCostController
40+
// is using.
41+
GetCostConfig() *tenantcostmodel.Config
42+
3543
TenantSideKVInterceptor
3644

3745
TenantSideExternalIORecorder
+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
load("//build/bazelutil/unused_checker:unused.bzl", "get_x_data")
2+
load("@io_bazel_rules_go//go:def.bzl", "go_library")
3+
4+
go_library(
5+
name = "multitenantcpu",
6+
srcs = ["cpu_usage.go"],
7+
importpath = "github.com/cockroachdb/cockroach/pkg/multitenant/multitenantcpu",
8+
visibility = ["//visibility:public"],
9+
deps = [
10+
"//pkg/multitenant",
11+
"//pkg/server/status",
12+
"//pkg/util/log",
13+
"//pkg/util/timeutil",
14+
],
15+
)
16+
17+
get_x_data(name = "get_x_data")

0 commit comments

Comments
 (0)