Skip to content

Commit ad3ea42

Browse files
authored
Reduce ingester memory by using TSDB's index (#3951)
* Add BenchmarkIngesterV2Push. So we can check the efficiency of changes to `v2Push()`. Broadly copied from `BenchmarkIngesterPush()`. Signed-off-by: Bryan Boreham <[email protected]>
* Use TSDB's index of series, remove RefCache. prometheus/prometheus#8600 adds a method to `TSDB.Appender` which allows us to save building a parallel cache, reducing ingester heap by about 20%. We depend on values from GetRef() remaining valid while v2Push() uses them. Currently the only way a ref can be invalidated is by a head compaction, which cannot happen while v2Push() holds the append lock. Signed-off-by: Bryan Boreham <[email protected]>
* New version of GetRef() that returns labels. Now we only need to make a copy if GetRef() returns zero. Note: the Prometheus update brings in JSON marshalling of model.Duration (prometheus/common#280). Signed-off-by: Bryan Boreham <[email protected]>
* Add comment on use of copiedLabels. Signed-off-by: Bryan Boreham <[email protected]>
* Update to Prometheus main branch. Pinned gRPC and other dependencies changed by the update in Prometheus to avoid taking so much change on this PR. Signed-off-by: Bryan Boreham <[email protected]>
1 parent 7691456 commit ad3ea42

File tree

14 files changed

+197
-973
lines changed

14 files changed

+197
-973
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* `-alertmanager.cluster.peers` instead of `-cluster.peer`
1313
* `-alertmanager.cluster.peer-timeout` instead of `-cluster.peer-timeout`
1414
* [FEATURE] Ruler: added `local` backend support to the ruler storage configuration under the `-ruler-storage.` flag prefix. #3932
15+
* [ENHANCEMENT] Blocks storage: reduce ingester memory by eliminating series reference cache. #3951
1516
* [ENHANCEMENT] Ruler: optimized `<prefix>/api/v1/rules` and `<prefix>/api/v1/alerts` when ruler sharding is enabled. #3916
1617
* [ENHANCEMENT] Ruler: added the following metrics when ruler sharding is enabled: #3916
1718
* `cortex_ruler_clients`

go.mod

+24-9
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ require (
1212
github.com/alecthomas/units v0.0.0-20210208195552-ff826a37aa15
1313
github.com/alicebob/miniredis v2.5.0+incompatible
1414
github.com/armon/go-metrics v0.3.6
15-
github.com/aws/aws-sdk-go v1.37.8
15+
github.com/aws/aws-sdk-go v1.38.3
1616
github.com/bradfitz/gomemcache v0.0.0-20190913173617-a41fca850d0b
1717
github.com/cespare/xxhash v1.1.0
1818
github.com/dustin/go-humanize v1.0.0
@@ -44,10 +44,10 @@ require (
4444
github.com/opentracing/opentracing-go v1.2.0
4545
github.com/pkg/errors v0.9.1
4646
github.com/prometheus/alertmanager v0.21.1-0.20210310093010-0f9cab6991e6
47-
github.com/prometheus/client_golang v1.9.0
47+
github.com/prometheus/client_golang v1.10.0
4848
github.com/prometheus/client_model v0.2.0
49-
github.com/prometheus/common v0.18.0
50-
github.com/prometheus/prometheus v1.8.2-0.20210321183757-31a518faab18
49+
github.com/prometheus/common v0.20.0
50+
github.com/prometheus/prometheus v1.8.2-0.20210324152458-c7a62b95cea0
5151
github.com/segmentio/fasthash v0.0.0-20180216231524-a72b379d632e
5252
github.com/sony/gobreaker v0.4.1
5353
github.com/spf13/afero v1.2.2
@@ -60,11 +60,11 @@ require (
6060
go.etcd.io/etcd/client/v3 v3.5.0-alpha.0.0.20210225194612-fa82d11a958a
6161
go.etcd.io/etcd/server/v3 v3.5.0-alpha.0.0.20210225194612-fa82d11a958a
6262
go.uber.org/atomic v1.7.0
63-
golang.org/x/net v0.0.0-20210119194325-5f4716e94777
64-
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a
65-
golang.org/x/time v0.0.0-20201208040808-7e3f01d25324
66-
google.golang.org/api v0.39.0
67-
google.golang.org/grpc v1.34.0
63+
golang.org/x/net v0.0.0-20210324051636-2c4c8ecb7826
64+
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
65+
golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba
66+
google.golang.org/api v0.42.0
67+
google.golang.org/grpc v1.36.0
6868
gopkg.in/yaml.v2 v2.4.0
6969
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
7070
sigs.k8s.io/yaml v1.2.0
@@ -101,3 +101,18 @@ replace github.com/go-openapi/strfmt => github.com/go-openapi/strfmt v0.19.5
101101
replace github.com/go-openapi/swag => github.com/go-openapi/swag v0.19.9
102102

103103
replace github.com/go-openapi/validate => github.com/go-openapi/validate v0.19.8
104+
105+
// Pin these, which are updated as dependencies in Prometheus; we will take those updates separately and carefully
106+
replace (
107+
github.com/aws/aws-sdk-go => github.com/aws/aws-sdk-go v1.37.8
108+
github.com/google/pprof => github.com/google/pprof v0.0.0-20210208152844-1612e9be7af6
109+
github.com/miekg/dns => github.com/miekg/dns v1.1.38
110+
github.com/prometheus/client_golang => github.com/prometheus/client_golang v1.9.0
111+
golang.org/x/crypto => golang.org/x/crypto v0.0.0-20201208171446-5f87f3452ae9
112+
golang.org/x/net => golang.org/x/net v0.0.0-20210119194325-5f4716e94777
113+
golang.org/x/oauth2 => golang.org/x/oauth2 v0.0.0-20210210192628-66670185b0cd
114+
golang.org/x/sync => golang.org/x/sync v0.0.0-20201207232520-09787c993a3a
115+
golang.org/x/sys => golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c
116+
google.golang.org/api => google.golang.org/api v0.39.0
117+
google.golang.org/grpc => google.golang.org/grpc v1.34.0
118+
)

go.sum

+20-349
Large diffs are not rendered by default.

pkg/ingester/ingester_v2.go

+18-50
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,6 @@ const (
101101
type userTSDB struct {
102102
db *tsdb.DB
103103
userID string
104-
refCache *cortex_tsdb.RefCache
105104
activeSeries *ActiveSeries
106105
seriesInMetric *metricCounter
107106
limiter *Limiter
@@ -185,7 +184,8 @@ func (u *userTSDB) compactHead(blockDuration int64) error {
185184

186185
defer u.casState(forceCompacting, active)
187186

188-
// Ingestion of samples in parallel with forced compaction can lead to overlapping blocks.
187+
// Ingestion of samples in parallel with forced compaction can lead to overlapping blocks,
188+
// and possible invalidation of the references returned from Appender.GetRef().
189189
// So we wait for existing in-flight requests to finish. Future push requests would fail until compaction is over.
190190
u.pushesInFlight.Wait()
191191

@@ -383,7 +383,6 @@ type TSDBState struct {
383383
walReplayTime prometheus.Histogram
384384
appenderAddDuration prometheus.Histogram
385385
appenderCommitDuration prometheus.Histogram
386-
refCachePurgeDuration prometheus.Histogram
387386
idleTsdbChecks *prometheus.CounterVec
388387
}
389388

@@ -435,11 +434,6 @@ func newTSDBState(bucketClient objstore.Bucket, registerer prometheus.Registerer
435434
Help: "The total time it takes for a push request to commit samples appended to TSDB.",
436435
Buckets: []float64{.001, .005, .01, .025, .05, .1, .25, .5, 1, 2.5, 5, 10},
437436
}),
438-
refCachePurgeDuration: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{
439-
Name: "cortex_ingester_tsdb_refcache_purge_duration_seconds",
440-
Help: "The total time it takes to purge the TSDB series reference cache for a single tenant.",
441-
Buckets: prometheus.DefBuckets,
442-
}),
443437

444438
idleTsdbChecks: idleTsdbChecks,
445439
}
@@ -619,11 +613,6 @@ func (i *Ingester) updateLoop(ctx context.Context) error {
619613
rateUpdateTicker := time.NewTicker(i.cfg.RateUpdatePeriod)
620614
defer rateUpdateTicker.Stop()
621615

622-
// We use a hardcoded value for this ticker because there should be no
623-
// real value in customizing it.
624-
refCachePurgeTicker := time.NewTicker(5 * time.Minute)
625-
defer refCachePurgeTicker.Stop()
626-
627616
var activeSeriesTickerChan <-chan time.Time
628617
if i.cfg.ActiveSeriesMetricsEnabled {
629618
t := time.NewTicker(i.cfg.ActiveSeriesMetricsUpdatePeriod)
@@ -646,17 +635,6 @@ func (i *Ingester) updateLoop(ctx context.Context) error {
646635
db.ingestedRuleSamples.tick()
647636
}
648637
i.userStatesMtx.RUnlock()
649-
case <-refCachePurgeTicker.C:
650-
for _, userID := range i.getTSDBUsers() {
651-
userDB := i.getTSDB(userID)
652-
if userDB == nil {
653-
continue
654-
}
655-
656-
startTime := time.Now()
657-
userDB.refCache.Purge(startTime.Add(-cortex_tsdb.DefaultRefCacheTTL))
658-
i.TSDBState.refCachePurgeDuration.Observe(time.Since(startTime).Seconds())
659-
}
660638

661639
case <-activeSeriesTickerChan:
662640
i.v2UpdateActiveSeries()
@@ -683,6 +661,12 @@ func (i *Ingester) v2UpdateActiveSeries() {
683661
}
684662
}
685663

664+
// GetRef() is an extra method added to TSDB to let Cortex check before calling Append()
665+
type extendedAppender interface {
666+
storage.Appender
667+
storage.GetRef
668+
}
669+
686670
// v2Push adds metrics to a block
687671
func (i *Ingester) v2Push(ctx context.Context, req *cortexpb.WriteRequest) (*cortexpb.WriteResponse, error) {
688672
var firstPartialErr error
@@ -738,13 +722,13 @@ func (i *Ingester) v2Push(ctx context.Context, req *cortexpb.WriteRequest) (*cor
738722
)
739723

740724
// Walk the samples, appending them to the users database
741-
app := db.Appender(ctx)
725+
app := db.Appender(ctx).(extendedAppender)
742726
for _, ts := range req.Timeseries {
743-
// Check if we already have a cached reference for this series. Be aware
744-
// that even if we have a reference it's not guaranteed to be still valid.
745727
// The labels must be sorted (in our case, it's guaranteed a write request
746728
// has sorted labels once it hits the ingester).
747-
cachedRef, copiedLabels, cachedRefExists := db.refCache.Ref(startAppend, cortexpb.FromLabelAdaptersToLabels(ts.Labels))
729+
730+
// Look up a reference for this series.
731+
ref, copiedLabels := app.GetRef(cortexpb.FromLabelAdaptersToLabels(ts.Labels))
748732

749733
// To find out if any sample was added to this series, we keep old value.
750734
oldSucceededSamplesCount := succeededSamplesCount
@@ -753,30 +737,18 @@ func (i *Ingester) v2Push(ctx context.Context, req *cortexpb.WriteRequest) (*cor
753737
var err error
754738

755739
// If a reference for this series already exists, we try to use it.
756-
if cachedRefExists {
757-
var ref uint64
758-
if ref, err = app.Append(cachedRef, copiedLabels, s.TimestampMs, s.Value); err == nil {
740+
if ref != 0 {
741+
if _, err = app.Append(ref, copiedLabels, s.TimestampMs, s.Value); err == nil {
759742
succeededSamplesCount++
760-
// This means the reference changes which means we need to update our cache.
761-
if ref != cachedRef {
762-
db.refCache.SetRef(startAppend, copiedLabels, ref)
763-
}
764743
continue
765744
}
766745

767746
} else {
768-
var ref uint64
769-
770-
// Copy the label set because both TSDB and the cache may retain it.
747+
// Copy the label set because both TSDB and the active series tracker may retain it.
771748
copiedLabels = cortexpb.FromLabelAdaptersToLabelsWithCopy(ts.Labels)
772749

750+
// Retain the reference in case there are multiple samples for the series.
773751
if ref, err = app.Append(0, copiedLabels, s.TimestampMs, s.Value); err == nil {
774-
db.refCache.SetRef(startAppend, copiedLabels, ref)
775-
776-
// Set these in case there are multiple samples for the series.
777-
cachedRef = ref
778-
cachedRefExists = true
779-
780752
succeededSamplesCount++
781753
continue
782754
}
@@ -827,11 +799,8 @@ func (i *Ingester) v2Push(ctx context.Context, req *cortexpb.WriteRequest) (*cor
827799

828800
if i.cfg.ActiveSeriesMetricsEnabled && succeededSamplesCount > oldSucceededSamplesCount {
829801
db.activeSeries.UpdateSeries(cortexpb.FromLabelAdaptersToLabels(ts.Labels), startAppend, func(l labels.Labels) labels.Labels {
830-
// If we have already made a copy during this push, no need to create new one.
831-
if copiedLabels != nil {
832-
return copiedLabels
833-
}
834-
return cortexpb.CopyLabels(l)
802+
// we must already have copied the labels if succeededSamplesCount has been incremented.
803+
return copiedLabels
835804
})
836805
}
837806
}
@@ -1435,7 +1404,6 @@ func (i *Ingester) createTSDB(userID string) (*userTSDB, error) {
14351404

14361405
userDB := &userTSDB{
14371406
userID: userID,
1438-
refCache: cortex_tsdb.NewRefCache(),
14391407
activeSeries: NewActiveSeries(),
14401408
seriesInMetric: newMetricCounter(i.limiter),
14411409
ingestedAPISamples: newEWMARate(0.2, i.cfg.RateUpdatePeriod),

0 commit comments

Comments
 (0)