Skip to content

Commit c8363d8

Browse files
krajoramavincentfree
authored andcommitted
[prometheusreceiver]: allow mixed classic and native histogram (open-telemetry#39451)
Depends on open-telemetry#38645 for testing. #### Description This is technical dept from open-telemetry#28663, especially in migration scenarios we want to receive both versions of the same histogram, the classic explicit and exponential bucket histogram. The current internal model of the receiver does not allow this, so I modified the model to not only key metrics by metric family name, but also whether it's an exponential histogram. Some callbacks from Promtheus scrape don't tell us what the metric type is up front. For AppendExemplar to work I made transaction stateful to remember if it appended an exponential histogram before. For Append of staleness marker to work, I modified the algorithm to discover exponential histogram from the cached metadata and the name. Due to prometheus/prometheus#16217 I also had to modify AppendExemplar to detect trying to add exemplars from native histograms even if native histograms are turned off. #### Link to tracking issue Related to open-telemetry#26555 #### Testing Unit test updated. #### Documentation N/A - does not contradict. --------- Signed-off-by: György Krajcsovits <[email protected]>
1 parent 4f06a28 commit c8363d8

File tree

3 files changed

+199
-54
lines changed

3 files changed

+199
-54
lines changed
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# Use this changelog template to create an entry for release notes.
2+
3+
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
4+
change_type: bug_fix
5+
6+
# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
7+
component: prometheusreceiver
8+
9+
# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
10+
note: When a histogram metric has both classic and native histogram buckets, keep both, instead of throwing away the native histogram buckets.
11+
12+
# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
13+
issues: [26555]
14+
15+
# (Optional) One or more lines of additional information to render under the primary note.
16+
# These lines will be padded with 2 spaces and then inserted directly into the document.
17+
# Use pipe (|) for multiline entries.
18+
subtext: This was a technical dept from the previous implementation in PR 28663.
19+
20+
# If your change doesn't affect end users or the exported elements of any package,
21+
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
22+
# Optional: The change log or logs in which this entry should be included.
23+
# e.g. '[user]' or '[user, api]'
24+
# Include 'user' if the change is relevant to end users.
25+
# Include 'api' if there is a change to a library API.
26+
# Default: '[user]'
27+
change_logs: [user]

receiver/prometheusreceiver/internal/transaction.go

Lines changed: 87 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,23 @@ type resourceKey struct {
4141
job string
4242
instance string
4343
}
44+
45+
// The name of the metric family doesn't include magic suffixes (e.g. _bucket),
46+
// so for a classic histgram and a native histogram of the same family, the
47+
// metric family will be the same. To be able to tell them apart, we need to
48+
// store whether the metric is a native histogram or not.
49+
type metricFamilyKey struct {
50+
isExponentialHistogram bool
51+
name string
52+
}
53+
4454
type transaction struct {
4555
isNew bool
4656
trimSuffixes bool
4757
enableNativeHistograms bool
58+
addingNativeHistogram bool // true if the last sample was a native histogram.
4859
ctx context.Context
49-
families map[resourceKey]map[scopeID]map[string]*metricFamily
60+
families map[resourceKey]map[scopeID]map[metricFamilyKey]*metricFamily
5061
mc scrape.MetricMetadataStore
5162
sink consumer.Metrics
5263
externalLabels labels.Labels
@@ -79,7 +90,7 @@ func newTransaction(
7990
) *transaction {
8091
return &transaction{
8192
ctx: ctx,
82-
families: make(map[resourceKey]map[scopeID]map[string]*metricFamily),
93+
families: make(map[resourceKey]map[scopeID]map[metricFamilyKey]*metricFamily),
8394
isNew: true,
8495
trimSuffixes: trimSuffixes,
8596
enableNativeHistograms: enableNativeHistograms,
@@ -97,6 +108,8 @@ func newTransaction(
97108

98109
// Append always returns 0 to disable label caching.
99110
func (t *transaction) Append(_ storage.SeriesRef, ls labels.Labels, atMs int64, val float64) (storage.SeriesRef, error) {
111+
t.addingNativeHistogram = false
112+
100113
select {
101114
case <-t.ctx.Done():
102115
return 0, errTransactionAborted
@@ -157,64 +170,88 @@ func (t *transaction) Append(_ storage.SeriesRef, ls labels.Labels, atMs int64,
157170
return 0, nil
158171
}
159172

160-
curMF, existing := t.getOrCreateMetricFamily(*rKey, getScopeID(ls), metricName)
173+
scope := getScopeID(ls)
161174

162-
if t.enableNativeHistograms && curMF.mtype == pmetric.MetricTypeExponentialHistogram {
163-
// If a histogram has both classic and native version, the native histogram is scraped
164-
// first. Getting a float sample for the same series means that `scrape_classic_histogram`
165-
// is set to true in the scrape config. In this case, we should ignore the native histogram.
166-
curMF.mtype = pmetric.MetricTypeHistogram
175+
if t.enableNativeHistograms && value.IsStaleNaN(val) {
176+
if t.detectAndStoreNativeHistogramStaleness(atMs, rKey, scope, metricName, ls) {
177+
return 0, nil
178+
}
167179
}
168180

181+
curMF := t.getOrCreateMetricFamily(*rKey, scope, metricName)
182+
169183
seriesRef := t.getSeriesRef(ls, curMF.mtype)
170184
err = curMF.addSeries(seriesRef, metricName, ls, atMs, val)
171185
if err != nil {
172-
// Handle special case of float sample indicating staleness of native
173-
// histogram. This is similar to how Prometheus handles it, but we
174-
// don't have access to the previous value so we're applying some
175-
// heuristics to figure out if this is native histogram or not.
176-
// The metric type will indicate histogram, but presumably there will be no
177-
// _bucket, _count, _sum suffix or `le` label, which makes addSeries fail
178-
// with errEmptyLeLabel.
179-
if t.enableNativeHistograms && errors.Is(err, errEmptyLeLabel) && !existing && value.IsStaleNaN(val) && curMF.mtype == pmetric.MetricTypeHistogram {
180-
mg := curMF.loadMetricGroupOrCreate(seriesRef, ls, atMs)
181-
curMF.mtype = pmetric.MetricTypeExponentialHistogram
182-
mg.mtype = pmetric.MetricTypeExponentialHistogram
183-
_ = curMF.addExponentialHistogramSeries(seriesRef, metricName, ls, atMs, &histogram.Histogram{Sum: math.Float64frombits(value.StaleNaN)}, nil)
184-
// ignore errors here, this is best effort.
185-
} else {
186-
t.logger.Warn("failed to add datapoint", zap.Error(err), zap.String("metric_name", metricName), zap.Any("labels", ls))
187-
}
186+
t.logger.Warn("failed to add datapoint", zap.Error(err), zap.String("metric_name", metricName), zap.Any("labels", ls))
188187
}
189188

190189
return 0, nil // never return errors, as that fails the whole scrape
191190
}
192191

192+
// detectAndStoreNativeHistogramStaleness returns true if it detects
193+
// and stores a native histogram staleness marker.
194+
func (t *transaction) detectAndStoreNativeHistogramStaleness(atMs int64, key *resourceKey, scope scopeID, metricName string, ls labels.Labels) bool {
195+
// Detect the special case of stale native histogram series.
196+
// Currently Prometheus does not store the histogram type in
197+
// its staleness tracker.
198+
md, ok := t.mc.GetMetadata(metricName)
199+
if !ok {
200+
// Native histograms always have metadata.
201+
return false
202+
}
203+
if md.Type != model.MetricTypeHistogram {
204+
// Not a histogram.
205+
return false
206+
}
207+
if md.Metric != metricName {
208+
// Not a native histogram because it has magic suffixes (e.g. _bucket).
209+
return false
210+
}
211+
// Store the staleness marker as a native histogram.
212+
t.addingNativeHistogram = true
213+
214+
curMF := t.getOrCreateMetricFamily(*key, scope, metricName)
215+
seriesRef := t.getSeriesRef(ls, curMF.mtype)
216+
217+
_ = curMF.addExponentialHistogramSeries(seriesRef, metricName, ls, atMs, &histogram.Histogram{Sum: math.Float64frombits(value.StaleNaN)}, nil)
218+
// ignore errors here, this is best effort.
219+
220+
return true
221+
}
222+
193223
// getOrCreateMetricFamily returns the metric family for the given metric name and scope,
194224
// and true if an existing family was found.
195-
func (t *transaction) getOrCreateMetricFamily(key resourceKey, scope scopeID, mn string) (*metricFamily, bool) {
225+
func (t *transaction) getOrCreateMetricFamily(key resourceKey, scope scopeID, mn string) *metricFamily {
196226
if _, ok := t.families[key]; !ok {
197-
t.families[key] = make(map[scopeID]map[string]*metricFamily)
227+
t.families[key] = make(map[scopeID]map[metricFamilyKey]*metricFamily)
198228
}
199229
if _, ok := t.families[key][scope]; !ok {
200-
t.families[key][scope] = make(map[string]*metricFamily)
230+
t.families[key][scope] = make(map[metricFamilyKey]*metricFamily)
201231
}
202232

203-
curMf, ok := t.families[key][scope][mn]
233+
mfKey := metricFamilyKey{isExponentialHistogram: t.addingNativeHistogram, name: mn}
234+
235+
curMf, ok := t.families[key][scope][mfKey]
236+
204237
if !ok {
205238
fn := mn
206239
if _, ok := t.mc.GetMetadata(mn); !ok {
207240
fn = normalizeMetricName(mn)
208241
}
209-
mf, ok := t.families[key][scope][fn]
242+
fnKey := metricFamilyKey{isExponentialHistogram: mfKey.isExponentialHistogram, name: fn}
243+
mf, ok := t.families[key][scope][fnKey]
210244
if !ok || !mf.includesMetric(mn) {
211245
curMf = newMetricFamily(mn, t.mc, t.logger)
212-
t.families[key][scope][curMf.name] = curMf
213-
return curMf, false
246+
if curMf.mtype == pmetric.MetricTypeHistogram && mfKey.isExponentialHistogram {
247+
curMf.mtype = pmetric.MetricTypeExponentialHistogram
248+
}
249+
t.families[key][scope][metricFamilyKey{isExponentialHistogram: mfKey.isExponentialHistogram, name: curMf.name}] = curMf
250+
return curMf
214251
}
215252
curMf = mf
216253
}
217-
return curMf, true
254+
return curMf
218255
}
219256

220257
func (t *transaction) AppendExemplar(_ storage.SeriesRef, l labels.Labels, e exemplar.Exemplar) (storage.SeriesRef, error) {
@@ -240,7 +277,13 @@ func (t *transaction) AppendExemplar(_ storage.SeriesRef, l labels.Labels, e exe
240277
return 0, errMetricNameNotFound
241278
}
242279

243-
mf, _ := t.getOrCreateMetricFamily(*rKey, getScopeID(l), mn)
280+
mf := t.getOrCreateMetricFamily(*rKey, getScopeID(l), mn)
281+
282+
// Workaround for https://github.com/prometheus/prometheus/issues/16217
283+
if !t.enableNativeHistograms && mf.mtype == pmetric.MetricTypeHistogram && l.Get(model.MetricNameLabel) == mf.name {
284+
return 0, nil
285+
}
286+
244287
mf.addExemplar(t.getSeriesRef(l, mf.mtype), e)
245288

246289
return 0, nil
@@ -257,6 +300,8 @@ func (t *transaction) AppendHistogram(_ storage.SeriesRef, ls labels.Labels, atM
257300
default:
258301
}
259302

303+
t.addingNativeHistogram = true
304+
260305
if t.externalLabels.Len() != 0 {
261306
b := labels.NewBuilder(ls)
262307
t.externalLabels.Range(func(l labels.Label) {
@@ -286,13 +331,7 @@ func (t *transaction) AppendHistogram(_ storage.SeriesRef, ls labels.Labels, atM
286331
// The `up`, `target_info`, `otel_scope_info` metrics should never generate native histograms,
287332
// thus we don't check for them here as opposed to the Append function.
288333

289-
curMF, existing := t.getOrCreateMetricFamily(*rKey, getScopeID(ls), metricName)
290-
if !existing {
291-
curMF.mtype = pmetric.MetricTypeExponentialHistogram
292-
} else if curMF.mtype != pmetric.MetricTypeExponentialHistogram {
293-
// Already scraped as classic histogram.
294-
return 0, nil
295-
}
334+
curMF := t.getOrCreateMetricFamily(*rKey, getScopeID(ls), metricName)
296335

297336
if h != nil && h.CounterResetHint == histogram.GaugeType || fh != nil && fh.CounterResetHint == histogram.GaugeType {
298337
t.logger.Warn("dropping unsupported gauge histogram datapoint", zap.String("metric_name", metricName), zap.Any("labels", ls))
@@ -307,14 +346,16 @@ func (t *transaction) AppendHistogram(_ storage.SeriesRef, ls labels.Labels, atM
307346
}
308347

309348
func (t *transaction) AppendCTZeroSample(_ storage.SeriesRef, ls labels.Labels, atMs, ctMs int64) (storage.SeriesRef, error) {
310-
return t.setCreationTimestamp(ls, atMs, ctMs, false)
349+
t.addingNativeHistogram = false
350+
return t.setCreationTimestamp(ls, atMs, ctMs)
311351
}
312352

313353
func (t *transaction) AppendHistogramCTZeroSample(_ storage.SeriesRef, ls labels.Labels, atMs, ctMs int64, _ *histogram.Histogram, _ *histogram.FloatHistogram) (storage.SeriesRef, error) {
314-
return t.setCreationTimestamp(ls, atMs, ctMs, true)
354+
t.addingNativeHistogram = true
355+
return t.setCreationTimestamp(ls, atMs, ctMs)
315356
}
316357

317-
func (t *transaction) setCreationTimestamp(ls labels.Labels, atMs, ctMs int64, histogram bool) (storage.SeriesRef, error) {
358+
func (t *transaction) setCreationTimestamp(ls labels.Labels, atMs, ctMs int64) (storage.SeriesRef, error) {
318359
select {
319360
case <-t.ctx.Done():
320361
return 0, errTransactionAborted
@@ -347,16 +388,7 @@ func (t *transaction) setCreationTimestamp(ls labels.Labels, atMs, ctMs int64, h
347388
return 0, errMetricNameNotFound
348389
}
349390

350-
curMF, existing := t.getOrCreateMetricFamily(*rKey, getScopeID(ls), metricName)
351-
352-
if histogram {
353-
if !existing {
354-
curMF.mtype = pmetric.MetricTypeExponentialHistogram
355-
} else if curMF.mtype != pmetric.MetricTypeExponentialHistogram {
356-
// Already scraped as classic histogram.
357-
return 0, nil
358-
}
359-
}
391+
curMF := t.getOrCreateMetricFamily(*rKey, getScopeID(ls), metricName)
360392

361393
seriesRef := t.getSeriesRef(ls, curMF.mtype)
362394
curMF.addCreationTimestamp(seriesRef, ls, atMs, ctMs)
@@ -543,6 +575,7 @@ func (t *transaction) UpdateMetadata(_ storage.SeriesRef, _ labels.Labels, _ met
543575
}
544576

545577
func (t *transaction) AddTargetInfo(key resourceKey, ls labels.Labels) {
578+
t.addingNativeHistogram = false
546579
if resource, ok := t.nodeResources[key]; ok {
547580
attrs := resource.Attributes()
548581
ls.Range(func(lbl labels.Label) {
@@ -555,6 +588,7 @@ func (t *transaction) AddTargetInfo(key resourceKey, ls labels.Labels) {
555588
}
556589

557590
func (t *transaction) addScopeInfo(key resourceKey, ls labels.Labels) {
591+
t.addingNativeHistogram = false
558592
attrs := pcommon.NewMap()
559593
scope := scopeID{}
560594
ls.Range(func(lbl labels.Label) {

0 commit comments

Comments
 (0)