
Commit 4865ce7

fix: fix histogram metric query (#737)
Fix for the histogram query, based on the late-night session at KubeCon. Ref: HDX-1572
1 parent 08009ac commit 4865ce7

File tree

5 files changed: +216 -98 lines changed


.changeset/old-rules-check.md

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+---
+"@hyperdx/common-utils": patch
+---
+
+Fixes the histogram query to perform quantile calculation across all data points
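For orientation, the quantile math this patch moves to can be sketched in a few lines of TypeScript: merge the bucket counts of every contributing data point, locate the bucket containing the rank, and linearly interpolate, using the same formula as the generated SQL snapshot further down. `histogramQuantile` is a hypothetical helper for illustration, not an export of `@hyperdx/common-utils`.

```ts
// Illustrative only: a hand-rolled version of the bucket-quantile math the
// fixed query performs. Not part of @hyperdx/common-utils.
function histogramQuantile(
  bounds: number[], // explicit bucket upper bounds, e.g. [1, 2, 5, 8, 13]
  counts: number[], // merged bucket counts, length === bounds.length + 1
  level: number,    // quantile level, e.g. 0.5 for p50
): number {
  const total = counts.reduce((a, b) => a + b, 0);
  const rank = level * total;
  let cum = 0;
  for (let i = 0; i < counts.length; i++) {
    cum += counts[i];
    if (cum > rank) {
      if (i === 0) return bounds[0]; // rank falls in the first bucket
      if (i >= bounds.length) return bounds[bounds.length - 1]; // overflow bucket
      const lo = bounds[i - 1];
      const hi = bounds[i];
      // Same interpolation as the CASE expression in the updated SQL snapshot:
      return lo + ((rank - lo) / hi) * (hi - lo);
    }
  }
  return bounds[bounds.length - 1];
}
```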

packages/api/src/clickhouse/__tests__/__snapshots__/renderChartConfig.test.ts.snap

Lines changed: 19 additions & 10 deletions
@@ -60,6 +60,15 @@ Array [
 ]
 `;

+exports[`renderChartConfig Query Metrics should include multiple data points in percentile computation (p50) 1`] = `
+Array [
+  Object {
+    "__hdx_time_bucket": "2022-01-05T00:00:00Z",
+    "any(toFloat64OrNull(toString(Rate)))": 2.9,
+  },
+]
+`;
+
 exports[`renderChartConfig Query Metrics single avg gauge with group-by 1`] = `
 Array [
   Object {
@@ -192,11 +201,11 @@ exports[`renderChartConfig Query Metrics two_timestamps_bounded histogram (p25)
 Array [
   Object {
     "__hdx_time_bucket": "2022-01-05T00:00:00Z",
-    "sum(toFloat64OrNull(toString(Rate)))": 0,
+    "any(toFloat64OrNull(toString(Rate)))": 0,
   },
   Object {
     "__hdx_time_bucket": "2022-01-05T00:01:00Z",
-    "sum(toFloat64OrNull(toString(Rate)))": 10,
+    "any(toFloat64OrNull(toString(Rate)))": 10,
   },
 ]
 `;
@@ -205,11 +214,11 @@ exports[`renderChartConfig Query Metrics two_timestamps_bounded histogram (p50)
 Array [
   Object {
     "__hdx_time_bucket": "2022-01-05T00:00:00Z",
-    "sum(toFloat64OrNull(toString(Rate)))": 0,
+    "any(toFloat64OrNull(toString(Rate)))": 0,
   },
   Object {
     "__hdx_time_bucket": "2022-01-05T00:01:00Z",
-    "sum(toFloat64OrNull(toString(Rate)))": 20,
+    "any(toFloat64OrNull(toString(Rate)))": 13.333333333333332,
   },
 ]
 `;
@@ -218,11 +227,11 @@ exports[`renderChartConfig Query Metrics two_timestamps_bounded histogram (p90)
 Array [
   Object {
     "__hdx_time_bucket": "2022-01-05T00:00:00Z",
-    "sum(toFloat64OrNull(toString(Rate)))": 0,
+    "any(toFloat64OrNull(toString(Rate)))": 0,
   },
   Object {
     "__hdx_time_bucket": "2022-01-05T00:01:00Z",
-    "sum(toFloat64OrNull(toString(Rate)))": 30,
+    "any(toFloat64OrNull(toString(Rate)))": 30,
   },
 ]
 `;
@@ -231,11 +240,11 @@ exports[`renderChartConfig Query Metrics two_timestamps_lower_bound_inf histogra
 Array [
   Object {
     "__hdx_time_bucket": "2022-01-05T00:00:00Z",
-    "sum(toFloat64OrNull(toString(Rate)))": 0,
+    "any(toFloat64OrNull(toString(Rate)))": 0,
   },
   Object {
     "__hdx_time_bucket": "2022-01-05T00:01:00Z",
-    "sum(toFloat64OrNull(toString(Rate)))": 1,
+    "any(toFloat64OrNull(toString(Rate)))": 1,
   },
 ]
 `;
@@ -244,11 +253,11 @@ exports[`renderChartConfig Query Metrics two_timestamps_upper_bound_inf histogra
 Array [
   Object {
     "__hdx_time_bucket": "2022-01-05T00:00:00Z",
-    "sum(toFloat64OrNull(toString(Rate)))": 0,
+    "any(toFloat64OrNull(toString(Rate)))": 0,
   },
   Object {
     "__hdx_time_bucket": "2022-01-05T00:01:00Z",
-    "sum(toFloat64OrNull(toString(Rate)))": 30,
+    "any(toFloat64OrNull(toString(Rate)))": 30,
   },
 ]
 `;

packages/api/src/clickhouse/__tests__/renderChartConfig.test.ts

Lines changed: 89 additions & 13 deletions
@@ -446,6 +446,36 @@ describe('renderChartConfig', () => {
       AggregationTemporality: 2, // Cumulative
       ...point,
     }));
+    const histPointsE = [
+      {
+        BucketCounts: [1, 1, 1, 1, 1, 1],
+        ExplicitBounds: [1, 2, 5, 8, 13],
+        TimeUnix: new Date(now),
+        ResourceAttributes: { host: 'test-a' },
+      },
+      {
+        BucketCounts: [2, 2, 2, 2, 2, 2],
+        ExplicitBounds: [1, 2, 5, 8, 13],
+        TimeUnix: new Date(now + ms('5s')),
+        ResourceAttributes: { host: 'test-b' },
+      },
+      {
+        BucketCounts: [2, 1, 2, 1, 2, 1],
+        ExplicitBounds: [1, 2, 5, 8, 13],
+        TimeUnix: new Date(now + ms('1m')),
+        ResourceAttributes: { host: 'test-a' },
+      },
+      {
+        BucketCounts: [3, 3, 2, 2, 3, 3],
+        ExplicitBounds: [1, 2, 5, 8, 13],
+        TimeUnix: new Date(now + ms('65s')),
+        ResourceAttributes: { host: 'test-b' },
+      },
+    ].map(point => ({
+      MetricName: 'test.multiple_series',
+      AggregationTemporality: 2, // Cumulative
+      ...point,
+    }));

     await Promise.all([
       bulkInsertMetricsGauge([...gaugePointsA, ...gaugePointsB]),
@@ -462,6 +492,7 @@ describe('renderChartConfig', () => {
         ...histPointsB,
         ...histPointsC,
         ...histPointsD,
+        ...histPointsE,
       ]),
     ]);
   });
@@ -655,6 +686,10 @@ describe('renderChartConfig', () => {
   });

   it('calculates min_rate/max_rate correctly for sum metrics', async () => {
+    // Raw data is:
+    // MIN_VARIANT_0: [0, 1, 8, 0, 7, 7, 15, 17, 0, 42]
+    // MIN_VARIANT_1: [0, 2, 9, 0, 15, 25, 35, 57, 0, 92]
+    //
     // Based on the data inserted in the fixture, the expected stream of values
     // for each series after adjusting for the zero reset should be:
     // MIN_VARIANT_0: [0, 1, 8, 8, 15, 15, 23, 25, 25, 67]
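The zero-reset adjustment this comment describes can be sketched as a running offset that is carried forward whenever the cumulative counter drops. A hypothetical helper, for illustration only (not a function in this codebase):

```ts
// Hypothetical sketch of the zero-reset adjustment described above: when a
// cumulative counter drops, carry the previous running value forward as an
// offset so the adjusted series stays monotonic.
function adjustForZeroReset(values: number[]): number[] {
  let offset = 0;
  let prev = 0;
  return values.map(v => {
    if (v < prev) offset += prev; // counter reset detected
    prev = v;
    return v + offset;
  });
}

// adjustForZeroReset([0, 1, 8, 0, 7, 7, 15, 17, 0, 42])
//   -> [0, 1, 8, 8, 15, 15, 23, 25, 25, 67]  (matches MIN_VARIANT_0 above)
```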
@@ -725,20 +760,10 @@ describe('renderChartConfig', () => {
       Since the AggregationTemporality is 2(cumulative), we need to calculate the delta between the two points:
         delta: [10, 10, 10] - [0, 0, 0] = [10, 10, 10]

-      Total observations: 10 + 10 + 10 = 30
-      Cumulative counts: [10, 20, 30]
-      p50 point:
-        Rank = 0.5 * 30 = 15
-        This falls in the second bucket (since 10 < 15 ≤ 20)
-
       We need to interpolate between the lower and upper bounds of the second bucket:
-        Lower bound: 10
-        Upper bound: 30
-        Position in bucket: (15 - 10) / (20 - 10) = 0.5
-        Interpolated value: 10 + (30 - 10) * 0.5 = 10 + 10 = 20
-
-      Thus the first point value would be 0 since it's at the start of the bounds.
-      The second point value would be 20 since that is the median point value delta from the first point.
+        cum sum = [10, 20, 30]
+        rank = 0.5 * 30 = 15 (between bounds 10 - 30)
+        interpolate: 10 + ((15 - 10) / 30) * (30 - 10) = 13.3333
     */
     const query = await renderChartConfig(
       {
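The revised arithmetic in this hunk (13.3333 rather than the old 20) can be checked with the hypothetical `histogramQuantile` sketch from the changeset section, assuming explicit bounds [10, 30] as the comment implies:

```ts
// Worked check of the p50 comment above, reusing the illustrative
// histogramQuantile sketch from earlier on this page:
// delta counts [10, 10, 10], cum sum [10, 20, 30], rank = 0.5 * 30 = 15;
// rank lands in the bucket bounded by 10 and 30, so
// 10 + ((15 - 10) / 30) * (30 - 10) = 13.333...
console.log(histogramQuantile([10, 30], [10, 10, 10], 0.5)); // ≈ 13.333333333333332
```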
@@ -952,6 +977,57 @@ describe('renderChartConfig', () => {
     expect(res).toMatchSnapshot();
   });

+  it('should include multiple data points in percentile computation (p50)', async () => {
+    /*
+      bounds: [1, 2, 5, 8, 13]
+      host = test-a:
+        p1 = [1, 1, 1, 1, 1, 1]
+        p2 = [2, 1, 2, 1, 2, 1]
+      host = test-b:
+        p1 = [2, 2, 2, 2, 2, 2]
+        p2 = [3, 3, 2, 2, 3, 3]
+
+      Compute the diff between adjacent points for each unique host (deltaSumForEach):
+        host = test-a, diff = [1, 0, 1, 0, 1, 0]
+        host = test-b, diff = [1, 1, 0, 0, 1, 1]
+
+      Sum the diffs together to obtain a combined count for the different series:
+        sum elements(d) = [2, 1, 1, 0, 2, 1]
+
+      Now compute the p50 value:
+        sum(d) = 7
+        cum sum = [2, 3, 4, 4, 6, 7]
+        rank = 0.5 * 7 = 3.5 (between bounds 2 - 5)
+        interpolate: 2 + ((3.5 - 2) / 5) * (5 - 2) = 2.9
+
+      Since all the points fall within a single granularity interval, the result should be
+      a single row with the value 2.9.
+    */
+    const query = await renderChartConfig(
+      {
+        select: [
+          {
+            aggFn: 'quantile',
+            level: 0.5,
+            metricName: 'test.multiple_series',
+            metricType: MetricsDataType.Histogram,
+            valueExpression: 'Value',
+          },
+        ],
+        from: metricSource.from,
+        where: '',
+        metricTables: TEST_METRIC_TABLES,
+        dateRange: [new Date(now), new Date(now + ms('2m'))],
+        granularity: '5 minute',
+        timestampValueExpression: metricSource.timestampValueExpression,
+        connection: connection.id,
+      },
+      metadata,
+    );
+    const res = await queryData(query);
+    expect(res).toMatchSnapshot();
+  });
+
   // HDX-1515: Handle counter reset in histogram metric in the same way that the counter reset
   // is handled for sum metrics.
   it.skip('three_timestamps_bounded histogram with reset (p50)', async () => {
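The 2.9 expectation in the new test can be reproduced end to end with the same illustrative sketch: per-host deltas, an element-wise sum across hosts, then the interpolation.

```ts
// Reproduces the 2.9 expectation from the new test above, reusing the
// hypothetical histogramQuantile sketch; helper names are illustrative.
const bounds = [1, 2, 5, 8, 13];
const testA = [[1, 1, 1, 1, 1, 1], [2, 1, 2, 1, 2, 1]]; // host=test-a, p1/p2
const testB = [[2, 2, 2, 2, 2, 2], [3, 3, 2, 2, 3, 3]]; // host=test-b, p1/p2

// Delta between adjacent cumulative points within each series:
const delta = (pts: number[][]) => pts[1].map((c, i) => c - pts[0][i]);
const dA = delta(testA); // [1, 0, 1, 0, 1, 0]
const dB = delta(testB); // [1, 1, 0, 0, 1, 1]

// Element-wise sum across the two series:
const combined = dA.map((c, i) => c + dB[i]); // [2, 1, 1, 0, 2, 1]

console.log(histogramQuantile(bounds, combined, 0.5)); // 2.9
```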

packages/common-utils/src/__tests__/__snapshots__/renderChartConfig.test.ts.snap

Lines changed: 33 additions & 25 deletions
@@ -36,39 +36,47 @@ exports[`renderChartConfig should generate sql for a single gauge metric 1`] = `
 `;

 exports[`renderChartConfig should generate sql for a single histogram metric 1`] = `
-"WITH HistRate AS (
+"WITH Source AS (
   SELECT
     *,
     cityHash64(mapConcat(ScopeAttributes, ResourceAttributes, Attributes)) AS AttributesHash,
-    length(BucketCounts) as CountLength,
-    any(BucketCounts) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevBucketCounts,
-    any(CountLength) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevCountLength,
-    any(AttributesHash) OVER (ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS PrevAttributesHash,
     IF(AggregationTemporality = 1,
-      BucketCounts,
-      IF(AttributesHash = PrevAttributesHash AND CountLength = PrevCountLength,
-        arrayMap((prev, curr) -> IF(curr < prev, curr, toUInt64(toInt64(curr) - toInt64(prev))), PrevBucketCounts, BucketCounts),
-        BucketCounts)) as BucketRates
+      sumForEach(BucketCounts) OVER (PARTITION BY AttributesHash ORDER BY AttributesHash, TimeUnix ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),
+      deltaSumForEach(BucketCounts) OVER (PARTITION BY AttributesHash ORDER BY AttributesHash, TimeUnix ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
+    ) AS MonoBucketCounts
   FROM default.otel_metrics_histogram
   WHERE (TimeUnix >= fromUnixTimestamp64Milli(1739318400000) AND TimeUnix <= fromUnixTimestamp64Milli(1765670400000)) AND ((MetricName = 'http.server.duration'))
-  ORDER BY Attributes, TimeUnix ASC
-),RawHist AS (
+  ORDER BY AttributesHash ASC
+),Bucketed AS (
   SELECT
-    *,
-    toUInt64( 0.5 * arraySum(BucketRates)) AS Rank,
-    arrayCumSum(BucketRates) as CumRates,
-    arrayFirstIndex(x -> if(x > Rank, 1, 0), CumRates) AS BucketLowIdx,
-    IF(BucketLowIdx = length(BucketRates),
-      arrayElement(ExplicitBounds, length(ExplicitBounds)),
-      IF(BucketLowIdx > 1,
-        arrayElement(ExplicitBounds, BucketLowIdx - 1) + (arrayElement(ExplicitBounds, BucketLowIdx) - arrayElement(ExplicitBounds, BucketLowIdx - 1)) *
-          IF(arrayElement(CumRates, BucketLowIdx) > arrayElement(CumRates, BucketLowIdx - 1),
-            (Rank - arrayElement(CumRates, BucketLowIdx - 1)) / (arrayElement(CumRates, BucketLowIdx) - arrayElement(CumRates, BucketLowIdx - 1)), 0),
-        arrayElement(ExplicitBounds, BucketLowIdx)
-      )) as Rate
-  FROM HistRate) SELECT sum(
+
+    any(Source.ExplicitBounds) AS ExplicitBounds,
+    sumForEach(MonoBucketCounts) AS TotalBucketCounts,
+    arrayCumSum(TotalBucketCounts) AS BucketCumCounts,
+    0.5 * arraySum(TotalBucketCounts) AS Rank,
+    arrayFirstIndex(x -> if(x > Rank, 1, 0), BucketCumCounts) AS BucketLowIdx,
+    CASE
+      WHEN BucketLowIdx = 0 THEN 0
+      WHEN BucketLowIdx = 1 THEN ExplicitBounds[BucketLowIdx]
+      WHEN BucketLowIdx = length(TotalBucketCounts) THEN ExplicitBounds[length(ExplicitBounds)]
+      ELSE
+        ExplicitBounds[BucketLowIdx - 1] +
+        ((Rank - ExplicitBounds[BucketLowIdx - 1]) / (ExplicitBounds[BucketLowIdx])) * (ExplicitBounds[BucketLowIdx] - ExplicitBounds[BucketLowIdx - 1])
+    END AS Value
+  FROM Source
+  GROUP BY
+  ORDER BY ASC
+),Rates AS (
+  SELECT
+    \`__hdx_time_bucket\`,
+    any(Bucketed.Value) AS \`__hdx_value_high\`,
+    any(\`__hdx_value_high\`) OVER (ORDER BY \`__hdx_time_bucket\` ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS \`__hdx_prev_high\`,
+    \`__hdx_value_high\` - \`__hdx_prev_high\` AS Rate
+  FROM Bucketed
+  GROUP BY \`__hdx_time_bucket\`
+) SELECT any(
   toFloat64OrNull(toString(Rate))
-) FROM RawHist WHERE (TimeUnix >= fromUnixTimestamp64Milli(1739318400000) AND TimeUnix <= fromUnixTimestamp64Milli(1765670400000)) LIMIT 10"
+) FROM Rates WHERE (\`__hdx_time_bucket\` >= fromUnixTimestamp64Milli(1739318400000) AND \`__hdx_time_bucket\` <= fromUnixTimestamp64Milli(1765670400000)) LIMIT 10"
 `;

 exports[`renderChartConfig should generate sql for a single sum metric 1`] = `
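The reworked query has three stages: `Source` makes each series' bucket counts monotonic within its `AttributesHash` partition (`sumForEach` over the window for delta temporality, `deltaSumForEach` for cumulative, which absorbs counter resets), `Bucketed` sums the per-series counts element-wise and interpolates the quantile per time bucket, and `Rates` differences adjacent buckets. A rough TypeScript sketch of the `deltaSumForEach` semantics, under the assumption that it mirrors ClickHouse's documented `deltaSum` behavior (summing consecutive positive differences) applied element-wise; not verified against the generated plan:

```ts
// Sketch of deltaSumForEach(BucketCounts) per series: sum consecutive
// positive differences per bucket, ignoring negative deltas (counter resets).
// Assumption: mirrors documented deltaSum + -ForEach combinator semantics.
function deltaSumForEach(points: number[][]): number[] {
  const acc = new Array<number>(points[0]?.length ?? 0).fill(0);
  for (let r = 1; r < points.length; r++) {
    for (let i = 0; i < acc.length; i++) {
      const d = points[r][i] - points[r - 1][i];
      if (d > 0) acc[i] += d; // negative deltas (resets) are ignored
    }
  }
  return acc;
}

// host=test-a from the new fixture:
// deltaSumForEach([[1, 1, 1, 1, 1, 1], [2, 1, 2, 1, 2, 1]]) -> [1, 0, 1, 0, 1, 0]
```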
