Skip to content

Commit 0a571de

Browse files
committed
Add new query stats metrics to track prometheus querystats
Signed-off-by: SungJin1212 <[email protected]>
1 parent d829d65 commit 0a571de

File tree

10 files changed

+344
-55
lines changed

10 files changed

+344
-55
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
* [FEATURE] Ruler: Experimental: Add `ruler.frontend-address` to allow query to query frontends instead of ingesters. #6151
66
* [FEATURE] Ruler: Minimize chances of missed rule group evaluations that can occur due to OOM kills, bad underlying nodes, or due to an unhealthy ruler that appears in the ring as healthy. This feature is enabled via `-ruler.enable-ha-evaluation` flag. #6129
7+
* [ENHANCEMENT] Query Frontend: Add new query stats metrics `cortex_query_total_queryable_samples_total` and `cortex_query_peak_samples_total` to track totalQueryableSamples and peakSample per user. #6228
78
* [ENHANCEMENT] Query Frontend: Add info field to query response. #6207
89
* [ENHANCEMENT] Query Frontend: Add peakSample in query stats response. #6188
910
* [ENHANCEMENT] Ruler: Add new ruler metric `cortex_ruler_rule_groups_in_store` that is the total rule groups per tenant in store, which can be used to compare with `cortex_prometheus_rule_group_rules` to count the number of rule groups that are not loaded by a ruler. #5869

pkg/api/handlers.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ func NewQuerierHandler(
225225
// This is used for the stats API which we should not support. Or find other ways to.
226226
prometheus.GathererFunc(func() ([]*dto.MetricFamily, error) { return nil, nil }),
227227
reg,
228-
nil,
228+
querier.StatsRenderer,
229229
false,
230230
nil,
231231
false,

pkg/frontend/transport/handler.go

+25-7
Original file line numberDiff line numberDiff line change
@@ -89,13 +89,15 @@ type Handler struct {
8989
roundTripper http.RoundTripper
9090

9191
// Metrics.
92-
querySeconds *prometheus.CounterVec
93-
querySeries *prometheus.CounterVec
94-
querySamples *prometheus.CounterVec
95-
queryChunkBytes *prometheus.CounterVec
96-
queryDataBytes *prometheus.CounterVec
97-
rejectedQueries *prometheus.CounterVec
98-
activeUsers *util.ActiveUsersCleanupService
92+
querySeconds *prometheus.CounterVec
93+
querySeries *prometheus.CounterVec
94+
querySamples *prometheus.CounterVec
95+
queryTotalQueryableSamples *prometheus.CounterVec
96+
queryPeakSamples *prometheus.CounterVec
97+
queryChunkBytes *prometheus.CounterVec
98+
queryDataBytes *prometheus.CounterVec
99+
rejectedQueries *prometheus.CounterVec
100+
activeUsers *util.ActiveUsersCleanupService
99101
}
100102

101103
// NewHandler creates a new frontend handler.
@@ -122,6 +124,16 @@ func NewHandler(cfg HandlerConfig, roundTripper http.RoundTripper, log log.Logge
122124
Help: "Number of samples fetched to execute a query.",
123125
}, []string{"user"})
124126

127+
h.queryTotalQueryableSamples = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
128+
Name: "cortex_query_total_queryable_samples_total",
129+
Help: "Number of total queryable samples to execute a query.",
130+
}, []string{"user"})
131+
132+
h.queryPeakSamples = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
133+
Name: "cortex_query_peak_samples_total",
134+
Help: "Highest count of samples considered to execute a query.",
135+
}, []string{"user"})
136+
125137
h.queryChunkBytes = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
126138
Name: "cortex_query_fetched_chunks_bytes_total",
127139
Help: "Size of all chunks fetched to execute a query in bytes.",
@@ -144,6 +156,8 @@ func NewHandler(cfg HandlerConfig, roundTripper http.RoundTripper, log log.Logge
144156
h.querySeconds.DeleteLabelValues(user)
145157
h.querySeries.DeleteLabelValues(user)
146158
h.querySamples.DeleteLabelValues(user)
159+
h.queryTotalQueryableSamples.DeleteLabelValues(user)
160+
h.queryPeakSamples.DeleteLabelValues(user)
147161
h.queryChunkBytes.DeleteLabelValues(user)
148162
h.queryDataBytes.DeleteLabelValues(user)
149163
if err := util.DeleteMatchingLabels(h.rejectedQueries, map[string]string{"user": user}); err != nil {
@@ -301,6 +315,8 @@ func (f *Handler) reportQueryStats(r *http.Request, userID string, queryString u
301315
numSeries := stats.LoadFetchedSeries()
302316
numChunks := stats.LoadFetchedChunks()
303317
numSamples := stats.LoadFetchedSamples()
318+
numTotalQueryableSamples := stats.LoadTotalQueryableSamples()
319+
numPeakSamples := stats.LoadPeakSamples()
304320
numChunkBytes := stats.LoadFetchedChunkBytes()
305321
numDataBytes := stats.LoadFetchedDataBytes()
306322
numStoreGatewayTouchedPostings := stats.LoadStoreGatewayTouchedPostings()
@@ -313,6 +329,8 @@ func (f *Handler) reportQueryStats(r *http.Request, userID string, queryString u
313329
f.querySeconds.WithLabelValues(userID).Add(wallTime.Seconds())
314330
f.querySeries.WithLabelValues(userID).Add(float64(numSeries))
315331
f.querySamples.WithLabelValues(userID).Add(float64(numSamples))
332+
f.queryTotalQueryableSamples.WithLabelValues(userID).Add(float64(numTotalQueryableSamples))
333+
f.queryPeakSamples.WithLabelValues(userID).Add(float64(numPeakSamples))
316334
f.queryChunkBytes.WithLabelValues(userID).Add(float64(numChunkBytes))
317335
f.queryDataBytes.WithLabelValues(userID).Add(float64(numDataBytes))
318336
f.activeUsers.UpdateUserTimestamp(userID, time.Now())

pkg/frontend/transport/handler_test.go

+13-11
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ func TestHandler_ServeHTTP(t *testing.T) {
188188
{
189189
name: "test handler with stats enabled",
190190
cfg: HandlerConfig{QueryStatsEnabled: true},
191-
expectedMetrics: 4,
191+
expectedMetrics: 6,
192192
roundTripperFunc: roundTripper,
193193
expectedStatusCode: http.StatusOK,
194194
},
@@ -202,7 +202,7 @@ func TestHandler_ServeHTTP(t *testing.T) {
202202
{
203203
name: "test handler with reasonResponseTooLarge",
204204
cfg: HandlerConfig{QueryStatsEnabled: true},
205-
expectedMetrics: 4,
205+
expectedMetrics: 6,
206206
roundTripperFunc: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
207207
return &http.Response{
208208
StatusCode: http.StatusRequestEntityTooLarge,
@@ -218,7 +218,7 @@ func TestHandler_ServeHTTP(t *testing.T) {
218218
{
219219
name: "test handler with reasonTooManyRequests",
220220
cfg: HandlerConfig{QueryStatsEnabled: true},
221-
expectedMetrics: 4,
221+
expectedMetrics: 6,
222222
roundTripperFunc: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
223223
return &http.Response{
224224
StatusCode: http.StatusTooManyRequests,
@@ -234,7 +234,7 @@ func TestHandler_ServeHTTP(t *testing.T) {
234234
{
235235
name: "test handler with reasonTooManySamples",
236236
cfg: HandlerConfig{QueryStatsEnabled: true},
237-
expectedMetrics: 4,
237+
expectedMetrics: 6,
238238
roundTripperFunc: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
239239
return &http.Response{
240240
StatusCode: http.StatusUnprocessableEntity,
@@ -250,7 +250,7 @@ func TestHandler_ServeHTTP(t *testing.T) {
250250
{
251251
name: "test handler with reasonTooLongRange",
252252
cfg: HandlerConfig{QueryStatsEnabled: true},
253-
expectedMetrics: 4,
253+
expectedMetrics: 6,
254254
roundTripperFunc: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
255255
return &http.Response{
256256
StatusCode: http.StatusUnprocessableEntity,
@@ -266,7 +266,7 @@ func TestHandler_ServeHTTP(t *testing.T) {
266266
{
267267
name: "test handler with reasonSeriesFetched",
268268
cfg: HandlerConfig{QueryStatsEnabled: true},
269-
expectedMetrics: 4,
269+
expectedMetrics: 6,
270270
roundTripperFunc: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
271271
return &http.Response{
272272
StatusCode: http.StatusUnprocessableEntity,
@@ -282,7 +282,7 @@ func TestHandler_ServeHTTP(t *testing.T) {
282282
{
283283
name: "test handler with reasonChunksFetched",
284284
cfg: HandlerConfig{QueryStatsEnabled: true},
285-
expectedMetrics: 4,
285+
expectedMetrics: 6,
286286
roundTripperFunc: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
287287
return &http.Response{
288288
StatusCode: http.StatusUnprocessableEntity,
@@ -298,7 +298,7 @@ func TestHandler_ServeHTTP(t *testing.T) {
298298
{
299299
name: "test handler with reasonChunkBytesFetched",
300300
cfg: HandlerConfig{QueryStatsEnabled: true},
301-
expectedMetrics: 4,
301+
expectedMetrics: 6,
302302
roundTripperFunc: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
303303
return &http.Response{
304304
StatusCode: http.StatusUnprocessableEntity,
@@ -314,7 +314,7 @@ func TestHandler_ServeHTTP(t *testing.T) {
314314
{
315315
name: "test handler with reasonDataBytesFetched",
316316
cfg: HandlerConfig{QueryStatsEnabled: true},
317-
expectedMetrics: 4,
317+
expectedMetrics: 6,
318318
roundTripperFunc: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
319319
return &http.Response{
320320
StatusCode: http.StatusUnprocessableEntity,
@@ -346,7 +346,7 @@ func TestHandler_ServeHTTP(t *testing.T) {
346346
{
347347
name: "test handler with reasonChunksLimitStoreGateway",
348348
cfg: HandlerConfig{QueryStatsEnabled: true},
349-
expectedMetrics: 4,
349+
expectedMetrics: 6,
350350
roundTripperFunc: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
351351
return &http.Response{
352352
StatusCode: http.StatusUnprocessableEntity,
@@ -362,7 +362,7 @@ func TestHandler_ServeHTTP(t *testing.T) {
362362
{
363363
name: "test handler with reasonBytesLimitStoreGateway",
364364
cfg: HandlerConfig{QueryStatsEnabled: true},
365-
expectedMetrics: 4,
365+
expectedMetrics: 6,
366366
roundTripperFunc: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
367367
return &http.Response{
368368
StatusCode: http.StatusUnprocessableEntity,
@@ -395,6 +395,8 @@ func TestHandler_ServeHTTP(t *testing.T) {
395395
"cortex_query_fetched_series_total",
396396
"cortex_query_samples_total",
397397
"cortex_query_fetched_chunks_bytes_total",
398+
"cortex_query_total_queryable_samples_total",
399+
"cortex_query_peak_samples_total",
398400
)
399401

400402
assert.NoError(t, err)

pkg/querier/stats/stats.go

+42
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,46 @@ func (s *QueryStats) LoadStoreGatewayTouchedPostingBytes() uint64 {
302302
return atomic.LoadUint64(&s.StoreGatewayTouchedPostingBytes)
303303
}
304304

305+
// AddTotalQueryableSamples atomically adds count to the TotalQueryableSamples
// field. Calling it on a nil receiver is a no-op.
func (s *QueryStats) AddTotalQueryableSamples(count uint64) {
306+
if s == nil {
307+
return
308+
}
309+
310+
atomic.AddUint64(&s.TotalQueryableSamples, count)
311+
}
312+
313+
// LoadTotalQueryableSamples atomically reads the TotalQueryableSamples field.
// It returns 0 when called on a nil receiver.
func (s *QueryStats) LoadTotalQueryableSamples() uint64 {
314+
if s == nil {
315+
return 0
316+
}
317+
318+
return atomic.LoadUint64(&s.TotalQueryableSamples)
319+
}
320+
321+
// AddPeakSamples atomically adds count to the PeakSamples field. Calling it
// on a nil receiver is a no-op.
//
// NOTE(review): this accumulates (sums) into PeakSamples, whereas Merge
// combines peaks with SetPeakSamples(max(...)); confirm that callers of this
// method really intend a sum rather than a maximum.
func (s *QueryStats) AddPeakSamples(count uint64) {
322+
if s == nil {
323+
return
324+
}
325+
326+
atomic.AddUint64(&s.PeakSamples, count)
327+
}
328+
329+
// SetPeakSamples atomically stores count into the PeakSamples field,
// replacing whatever value was there. Calling it on a nil receiver is a no-op.
func (s *QueryStats) SetPeakSamples(count uint64) {
330+
if s == nil {
331+
return
332+
}
333+
334+
atomic.StoreUint64(&s.PeakSamples, count)
335+
}
336+
337+
// LoadPeakSamples atomically reads the PeakSamples field. It returns 0 when
// called on a nil receiver.
func (s *QueryStats) LoadPeakSamples() uint64 {
338+
if s == nil {
339+
return 0
340+
}
341+
342+
return atomic.LoadUint64(&s.PeakSamples)
343+
}
344+
305345
// Merge the provided Stats into this one.
306346
func (s *QueryStats) Merge(other *QueryStats) {
307347
if s == nil || other == nil {
@@ -317,6 +357,8 @@ func (s *QueryStats) Merge(other *QueryStats) {
317357
s.AddFetchedChunks(other.LoadFetchedChunks())
318358
s.AddStoreGatewayTouchedPostings(other.LoadStoreGatewayTouchedPostings())
319359
s.AddStoreGatewayTouchedPostingBytes(other.LoadStoreGatewayTouchedPostingBytes())
360+
s.AddTotalQueryableSamples(other.LoadTotalQueryableSamples())
361+
s.SetPeakSamples(max(s.LoadPeakSamples(), other.LoadPeakSamples()))
320362
s.AddExtraFields(other.LoadExtraFields()...)
321363
}
322364

0 commit comments

Comments
 (0)