Skip to content

Commit 85bc555

Browse files
authored
Add support for Prometheus rule query offset (#6085)
* Add support for prometheus rule query offset Signed-off-by: Mustafain Ali Khan <[email protected]> * Fix tests Signed-off-by: Mustafain Ali Khan <[email protected]> * Use per-tenant limit for global query offset Signed-off-by: Mustafain Ali Khan <[email protected]> --------- Signed-off-by: Mustafain Ali Khan <[email protected]>
1 parent 8ce3642 commit 85bc555

File tree

12 files changed

+267
-74
lines changed

12 files changed

+267
-74
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
* [FEATURE] Ingester: Add `ingester.instance-limits.max-inflight-query-requests` to allow limiting ingester concurrent queries. #6081
1313
* [FEATURE] Distributor: Add `validation.max-native-histogram-buckets` to limit max number of bucket count. Distributor will try to automatically reduce histogram resolution until it is within the bucket limit or resolution cannot be reduced anymore. #6104
1414
* [FEATURE] Store Gateway: Token bucket limiter. #6016
15+
* [FEATURE] Ruler: Add support for `query_offset` field on RuleGroup and new `ruler_query_offset` per-tenant limit. #6085
1516
* [ENHANCEMENT] rulers: Add support to persist tokens in rulers. #5987
1617
* [ENHANCEMENT] Query Frontend/Querier: Added store gateway postings touched count and touched size in Querier stats and log in Query Frontend. #5892
1718
* [ENHANCEMENT] Query Frontend/Querier: Returns `warnings` on prometheus query responses. #5916

docs/configuration/config-file-reference.md

+4
Original file line numberDiff line numberDiff line change
@@ -3349,6 +3349,10 @@ query_rejection:
33493349
# CLI flag: -ruler.max-rule-groups-per-tenant
33503350
[ruler_max_rule_groups_per_tenant: <int> | default = 0]
33513351

3352+
# Duration to offset all rule evaluation queries per-tenant.
3353+
# CLI flag: -ruler.query-offset
3354+
[ruler_query_offset: <duration> | default = 0s]
3355+
33523356
# The default tenant's shard size when the shuffle-sharding strategy is used.
33533357
# Must be set when the store-gateway sharding is enabled with the
33543358
# shuffle-sharding strategy. When this setting is specified in the per-tenant

pkg/ruler/api_test.go

+16-3
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ interval: 15s
263263
err: errors.New("invalid rules config: rule group 'rg_name' has no rules"),
264264
},
265265
{
266-
name: "with a a valid rules file",
266+
name: "with a valid rules file",
267267
status: 202,
268268
input: `
269269
name: test
@@ -279,7 +279,20 @@ rules:
279279
labels:
280280
test: test
281281
`,
282-
output: "name: test\ninterval: 15s\nrules:\n - record: up_rule\n expr: up{}\n - alert: up_alert\n expr: sum(up{}) > 1\n for: 30s\n labels:\n test: test\n annotations:\n test: test\n",
282+
output: "name: test\ninterval: 15s\nquery_offset: 0s\nrules:\n - record: up_rule\n expr: up{}\n - alert: up_alert\n expr: sum(up{}) > 1\n for: 30s\n labels:\n test: test\n annotations:\n test: test\n",
283+
},
284+
{
285+
name: "with a valid rule query offset",
286+
status: 202,
287+
input: `
288+
name: test
289+
interval: 15s
290+
query_offset: 2m
291+
rules:
292+
- record: up_rule
293+
expr: up{}
294+
`,
295+
output: "name: test\ninterval: 15s\nquery_offset: 2m\nrules:\n - record: up_rule\n expr: up{}\n",
283296
},
284297
}
285298

@@ -329,7 +342,7 @@ func TestRuler_DeleteNamespace(t *testing.T) {
329342

330343
router.ServeHTTP(w, req)
331344
require.Equal(t, http.StatusOK, w.Code)
332-
require.Equal(t, "name: group1\ninterval: 1m\nrules:\n - record: UP_RULE\n expr: up\n - alert: UP_ALERT\n expr: up < 1\n", w.Body.String())
345+
require.Equal(t, "name: group1\ninterval: 1m\nquery_offset: 0s\nrules:\n - record: UP_RULE\n expr: up\n - alert: UP_ALERT\n expr: up < 1\n", w.Body.String())
333346

334347
// Delete namespace1
335348
req = requestFor(t, http.MethodDelete, "https://localhost:8080/api/v1/rules/namespace1", nil, "user1")

pkg/ruler/compat.go

+4
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ type RulesLimits interface {
178178
RulerTenantShardSize(userID string) int
179179
RulerMaxRuleGroupsPerTenant(userID string) int
180180
RulerMaxRulesPerRuleGroup(userID string) int
181+
RulerQueryOffset(userID string) time.Duration
181182
DisabledRuleGroups(userID string) validation.DisabledRuleGroups
182183
}
183184

@@ -358,6 +359,9 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi
358359
ResendDelay: cfg.ResendDelay,
359360
ConcurrentEvalsEnabled: cfg.ConcurrentEvalsEnabled,
360361
MaxConcurrentEvals: cfg.MaxConcurrentEvals,
362+
DefaultRuleQueryOffset: func() time.Duration {
363+
return overrides.RulerQueryOffset(userID)
364+
},
361365
})
362366
}
363367
}

pkg/ruler/ruler.go

+7-5
Original file line numberDiff line numberDiff line change
@@ -911,13 +911,15 @@ func (r *Ruler) getLocalRules(userID string, rulesRequest RulesRequest, includeB
911911
}
912912
interval := group.Interval()
913913

914+
queryOffset := group.QueryOffset()
914915
groupDesc := &GroupStateDesc{
915916
Group: &rulespb.RuleGroupDesc{
916-
Name: group.Name(),
917-
Namespace: string(decodedNamespace),
918-
Interval: interval,
919-
User: userID,
920-
Limit: int64(group.Limit()),
917+
Name: group.Name(),
918+
Namespace: string(decodedNamespace),
919+
Interval: interval,
920+
User: userID,
921+
Limit: int64(group.Limit()),
922+
QueryOffset: &queryOffset,
921923
},
922924

923925
EvaluationTimestamp: group.GetLastEvaluation(),

pkg/ruler/ruler_test.go

+37
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ type ruleLimits struct {
8888
maxRuleGroups int
8989
disabledRuleGroups validation.DisabledRuleGroups
9090
maxQueryLength time.Duration
91+
queryOffset time.Duration
9192
}
9293

9394
func (r ruleLimits) EvaluationDelay(_ string) time.Duration {
@@ -112,6 +113,10 @@ func (r ruleLimits) DisabledRuleGroups(userID string) validation.DisabledRuleGro
112113

113114
func (r ruleLimits) MaxQueryLength(_ string) time.Duration { return r.maxQueryLength }
114115

116+
func (r ruleLimits) RulerQueryOffset(_ string) time.Duration {
117+
return r.queryOffset
118+
}
119+
115120
func newEmptyQueryable() storage.Queryable {
116121
return storage.QueryableFunc(func(mint, maxt int64) (storage.Querier, error) {
117122
return emptyQuerier{}, nil
@@ -2533,3 +2538,35 @@ func TestRulerDisablesRuleGroups(t *testing.T) {
25332538
})
25342539
}
25352540
}
2541+
2542+
func TestRuler_QueryOffset(t *testing.T) {
2543+
store := newMockRuleStore(mockRulesQueryOffset, nil)
2544+
cfg := defaultRulerConfig(t)
2545+
2546+
r := newTestRuler(t, cfg, store, nil)
2547+
defer services.StopAndAwaitTerminated(context.Background(), r) //nolint:errcheck
2548+
2549+
ctx := user.InjectOrgID(context.Background(), "user1")
2550+
rls, err := r.Rules(ctx, &RulesRequest{})
2551+
require.NoError(t, err)
2552+
require.Len(t, rls.Groups, 1)
2553+
rg := rls.Groups[0]
2554+
expectedRg := mockRulesQueryOffset["user1"][0]
2555+
compareRuleGroupDescToStateDesc(t, expectedRg, rg)
2556+
2557+
// test default query offset=0 when not defined at group level
2558+
gotOffset := rg.GetGroup().QueryOffset
2559+
require.Equal(t, time.Duration(0), *gotOffset)
2560+
2561+
ctx = user.InjectOrgID(context.Background(), "user2")
2562+
rls, err = r.Rules(ctx, &RulesRequest{})
2563+
require.NoError(t, err)
2564+
require.Len(t, rls.Groups, 1)
2565+
rg = rls.Groups[0]
2566+
expectedRg = mockRules["user2"][0]
2567+
compareRuleGroupDescToStateDesc(t, expectedRg, rg)
2568+
2569+
// test group query offset is set
2570+
gotOffset = rg.GetGroup().QueryOffset
2571+
require.Equal(t, time.Minute*2, *gotOffset)
2572+
}

pkg/ruler/rulespb/compat.go

+21-10
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,19 @@ import (
1313

1414
// ToProto transforms a formatted prometheus rulegroup to a rule group protobuf
1515
func ToProto(user string, namespace string, rl rulefmt.RuleGroup) *RuleGroupDesc {
16+
var queryOffset *time.Duration
17+
if rl.QueryOffset != nil {
18+
offset := time.Duration(*rl.QueryOffset)
19+
queryOffset = &offset
20+
}
1621
rg := RuleGroupDesc{
17-
Name: rl.Name,
18-
Namespace: namespace,
19-
Interval: time.Duration(rl.Interval),
20-
Rules: formattedRuleToProto(rl.Rules),
21-
User: user,
22-
Limit: int64(rl.Limit),
22+
Name: rl.Name,
23+
Namespace: namespace,
24+
Interval: time.Duration(rl.Interval),
25+
Rules: formattedRuleToProto(rl.Rules),
26+
User: user,
27+
Limit: int64(rl.Limit),
28+
QueryOffset: queryOffset,
2329
}
2430
return &rg
2531
}
@@ -43,11 +49,16 @@ func formattedRuleToProto(rls []rulefmt.RuleNode) []*RuleDesc {
4349

4450
// FromProto generates a rulefmt RuleGroup
4551
func FromProto(rg *RuleGroupDesc) rulefmt.RuleGroup {
52+
var queryOffset model.Duration
53+
if rg.QueryOffset != nil {
54+
queryOffset = model.Duration(*rg.QueryOffset)
55+
}
4656
formattedRuleGroup := rulefmt.RuleGroup{
47-
Name: rg.GetName(),
48-
Interval: model.Duration(rg.Interval),
49-
Rules: make([]rulefmt.RuleNode, len(rg.GetRules())),
50-
Limit: int(rg.Limit),
57+
Name: rg.GetName(),
58+
Interval: model.Duration(rg.Interval),
59+
Rules: make([]rulefmt.RuleNode, len(rg.GetRules())),
60+
Limit: int(rg.Limit),
61+
QueryOffset: &queryOffset,
5162
}
5263

5364
for i, rl := range rg.GetRules() {

pkg/ruler/rulespb/compat_test.go

+7-3
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,18 @@ func TestProto(t *testing.T) {
2929

3030
rules = append(rules, testRule)
3131

32+
queryOffset := model.Duration(30 * time.Second)
3233
rg := rulefmt.RuleGroup{
33-
Name: "group1",
34-
Rules: rules,
35-
Interval: model.Duration(time.Minute),
34+
Name: "group1",
35+
Rules: rules,
36+
Interval: model.Duration(time.Minute),
37+
QueryOffset: &queryOffset,
3638
}
39+
3740
desc := ToProto("test", "namespace", rg)
3841

3942
assert.Equal(t, len(rules), len(desc.Rules))
43+
assert.Equal(t, 30*time.Second, *desc.QueryOffset)
4044

4145
ruleDesc := desc.Rules[0]
4246

0 commit comments

Comments
 (0)