Skip to content

Commit 820170a

Browse files
committed
Use per-tenant limit for global query offset
Signed-off-by: Mustafain Ali Khan <[email protected]>
1 parent d96cf38 commit 820170a

File tree

11 files changed

+89
-58
lines changed

11 files changed

+89
-58
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
* [FEATURE] Ingester: Add `ingester.instance-limits.max-inflight-query-requests` to allow limiting ingester concurrent queries. #6081
1212
* [FEATURE] Distributor: Add `validation.max-native-histogram-buckets` to limit max number of bucket count. Distributor will try to automatically reduce histogram resolution until it is within the bucket limit or resolution cannot be reduced anymore. #6104
1313
* [FEATURE] Store Gateway: Token bucket limiter. #6016
14+
* [FEATURE] Ruler: Add support for `query_offset` field on RuleGroup and new `ruler_query_offset` per-tenant limit. #6085
1415
* [ENHANCEMENT] rulers: Add support to persist tokens in rulers. #5987
1516
* [ENHANCEMENT] Query Frontend/Querier: Added store gateway postings touched count and touched size in Querier stats and log in Query Frontend. #5892
1617
* [ENHANCEMENT] Query Frontend/Querier: Returns `warnings` on prometheus query responses. #5916

docs/configuration/config-file-reference.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -3349,6 +3349,10 @@ query_rejection:
33493349
# CLI flag: -ruler.max-rule-groups-per-tenant
33503350
[ruler_max_rule_groups_per_tenant: <int> | default = 0]
33513351

3352+
# Duration to offset all rule evaluation queries per-tenant.
3353+
# CLI flag: -ruler.query-offset
3354+
[ruler_query_offset: <duration> | default = 0s]
3355+
33523356
# The default tenant's shard size when the shuffle-sharding strategy is used.
33533357
# Must be set when the store-gateway sharding is enabled with the
33543358
# shuffle-sharding strategy. When this setting is specified in the per-tenant
@@ -4147,10 +4151,6 @@ ruler_client:
41474151
# CLI flag: -ruler.rule-path
41484152
[rule_path: <string> | default = "/rules"]
41494153
4150-
# Default offset for all rule evaluation queries
4151-
# CLI flag: -ruler.rule-query-offset
4152-
[rule_query_offset: <duration> | default = 0s]
4153-
41544154
# Comma-separated list of URL(s) of the Alertmanager(s) to send notifications
41554155
# to. Each Alertmanager URL is treated as a separate group in the configuration.
41564156
# Multiple Alertmanagers in HA per group can be supported by using DNS

pkg/ruler/compat.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ type RulesLimits interface {
178178
RulerTenantShardSize(userID string) int
179179
RulerMaxRuleGroupsPerTenant(userID string) int
180180
RulerMaxRulesPerRuleGroup(userID string) int
181+
RulerQueryOffset(userID string) time.Duration
181182
DisabledRuleGroups(userID string) validation.DisabledRuleGroups
182183
}
183184

@@ -359,7 +360,7 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi
359360
ConcurrentEvalsEnabled: cfg.ConcurrentEvalsEnabled,
360361
MaxConcurrentEvals: cfg.MaxConcurrentEvals,
361362
DefaultRuleQueryOffset: func() time.Duration {
362-
return cfg.RuleQueryOffset
363+
return overrides.RulerQueryOffset(userID)
363364
},
364365
})
365366
}

pkg/ruler/ruler.go

+2-4
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,6 @@ type Config struct {
105105
PollInterval time.Duration `yaml:"poll_interval"`
106106
// Path to store rule files for prom manager.
107107
RulePath string `yaml:"rule_path"`
108-
// Default offset for all rule evaluation queries.
109-
RuleQueryOffset time.Duration `yaml:"rule_query_offset"`
110108

111109
// URL of the Alertmanager to send notifications to.
112110
// If you are configuring the ruler to send to a Cortex Alertmanager,
@@ -196,7 +194,6 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
196194
f.Var(&cfg.ExternalURL, "ruler.external.url", "URL of alerts return path.")
197195
f.DurationVar(&cfg.EvaluationInterval, "ruler.evaluation-interval", 1*time.Minute, "How frequently to evaluate rules")
198196
f.DurationVar(&cfg.PollInterval, "ruler.poll-interval", 1*time.Minute, "How frequently to poll for rule changes")
199-
f.DurationVar(&cfg.RuleQueryOffset, "ruler.rule-query-offset", 0*time.Minute, "Default offset for all rule evaluation queries")
200197

201198
f.StringVar(&cfg.AlertmanagerURL, "ruler.alertmanager-url", "", "Comma-separated list of URL(s) of the Alertmanager(s) to send notifications to. Each Alertmanager URL is treated as a separate group in the configuration. Multiple Alertmanagers in HA per group can be supported by using DNS resolution via -ruler.alertmanager-discovery.")
202199
f.BoolVar(&cfg.AlertmanagerDiscovery, "ruler.alertmanager-discovery", false, "Use DNS SRV records to discover Alertmanager hosts.")
@@ -914,14 +911,15 @@ func (r *Ruler) getLocalRules(userID string, rulesRequest RulesRequest, includeB
914911
}
915912
interval := group.Interval()
916913

914+
queryOffset := group.QueryOffset()
917915
groupDesc := &GroupStateDesc{
918916
Group: &rulespb.RuleGroupDesc{
919917
Name: group.Name(),
920918
Namespace: string(decodedNamespace),
921919
Interval: interval,
922920
User: userID,
923921
Limit: int64(group.Limit()),
924-
QueryOffset: group.QueryOffset(),
922+
QueryOffset: &queryOffset,
925923
},
926924

927925
EvaluationTimestamp: group.GetLastEvaluation(),

pkg/ruler/ruler_test.go

+9-2
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ type ruleLimits struct {
8888
maxRuleGroups int
8989
disabledRuleGroups validation.DisabledRuleGroups
9090
maxQueryLength time.Duration
91+
queryOffset time.Duration
9192
}
9293

9394
func (r ruleLimits) EvaluationDelay(_ string) time.Duration {
@@ -112,6 +113,10 @@ func (r ruleLimits) DisabledRuleGroups(userID string) validation.DisabledRuleGro
112113

113114
func (r ruleLimits) MaxQueryLength(_ string) time.Duration { return r.maxQueryLength }
114115

116+
func (r ruleLimits) RulerQueryOffset(_ string) time.Duration {
117+
return r.queryOffset
118+
}
119+
115120
func newEmptyQueryable() storage.Queryable {
116121
return storage.QueryableFunc(func(mint, maxt int64) (storage.Querier, error) {
117122
return emptyQuerier{}, nil
@@ -2550,7 +2555,8 @@ func TestRuler_QueryOffset(t *testing.T) {
25502555
compareRuleGroupDescToStateDesc(t, expectedRg, rg)
25512556

25522557
// test default query offset=0 when not defined at group level
2553-
require.Equal(t, time.Duration(0), rg.GetGroup().QueryOffset)
2558+
gotOffset := rg.GetGroup().QueryOffset
2559+
require.Equal(t, time.Duration(0), *gotOffset)
25542560

25552561
ctx = user.InjectOrgID(context.Background(), "user2")
25562562
rls, err = r.Rules(ctx, &RulesRequest{})
@@ -2561,5 +2567,6 @@ func TestRuler_QueryOffset(t *testing.T) {
25612567
compareRuleGroupDescToStateDesc(t, expectedRg, rg)
25622568

25632569
// test group query offset is set
2564-
require.Equal(t, time.Minute*2, rg.GetGroup().QueryOffset)
2570+
gotOffset = rg.GetGroup().QueryOffset
2571+
require.Equal(t, time.Minute*2, *gotOffset)
25652572
}

pkg/ruler/rulespb/compat.go

+7-3
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@ import (
1313

1414
// ToProto transforms a formatted prometheus rulegroup to a rule group protobuf
1515
func ToProto(user string, namespace string, rl rulefmt.RuleGroup) *RuleGroupDesc {
16-
queryOffset := time.Duration(0)
16+
var queryOffset *time.Duration
1717
if rl.QueryOffset != nil {
18-
queryOffset = time.Duration(*rl.QueryOffset)
18+
offset := time.Duration(*rl.QueryOffset)
19+
queryOffset = &offset
1920
}
2021
rg := RuleGroupDesc{
2122
Name: rl.Name,
@@ -48,7 +49,10 @@ func formattedRuleToProto(rls []rulefmt.RuleNode) []*RuleDesc {
4849

4950
// FromProto generates a rulefmt RuleGroup
5051
func FromProto(rg *RuleGroupDesc) rulefmt.RuleGroup {
51-
queryOffset := model.Duration(rg.QueryOffset)
52+
var queryOffset model.Duration
53+
if rg.QueryOffset != nil {
54+
queryOffset = model.Duration(*rg.QueryOffset)
55+
}
5256
formattedRuleGroup := rulefmt.RuleGroup{
5357
Name: rg.GetName(),
5458
Interval: model.Duration(rg.Interval),

pkg/ruler/rulespb/compat_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ func TestProto(t *testing.T) {
4040
desc := ToProto("test", "namespace", rg)
4141

4242
assert.Equal(t, len(rules), len(desc.Rules))
43-
assert.Equal(t, 30*time.Second, desc.QueryOffset)
43+
assert.Equal(t, 30*time.Second, *desc.QueryOffset)
4444

4545
ruleDesc := desc.Rules[0]
4646

pkg/ruler/rulespb/rules.pb.go

+53-40
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/ruler/rulespb/rules.proto

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ message RuleGroupDesc {
2929
repeated google.protobuf.Any options = 9;
3030
int64 limit =10;
3131
google.protobuf.Duration queryOffset = 11
32-
[(gogoproto.nullable) = false, (gogoproto.stdduration) = true];
32+
[(gogoproto.nullable) = true, (gogoproto.stdduration) = true];
3333
}
3434

3535
// RuleDesc is a proto representation of a Prometheus Rule

pkg/ruler/store_mock_test.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ var (
132132
},
133133
},
134134
}
135+
queryOffset = 2 * time.Minute
135136
mockRulesQueryOffset = map[string]rulespb.RuleGroupList{
136137
"user1": {
137138
&rulespb.RuleGroupDesc{
@@ -164,7 +165,7 @@ var (
164165
},
165166
},
166167
Interval: interval,
167-
QueryOffset: 2 * time.Minute,
168+
QueryOffset: &queryOffset,
168169
},
169170
},
170171
}

pkg/util/validation/limits.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ type Limits struct {
178178
RulerTenantShardSize int `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"`
179179
RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"`
180180
RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"`
181+
RulerQueryOffset model.Duration `yaml:"ruler_query_offset" json:"ruler_query_offset"`
181182

182183
// Store-gateway.
183184
StoreGatewayTenantShardSize float64 `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"`
@@ -264,10 +265,10 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
264265

265266
f.IntVar(&l.MaxOutstandingPerTenant, "frontend.max-outstanding-requests-per-tenant", 100, "Maximum number of outstanding requests per tenant per request queue (either query frontend or query scheduler); requests beyond this error with HTTP 429.")
266267

267-
f.Var(&l.RulerEvaluationDelay, "ruler.evaluation-delay-duration", "Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex.")
268268
f.IntVar(&l.RulerTenantShardSize, "ruler.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by ruler. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")
269269
f.IntVar(&l.RulerMaxRulesPerRuleGroup, "ruler.max-rules-per-rule-group", 0, "Maximum number of rules per rule group per-tenant. 0 to disable.")
270270
f.IntVar(&l.RulerMaxRuleGroupsPerTenant, "ruler.max-rule-groups-per-tenant", 0, "Maximum number of rule groups per-tenant. 0 to disable.")
271+
f.Var(&l.RulerQueryOffset, "ruler.query-offset", "Duration to offset all rule evaluation queries per-tenant.")
271272

272273
f.Var(&l.CompactorBlocksRetentionPeriod, "compactor.blocks-retention-period", "Delete blocks containing samples older than the specified retention period. 0 to disable.")
273274
f.IntVar(&l.CompactorTenantShardSize, "compactor.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by the compactor. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")
@@ -791,6 +792,11 @@ func (o *Overrides) RulerMaxRuleGroupsPerTenant(userID string) int {
791792
return o.GetOverridesForUser(userID).RulerMaxRuleGroupsPerTenant
792793
}
793794

795+
// RulerQueryOffset returns the rule query offset for a given user.
796+
func (o *Overrides) RulerQueryOffset(userID string) time.Duration {
797+
return time.Duration(o.GetOverridesForUser(userID).RulerQueryOffset)
798+
}
799+
794800
// StoreGatewayTenantShardSize returns the store-gateway shard size for a given user.
795801
func (o *Overrides) StoreGatewayTenantShardSize(userID string) float64 {
796802
return o.GetOverridesForUser(userID).StoreGatewayTenantShardSize

0 commit comments

Comments
 (0)