Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert all validation.Limits durations to model.Duration #4044

Merged
merged 5 commits into from
Apr 2, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
* [ENHANCEMENT] Ingester: reduce CPU and memory when an high number of errors are returned by the ingester on the write path with the blocks storage. #3969 #3971 #3973
* [ENHANCEMENT] Distributor: reduce CPU and memory when an high number of errors are returned by the distributor on the write path. #3990
* [ENHANCEMENT] Put metric before label value in the "label value too long" error message. #4018
* [ENHANCEMENT] Allow use of `y|w|d` suffixes for duration related limits and per-tenant limits. #TBD
* [BUGFIX] Ruler-API: fix bug where `/api/v1/rules/<namespace>/<group_name>` endpoint return `400` instead of `404`. #4013
* [BUGFIX] Distributor: reverted changes done to rate limiting in #3825. #3948
* [BUGFIX] Ingester: Fix race condition when opening and closing tsdb concurrently. #3959
Expand Down
2 changes: 1 addition & 1 deletion docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -3829,7 +3829,7 @@ The `limits_config` configures default and per-tenant limits imposed by Cortex s

# Maximum accepted sample age before rejecting.
# CLI flag: -validation.reject-old-samples.max-age
[reject_old_samples_max_age: <duration> | default = 336h]
[reject_old_samples_max_age: <duration> | default = 2w]

# Duration which table will be created/deleted before/after it's needed; we
# won't accept sample from before this time.
Expand Down
2 changes: 1 addition & 1 deletion pkg/chunk/chunk_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func newTestChunkStoreConfigWithMockStorage(t require.TestingT, schemaCfg Schema

var limits validation.Limits
flagext.DefaultValues(&limits)
limits.MaxQueryLength = 30 * 24 * time.Hour
limits.MaxQueryLength = model.Duration(30 * 24 * time.Hour)
overrides, err := validation.NewOverrides(limits, nil)
require.NoError(t, err)

Expand Down
2 changes: 1 addition & 1 deletion pkg/chunk/testutils/testutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ func SetupTestChunkStoreWithClients(indexClient chunk.IndexClient, chunksClient

var limits validation.Limits
flagext.DefaultValues(&limits)
limits.MaxQueryLength = 30 * 24 * time.Hour
limits.MaxQueryLength = model.Duration(30 * 24 * time.Hour)
overrides, err := validation.NewOverrides(limits, nil)
if err != nil {
return nil, err
Expand Down
2 changes: 1 addition & 1 deletion pkg/cortex/modules.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ func (t *Cortex) initRing() (serv services.Service, err error) {
func (t *Cortex) initRuntimeConfig() (services.Service, error) {
if t.Cfg.RuntimeConfig.LoadPath == "" {
t.Cfg.RuntimeConfig.LoadPath = t.Cfg.LimitsConfig.PerTenantOverrideConfig
t.Cfg.RuntimeConfig.ReloadPeriod = t.Cfg.LimitsConfig.PerTenantOverridePeriod
t.Cfg.RuntimeConfig.ReloadPeriod = time.Duration(t.Cfg.LimitsConfig.PerTenantOverridePeriod)
}

if t.Cfg.RuntimeConfig.LoadPath == "" {
Expand Down
8 changes: 4 additions & 4 deletions pkg/distributor/distributor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1112,7 +1112,7 @@ func BenchmarkDistributor_Push(b *testing.B) {
"timestamp too old": {
prepareConfig: func(limits *validation.Limits) {
limits.RejectOldSamples = true
limits.RejectOldSamplesMaxAge = time.Hour
limits.RejectOldSamplesMaxAge = model.Duration(time.Hour)
},
prepareSeries: func() ([]labels.Labels, []cortexpb.Sample) {
metrics := make([]labels.Labels, numSeriesPerRequest)
Expand All @@ -1137,7 +1137,7 @@ func BenchmarkDistributor_Push(b *testing.B) {
},
"timestamp too new": {
prepareConfig: func(limits *validation.Limits) {
limits.CreationGracePeriod = time.Minute
limits.CreationGracePeriod = model.Duration(time.Minute)
},
prepareSeries: func() ([]labels.Labels, []cortexpb.Sample) {
metrics := make([]labels.Labels, numSeriesPerRequest)
Expand Down Expand Up @@ -2055,9 +2055,9 @@ func TestDistributorValidation(t *testing.T) {
var limits validation.Limits
flagext.DefaultValues(&limits)

limits.CreationGracePeriod = 2 * time.Hour
limits.CreationGracePeriod = model.Duration(2 * time.Hour)
limits.RejectOldSamples = true
limits.RejectOldSamplesMaxAge = 24 * time.Hour
limits.RejectOldSamplesMaxAge = model.Duration(24 * time.Hour)
limits.MaxLabelNamesPerSeries = 2

ds, _, r, _ := prepare(t, prepConfig{
Expand Down
2 changes: 1 addition & 1 deletion pkg/querier/querier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ func TestQuerier_ValidateQueryTimeRange_MaxQueryLength(t *testing.T) {
flagext.DefaultValues(&cfg)

limits := defaultLimitsConfig()
limits.MaxQueryLength = maxQueryLength
limits.MaxQueryLength = model.Duration(maxQueryLength)
overrides, err := validation.NewOverrides(limits, nil)
require.NoError(t, err)

Expand Down
49 changes: 28 additions & 21 deletions pkg/util/validation/limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ type Limits struct {
MaxLabelNamesPerSeries int `yaml:"max_label_names_per_series" json:"max_label_names_per_series"`
MaxMetadataLength int `yaml:"max_metadata_length" json:"max_metadata_length"`
RejectOldSamples bool `yaml:"reject_old_samples" json:"reject_old_samples"`
RejectOldSamplesMaxAge time.Duration `yaml:"reject_old_samples_max_age" json:"reject_old_samples_max_age"`
CreationGracePeriod time.Duration `yaml:"creation_grace_period" json:"creation_grace_period"`
RejectOldSamplesMaxAge model.Duration `yaml:"reject_old_samples_max_age" json:"reject_old_samples_max_age"`
CreationGracePeriod model.Duration `yaml:"creation_grace_period" json:"creation_grace_period"`
EnforceMetadataMetricName bool `yaml:"enforce_metadata_metric_name" json:"enforce_metadata_metric_name"`
EnforceMetricName bool `yaml:"enforce_metric_name" json:"enforce_metric_name"`
IngestionTenantShardSize int `yaml:"ingestion_tenant_shard_size" json:"ingestion_tenant_shard_size"`
Expand All @@ -71,17 +71,17 @@ type Limits struct {
// Querier enforced limits.
MaxChunksPerQuery int `yaml:"max_chunks_per_query" json:"max_chunks_per_query"`
MaxQueryLookback model.Duration `yaml:"max_query_lookback" json:"max_query_lookback"`
MaxQueryLength time.Duration `yaml:"max_query_length" json:"max_query_length"`
MaxQueryLength model.Duration `yaml:"max_query_length" json:"max_query_length"`
MaxQueryParallelism int `yaml:"max_query_parallelism" json:"max_query_parallelism"`
CardinalityLimit int `yaml:"cardinality_limit" json:"cardinality_limit"`
MaxCacheFreshness time.Duration `yaml:"max_cache_freshness" json:"max_cache_freshness"`
MaxCacheFreshness model.Duration `yaml:"max_cache_freshness" json:"max_cache_freshness"`
MaxQueriersPerTenant int `yaml:"max_queriers_per_tenant" json:"max_queriers_per_tenant"`

// Ruler defaults and limits.
RulerEvaluationDelay time.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"`
RulerTenantShardSize int `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"`
RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"`
RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"`
RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"`
RulerTenantShardSize int `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"`
RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"`
RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"`

// Store-gateway.
StoreGatewayTenantShardSize int `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"`
Expand All @@ -96,8 +96,8 @@ type Limits struct {
S3SSEKMSEncryptionContext string `yaml:"s3_sse_kms_encryption_context" json:"s3_sse_kms_encryption_context" doc:"nocli|description=S3 server-side encryption KMS encryption context. If unset and the key ID override is set, the encryption context will not be provided to S3. Ignored if the SSE type override is not set."`

// Config for overrides, convenient if it goes here. [Deprecated in favor of RuntimeConfig flag in cortex.Config]
PerTenantOverrideConfig string `yaml:"per_tenant_override_config" json:"per_tenant_override_config"`
PerTenantOverridePeriod time.Duration `yaml:"per_tenant_override_period" json:"per_tenant_override_period"`
PerTenantOverrideConfig string `yaml:"per_tenant_override_config" json:"per_tenant_override_config"`
PerTenantOverridePeriod model.Duration `yaml:"per_tenant_override_period" json:"per_tenant_override_period"`
}

// RegisterFlags adds the flags required to config this to the given FlagSet
Expand All @@ -116,8 +116,10 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
f.IntVar(&l.MaxLabelNamesPerSeries, "validation.max-label-names-per-series", 30, "Maximum number of label names per series.")
f.IntVar(&l.MaxMetadataLength, "validation.max-metadata-length", 1024, "Maximum length accepted for metric metadata. Metadata refers to Metric Name, HELP and UNIT.")
f.BoolVar(&l.RejectOldSamples, "validation.reject-old-samples", false, "Reject old samples.")
f.DurationVar(&l.RejectOldSamplesMaxAge, "validation.reject-old-samples.max-age", 14*24*time.Hour, "Maximum accepted sample age before rejecting.")
f.DurationVar(&l.CreationGracePeriod, "validation.create-grace-period", 10*time.Minute, "Duration which table will be created/deleted before/after it's needed; we won't accept sample from before this time.")
_ = l.RejectOldSamplesMaxAge.Set("14d")
f.Var(&l.RejectOldSamplesMaxAge, "validation.reject-old-samples.max-age", "Maximum accepted sample age before rejecting.")
_ = l.CreationGracePeriod.Set("10m")
f.Var(&l.CreationGracePeriod, "validation.create-grace-period", "Duration which table will be created/deleted before/after it's needed; we won't accept sample from before this time.")
f.BoolVar(&l.EnforceMetricName, "validation.enforce-metric-name", true, "Enforce every sample has a metric name.")
f.BoolVar(&l.EnforceMetadataMetricName, "validation.enforce-metadata-metric-name", true, "Enforce every metadata has a metric name.")

Expand All @@ -135,22 +137,27 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
f.IntVar(&l.MaxGlobalMetadataPerMetric, "ingester.max-global-metadata-per-metric", 0, "The maximum number of metadata per metric, across the cluster. 0 to disable.")

f.IntVar(&l.MaxChunksPerQuery, "store.query-chunk-limit", 2e6, "Maximum number of chunks that can be fetched in a single query. This limit is enforced when fetching chunks from the long-term storage. When running the Cortex chunks storage, this limit is enforced in the querier, while when running the Cortex blocks storage this limit is both enforced in the querier and store-gateway. 0 to disable.")
f.DurationVar(&l.MaxQueryLength, "store.max-query-length", 0, "Limit the query time range (end - start time). This limit is enforced in the query-frontend (on the received query), in the querier (on the query possibly split by the query-frontend) and in the chunks storage. 0 to disable.")
_ = l.MaxQueryLength.Set("0s")
f.Var(&l.MaxQueryLength, "store.max-query-length", "Limit the query time range (end - start time). This limit is enforced in the query-frontend (on the received query), in the querier (on the query possibly split by the query-frontend) and in the chunks storage. 0 to disable.")
_ = l.MaxQueryLookback.Set("0s")
f.Var(&l.MaxQueryLookback, "querier.max-query-lookback", "Limit how long back data (series and metadata) can be queried, up until <lookback> duration ago. This limit is enforced in the query-frontend, querier and ruler. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.")
f.IntVar(&l.MaxQueryParallelism, "querier.max-query-parallelism", 14, "Maximum number of split queries will be scheduled in parallel by the frontend.")
f.IntVar(&l.CardinalityLimit, "store.cardinality-limit", 1e5, "Cardinality limit for index queries. This limit is ignored when running the Cortex blocks storage. 0 to disable.")
f.DurationVar(&l.MaxCacheFreshness, "frontend.max-cache-freshness", 1*time.Minute, "Most recent allowed cacheable result per-tenant, to prevent caching very recent results that might still be in flux.")
_ = l.MaxCacheFreshness.Set("1m")
f.Var(&l.MaxCacheFreshness, "frontend.max-cache-freshness", "Most recent allowed cacheable result per-tenant, to prevent caching very recent results that might still be in flux.")
f.IntVar(&l.MaxQueriersPerTenant, "frontend.max-queriers-per-tenant", 0, "Maximum number of queriers that can handle requests for a single tenant. If set to 0 or value higher than number of available queriers, *all* queriers will handle requests for the tenant. Each frontend (or query-scheduler, if used) will select the same set of queriers for the same tenant (given that all queriers are connected to all frontends / query-schedulers). This option only works with queriers connecting to the query-frontend / query-scheduler, not when using downstream URL.")

f.DurationVar(&l.RulerEvaluationDelay, "ruler.evaluation-delay-duration", 0, "Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex.")
_ = l.RulerEvaluationDelay.Set("0s")
f.Var(&l.RulerEvaluationDelay, "ruler.evaluation-delay-duration", "Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex.")
f.IntVar(&l.RulerTenantShardSize, "ruler.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by ruler. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")
f.IntVar(&l.RulerMaxRulesPerRuleGroup, "ruler.max-rules-per-rule-group", 0, "Maximum number of rules per rule group per-tenant. 0 to disable.")
f.IntVar(&l.RulerMaxRuleGroupsPerTenant, "ruler.max-rule-groups-per-tenant", 0, "Maximum number of rule groups per-tenant. 0 to disable.")

f.Var(&l.CompactorBlocksRetentionPeriod, "compactor.blocks-retention-period", "Delete blocks containing samples older than the specified retention period. 0 to disable.")

f.StringVar(&l.PerTenantOverrideConfig, "limits.per-user-override-config", "", "File name of per-user overrides. [deprecated, use -runtime-config.file instead]")
f.DurationVar(&l.PerTenantOverridePeriod, "limits.per-user-override-period", 10*time.Second, "Period with which to reload the overrides. [deprecated, use -runtime-config.reload-period instead]")
_ = l.PerTenantOverridePeriod.Set("10s")
f.Var(&l.PerTenantOverridePeriod, "limits.per-user-override-period", "Period with which to reload the overrides. [deprecated, use -runtime-config.reload-period instead]")

// Store-gateway.
f.IntVar(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set when the store-gateway sharding is enabled with the shuffle-sharding strategy. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")
Expand Down Expand Up @@ -299,13 +306,13 @@ func (o *Overrides) RejectOldSamples(userID string) bool {

// RejectOldSamplesMaxAge returns the age at which samples should be rejected.
func (o *Overrides) RejectOldSamplesMaxAge(userID string) time.Duration {
return o.getOverridesForUser(userID).RejectOldSamplesMaxAge
return time.Duration(o.getOverridesForUser(userID).RejectOldSamplesMaxAge)
}

// CreationGracePeriod is misnamed, and actually returns how far into the future
// we should accept samples.
func (o *Overrides) CreationGracePeriod(userID string) time.Duration {
return o.getOverridesForUser(userID).CreationGracePeriod
return time.Duration(o.getOverridesForUser(userID).CreationGracePeriod)
}

// MaxSeriesPerQuery returns the maximum number of series a query is allowed to hit.
Expand Down Expand Up @@ -350,13 +357,13 @@ func (o *Overrides) MaxQueryLookback(userID string) time.Duration {

// MaxQueryLength returns the limit of the length (in time) of a query.
func (o *Overrides) MaxQueryLength(userID string) time.Duration {
return o.getOverridesForUser(userID).MaxQueryLength
return time.Duration(o.getOverridesForUser(userID).MaxQueryLength)
}

// MaxCacheFreshness returns the period after which results are cacheable,
// to prevent caching of very recent results.
func (o *Overrides) MaxCacheFreshness(userID string) time.Duration {
return o.getOverridesForUser(userID).MaxCacheFreshness
return time.Duration(o.getOverridesForUser(userID).MaxCacheFreshness)
}

// MaxQueriersPerUser returns the maximum number of queriers that can handle requests for this user.
Expand Down Expand Up @@ -417,7 +424,7 @@ func (o *Overrides) IngestionTenantShardSize(userID string) int {

// EvaluationDelay returns the rules evaluation delay for a given user.
func (o *Overrides) EvaluationDelay(userID string) time.Duration {
return o.getOverridesForUser(userID).RulerEvaluationDelay
return time.Duration(o.getOverridesForUser(userID).RulerEvaluationDelay)
}

// CompactorBlocksRetentionPeriod returns the retention period for a given user.
Expand Down
38 changes: 36 additions & 2 deletions pkg/util/validation/limits_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,40 @@ func TestLimitsTagsYamlMatchJson(t *testing.T) {
assert.Empty(t, mismatch, "expected no mismatched JSON and YAML tags")
}

func TestLimitsStringDurationYamlMatchJson(t *testing.T) {
inputYAML := `
max_query_lookback: 1s
max_query_length: 1s
`
inputJSON := `{"max_query_lookback": "1s", "max_query_length": "1s"}`

limitsYAML := Limits{}
err := yaml.Unmarshal([]byte(inputYAML), &limitsYAML)
require.NoError(t, err, "expected to be able to unmarshal from YAML")

limitsJSON := Limits{}
err = json.Unmarshal([]byte(inputJSON), &limitsJSON)
require.NoError(t, err, "expected to be able to unmarshal from JSON")

assert.Equal(t, limitsYAML, limitsJSON)
}

func TestLimitsAlwaysUsesPromDuration(t *testing.T) {
stdlibDuration := reflect.TypeOf(time.Duration(0))
limits := reflect.TypeOf(Limits{})
n := limits.NumField()
var badDurationType []string

for i := 0; i < n; i++ {
field := limits.Field(i)
if field.Type == stdlibDuration {
badDurationType = append(badDurationType, field.Name)
}
}

assert.Empty(t, badDurationType, "some Limits fields are using stdlib time.Duration instead of model.Duration")
}

func TestMetricRelabelConfigLimitsLoadingFromYaml(t *testing.T) {
SetDefaultLimitsForYAMLUnmarshalling(Limits{})

Expand Down Expand Up @@ -238,10 +272,10 @@ func TestSmallestPositiveNonZeroIntPerTenant(t *testing.T) {
func TestSmallestPositiveNonZeroDurationPerTenant(t *testing.T) {
tenantLimits := map[string]*Limits{
"tenant-a": {
MaxQueryLength: time.Hour,
MaxQueryLength: model.Duration(time.Hour),
},
"tenant-b": {
MaxQueryLength: 4 * time.Hour,
MaxQueryLength: model.Duration(4 * time.Hour),
},
}

Expand Down