Skip to content

Commit 9b74ad3

Browse files
authored
Add conditional artificial delay (#11139)
Signed-off-by: Marco Pracucci <[email protected]>
1 parent a2ab088 commit 9b74ad3

File tree

2 files changed

+124
-4
lines changed

2 files changed

+124
-4
lines changed

pkg/util/validation/limits.go

+36-4
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"fmt"
1414
"math"
1515
"reflect"
16+
"strconv"
1617
"strings"
1718
"time"
1819

@@ -129,7 +130,13 @@ type Limits struct {
129130
MetricRelabelConfigs []*relabel.Config `yaml:"metric_relabel_configs,omitempty" json:"metric_relabel_configs,omitempty" doc:"nocli|description=List of metric relabel configurations. Note that in most situations, it is more effective to use metrics relabeling directly in the Prometheus server, e.g. remote_write.write_relabel_configs. Labels available during the relabeling phase and cleaned afterwards: __meta_tenant_id" category:"experimental"`
130131
MetricRelabelingEnabled bool `yaml:"metric_relabeling_enabled" json:"metric_relabeling_enabled" category:"experimental"`
131132
ServiceOverloadStatusCodeOnRateLimitEnabled bool `yaml:"service_overload_status_code_on_rate_limit_enabled" json:"service_overload_status_code_on_rate_limit_enabled" category:"experimental"`
132-
IngestionArtificialDelay model.Duration `yaml:"ingestion_artificial_delay" json:"ingestion_artificial_delay" category:"experimental" doc:"hidden"`
133+
134+
IngestionArtificialDelay model.Duration `yaml:"ingestion_artificial_delay" json:"ingestion_artificial_delay" category:"experimental" doc:"hidden"`
135+
IngestionArtificialDelayConditionForTenantsWithLessThanMaxSeries int `yaml:"ingestion_artificial_delay_condition_for_tenants_with_less_than_max_series" json:"ingestion_artificial_delay_condition_for_tenants_with_less_than_max_series" category:"experimental" doc:"hidden"`
136+
IngestionArtificialDelayDurationForTenantsWithLessThanMaxSeries model.Duration `yaml:"ingestion_artificial_delay_duration_for_tenants_with_less_than_max_series" json:"ingestion_artificial_delay_duration_for_tenants_with_less_than_max_series" category:"experimental" doc:"hidden"`
137+
IngestionArtificialDelayConditionForTenantsWithIDGreaterThan int `yaml:"ingestion_artificial_delay_condition_for_tenants_with_id_greater_than" json:"ingestion_artificial_delay_condition_for_tenants_with_id_greater_than" category:"experimental" doc:"hidden"`
138+
IngestionArtificialDelayDurationForTenantsWithIDGreaterThan model.Duration `yaml:"ingestion_artificial_delay_duration_for_tenants_with_id_greater_than" json:"ingestion_artificial_delay_duration_for_tenants_with_id_greater_than" category:"experimental" doc:"hidden"`
139+
133140
// Ingester enforced limits.
134141
// Series
135142
MaxGlobalSeriesPerUser int `yaml:"max_global_series_per_user" json:"max_global_series_per_user"`
@@ -302,7 +309,12 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) {
302309
f.BoolVar(&l.OTelCreatedTimestampZeroIngestionEnabled, "distributor.otel-created-timestamp-zero-ingestion-enabled", false, "Whether to enable translation of OTel start timestamps to Prometheus zero samples in the OTLP endpoint.")
303310
f.Var(&l.PromoteOTelResourceAttributes, "distributor.otel-promote-resource-attributes", "Optionally specify OTel resource attributes to promote to labels.")
304311
f.BoolVar(&l.OTelKeepIdentifyingResourceAttributes, "distributor.otel-keep-identifying-resource-attributes", false, "Whether to keep identifying OTel resource attributes in the target_info metric on top of converting to job and instance labels.")
305-
f.Var(&l.IngestionArtificialDelay, "distributor.ingestion-artificial-delay", "Target ingestion delay. If set to a non-zero value, the distributor will artificially delay ingestion time-frame by the specified duration by computing the difference between actual ingestion and the target. There is no delay on actual ingestion of samples, it is only the response back to the client.")
312+
313+
f.Var(&l.IngestionArtificialDelay, "distributor.ingestion-artificial-delay", "Target ingestion delay to apply to all tenants. If set to a non-zero value, the distributor will artificially delay ingestion time-frame by the specified duration by computing the difference between actual ingestion and the target. There is no delay on actual ingestion of samples, it is only the response back to the client.")
314+
f.IntVar(&l.IngestionArtificialDelayConditionForTenantsWithLessThanMaxSeries, "distributor.ingestion-artificial-delay-condition-for-tenants-with-less-than-max-series", 0, "Condition to select tenants for which -distributor.ingestion-artificial-delay-duration-for-tenants-with-less-than-max-series should be applied.")
315+
f.Var(&l.IngestionArtificialDelayDurationForTenantsWithLessThanMaxSeries, "distributor.ingestion-artificial-delay-duration-for-tenants-with-less-than-max-series", "Target ingestion delay to apply to tenants with configured max global series to a value lower than -distributor.ingestion-artificial-delay-condition-for-tenants-with-less-than-max-series.")
316+
f.IntVar(&l.IngestionArtificialDelayConditionForTenantsWithIDGreaterThan, "distributor.ingestion-artificial-delay-condition-for-tenants-with-id-greater-than", 0, "Condition to select tenants for which -distributor.ingestion-artificial-delay-duration-for-tenants-with-id-greater-than should be applied.")
317+
f.Var(&l.IngestionArtificialDelayDurationForTenantsWithIDGreaterThan, "distributor.ingestion-artificial-delay-duration-for-tenants-with-id-greater-than", "Target ingestion delay to apply to tenants with a numeric ID whose value is greater than -distributor.ingestion-artificial-delay-condition-for-tenants-with-id-greater-than.")
306318

307319
f.IntVar(&l.MaxGlobalSeriesPerUser, MaxSeriesPerUserFlag, 150000, "The maximum number of in-memory series per tenant, across the cluster before replication. 0 to disable.")
308320
f.IntVar(&l.MaxGlobalSeriesPerMetric, MaxSeriesPerMetricFlag, 0, "The maximum number of in-memory series per metric name, across the cluster before replication. 0 to disable.")
@@ -1224,9 +1236,29 @@ func (o *Overrides) OTelKeepIdentifyingResourceAttributes(tenantID string) bool
12241236
return o.getOverridesForUser(tenantID).OTelKeepIdentifyingResourceAttributes
12251237
}
12261238

1227-
// DistributorIngestionArtificialDelay returns the artificial ingestion latency for a given use.
1239+
// DistributorIngestionArtificialDelay returns the artificial ingestion latency for a given user.
12281240
func (o *Overrides) DistributorIngestionArtificialDelay(tenantID string) time.Duration {
1229-
return time.Duration(o.getOverridesForUser(tenantID).IngestionArtificialDelay)
1241+
overrides := o.getOverridesForUser(tenantID)
1242+
1243+
// Default delay to apply to all tenants.
1244+
delay := overrides.IngestionArtificialDelay
1245+
1246+
// Check if the "max series" condition applies to this tenant.
1247+
maxSeriesCondition := overrides.IngestionArtificialDelayConditionForTenantsWithLessThanMaxSeries
1248+
maxSeriesDelay := overrides.IngestionArtificialDelayDurationForTenantsWithLessThanMaxSeries
1249+
if maxSeriesCondition > 0 && maxSeriesDelay > delay && o.MaxGlobalSeriesPerUser(tenantID) < maxSeriesCondition {
1250+
delay = maxSeriesDelay
1251+
}
1252+
1253+
// Check if the "tenant ID" condition applies to this tenant.
1254+
idCondition := overrides.IngestionArtificialDelayConditionForTenantsWithIDGreaterThan
1255+
idDelay := overrides.IngestionArtificialDelayDurationForTenantsWithIDGreaterThan
1256+
idNumber, idNumberErr := strconv.ParseInt(tenantID, 10, 32)
1257+
if idCondition > 0 && idDelay > delay && idNumberErr == nil && int(idNumber) > idCondition {
1258+
delay = idDelay
1259+
}
1260+
1261+
return time.Duration(delay)
12301262
}
12311263

12321264
func (o *Overrides) AlignQueriesWithStep(userID string) bool {

pkg/util/validation/limits_test.go

+88
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,94 @@ func TestMaxPartialQueryLengthWithoutDefault(t *testing.T) {
399399
assert.Equal(t, time.Duration(0), ov.MaxPartialQueryLength("tenant-b"))
400400
}
401401

402+
func TestDistributorIngestionArtificialDelay(t *testing.T) {
403+
tests := map[string]struct {
404+
tenantID string
405+
tenantLimits func(*Limits)
406+
expectedDelay time.Duration
407+
}{
408+
"should not apply delay by default": {
409+
tenantID: "tenant-a",
410+
tenantLimits: func(*Limits) {},
411+
expectedDelay: 0,
412+
},
413+
"should apply delay if a plain delay has been configured for the tenant": {
414+
tenantID: "tenant-a",
415+
tenantLimits: func(l *Limits) {
416+
l.IngestionArtificialDelay = model.Duration(time.Second)
417+
},
418+
expectedDelay: time.Second,
419+
},
420+
"should apply delay based on 'max series less than' condition if tenant max series is < the threshold": {
421+
tenantID: "tenant-a",
422+
tenantLimits: func(l *Limits) {
423+
l.IngestionArtificialDelayConditionForTenantsWithLessThanMaxSeries = 15001
424+
l.IngestionArtificialDelayDurationForTenantsWithLessThanMaxSeries = model.Duration(time.Second)
425+
l.MaxGlobalSeriesPerUser = 15000
426+
},
427+
expectedDelay: time.Second,
428+
},
429+
"should not apply delay based on 'max series less than' condition if tenant max series is >= the threshold": {
430+
tenantID: "tenant-a",
431+
tenantLimits: func(l *Limits) {
432+
l.IngestionArtificialDelayConditionForTenantsWithLessThanMaxSeries = 15001
433+
l.IngestionArtificialDelayDurationForTenantsWithLessThanMaxSeries = model.Duration(time.Second)
434+
l.MaxGlobalSeriesPerUser = 15001
435+
},
436+
expectedDelay: 0,
437+
},
438+
"should apply delay based on 'tenant ID greater than' condition if tenant ID is numeric and > the condition": {
439+
tenantID: "12346",
440+
tenantLimits: func(l *Limits) {
441+
l.IngestionArtificialDelayConditionForTenantsWithIDGreaterThan = 12345
442+
l.IngestionArtificialDelayDurationForTenantsWithIDGreaterThan = model.Duration(time.Second)
443+
},
444+
expectedDelay: time.Second,
445+
},
446+
"should not apply delay based on 'tenant ID greater than' condition if tenant ID is numeric and <= the condition": {
447+
tenantID: "12345",
448+
tenantLimits: func(l *Limits) {
449+
l.IngestionArtificialDelayConditionForTenantsWithIDGreaterThan = 12345
450+
l.IngestionArtificialDelayDurationForTenantsWithIDGreaterThan = model.Duration(time.Second)
451+
},
452+
expectedDelay: 0,
453+
},
454+
"should not apply delay based on 'tenant ID greater than' condition if tenant ID is not numeric": {
455+
tenantID: "tenant-123456",
456+
tenantLimits: func(l *Limits) {
457+
l.IngestionArtificialDelayConditionForTenantsWithIDGreaterThan = 12345
458+
l.IngestionArtificialDelayDurationForTenantsWithIDGreaterThan = model.Duration(time.Second)
459+
},
460+
expectedDelay: 0,
461+
},
462+
"should apply the highest delay among matching conditions": {
463+
tenantID: "12346",
464+
tenantLimits: func(l *Limits) {
465+
l.IngestionArtificialDelay = model.Duration(300 * time.Millisecond)
466+
467+
l.IngestionArtificialDelayConditionForTenantsWithLessThanMaxSeries = 15001
468+
l.IngestionArtificialDelayDurationForTenantsWithLessThanMaxSeries = model.Duration(200 * time.Millisecond)
469+
l.MaxGlobalSeriesPerUser = 15000
470+
471+
l.IngestionArtificialDelayConditionForTenantsWithIDGreaterThan = 12345
472+
l.IngestionArtificialDelayDurationForTenantsWithIDGreaterThan = model.Duration(100 * time.Millisecond)
473+
},
474+
expectedDelay: 300 * time.Millisecond,
475+
},
476+
}
477+
478+
for testName, testData := range tests {
479+
t.Run(testName, func(t *testing.T) {
480+
tenantLimits := &Limits{}
481+
flagext.DefaultValues(tenantLimits)
482+
testData.tenantLimits(tenantLimits)
483+
484+
ov := NewOverrides(Limits{}, NewMockTenantLimits(map[string]*Limits{testData.tenantID: tenantLimits}))
485+
require.Equal(t, testData.expectedDelay, ov.DistributorIngestionArtificialDelay(testData.tenantID))
486+
})
487+
}
488+
}
489+
402490
func TestAlertmanagerNotificationLimits(t *testing.T) {
403491
for name, tc := range map[string]struct {
404492
inputYAML string

0 commit comments

Comments
 (0)