Skip to content

Commit b64a97d

Browse files
pedro-stanaka authored and alvinlin123 committed
Adds new metric for dropped samples in ingester (cortexproject#4503)
* Adding test case for dropping metrics by name to better understand the flow of the distributor Signed-off-by: Pedro Tanaka <[email protected]>
* Adding test case and new metric for dropped samples Signed-off-by: Pedro Tanaka <[email protected]>
* Updating CHANGELOG with new changes Signed-off-by: Pedro Tanaka <[email protected]>
* Fixing linting problem on distributor file Signed-off-by: Pedro Tanaka <[email protected]>
* Reusing discarded samples metric from validate package Signed-off-by: Pedro Tanaka <[email protected]>
* Compare labelset with len() instead of comparing to nil Signed-off-by: Pedro Tanaka <[email protected]>
* Undoing unnecessary changes on tests and distributor Signed-off-by: Pedro Tanaka <[email protected]>
* Small rename on comment Signed-off-by: Pedro Tanaka <[email protected]>
* Fixing linting offenses Signed-off-by: Pedro Tanaka <[email protected]>
* Resetting validation dropped samples metric to avoid getting metrics from other test runs Signed-off-by: Pedro Tanaka <[email protected]>
* Resolving problems after rebase conflicts Signed-off-by: Pedro Tanaka <[email protected]>
* Registering counter for dropped metrics in test Signed-off-by: Pedro Tanaka <[email protected]>
* Checking if user label drop configuration did not drop __name__ label Signed-off-by: Pedro Tanaka <[email protected]>
* Do not check for name label, adding new test Signed-off-by: Pedro Tanaka <[email protected]>
Signed-off-by: Alvin Lin <[email protected]>
1 parent cc7154b commit b64a97d

File tree

4 files changed

+196
-40
lines changed

4 files changed

+196
-40
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
* [FEATURE] AlertManager: Add support for SNS Receiver. #4382
1010
=======
1111

12+
* [ENHANCEMENT] Keep track of discarded samples due to relabel configuration in `cortex_discarded_samples_total`. #4503
1213
* [CHANGE] Changed default for `-ingester.min-ready-duration` from 1 minute to 15 seconds. #4539
1314
* [CHANGE] query-frontend: Do not print anything in the logs of `query-frontend` if a in-progress query has been canceled (context canceled). #4562
1415
* [ENHANCEMENT] Ruler: Add `-ruler.disable-rule-group-label` to disable the `rule_group` label on exported metrics. #4571

pkg/distributor/distributor.go

+13
Original file line numberDiff line numberDiff line change
@@ -637,6 +637,14 @@ func (d *Distributor) Push(ctx context.Context, req *cortexpb.WriteRequest) (*co
637637

638638
if mrc := d.limits.MetricRelabelConfigs(userID); len(mrc) > 0 {
639639
l := relabel.Process(cortexpb.FromLabelAdaptersToLabels(ts.Labels), mrc...)
640+
if len(l) == 0 {
641+
// all labels are gone, samples will be discarded
642+
validation.DiscardedSamples.WithLabelValues(
643+
validation.DroppedByRelabelConfiguration,
644+
userID,
645+
).Add(float64(len(ts.Samples)))
646+
continue
647+
}
640648
ts.Labels = cortexpb.FromLabelsToLabelAdapters(l)
641649
}
642650

@@ -652,6 +660,11 @@ func (d *Distributor) Push(ctx context.Context, req *cortexpb.WriteRequest) (*co
652660
}
653661

654662
if len(ts.Labels) == 0 {
663+
validation.DiscardedExemplars.WithLabelValues(
664+
validation.DroppedByUserConfigurationOverride,
665+
userID,
666+
).Add(float64(len(ts.Samples)))
667+
655668
continue
656669
}
657670

pkg/distributor/distributor_test.go

+177-40
Original file line numberDiff line numberDiff line change
@@ -1150,7 +1150,7 @@ func TestDistributor_Push_LabelRemoval(t *testing.T) {
11501150
})
11511151

11521152
// Push the series to the distributor
1153-
req := mockWriteRequest(tc.inputSeries, 1, 1)
1153+
req := mockWriteRequest([]labels.Labels{tc.inputSeries}, 1, 1)
11541154
_, err = ds[0].Push(ctx, req)
11551155
require.NoError(t, err)
11561156

@@ -1166,6 +1166,47 @@ func TestDistributor_Push_LabelRemoval(t *testing.T) {
11661166
}
11671167
}
11681168

1169+
func TestDistributor_Push_LabelRemoval_RemovingNameLabelWillError(t *testing.T) {
1170+
ctx := user.InjectOrgID(context.Background(), "user")
1171+
type testcase struct {
1172+
inputSeries labels.Labels
1173+
expectedSeries labels.Labels
1174+
removeReplica bool
1175+
removeLabels []string
1176+
}
1177+
1178+
tc := testcase{
1179+
removeReplica: true,
1180+
removeLabels: []string{"__name__"},
1181+
inputSeries: labels.Labels{
1182+
{Name: "__name__", Value: "some_metric"},
1183+
{Name: "cluster", Value: "one"},
1184+
{Name: "__replica__", Value: "two"},
1185+
},
1186+
expectedSeries: labels.Labels{},
1187+
}
1188+
1189+
var err error
1190+
var limits validation.Limits
1191+
flagext.DefaultValues(&limits)
1192+
limits.DropLabels = tc.removeLabels
1193+
limits.AcceptHASamples = tc.removeReplica
1194+
1195+
ds, _, _ := prepare(t, prepConfig{
1196+
numIngesters: 2,
1197+
happyIngesters: 2,
1198+
numDistributors: 1,
1199+
shardByAllLabels: true,
1200+
limits: &limits,
1201+
})
1202+
1203+
// Push the series to the distributor
1204+
req := mockWriteRequest([]labels.Labels{tc.inputSeries}, 1, 1)
1205+
_, err = ds[0].Push(ctx, req)
1206+
require.Error(t, err)
1207+
assert.Equal(t, "rpc error: code = Code(400) desc = sample missing metric name", err.Error())
1208+
}
1209+
11691210
func TestDistributor_Push_ShouldGuaranteeShardingTokenConsistencyOverTheTime(t *testing.T) {
11701211
ctx := user.InjectOrgID(context.Background(), "user")
11711212
tests := map[string]struct {
@@ -1254,7 +1295,7 @@ func TestDistributor_Push_ShouldGuaranteeShardingTokenConsistencyOverTheTime(t *
12541295
})
12551296

12561297
// Push the series to the distributor
1257-
req := mockWriteRequest(testData.inputSeries, 1, 1)
1298+
req := mockWriteRequest([]labels.Labels{testData.inputSeries}, 1, 1)
12581299
_, err := ds[0].Push(ctx, req)
12591300
require.NoError(t, err)
12601301

@@ -1312,7 +1353,7 @@ func TestDistributor_Push_LabelNameValidation(t *testing.T) {
13121353
shuffleShardSize: 1,
13131354
skipLabelNameValidation: tc.skipLabelNameValidationCfg,
13141355
})
1315-
req := mockWriteRequest(tc.inputLabels, 42, 100000)
1356+
req := mockWriteRequest([]labels.Labels{tc.inputLabels}, 42, 100000)
13161357
req.SkipLabelNameValidation = tc.skipLabelNameValidationReq
13171358
_, err := ds[0].Push(ctx, req)
13181359
if tc.errExpected {
@@ -1790,7 +1831,7 @@ func TestDistributor_MetricsForLabelMatchers(t *testing.T) {
17901831
ctx := user.InjectOrgID(context.Background(), "test")
17911832

17921833
for _, series := range fixtures {
1793-
req := mockWriteRequest(series.lbls, series.value, series.timestamp)
1834+
req := mockWriteRequest([]labels.Labels{series.lbls}, series.value, series.timestamp)
17941835
_, err := ds[0].Push(ctx, req)
17951836
require.NoError(t, err)
17961837
}
@@ -1875,15 +1916,16 @@ func mustNewMatcher(t labels.MatchType, n, v string) *labels.Matcher {
18751916
return m
18761917
}
18771918

1878-
func mockWriteRequest(lbls labels.Labels, value float64, timestampMs int64) *cortexpb.WriteRequest {
1879-
samples := []cortexpb.Sample{
1880-
{
1919+
func mockWriteRequest(lbls []labels.Labels, value float64, timestampMs int64) *cortexpb.WriteRequest {
1920+
samples := make([]cortexpb.Sample, len(lbls))
1921+
for i := range lbls {
1922+
samples[i] = cortexpb.Sample{
18811923
TimestampMs: timestampMs,
18821924
Value: value,
1883-
},
1925+
}
18841926
}
18851927

1886-
return cortexpb.ToWriteRequest([]labels.Labels{lbls}, samples, nil, cortexpb.API)
1928+
return cortexpb.ToWriteRequest(lbls, samples, nil, cortexpb.API)
18871929
}
18881930

18891931
type prepConfig struct {
@@ -2644,27 +2686,33 @@ func TestDistributor_Push_Relabel(t *testing.T) {
26442686
ctx := user.InjectOrgID(context.Background(), "user")
26452687

26462688
type testcase struct {
2647-
inputSeries labels.Labels
2689+
name string
2690+
inputSeries []labels.Labels
26482691
expectedSeries labels.Labels
26492692
metricRelabelConfigs []*relabel.Config
26502693
}
26512694

26522695
cases := []testcase{
2653-
// No relabel config.
26542696
{
2655-
inputSeries: labels.Labels{
2656-
{Name: "__name__", Value: "foo"},
2657-
{Name: "cluster", Value: "one"},
2697+
name: "with no relabel config",
2698+
inputSeries: []labels.Labels{
2699+
{
2700+
{Name: "__name__", Value: "foo"},
2701+
{Name: "cluster", Value: "one"},
2702+
},
26582703
},
26592704
expectedSeries: labels.Labels{
26602705
{Name: "__name__", Value: "foo"},
26612706
{Name: "cluster", Value: "one"},
26622707
},
26632708
},
26642709
{
2665-
inputSeries: labels.Labels{
2666-
{Name: "__name__", Value: "foo"},
2667-
{Name: "cluster", Value: "one"},
2710+
name: "with hardcoded replace",
2711+
inputSeries: []labels.Labels{
2712+
{
2713+
{Name: "__name__", Value: "foo"},
2714+
{Name: "cluster", Value: "one"},
2715+
},
26682716
},
26692717
expectedSeries: labels.Labels{
26702718
{Name: "__name__", Value: "foo"},
@@ -2680,37 +2728,126 @@ func TestDistributor_Push_Relabel(t *testing.T) {
26802728
},
26812729
},
26822730
},
2731+
{
2732+
name: "with drop action",
2733+
inputSeries: []labels.Labels{
2734+
{
2735+
{Name: "__name__", Value: "foo"},
2736+
{Name: "cluster", Value: "one"},
2737+
},
2738+
{
2739+
{Name: "__name__", Value: "bar"},
2740+
{Name: "cluster", Value: "two"},
2741+
},
2742+
},
2743+
expectedSeries: labels.Labels{
2744+
{Name: "__name__", Value: "bar"},
2745+
{Name: "cluster", Value: "two"},
2746+
},
2747+
metricRelabelConfigs: []*relabel.Config{
2748+
{
2749+
SourceLabels: []model.LabelName{"__name__"},
2750+
Action: relabel.Drop,
2751+
Regex: relabel.MustNewRegexp("(foo)"),
2752+
},
2753+
},
2754+
},
26832755
}
26842756

26852757
for _, tc := range cases {
2686-
var err error
2687-
var limits validation.Limits
2688-
flagext.DefaultValues(&limits)
2689-
limits.MetricRelabelConfigs = tc.metricRelabelConfigs
2758+
t.Run(tc.name, func(t *testing.T) {
2759+
var err error
2760+
var limits validation.Limits
2761+
flagext.DefaultValues(&limits)
2762+
limits.MetricRelabelConfigs = tc.metricRelabelConfigs
26902763

2691-
ds, ingesters, _ := prepare(t, prepConfig{
2692-
numIngesters: 2,
2693-
happyIngesters: 2,
2694-
numDistributors: 1,
2695-
shardByAllLabels: true,
2696-
limits: &limits,
2697-
})
2764+
ds, ingesters, _ := prepare(t, prepConfig{
2765+
numIngesters: 2,
2766+
happyIngesters: 2,
2767+
numDistributors: 1,
2768+
shardByAllLabels: true,
2769+
limits: &limits,
2770+
})
26982771

2699-
// Push the series to the distributor
2700-
req := mockWriteRequest(tc.inputSeries, 1, 1)
2701-
_, err = ds[0].Push(ctx, req)
2702-
require.NoError(t, err)
2772+
// Push the series to the distributor
2773+
req := mockWriteRequest(tc.inputSeries, 1, 1)
2774+
_, err = ds[0].Push(ctx, req)
2775+
require.NoError(t, err)
27032776

2704-
// Since each test pushes only 1 series, we do expect the ingester
2705-
// to have received exactly 1 series
2706-
for i := range ingesters {
2707-
timeseries := ingesters[i].series()
2708-
assert.Equal(t, 1, len(timeseries))
2709-
for _, v := range timeseries {
2710-
assert.Equal(t, tc.expectedSeries, cortexpb.FromLabelAdaptersToLabels(v.Labels))
2777+
// Since each test case ends up with only 1 series after relabeling, we do expect the ingester
2778+
// to have received exactly 1 series
2779+
for i := range ingesters {
2780+
timeseries := ingesters[i].series()
2781+
assert.Equal(t, 1, len(timeseries))
2782+
for _, v := range timeseries {
2783+
assert.Equal(t, tc.expectedSeries, cortexpb.FromLabelAdaptersToLabels(v.Labels))
2784+
}
27112785
}
2712-
}
2786+
})
2787+
}
2788+
}
2789+
2790+
func TestDistributor_Push_RelabelDropWillExportMetricOfDroppedSamples(t *testing.T) {
2791+
metricRelabelConfigs := []*relabel.Config{
2792+
{
2793+
SourceLabels: []model.LabelName{"__name__"},
2794+
Action: relabel.Drop,
2795+
Regex: relabel.MustNewRegexp("(foo)"),
2796+
},
2797+
}
2798+
2799+
inputSeries := []labels.Labels{
2800+
{
2801+
{Name: "__name__", Value: "foo"},
2802+
{Name: "cluster", Value: "one"},
2803+
},
2804+
{
2805+
{Name: "__name__", Value: "bar"},
2806+
{Name: "cluster", Value: "two"},
2807+
},
2808+
}
2809+
2810+
var err error
2811+
var limits validation.Limits
2812+
flagext.DefaultValues(&limits)
2813+
limits.MetricRelabelConfigs = metricRelabelConfigs
2814+
2815+
ds, ingesters, regs := prepare(t, prepConfig{
2816+
numIngesters: 2,
2817+
happyIngesters: 2,
2818+
numDistributors: 1,
2819+
shardByAllLabels: true,
2820+
limits: &limits,
2821+
})
2822+
2823+
regs[0].MustRegister(validation.DiscardedSamples)
2824+
validation.DiscardedSamples.Reset()
2825+
2826+
// Push the series to the distributor
2827+
req := mockWriteRequest(inputSeries, 1, 1)
2828+
ctx := user.InjectOrgID(context.Background(), "user1")
2829+
_, err = ds[0].Push(ctx, req)
2830+
require.NoError(t, err)
2831+
2832+
// Since each test pushes only 1 series, we do expect the ingester
2833+
// to have received exactly 1 series
2834+
for i := range ingesters {
2835+
timeseries := ingesters[i].series()
2836+
assert.Equal(t, 1, len(timeseries))
27132837
}
2838+
2839+
metrics := []string{"cortex_distributor_received_samples_total", "cortex_discarded_samples_total"}
2840+
2841+
expectedMetrics := `
2842+
# HELP cortex_discarded_samples_total The total number of samples that were discarded.
2843+
# TYPE cortex_discarded_samples_total counter
2844+
cortex_discarded_samples_total{reason="relabel_configuration",user="user1"} 1
2845+
# HELP cortex_distributor_received_samples_total The total number of received samples, excluding rejected and deduped samples.
2846+
# TYPE cortex_distributor_received_samples_total counter
2847+
cortex_distributor_received_samples_total{user="user1"} 1
2848+
`
2849+
2850+
require.NoError(t, testutil.GatherAndCompare(regs[0], strings.NewReader(expectedMetrics), metrics...))
27142851
}
27152852

27162853
func countMockIngestersCalls(ingesters []mockIngester, name string) int {

pkg/util/validation/validate.go

+5
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@ const (
5757
// Too many HA clusters is one of the reasons for discarding samples.
5858
TooManyHAClusters = "too_many_ha_clusters"
5959

60+
// DroppedByRelabelConfiguration Samples can also be discarded because of relabeling configuration
61+
DroppedByRelabelConfiguration = "relabel_configuration"
62+
// DroppedByUserConfigurationOverride Samples discarded due to user configuration removing label __name__
63+
DroppedByUserConfigurationOverride = "user_label_removal_configuration"
64+
6065
// The combined length of the label names and values of an Exemplar's LabelSet MUST NOT exceed 128 UTF-8 characters
6166
// https://github.com/OpenObservability/OpenMetrics/blob/main/specification/OpenMetrics.md#exemplars
6267
ExemplarMaxLabelSetLength = 128

0 commit comments

Comments
 (0)