8
8
"context"
9
9
"crypto/x509"
10
10
"crypto/x509/pkix"
11
+ "encoding/json"
11
12
"fmt"
12
13
"math"
13
14
"math/rand"
@@ -19,10 +20,7 @@ import (
19
20
"testing"
20
21
"time"
21
22
22
- "github.com/cortexproject/cortex/pkg/ruler"
23
-
24
- "github.com/cortexproject/cortex/pkg/storage/tsdb"
25
-
23
+ v1 "github.com/prometheus/client_golang/api/prometheus/v1"
26
24
"github.com/prometheus/common/model"
27
25
"github.com/prometheus/prometheus/model/labels"
28
26
"github.com/prometheus/prometheus/model/rulefmt"
@@ -37,6 +35,8 @@ import (
37
35
"github.com/cortexproject/cortex/integration/e2e"
38
36
e2edb "github.com/cortexproject/cortex/integration/e2e/db"
39
37
"github.com/cortexproject/cortex/integration/e2ecortex"
38
+ "github.com/cortexproject/cortex/pkg/ruler"
39
+ "github.com/cortexproject/cortex/pkg/storage/tsdb"
40
40
)
41
41
42
42
func TestRulerAPI (t * testing.T ) {
@@ -1038,6 +1038,168 @@ func TestRulerDisablesRuleGroups(t *testing.T) {
1038
1038
})
1039
1039
}
1040
1040
1041
+ func TestRulerKeepFiring (t * testing.T ) {
1042
+ s , err := e2e .NewScenario (networkName )
1043
+ require .NoError (t , err )
1044
+ defer s .Close ()
1045
+
1046
+ // Start dependencies.
1047
+ consul := e2edb .NewConsul ()
1048
+ minio := e2edb .NewMinio (9000 , bucketName , rulestoreBucketName )
1049
+ require .NoError (t , s .StartAndWaitReady (consul , minio ))
1050
+
1051
+ // Configure the ruler.
1052
+ flags := mergeFlags (
1053
+ BlocksStorageFlags (),
1054
+ RulerFlags (),
1055
+ map [string ]string {
1056
+ // Since we're not going to run any rule (our only rule is invalid), we don't need the
1057
+ // store-gateway to be configured to a valid address.
1058
+ "-querier.store-gateway-addresses" : "localhost:12345" ,
1059
+ // Enable the bucket index so we can skip the initial bucket scan.
1060
+ "-blocks-storage.bucket-store.bucket-index.enabled" : "true" ,
1061
+ // Evaluate rules often, so that we don't need to wait for metrics to show up.
1062
+ "-ruler.evaluation-interval" : "2s" ,
1063
+ "-ruler.poll-interval" : "2s" ,
1064
+ // No delay
1065
+ "-ruler.evaluation-delay-duration" : "0" ,
1066
+
1067
+ "-blocks-storage.tsdb.block-ranges-period" : "1h" ,
1068
+ "-blocks-storage.bucket-store.sync-interval" : "1s" ,
1069
+ "-blocks-storage.tsdb.retention-period" : "2h" ,
1070
+
1071
+ // We run single ingester only, no replication.
1072
+ "-distributor.replication-factor" : "1" ,
1073
+
1074
+ "-querier.max-fetched-chunks-per-query" : "50" ,
1075
+ },
1076
+ )
1077
+
1078
+ const namespace = "test"
1079
+ const user = "user"
1080
+
1081
+ distributor := e2ecortex .NewDistributor ("distributor" , e2ecortex .RingStoreConsul , consul .NetworkHTTPEndpoint (), flags , "" )
1082
+ ruler := e2ecortex .NewRuler ("ruler" , consul .NetworkHTTPEndpoint (), flags , "" )
1083
+ ingester := e2ecortex .NewIngester ("ingester" , e2ecortex .RingStoreConsul , consul .NetworkHTTPEndpoint (), flags , "" )
1084
+ require .NoError (t , s .StartAndWaitReady (distributor , ingester , ruler ))
1085
+
1086
+ // Wait until both the distributor and ruler have updated the ring. The querier will also watch
1087
+ // the store-gateway ring if blocks sharding is enabled.
1088
+ require .NoError (t , distributor .WaitSumMetrics (e2e .Equals (512 ), "cortex_ring_tokens_total" ))
1089
+ require .NoError (t , ruler .WaitSumMetrics (e2e .Equals (512 ), "cortex_ring_tokens_total" ))
1090
+
1091
+ c , err := e2ecortex .NewClient (distributor .HTTPEndpoint (), "" , "" , ruler .HTTPEndpoint (), user )
1092
+ require .NoError (t , err )
1093
+
1094
+ expression := "vector(1) > 0" // Alert will fire
1095
+ groupName := "rule_group_1"
1096
+ ruleName := "rule_keep_firing"
1097
+
1098
+ require .NoError (t , c .SetRuleGroup (alertRuleWithKeepFiringFor (groupName , ruleName , expression , model .Duration (10 * time .Second )), namespace ))
1099
+
1100
+ m := ruleGroupMatcher (user , namespace , groupName )
1101
+
1102
+ // Wait until ruler has loaded the group.
1103
+ require .NoError (t , ruler .WaitSumMetricsWithOptions (e2e .Equals (1 ), []string {"cortex_prometheus_rule_group_rules" }, e2e .WithLabelMatchers (m ), e2e .WaitMissingMetrics ))
1104
+ // Wait until rule group has tried to evaluate the rule.
1105
+ require .NoError (t , ruler .WaitSumMetricsWithOptions (e2e .GreaterOrEqual (1 ), []string {"cortex_prometheus_rule_evaluations_total" }, e2e .WithLabelMatchers (m ), e2e .WaitMissingMetrics ))
1106
+
1107
+ groups , err := c .GetPrometheusRules (e2ecortex.RuleFilter {
1108
+ RuleNames : []string {ruleName },
1109
+ })
1110
+ require .NoError (t , err )
1111
+ require .NotEmpty (t , groups )
1112
+ require .Equal (t , 1 , len (groups [0 ].Rules ))
1113
+ alert := parseAlertFromRule (t , groups [0 ].Rules [0 ])
1114
+ require .Equal (t , float64 (10 ), alert .KeepFiringFor )
1115
+ require .Equal (t , 1 , len (alert .Alerts ))
1116
+ require .Empty (t , alert .Alerts [0 ].KeepFiringSince ) //Alert expression not resolved, keepFiringSince should be empty
1117
+
1118
+ expression = "vector(1) > 1" // Resolve, should keep firing for set duration
1119
+ ts := time .Now ()
1120
+ require .NoError (t , c .SetRuleGroup (alertRuleWithKeepFiringFor (groupName , ruleName , expression , model .Duration (10 * time .Second )), namespace ))
1121
+ // Wait until rule group has tried to evaluate the rule.
1122
+ require .NoError (t , ruler .WaitSumMetricsWithOptions (e2e .GreaterOrEqual (2 ), []string {"cortex_prometheus_rule_evaluations_total" }, e2e .WithLabelMatchers (m ), e2e .WaitMissingMetrics ))
1123
+
1124
+ updatedGroups , err := c .GetPrometheusRules (e2ecortex.RuleFilter {
1125
+ RuleNames : []string {ruleName },
1126
+ })
1127
+ require .NoError (t , err )
1128
+ require .NotEmpty (t , updatedGroups )
1129
+ require .Equal (t , 1 , len (updatedGroups [0 ].Rules ))
1130
+
1131
+ alert = parseAlertFromRule (t , updatedGroups [0 ].Rules [0 ])
1132
+ require .Equal (t , "firing" , alert .State )
1133
+ require .Equal (t , float64 (10 ), alert .KeepFiringFor )
1134
+ require .Equal (t , 1 , len (alert .Alerts ))
1135
+ require .NotEmpty (t , alert .Alerts [0 ].KeepFiringSince )
1136
+ require .Greater (t , alert .Alerts [0 ].KeepFiringSince .UnixNano (), ts .UnixNano (), "KeepFiringSince value should be after expression is resolved" )
1137
+
1138
+ time .Sleep (10 * time .Second ) // Sleep beyond keepFiringFor time
1139
+ updatedGroups , err = c .GetPrometheusRules (e2ecortex.RuleFilter {
1140
+ RuleNames : []string {ruleName },
1141
+ })
1142
+ require .NoError (t , err )
1143
+ require .NotEmpty (t , updatedGroups )
1144
+ require .Equal (t , 1 , len (updatedGroups [0 ].Rules ))
1145
+ alert = parseAlertFromRule (t , updatedGroups [0 ].Rules [0 ])
1146
+ require .Equal (t , 0 , len (alert .Alerts )) // alert should be resolved once keepFiringFor time expires
1147
+ }
1148
+
1149
+ func parseAlertFromRule (t * testing.T , rules interface {}) * alertingRule {
1150
+ responseJson , err := json .Marshal (rules )
1151
+ require .NoError (t , err )
1152
+
1153
+ alertResp := & alertingRule {}
1154
+ require .NoError (t , json .Unmarshal (responseJson , alertResp ))
1155
+ return alertResp
1156
+ }
1157
+
1158
+ type alertingRule struct {
1159
+ // State can be "pending", "firing", "inactive".
1160
+ State string `json:"state"`
1161
+ Name string `json:"name"`
1162
+ Query string `json:"query"`
1163
+ Duration float64 `json:"duration"`
1164
+ KeepFiringFor float64 `json:"keepFiringFor"`
1165
+ Labels labels.Labels `json:"labels"`
1166
+ Annotations labels.Labels `json:"annotations"`
1167
+ Alerts []* Alert `json:"alerts"`
1168
+ Health string `json:"health"`
1169
+ LastError string `json:"lastError"`
1170
+ Type v1.RuleType `json:"type"`
1171
+ LastEvaluation time.Time `json:"lastEvaluation"`
1172
+ EvaluationTime float64 `json:"evaluationTime"`
1173
+ }
1174
+
1175
+ // Alert has info for an alert.
1176
+ type Alert struct {
1177
+ Labels labels.Labels `json:"labels"`
1178
+ Annotations labels.Labels `json:"annotations"`
1179
+ State string `json:"state"`
1180
+ ActiveAt * time.Time `json:"activeAt"`
1181
+ KeepFiringSince * time.Time `json:"keepFiringSince,omitempty"`
1182
+ Value string `json:"value"`
1183
+ }
1184
+
1185
+ func alertRuleWithKeepFiringFor (groupName string , ruleName string , expression string , keepFiring model.Duration ) rulefmt.RuleGroup {
1186
+ var recordNode = yaml.Node {}
1187
+ var exprNode = yaml.Node {}
1188
+
1189
+ recordNode .SetString (ruleName )
1190
+ exprNode .SetString (expression )
1191
+
1192
+ return rulefmt.RuleGroup {
1193
+ Name : groupName ,
1194
+ Interval : 10 ,
1195
+ Rules : []rulefmt.RuleNode {{
1196
+ Alert : recordNode ,
1197
+ Expr : exprNode ,
1198
+ KeepFiringFor : keepFiring ,
1199
+ }},
1200
+ }
1201
+ }
1202
+
1041
1203
// ruleGroupMatcher returns an equality matcher on the "rule_group" label whose
// value is "/rules/<user>/<namespace>;<groupName>", so metric assertions can be
// restricted to a single rule group.
func ruleGroupMatcher(user, namespace, groupName string) *labels.Matcher {
	groupKey := fmt.Sprintf("/rules/%s/%s;%s", user, namespace, groupName)
	return labels.MustNewMatcher(labels.MatchEqual, "rule_group", groupKey)
}
0 commit comments