8
8
"context"
9
9
"crypto/x509"
10
10
"crypto/x509/pkix"
11
+ "encoding/json"
11
12
"fmt"
12
13
"math"
13
14
"math/rand"
@@ -20,9 +21,8 @@ import (
20
21
"time"
21
22
22
23
"github.com/cortexproject/cortex/pkg/ruler"
23
-
24
24
"github.com/cortexproject/cortex/pkg/storage/tsdb"
25
-
25
+ v1 "github.com/prometheus/client_golang/api/prometheus/v1"
26
26
"github.com/prometheus/common/model"
27
27
"github.com/prometheus/prometheus/model/labels"
28
28
"github.com/prometheus/prometheus/model/rulefmt"
@@ -1038,6 +1038,165 @@ func TestRulerDisablesRuleGroups(t *testing.T) {
1038
1038
})
1039
1039
}
1040
1040
1041
+ func TestRulerKeepFiring (t * testing.T ) {
1042
+ s , err := e2e .NewScenario (networkName )
1043
+ require .NoError (t , err )
1044
+ defer s .Close ()
1045
+
1046
+ // Start dependencies.
1047
+ consul := e2edb .NewConsul ()
1048
+ minio := e2edb .NewMinio (9000 , bucketName , rulestoreBucketName )
1049
+ require .NoError (t , s .StartAndWaitReady (consul , minio ))
1050
+
1051
+ // Configure the ruler.
1052
+ flags := mergeFlags (
1053
+ BlocksStorageFlags (),
1054
+ RulerFlags (),
1055
+ map [string ]string {
1056
+ // Since we're not going to run any rule (our only rule is invalid), we don't need the
1057
+ // store-gateway to be configured to a valid address.
1058
+ "-querier.store-gateway-addresses" : "localhost:12345" ,
1059
+ // Enable the bucket index so we can skip the initial bucket scan.
1060
+ "-blocks-storage.bucket-store.bucket-index.enabled" : "true" ,
1061
+ // Evaluate rules often, so that we don't need to wait for metrics to show up.
1062
+ "-ruler.evaluation-interval" : "2s" ,
1063
+ "-ruler.poll-interval" : "2s" ,
1064
+ // No delay
1065
+ "-ruler.evaluation-delay-duration" : "0" ,
1066
+
1067
+ "-blocks-storage.tsdb.block-ranges-period" : "1h" ,
1068
+ "-blocks-storage.bucket-store.sync-interval" : "1s" ,
1069
+ "-blocks-storage.tsdb.retention-period" : "2h" ,
1070
+
1071
+ // We run single ingester only, no replication.
1072
+ "-distributor.replication-factor" : "1" ,
1073
+
1074
+ "-querier.max-fetched-chunks-per-query" : "50" ,
1075
+ },
1076
+ )
1077
+
1078
+ const namespace = "test"
1079
+ const user = "user"
1080
+
1081
+ distributor := e2ecortex .NewDistributor ("distributor" , e2ecortex .RingStoreConsul , consul .NetworkHTTPEndpoint (), flags , "" )
1082
+ ruler := e2ecortex .NewRuler ("ruler" , consul .NetworkHTTPEndpoint (), flags , "" )
1083
+ ingester := e2ecortex .NewIngester ("ingester" , e2ecortex .RingStoreConsul , consul .NetworkHTTPEndpoint (), flags , "" )
1084
+ require .NoError (t , s .StartAndWaitReady (distributor , ingester , ruler ))
1085
+
1086
+ // Wait until both the distributor and ruler have updated the ring. The querier will also watch
1087
+ // the store-gateway ring if blocks sharding is enabled.
1088
+ require .NoError (t , distributor .WaitSumMetrics (e2e .Equals (512 ), "cortex_ring_tokens_total" ))
1089
+ require .NoError (t , ruler .WaitSumMetrics (e2e .Equals (512 ), "cortex_ring_tokens_total" ))
1090
+
1091
+ c , err := e2ecortex .NewClient (distributor .HTTPEndpoint (), "" , "" , ruler .HTTPEndpoint (), user )
1092
+ require .NoError (t , err )
1093
+
1094
+ expression := "vector(1) > 0" // Alert will fire
1095
+ groupName := "rule_group_1"
1096
+ ruleName := "rule_keep_firing"
1097
+ require .NoError (t , err )
1098
+ require .NoError (t , c .SetRuleGroup (alertRuleWithKeepFiringFor (groupName , ruleName , expression , model .Duration (10 * time .Second )), namespace ))
1099
+
1100
+ m := ruleGroupMatcher (user , namespace , groupName )
1101
+
1102
+ // Wait until ruler has loaded the group.
1103
+ require .NoError (t , ruler .WaitSumMetricsWithOptions (e2e .Equals (1 ), []string {"cortex_prometheus_rule_group_rules" }, e2e .WithLabelMatchers (m ), e2e .WaitMissingMetrics ))
1104
+ // Wait until rule group has tried to evaluate the rule.
1105
+ require .NoError (t , ruler .WaitSumMetricsWithOptions (e2e .GreaterOrEqual (1 ), []string {"cortex_prometheus_rule_evaluations_total" }, e2e .WithLabelMatchers (m ), e2e .WaitMissingMetrics ))
1106
+
1107
+ groups , err := c .GetPrometheusRules (e2ecortex.RuleFilter {
1108
+ RuleNames : []string {ruleName },
1109
+ })
1110
+ require .NotEmpty (t , groups )
1111
+ require .Equal (t , 1 , len (groups [0 ].Rules ))
1112
+ alert := parseAlertFromRule (t , groups [0 ].Rules [0 ])
1113
+ require .Equal (t , float64 (10 ), alert .KeepFiringFor )
1114
+ require .Equal (t , 1 , len (alert .Alerts ))
1115
+ require .Empty (t , alert .Alerts [0 ].KeepFiringSince ) //Alert expression not resolved, keepFiringSince should be empty
1116
+
1117
+ expression = "vector(1) > 1" // Resolve, should keep firing for set duration
1118
+ ts := time .Now ()
1119
+ require .NoError (t , c .SetRuleGroup (alertRuleWithKeepFiringFor (groupName , ruleName , expression , model .Duration (10 * time .Second )), namespace ))
1120
+ // Wait until rule group has tried to evaluate the rule.
1121
+ require .NoError (t , ruler .WaitSumMetricsWithOptions (e2e .GreaterOrEqual (2 ), []string {"cortex_prometheus_rule_evaluations_total" }, e2e .WithLabelMatchers (m ), e2e .WaitMissingMetrics ))
1122
+
1123
+ updatedGroups , err := c .GetPrometheusRules (e2ecortex.RuleFilter {
1124
+ RuleNames : []string {ruleName },
1125
+ })
1126
+ require .NotEmpty (t , updatedGroups )
1127
+ require .Equal (t , 1 , len (updatedGroups [0 ].Rules ))
1128
+
1129
+ alert = parseAlertFromRule (t , updatedGroups [0 ].Rules [0 ])
1130
+ require .Equal (t , "firing" , alert .State )
1131
+ require .Equal (t , float64 (10 ), alert .KeepFiringFor )
1132
+ require .Equal (t , 1 , len (alert .Alerts ))
1133
+ require .NotEmpty (t , alert .Alerts [0 ].KeepFiringSince )
1134
+ require .Greater (t , alert .Alerts [0 ].KeepFiringSince .UnixNano (), ts .UnixNano (), "KeepFiringSince value should be after expression is resolved" )
1135
+
1136
+ time .Sleep (10 * time .Second ) // Sleep beyond keepFiringFor time
1137
+ updatedGroups , err = c .GetPrometheusRules (e2ecortex.RuleFilter {
1138
+ RuleNames : []string {ruleName },
1139
+ })
1140
+ require .NotEmpty (t , updatedGroups )
1141
+ require .Equal (t , 1 , len (updatedGroups [0 ].Rules ))
1142
+ alert = parseAlertFromRule (t , updatedGroups [0 ].Rules [0 ])
1143
+ require .Equal (t , 0 , len (alert .Alerts )) // alert should be resolved once keepFiringFor time expires
1144
+ }
1145
+
1146
+ func parseAlertFromRule (t * testing.T , rules interface {}) * alertingRule {
1147
+ responseJson , err := json .Marshal (rules )
1148
+ require .NoError (t , err )
1149
+
1150
+ alertResp := & alertingRule {}
1151
+ require .NoError (t , json .Unmarshal (responseJson , alertResp ))
1152
+ return alertResp
1153
+ }
1154
+
1155
+ type alertingRule struct {
1156
+ // State can be "pending", "firing", "inactive".
1157
+ State string `json:"state"`
1158
+ Name string `json:"name"`
1159
+ Query string `json:"query"`
1160
+ Duration float64 `json:"duration"`
1161
+ KeepFiringFor float64 `json:"keepFiringFor"`
1162
+ Labels labels.Labels `json:"labels"`
1163
+ Annotations labels.Labels `json:"annotations"`
1164
+ Alerts []* Alert `json:"alerts"`
1165
+ Health string `json:"health"`
1166
+ LastError string `json:"lastError"`
1167
+ Type v1.RuleType `json:"type"`
1168
+ LastEvaluation time.Time `json:"lastEvaluation"`
1169
+ EvaluationTime float64 `json:"evaluationTime"`
1170
+ }
1171
+
1172
+ // Alert has info for an alert.
1173
+ type Alert struct {
1174
+ Labels labels.Labels `json:"labels"`
1175
+ Annotations labels.Labels `json:"annotations"`
1176
+ State string `json:"state"`
1177
+ ActiveAt * time.Time `json:"activeAt"`
1178
+ KeepFiringSince * time.Time `json:"keepFiringSince,omitempty"`
1179
+ Value string `json:"value"`
1180
+ }
1181
+
1182
+ func alertRuleWithKeepFiringFor (groupName string , ruleName string , expression string , keepFiring model.Duration ) rulefmt.RuleGroup {
1183
+ var recordNode = yaml.Node {}
1184
+ var exprNode = yaml.Node {}
1185
+
1186
+ recordNode .SetString (ruleName )
1187
+ exprNode .SetString (expression )
1188
+
1189
+ return rulefmt.RuleGroup {
1190
+ Name : groupName ,
1191
+ Interval : 10 ,
1192
+ Rules : []rulefmt.RuleNode {{
1193
+ Alert : recordNode ,
1194
+ Expr : exprNode ,
1195
+ KeepFiringFor : keepFiring ,
1196
+ }},
1197
+ }
1198
+ }
1199
+
1041
1200
// ruleGroupMatcher returns an equality matcher on the "rule_group" label whose
// value is "/rules/<user>/<namespace>;<groupName>".
func ruleGroupMatcher(user, namespace, groupName string) *labels.Matcher {
	groupKey := fmt.Sprintf("/rules/%s/%s;%s", user, namespace, groupName)
	return labels.MustNewMatcher(labels.MatchEqual, "rule_group", groupKey)
}
0 commit comments