Skip to content

fix for query_rejection bug #6145

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
* [BUGFIX] Scheduler: Fix user queue in scheduler that was not thread-safe. #6077
* [BUGFIX] Ingester: Include out-of-order head compaction when compacting TSDB head. #6108
* [BUGFIX] Ingester: Fix `cortex_ingester_tsdb_mmap_chunks_total` metric. #6134
* [BUGFIX] Query Frontend: Fix query rejection bug for metadata queries. #6143

## 1.17.1 2024-05-20

Expand Down
40 changes: 40 additions & 0 deletions integration/query_frontend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -798,4 +798,44 @@ func TestQueryFrontendQueryRejection(t *testing.T) {
require.Equal(t, http.StatusUnprocessableEntity, resp.StatusCode)
require.Contains(t, string(body), tripperware.QueryRejectErrorMessage)

newRuntimeConfig = []byte(`overrides:
user-1:
query_rejection:
enabled: true
query_attributes:
- query_step_limit:
min: 12s
- api_type: "labels"
- dashboard_uid: "dash123"
panel_id: "panel321"
`)

require.NoError(t, client.Upload(context.Background(), configFileName, bytes.NewReader(newRuntimeConfig)))
time.Sleep(2 * time.Second)

// We expect any request for speific api to be rejected if api_type is configured for that api and no other properties provided
resp, body, err = c.LabelNamesRaw([]string{}, time.Time{}, time.Time{}, map[string]string{})
require.NoError(t, err)
require.Equal(t, http.StatusUnprocessableEntity, resp.StatusCode)
require.Contains(t, string(body), tripperware.QueryRejectErrorMessage)

// We expect request not to be rejected if all the provided parameters are not applicable for current API type
// There is no dashboardUID or panelId in metadata queries so if only those provided then metadata query shouldn't be rejected.
resp, body, err = c.LabelValuesRaw("cluster", []string{}, time.Time{}, time.Time{}, map[string]string{"User-Agent": "grafana-agent/v0.19.0"})
require.NoError(t, err)
require.Equal(t, http.StatusOK, resp.StatusCode)
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)

// We expect instant request not to be rejected if only query_step_limit is provided (it's not applicable to instant queries)
resp, body, err = c.QueryRaw("{instance=~\"hello.*\"}", time.Time{}, map[string]string{})
require.NoError(t, err)
require.Equal(t, http.StatusOK, resp.StatusCode)
require.NotContains(t, string(body), tripperware.QueryRejectErrorMessage)

// We expect range query request to be rejected even if only query_step_limit is provided
resp, body, err = c.QueryRangeRaw(`rate(test[1m])`, now.Add(-11*time.Hour), now.Add(-8*time.Hour), 20*time.Minute, map[string]string{"X-Dashboard-Uid": "dash123", "User-Agent": "grafana"})
require.NoError(t, err)
require.Equal(t, http.StatusUnprocessableEntity, resp.StatusCode)
require.Contains(t, string(body), tripperware.QueryRejectErrorMessage)

}
117 changes: 71 additions & 46 deletions pkg/querier/tripperware/query_attribute_matcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import (
"github.com/cortexproject/cortex/pkg/util/validation"
)

const QueryRejectErrorMessage = "This query has been rejected by the service operator."
const QueryRejectErrorMessage = "This query does not perform well and has been rejected by the service operator."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is probably unnecessary for this one. But we do make the message a flag in Ingester in order to customize it. https://github.com/cortexproject/cortex/blob/master/pkg/ingester/ingester.go#L162

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we want to include the first part does not perform well. If a regex accidentally matches a good query, does it confuse users?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In that case, it gives good reason to user to reach out service operator asking why that query does not perform well. We want user to reach out to service team if query is rejected by accident so I think it's good idea to have this part of the error message in case of accidental match.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I think we can leave it for now.


func rejectQueryOrSetPriority(r *http.Request, now time.Time, lookbackDelta time.Duration, limits Limits, userStr string, rejectedQueriesPerTenant *prometheus.CounterVec) error {
if limits == nil || !(limits.QueryPriority(userStr).Enabled || limits.QueryRejection(userStr).Enabled) {
Expand Down Expand Up @@ -86,89 +86,120 @@ func getOperation(r *http.Request) string {
}

func matchAttributeForExpressionQuery(attribute validation.QueryAttribute, op string, r *http.Request, query string, now time.Time, minTime, maxTime int64) bool {
if attribute.ApiType != "" && attribute.ApiType != op {
return false
matched := false
if attribute.ApiType != "" {
matched = true
if attribute.ApiType != op {
return false
}
}
if attribute.Regex != "" && attribute.Regex != ".*" && attribute.Regex != ".+" {
if attribute.CompiledRegex != nil && !attribute.CompiledRegex.MatchString(query) {
if attribute.Regex != "" {
matched = true
if attribute.Regex != ".*" && attribute.Regex != ".+" && attribute.CompiledRegex != nil && !attribute.CompiledRegex.MatchString(query) {
return false
}
}

if !isWithinTimeAttributes(attribute.TimeWindow, now, minTime, maxTime) {
return false
if attribute.TimeWindow.Start != 0 || attribute.TimeWindow.End != 0 {
matched = true
if !isWithinTimeAttributes(attribute.TimeWindow, now, minTime, maxTime) {
return false
}
}

if !isWithinTimeRangeAttribute(attribute.TimeRangeLimit, minTime, maxTime) {
return false
if attribute.TimeRangeLimit.Min != 0 || attribute.TimeRangeLimit.Max != 0 {
matched = true
if !isWithinTimeRangeAttribute(attribute.TimeRangeLimit, minTime, maxTime) {
return false
}
}

if op == "query_range" && !isWithinQueryStepLimit(attribute.QueryStepLimit, r) {
return false
if op == "query_range" && (attribute.QueryStepLimit.Min != 0 || attribute.QueryStepLimit.Max != 0) {
matched = true
if !isWithinQueryStepLimit(attribute.QueryStepLimit, r) {
return false
}
}

if attribute.UserAgentRegex != "" && attribute.UserAgentRegex != ".*" && attribute.CompiledUserAgentRegex != nil {
if !attribute.CompiledUserAgentRegex.MatchString(r.Header.Get("User-Agent")) {
if attribute.UserAgentRegex != "" {
matched = true
if attribute.UserAgentRegex != ".*" && attribute.CompiledUserAgentRegex != nil && !attribute.CompiledUserAgentRegex.MatchString(r.Header.Get("User-Agent")) {
return false
}
}

if attribute.DashboardUID != "" && attribute.DashboardUID != r.Header.Get("X-Dashboard-Uid") {
return false
if attribute.DashboardUID != "" {
matched = true
if attribute.DashboardUID != r.Header.Get("X-Dashboard-Uid") {
return false
}
}

if attribute.PanelID != "" && attribute.PanelID != r.Header.Get("X-Panel-Id") {
return false
if attribute.PanelID != "" {
matched = true
if attribute.PanelID != r.Header.Get("X-Panel-Id") {
return false
}
}

return true
return matched
}

func matchAttributeForMetadataQuery(attribute validation.QueryAttribute, op string, r *http.Request, now time.Time) bool {
if attribute.ApiType != "" && attribute.ApiType != op {
return false
matched := false
if attribute.ApiType != "" {
matched = true
if attribute.ApiType != op {
return false
}
}
if err := r.ParseForm(); err != nil {
return false
}
if attribute.Regex != "" && attribute.Regex != ".*" && attribute.CompiledRegex != nil {
atLeastOneMatched := false
for _, matcher := range r.Form["match[]"] {
if attribute.CompiledRegex.MatchString(matcher) {
atLeastOneMatched = true
break
if attribute.Regex != "" {
matched = true
if attribute.Regex != ".*" && attribute.CompiledRegex != nil {
atLeastOneMatched := false
for _, matcher := range r.Form["match[]"] {
if attribute.CompiledRegex.MatchString(matcher) {
atLeastOneMatched = true
break
}
}
if !atLeastOneMatched {
return false
}
}
if !atLeastOneMatched {
return false
}
}

startTime, _ := util.ParseTime(r.FormValue("start"))
endTime, _ := util.ParseTime(r.FormValue("end"))

if !isWithinTimeAttributes(attribute.TimeWindow, now, startTime, endTime) {
return false
if attribute.TimeWindow.Start != 0 || attribute.TimeWindow.End != 0 {
matched = true
if !isWithinTimeAttributes(attribute.TimeWindow, now, startTime, endTime) {
return false
}
}

if !isWithinTimeRangeAttribute(attribute.TimeRangeLimit, startTime, endTime) {
return false
if attribute.TimeRangeLimit.Min != 0 || attribute.TimeRangeLimit.Max != 0 {
matched = true
if !isWithinTimeRangeAttribute(attribute.TimeRangeLimit, startTime, endTime) {
return false
}
}

if attribute.UserAgentRegex != "" && attribute.UserAgentRegex != ".*" && attribute.CompiledUserAgentRegex != nil {
if !attribute.CompiledUserAgentRegex.MatchString(r.Header.Get("User-Agent")) {
if attribute.UserAgentRegex != "" {
matched = true
if attribute.UserAgentRegex != ".*" && attribute.CompiledUserAgentRegex != nil && !attribute.CompiledUserAgentRegex.MatchString(r.Header.Get("User-Agent")) {
return false
}
}

return true
return matched
}

func isWithinTimeAttributes(timeWindow validation.TimeWindow, now time.Time, startTime, endTime int64) bool {
if timeWindow.Start == 0 && timeWindow.End == 0 {
return true
}

if timeWindow.Start != 0 {
startTimeThreshold := now.Add(-1 * time.Duration(timeWindow.Start).Abs()).Add(-1 * time.Minute).Truncate(time.Minute).UnixMilli()
if startTime == 0 || startTime < startTimeThreshold {
Expand All @@ -187,9 +218,6 @@ func isWithinTimeAttributes(timeWindow validation.TimeWindow, now time.Time, sta
}

func isWithinTimeRangeAttribute(limit validation.TimeRangeLimit, startTime, endTime int64) bool {
if limit.Min == 0 && limit.Max == 0 {
return true
}

if startTime == 0 || endTime == 0 {
return false
Expand All @@ -208,9 +236,6 @@ func isWithinTimeRangeAttribute(limit validation.TimeRangeLimit, startTime, endT
}

func isWithinQueryStepLimit(queryStepLimit validation.QueryStepLimit, r *http.Request) bool {
if queryStepLimit.Min == 0 && queryStepLimit.Max == 0 {
return true
}

step, err := util.ParseDurationMs(r.FormValue("step"))
if err != nil {
Expand Down
Loading
Loading