Skip to content

Commit ac4d48b

Browse files
czeslavopmalekrainest
authored
feat(metrics) add failure reason (#2965)
Add failure_reason label to ingress_controller_configuration_push_count metric. Co-authored-by: Patryk Małek <[email protected]> Co-authored-by: Travis Raines <[email protected]>
1 parent 6eb2b9c commit ac4d48b

File tree

5 files changed

+222
-41
lines changed

5 files changed

+222
-41
lines changed

CHANGELOG.md

+6
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,12 @@ instructions and the [revised Kong 3.x upgrade instructions](https://docs.konghq
7979
overriden by setting a `konghq.com/regex-prefix` annotation, for routes that
8080
need their paths to actually begin with `/~`
8181
[#2956](https://github.com/Kong/kubernetes-ingress-controller/pull/2956)
82+
- Prometheus metrics now highlight configuration push failures caused by
83+
conflicts. The `ingress_controller_configuration_push_count` Prometheus
84+
metric now reports `success="false"` with a `failure_reason="conflict|other"`
85+
label, distinguishing configuration conflicts from other errors (transient
86+
network errors, Kong offline, Kong reported non-conflict error, etc.).
87+
[#2965](https://github.com/Kong/kubernetes-ingress-controller/pull/2965)
8288

8389
### Fixed
8490

grafana.json

+3-4
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@
235235
"targets": [
236236
{
237237
"exemplar": true,
238-
"expr": "ingress_controller_configuration_push_count{success=\"false\"}",
238+
"expr": "sum by (failure_reason) (ingress_controller_configuration_push_count{success=\"false\"})",
239239
"instant": false,
240240
"interval": "",
241241
"legendFormat": "",
@@ -375,6 +375,5 @@
375375
"timezone": "",
376376
"title": "Kong ingress controller",
377377
"uid": "SaGwMOtnz",
378-
"version": 1,
379-
"weekStart": ""
380-
}
378+
"version": 2
379+
}

internal/dataplane/sendconfig/sendconfig.go

+89-22
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ import (
55
"context"
66
"encoding/hex"
77
"encoding/json"
8+
"errors"
89
"fmt"
10+
"net"
911
"net/http"
1012
"reflect"
1113
"sync"
@@ -16,6 +18,7 @@ import (
1618
"github.com/kong/deck/file"
1719
"github.com/kong/deck/state"
1820
deckutils "github.com/kong/deck/utils"
21+
"github.com/kong/go-kong/kong"
1922
"github.com/prometheus/client_golang/prometheus"
2023
"github.com/sirupsen/logrus"
2124

@@ -85,8 +88,9 @@ func PerformUpdate(ctx context.Context,
8588

8689
if err != nil {
8790
promMetrics.ConfigPushCount.With(prometheus.Labels{
88-
metrics.SuccessKey: metrics.SuccessFalse,
89-
metrics.ProtocolKey: metricsProtocol,
91+
metrics.SuccessKey: metrics.SuccessFalse,
92+
metrics.ProtocolKey: metricsProtocol,
93+
metrics.FailureReasonKey: pushFailureReason(err),
9094
}).Inc()
9195
promMetrics.ConfigPushDuration.With(prometheus.Labels{
9296
metrics.SuccessKey: metrics.SuccessFalse,
@@ -96,8 +100,9 @@ func PerformUpdate(ctx context.Context,
96100
}
97101

98102
promMetrics.ConfigPushCount.With(prometheus.Labels{
99-
metrics.SuccessKey: metrics.SuccessTrue,
100-
metrics.ProtocolKey: metricsProtocol,
103+
metrics.SuccessKey: metrics.SuccessTrue,
104+
metrics.ProtocolKey: metricsProtocol,
105+
metrics.FailureReasonKey: "",
101106
}).Inc()
102107
promMetrics.ConfigPushDuration.With(prometheus.Labels{
103108
metrics.SuccessKey: metrics.SuccessTrue,
@@ -204,45 +209,55 @@ func onUpdateDBMode(ctx context.Context,
204209
skipCACertificates bool,
205210
) error {
206211
dumpConfig := dump.Config{SelectorTags: selectorTags, SkipCACerts: skipCACertificates}
207-
// read the current state
208-
rawState, err := dump.Get(ctx, kongConfig.Client, dumpConfig)
209-
if err != nil {
210-
return fmt.Errorf("loading configuration from kong: %w", err)
211-
}
212-
currentState, err := state.Get(rawState)
213-
if err != nil {
214-
return err
215-
}
216212

217-
// read the target state
218-
rawState, err = file.Get(ctx, targetContent, file.RenderConfig{
219-
CurrentState: currentState,
220-
KongVersion: kongConfig.Version,
221-
}, dumpConfig, kongConfig.Client)
213+
cs, err := currentState(ctx, kongConfig, dumpConfig)
222214
if err != nil {
223215
return err
224216
}
225-
targetState, err := state.Get(rawState)
217+
218+
ts, err := targetState(ctx, targetContent, cs, kongConfig, dumpConfig)
226219
if err != nil {
227-
return err
220+
return deckConfigConflictError{err}
228221
}
229222

230223
syncer, err := diff.NewSyncer(diff.SyncerOpts{
231-
CurrentState: currentState,
232-
TargetState: targetState,
224+
CurrentState: cs,
225+
TargetState: ts,
233226
KongClient: kongConfig.Client,
234227
SilenceWarnings: true,
235228
})
236229
if err != nil {
237230
return fmt.Errorf("creating a new syncer: %w", err)
238231
}
232+
239233
_, errs := syncer.Solve(ctx, kongConfig.Concurrency, false)
240234
if errs != nil {
241235
return deckutils.ErrArray{Errors: errs}
242236
}
243237
return nil
244238
}
245239

240+
func currentState(ctx context.Context, kongConfig *Kong, dumpConfig dump.Config) (*state.KongState, error) {
241+
rawState, err := dump.Get(ctx, kongConfig.Client, dumpConfig)
242+
if err != nil {
243+
return nil, fmt.Errorf("loading configuration from kong: %w", err)
244+
}
245+
246+
return state.Get(rawState)
247+
}
248+
249+
func targetState(ctx context.Context, targetContent *file.Content, currentState *state.KongState, kongConfig *Kong, dumpConfig dump.Config) (*state.KongState, error) {
250+
rawState, err := file.Get(ctx, targetContent, file.RenderConfig{
251+
CurrentState: currentState,
252+
KongVersion: kongConfig.Version,
253+
}, dumpConfig, kongConfig.Client)
254+
if err != nil {
255+
return nil, err
256+
}
257+
258+
return state.Get(rawState)
259+
}
260+
246261
func equalSHA(a, b []byte) bool {
247262
return reflect.DeepEqual(a, b)
248263
}
@@ -273,3 +288,55 @@ func hasSHAUpdateAlreadyBeenReported(latestUpdateSHA []byte) bool {
273288
latestReportedSHA = latestUpdateSHA
274289
return false
275290
}
291+
292+
// deckConfigConflictError is an error used to wrap deck config conflict errors returned from deck functions
293+
// transforming KongRawState to KongState (e.g. state.Get, dump.Get).
294+
type deckConfigConflictError struct {
295+
err error
296+
}
297+
298+
func (e deckConfigConflictError) Error() string {
299+
return e.err.Error()
300+
}
301+
302+
func (e deckConfigConflictError) Is(target error) bool {
303+
_, ok := target.(deckConfigConflictError)
304+
return ok
305+
}
306+
307+
func (e deckConfigConflictError) Unwrap() error {
308+
return e.err
309+
}
310+
311+
// pushFailureReason extracts config push failure reason from an error returned from onUpdateInMemoryMode or onUpdateDBMode.
312+
func pushFailureReason(err error) string {
313+
var netErr net.Error
314+
if errors.As(err, &netErr) {
315+
return metrics.FailureReasonNetwork
316+
}
317+
318+
if isConflictErr(err) {
319+
return metrics.FailureReasonConflict
320+
}
321+
322+
return metrics.FailureReasonOther
323+
}
324+
325+
func isConflictErr(err error) bool {
326+
var apiErr *kong.APIError
327+
if errors.As(err, &apiErr) && apiErr.Code() == http.StatusConflict ||
328+
errors.Is(err, deckConfigConflictError{}) {
329+
return true
330+
}
331+
332+
var deckErrArray deckutils.ErrArray
333+
if errors.As(err, &deckErrArray) {
334+
for _, err := range deckErrArray.Errors {
335+
if isConflictErr(err) {
336+
return true
337+
}
338+
}
339+
}
340+
341+
return false
342+
}

internal/dataplane/sendconfig/sendconfig_test.go

+86
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,21 @@
11
package sendconfig
22

33
import (
4+
"errors"
5+
"fmt"
6+
"net"
7+
"net/http"
48
"reflect"
59
"testing"
610

711
"github.com/kong/deck/file"
12+
deckutils "github.com/kong/deck/utils"
813
"github.com/kong/go-kong/kong"
914
"github.com/sirupsen/logrus"
1015
"github.com/stretchr/testify/assert"
16+
"github.com/stretchr/testify/require"
17+
18+
"github.com/kong/kubernetes-ingress-controller/v2/internal/metrics"
1119
)
1220

1321
func Test_renderConfigWithCustomEntities(t *testing.T) {
@@ -128,3 +136,81 @@ func Test_updateReportingUtilities(t *testing.T) {
128136
assert.True(t, hasSHAUpdateAlreadyBeenReported([]byte("yet-another-fake-sha")))
129137
assert.True(t, hasSHAUpdateAlreadyBeenReported([]byte("yet-another-fake-sha")))
130138
}
139+
140+
func Test_pushFailureReason(t *testing.T) {
141+
apiConflictErr := kong.NewAPIError(http.StatusConflict, "conflict api error")
142+
networkErr := net.UnknownNetworkError("network error")
143+
genericError := errors.New("generic error")
144+
145+
testCases := []struct {
146+
name string
147+
err error
148+
expectedReason string
149+
}{
150+
{
151+
name: "generic_error",
152+
err: genericError,
153+
expectedReason: metrics.FailureReasonOther,
154+
},
155+
{
156+
name: "api_conflict_error",
157+
err: apiConflictErr,
158+
expectedReason: metrics.FailureReasonConflict,
159+
},
160+
{
161+
name: "api_conflict_error_wrapped",
162+
err: fmt.Errorf("wrapped conflict api err: %w", apiConflictErr),
163+
expectedReason: metrics.FailureReasonConflict,
164+
},
165+
{
166+
name: "deck_config_conflict_error_empty",
167+
err: deckConfigConflictError{},
168+
expectedReason: metrics.FailureReasonConflict,
169+
},
170+
{
171+
name: "deck_config_conflict_error_with_generic_error",
172+
err: deckConfigConflictError{genericError},
173+
expectedReason: metrics.FailureReasonConflict,
174+
},
175+
{
176+
name: "deck_err_array_with_api_conflict_error",
177+
err: deckutils.ErrArray{Errors: []error{apiConflictErr}},
178+
expectedReason: metrics.FailureReasonConflict,
179+
},
180+
{
181+
name: "wrapped_deck_err_array_with_api_conflict_error",
182+
err: fmt.Errorf("wrapped: %w", deckutils.ErrArray{Errors: []error{apiConflictErr}}),
183+
expectedReason: metrics.FailureReasonConflict,
184+
},
185+
{
186+
name: "deck_err_array_with_generic_error",
187+
err: deckutils.ErrArray{Errors: []error{genericError}},
188+
expectedReason: metrics.FailureReasonOther,
189+
},
190+
{
191+
name: "deck_err_array_empty",
192+
err: deckutils.ErrArray{Errors: []error{genericError}},
193+
expectedReason: metrics.FailureReasonOther,
194+
},
195+
{
196+
name: "network_error",
197+
err: networkErr,
198+
expectedReason: metrics.FailureReasonNetwork,
199+
},
200+
{
201+
name: "network_error_wrapped_in_deck_config_conflict_error",
202+
err: deckConfigConflictError{networkErr},
203+
expectedReason: metrics.FailureReasonNetwork,
204+
},
205+
}
206+
207+
for _, tc := range testCases {
208+
tc := tc
209+
t.Run(tc.name, func(t *testing.T) {
210+
t.Parallel()
211+
212+
reason := pushFailureReason(tc.err)
213+
require.Equal(t, tc.expectedReason, reason)
214+
})
215+
}
216+
}

0 commit comments

Comments
 (0)