Skip to content

Commit c395adc

Browse files
author
mllu
authored
add custom metrics for RBACSync (#19)
* add custom metrics for RBACSync and fix test Signed-off-by: Meng-Lin Lu <[email protected]>
1 parent 42775a1 commit c395adc

File tree

17 files changed

+499
-15
lines changed

17 files changed

+499
-15
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ debugging.
234234
To use GSuite, you'll need a service account with "G Suite Domain-Wide
235235
Delegation of Authority". It's recommended to read the
236236
[guide](https://developers.google.com/admin-sdk/directory/v1/guides/delegation)
237-
to understand how this works in cause you run into issues. The blog
237+
to understand how this works in case you run into issues. The blog
238238
[Navigating the Google Suite Directory
239239
API](https://www.fin.com/post/2017/10/navigating-google-suite-directory-api)
240240
may also provide some insight.

example.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ spec:
2525
- group: someother-group
2626
roleRef:
2727
apiGroup: rbac.authorization.k8s.io
28-
kind: Role
28+
kind: Role
2929
name: someother-role
3030

3131
# Define group memberships directly

pkg/controller/controller.go

+28-6
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ package controller
1818

1919
import (
2020
"fmt"
21-
"k8s.io/klog"
2221
"time"
2322

2423
"github.com/pkg/errors"
@@ -41,13 +40,15 @@ import (
4140
"k8s.io/client-go/tools/cache"
4241
"k8s.io/client-go/tools/record"
4342
"k8s.io/client-go/util/workqueue"
43+
"k8s.io/klog"
4444

4545
rbacsyncv1alpha "github.com/cruise-automation/rbacsync/pkg/apis/rbacsync/v1alpha"
4646
clientset "github.com/cruise-automation/rbacsync/pkg/generated/clientset/versioned"
4747
rbacsyncscheme "github.com/cruise-automation/rbacsync/pkg/generated/clientset/versioned/scheme"
4848
informers "github.com/cruise-automation/rbacsync/pkg/generated/informers/externalversions/rbacsync/v1alpha"
4949
listers "github.com/cruise-automation/rbacsync/pkg/generated/listers/rbacsync/v1alpha"
5050
"github.com/cruise-automation/rbacsync/pkg/groups"
51+
"github.com/cruise-automation/rbacsync/pkg/metrics"
5152
)
5253

5354
const (
@@ -58,6 +59,8 @@ const (
5859
EventReasonConfigEnqueued = "ConfigEnqueued"
5960
EventReasonBindingConfigured = "BindingConfigured"
6061
EventReasonBindingDeleted = "BindingDeleted"
62+
EventReasonBindingDuplicated = "BindingDuplicated"
63+
EventReasonBindingWarning = "BindingWarning"
6164
EventReasonBindingError = "BindingError"
6265
EventReasonUnknownGroup = "UnknownGroup"
6366
)
@@ -278,8 +281,10 @@ func (c *Controller) enqueue(obj interface{}) {
278281
switch obj.(type) {
279282
case *rbacsyncv1alpha.RBACSyncConfig:
280283
c.queue.AddRateLimited(key)
284+
metrics.RBACSyncConfigStatus.WithLabelValues(metrics.LabelKindRBACSyncConfig, EventReasonConfigEnqueued).Inc()
281285
case *rbacsyncv1alpha.ClusterRBACSyncConfig:
282286
c.clusterqueue.AddRateLimited(key)
287+
metrics.RBACSyncConfigStatus.WithLabelValues(metrics.LabelKindClusterRBACSyncConfig, EventReasonConfigEnqueued).Inc()
283288
default:
284289
klog.Warningf("ignoring object of type %T: %#v", obj, obj)
285290
return // skip event emit below
@@ -344,6 +349,7 @@ func (c *Controller) handleConfig(config *rbacsyncv1alpha.RBACSyncConfig) error
344349
c.recorder.Eventf(config, corev1.EventTypeWarning,
345350
EventReasonBindingError, "RoleRef kind %q invalid for RBACSyncConfig on group %q, use only Role or ClusterRole",
346351
binding.RoleRef.Kind, binding.Group)
352+
metrics.RBACSyncConfigStatus.WithLabelValues(metrics.LabelKindRBACSyncConfig, EventReasonBindingError).Inc()
347353
continue
348354
}
349355

@@ -354,9 +360,11 @@ func (c *Controller) handleConfig(config *rbacsyncv1alpha.RBACSyncConfig) error
354360
if groups.IsNotFound(err) {
355361
c.recorder.Eventf(config, corev1.EventTypeWarning,
356362
EventReasonUnknownGroup, "group %v not found", binding.Group)
363+
metrics.RBACSyncConfigStatus.WithLabelValues(metrics.LabelKindRBACSyncConfig, EventReasonUnknownGroup).Inc()
357364
} else if groups.IsUnknownMemberships(err) {
358365
c.recorder.Eventf(config, corev1.EventTypeWarning,
359366
EventReasonBindingError, "group %v lookup failed: %v", binding.Group, err)
367+
metrics.RBACSyncConfigStatus.WithLabelValues(metrics.LabelKindRBACSyncConfig, EventReasonBindingError).Inc()
360368
// An error occurred looking up the groups, it should be marked as active
361369
// so the rolebindings are not deleted in the cleanup.
362370
active[name] = struct{}{}
@@ -367,8 +375,9 @@ func (c *Controller) handleConfig(config *rbacsyncv1alpha.RBACSyncConfig) error
367375

368376
if len(members) == 0 {
369377
c.recorder.Eventf(config, corev1.EventTypeWarning,
370-
EventReasonBindingError, "%v/%v has no members for group %v",
378+
EventReasonBindingWarning, "%v/%v has no members for group %v",
371379
config.Namespace, config.Name, binding.Group)
380+
metrics.RBACSyncConfigStatus.WithLabelValues(metrics.LabelKindRBACSyncConfig, EventReasonBindingWarning).Inc()
372381
continue
373382
}
374383

@@ -378,7 +387,8 @@ func (c *Controller) handleConfig(config *rbacsyncv1alpha.RBACSyncConfig) error
378387
// result will be the same. Accordingly, we log the bad
379388
// configuration and move on.
380389
c.recorder.Eventf(config, corev1.EventTypeWarning,
381-
EventReasonBindingError, "duplicate binding %v ignored", name)
390+
EventReasonBindingDuplicated, "duplicate binding %v ignored", name)
391+
metrics.RBACSyncBindingStatus.WithLabelValues(metrics.LabelKindRoleBinding, EventReasonBindingDuplicated).Inc()
382392
continue
383393
}
384394

@@ -403,6 +413,7 @@ func (c *Controller) handleConfig(config *rbacsyncv1alpha.RBACSyncConfig) error
403413
c.recorder.Eventf(config, corev1.EventTypeWarning,
404414
EventReasonBindingError, "unable to update or create RoleBinding %v/%v: %v",
405415
rb.Namespace, rb.Name, err)
416+
metrics.RBACSyncBindingStatus.WithLabelValues(metrics.LabelKindRoleBinding, EventReasonBindingError).Inc()
406417
continue
407418
}
408419

@@ -416,7 +427,7 @@ func (c *Controller) handleConfig(config *rbacsyncv1alpha.RBACSyncConfig) error
416427
c.recorder.Eventf(config, corev1.EventTypeNormal,
417428
EventReasonBindingConfigured,
418429
"RoleBinding %v/%v configured", created.Namespace, created.Name)
419-
430+
metrics.RBACSyncBindingStatus.WithLabelValues(metrics.LabelKindRoleBinding, EventReasonBindingConfigured).Inc()
420431
}
421432

422433
selector, err := buildChildSelector(config.Name)
@@ -451,6 +462,7 @@ func (c *Controller) handleConfig(config *rbacsyncv1alpha.RBACSyncConfig) error
451462
EventReasonBindingError,
452463
"RoleBinding %v/%v could not be deleted: %v",
453464
rb.Namespace, rb.Name, err)
465+
metrics.RBACSyncBindingStatus.WithLabelValues(metrics.LabelKindRoleBinding, EventReasonBindingError).Inc()
454466
continue
455467
}
456468

@@ -459,6 +471,7 @@ func (c *Controller) handleConfig(config *rbacsyncv1alpha.RBACSyncConfig) error
459471

460472
c.recorder.Eventf(config, corev1.EventTypeNormal,
461473
EventReasonBindingDeleted, "RoleBinding %v/%v deleted", rb.Namespace, rb.Name)
474+
metrics.RBACSyncBindingStatus.WithLabelValues(metrics.LabelKindRoleBinding, EventReasonBindingDeleted).Inc()
462475
}
463476

464477
return nil
@@ -507,6 +520,7 @@ func (c *Controller) handleClusterConfig(config *rbacsyncv1alpha.ClusterRBACSync
507520
c.recorder.Eventf(config, corev1.EventTypeWarning,
508521
EventReasonBindingError, "RoleRef kind %q invalid for ClusterRBACSyncConfig on group %q, use only ClusterRole",
509522
binding.RoleRef.Kind, binding.Group)
523+
metrics.RBACSyncConfigStatus.WithLabelValues(metrics.LabelKindClusterRBACSyncConfig, EventReasonBindingError).Inc()
510524
continue
511525
}
512526
name := config.Name + "-" + binding.Group + "-" + binding.RoleRef.Name
@@ -516,9 +530,11 @@ func (c *Controller) handleClusterConfig(config *rbacsyncv1alpha.ClusterRBACSync
516530
if groups.IsNotFound(err) {
517531
c.recorder.Eventf(config, corev1.EventTypeWarning,
518532
EventReasonUnknownGroup, "group %v not found", binding.Group)
533+
metrics.RBACSyncConfigStatus.WithLabelValues(metrics.LabelKindClusterRBACSyncConfig, EventReasonUnknownGroup).Inc()
519534
} else if groups.IsUnknownMemberships(err) {
520535
c.recorder.Eventf(config, corev1.EventTypeWarning,
521536
EventReasonBindingError, "group %v lookup failed: %v", binding.Group, err)
537+
metrics.RBACSyncConfigStatus.WithLabelValues(metrics.LabelKindClusterRBACSyncConfig, EventReasonBindingError).Inc()
522538
// In the case of unknown memberships from the grouper, we want to keep what already exists
523539
// so we mark the binding as active.
524540
active[name] = struct{}{}
@@ -529,8 +545,9 @@ func (c *Controller) handleClusterConfig(config *rbacsyncv1alpha.ClusterRBACSync
529545

530546
if len(members) == 0 {
531547
c.recorder.Eventf(config, corev1.EventTypeWarning,
532-
EventReasonBindingError, "%v has no members for group %v",
548+
EventReasonBindingWarning, "%v has no members for group %v",
533549
config.Name, binding.Group)
550+
metrics.RBACSyncConfigStatus.WithLabelValues(metrics.LabelKindClusterRBACSyncConfig, EventReasonBindingWarning).Inc()
534551
continue
535552
}
536553

@@ -540,7 +557,8 @@ func (c *Controller) handleClusterConfig(config *rbacsyncv1alpha.ClusterRBACSync
540557
// result will be the same. Accordingly, we log the bad
541558
// configuration and move on.
542559
c.recorder.Eventf(config, corev1.EventTypeWarning,
543-
EventReasonBindingError, "duplicate binding %v ignored", name)
560+
EventReasonBindingDuplicated, "duplicate binding %v ignored", name)
561+
metrics.RBACSyncBindingStatus.WithLabelValues(metrics.LabelKindClusterRoleBinding, EventReasonBindingDuplicated).Inc()
544562
continue
545563
}
546564

@@ -564,6 +582,7 @@ func (c *Controller) handleClusterConfig(config *rbacsyncv1alpha.ClusterRBACSync
564582
c.recorder.Eventf(config, corev1.EventTypeWarning,
565583
EventReasonBindingError,
566584
"unable to update or create ClusterRoleBinding %v: %v", crb.Name, err)
585+
metrics.RBACSyncBindingStatus.WithLabelValues(metrics.LabelKindClusterRoleBinding, EventReasonBindingError).Inc()
567586
continue
568587
}
569588

@@ -577,6 +596,7 @@ func (c *Controller) handleClusterConfig(config *rbacsyncv1alpha.ClusterRBACSync
577596
c.recorder.Eventf(config, corev1.EventTypeNormal,
578597
EventReasonBindingConfigured,
579598
"ClusterRoleBinding %v configured", created.Name)
599+
metrics.RBACSyncBindingStatus.WithLabelValues(metrics.LabelKindClusterRoleBinding, EventReasonBindingConfigured).Inc()
580600
}
581601

582602
selector, err := buildChildSelector(config.Name)
@@ -609,6 +629,7 @@ func (c *Controller) handleClusterConfig(config *rbacsyncv1alpha.ClusterRBACSync
609629
c.recorder.Eventf(config, corev1.EventTypeWarning,
610630
EventReasonBindingError,
611631
"ClusterRoleBinding %v could not be deleted: %v", crb.Name, err)
632+
metrics.RBACSyncBindingStatus.WithLabelValues(metrics.LabelKindClusterRoleBinding, EventReasonBindingError).Inc()
612633
continue
613634
}
614635

@@ -617,6 +638,7 @@ func (c *Controller) handleClusterConfig(config *rbacsyncv1alpha.ClusterRBACSync
617638

618639
c.recorder.Eventf(config, corev1.EventTypeNormal,
619640
EventReasonBindingDeleted, "ClusterRoleBinding %v deleted", crb.Name)
641+
metrics.RBACSyncBindingStatus.WithLabelValues(metrics.LabelKindClusterRoleBinding, EventReasonBindingDeleted).Inc()
620642
}
621643

622644
return nil

pkg/controller/controller_test.go

+5-4
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@ package controller
1818

1919
import (
2020
"fmt"
21-
"github.com/pkg/errors"
22-
"k8s.io/klog"
2321
"testing"
2422
"time"
2523

24+
"github.com/pkg/errors"
25+
"k8s.io/klog"
26+
2627
rbacsyncv1alpha "github.com/cruise-automation/rbacsync/pkg/apis/rbacsync/v1alpha"
2728
"github.com/cruise-automation/rbacsync/pkg/checks"
2829
rsfake "github.com/cruise-automation/rbacsync/pkg/generated/clientset/versioned/fake"
@@ -116,7 +117,7 @@ func TestControllerRBACSyncConfig(t *testing.T) {
116117
// the controller. It just creates the binding twice. We may
117118
// change this to avoid extra round trip in these cases. We
118119
// should probably actually warn.
119-
"Warning BindingError duplicate binding duplicates-group0-role0 ignored",
120+
"Warning BindingDuplicated duplicate binding duplicates-group0-role0 ignored",
120121
"Normal BindingConfigured RoleBinding testing/duplicates-group0-role1 configured",
121122
"Normal BindingConfigured RoleBinding testing/duplicates-upstream-role0 configured",
122123
},
@@ -256,7 +257,7 @@ func TestControllerClusterRBACSyncConfig(t *testing.T) {
256257
// the controller. It just creates the binding twice. We may
257258
// change this to avoid extra round trip in these cases. We
258259
// should probably actually warn.
259-
"Warning BindingError duplicate binding duplicates-group0-role0 ignored",
260+
"Warning BindingDuplicated duplicate binding duplicates-group0-role0 ignored",
260261
"Normal BindingConfigured ClusterRoleBinding duplicates-group0-role1 configured",
261262
"Normal BindingConfigured ClusterRoleBinding duplicates-upstream-role0 configured",
262263
},

pkg/groups/gsuite/grouper.go

+28-3
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@ import (
2323
"regexp"
2424
"time"
2525

26-
"github.com/cruise-automation/rbacsync/pkg/groups"
2726
"github.com/pkg/errors"
2827
"golang.org/x/oauth2/google"
2928
"golang.org/x/oauth2/jwt"
3029
admin "google.golang.org/api/admin/directory/v1"
3130
"google.golang.org/api/googleapi"
3231
rbacv1 "k8s.io/api/rbac/v1"
32+
33+
"github.com/cruise-automation/rbacsync/pkg/groups"
34+
"github.com/cruise-automation/rbacsync/pkg/metrics"
3335
)
3436

3537
const (
@@ -109,17 +111,20 @@ func (g *Grouper) Members(group string) ([]rbacv1.Subject, error) {
109111
ctx := context.TODO()
110112
client, err := g.service(ctx)
111113
if err != nil {
114+
metrics.RBACSyncGsuiteClientCreationStatus.WithLabelValues("Failed").Inc()
112115
return nil, errors.Wrapf(groups.ErrUnknown,
113-
"unable to determine group members, an error occurred creating gsuite client: %v",
114-
err)
116+
"unable to determine group members, an error occurred creating gsuite client: %v",
117+
err)
115118
}
119+
metrics.RBACSyncGsuiteClientCreationStatus.WithLabelValues("Succeeded").Inc()
116120

117121
var (
118122
tctx, cancel = context.WithTimeout(ctx, g.timeout)
119123
subjects []rbacv1.Subject
120124
)
121125
defer cancel()
122126

127+
startTime := time.Now()
123128
if err := client.Members.List(group).
124129
IncludeDerivedMembership(true).
125130
Pages(tctx, func(members *admin.Members) error {
@@ -135,16 +140,36 @@ func (g *Grouper) Members(group string) ([]rbacv1.Subject, error) {
135140

136141
switch {
137142
case isNotFound(err):
143+
metrics.RBACSyncGsuiteMembersLatency.WithLabelValues("NotFound").Observe(
144+
float64(time.Since(startTime).Nanoseconds()) / float64(time.Second),
145+
)
146+
metrics.RBACSyncGsuiteMembersStatus.WithLabelValues("NotFound").Inc()
138147
return nil, errors.Wrapf(groups.ErrNotFound, "gsuite does not have group: %v", err)
139148
case isTimeout(tctx):
149+
metrics.RBACSyncGsuiteMembersLatency.WithLabelValues("Timeout").Observe(
150+
float64(time.Since(startTime).Nanoseconds()) / float64(time.Second),
151+
)
152+
metrics.RBACSyncGsuiteMembersStatus.WithLabelValues("Timeout").Inc()
140153
return nil, errors.Wrapf(groups.ErrTimeout, "timeout calling gsuite api: %v", err)
141154
case isCanceled(tctx):
155+
metrics.RBACSyncGsuiteMembersLatency.WithLabelValues("Canceled").Observe(
156+
float64(time.Since(startTime).Nanoseconds()) / float64(time.Second),
157+
)
158+
metrics.RBACSyncGsuiteMembersStatus.WithLabelValues("Canceled").Inc()
142159
return nil, errors.Wrapf(groups.ErrCanceled, "the context canceled the call to gsuite: %v", err)
143160
default:
161+
metrics.RBACSyncGsuiteMembersLatency.WithLabelValues("Unknown").Observe(
162+
float64(time.Since(startTime).Nanoseconds()) / float64(time.Second),
163+
)
164+
metrics.RBACSyncGsuiteMembersStatus.WithLabelValues("Unknown").Inc()
144165
return nil, errors.Wrapf(groups.ErrUnknown, "error retrieving group members: %v", err)
145166
}
146167

147168
}
169+
metrics.RBACSyncGsuiteMembersLatency.WithLabelValues("Succeeded").Observe(
170+
float64(time.Since(startTime).Nanoseconds()) / float64(time.Second),
171+
)
172+
metrics.RBACSyncGsuiteMembersStatus.WithLabelValues("Succeeded").Inc()
148173

149174
// If you're trying to find some nasty memory allocation, it might be here.
150175
// Grouper interface should be converted to callback style if this is a

pkg/metrics/metrics.go

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
package metrics
2+
3+
import (
4+
"github.com/prometheus/client_golang/prometheus"
5+
"github.com/prometheus/client_golang/prometheus/promauto"
6+
)
7+
8+
const (
9+
LabelKindRBACSyncConfig = "RBACSyncConfig"
10+
LabelKindClusterRBACSyncConfig = "ClusterRBACSyncConfig"
11+
12+
LabelKindRoleBinding = "RoleBinding"
13+
LabelKindClusterRoleBinding = "ClusterRoleBinding"
14+
)
15+
16+
var (
17+
// Metrics for Controller
18+
RBACSyncConfigStatus = promauto.NewGaugeVec(prometheus.GaugeOpts{
19+
Name: "rbacsync_config_status",
20+
Help: "The number of RBACSyncConfigs and RBACSyncClusterConfigs and the status of the processed config",
21+
}, []string{"kind", "status"})
22+
RBACSyncBindingStatus = promauto.NewGaugeVec(prometheus.GaugeOpts{
23+
Name: "rbacsync_binding_status",
24+
Help: "The number of RoleBindings and ClusterRoleBindings configured by the controller and their statuses",
25+
}, []string{"kind", "status"})
26+
27+
// Metrics for Mapper/GSuite
28+
RBACSyncGsuiteClientCreationStatus = promauto.NewCounterVec(prometheus.CounterOpts{
29+
Name: "rbacsync_gsuite_client_creation_status",
30+
Help: "Total number of the status of gsuite client creations",
31+
}, []string{"status"})
32+
RBACSyncGsuiteMembersStatus = promauto.NewCounterVec(prometheus.CounterOpts{
33+
Name: "rbacsync_gsuite_members_status",
34+
Help: "Total number of the status of calls to gsuite with labels for state",
35+
}, []string{"status"})
36+
RBACSyncGsuiteMembersLatency = promauto.NewHistogramVec(prometheus.HistogramOpts{
37+
Name: "rbacsync_gsuite_members_latency_duration_seconds",
38+
Help: "The amount of time the calls to gsuite for group memberships",
39+
}, []string{"status"})
40+
)

vendor/github.com/golang/protobuf/go.mod

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/google/go-cmp/go.mod

+3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/hashicorp/golang-lru/go.mod

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)