
Commit b533fa8

Merge pull request #7 from krancour/exporter-tests
improve test coverage on exporter
2 parents 2f28aff + bfef4d9

3 files changed: +492 additions, -79 deletions


exporter/metrics_exporter.go

+93 -77
@@ -14,48 +14,50 @@ import (
 )
 
 type metricsExporter struct {
-    apiClient            sdk.APIClient
+    coreClient           core.APIClient
+    authnClient          authn.APIClient
     scrapeInterval       time.Duration
-    totalProjects        prometheus.Gauge
-    totalUsers           prometheus.Gauge
-    totalServiceAccounts prometheus.Gauge
+    projectsGauge        prometheus.Gauge
+    usersGauge           prometheus.Gauge
+    serviceAccountsGauge prometheus.Gauge
     allWorkersByPhase    *prometheus.GaugeVec
-    totalPendingJobs     prometheus.Gauge
+    pendingJobsGauge     prometheus.Gauge
 }
 
 func newMetricsExporter(
     apiClient sdk.APIClient,
     scrapeInterval time.Duration,
 ) *metricsExporter {
     return &metricsExporter{
-        apiClient:      apiClient,
+        coreClient:     apiClient.Core(),
+        authnClient:    apiClient.Authn(),
         scrapeInterval: scrapeInterval,
-        totalProjects: promauto.NewGauge(
+        projectsGauge: promauto.NewGauge(
            prometheus.GaugeOpts{
                Name: "brigade_projects_total",
-               Help: "The total number of brigade projects",
+               Help: "The total number of projects",
            },
        ),
-        totalUsers: promauto.NewGauge(
+        usersGauge: promauto.NewGauge(
            prometheus.GaugeOpts{
                Name: "brigade_users_total",
                Help: "The total number of users",
            },
        ),
-        totalServiceAccounts: promauto.NewGauge(
+        serviceAccountsGauge: promauto.NewGauge(
            prometheus.GaugeOpts{
                Name: "brigade_service_accounts_total",
                Help: "The total number of service accounts",
            },
        ),
         allWorkersByPhase: promauto.NewGaugeVec(
            prometheus.GaugeOpts{
-               Name: "brigade_all_workers_by_phase",
-               Help: "All workers separated by phase",
+               Name: "brigade_events_by_worker_phase",
+               Help: "The total number of events grouped by worker phase",
            },
            []string{"workerPhase"},
        ),
-        totalPendingJobs: promauto.NewGauge(
+        pendingJobsGauge: promauto.NewGauge(
            prometheus.GaugeOpts{
                Name: "brigade_pending_jobs_total",
                Help: "The total number of pending jobs",
@@ -70,112 +72,126 @@ func (m *metricsExporter) run(ctx context.Context) {
     for {
        select {
        case <-ticker.C:
-           m.recordMetrics()
+           if err := m.recordProjectsCount(); err != nil {
+               log.Println(err)
+           }
+           if err := m.recordUsersCount(); err != nil {
+               log.Println(err)
+           }
+           if err := m.recordServiceAccountsCount(); err != nil {
+               log.Println(err)
+           }
+           if err := m.recordEventCountsByWorkersPhase(); err != nil {
+               log.Println(err)
+           }
+           if err := m.recordPendingJobsCount(); err != nil {
+               log.Println(err)
+           }
        case <-ctx.Done():
            return
        }
    }
 }
 
-func (m *metricsExporter) recordMetrics() {
+func (m *metricsExporter) recordProjectsCount() error {
     // brigade_projects_total
-    projects, err := m.apiClient.Core().Projects().List(
+    projects, err := m.coreClient.Projects().List(
        context.Background(),
        &core.ProjectsSelector{},
        &meta.ListOptions{},
    )
     if err != nil {
-        log.Println(err)
-    } else {
-        m.totalProjects.Set(float64(len(projects.Items) +
-            int(projects.RemainingItemCount)))
+        return err
    }
+    m.projectsGauge.Set(
+        float64(len(projects.Items) + int(projects.RemainingItemCount)),
+    )
+    return nil
+}
 
+func (m *metricsExporter) recordUsersCount() error {
     // brigade_users_total
-    users, err := m.apiClient.Authn().Users().List(
+    users, err := m.authnClient.Users().List(
        context.Background(),
        &authn.UsersSelector{},
        &meta.ListOptions{},
    )
     if err != nil {
-        log.Println(err)
-    } else {
-        m.totalUsers.Set(float64(len(users.Items) +
-            int(users.RemainingItemCount)))
+        return err
    }
+    m.usersGauge.Set(
+        float64(int64(len(users.Items)) + users.RemainingItemCount),
+    )
+    return nil
+}
 
+func (m *metricsExporter) recordServiceAccountsCount() error {
     // brigade_service_accounts_total
-    serviceAccounts, err := m.apiClient.Authn().ServiceAccounts().List(
+    serviceAccounts, err := m.authnClient.ServiceAccounts().List(
        context.Background(),
        &authn.ServiceAccountsSelector{},
        &meta.ListOptions{},
    )
     if err != nil {
-        log.Println(err)
-    } else {
-        m.totalServiceAccounts.Set(
-            float64(
-                len(serviceAccounts.Items) +
-                    int(serviceAccounts.RemainingItemCount),
-            ),
-        )
+        return err
    }
+    m.serviceAccountsGauge.Set(
+        float64(
+            int64(len(serviceAccounts.Items)) + serviceAccounts.RemainingItemCount,
+        ),
+    )
+    return nil
+}
 
-    // brigade_all_workers_by_phase
+func (m *metricsExporter) recordEventCountsByWorkersPhase() error {
+    // brigade_events_by_worker_phase
     for _, phase := range core.WorkerPhasesAll() {
-        var events core.EventList
-        events, err = m.apiClient.Core().Events().List(
+        events, err := m.coreClient.Events().List(
            context.Background(),
            &core.EventsSelector{
                WorkerPhases: []core.WorkerPhase{phase},
            },
            &meta.ListOptions{},
        )
         if err != nil {
-            log.Println(err)
-        } else {
-            m.allWorkersByPhase.With(
-                prometheus.Labels{"workerPhase": string(phase)},
-            ).Set(float64(len(events.Items) + int(events.RemainingItemCount)))
+            return err
        }
+        m.allWorkersByPhase.With(
+            prometheus.Labels{"workerPhase": string(phase)},
+        ).Set(float64(len(events.Items) + int(events.RemainingItemCount)))
+    }
+    return nil
+}
 
-        // brigade_pending_jobs_total
-        //
-        // There is no way to query the API directly for pending Jobs, but only
-        // running Workers should ever HAVE pending Jobs, so if we're currently
-        // counting running Workers, we can iterate over those to count pending
-        // jobs. Note, there's a cap on the max number of workers that can run
-        // concurrently, so we assume that as long as that cap isn't enormous (which
-        // would only occur on an enormous cluster), it's practical to iterate over
-        // all the running workers.
-        if phase == core.WorkerPhaseRunning {
-            var pendingJobs int
-            for {
-                for _, event := range events.Items {
-                    for _, job := range event.Worker.Jobs {
-                        if job.Status.Phase == core.JobPhasePending {
-                            pendingJobs++
-                        }
-                    }
-                }
-                if events.Continue == "" {
-                    break
-                }
-                if events, err = m.apiClient.Core().Events().List(
-                    context.Background(),
-                    &core.EventsSelector{
-                        WorkerPhases: []core.WorkerPhase{phase},
-                    },
-                    &meta.ListOptions{Continue: events.Continue},
-                ); err != nil {
-                    log.Println(err)
-                    break
+func (m *metricsExporter) recordPendingJobsCount() error {
+    // brigade_pending_jobs_total
+    var pendingJobs int
+    var continueValue string
+    for {
+        events, err := m.coreClient.Events().List(
+            context.Background(),
+            &core.EventsSelector{
+                WorkerPhases: []core.WorkerPhase{core.WorkerPhaseRunning},
+            },
+            &meta.ListOptions{
+                Continue: continueValue,
+            },
+        )
+        if err != nil {
+            return err
+        }
+        for _, event := range events.Items {
+            for _, job := range event.Worker.Jobs {
+                if job.Status.Phase == core.JobPhasePending {
+                    pendingJobs++
                }
            }
-            if err == nil {
-                m.totalPendingJobs.Set(float64(pendingJobs))
-            }
        }
+        if events.Continue == "" {
+            break
+        }
+        continueValue = events.Continue
    }
-
+    m.pendingJobsGauge.Set(float64(pendingJobs))
+    return nil
 }
