@@ -20,6 +20,7 @@ import (
20
20
"time"
21
21
22
22
"github.com/prometheus/client_golang/prometheus"
23
+ "github.com/prometheus/client_golang/prometheus/collectors"
23
24
"k8s.io/klog/v2"
24
25
"sigs.k8s.io/controller-runtime/pkg/metrics"
25
26
)
58
59
reconcileFederatedResourcesDuration = prometheus .NewHistogram (
59
60
prometheus.HistogramOpts {
60
61
Name : "reconcile_federated_resources_duration_seconds" ,
61
- Help : "Time taken to reconcile federated resources in the target clusters." ,
62
+ Help : "[Deprecated] Time taken to reconcile federated resources in the target clusters. Replaced by controller_runtime_reconcile_time_seconds ." ,
62
63
Buckets : []float64 {0.01 , 0.05 , 0.1 , 0.5 , 1.0 , 2.5 , 5.0 , 7.5 , 10.0 , 12.5 , 15.0 , 17.5 , 20.0 , 22.5 , 25.0 , 27.5 , 30.0 , 50.0 , 75.0 , 100.0 , 1000.0 },
63
64
},
64
65
)
@@ -90,18 +91,45 @@ var (
90
91
controllerRuntimeReconcileDuration = prometheus .NewHistogramVec (
91
92
prometheus.HistogramOpts {
92
93
Name : "controller_runtime_reconcile_duration_seconds" ,
93
- Help : "Time taken by various parts of Kubefed controllers reconciliation loops." ,
94
+ Help : "[Deprecated] Time taken by various parts of Kubefed controllers reconciliation loops. Replaced by controller_runtime_reconcile_time_seconds ." ,
94
95
Buckets : []float64 {0.01 , 0.05 , 0.1 , 0.5 , 1.0 , 2.5 , 5.0 , 7.5 , 10.0 , 12.5 , 15.0 , 17.5 , 20.0 , 22.5 , 25.0 , 27.5 , 30.0 , 50.0 , 75.0 , 100.0 , 1000.0 },
95
96
}, []string {"controller" },
96
97
)
97
98
98
99
controllerRuntimeReconcileDurationSummary = prometheus .NewSummaryVec (
99
100
prometheus.SummaryOpts {
100
101
Name : "controller_runtime_reconcile_quantile_seconds" ,
101
- Help : "Quantiles of time taken by various parts of Kubefed controllers reconciliation loops." ,
102
+ Help : "[Deprecated] Quantiles of time taken by various parts of Kubefed controllers reconciliation loops. Replaced by controller_runtime_reconcile_time_seconds ." ,
102
103
MaxAge : time .Hour ,
103
104
}, []string {"controller" },
104
105
)
106
+
107
+ ControllerRuntimeReconcileTotal = prometheus .NewCounterVec (prometheus.CounterOpts {
108
+ Name : "controller_runtime_reconcile_total" ,
109
+ Help : "Total number of reconciliations per controller" ,
110
+ }, []string {"controller" , "result" })
111
+
112
+ ControllerRuntimeReconcileErrors = prometheus .NewCounterVec (prometheus.CounterOpts {
113
+ Name : "controller_runtime_reconcile_errors_total" ,
114
+ Help : "Total number of reconciliation errors per controller" ,
115
+ }, []string {"controller" })
116
+
117
+ ControllerRuntimeReconcileTime = prometheus .NewHistogramVec (prometheus.HistogramOpts {
118
+ Name : "controller_runtime_reconcile_time_seconds" ,
119
+ Help : "Length of time per reconciliation per controller" ,
120
+ Buckets : []float64 {0.005 , 0.01 , 0.025 , 0.05 , 0.1 , 0.15 , 0.2 , 0.25 , 0.3 , 0.35 , 0.4 , 0.45 , 0.5 , 0.6 , 0.7 , 0.8 , 0.9 , 1.0 ,
121
+ 1.25 , 1.5 , 1.75 , 2.0 , 2.5 , 3.0 , 3.5 , 4.0 , 4.5 , 5 , 6 , 7 , 8 , 9 , 10 , 15 , 20 , 25 , 30 , 40 , 50 , 60 },
122
+ }, []string {"controller" })
123
+
124
+ ControllerRuntimeWorkerCount = prometheus .NewGaugeVec (prometheus.GaugeOpts {
125
+ Name : "controller_runtime_max_concurrent_reconciles" ,
126
+ Help : "Maximum number of concurrent reconciles per controller" ,
127
+ }, []string {"controller" })
128
+
129
+ ControllerRuntimeActiveWorkers = prometheus .NewGaugeVec (prometheus.GaugeOpts {
130
+ Name : "controller_runtime_active_workers" ,
131
+ Help : "Number of currently used workers per controller" ,
132
+ }, []string {"controller" })
105
133
)
106
134
107
135
const (
@@ -117,6 +145,10 @@ const (
117
145
// RegisterAll registers all metrics.
118
146
func RegisterAll () {
119
147
metrics .Registry .MustRegister (
148
+ // expose process metrics like CPU, Memory, file descriptor usage etc.
149
+ collectors .NewProcessCollector (collectors.ProcessCollectorOpts {}),
150
+ // expose Go runtime metrics like GC stats, memory stats etc.
151
+ collectors .NewGoCollector (),
120
152
kubefedClusterTotal ,
121
153
joinedClusterTotal ,
122
154
reconcileFederatedResourcesDuration ,
@@ -127,6 +159,11 @@ func RegisterAll() {
127
159
dispatchOperationDuration ,
128
160
controllerRuntimeReconcileDuration ,
129
161
controllerRuntimeReconcileDurationSummary ,
162
+ ControllerRuntimeReconcileTotal ,
163
+ ControllerRuntimeReconcileErrors ,
164
+ ControllerRuntimeReconcileTime ,
165
+ ControllerRuntimeWorkerCount ,
166
+ ControllerRuntimeActiveWorkers ,
130
167
)
131
168
}
132
169
@@ -203,10 +240,20 @@ func UpdateControllerReconcileDurationFromStart(controller string, start time.Ti
203
240
204
241
// UpdateControllerReconcileDuration records the duration of the reconcile function of a controller.
// The duration, converted to seconds, is observed on both the
// controllerRuntimeReconcileDurationSummary summary and the
// controllerRuntimeReconcileDuration histogram, using the controller name as the label value.
// NOTE(review): the Help text of both metrics marks them "[Deprecated]" in favour of
// controller_runtime_reconcile_time_seconds.
205
242
func UpdateControllerReconcileDuration (controller string , duration time.Duration ) {
243
+ controllerRuntimeReconcileDurationSummary .WithLabelValues (controller ).Observe (duration .Seconds ())
244
+ controllerRuntimeReconcileDuration .WithLabelValues (controller ).Observe (duration .Seconds ())
245
+ }
246
+
247
+ // UpdateControllerRuntimeReconcileTimeFromStart computes the time elapsed since start and records it for the given controller via UpdateControllerRuntimeReconcileTime.
248
+ func UpdateControllerRuntimeReconcileTimeFromStart (controller string , start time.Time ) {
249
+ duration := time .Since (start )
250
+ UpdateControllerRuntimeReconcileTime (controller , duration )
251
+ }
252
+
253
+ // UpdateControllerRuntimeReconcileTime observes duration, in seconds, on the ControllerRuntimeReconcileTime histogram under the given controller label; durations above LogReconcileLongDurationThreshold are also logged at verbosity level 4.
254
+ func UpdateControllerRuntimeReconcileTime (controller string , duration time.Duration ) {
206
255
if duration > LogReconcileLongDurationThreshold {
207
256
klog .V (4 ).Infof ("Reconcile loop %s took %v to complete" , controller , duration )
208
257
}
209
-
210
- controllerRuntimeReconcileDurationSummary .WithLabelValues (controller ).Observe (duration .Seconds ())
211
- controllerRuntimeReconcileDuration .WithLabelValues (controller ).Observe (duration .Seconds ())
258
+ ControllerRuntimeReconcileTime .WithLabelValues (controller ).Observe (duration .Seconds ())
212
259
}
0 commit comments