Skip to content

Commit 41f5dd5

Browse files
mohammed90, hussam-almarzoq, hairyhenderson
authored
metrics: scope metrics to active config, add optional per-host metrics (#6531)
* Add per host config
* Pass host label when option is enabled
* Test per host enabled
* metrics: scope metrics per loaded config
* doc and linter
  Signed-off-by: Mohammed Al Sahaf <[email protected]>
* inject the custom registry into the admin handler
  Co-Authored-By: Dave Henderson <[email protected]>
* remove `TODO` comment
* fixes
  Signed-off-by: Mohammed Al Sahaf <[email protected]>
* refactor to delay metrics admin handler provision
  Signed-off-by: Mohammed Al Sahaf <[email protected]>
---------
Signed-off-by: Mohammed Al Sahaf <[email protected]>
Co-authored-by: Hussam Almarzooq <[email protected]>
Co-authored-by: Dave Henderson <[email protected]>
1 parent 1672484 commit 41f5dd5

File tree

12 files changed

+365
-66
lines changed

12 files changed

+365
-66
lines changed

admin.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ type AdminPermissions struct {
214214

215215
// newAdminHandler reads admin's config and returns an http.Handler suitable
216216
// for use in an admin endpoint server, which will be listening on listenAddr.
217-
func (admin *AdminConfig) newAdminHandler(addr NetworkAddress, remote bool) adminHandler {
217+
func (admin *AdminConfig) newAdminHandler(addr NetworkAddress, remote bool, ctx Context) adminHandler {
218218
muxWrap := adminHandler{mux: http.NewServeMux()}
219219

220220
// secure the local or remote endpoint respectively
@@ -270,7 +270,6 @@ func (admin *AdminConfig) newAdminHandler(addr NetworkAddress, remote bool) admi
270270
// register third-party module endpoints
271271
for _, m := range GetModules("admin.api") {
272272
router := m.New().(AdminRouter)
273-
handlerLabel := m.ID.Name()
274273
for _, route := range router.Routes() {
275274
addRoute(route.Pattern, handlerLabel, route.Handler)
276275
}
@@ -382,7 +381,9 @@ func (admin AdminConfig) allowedOrigins(addr NetworkAddress) []*url.URL {
382381
// for the admin endpoint exists in cfg, a default one is used, so
383382
// that there is always an admin server (unless it is explicitly
384383
// configured to be disabled).
385-
func replaceLocalAdminServer(cfg *Config) error {
384+
// Critically note that some elements and functionality of the context
385+
// may not be ready, e.g. storage. Tread carefully.
386+
func replaceLocalAdminServer(cfg *Config, ctx Context) error {
386387
// always* be sure to close down the old admin endpoint
387388
// as gracefully as possible, even if the new one is
388389
// disabled -- careful to use reference to the current
@@ -424,7 +425,7 @@ func replaceLocalAdminServer(cfg *Config) error {
424425
return err
425426
}
426427

427-
handler := cfg.Admin.newAdminHandler(addr, false)
428+
handler := cfg.Admin.newAdminHandler(addr, false, ctx)
428429

429430
ln, err := addr.Listen(context.TODO(), 0, net.ListenConfig{})
430431
if err != nil {
@@ -545,7 +546,7 @@ func replaceRemoteAdminServer(ctx Context, cfg *Config) error {
545546

546547
// make the HTTP handler but disable Host/Origin enforcement
547548
// because we are using TLS authentication instead
548-
handler := cfg.Admin.newAdminHandler(addr, true)
549+
handler := cfg.Admin.newAdminHandler(addr, true, ctx)
549550

550551
// create client certificate pool for TLS mutual auth, and extract public keys
551552
// so that we can enforce access controls at the application layer

caddy.go

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,7 @@ func unsyncedDecodeAndRun(cfgJSON []byte, allowPersist bool) error {
399399
func run(newCfg *Config, start bool) (Context, error) {
400400
ctx, err := provisionContext(newCfg, start)
401401
if err != nil {
402+
globalMetrics.configSuccess.Set(0)
402403
return ctx, err
403404
}
404405

@@ -410,6 +411,7 @@ func run(newCfg *Config, start bool) (Context, error) {
410411
// some of the other apps at runtime
411412
err = ctx.cfg.Admin.provisionAdminRouters(ctx)
412413
if err != nil {
414+
globalMetrics.configSuccess.Set(0)
413415
return ctx, err
414416
}
415417

@@ -435,9 +437,11 @@ func run(newCfg *Config, start bool) (Context, error) {
435437
return nil
436438
}()
437439
if err != nil {
440+
globalMetrics.configSuccess.Set(0)
438441
return ctx, err
439442
}
440-
443+
globalMetrics.configSuccess.Set(1)
444+
globalMetrics.configSuccessTime.SetToCurrentTime()
441445
// now that the user's config is running, finish setting up anything else,
442446
// such as remote admin endpoint, config loader, etc.
443447
return ctx, finishSettingUp(ctx, ctx.cfg)
@@ -471,6 +475,7 @@ func provisionContext(newCfg *Config, replaceAdminServer bool) (Context, error)
471475
ctx, cancel := NewContext(Context{Context: context.Background(), cfg: newCfg})
472476
defer func() {
473477
if err != nil {
478+
globalMetrics.configSuccess.Set(0)
474479
// if there were any errors during startup,
475480
// we should cancel the new context we created
476481
// since the associated config won't be used;
@@ -497,7 +502,7 @@ func provisionContext(newCfg *Config, replaceAdminServer bool) (Context, error)
497502

498503
// start the admin endpoint (and stop any prior one)
499504
if replaceAdminServer {
500-
err = replaceLocalAdminServer(newCfg)
505+
err = replaceLocalAdminServer(newCfg, ctx)
501506
if err != nil {
502507
return ctx, fmt.Errorf("starting caddy administration endpoint: %v", err)
503508
}

caddyconfig/httpcaddyfile/serveroptions.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -240,13 +240,13 @@ func unmarshalCaddyfileServerOptions(d *caddyfile.Dispenser) (any, error) {
240240
}
241241

242242
case "metrics":
243-
if d.NextArg() {
244-
return nil, d.ArgErr()
245-
}
246-
if nesting := d.Nesting(); d.NextBlock(nesting) {
247-
return nil, d.ArgErr()
248-
}
249243
serverOpts.Metrics = new(caddyhttp.Metrics)
244+
for nesting := d.Nesting(); d.NextBlock(nesting); {
245+
switch d.Val() {
246+
case "per_host":
247+
serverOpts.Metrics.PerHost = true
248+
}
249+
}
250250

251251
case "trace":
252252
if d.NextArg() {
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
{
2+
servers :80 {
3+
metrics {
4+
per_host
5+
}
6+
}
7+
}
8+
:80 {
9+
respond "Hello"
10+
}
11+
----------
12+
{
13+
"apps": {
14+
"http": {
15+
"servers": {
16+
"srv0": {
17+
"listen": [
18+
":80"
19+
],
20+
"routes": [
21+
{
22+
"handle": [
23+
{
24+
"body": "Hello",
25+
"handler": "static_response"
26+
}
27+
]
28+
}
29+
],
30+
"metrics": {
31+
"per_host": true
32+
}
33+
}
34+
}
35+
}
36+
}
37+
}

context.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ import (
2323
"reflect"
2424

2525
"github.com/caddyserver/certmagic"
26+
"github.com/prometheus/client_golang/prometheus"
27+
"github.com/prometheus/client_golang/prometheus/collectors"
2628
"go.uber.org/zap"
2729
"go.uber.org/zap/exp/zapslog"
2830

@@ -47,6 +49,7 @@ type Context struct {
4749
ancestry []Module
4850
cleanupFuncs []func() // invoked at every config unload
4951
exitFuncs []func(context.Context) // invoked at config unload ONLY IF the process is exiting (EXPERIMENTAL)
52+
metricsRegistry *prometheus.Registry
5053
}
5154

5255
// NewContext provides a new context derived from the given
@@ -58,7 +61,7 @@ type Context struct {
5861
// modules which are loaded will be properly unloaded.
5962
// See standard library context package's documentation.
6063
func NewContext(ctx Context) (Context, context.CancelFunc) {
61-
newCtx := Context{moduleInstances: make(map[string][]Module), cfg: ctx.cfg}
64+
newCtx := Context{moduleInstances: make(map[string][]Module), cfg: ctx.cfg, metricsRegistry: prometheus.NewPedanticRegistry()}
6265
c, cancel := context.WithCancel(ctx.Context)
6366
wrappedCancel := func() {
6467
cancel()
@@ -79,6 +82,7 @@ func NewContext(ctx Context) (Context, context.CancelFunc) {
7982
}
8083
}
8184
newCtx.Context = c
85+
newCtx.initMetrics()
8286
return newCtx, wrappedCancel
8387
}
8488

@@ -97,6 +101,22 @@ func (ctx *Context) Filesystems() FileSystems {
97101
return ctx.cfg.filesystems
98102
}
99103

104+
// GetMetricsRegistry returns the active metrics registry for the context.
105+
// EXPERIMENTAL: This API is subject to change.
106+
func (ctx *Context) GetMetricsRegistry() *prometheus.Registry {
107+
return ctx.metricsRegistry
108+
}
109+
110+
func (ctx *Context) initMetrics() {
111+
ctx.metricsRegistry.MustRegister(
112+
collectors.NewBuildInfoCollector(),
113+
adminMetrics.requestCount,
114+
adminMetrics.requestErrors,
115+
globalMetrics.configSuccess,
116+
globalMetrics.configSuccessTime,
117+
)
118+
}
119+
100120
// OnExit executes f when the process exits gracefully.
101121
// The function is only executed if the process is gracefully
102122
// shut down while this context is active.

metrics.go

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,30 +4,33 @@ import (
44
"net/http"
55

66
"github.com/prometheus/client_golang/prometheus"
7-
"github.com/prometheus/client_golang/prometheus/collectors"
8-
"github.com/prometheus/client_golang/prometheus/promauto"
97

108
"github.com/caddyserver/caddy/v2/internal/metrics"
119
)
1210

1311
// define the metrics used in this package; they are registered on each Context's registry by Context.initMetrics.
1412
func init() {
15-
prometheus.MustRegister(collectors.NewBuildInfoCollector())
16-
1713
const ns, sub = "caddy", "admin"
18-
19-
adminMetrics.requestCount = promauto.NewCounterVec(prometheus.CounterOpts{
14+
adminMetrics.requestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
2015
Namespace: ns,
2116
Subsystem: sub,
2217
Name: "http_requests_total",
2318
Help: "Counter of requests made to the Admin API's HTTP endpoints.",
2419
}, []string{"handler", "path", "code", "method"})
25-
adminMetrics.requestErrors = promauto.NewCounterVec(prometheus.CounterOpts{
20+
adminMetrics.requestErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
2621
Namespace: ns,
2722
Subsystem: sub,
2823
Name: "http_request_errors_total",
2924
Help: "Number of requests resulting in middleware errors.",
3025
}, []string{"handler", "path", "method"})
26+
globalMetrics.configSuccess = prometheus.NewGauge(prometheus.GaugeOpts{
27+
Name: "caddy_config_last_reload_successful",
28+
Help: "Whether the last configuration reload attempt was successful.",
29+
})
30+
globalMetrics.configSuccessTime = prometheus.NewGauge(prometheus.GaugeOpts{
31+
Name: "caddy_config_last_reload_success_timestamp_seconds",
32+
Help: "Timestamp of the last successful configuration reload.",
33+
})
3134
}
3235

3336
// adminMetrics is a collection of metrics that can be tracked for the admin API.
@@ -36,6 +39,12 @@ var adminMetrics = struct {
3639
requestErrors *prometheus.CounterVec
3740
}{}
3841

42+
// globalMetrics is a collection of metrics that can be tracked for Caddy's global state.
43+
var globalMetrics = struct {
44+
configSuccess prometheus.Gauge
45+
configSuccessTime prometheus.Gauge
46+
}{}
47+
3948
// Similar to promhttp.InstrumentHandlerCounter, but upper-cases method names
4049
// instead of lower-casing them.
4150
//

modules/caddyhttp/app.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,10 @@ func (app *App) Provision(ctx caddy.Context) error {
347347
// route handler so that important security checks are done, etc.
348348
primaryRoute := emptyHandler
349349
if srv.Routes != nil {
350+
if srv.Metrics != nil {
351+
srv.Metrics.init = sync.Once{}
352+
srv.Metrics.httpMetrics = &httpMetrics{}
353+
}
350354
err := srv.Routes.ProvisionHandlers(ctx, srv.Metrics)
351355
if err != nil {
352356
return fmt.Errorf("server %s: setting up route handlers: %v", srvName, err)

0 commit comments

Comments
 (0)