Skip to content

Commit

Permalink
metrics: scope metrics to active config, add optional per-host metrics (
Browse files Browse the repository at this point in the history
#6531)

* Add per host config

* Pass host label when option is enabled

* Test per host enabled

* metrics: scope metrics per loaded config

* doc and linter

Signed-off-by: Mohammed Al Sahaf <msaa1990@gmail.com>

* inject the custom registry into the admin handler

Co-Authored-By: Dave Henderson <dhenderson@gmail.com>

* remove `TODO` comment

* fixes

Signed-off-by: Mohammed Al Sahaf <msaa1990@gmail.com>

* refactor to delay metrics admin handler provision

Signed-off-by: Mohammed Al Sahaf <msaa1990@gmail.com>

---------

Signed-off-by: Mohammed Al Sahaf <msaa1990@gmail.com>
Co-authored-by: Hussam Almarzooq <me@hussam.io>
Co-authored-by: Dave Henderson <dhenderson@gmail.com>
  • Loading branch information
3 people authored Oct 2, 2024
1 parent 1672484 commit 41f5dd5
Show file tree
Hide file tree
Showing 12 changed files with 365 additions and 66 deletions.
11 changes: 6 additions & 5 deletions admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ type AdminPermissions struct {

// newAdminHandler reads admin's config and returns an http.Handler suitable
// for use in an admin endpoint server, which will be listening on listenAddr.
func (admin *AdminConfig) newAdminHandler(addr NetworkAddress, remote bool) adminHandler {
func (admin *AdminConfig) newAdminHandler(addr NetworkAddress, remote bool, ctx Context) adminHandler {
muxWrap := adminHandler{mux: http.NewServeMux()}

// secure the local or remote endpoint respectively
Expand Down Expand Up @@ -270,7 +270,6 @@ func (admin *AdminConfig) newAdminHandler(addr NetworkAddress, remote bool) admi
// register third-party module endpoints
for _, m := range GetModules("admin.api") {
router := m.New().(AdminRouter)
handlerLabel := m.ID.Name()
for _, route := range router.Routes() {
addRoute(route.Pattern, handlerLabel, route.Handler)
}
Expand Down Expand Up @@ -382,7 +381,9 @@ func (admin AdminConfig) allowedOrigins(addr NetworkAddress) []*url.URL {
// for the admin endpoint exists in cfg, a default one is used, so
// that there is always an admin server (unless it is explicitly
// configured to be disabled).
func replaceLocalAdminServer(cfg *Config) error {
// Critically note that some elements and functionality of the context
// may not be ready, e.g. storage. Tread carefully.
func replaceLocalAdminServer(cfg *Config, ctx Context) error {
// always* be sure to close down the old admin endpoint
// as gracefully as possible, even if the new one is
// disabled -- careful to use reference to the current
Expand Down Expand Up @@ -424,7 +425,7 @@ func replaceLocalAdminServer(cfg *Config) error {
return err
}

handler := cfg.Admin.newAdminHandler(addr, false)
handler := cfg.Admin.newAdminHandler(addr, false, ctx)

ln, err := addr.Listen(context.TODO(), 0, net.ListenConfig{})
if err != nil {
Expand Down Expand Up @@ -545,7 +546,7 @@ func replaceRemoteAdminServer(ctx Context, cfg *Config) error {

// make the HTTP handler but disable Host/Origin enforcement
// because we are using TLS authentication instead
handler := cfg.Admin.newAdminHandler(addr, true)
handler := cfg.Admin.newAdminHandler(addr, true, ctx)

// create client certificate pool for TLS mutual auth, and extract public keys
// so that we can enforce access controls at the application layer
Expand Down
9 changes: 7 additions & 2 deletions caddy.go
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ func unsyncedDecodeAndRun(cfgJSON []byte, allowPersist bool) error {
func run(newCfg *Config, start bool) (Context, error) {
ctx, err := provisionContext(newCfg, start)
if err != nil {
globalMetrics.configSuccess.Set(0)
return ctx, err
}

Expand All @@ -410,6 +411,7 @@ func run(newCfg *Config, start bool) (Context, error) {
// some of the other apps at runtime
err = ctx.cfg.Admin.provisionAdminRouters(ctx)
if err != nil {
globalMetrics.configSuccess.Set(0)
return ctx, err
}

Expand All @@ -435,9 +437,11 @@ func run(newCfg *Config, start bool) (Context, error) {
return nil
}()
if err != nil {
globalMetrics.configSuccess.Set(0)
return ctx, err
}

globalMetrics.configSuccess.Set(1)
globalMetrics.configSuccessTime.SetToCurrentTime()
// now that the user's config is running, finish setting up anything else,
// such as remote admin endpoint, config loader, etc.
return ctx, finishSettingUp(ctx, ctx.cfg)
Expand Down Expand Up @@ -471,6 +475,7 @@ func provisionContext(newCfg *Config, replaceAdminServer bool) (Context, error)
ctx, cancel := NewContext(Context{Context: context.Background(), cfg: newCfg})
defer func() {
if err != nil {
globalMetrics.configSuccess.Set(0)
// if there were any errors during startup,
// we should cancel the new context we created
// since the associated config won't be used;
Expand All @@ -497,7 +502,7 @@ func provisionContext(newCfg *Config, replaceAdminServer bool) (Context, error)

// start the admin endpoint (and stop any prior one)
if replaceAdminServer {
err = replaceLocalAdminServer(newCfg)
err = replaceLocalAdminServer(newCfg, ctx)
if err != nil {
return ctx, fmt.Errorf("starting caddy administration endpoint: %v", err)
}
Expand Down
12 changes: 6 additions & 6 deletions caddyconfig/httpcaddyfile/serveroptions.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,13 +240,13 @@ func unmarshalCaddyfileServerOptions(d *caddyfile.Dispenser) (any, error) {
}

case "metrics":
if d.NextArg() {
return nil, d.ArgErr()
}
if nesting := d.Nesting(); d.NextBlock(nesting) {
return nil, d.ArgErr()
}
serverOpts.Metrics = new(caddyhttp.Metrics)
for nesting := d.Nesting(); d.NextBlock(nesting); {
switch d.Val() {
case "per_host":
serverOpts.Metrics.PerHost = true
}
}

case "trace":
if d.NextArg() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
servers :80 {
metrics {
per_host
}
}
}
:80 {
respond "Hello"
}
----------
{
"apps": {
"http": {
"servers": {
"srv0": {
"listen": [
":80"
],
"routes": [
{
"handle": [
{
"body": "Hello",
"handler": "static_response"
}
]
}
],
"metrics": {
"per_host": true
}
}
}
}
}
}
22 changes: 21 additions & 1 deletion context.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ import (
"reflect"

"github.com/caddyserver/certmagic"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"go.uber.org/zap"
"go.uber.org/zap/exp/zapslog"

Expand All @@ -47,6 +49,7 @@ type Context struct {
ancestry []Module
cleanupFuncs []func() // invoked at every config unload
exitFuncs []func(context.Context) // invoked at config unload ONLY IF the process is exiting (EXPERIMENTAL)
metricsRegistry *prometheus.Registry
}

// NewContext provides a new context derived from the given
Expand All @@ -58,7 +61,7 @@ type Context struct {
// modules which are loaded will be properly unloaded.
// See standard library context package's documentation.
func NewContext(ctx Context) (Context, context.CancelFunc) {
newCtx := Context{moduleInstances: make(map[string][]Module), cfg: ctx.cfg}
newCtx := Context{moduleInstances: make(map[string][]Module), cfg: ctx.cfg, metricsRegistry: prometheus.NewPedanticRegistry()}
c, cancel := context.WithCancel(ctx.Context)
wrappedCancel := func() {
cancel()
Expand All @@ -79,6 +82,7 @@ func NewContext(ctx Context) (Context, context.CancelFunc) {
}
}
newCtx.Context = c
newCtx.initMetrics()
return newCtx, wrappedCancel
}

Expand All @@ -97,6 +101,22 @@ func (ctx *Context) Filesystems() FileSystems {
return ctx.cfg.filesystems
}

// Returns the active metrics registry for the context
// EXPERIMENTAL: This API is subject to change.
func (ctx *Context) GetMetricsRegistry() *prometheus.Registry {
return ctx.metricsRegistry
}

func (ctx *Context) initMetrics() {
ctx.metricsRegistry.MustRegister(
collectors.NewBuildInfoCollector(),
adminMetrics.requestCount,
adminMetrics.requestErrors,
globalMetrics.configSuccess,
globalMetrics.configSuccessTime,
)
}

// OnExit executes f when the process exits gracefully.
// The function is only executed if the process is gracefully
// shut down while this context is active.
Expand Down
23 changes: 16 additions & 7 deletions metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,30 +4,33 @@ import (
"net/http"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promauto"

"github.com/caddyserver/caddy/v2/internal/metrics"
)

// define and register the metrics used in this package.
func init() {
prometheus.MustRegister(collectors.NewBuildInfoCollector())

const ns, sub = "caddy", "admin"

adminMetrics.requestCount = promauto.NewCounterVec(prometheus.CounterOpts{
adminMetrics.requestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Subsystem: sub,
Name: "http_requests_total",
Help: "Counter of requests made to the Admin API's HTTP endpoints.",
}, []string{"handler", "path", "code", "method"})
adminMetrics.requestErrors = promauto.NewCounterVec(prometheus.CounterOpts{
adminMetrics.requestErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: ns,
Subsystem: sub,
Name: "http_request_errors_total",
Help: "Number of requests resulting in middleware errors.",
}, []string{"handler", "path", "method"})
globalMetrics.configSuccess = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "caddy_config_last_reload_successful",
Help: "Whether the last configuration reload attempt was successful.",
})
globalMetrics.configSuccessTime = prometheus.NewGauge(prometheus.GaugeOpts{
Name: "caddy_config_last_reload_success_timestamp_seconds",
Help: "Timestamp of the last successful configuration reload.",
})
}

// adminMetrics is a collection of metrics that can be tracked for the admin API.
Expand All @@ -36,6 +39,12 @@ var adminMetrics = struct {
requestErrors *prometheus.CounterVec
}{}

// globalMetrics is a collection of metrics that can be tracked for Caddy global state
var globalMetrics = struct {
configSuccess prometheus.Gauge
configSuccessTime prometheus.Gauge
}{}

// Similar to promhttp.InstrumentHandlerCounter, but upper-cases method names
// instead of lower-casing them.
//
Expand Down
4 changes: 4 additions & 0 deletions modules/caddyhttp/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,10 @@ func (app *App) Provision(ctx caddy.Context) error {
// route handler so that important security checks are done, etc.
primaryRoute := emptyHandler
if srv.Routes != nil {
if srv.Metrics != nil {
srv.Metrics.init = sync.Once{}
srv.Metrics.httpMetrics = &httpMetrics{}
}
err := srv.Routes.ProvisionHandlers(ctx, srv.Metrics)
if err != nil {
return fmt.Errorf("server %s: setting up route handlers: %v", srvName, err)
Expand Down
Loading

1 comment on commit 41f5dd5

@ta2013
Copy link

@ta2013 ta2013 commented on 41f5dd5 Oct 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I recently opened an issue regarding compatibility with previous versions of Caddy.

Please sign in to comment.