Skip to content
This repository has been archived by the owner on Jun 13, 2023. It is now read-only.

Add a prometheus exporter #57

Merged
merged 5 commits into from
Jun 21, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions LICENSE-3rdparty.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
Component,Origin,License
core,"github.com/frapposelli/wwhrd",MIT
core,"github.com/beorn7/perks/quantile",MIT
core,"github.com/cloudfoundry/gosigar",Apache-2.0
core,"github.com/cloudfoundry/gosigar/sys/windows",Apache-2.0
core,"github.com/coreos/go-systemd/daemon",Apache-2.0
Expand Down Expand Up @@ -51,13 +52,24 @@ core,"github.com/imdario/mergo",NewBSD
core,"github.com/inconshreveable/mousetrap",Apache-2.0
core,"github.com/json-iterator/go",MIT
core,"github.com/magiconair/properties",FreeBSD
core,"github.com/matttproud/golang_protobuf_extensions/pbutil",Apache-2.0
core,"github.com/mitchellh/go-homedir",MIT
core,"github.com/mitchellh/mapstructure",MIT
core,"github.com/modern-go/concurrent",Apache-2.0
core,"github.com/modern-go/reflect2",Apache-2.0
core,"github.com/pelletier/go-toml",MIT
core,"github.com/pkg/errors",FreeBSD
core,"github.com/pmezard/go-difflib/difflib",FreeBSD
core,"github.com/prometheus/client_golang/prometheus",Apache-2.0
core,"github.com/prometheus/client_golang/prometheus/promhttp",Apache-2.0
core,"github.com/prometheus/client_model/go",Apache-2.0
core,"github.com/prometheus/common/expfmt",Apache-2.0
core,"github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg",Apache-2.0
core,"github.com/prometheus/common/model",Apache-2.0
core,"github.com/prometheus/procfs",Apache-2.0
core,"github.com/prometheus/procfs/internal/util",Apache-2.0
core,"github.com/prometheus/procfs/nfs",Apache-2.0
core,"github.com/prometheus/procfs/xfs",Apache-2.0
core,"github.com/russross/blackfriday",FreeBSD
core,"github.com/ryanuber/go-glob",MIT
core,"github.com/sethgrid/pester",MIT
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,12 @@ Graceful stop it with:
* `--timeout`
* `curl -XPOST 127.0.0.1:8989/stop`

## Metrics

Pupernetes exposes Prometheus metrics to improve observability.

The full list of available metrics is in [docs/metrics.csv](./docs/metrics.csv).

## Current limitations

* Container runtime
Expand Down
7 changes: 6 additions & 1 deletion cmd/cli/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,12 @@ func NewCommand() (*cobra.Command, *int) {
exitCode = 1
return
}
err = run.NewRunner(env).Run()
r, err := run.NewRunner(env)
if err != nil {
exitCode = 2
return
}
err = r.Run()
if err != nil {
exitCode = 2
return
Expand Down
37 changes: 37 additions & 0 deletions docs/metrics.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name,type,help
"go_gc_duration_seconds","SUMMARY","A summary of the GC invocation durations."
"go_goroutines","GAUGE","Number of goroutines that currently exist."
"go_memstats_alloc_bytes","GAUGE","Number of bytes allocated and still in use."
"go_memstats_alloc_bytes_total","COUNTER","Total number of bytes allocated, even if freed."
"go_memstats_buck_hash_sys_bytes","GAUGE","Number of bytes used by the profiling bucket hash table."
"go_memstats_frees_total","COUNTER","Total number of frees."
"go_memstats_gc_sys_bytes","GAUGE","Number of bytes used for garbage collection system metadata."
"go_memstats_heap_alloc_bytes","GAUGE","Number of heap bytes allocated and still in use."
"go_memstats_heap_idle_bytes","GAUGE","Number of heap bytes waiting to be used."
"go_memstats_heap_inuse_bytes","GAUGE","Number of heap bytes that are in use."
"go_memstats_heap_objects","GAUGE","Number of allocated objects."
"go_memstats_heap_released_bytes_total","COUNTER","Total number of heap bytes released to OS."
"go_memstats_heap_sys_bytes","GAUGE","Number of heap bytes obtained from system."
"go_memstats_last_gc_time_seconds","GAUGE","Number of seconds since 1970 of last garbage collection."
"go_memstats_lookups_total","COUNTER","Total number of pointer lookups."
"go_memstats_mallocs_total","COUNTER","Total number of mallocs."
"go_memstats_mcache_inuse_bytes","GAUGE","Number of bytes in use by mcache structures."
"go_memstats_mcache_sys_bytes","GAUGE","Number of bytes used for mcache structures obtained from system."
"go_memstats_mspan_inuse_bytes","GAUGE","Number of bytes in use by mspan structures."
"go_memstats_mspan_sys_bytes","GAUGE","Number of bytes used for mspan structures obtained from system."
"go_memstats_next_gc_bytes","GAUGE","Number of heap bytes when next garbage collection will take place."
"go_memstats_other_sys_bytes","GAUGE","Number of bytes used for other system allocations."
"go_memstats_stack_inuse_bytes","GAUGE","Number of bytes in use by the stack allocator."
"go_memstats_stack_sys_bytes","GAUGE","Number of bytes obtained from system for stack allocator."
"go_memstats_sys_bytes","GAUGE","Number of bytes obtained by system. Sum of all system allocations."
"process_cpu_seconds_total","COUNTER","Total user and system CPU time spent in seconds."
"process_max_fds","GAUGE","Maximum number of open file descriptors."
"process_open_fds","GAUGE","Number of open file descriptors."
"process_resident_memory_bytes","GAUGE","Resident memory size in bytes."
"process_start_time_seconds","GAUGE","Start time of the process since unix epoch in seconds."
"process_virtual_memory_bytes","GAUGE","Virtual memory size in bytes."
"pupernetes_kubelet_api_pods_running","GAUGE","Number of kubelet API pods running"
"pupernetes_kubelet_logs_pods_running","GAUGE","Number of kubelet logs pods running"
"pupernetes_kubelet_probe_failures","COUNTER","Total number of kubelet probe failures"
"pupernetes_ready","GAUGE","Boolean for pupernetes readiness"
"pupernetes_version","GAUGE","Pupernetes version"
5 changes: 4 additions & 1 deletion pkg/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ import (
"syscall"
"time"

"github.com/DataDog/pupernetes/pkg/config"
"github.com/golang/glog"
"github.com/gorilla/mux"
corev1 "k8s.io/api/core/v1"

"github.com/DataDog/pupernetes/pkg/config"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

const (
Expand Down Expand Up @@ -89,6 +91,7 @@ func NewAPI(sigChan chan os.Signal, resetNamespaceFn func(namespaces *corev1.Nam

// GETs
r.Methods("GET").Path("/ready").HandlerFunc(h.isReadyHandler)
r.Methods("GET").Path("/metrics").Handler(promhttp.Handler())

srv := &http.Server{
Handler: r,
Expand Down
15 changes: 11 additions & 4 deletions pkg/run/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/DataDog/pupernetes/pkg/api"
"github.com/DataDog/pupernetes/pkg/config"
"github.com/DataDog/pupernetes/pkg/logging"
"github.com/DataDog/pupernetes/pkg/run/state"
"github.com/DataDog/pupernetes/pkg/setup"
"github.com/DataDog/pupernetes/pkg/util"
"io/ioutil"
Expand All @@ -37,7 +38,7 @@ type Runtime struct {

SigChan chan os.Signal
httpClient *http.Client
state *State
state *state.State
runTimeout time.Duration
waitKubeletGC time.Duration
kubeDeleteOption *v1.DeleteOptions
Expand All @@ -50,16 +51,22 @@ type Runtime struct {
}

// NewRunner instantiates a new Runtime with the given Environment
func NewRunner(env *setup.Environment) *Runtime {
func NewRunner(env *setup.Environment) (*Runtime, error) {
var zero int64

s, err := state.NewState()
if err != nil {
glog.Errorf("Cannot create the runner: %v", err)
return nil, err
}

run := &Runtime{
env: env,
state: s,
SigChan: make(chan os.Signal, 2),
httpClient: &http.Client{
Timeout: time.Millisecond * 500,
},
state: &State{},
runTimeout: config.ViperConfig.GetDuration("timeout"),
waitKubeletGC: config.ViperConfig.GetDuration("gc"),
kubeDeleteOption: &v1.DeleteOptions{
Expand All @@ -70,7 +77,7 @@ func NewRunner(env *setup.Environment) *Runtime {
ApplyChan: make(chan struct{}),
}
run.api = api.NewAPI(run.SigChan, run.DeleteAPIManifests, run.state.IsReady, run.ApplyChan)
return run
return run, nil
}

// Run daemonises pupernetes
Expand Down
2 changes: 1 addition & 1 deletion pkg/run/notify.go → pkg/run/state/notify.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package run
package state

import (
"os"
Expand Down
65 changes: 64 additions & 1 deletion pkg/run/state.go → pkg/run/state/state.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
package run
package state

import (
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
"sync"
)

Expand All @@ -15,6 +16,64 @@ type State struct {
kubeletProbeFailures int
kubeletAPIPodRunning int
kubeletLogsPodRunning int

promVersion prometheus.Gauge

promStateReady prometheus.Gauge
promKubeletAPIPodRunning prometheus.Gauge
promKubeletLogsPodRunning prometheus.Gauge

promKubeletProbeFailures prometheus.Counter
}

// NewState instantiates a State and registers its associated Prometheus metrics
func NewState() (*State, error) {
s := &State{
promVersion: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "pupernetes_version",
Help: "Pupernetes version",
ConstLabels: prometheus.Labels{},
// TODO record all versions in labels. hyperkube: "1.10.1", etcd: "3.11.1", ...
}),
promStateReady: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "pupernetes_ready",
Help: "Boolean for pupernetes readiness",
}),
promKubeletAPIPodRunning: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "pupernetes_kubelet_api_pods_running",
Help: "Number of kubelet API pods running",
}),
promKubeletLogsPodRunning: prometheus.NewGauge(prometheus.GaugeOpts{
Name: "pupernetes_kubelet_logs_pods_running",
Help: "Number of kubelet logs pods running",
}),
promKubeletProbeFailures: prometheus.NewCounter(prometheus.CounterOpts{
Name: "pupernetes_kubelet_probe_failures",
Help: "Total number of kubelet probe failures",
}),
}
err := prometheus.Register(s.promVersion)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could remove the error return from NewState and use MustRegister instead of Register, which makes the program panic on failure; this would also clean up all the error-handling code.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mfpierre I wasn't sure about that; do you think it's worth it?
Why would a panic be better than an error?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't you exit anyway if it fails?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but a panic in the middle of my code path sounds weird to me. Don't you think?

if err != nil {
return nil, err
}
err = prometheus.Register(s.promStateReady)
if err != nil {
return nil, err
}
err = prometheus.Register(s.promKubeletAPIPodRunning)
if err != nil {
return nil, err
}
err = prometheus.Register(s.promKubeletLogsPodRunning)
if err != nil {
return nil, err
}
err = prometheus.Register(s.promKubeletProbeFailures)
if err != nil {
return nil, err
}
s.promVersion.Inc()
return s, nil
}

// IsReady returns whether the kube-apiserver is available and the manifests are applied
Expand All @@ -32,6 +91,7 @@ func (s *State) SetReady() {
s.Unlock()
// Ignore errors
notifySystemd()
s.promStateReady.Set(1)
}

// SetAPIServerProbeLastError keep track of the latest error message and display only
Expand All @@ -50,6 +110,7 @@ func (s *State) IncKubeletProbeFailures() {
s.Lock()
s.kubeletProbeFailures++
s.Unlock()
s.promKubeletProbeFailures.Inc()
}

// GetKubeletProbeFail returns the number of kubelet failures
Expand All @@ -68,6 +129,7 @@ func (s *State) SetKubeletAPIPodRunning(nb int) {
s.kubeletAPIPodRunning = nb
}
s.Unlock()
s.promKubeletAPIPodRunning.Set(float64(nb))
}

// SetKubeletLogsPodRunning keep track of the number of kubelet Pods in /var/log/pods and display only
Expand All @@ -79,6 +141,7 @@ func (s *State) SetKubeletLogsPodRunning(nb int) {
s.kubeletLogsPodRunning = nb
}
s.Unlock()
s.promKubeletLogsPodRunning.Set(float64(nb))
}

// GetKubeletLogsPodRunning returns the number of kubelet Pods in /var/log/pods
Expand Down
4 changes: 1 addition & 3 deletions pkg/run/stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@ func (r *Runtime) getNamespaces() (*corev1.NamespaceList, error) {
}

func (r *Runtime) isAPIServerHookDone() bool {
r.state.RLock()
defer r.state.RUnlock()
return r.state.ready
return r.state.IsReady()
}

func (r *Runtime) gracefulDeleteAPIResources() error {
Expand Down
Loading