Skip to content
This repository has been archived by the owner on Jun 13, 2023. It is now read-only.

Commit

Permalink
Add a prometheus exporter (#57)
Browse files Browse the repository at this point in the history
* run: add a prometheus exporter
  • Loading branch information
JulienBalestra authored Jun 21, 2018
1 parent 2379ab9 commit 0fd001d
Show file tree
Hide file tree
Showing 100 changed files with 17,836 additions and 11 deletions.
52 changes: 52 additions & 0 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions LICENSE-3rdparty.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
Component,Origin,License
core,"github.com/frapposelli/wwhrd",MIT
core,"github.com/beorn7/perks/quantile",MIT
core,"github.com/cloudfoundry/gosigar",Apache-2.0
core,"github.com/cloudfoundry/gosigar/sys/windows",Apache-2.0
core,"github.com/coreos/go-systemd/daemon",Apache-2.0
Expand Down Expand Up @@ -51,13 +52,24 @@ core,"github.com/imdario/mergo",NewBSD
core,"github.com/inconshreveable/mousetrap",Apache-2.0
core,"github.com/json-iterator/go",MIT
core,"github.com/magiconair/properties",FreeBSD
core,"github.com/matttproud/golang_protobuf_extensions/pbutil",Apache-2.0
core,"github.com/mitchellh/go-homedir",MIT
core,"github.com/mitchellh/mapstructure",MIT
core,"github.com/modern-go/concurrent",Apache-2.0
core,"github.com/modern-go/reflect2",Apache-2.0
core,"github.com/pelletier/go-toml",MIT
core,"github.com/pkg/errors",FreeBSD
core,"github.com/pmezard/go-difflib/difflib",FreeBSD
core,"github.com/prometheus/client_golang/prometheus",Apache-2.0
core,"github.com/prometheus/client_golang/prometheus/promhttp",Apache-2.0
core,"github.com/prometheus/client_model/go",Apache-2.0
core,"github.com/prometheus/common/expfmt",Apache-2.0
core,"github.com/prometheus/common/internal/bitbucket.org/ww/goautoneg",Apache-2.0
core,"github.com/prometheus/common/model",Apache-2.0
core,"github.com/prometheus/procfs",Apache-2.0
core,"github.com/prometheus/procfs/internal/util",Apache-2.0
core,"github.com/prometheus/procfs/nfs",Apache-2.0
core,"github.com/prometheus/procfs/xfs",Apache-2.0
core,"github.com/russross/blackfriday",FreeBSD
core,"github.com/ryanuber/go-glob",MIT
core,"github.com/sethgrid/pester",MIT
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,12 @@ Graceful stop it with:
* `--timeout`
* `curl -XPOST 127.0.0.1:8989/stop`

## Metrics

Pupernetes exposes Prometheus metrics to improve observability.

The list of available metrics is in [docs/metrics.csv](./docs/metrics.csv).

## Current limitations

* Container runtime
Expand Down
7 changes: 6 additions & 1 deletion cmd/cli/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,12 @@ func NewCommand() (*cobra.Command, *int) {
exitCode = 1
return
}
err = run.NewRunner(env).Run()
r, err := run.NewRunner(env)
if err != nil {
exitCode = 2
return
}
err = r.Run()
if err != nil {
exitCode = 2
return
Expand Down
37 changes: 37 additions & 0 deletions docs/metrics.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name,type,help
"go_gc_duration_seconds","SUMMARY","A summary of the GC invocation durations."
"go_goroutines","GAUGE","Number of goroutines that currently exist."
"go_memstats_alloc_bytes","GAUGE","Number of bytes allocated and still in use."
"go_memstats_alloc_bytes_total","COUNTER","Total number of bytes allocated, even if freed."
"go_memstats_buck_hash_sys_bytes","GAUGE","Number of bytes used by the profiling bucket hash table."
"go_memstats_frees_total","COUNTER","Total number of frees."
"go_memstats_gc_sys_bytes","GAUGE","Number of bytes used for garbage collection system metadata."
"go_memstats_heap_alloc_bytes","GAUGE","Number of heap bytes allocated and still in use."
"go_memstats_heap_idle_bytes","GAUGE","Number of heap bytes waiting to be used."
"go_memstats_heap_inuse_bytes","GAUGE","Number of heap bytes that are in use."
"go_memstats_heap_objects","GAUGE","Number of allocated objects."
"go_memstats_heap_released_bytes_total","COUNTER","Total number of heap bytes released to OS."
"go_memstats_heap_sys_bytes","GAUGE","Number of heap bytes obtained from system."
"go_memstats_last_gc_time_seconds","GAUGE","Number of seconds since 1970 of last garbage collection."
"go_memstats_lookups_total","COUNTER","Total number of pointer lookups."
"go_memstats_mallocs_total","COUNTER","Total number of mallocs."
"go_memstats_mcache_inuse_bytes","GAUGE","Number of bytes in use by mcache structures."
"go_memstats_mcache_sys_bytes","GAUGE","Number of bytes used for mcache structures obtained from system."
"go_memstats_mspan_inuse_bytes","GAUGE","Number of bytes in use by mspan structures."
"go_memstats_mspan_sys_bytes","GAUGE","Number of bytes used for mspan structures obtained from system."
"go_memstats_next_gc_bytes","GAUGE","Number of heap bytes when next garbage collection will take place."
"go_memstats_other_sys_bytes","GAUGE","Number of bytes used for other system allocations."
"go_memstats_stack_inuse_bytes","GAUGE","Number of bytes in use by the stack allocator."
"go_memstats_stack_sys_bytes","GAUGE","Number of bytes obtained from system for stack allocator."
"go_memstats_sys_bytes","GAUGE","Number of bytes obtained by system. Sum of all system allocations."
"process_cpu_seconds_total","COUNTER","Total user and system CPU time spent in seconds."
"process_max_fds","GAUGE","Maximum number of open file descriptors."
"process_open_fds","GAUGE","Number of open file descriptors."
"process_resident_memory_bytes","GAUGE","Resident memory size in bytes."
"process_start_time_seconds","GAUGE","Start time of the process since unix epoch in seconds."
"process_virtual_memory_bytes","GAUGE","Virtual memory size in bytes."
"pupernetes_kubelet_api_pods_running","GAUGE","Number of kubelet API pods running"
"pupernetes_kubelet_logs_pods_running","GAUGE","Number of kubelet logs pods running"
"pupernetes_kubelet_probe_failures","COUNTER","Total number of kubelet probe failures"
"pupernetes_ready","GAUGE","Boolean for pupernetes readiness"
"pupernetes_version","GAUGE","Pupernetes version"
5 changes: 4 additions & 1 deletion pkg/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@ import (
"syscall"
"time"

"github.com/DataDog/pupernetes/pkg/config"
"github.com/golang/glog"
"github.com/gorilla/mux"
corev1 "k8s.io/api/core/v1"

"github.com/DataDog/pupernetes/pkg/config"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

const (
Expand Down Expand Up @@ -89,6 +91,7 @@ func NewAPI(sigChan chan os.Signal, resetNamespaceFn func(namespaces *corev1.Nam

// GETs
r.Methods("GET").Path("/ready").HandlerFunc(h.isReadyHandler)
r.Methods("GET").Path("/metrics").Handler(promhttp.Handler())

srv := &http.Server{
Handler: r,
Expand Down
15 changes: 11 additions & 4 deletions pkg/run/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/DataDog/pupernetes/pkg/api"
"github.com/DataDog/pupernetes/pkg/config"
"github.com/DataDog/pupernetes/pkg/logging"
"github.com/DataDog/pupernetes/pkg/run/state"
"github.com/DataDog/pupernetes/pkg/setup"
"github.com/DataDog/pupernetes/pkg/util"
"io/ioutil"
Expand All @@ -37,7 +38,7 @@ type Runtime struct {

SigChan chan os.Signal
httpClient *http.Client
state *State
state *state.State
runTimeout time.Duration
waitKubeletGC time.Duration
kubeDeleteOption *v1.DeleteOptions
Expand All @@ -50,16 +51,22 @@ type Runtime struct {
}

// NewRunner instantiates a new Runtime with the given Environment
func NewRunner(env *setup.Environment) *Runtime {
func NewRunner(env *setup.Environment) (*Runtime, error) {
var zero int64

s, err := state.NewState()
if err != nil {
glog.Errorf("Cannot create the runner: %v", err)
return nil, err
}

run := &Runtime{
env: env,
state: s,
SigChan: make(chan os.Signal, 2),
httpClient: &http.Client{
Timeout: time.Millisecond * 500,
},
state: &State{},
runTimeout: config.ViperConfig.GetDuration("timeout"),
waitKubeletGC: config.ViperConfig.GetDuration("gc"),
kubeDeleteOption: &v1.DeleteOptions{
Expand All @@ -70,7 +77,7 @@ func NewRunner(env *setup.Environment) *Runtime {
ApplyChan: make(chan struct{}),
}
run.api = api.NewAPI(run.SigChan, run.DeleteAPIManifests, run.state.IsReady, run.ApplyChan)
return run
return run, nil
}

// Run daemonises pupernetes
Expand Down
2 changes: 1 addition & 1 deletion pkg/run/notify.go → pkg/run/state/notify.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package run
package state

import (
"os"
Expand Down
65 changes: 64 additions & 1 deletion pkg/run/state.go → pkg/run/state/state.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
package run
package state

import (
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus"
"sync"
)

Expand All @@ -15,6 +16,64 @@ type State struct {
kubeletProbeFailures int
kubeletAPIPodRunning int
kubeletLogsPodRunning int

promVersion prometheus.Gauge

promStateReady prometheus.Gauge
promKubeletAPIPodRunning prometheus.Gauge
promKubeletLogsPodRunning prometheus.Gauge

promKubeletProbeFailures prometheus.Counter
}

// NewState instantiates a State together with its prometheus metrics and
// registers those metrics through prometheus.Register.
//
// If any metric cannot be registered, the error from prometheus.Register is
// returned along with a nil State. The metrics that were registered earlier in
// the same call are unregistered again, so a failed call does not leave a
// partial set of pupernetes metrics behind in the registry.
func NewState() (*State, error) {
	s := &State{
		promVersion: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "pupernetes_version",
			Help: "Pupernetes version",
			// TODO record all versions in labels. hyperkube: "1.10.1", etcd: "3.11.1", ...
			ConstLabels: prometheus.Labels{},
		}),
		promStateReady: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "pupernetes_ready",
			Help: "Boolean for pupernetes readiness",
		}),
		promKubeletAPIPodRunning: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "pupernetes_kubelet_api_pods_running",
			Help: "Number of kubelet API pods running",
		}),
		promKubeletLogsPodRunning: prometheus.NewGauge(prometheus.GaugeOpts{
			Name: "pupernetes_kubelet_logs_pods_running",
			Help: "Number of kubelet logs pods running",
		}),
		promKubeletProbeFailures: prometheus.NewCounter(prometheus.CounterOpts{
			Name: "pupernetes_kubelet_probe_failures",
			Help: "Total number of kubelet probe failures",
		}),
	}

	// Registration order is kept identical to the field order above.
	collectors := []prometheus.Collector{
		s.promVersion,
		s.promStateReady,
		s.promKubeletAPIPodRunning,
		s.promKubeletLogsPodRunning,
		s.promKubeletProbeFailures,
	}
	for i, c := range collectors {
		if err := prometheus.Register(c); err != nil {
			// Roll back what this call already registered so that the
			// caller can retry without hitting leftovers of this attempt.
			for _, registered := range collectors[:i] {
				prometheus.Unregister(registered)
			}
			return nil, err
		}
	}

	// The version gauge is exposed with a constant value of 1.
	s.promVersion.Set(1)
	return s, nil
}

// IsReady returns whether the kube-apiserver is available and the manifests are applied
Expand All @@ -32,6 +91,7 @@ func (s *State) SetReady() {
s.Unlock()
// Ignore errors
notifySystemd()
s.promStateReady.Set(1)
}

// SetAPIServerProbeLastError keep track of the latest error message and display only
Expand All @@ -50,6 +110,7 @@ func (s *State) IncKubeletProbeFailures() {
s.Lock()
s.kubeletProbeFailures++
s.Unlock()
s.promKubeletProbeFailures.Inc()
}

// GetKubeletProbeFail returns the number of kubelet failures
Expand All @@ -68,6 +129,7 @@ func (s *State) SetKubeletAPIPodRunning(nb int) {
s.kubeletAPIPodRunning = nb
}
s.Unlock()
s.promKubeletAPIPodRunning.Set(float64(nb))
}

// SetKubeletLogsPodRunning keep track of the number of kubelet Pods in /var/log/pods and display only
Expand All @@ -79,6 +141,7 @@ func (s *State) SetKubeletLogsPodRunning(nb int) {
s.kubeletLogsPodRunning = nb
}
s.Unlock()
s.promKubeletLogsPodRunning.Set(float64(nb))
}

// GetKubeletLogsPodRunning returns the number of kubelet Pods in /var/log/pods
Expand Down
4 changes: 1 addition & 3 deletions pkg/run/stop.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,7 @@ func (r *Runtime) getNamespaces() (*corev1.NamespaceList, error) {
}

func (r *Runtime) isAPIServerHookDone() bool {
r.state.RLock()
defer r.state.RUnlock()
return r.state.ready
return r.state.IsReady()
}

func (r *Runtime) gracefulDeleteAPIResources() error {
Expand Down
Loading

0 comments on commit 0fd001d

Please sign in to comment.