Skip to content

Commit

Permalink
DAOS-7203 control: Add histogram support to Prometheus exporter
Browse files Browse the repository at this point in the history
Update the Prometheus exporter to support passthrough histograms
from native DAOS telemetry format. Fixes a few bugs and inefficiencies
in the native histogram implementation.

Features: telemetry
Required-githooks: true
Change-Id: I7842cc48a107ec0ba0ec93472fb6684db7394d30
Signed-off-by: Michael MacDonald <mjmac@google.com>
  • Loading branch information
mjmac committed Oct 31, 2024
1 parent 50128bd commit 757bddc
Show file tree
Hide file tree
Showing 21 changed files with 962 additions and 133 deletions.
10 changes: 4 additions & 6 deletions src/control/cmd/dmg/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,13 +254,11 @@ func (cmd *telemConfigCmd) configurePrometheus() (*installInfo, error) {
}

sc := &staticConfig{}
for _, h := range cmd.config.HostList {
host, _, err := common.SplitPort(h, 0)
if err != nil {
return nil, err
}
sc.Targets = append(sc.Targets, host+":9191")
sc.Targets, err = common.ParseHostList(cmd.config.HostList, 9191)
if err != nil {
return nil, err
}

cfg.ScrapeConfigs = []*scrapeConfig{
{
JobName: "daos",
Expand Down
4 changes: 4 additions & 0 deletions src/control/lib/control/telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -617,6 +617,10 @@ func TestControl_Metric_JSON(t *testing.T) {
CumulativeCount: 55,
UpperBound: 500,
},
{
CumulativeCount: 4242,
UpperBound: math.Inf(1),
},
},
},
},
Expand Down
66 changes: 66 additions & 0 deletions src/control/lib/daos/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package daos

import (
"encoding/json"
"math"
"sort"
"strconv"
"strings"
Expand Down Expand Up @@ -222,6 +223,71 @@ func (ms *MetricSet) MarshalJSON() ([]byte, error) {
})
}

// jsonFloat is a float64 wrapper that works around the stdlib's inability
// to carry -Inf/+Inf/NaN through JSON:
// https://github.com/golang/go/issues/59627
type jsonFloat float64

// MarshalJSON encodes the non-finite values as the quoted strings "+Inf",
// "-Inf" and "NaN"; any other value is encoded as a regular JSON number.
func (jf jsonFloat) MarshalJSON() ([]byte, error) {
	f := float64(jf)

	if math.IsNaN(f) {
		return []byte(`"NaN"`), nil
	}
	if math.IsInf(f, 1) {
		return []byte(`"+Inf"`), nil
	}
	if math.IsInf(f, -1) {
		return []byte(`"-Inf"`), nil
	}

	return json.Marshal(f)
}

// UnmarshalJSON accepts either a JSON number or a JSON string that
// strconv.ParseFloat understands (which covers the quoted forms produced by
// MarshalJSON).
func (jf *jsonFloat) UnmarshalJSON(data []byte) error {
	// First try to decode directly into the underlying float64.
	if numErr := json.Unmarshal(data, (*float64)(jf)); numErr == nil {
		return nil
	}

	// Not a number: fall back to a quoted string and parse its contents.
	var quoted string
	if err := json.Unmarshal(data, &quoted); err != nil {
		return err
	}

	parsed, err := strconv.ParseFloat(quoted, 64)
	if err == nil {
		*jf = jsonFloat(parsed)
	}

	return err
}

// MarshalJSON encodes the bucket with its UpperBound routed through
// jsonFloat, so that the value is written by jsonFloat's encoder rather
// than as a plain float64.
func (mb *MetricBucket) MarshalJSON() ([]byte, error) {
	// The local alias type drops MetricBucket's methods, which keeps
	// json.Marshal from recursing back into this method.
	type bucketAlias MetricBucket

	// The outer UpperBound field sits at a shallower depth than the
	// embedded one, so it is the one that gets encoded.
	type wireBucket struct {
		*bucketAlias
		UpperBound jsonFloat `json:"upper_bound"`
	}

	out := wireBucket{
		bucketAlias: (*bucketAlias)(mb),
		UpperBound:  jsonFloat(mb.UpperBound),
	}

	return json.Marshal(&out)
}

// UnmarshalJSON decodes a bucket, reading upper_bound through jsonFloat and
// then copying the result into mb.UpperBound.
func (mb *MetricBucket) UnmarshalJSON(data []byte) error {
	// The local alias type drops MetricBucket's methods, which keeps
	// json.Unmarshal from recursing back into this method.
	type bucketAlias MetricBucket

	// The embedded pointer targets mb, so the remaining fields are decoded
	// in place; the outer UpperBound captures the upper_bound key.
	var wire struct {
		UpperBound jsonFloat `json:"upper_bound"`
		*bucketAlias
	}
	wire.bucketAlias = (*bucketAlias)(mb)

	if err := json.Unmarshal(data, &wire); err != nil {
		return err
	}

	mb.UpperBound = float64(wire.UpperBound)

	return nil
}

// jsonMetric serves as a universal metric representation for unmarshaling from
// JSON. It covers all possible fields of Metric types.
type jsonMetric struct {
Expand Down
66 changes: 66 additions & 0 deletions src/control/lib/daos/telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package daos

import (
"encoding/json"
"math"
"testing"
"time"

Expand Down Expand Up @@ -244,3 +245,68 @@ func TestDaos_MetricSet_JSON(t *testing.T) {
})
}
}

// TestDaos_MetricBucket_JSON checks that a MetricBucket's UpperBound,
// including the non-finite values, survives a JSON Marshal/Unmarshal
// round trip.
func TestDaos_MetricBucket_JSON(t *testing.T) {
	type roundTripCase struct {
		bucket          *MetricBucket
		expUpperBound   float64
		expMarshalErr   error
		expUnmarshalErr error
	}

	testCases := map[string]roundTripCase{
		"+Inf": {
			bucket:        &MetricBucket{UpperBound: math.Inf(1)},
			expUpperBound: math.Inf(1),
		},
		"-Inf": {
			bucket:        &MetricBucket{UpperBound: math.Inf(-1)},
			expUpperBound: math.Inf(-1),
		},
		"NaN": {
			bucket:        &MetricBucket{UpperBound: math.NaN()},
			expUpperBound: math.NaN(),
		},
		"42.42": {
			bucket:        &MetricBucket{UpperBound: 42.42},
			expUpperBound: 42.42,
		},
		"0.000": {
			bucket:        &MetricBucket{UpperBound: 0.000},
			expUpperBound: 0.000,
		},
	}

	for name, tc := range testCases {
		t.Run(name, func(t *testing.T) {
			encoded, err := json.Marshal(tc.bucket)
			test.CmpErr(t, tc.expMarshalErr, err)
			if tc.expMarshalErr != nil {
				return
			}

			var decoded MetricBucket
			err = json.Unmarshal(encoded, &decoded)
			test.CmpErr(t, tc.expUnmarshalErr, err)
			if tc.expUnmarshalErr != nil {
				return
			}

			// An expected NaN is verified with math.IsNaN instead of a diff.
			if math.IsNaN(tc.expUpperBound) {
				if !math.IsNaN(decoded.UpperBound) {
					t.Fatalf("UpperBound NaN value did not survive Marshal/Unmarshal (got %f)", decoded.UpperBound)
				}
				return
			}

			if diff := cmp.Diff(tc.expUpperBound, decoded.UpperBound); diff != "" {
				t.Fatalf("Bucket UpperBound value did not survive Marshal/Unmarshal (-want, +got): %s", diff)
			}
		})
	}
}
3 changes: 3 additions & 0 deletions src/control/lib/telemetry/counter.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ import (
"fmt"
)

// Compile-time assertion that *Counter satisfies the Metric interface.
var _ Metric = (*Counter)(nil)

// Counter is a counter metric. It embeds metricBase.
type Counter struct {
metricBase
}
Expand Down
4 changes: 4 additions & 0 deletions src/control/lib/telemetry/duration.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,11 @@ import (
"time"
)

// Compile-time assertion that *Duration satisfies the StatsMetric interface.
var _ StatsMetric = (*Duration)(nil)

// Duration is a metric type that embeds statsMetric and can additionally
// carry histogram data.
type Duration struct {
statsMetric
hist *Histogram // optional histogram data
}

func (d *Duration) Type() MetricType {
Expand Down Expand Up @@ -67,6 +70,7 @@ func newDuration(hdl *handle, path string, name *string, node *C.struct_d_tm_nod
},
},
}
d.hist = newHistogram(&d.statsMetric)

// Load up statistics
_ = d.Value()
Expand Down
6 changes: 6 additions & 0 deletions src/control/lib/telemetry/gauge.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ import (
"fmt"
)

var _ Metric = (*Gauge)(nil)
var _ StatsMetric = (*StatsGauge)(nil)

// Gauge is a metric that consists of a single value that may increase or decrease.
type Gauge struct {
metricBase
Expand Down Expand Up @@ -93,6 +96,7 @@ func GetGauge(ctx context.Context, name string) (*Gauge, error) {
// StatsGauge is a gauge with statistics gathered. It embeds statsMetric and
// can additionally carry histogram data.
type StatsGauge struct {
statsMetric
hist *Histogram // optional histogram data
}

// Type returns the type of the gauge with stats.
Expand Down Expand Up @@ -136,9 +140,11 @@ func newStatsGauge(hdl *handle, path string, name *string, node *C.struct_d_tm_n
},
},
}
g.hist = newHistogram(&g.statsMetric)

// Load up the stats
_ = g.Value()

return g
}

Expand Down
Loading

0 comments on commit 757bddc

Please sign in to comment.