Skip to content

Commit

Permalink
Merge metrics descriptions in textfile collector (prometheus#2475)
Browse files Browse the repository at this point in the history
The textfile collector will now provide a unified metric description
(that will look like "Metric read from file/a.prom, file/b.prom")
for metrics collected across several text files that don't already
have a description.

Also change the error handling in the textfile collector tests to
ContinueOnError to better mirror the real-life use-case.

Signed-off-by: Guillaume Espanel <guillaume.espanel.ext@ovhcloud.com>

Signed-off-by: Guillaume Espanel <guillaume.espanel.ext@ovhcloud.com>
  • Loading branch information
quatre authored and oblitorum committed Apr 9, 2024
1 parent 240a1be commit f569a7d
Show file tree
Hide file tree
Showing 14 changed files with 134 additions and 20 deletions.
11 changes: 11 additions & 0 deletions collector/fixtures/textfile/metrics_merge_different_help.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# HELP events_total A nice help message.
# TYPE events_total counter
events_total{file="a",foo="bar"} 10
events_total{file="a",foo="baz"} 20
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_different_help/a.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_different_help/b.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# HELP events_total A nice help message.
# TYPE events_total counter
events_total{foo="bar",file="a"} 10
events_total{foo="baz",file="a"} 20

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# HELP events_total A different help message.
# TYPE events_total counter
events_total{foo="bar",file="b"} 30
events_total{foo="baz",file="b"} 40

13 changes: 13 additions & 0 deletions collector/fixtures/textfile/metrics_merge_empty_help.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# HELP events_total Metric read from fixtures/textfile/metrics_merge_empty_help/a.prom, fixtures/textfile/metrics_merge_empty_help/b.prom
# TYPE events_total counter
events_total{file="a",foo="bar"} 10
events_total{file="a",foo="baz"} 20
events_total{file="b",foo="bar"} 30
events_total{file="b",foo="baz"} 40
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_empty_help/a.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_empty_help/b.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
5 changes: 5 additions & 0 deletions collector/fixtures/textfile/metrics_merge_empty_help/a.prom
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# HELP events_total
# TYPE events_total counter
events_total{foo="bar",file="a"} 10
events_total{foo="baz",file="a"} 20

5 changes: 5 additions & 0 deletions collector/fixtures/textfile/metrics_merge_empty_help/b.prom
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# HELP events_total
# TYPE events_total counter
events_total{foo="bar",file="b"} 30
events_total{foo="baz",file="b"} 40

13 changes: 13 additions & 0 deletions collector/fixtures/textfile/metrics_merge_no_help.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# HELP events_total Metric read from fixtures/textfile/metrics_merge_no_help/a.prom, fixtures/textfile/metrics_merge_no_help/b.prom
# TYPE events_total counter
events_total{file="a",foo="bar"} 10
events_total{file="a",foo="baz"} 20
events_total{file="b",foo="bar"} 30
events_total{file="b",foo="baz"} 40
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_no_help/a.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_no_help/b.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
4 changes: 4 additions & 0 deletions collector/fixtures/textfile/metrics_merge_no_help/a.prom
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# TYPE events_total counter
events_total{foo="bar",file="a"} 10
events_total{foo="baz",file="a"} 20

4 changes: 4 additions & 0 deletions collector/fixtures/textfile/metrics_merge_no_help/b.prom
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# TYPE events_total counter
events_total{foo="bar",file="b"} 30
events_total{foo="baz",file="b"} 40

13 changes: 13 additions & 0 deletions collector/fixtures/textfile/metrics_merge_same_help.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# HELP events_total The same help.
# TYPE events_total counter
events_total{file="a",foo="bar"} 10
events_total{file="a",foo="baz"} 20
events_total{file="b",foo="bar"} 30
events_total{file="b",foo="baz"} 40
# HELP node_textfile_mtime_seconds Unixtime mtime of textfiles successfully read.
# TYPE node_textfile_mtime_seconds gauge
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_same_help/a.prom"} 1
node_textfile_mtime_seconds{file="fixtures/textfile/metrics_merge_same_help/b.prom"} 1
# HELP node_textfile_scrape_error 1 if there was an error opening or reading a file, 0 otherwise
# TYPE node_textfile_scrape_error gauge
node_textfile_scrape_error 0
5 changes: 5 additions & 0 deletions collector/fixtures/textfile/metrics_merge_same_help/a.prom
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# HELP events_total The same help.
# TYPE events_total counter
events_total{foo="bar",file="a"} 10
events_total{foo="baz",file="a"} 20

5 changes: 5 additions & 0 deletions collector/fixtures/textfile/metrics_merge_same_help/b.prom
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# HELP events_total The same help.
# TYPE events_total counter
events_total{foo="bar",file="b"} 30
events_total{foo="baz",file="b"} 40

48 changes: 29 additions & 19 deletions collector/textfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ func (c *textFileCollector) Update(ch chan<- prometheus.Metric) error {
// Iterate over files and accumulate their metrics, but also track any
// parsing errors so an error metric can be reported.
var errored bool
var parsedFamilies []*dto.MetricFamily
metricsNamesToFiles := map[string][]string{}

paths, err := filepath.Glob(c.path)
if err != nil || len(paths) == 0 {
Expand All @@ -208,20 +210,39 @@ func (c *textFileCollector) Update(ch chan<- prometheus.Metric) error {
}

for _, f := range files {
metricsFilePath := filepath.Join(path, f.Name())
if !strings.HasSuffix(f.Name(), ".prom") {
continue
}

mtime, err := c.processFile(path, f.Name(), ch)
mtime, families, err := c.processFile(path, f.Name(), ch)

for _, mf := range families {
metricsNamesToFiles[*mf.Name] = append(metricsNamesToFiles[*mf.Name], metricsFilePath)
parsedFamilies = append(parsedFamilies, mf)
}

if err != nil {
errored = true
level.Error(c.logger).Log("msg", "failed to collect textfile data", "file", f.Name(), "err", err)
continue
}

mtimes[filepath.Join(path, f.Name())] = *mtime
mtimes[metricsFilePath] = *mtime
}
}

for _, mf := range parsedFamilies {
if mf.Help == nil {
help := fmt.Sprintf("Metric read from %s", strings.Join(metricsNamesToFiles[*mf.Name], ", "))
mf.Help = &help
}
}

for _, mf := range parsedFamilies {
convertMetricFamily(mf, ch, c.logger)
}

c.exportMTimes(mtimes, ch)

// Export if there were errors.
Expand All @@ -243,44 +264,33 @@ func (c *textFileCollector) Update(ch chan<- prometheus.Metric) error {
}

// processFile processes a single file, returning its modification time on success.
func (c *textFileCollector) processFile(dir, name string, ch chan<- prometheus.Metric) (*time.Time, error) {
func (c *textFileCollector) processFile(dir, name string, ch chan<- prometheus.Metric) (*time.Time, map[string]*dto.MetricFamily, error) {
path := filepath.Join(dir, name)
f, err := os.Open(path)
if err != nil {
return nil, fmt.Errorf("failed to open textfile data file %q: %w", path, err)
return nil, nil, fmt.Errorf("failed to open textfile data file %q: %w", path, err)
}
defer f.Close()

var parser expfmt.TextParser
families, err := parser.TextToMetricFamilies(f)
if err != nil {
return nil, fmt.Errorf("failed to parse textfile data from %q: %w", path, err)
return nil, nil, fmt.Errorf("failed to parse textfile data from %q: %w", path, err)
}

if hasTimestamps(families) {
return nil, fmt.Errorf("textfile %q contains unsupported client-side timestamps, skipping entire file", path)
}

for _, mf := range families {
if mf.Help == nil {
help := fmt.Sprintf("Metric read from %s", path)
mf.Help = &help
}
}

for _, mf := range families {
convertMetricFamily(mf, ch, c.logger)
return nil, nil, fmt.Errorf("textfile %q contains unsupported client-side timestamps, skipping entire file", path)
}

// Only stat the file once it has been parsed and validated, so that
// a failure does not appear fresh.
stat, err := f.Stat()
if err != nil {
return nil, fmt.Errorf("failed to stat %q: %w", path, err)
return nil, families, fmt.Errorf("failed to stat %q: %w", path, err)
}

t := stat.ModTime()
return &t, nil
return &t, families, nil
}

// hasTimestamps returns true when metrics contain unsupported timestamps.
Expand Down
18 changes: 17 additions & 1 deletion collector/textfile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,22 @@ func TestTextfileCollector(t *testing.T) {
path: "fixtures/textfile/*_extra_dimension",
out: "fixtures/textfile/glob_extra_dimension.out",
},
{
path: "fixtures/textfile/metrics_merge_empty_help",
out: "fixtures/textfile/metrics_merge_empty_help.out",
},
{
path: "fixtures/textfile/metrics_merge_no_help",
out: "fixtures/textfile/metrics_merge_no_help.out",
},
{
path: "fixtures/textfile/metrics_merge_same_help",
out: "fixtures/textfile/metrics_merge_same_help.out",
},
{
path: "fixtures/textfile/metrics_merge_different_help",
out: "fixtures/textfile/metrics_merge_different_help.out",
},
}

for i, test := range tests {
Expand All @@ -117,7 +133,7 @@ func TestTextfileCollector(t *testing.T) {
registry.MustRegister(collectorAdapter{c})

rw := httptest.NewRecorder()
promhttp.HandlerFor(registry, promhttp.HandlerOpts{}).ServeHTTP(rw, &http.Request{})
promhttp.HandlerFor(registry, promhttp.HandlerOpts{ErrorHandling: promhttp.ContinueOnError}).ServeHTTP(rw, &http.Request{})
got := string(rw.Body.String())

want, err := os.ReadFile(test.out)
Expand Down

0 comments on commit f569a7d

Please sign in to comment.