diff --git a/config/http_config.go b/config/http_config.go index 4a926e8d..7a67a0a6 100644 --- a/config/http_config.go +++ b/config/http_config.go @@ -30,7 +30,7 @@ import ( "sync" "time" - "github.com/mwitkow/go-conntrack" + conntrack "github.com/mwitkow/go-conntrack" "golang.org/x/net/http/httpproxy" "golang.org/x/net/http2" "golang.org/x/oauth2" diff --git a/expfmt/decode_test.go b/expfmt/decode_test.go index fff57ab6..3c023f53 100644 --- a/expfmt/decode_test.go +++ b/expfmt/decode_test.go @@ -17,6 +17,7 @@ import ( "bufio" "errors" "io" + "math" "net/http" "reflect" "sort" @@ -104,9 +105,10 @@ func TestProtoDecoder(t *testing.T) { testTime := model.Now() scenarios := []struct { - in string - expected model.Vector - fail bool + in string + expected model.Vector + legacyNameFail bool + fail bool }{ { in: "", @@ -332,6 +334,30 @@ func TestProtoDecoder(t *testing.T) { }, }, }, + { + in: "\xa8\x01\n\ngauge.name\x12\x11gauge\ndoc\nstr\"ing\x18\x01\"T\n\x1b\n\x06name.1\x12\x11val with\nnew line\n*\n\x06name*2\x12 val with \\backslash and \"quotes\"\x12\t\t\x00\x00\x00\x00\x00\x00\xf0\x7f\"/\n\x10\n\x06name.1\x12\x06Björn\n\x10\n\x06name*2\x12\x06佖佥\x12\t\t\xd1\xcfD\xb9\xd0\x05\xc2H", + legacyNameFail: true, + expected: model.Vector{ + &model.Sample{ + Metric: model.Metric{ + model.MetricNameLabel: "gauge.name", + "name.1": "val with\nnew line", + "name*2": "val with \\backslash and \"quotes\"", + }, + Value: model.SampleValue(math.Inf(+1)), + Timestamp: testTime, + }, + &model.Sample{ + Metric: model.Metric{ + model.MetricNameLabel: "gauge.name", + "name.1": "Björn", + "name*2": "佖佥", + }, + Value: 3.14e42, + Timestamp: testTime, + }, + }, + }, } for i, scenario := range scenarios { @@ -344,11 +370,31 @@ func TestProtoDecoder(t *testing.T) { var all model.Vector for { + model.NameValidationScheme = model.LegacyValidation var smpls model.Vector err := dec.Decode(&smpls) if err != nil && errors.Is(err, io.EOF) { break } + if scenario.legacyNameFail { + if err == nil { + 
t.Fatal("Expected error when decoding without UTF-8 support enabled but got none") + } + model.NameValidationScheme = model.UTF8Validation + dec = &SampleDecoder{ + Dec: &protoDecoder{r: strings.NewReader(scenario.in)}, + Opts: &DecodeOptions{ + Timestamp: testTime, + }, + } + err = dec.Decode(&smpls) + if errors.Is(err, io.EOF) { + break + } + if err != nil { + t.Fatalf("Unexpected error when decoding with UTF-8 support: %v", err) + } + } if scenario.fail { if err == nil { t.Fatal("Expected error but got none") diff --git a/expfmt/expfmt.go b/expfmt/expfmt.go index c4cb20f0..d866b474 100644 --- a/expfmt/expfmt.go +++ b/expfmt/expfmt.go @@ -17,7 +17,13 @@ package expfmt // Format specifies the HTTP content type of the different wire protocols. type Format string -// Constants to assemble the Content-Type values for the different wire protocols. +// Constants to assemble the Content-Type values for the different wire +// protocols. The Content-Type strings here are all for the legacy exposition +// formats, where valid characters for metric names and label names are limited. +// Support for arbitrary UTF-8 characters in those names is already partially +// implemented in this module (see model.ValidationScheme), but to actually use +// it on the wire, new content-type strings will have to be agreed upon and +// added here. const ( TextVersion = "0.0.4" ProtoType = `application/vnd.google.protobuf` diff --git a/expfmt/openmetrics_create.go b/expfmt/openmetrics_create.go index 21cdddcf..5622578e 100644 --- a/expfmt/openmetrics_create.go +++ b/expfmt/openmetrics_create.go @@ -35,6 +35,18 @@ import ( // sanity checks. If the input contains duplicate metrics or invalid metric or // label names, the conversion will result in invalid text format output. // +// If metric names conform to the legacy validation pattern, they will be placed +// outside the brackets in the traditional way, like `foo{}`. 
If the metric name +// fails the legacy validation check, it will be placed quoted inside the +// brackets: `{"foo"}`. As stated above, the input is assumed to be sanitized and +// no error will be thrown in this case. +// +// Similar to metric names, if label names conform to the legacy validation +// pattern, they will be unquoted as normal, like `foo{bar="baz"}`. If the label +// name fails the legacy validation check, it will be quoted: +// `foo{"bar"="baz"}`. As stated above, the input is assumed to be sanitized and +// no error will be thrown in this case. +// // This function fulfills the type 'expfmt.encoder'. // // Note that OpenMetrics requires a final `# EOF` line. Since this function acts @@ -98,7 +110,7 @@ func MetricFamilyToOpenMetrics(out io.Writer, in *dto.MetricFamily) (written int if err != nil { return } - n, err = w.WriteString(shortName) + n, err = writeName(w, shortName) written += n if err != nil { return @@ -124,7 +136,7 @@ func MetricFamilyToOpenMetrics(out io.Writer, in *dto.MetricFamily) (written int if err != nil { return } - n, err = w.WriteString(shortName) + n, err = writeName(w, shortName) written += n if err != nil { return @@ -303,21 +315,9 @@ func writeOpenMetricsSample( floatValue float64, intValue uint64, useIntValue bool, exemplar *dto.Exemplar, ) (int, error) { - var written int - n, err := w.WriteString(name) - written += n - if err != nil { - return written, err - } - if suffix != "" { - n, err = w.WriteString(suffix) - written += n - if err != nil { - return written, err - } - } - n, err = writeOpenMetricsLabelPairs( - w, metric.Label, additionalLabelName, additionalLabelValue, + written := 0 + n, err := writeOpenMetricsNameAndLabelPairs( + w, name+suffix, metric.Label, additionalLabelName, additionalLabelValue, ) written += n if err != nil { @@ -365,27 +365,58 @@ func writeOpenMetricsSample( return written, nil } -// writeOpenMetricsLabelPairs works like writeOpenMetrics but formats the float -// in OpenMetrics style. 
-func writeOpenMetricsLabelPairs( +// writeOpenMetricsNameAndLabelPairs works like writeOpenMetricsSample but +// formats the float in OpenMetrics style. +func writeOpenMetricsNameAndLabelPairs( w enhancedWriter, + name string, in []*dto.LabelPair, additionalLabelName string, additionalLabelValue float64, ) (int, error) { - if len(in) == 0 && additionalLabelName == "" { - return 0, nil - } var ( - written int - separator byte = '{' + written int + separator byte = '{' + metricInsideBraces = false ) + + if name != "" { + // If the name does not pass the legacy validity check, we must put the + // metric name inside the braces, quoted. + if !model.IsValidLegacyMetricName(model.LabelValue(name)) { + metricInsideBraces = true + err := w.WriteByte(separator) + written++ + if err != nil { + return written, err + } + separator = ',' + } + + n, err := writeName(w, name) + written += n + if err != nil { + return written, err + } + } + + if len(in) == 0 && additionalLabelName == "" { + if metricInsideBraces { + err := w.WriteByte('}') + written++ + if err != nil { + return written, err + } + } + return written, nil + } + for _, lp := range in { err := w.WriteByte(separator) written++ if err != nil { return written, err } - n, err := w.WriteString(lp.GetName()) + n, err := writeName(w, lp.GetName()) written += n if err != nil { return written, err @@ -451,7 +482,7 @@ func writeExemplar(w enhancedWriter, e *dto.Exemplar) (int, error) { if err != nil { return written, err } - n, err = writeOpenMetricsLabelPairs(w, e.Label, "", 0) + n, err = writeOpenMetricsNameAndLabelPairs(w, "", e.Label, "", 0) written += n if err != nil { return written, err diff --git a/expfmt/openmetrics_create_test.go b/expfmt/openmetrics_create_test.go index ab1e8616..7b201655 100644 --- a/expfmt/openmetrics_create_test.go +++ b/expfmt/openmetrics_create_test.go @@ -82,7 +82,79 @@ name{labelname="val1",basename="basevalue"} 42.0 name{labelname="val2",basename="basevalue"} 0.23 1.23456789e+06 `, }, - // 
1: Gauge, some escaping required, +Inf as value, multi-byte characters in label values. + // 1: Dots in name + { + in: &dto.MetricFamily{ + Name: proto.String("name.with.dots"), + Help: proto.String("boring help"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: proto.String("labelname"), + Value: proto.String("val1"), + }, + { + Name: proto.String("basename"), + Value: proto.String("basevalue"), + }, + }, + Counter: &dto.Counter{ + Value: proto.Float64(42), + }, + }, + { + Label: []*dto.LabelPair{ + { + Name: proto.String("labelname"), + Value: proto.String("val2"), + }, + { + Name: proto.String("basename"), + Value: proto.String("basevalue"), + }, + }, + Counter: &dto.Counter{ + Value: proto.Float64(.23), + }, + TimestampMs: proto.Int64(1234567890), + }, + }, + }, + out: `# HELP "name.with.dots" boring help +# TYPE "name.with.dots" unknown +{"name.with.dots",labelname="val1",basename="basevalue"} 42.0 +{"name.with.dots",labelname="val2",basename="basevalue"} 0.23 1.23456789e+06 +`, + }, + // 2: Dots in name, no labels + { + in: &dto.MetricFamily{ + Name: proto.String("name.with.dots"), + Help: proto.String("boring help"), + Type: dto.MetricType_COUNTER.Enum(), + Metric: []*dto.Metric{ + { + Counter: &dto.Counter{ + Value: proto.Float64(42), + }, + }, + { + Counter: &dto.Counter{ + Value: proto.Float64(.23), + }, + TimestampMs: proto.Int64(1234567890), + }, + }, + }, + out: `# HELP "name.with.dots" boring help +# TYPE "name.with.dots" unknown +{"name.with.dots"} 42.0 +{"name.with.dots"} 0.23 1.23456789e+06 +`, + }, + // 3: Gauge, some escaping required, +Inf as value, multi-byte characters in label values. 
{ in: &dto.MetricFamily{ Name: proto.String("gauge_name"), @@ -127,7 +199,52 @@ gauge_name{name_1="val with\nnew line",name_2="val with \\backslash and \"quotes gauge_name{name_1="Björn",name_2="佖佥"} 3.14e+42 `, }, - // 2: Unknown, no help, one sample with no labels and -Inf as value, another sample with one label. + // 4: Gauge, utf8, some escaping required, +Inf as value, multi-byte characters in label values. + { + in: &dto.MetricFamily{ + Name: proto.String("gauge.name\""), + Help: proto.String("gauge\ndoc\nstr\"ing"), + Type: dto.MetricType_GAUGE.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: proto.String("name.1"), + Value: proto.String("val with\nnew line"), + }, + { + Name: proto.String("name*2"), + Value: proto.String("val with \\backslash and \"quotes\""), + }, + }, + Gauge: &dto.Gauge{ + Value: proto.Float64(math.Inf(+1)), + }, + }, + { + Label: []*dto.LabelPair{ + { + Name: proto.String("name.1"), + Value: proto.String("Björn"), + }, + { + Name: proto.String("name*2"), + Value: proto.String("佖佥"), + }, + }, + Gauge: &dto.Gauge{ + Value: proto.Float64(3.14e42), + }, + }, + }, + }, + out: `# HELP "gauge.name\"" gauge\ndoc\nstr\"ing +# TYPE "gauge.name\"" gauge +{"gauge.name\"","name.1"="val with\nnew line","name*2"="val with \\backslash and \"quotes\""} +Inf +{"gauge.name\"","name.1"="Björn","name*2"="佖佥"} 3.14e+42 +`, + }, + // 5: Unknown, no help, one sample with no labels and -Inf as value, another sample with one label. { in: &dto.MetricFamily{ Name: proto.String("unknown_name"), @@ -156,7 +273,7 @@ unknown_name -Inf unknown_name{name_1="value 1"} -1.23e-45 `, }, - // 3: Summary. + // 6: Summary. 
{ in: &dto.MetricFamily{ Name: proto.String("summary_name"), @@ -229,7 +346,7 @@ summary_name_sum{name_1="value 1",name_2="value 2"} 2010.1971 summary_name_count{name_1="value 1",name_2="value 2"} 4711 `, }, - // 4: Histogram + // 7: Histogram { in: &dto.MetricFamily{ Name: proto.String("request_duration_microseconds"), @@ -277,7 +394,7 @@ request_duration_microseconds_sum 1.7560473e+06 request_duration_microseconds_count 2693 `, }, - // 5: Histogram with missing +Inf bucket. + // 8: Histogram with missing +Inf bucket. { in: &dto.MetricFamily{ Name: proto.String("request_duration_microseconds"), @@ -321,7 +438,7 @@ request_duration_microseconds_sum 1.7560473e+06 request_duration_microseconds_count 2693 `, }, - // 6: Histogram with missing +Inf bucket but with different exemplars. + // 9: Histogram with missing +Inf bucket but with different exemplars. { in: &dto.MetricFamily{ Name: proto.String("request_duration_microseconds"), @@ -388,7 +505,7 @@ request_duration_microseconds_sum 1.7560473e+06 request_duration_microseconds_count 2693 `, }, - // 7: Simple Counter. + // 10: Simple Counter. { in: &dto.MetricFamily{ Name: proto.String("foos_total"), @@ -407,7 +524,7 @@ request_duration_microseconds_count 2693 foos_total 42.0 `, }, - // 8: No metric. + // 11: No metric. { in: &dto.MetricFamily{ Name: proto.String("name_total"), diff --git a/expfmt/text_create.go b/expfmt/text_create.go index 2946b8f1..f9b8265a 100644 --- a/expfmt/text_create.go +++ b/expfmt/text_create.go @@ -62,6 +62,18 @@ var ( // contains duplicate metrics or invalid metric or label names, the conversion // will result in invalid text format output. // +// If metric names conform to the legacy validation pattern, they will be placed +// outside the brackets in the traditional way, like `foo{}`. If the metric name +// fails the legacy validation check, it will be placed quoted inside the +// brackets: `{"foo"}`. 
As stated above, the input is assumed to be sanitized and +// no error will be thrown in this case. +// +// Similar to metric names, if label names conform to the legacy validation +// pattern, they will be unquoted as normal, like `foo{bar="baz"}`. If the label +// name fails the legacy validation check, it will be quoted: +// `foo{"bar"="baz"}`. As stated above, the input is assumed to be sanitized and +// no error will be thrown in this case. +// // This method fulfills the type 'prometheus.encoder'. func MetricFamilyToText(out io.Writer, in *dto.MetricFamily) (written int, err error) { // Fail-fast checks. @@ -98,7 +110,7 @@ func MetricFamilyToText(out io.Writer, in *dto.MetricFamily) (written int, err e if err != nil { return } - n, err = w.WriteString(name) + n, err = writeName(w, name) written += n if err != nil { return @@ -124,7 +136,7 @@ func MetricFamilyToText(out io.Writer, in *dto.MetricFamily) (written int, err e if err != nil { return } - n, err = w.WriteString(name) + n, err = writeName(w, name) written += n if err != nil { return @@ -280,21 +292,9 @@ func writeSample( additionalLabelName string, additionalLabelValue float64, value float64, ) (int, error) { - var written int - n, err := w.WriteString(name) - written += n - if err != nil { - return written, err - } - if suffix != "" { - n, err = w.WriteString(suffix) - written += n - if err != nil { - return written, err - } - } - n, err = writeLabelPairs( - w, metric.Label, additionalLabelName, additionalLabelValue, + written := 0 + n, err := writeNameAndLabelPairs( + w, name+suffix, metric.Label, additionalLabelName, additionalLabelValue, ) written += n if err != nil { @@ -330,32 +330,64 @@ func writeSample( return written, nil } -// writeLabelPairs converts a slice of LabelPair proto messages plus the -// explicitly given additional label pair into text formatted as required by the -// text format and writes it to 'w'. 
An empty slice in combination with an empty -// string 'additionalLabelName' results in nothing being written. Otherwise, the -// label pairs are written, escaped as required by the text format, and enclosed -// in '{...}'. The function returns the number of bytes written and any error -// encountered. -func writeLabelPairs( +// writeNameAndLabelPairs converts a slice of LabelPair proto messages plus the +// explicitly given metric name and additional label pair into text formatted as +// required by the text format and writes it to 'w'. An empty slice in +// combination with an empty string 'additionalLabelName' results in nothing +// being written. Otherwise, the label pairs are written, escaped as required by +// the text format, and enclosed in '{...}'. The function returns the number of +// bytes written and any error encountered. If the metric name is not +// legacy-valid, it will be put inside the brackets as well. Legacy-invalid +// label names will also be quoted. +func writeNameAndLabelPairs( w enhancedWriter, + name string, in []*dto.LabelPair, additionalLabelName string, additionalLabelValue float64, ) (int, error) { - if len(in) == 0 && additionalLabelName == "" { - return 0, nil - } var ( - written int - separator byte = '{' + written int + separator byte = '{' + metricInsideBraces = false ) + + if name != "" { + // If the name does not pass the legacy validity check, we must put the + // metric name inside the braces. 
+ if !model.IsValidLegacyMetricName(model.LabelValue(name)) { + metricInsideBraces = true + err := w.WriteByte(separator) + written++ + if err != nil { + return written, err + } + separator = ',' + } + n, err := writeName(w, name) + written += n + if err != nil { + return written, err + } + } + + if len(in) == 0 && additionalLabelName == "" { + if metricInsideBraces { + err := w.WriteByte('}') + written++ + if err != nil { + return written, err + } + } + return written, nil + } + for _, lp := range in { err := w.WriteByte(separator) written++ if err != nil { return written, err } - n, err := w.WriteString(lp.GetName()) + n, err := writeName(w, lp.GetName()) written += n if err != nil { return written, err @@ -462,3 +494,27 @@ func writeInt(w enhancedWriter, i int64) (int, error) { numBufPool.Put(bp) return written, err } + +// writeName writes a string as-is if it complies with the legacy naming +// scheme, or escapes it in double quotes if not. +func writeName(w enhancedWriter, name string) (int, error) { + if model.IsValidLegacyMetricName(model.LabelValue(name)) { + return w.WriteString(name) + } + var written int + var err error + err = w.WriteByte('"') + written++ + if err != nil { + return written, err + } + var n int + n, err = writeEscapedString(w, name, true) + written += n + if err != nil { + return written, err + } + err = w.WriteByte('"') + written++ + return written, err +} diff --git a/expfmt/text_create_test.go b/expfmt/text_create_test.go index 8d3ac3d1..41bd408c 100644 --- a/expfmt/text_create_test.go +++ b/expfmt/text_create_test.go @@ -120,7 +120,52 @@ gauge_name{name_1="val with\nnew line",name_2="val with \\backslash and \"quotes gauge_name{name_1="Björn",name_2="佖佥"} 3.14e+42 `, }, - // 2: Untyped, no help, one sample with no labels and -Inf as value, another sample with one label. + // 2: Gauge, utf8, +Inf as value, multi-byte characters in label values. 
+ { + in: &dto.MetricFamily{ + Name: proto.String("gauge.name"), + Help: proto.String("gauge\ndoc\nstr\"ing"), + Type: dto.MetricType_GAUGE.Enum(), + Metric: []*dto.Metric{ + { + Label: []*dto.LabelPair{ + { + Name: proto.String("name.1"), + Value: proto.String("val with\nnew line"), + }, + { + Name: proto.String("name*2"), + Value: proto.String("val with \\backslash and \"quotes\""), + }, + }, + Gauge: &dto.Gauge{ + Value: proto.Float64(math.Inf(+1)), + }, + }, + { + Label: []*dto.LabelPair{ + { + Name: proto.String("name.1"), + Value: proto.String("Björn"), + }, + { + Name: proto.String("name*2"), + Value: proto.String("佖佥"), + }, + }, + Gauge: &dto.Gauge{ + Value: proto.Float64(3.14e42), + }, + }, + }, + }, + out: `# HELP "gauge.name" gauge\ndoc\nstr"ing +# TYPE "gauge.name" gauge +{"gauge.name","name.1"="val with\nnew line","name*2"="val with \\backslash and \"quotes\""} +Inf +{"gauge.name","name.1"="Björn","name*2"="佖佥"} 3.14e+42 +`, + }, + // 3: Untyped, no help, one sample with no labels and -Inf as value, another sample with one label. { in: &dto.MetricFamily{ Name: proto.String("untyped_name"), @@ -149,7 +194,7 @@ untyped_name -Inf untyped_name{name_1="value 1"} -1.23e-45 `, }, - // 3: Summary. + // 4: Summary. { in: &dto.MetricFamily{ Name: proto.String("summary_name"), @@ -222,7 +267,7 @@ summary_name_sum{name_1="value 1",name_2="value 2"} 2010.1971 summary_name_count{name_1="value 1",name_2="value 2"} 4711 `, }, - // 4: Histogram + // 5: Histogram { in: &dto.MetricFamily{ Name: proto.String("request_duration_microseconds"), @@ -270,7 +315,7 @@ request_duration_microseconds_sum 1.7560473e+06 request_duration_microseconds_count 2693 `, }, - // 5: Histogram with missing +Inf bucket. + // 6: Histogram with missing +Inf bucket. 
{ in: &dto.MetricFamily{ Name: proto.String("request_duration_microseconds"), @@ -314,7 +359,7 @@ request_duration_microseconds_sum 1.7560473e+06 request_duration_microseconds_count 2693 `, }, - // 6: No metric type, should result in default type Counter. + // 7: No metric type, should result in default type Counter. { in: &dto.MetricFamily{ Name: proto.String("name"), diff --git a/model/labels.go b/model/labels.go index ef895633..73dafe43 100644 --- a/model/labels.go +++ b/model/labels.go @@ -97,17 +97,25 @@ var LabelNameRE = regexp.MustCompile("^[a-zA-Z_][a-zA-Z0-9_]*$") // therewith. type LabelName string -// IsValid is true iff the label name matches the pattern of LabelNameRE. This -// method, however, does not use LabelNameRE for the check but a much faster -// hardcoded implementation. +// IsValid returns true iff name matches the pattern of LabelNameRE for legacy +// names, and iff it's valid UTF-8 if NameValidationScheme is set to +// UTF8Validation. For the legacy matching, it does not use LabelNameRE for the +// check but a much faster hardcoded implementation. 
func (ln LabelName) IsValid() bool { if len(ln) == 0 { return false } - for i, b := range ln { - if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || (b >= '0' && b <= '9' && i > 0)) { - return false + switch NameValidationScheme { + case LegacyValidation: + for i, b := range ln { + if !((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z') || b == '_' || (b >= '0' && b <= '9' && i > 0)) { + return false + } } + case UTF8Validation: + return utf8.ValidString(string(ln)) + default: + panic(fmt.Sprintf("Invalid name validation scheme requested: %d", NameValidationScheme)) } return true } diff --git a/model/labels_test.go b/model/labels_test.go index c49d8e7c..5ec94a40 100644 --- a/model/labels_test.go +++ b/model/labels_test.go @@ -92,49 +92,68 @@ func BenchmarkLabelValues(b *testing.B) { func TestLabelNameIsValid(t *testing.T) { scenarios := []struct { - ln LabelName - valid bool + ln LabelName + legacyValid bool + utf8Valid bool }{ { - ln: "Avalid_23name", - valid: true, + ln: "Avalid_23name", + legacyValid: true, + utf8Valid: true, }, { - ln: "_Avalid_23name", - valid: true, + ln: "_Avalid_23name", + legacyValid: true, + utf8Valid: true, }, { - ln: "1valid_23name", - valid: false, + ln: "1valid_23name", + legacyValid: false, + utf8Valid: true, }, { - ln: "avalid_23name", - valid: true, + ln: "avalid_23name", + legacyValid: true, + utf8Valid: true, }, { - ln: "Ava:lid_23name", - valid: false, + ln: "Ava:lid_23name", + legacyValid: false, + utf8Valid: true, }, { - ln: "a lid_23name", - valid: false, + ln: "a lid_23name", + legacyValid: false, + utf8Valid: true, }, { - ln: ":leading_colon", - valid: false, + ln: ":leading_colon", + legacyValid: false, + utf8Valid: true, }, { - ln: "colon:in:the:middle", - valid: false, + ln: "colon:in:the:middle", + legacyValid: false, + utf8Valid: true, + }, + { + ln: "a\xc5z", + legacyValid: false, + utf8Valid: false, }, } for _, s := range scenarios { - if s.ln.IsValid() != s.valid { - t.Errorf("Expected %v for %q using 
IsValid method", s.valid, s.ln) + NameValidationScheme = LegacyValidation + if s.ln.IsValid() != s.legacyValid { + t.Errorf("Expected %v for %q using legacy IsValid method", s.legacyValid, s.ln) + } + if LabelNameRE.MatchString(string(s.ln)) != s.legacyValid { + t.Errorf("Expected %v for %q using legacy regexp match", s.legacyValid, s.ln) } - if LabelNameRE.MatchString(string(s.ln)) != s.valid { - t.Errorf("Expected %v for %q using regexp match", s.valid, s.ln) + NameValidationScheme = UTF8Validation + if s.ln.IsValid() != s.utf8Valid { + t.Errorf("Expected %v for %q using UTF8 IsValid method", s.legacyValid, s.ln) } } } diff --git a/model/labelset_test.go b/model/labelset_test.go index a6f1212c..c008816a 100644 --- a/model/labelset_test.go +++ b/model/labelset_test.go @@ -52,6 +52,7 @@ func TestUnmarshalJSONLabelSet(t *testing.T) { } }` + NameValidationScheme = LegacyValidation err = json.Unmarshal([]byte(invalidlabelSetJSON), &c) expectedErr := `"1nvalid_23name" is not a valid label name` if err == nil || err.Error() != expectedErr { diff --git a/model/metric.go b/model/metric.go index f8c5eaba..9aa0b511 100644 --- a/model/metric.go +++ b/model/metric.go @@ -18,12 +18,39 @@ import ( "regexp" "sort" "strings" + "unicode/utf8" ) -// MetricNameRE is a regular expression matching valid metric -// names. Note that the IsValidMetricName function performs the same -// check but faster than a match with this regular expression. -var MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`) +// ValidationScheme is a Go enum for determining how metric and label names will +// be validated by this library. +type ValidationScheme int + +const ( + // LegacyValidation is a setting that requires that metric and label names + // conform to the original Prometheus character requirements described by + // MetricNameRE and LabelNameRE. + LegacyValidation ValidationScheme = iota + + // UTF8Validation only requires that metric and label names be valid UTF8 + // strings. 
+ UTF8Validation +) + +var ( + // NameValidationScheme determines the method of name validation to be used by + // all calls to IsValidMetricName() and LabelName IsValid(). Setting UTF8 mode + // in isolation from other components that don't support UTF8 may result in + // bugs or other undefined behavior. This value is intended to be set by + // UTF8-aware binaries as part of their startup. To avoid need for locking, + // this value should be set once, ideally in an init(), before multiple + // goroutines are started. + NameValidationScheme = LegacyValidation + + // MetricNameRE is a regular expression matching valid metric + // names. Note that the IsValidMetricName function performs the same + // check but faster than a match with this regular expression. + MetricNameRE = regexp.MustCompile(`^[a-zA-Z_:][a-zA-Z0-9_:]*$`) +) // A Metric is similar to a LabelSet, but the key difference is that a Metric is // a singleton and refers to one and only one stream of samples. @@ -84,10 +111,28 @@ func (m Metric) FastFingerprint() Fingerprint { return LabelSet(m).FastFingerprint() } -// IsValidMetricName returns true iff name matches the pattern of MetricNameRE. +// IsValidMetricName returns true iff name matches the pattern of MetricNameRE +// for legacy names, and iff it's valid UTF-8 if the UTF8Validation scheme is +// selected. +func IsValidMetricName(n LabelValue) bool { + switch NameValidationScheme { + case LegacyValidation: + return IsValidLegacyMetricName(n) + case UTF8Validation: + if len(n) == 0 { + return false + } + return utf8.ValidString(string(n)) + default: + panic(fmt.Sprintf("Invalid name validation scheme requested: %d", NameValidationScheme)) + } +} + +// IsValidLegacyMetricName is similar to IsValidMetricName but always uses the +// legacy validation scheme regardless of the value of NameValidationScheme. // This function, however, does not use MetricNameRE for the check but a much // faster hardcoded implementation. 
-func IsValidMetricName(n LabelValue) bool { +func IsValidLegacyMetricName(n LabelValue) bool { if len(n) == 0 { return false } diff --git a/model/metric_test.go b/model/metric_test.go index cc4e27d0..60f82930 100644 --- a/model/metric_test.go +++ b/model/metric_test.go @@ -82,55 +82,75 @@ func BenchmarkMetric(b *testing.B) { } } -func TestMetricNameIsValid(t *testing.T) { +func TestMetricNameIsLegacyValid(t *testing.T) { scenarios := []struct { - mn LabelValue - valid bool + mn LabelValue + legacyValid bool + utf8Valid bool }{ { - mn: "Avalid_23name", - valid: true, + mn: "Avalid_23name", + legacyValid: true, + utf8Valid: true, }, { - mn: "_Avalid_23name", - valid: true, + mn: "_Avalid_23name", + legacyValid: true, + utf8Valid: true, }, { - mn: "1valid_23name", - valid: false, + mn: "1valid_23name", + legacyValid: false, + utf8Valid: true, }, { - mn: "avalid_23name", - valid: true, + mn: "avalid_23name", + legacyValid: true, + utf8Valid: true, }, { - mn: "Ava:lid_23name", - valid: true, + mn: "Ava:lid_23name", + legacyValid: true, + utf8Valid: true, }, { - mn: "a lid_23name", - valid: false, + mn: "a lid_23name", + legacyValid: false, + utf8Valid: true, }, { - mn: ":leading_colon", - valid: true, + mn: ":leading_colon", + legacyValid: true, + utf8Valid: true, }, { - mn: "colon:in:the:middle", - valid: true, + mn: "colon:in:the:middle", + legacyValid: true, + utf8Valid: true, }, { - mn: "", - valid: false, + mn: "", + legacyValid: false, + utf8Valid: false, + }, + { + mn: "a\xc5z", + legacyValid: false, + utf8Valid: false, }, } for _, s := range scenarios { - if IsValidMetricName(s.mn) != s.valid { - t.Errorf("Expected %v for %q using IsValidMetricName function", s.valid, s.mn) + NameValidationScheme = LegacyValidation + if IsValidMetricName(s.mn) != s.legacyValid { + t.Errorf("Expected %v for %q using legacy IsValidMetricName method", s.legacyValid, s.mn) + } + if MetricNameRE.MatchString(string(s.mn)) != s.legacyValid { + t.Errorf("Expected %v for %q using regexp 
matching", s.legacyValid, s.mn) } - if MetricNameRE.MatchString(string(s.mn)) != s.valid { - t.Errorf("Expected %v for %q using regexp matching", s.valid, s.mn) + NameValidationScheme = UTF8Validation + if IsValidMetricName(s.mn) != s.utf8Valid { + t.Errorf("Expected %v for %q using utf8 IsValidMetricName method", s.legacyValid, s.mn) } } } diff --git a/model/silence_test.go b/model/silence_test.go index e6b14116..0b5ad322 100644 --- a/model/silence_test.go +++ b/model/silence_test.go @@ -21,8 +21,9 @@ import ( func TestMatcherValidate(t *testing.T) { cases := []struct { - matcher *Matcher - err string + matcher *Matcher + legacyErr string + utf8Err string }{ { matcher: &Matcher{ @@ -42,46 +43,74 @@ func TestMatcherValidate(t *testing.T) { Name: "name!", Value: "value", }, - err: "invalid name", + legacyErr: "invalid name", }, { matcher: &Matcher{ Name: "", Value: "value", }, - err: "invalid name", + legacyErr: "invalid name", + utf8Err: "invalid name", }, { matcher: &Matcher{ Name: "name", Value: "value\xff", }, - err: "invalid value", + legacyErr: "invalid value", + utf8Err: "invalid value", }, { matcher: &Matcher{ Name: "name", Value: "", }, - err: "invalid value", + legacyErr: "invalid value", + utf8Err: "invalid value", + }, + { + matcher: &Matcher{ + Name: "a\xc5z", + Value: "", + }, + legacyErr: "invalid name", + utf8Err: "invalid name", }, } for i, c := range cases { - err := c.matcher.Validate() - if err == nil { - if c.err == "" { + NameValidationScheme = LegacyValidation + legacyErr := c.matcher.Validate() + NameValidationScheme = UTF8Validation + utf8Err := c.matcher.Validate() + if legacyErr == nil && utf8Err == nil { + if c.legacyErr == "" && c.utf8Err == "" { continue } - t.Errorf("%d. Expected error %q but got none", i, c.err) + if c.legacyErr != "" { + t.Errorf("%d. Expected error for legacy validation %q but got none", i, c.legacyErr) + } + if c.utf8Err != "" { + t.Errorf("%d. 
Expected error for utf8 validation %q but got none", i, c.utf8Err) + } continue } - if c.err == "" { - t.Errorf("%d. Expected no error but got %q", i, err) - continue + if legacyErr != nil { + if c.legacyErr == "" { + t.Errorf("%d. Expected no legacy validation error but got %q", i, legacyErr) + } else if !strings.Contains(legacyErr.Error(), c.legacyErr) { + t.Errorf("%d. Expected error to contain %q but got %q", i, c.legacyErr, legacyErr) + } } - if !strings.Contains(err.Error(), c.err) { - t.Errorf("%d. Expected error to contain %q but got %q", i, c.err, err) + if utf8Err != nil { + if c.utf8Err == "" { + t.Errorf("%d. Expected no utf8 validation error but got %q", i, utf8Err) + continue + } + if !strings.Contains(utf8Err.Error(), c.utf8Err) { + t.Errorf("%d. Expected error to contain %q but got %q", i, c.utf8Err, utf8Err) + } } } } @@ -219,6 +248,7 @@ func TestSilenceValidate(t *testing.T) { } for i, c := range cases { + NameValidationScheme = LegacyValidation err := c.sil.Validate() if err == nil { if c.err == "" {