diff --git a/internal/tools/go.mod b/internal/tools/go.mod index f5cc5dcf24..ef6f8993d1 100644 --- a/internal/tools/go.mod +++ b/internal/tools/go.mod @@ -111,5 +111,9 @@ require ( google.golang.org/grpc v1.73.0 // indirect google.golang.org/protobuf v1.36.6 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + modernc.org/fileutil v1.2.0 // indirect + modernc.org/golex v1.1.0 // indirect + modernc.org/lex v1.1.1 // indirect + modernc.org/lexer v1.0.5 // indirect pluginrpc.com/pluginrpc v0.5.0 // indirect ) diff --git a/internal/tools/go.sum b/internal/tools/go.sum index 544b227c79..9ea1a3ffc2 100644 --- a/internal/tools/go.sum +++ b/internal/tools/go.sum @@ -186,6 +186,7 @@ github.com/quic-go/qpack v0.5.1 h1:giqksBPnT/HDtZ6VhtFKgoLOWmlyo9Ei6u9PqzIMbhI= github.com/quic-go/qpack v0.5.1/go.mod h1:+PC4XFrEskIVkcLzpEkbLqq1uCoxPhQuvK5rH1ZgaEg= github.com/quic-go/quic-go v0.50.1 h1:unsgjFIUqW8a2oopkY7YNONpV1gYND6Nt9hnt1PN94Q= github.com/quic-go/quic-go v0.50.1/go.mod h1:Vim6OmUvlYdwBhXP9ZVrtGmCMWa3wEqhq3NgYrI8b4E= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rs/cors v1.11.1 h1:eU3gRzXLRK57F5rKMGMZURNdIG4EoAmX8k94r9wXWHA= @@ -262,6 +263,7 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= +golang.org/x/exp v0.0.0-20181106170214-d68db9428509/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20250228200357-dead58393ab7 h1:aWwlzYV971S4BXRS9AmqwDLAD85ouC6X+pocatKY58c= golang.org/x/exp 
v0.0.0-20250228200357-dead58393ab7/go.mod h1:BHOTPb3L19zxehTsLoJXVaTktb06DFgmdW6Wb9s8jqk= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -324,5 +326,16 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= +modernc.org/fileutil v1.1.2/go.mod h1:HdjlliqRHrMAI4nVOvvpYVzVgvRSK7WnoCiG0GUWJNo= +modernc.org/fileutil v1.2.0 h1:c7fsfzHf9WfUFXvv/RY9sStAr+VAKXYGKiAhBQQNoT4= +modernc.org/fileutil v1.2.0/go.mod h1:0rLMFc17WSz6Bm/GtHeme7TOX8pNRhFN2NkfBlOZhrQ= +modernc.org/golex v1.1.0 h1:dmSaksHMd+y6NkBsRsCShNPRaSNCNH+abrVm5/gZic8= +modernc.org/golex v1.1.0/go.mod h1:2pVlfqApurXhR1m0N+WDYu6Twnc4QuvO4+U8HnwoiRA= +modernc.org/lex v1.1.1 h1:prSCNTLw1R4rn7M/RzwsuMtAuOytfyR3cnyM07P+Pas= +modernc.org/lex v1.1.1/go.mod h1:6r8o8DLJkAnOsQaGi8fMoi+Vt6LTbDaCrkUK729D8xM= +modernc.org/lexer v1.0.4/go.mod h1:tOajb8S4sdfOYitzCgXDFmbVJ/LE0v1fNJ7annTw36U= +modernc.org/lexer v1.0.5 h1:NiKuv6LaU6+D2zra31y6FewnAU8LfrtSwHckwdnDSCg= +modernc.org/lexer v1.0.5/go.mod h1:8npHn3u/NxCEtlC/tRSY77x5+WB3HvHMzMVElQ76ayI= +modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= pluginrpc.com/pluginrpc v0.5.0 h1:tOQj2D35hOmvHyPu8e7ohW2/QvAnEtKscy2IJYWQ2yo= pluginrpc.com/pluginrpc v0.5.0/go.mod h1:UNWZ941hcVAoOZUn8YZsMmOZBzbUjQa3XMns8RQLp9o= diff --git a/model/textparse/README.md b/model/textparse/README.md index 697966f097..663835f6de 100644 --- a/model/textparse/README.md +++ b/model/textparse/README.md @@ -1,6 +1,7 @@ # Making changes to textparse lexers -In the rare case that you need to update the textparse lexers, edit promlex.l or openmetricslex.l and then run the following command: -`golex -o=promlex.l.go promlex.l` -Note that you need golex installed: -`go get -u modernc.org/golex` \ No newline at 
end of file +Run from the repo root: + +```bash +bash ./scripts/gentextlex.sh +``` diff --git a/model/textparse/benchmark_test.go b/model/textparse/benchmark_test.go index a6fbd4ccd1..96bef15f74 100644 --- a/model/textparse/benchmark_test.go +++ b/model/textparse/benchmark_test.go @@ -23,12 +23,14 @@ import ( "strings" "testing" + "github.com/google/go-cmp/cmp" "github.com/prometheus/common/expfmt" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/util/testutil" ) // BenchmarkParse... set of benchmarks analyze efficiency of parsing various @@ -138,32 +140,81 @@ func BenchmarkParseOpenMetricsNHCB(b *testing.B) { } } -func benchParse(b *testing.B, data []byte, parser string) { - type newParser func([]byte, *labels.SymbolTable) Parser +// BenchmarkParseOpenMetricsNHCB_OM1vs2 is for demo of the benefit for the complex +// type format for OM2, assuming Prometheus stores NS and NHCB (and NH) going forward. +// Format draft: https://github.com/prometheus/docs/pull/2679 +/* + export bench=out && go test ./model/textparse/... \ + -run '^$' -bench '^BenchmarkParseOpenMetricsNHCB_OM1vs2' \ + -benchtime 2s -count 6 -cpu 2 -benchmem -timeout 999m \ + | tee ${bench}.txt +*/ +func BenchmarkParseOpenMetricsNHCB_OM1vs2(b *testing.B) { + parseCases := []struct { + parser string + data []byte + }{ + { + parser: "omtext_with_nhcb", // Measure NHCB over OM parser. + data: readTestdataFile(b, "1histogram.om.txt"), + }, + { + parser: "om2text_with_nhcb", // https://github.com/prometheus/docs/pull/2679 with NHCB output. + data: readTestdataFile(b, "1histogram.om2.txt"), + }, + } + + // Before we go, test parsing works as expected. 
+ gotA := testParse(b, newParser(b, parseCases[0].parser)(parseCases[0].data, labels.NewSymbolTable())) + gotB := testParse(b, newParser(b, parseCases[1].parser)(parseCases[1].data, labels.NewSymbolTable())) + testutil.RequireEqualWithOptions(b, gotA, gotB, []cmp.Option{cmp.AllowUnexported(parsedEntry{})}) + + // For fun, OM2 parser should work with classic histogram too (TODO add separate tests). + _ = testParse(b, newParser(b, parseCases[1].parser)(parseCases[0].data, labels.NewSymbolTable())) + + for _, c := range parseCases { + b.Run(fmt.Sprintf("parser=%v", c.parser), func(b *testing.B) { + benchParse(b, c.data, c.parser) + }) + } +} + +func newParser(t testing.TB, parser string) func([]byte, *labels.SymbolTable) Parser { + t.Helper() - var newParserFn newParser switch parser { case "promtext": - newParserFn = func(b []byte, st *labels.SymbolTable) Parser { + return func(b []byte, st *labels.SymbolTable) Parser { return NewPromParser(b, st, false) } case "promproto": - newParserFn = func(b []byte, st *labels.SymbolTable) Parser { + return func(b []byte, st *labels.SymbolTable) Parser { return NewProtobufParser(b, true, false, false, st) } case "omtext": - newParserFn = func(b []byte, st *labels.SymbolTable) Parser { + return func(b []byte, st *labels.SymbolTable) Parser { return NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) } case "omtext_with_nhcb": - newParserFn = func(buf []byte, st *labels.SymbolTable) Parser { + return func(buf []byte, st *labels.SymbolTable) Parser { p, err := New(buf, "application/openmetrics-text", st, ParserOptions{ConvertClassicHistogramsToNHCB: true}) - require.NoError(b, err) + require.NoError(t, err) return p } + case "om2text_with_nhcb": + return func(b []byte, st *labels.SymbolTable) Parser { + return NewOpenMetrics2Parser(b, st, func(options *openMetrics2ParserOptions) { + options.unrollComplexTypes = false + }) + } default: - b.Fatal("unknown parser", parser) + t.Fatal("unknown parser", parser) } + return nil +} + 
+func benchParse(b *testing.B, data []byte, parser string) { + newParserFn := newParser(b, parser) var ( res labels.Labels diff --git a/model/textparse/interface_test.go b/model/textparse/interface_test.go index 532c474845..5dc8bf7044 100644 --- a/model/textparse/interface_test.go +++ b/model/textparse/interface_test.go @@ -226,7 +226,7 @@ func requireEntries(t *testing.T, exp, got []parsedEntry) { }) } -func testParse(t *testing.T, p Parser) (ret []parsedEntry) { +func testParse(t testing.TB, p Parser) (ret []parsedEntry) { t.Helper() for { diff --git a/model/textparse/openmetrics2lex.l b/model/textparse/openmetrics2lex.l new file mode 100644 index 0000000000..811237125b --- /dev/null +++ b/model/textparse/openmetrics2lex.l @@ -0,0 +1,95 @@ +%{ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "fmt" +) + +// Lex is called by the parser generated by "go tool yacc" to obtain each +// token. The method is opened before the matching rules block and closed at +// the end of the file. 
+func (l *openMetrics2Lexer) Lex() token { + if l.i >= len(l.b) { + return tEOF + } + c := l.b[l.i] + l.start = l.i + +%} + +D [0-9] +L [a-zA-Z_] +M [a-zA-Z_:] +C [^\n] +S [ ] + +%x sComment sMeta1 sMeta2 sLabels sLValue sValue sTimestamp sExemplar sEValue sETimestamp sCValue + +%yyc c +%yyn c = l.next() +%yyt l.state + + +%% + +#{S} l.state = sComment +HELP{S} l.state = sMeta1; return tHelp +TYPE{S} l.state = sMeta1; return tType +UNIT{S} l.state = sMeta1; return tUnit +"EOF"\n? l.state = sInit; return tEOFWord +\"(\\.|[^\\"])*\" l.state = sMeta2; return tMName +{M}({M}|{D})* l.state = sMeta2; return tMName +{S}{C}*\n l.state = sInit; return tText + +{M}({M}|{D})* l.state = sValue; return tMName +\{ l.state = sLabels; return tBraceOpen +\{ l.state = sLabels; return tBraceOpen +{L}({L}|{D})* return tLName +\"(\\.|[^\\"])*\" l.state = sLabels; return tQString +\} l.state = sValue; return tBraceClose += l.state = sLValue; return tEqual +, return tComma +\"(\\.|[^\\"\n])*\" l.state = sLabels; return tLValue + +{S}\{ l.state = sCValue; return tBraceOpen +{L}({L}|{D})* return tLName +: return tColon +\[ return tBracketOpen +\] return tBracketClose +, return tComma +[ \t]+ // Skip whitespace inside the block +[^ \n\t,\[\]{}:]+ return tValue +\} l.state = sTimestamp; return tBraceClose + +{S}[^{ \n]+ l.state = sTimestamp; return tValue +{S}[^ \n]+ return tTimestamp +\n l.state = sInit; return tLinebreak +{S}#{S}\{ l.state = sExemplar; return tComment + +{L}({L}|{D})* return tLName +\"(\\.|[^\\"\n])*\" l.state = sExemplar; return tQString +\} l.state = sEValue; return tBraceClose += l.state = sEValue; return tEqual +\"(\\.|[^\\"\n])*\" l.state = sExemplar; return tLValue +, return tComma +{S}[^ \n]+ l.state = sETimestamp; return tValue +{S}[^ \n]+ return tTimestamp +\n l.state = sInit; return tLinebreak + +%% + + return tInvalid +} diff --git a/model/textparse/openmetrics2lex.l.go b/model/textparse/openmetrics2lex.l.go new file mode 100644 index 0000000000..6b4bf3ee3e --- 
/dev/null +++ b/model/textparse/openmetrics2lex.l.go @@ -0,0 +1,1029 @@ +// Code generated by golex. DO NOT EDIT. + +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "fmt" +) + +// Lex is called by the parser generated by "go tool yacc" to obtain each +// token. The method is opened before the matching rules block and closed at +// the end of the file. +func (l *openMetrics2Lexer) Lex() token { + if l.i >= len(l.b) { + return tEOF + } + c := l.b[l.i] + l.start = l.i + +yystate0: + + switch yyt := l.state; yyt { + default: + panic(fmt.Errorf(`invalid start condition %d`, yyt)) + case 0: // start condition: INITIAL + goto yystart1 + case 1: // start condition: sComment + goto yystart6 + case 2: // start condition: sMeta1 + goto yystart26 + case 3: // start condition: sMeta2 + goto yystart31 + case 4: // start condition: sLabels + goto yystart34 + case 5: // start condition: sLValue + goto yystart42 + case 6: // start condition: sValue + goto yystart46 + case 7: // start condition: sTimestamp + goto yystart51 + case 8: // start condition: sExemplar + goto yystart58 + case 9: // start condition: sEValue + goto yystart66 + case 10: // start condition: sETimestamp + goto yystart72 + case 11: // start condition: sCValue + goto yystart76 + } + +yystate1: + c = l.next() +yystart1: + switch { + default: + goto yyabort + case c == '#': + goto yystate2 + case c == ':' || c >= 'A' && c <= 'Z' || c == 
'_' || c >= 'a' && c <= 'z': + goto yystate4 + case c == '{': + goto yystate5 + } + +yystate2: + c = l.next() + switch { + default: + goto yyabort + case c == ' ': + goto yystate3 + } + +yystate3: + c = l.next() + goto yyrule1 + +yystate4: + c = l.next() + switch { + default: + goto yyrule9 + case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate4 + } + +yystate5: + c = l.next() + goto yyrule11 + +yystate6: + c = l.next() +yystart6: + switch { + default: + goto yyabort + case c == 'E': + goto yystate7 + case c == 'H': + goto yystate11 + case c == 'T': + goto yystate16 + case c == 'U': + goto yystate21 + } + +yystate7: + c = l.next() + switch { + default: + goto yyabort + case c == 'O': + goto yystate8 + } + +yystate8: + c = l.next() + switch { + default: + goto yyabort + case c == 'F': + goto yystate9 + } + +yystate9: + c = l.next() + switch { + default: + goto yyrule5 + case c == '\n': + goto yystate10 + } + +yystate10: + c = l.next() + goto yyrule5 + +yystate11: + c = l.next() + switch { + default: + goto yyabort + case c == 'E': + goto yystate12 + } + +yystate12: + c = l.next() + switch { + default: + goto yyabort + case c == 'L': + goto yystate13 + } + +yystate13: + c = l.next() + switch { + default: + goto yyabort + case c == 'P': + goto yystate14 + } + +yystate14: + c = l.next() + switch { + default: + goto yyabort + case c == ' ': + goto yystate15 + } + +yystate15: + c = l.next() + goto yyrule2 + +yystate16: + c = l.next() + switch { + default: + goto yyabort + case c == 'Y': + goto yystate17 + } + +yystate17: + c = l.next() + switch { + default: + goto yyabort + case c == 'P': + goto yystate18 + } + +yystate18: + c = l.next() + switch { + default: + goto yyabort + case c == 'E': + goto yystate19 + } + +yystate19: + c = l.next() + switch { + default: + goto yyabort + case c == ' ': + goto yystate20 + } + +yystate20: + c = l.next() + goto yyrule3 + +yystate21: + c = l.next() + switch { + default: + goto yyabort + 
case c == 'N': + goto yystate22 + } + +yystate22: + c = l.next() + switch { + default: + goto yyabort + case c == 'I': + goto yystate23 + } + +yystate23: + c = l.next() + switch { + default: + goto yyabort + case c == 'T': + goto yystate24 + } + +yystate24: + c = l.next() + switch { + default: + goto yyabort + case c == ' ': + goto yystate25 + } + +yystate25: + c = l.next() + goto yyrule4 + +yystate26: + c = l.next() +yystart26: + switch { + default: + goto yyabort + case c == '"': + goto yystate27 + case c == ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate30 + } + +yystate27: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate28 + case c == '\\': + goto yystate29 + case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate27 + } + +yystate28: + c = l.next() + goto yyrule6 + +yystate29: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate27 + } + +yystate30: + c = l.next() + switch { + default: + goto yyrule7 + case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate30 + } + +yystate31: + c = l.next() +yystart31: + switch { + default: + goto yyabort + case c == ' ': + goto yystate32 + } + +yystate32: + c = l.next() + switch { + default: + goto yyabort + case c == '\n': + goto yystate33 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate32 + } + +yystate33: + c = l.next() + goto yyrule8 + +yystate34: + c = l.next() +yystart34: + switch { + default: + goto yyabort + case c == '"': + goto yystate35 + case c == ',': + goto yystate38 + case c == '=': + goto yystate39 + case c == '}': + goto yystate41 + case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate40 + } + +yystate35: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate36 + case c == '\\': + goto yystate37 + case c >= '\x01' && 
c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate35 + } + +yystate36: + c = l.next() + goto yyrule13 + +yystate37: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate35 + } + +yystate38: + c = l.next() + goto yyrule16 + +yystate39: + c = l.next() + goto yyrule15 + +yystate40: + c = l.next() + switch { + default: + goto yyrule12 + case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate40 + } + +yystate41: + c = l.next() + goto yyrule14 + +yystate42: + c = l.next() +yystart42: + switch { + default: + goto yyabort + case c == '"': + goto yystate43 + } + +yystate43: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate44 + case c == '\\': + goto yystate45 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate43 + } + +yystate44: + c = l.next() + goto yyrule17 + +yystate45: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate43 + } + +yystate46: + c = l.next() +yystart46: + switch { + default: + goto yyabort + case c == ' ': + goto yystate47 + case c == '{': + goto yystate50 + } + +yystate47: + c = l.next() + switch { + default: + goto yyabort + case c == '{': + goto yystate49 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'z' || c >= '|' && c <= 'ÿ': + goto yystate48 + } + +yystate48: + c = l.next() + switch { + default: + goto yyrule27 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' 
&& c <= 'z' || c >= '|' && c <= 'ÿ': + goto yystate48 + } + +yystate49: + c = l.next() + goto yyrule18 + +yystate50: + c = l.next() + goto yyrule10 + +yystate51: + c = l.next() +yystart51: + switch { + default: + goto yyabort + case c == ' ': + goto yystate53 + case c == '\n': + goto yystate52 + } + +yystate52: + c = l.next() + goto yyrule29 + +yystate53: + c = l.next() + switch { + default: + goto yyabort + case c == '#': + goto yystate55 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c == '!' || c == '"' || c >= '$' && c <= 'ÿ': + goto yystate54 + } + +yystate54: + c = l.next() + switch { + default: + goto yyrule28 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate54 + } + +yystate55: + c = l.next() + switch { + default: + goto yyrule28 + case c == ' ': + goto yystate56 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate54 + } + +yystate56: + c = l.next() + switch { + default: + goto yyabort + case c == '{': + goto yystate57 + } + +yystate57: + c = l.next() + goto yyrule30 + +yystate58: + c = l.next() +yystart58: + switch { + default: + goto yyabort + case c == '"': + goto yystate59 + case c == ',': + goto yystate62 + case c == '=': + goto yystate63 + case c == '}': + goto yystate65 + case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate64 + } + +yystate59: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate60 + case c == '\\': + goto yystate61 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' 
|| c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate59 + } + +yystate60: + c = l.next() + goto yyrule32 + +yystate61: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate59 + } + +yystate62: + c = l.next() + goto yyrule36 + +yystate63: + c = l.next() + goto yyrule34 + +yystate64: + c = l.next() + switch { + default: + goto yyrule31 + case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate64 + } + +yystate65: + c = l.next() + goto yyrule33 + +yystate66: + c = l.next() +yystart66: + switch { + default: + goto yyabort + case c == ' ': + goto yystate67 + case c == '"': + goto yystate69 + } + +yystate67: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate68 + } + +yystate68: + c = l.next() + switch { + default: + goto yyrule37 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate68 + } + +yystate69: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate70 + case c == '\\': + goto yystate71 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate69 + } + +yystate70: + c = l.next() + goto yyrule35 + +yystate71: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate69 + } + +yystate72: + c = l.next() +yystart72: + switch { + default: + goto yyabort + case c == ' ': + goto yystate74 + case c == '\n': + goto yystate73 + } + +yystate73: + c = l.next() + goto yyrule39 + +yystate74: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' 
&& c <= 'ÿ': + goto yystate75 + } + +yystate75: + c = l.next() + switch { + default: + goto yyrule38 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate75 + } + +yystate76: + c = l.next() +yystart76: + switch { + default: + goto yyabort + case c == ',': + goto yystate79 + case c == ':': + goto yystate80 + case c == '[': + goto yystate82 + case c == '\t' || c == ' ': + goto yystate78 + case c == ']': + goto yystate83 + case c == '}': + goto yystate84 + case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate81 + case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= '+' || c >= '-' && c <= '9' || c >= ';' && c <= '@' || c == '\\' || c == '^' || c == '`' || c == '|' || c >= '~' && c <= 'ÿ': + goto yystate77 + } + +yystate77: + c = l.next() + switch { + default: + goto yyrule25 + case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= '+' || c >= '-' && c <= '9' || c >= ';' && c <= 'Z' || c == '\\' || c >= '^' && c <= 'z' || c == '|' || c >= '~' && c <= 'ÿ': + goto yystate77 + } + +yystate78: + c = l.next() + switch { + default: + goto yyrule24 + case c == '\t' || c == ' ': + goto yystate78 + } + +yystate79: + c = l.next() + goto yyrule23 + +yystate80: + c = l.next() + goto yyrule20 + +yystate81: + c = l.next() + switch { + default: + goto yyrule19 + case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate81 + case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' 
&& c <= '+' || c >= '-' && c <= '/' || c >= ';' && c <= '@' || c == '\\' || c == '^' || c == '`' || c == '|' || c >= '~' && c <= 'ÿ': + goto yystate77 + } + +yystate82: + c = l.next() + goto yyrule21 + +yystate83: + c = l.next() + goto yyrule22 + +yystate84: + c = l.next() + goto yyrule26 + +yyrule1: // #{S} + { + l.state = sComment + goto yystate0 + } +yyrule2: // HELP{S} + { + l.state = sMeta1 + return tHelp + goto yystate0 + } +yyrule3: // TYPE{S} + { + l.state = sMeta1 + return tType + goto yystate0 + } +yyrule4: // UNIT{S} + { + l.state = sMeta1 + return tUnit + goto yystate0 + } +yyrule5: // "EOF"\n? + { + l.state = sInit + return tEOFWord + goto yystate0 + } +yyrule6: // \"(\\.|[^\\"])*\" + { + l.state = sMeta2 + return tMName + goto yystate0 + } +yyrule7: // {M}({M}|{D})* + { + l.state = sMeta2 + return tMName + goto yystate0 + } +yyrule8: // {S}{C}*\n + { + l.state = sInit + return tText + goto yystate0 + } +yyrule9: // {M}({M}|{D})* + { + l.state = sValue + return tMName + goto yystate0 + } +yyrule10: // \{ + { + l.state = sLabels + return tBraceOpen + goto yystate0 + } +yyrule11: // \{ + { + l.state = sLabels + return tBraceOpen + goto yystate0 + } +yyrule12: // {L}({L}|{D})* + { + return tLName + } +yyrule13: // \"(\\.|[^\\"])*\" + { + l.state = sLabels + return tQString + goto yystate0 + } +yyrule14: // \} + { + l.state = sValue + return tBraceClose + goto yystate0 + } +yyrule15: // = + { + l.state = sLValue + return tEqual + goto yystate0 + } +yyrule16: // , + { + return tComma + } +yyrule17: // \"(\\.|[^\\"\n])*\" + { + l.state = sLabels + return tLValue + goto yystate0 + } +yyrule18: // {S}\{ + { + l.state = sCValue + return tBraceOpen + goto yystate0 + } +yyrule19: // {L}({L}|{D})* + { + return tLName + } +yyrule20: // : + { + return tColon + } +yyrule21: // \[ + { + return tBracketOpen + } +yyrule22: // \] + { + return tBracketClose + } +yyrule23: // , + { + return tComma + } +yyrule24: // [ \t]+ + { + // Skip whitespace inside the block + goto 
yystate0 + } +yyrule25: // [^ \n\t,\[\]{}:]+ + { + return tValue + } +yyrule26: // \} + { + l.state = sTimestamp + return tBraceClose + goto yystate0 + } +yyrule27: // {S}[^{ \n]+ + { + l.state = sTimestamp + return tValue + goto yystate0 + } +yyrule28: // {S}[^ \n]+ + { + return tTimestamp + } +yyrule29: // \n + { + l.state = sInit + return tLinebreak + goto yystate0 + } +yyrule30: // {S}#{S}\{ + { + l.state = sExemplar + return tComment + goto yystate0 + } +yyrule31: // {L}({L}|{D})* + { + return tLName + } +yyrule32: // \"(\\.|[^\\"\n])*\" + { + l.state = sExemplar + return tQString + goto yystate0 + } +yyrule33: // \} + { + l.state = sEValue + return tBraceClose + goto yystate0 + } +yyrule34: // = + { + l.state = sEValue + return tEqual + goto yystate0 + } +yyrule35: // \"(\\.|[^\\"\n])*\" + { + l.state = sExemplar + return tLValue + goto yystate0 + } +yyrule36: // , + { + return tComma + } +yyrule37: // {S}[^ \n]+ + { + l.state = sETimestamp + return tValue + goto yystate0 + } +yyrule38: // {S}[^ \n]+ + { + return tTimestamp + } +yyrule39: // \n + if true { // avoid go vet determining the below panic will not be reached + l.state = sInit + return tLinebreak + goto yystate0 + } + panic("unreachable") + +yyabort: // no lexem recognized + // silence unused label errors for build and satisfy go vet reachability analysis + { + if false { + goto yyabort + } + if false { + goto yystate0 + } + if false { + goto yystate1 + } + if false { + goto yystate6 + } + if false { + goto yystate26 + } + if false { + goto yystate31 + } + if false { + goto yystate34 + } + if false { + goto yystate42 + } + if false { + goto yystate46 + } + if false { + goto yystate51 + } + if false { + goto yystate58 + } + if false { + goto yystate66 + } + if false { + goto yystate72 + } + if false { + goto yystate76 + } + } + + return tInvalid +} diff --git a/model/textparse/openmetrics2parse.go b/model/textparse/openmetrics2parse.go new file mode 100644 index 0000000000..6090160f3d --- /dev/null 
+++ b/model/textparse/openmetrics2parse.go @@ -0,0 +1,755 @@ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:generate go get -u modernc.org/golex +//go:generate golex -o=openmetrics2lex.l.go openmetrics2lex.l + +package textparse + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "strings" + "unicode/utf8" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/schema" + "github.com/prometheus/prometheus/util/convertnhcb" +) + +type openMetrics2Lexer struct { + b []byte + i int + start int + err error + state int +} + +// buf returns the buffer of the current token. +func (l *openMetrics2Lexer) buf() []byte { + return l.b[l.start:l.i] +} + +// next advances the openMetrics2Lexer to the next character. +func (l *openMetrics2Lexer) next() byte { + l.i++ + if l.i >= len(l.b) { + l.err = io.EOF + return byte(tEOF) + } + // Lex struggles with null bytes. If we are in a label value or help string, where + // they are allowed, consume them here immediately.
+ for l.b[l.i] == 0 && (l.state == sLValue || l.state == sMeta2 || l.state == sComment) { + l.i++ + if l.i >= len(l.b) { + l.err = io.EOF + return byte(tEOF) + } + } + return l.b[l.i] +} + +func (l *openMetrics2Lexer) Error(es string) { + l.err = errors.New(es) +} + +// OpenMetrics2Parser parses the OpenMetrics 2.0 text exposition format. +// Specification can be found at https://prometheus.io/docs/specs/om/open_metrics_spec_2_0/ +type OpenMetrics2Parser struct { + l *openMetrics2Lexer + builder labels.ScratchBuilder + series []byte + mfNameLen int // length of metric family name to get from series. + text []byte + mtype model.MetricType + unit string + + val float64 + tempHist convertnhcb.TempHistogram + h *histogram.Histogram + fh *histogram.FloatHistogram + // TODO: Implement summary complex type. + + ts int64 + hasTS bool + start int + // offsets is a list of offsets into series that describe the positions + // of the metric name and label names and values for this series. + // p.offsets[0] is the start character of the metric name. + // p.offsets[1] is the end of the metric name. + // Subsequently, p.offsets holds pairs of offset pairs for the positions + // of the label name and value start and end characters. + offsets []int + + eOffsets []int + exemplar []byte + exemplarVal float64 + exemplarTs int64 + hasExemplarTs bool + + // ignoreExemplar instructs the parser to not overwrite exemplars (to keep them while peeking ahead). + ignoreExemplar bool + enableTypeAndUnitLabels bool +} + +type openMetrics2ParserOptions struct { + enableTypeAndUnitLabels bool + // TODO: Probably this option should be per metric name (: + unrollComplexTypes bool +} + +type OpenMetrics2Option func(*openMetrics2ParserOptions) + +// WithOM2ParserTypeAndUnitLabels enables type-and-unit-labels mode +// in which the parser injects __type__ and __unit__ into labels.
+func WithOM2ParserTypeAndUnitLabels() OpenMetrics2Option { + return func(o *openMetrics2ParserOptions) { + o.enableTypeAndUnitLabels = true + } +} + +// NewOpenMetrics2Parser returns a new parser for the byte slice, configured by the given options; it panics if complex type unrolling is requested (not implemented yet). +func NewOpenMetrics2Parser(b []byte, st *labels.SymbolTable, opts ...OpenMetrics2Option) Parser { + options := &openMetrics2ParserOptions{} + + for _, opt := range opts { + opt(options) + } + + if options.unrollComplexTypes { + // TODO: Implement this. + panic("not implemented") + } + + parser := &OpenMetrics2Parser{ + l: &openMetrics2Lexer{b: b}, + builder: labels.NewScratchBuilderWithSymbolTable(st, 16), + enableTypeAndUnitLabels: options.enableTypeAndUnitLabels, + tempHist: convertnhcb.NewTempHistogram(), + } + return parser +} + +// Series returns the bytes of the series, the timestamp if set, and the value +// of the current sample. +func (p *OpenMetrics2Parser) Series() ([]byte, *int64, float64) { + if p.hasTS { + ts := p.ts + return p.series, &ts, p.val + } + return p.series, nil, p.val +} + +// Histogram returns the bytes of the series, the timestamp if set, and one of +// the values (float or integer histogram) of the current complex sample representing a histogram. +func (p *OpenMetrics2Parser) Histogram() ([]byte, *int64, *histogram.Histogram, *histogram.FloatHistogram) { + if p.hasTS { + ts := p.ts + return p.series, &ts, p.h, p.fh + } + return p.series, nil, p.h, p.fh +} + +// Help returns the metric name and help text in the current entry. +// Must only be called after Next returned a help entry. +// The returned byte slices become invalid after the next call to Next. +func (p *OpenMetrics2Parser) Help() ([]byte, []byte) { + m := p.l.b[p.offsets[0]:p.offsets[1]] + + // Replacer causes allocations. Replace only when necessary. + if bytes.IndexByte(p.text, byte('\\')) >= 0 { + // OpenMetrics always uses the Prometheus format label value escaping.
+ return m, []byte(lvalReplacer.Replace(string(p.text))) + } + return m, p.text +} + +// Type returns the metric name and type in the current entry. +// Must only be called after Next returned a type entry. +// The returned byte slices become invalid after the next call to Next. +func (p *OpenMetrics2Parser) Type() ([]byte, model.MetricType) { + return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype +} + +// Unit returns the metric name and unit in the current entry. +// Must only be called after Next returned a unit entry. +// The returned byte slices become invalid after the next call to Next. +func (p *OpenMetrics2Parser) Unit() ([]byte, []byte) { + return p.l.b[p.offsets[0]:p.offsets[1]], []byte(p.unit) +} + +// Comment returns the text of the current comment. +// Must only be called after Next returned a comment entry. +// The returned byte slice becomes invalid after the next call to Next. +func (p *OpenMetrics2Parser) Comment() []byte { + return p.text +} + +// Labels writes the labels of the current sample into the passed labels. +func (p *OpenMetrics2Parser) Labels(l *labels.Labels) { + // Defensive copy in case the following keeps a reference. + // See https://github.com/prometheus/prometheus/issues/16490 + s := string(p.series) + + p.builder.Reset() + metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start]) + + m := schema.Metadata{ + Name: metricName, + Type: p.mtype, + Unit: p.unit, + } + if p.enableTypeAndUnitLabels { + m.AddToLabels(&p.builder) + } else { + p.builder.Add(labels.MetricName, metricName) + } + for i := 2; i < len(p.offsets); i += 4 { + a := p.offsets[i] - p.start + b := p.offsets[i+1] - p.start + label := unreplace(s[a:b]) + if p.enableTypeAndUnitLabels && !m.IsEmptyFor(label) { + // Dropping user provided metadata labels, if found in the OM metadata. 
+ continue + } + c := p.offsets[i+2] - p.start + d := p.offsets[i+3] - p.start + value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d])) + p.builder.Add(label, value) + } + + p.builder.Sort() + *l = p.builder.Labels() +} + +// Exemplar writes the exemplar of the current sample into the passed exemplar. +// It returns whether an exemplar exists. As OpenMetrics only ever has one +// exemplar per sample, every call after the first (for the same sample) will +// always return false. +func (p *OpenMetrics2Parser) Exemplar(e *exemplar.Exemplar) bool { + if len(p.exemplar) == 0 { + return false + } + + // Allocate the full immutable string immediately, so we just + // have to create references on it below. + s := string(p.exemplar) + + e.Value = p.exemplarVal + if p.hasExemplarTs { + e.HasTs = true + e.Ts = p.exemplarTs + } + + p.builder.Reset() + for i := 0; i < len(p.eOffsets); i += 4 { + a := p.eOffsets[i] - p.start + b := p.eOffsets[i+1] - p.start + c := p.eOffsets[i+2] - p.start + d := p.eOffsets[i+3] - p.start + + p.builder.Add(s[a:b], s[c:d]) + } + + p.builder.Sort() + e.Labels = p.builder.Labels() + + // Wipe exemplar so that future calls return false. + p.exemplar = p.exemplar[:0] + return true +} + +// CreatedTimestamp returns the created timestamp for a current Metric if exists or nil. +func (*OpenMetrics2Parser) CreatedTimestamp() int64 { + // TODO: Implement. + return 0 +} + +// nextToken returns the next token from the openMetricsLexer. +func (p *OpenMetrics2Parser) nextToken() token { + t := p.l.Lex() + return t +} + +func (p *OpenMetrics2Parser) parseError(exp string, got token) error { + e := min(len(p.l.b), p.l.i+1) + return fmt.Errorf("%s, got %q (%q) while parsing: %q", exp, p.l.b[p.l.start:e], got, p.l.b[p.start:e]) +} + +// Next advances the parser to the next sample. +// It returns (EntryInvalid, io.EOF) if no samples were read. 
+func (p *OpenMetrics2Parser) Next() (Entry, error) { + var err error + + p.start = p.l.i + p.offsets = p.offsets[:0] + if !p.ignoreExemplar { + p.eOffsets = p.eOffsets[:0] + p.exemplar = p.exemplar[:0] + p.exemplarVal = 0 + p.hasExemplarTs = false + } + + switch t := p.nextToken(); t { + case tEOFWord: + if t := p.nextToken(); t != tEOF { + return EntryInvalid, errors.New("unexpected data after # EOF") + } + return EntryInvalid, io.EOF + case tEOF: + return EntryInvalid, errors.New("data does not end with # EOF") + case tHelp, tType, tUnit: + switch t2 := p.nextToken(); t2 { + case tMName: + mStart := p.l.start + mEnd := p.l.i + if p.l.b[mStart] == '"' && p.l.b[mEnd-1] == '"' { + mStart++ + mEnd-- + } + p.mfNameLen = mEnd - mStart + p.offsets = append(p.offsets, mStart, mEnd) + default: + return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2) + } + switch t2 := p.nextToken(); t2 { + case tText: + if len(p.l.buf()) > 1 { + p.text = p.l.buf()[1 : len(p.l.buf())-1] + } else { + p.text = []byte{} + } + default: + return EntryInvalid, fmt.Errorf("expected text in %s", t.String()) + } + switch t { + case tType: + switch s := yoloString(p.text); s { + case "counter": + p.mtype = model.MetricTypeCounter + case "gauge": + p.mtype = model.MetricTypeGauge + case "histogram": + p.mtype = model.MetricTypeHistogram + case "gaugehistogram": + p.mtype = model.MetricTypeGaugeHistogram + case "summary": + p.mtype = model.MetricTypeSummary + case "info": + p.mtype = model.MetricTypeInfo + case "stateset": + p.mtype = model.MetricTypeStateset + case "unknown": + p.mtype = model.MetricTypeUnknown + default: + return EntryInvalid, fmt.Errorf("invalid metric type %q", s) + } + case tHelp: + if !utf8.Valid(p.text) { + return EntryInvalid, fmt.Errorf("help text %q is not a valid utf8 string", p.text) + } + } + switch t { + case tHelp: + return EntryHelp, nil + case tType: + return EntryType, nil + case tUnit: + p.unit = string(p.text) + m := 
yoloString(p.l.b[p.offsets[0]:p.offsets[1]]) + if len(p.unit) > 0 { + if !strings.HasSuffix(m, p.unit) || len(m) < len(p.unit)+1 || p.l.b[p.offsets[1]-len(p.unit)-1] != '_' { + return EntryInvalid, fmt.Errorf("unit %q not a suffix of metric %q", p.unit, m) + } + } + return EntryUnit, nil + } + + case tBraceOpen: + // We found a brace, so make room for the eventual metric name. If these + // values aren't updated, then the metric name was not set inside the + // braces and we can return an error. + if len(p.offsets) == 0 { + p.offsets = []int{-1, -1} + } + if p.offsets, err = p.parseLVals(p.offsets, false); err != nil { + return EntryInvalid, err + } + + p.series = p.l.b[p.start:p.l.i] + return p.parseSeriesEndOfLine(p.nextToken()) + case tMName: + p.offsets = append(p.offsets, p.start, p.l.i) + p.series = p.l.b[p.start:p.l.i] + + t2 := p.nextToken() + if t2 == tBraceOpen { + p.offsets, err = p.parseLVals(p.offsets, false) + if err != nil { + return EntryInvalid, err + } + p.series = p.l.b[p.start:p.l.i] + t2 = p.nextToken() + } + + return p.parseSeriesEndOfLine(t2) + default: + err = p.parseError("expected a valid start token", t) + } + return EntryInvalid, err +} + +func (p *OpenMetrics2Parser) parseComment() error { + var err error + + if p.ignoreExemplar { + for t := p.nextToken(); t != tLinebreak; t = p.nextToken() { + if t == tEOF { + return errors.New("data does not end with # EOF") + } + } + return nil + } + + // Parse the labels. + p.eOffsets, err = p.parseLVals(p.eOffsets, true) + if err != nil { + return err + } + p.exemplar = p.l.b[p.start:p.l.i] + + // Get the value. + p.exemplarVal, err = p.parseFloatValue(p.nextToken(), "exemplar labels") + if err != nil { + return err + } + + // Read the optional timestamp. 
+ p.hasExemplarTs = false + switch t2 := p.nextToken(); t2 { + case tEOF: + return errors.New("data does not end with # EOF") + case tLinebreak: + break + case tTimestamp: + p.hasExemplarTs = true + var ts float64 + // A float is enough to hold what we need for millisecond resolution. + if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil { + return fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) + } + if math.IsNaN(ts) || math.IsInf(ts, 0) { + return fmt.Errorf("invalid exemplar timestamp %f", ts) + } + p.exemplarTs = int64(ts * 1000) + switch t3 := p.nextToken(); t3 { + case tLinebreak: + default: + return p.parseError("expected next entry after exemplar timestamp", t3) + } + default: + return p.parseError("expected timestamp or comment", t2) + } + return nil +} + +func (p *OpenMetrics2Parser) parseLVals(offsets []int, isExemplar bool) ([]int, error) { + t := p.nextToken() + for { + curTStart := p.l.start + curTI := p.l.i + switch t { + case tBraceClose: + return offsets, nil + case tLName: + case tQString: + default: + return nil, p.parseError("expected label name", t) + } + + t = p.nextToken() + // A quoted string followed by a comma or brace is a metric name. Set the + // offsets and continue processing. If this is an exemplar, this format + // is not allowed. + if t == tComma || t == tBraceClose { + if isExemplar { + return nil, p.parseError("expected label name", t) + } + if offsets[0] != -1 || offsets[1] != -1 { + return nil, fmt.Errorf("metric name already set while parsing: %q", p.l.b[p.start:p.l.i]) + } + offsets[0] = curTStart + 1 + offsets[1] = curTI - 1 + if t == tBraceClose { + return offsets, nil + } + t = p.nextToken() + continue + } + // We have a label name, and it might be quoted. 
+ if p.l.b[curTStart] == '"' { + curTStart++ + curTI-- + } + offsets = append(offsets, curTStart, curTI) + + if t != tEqual { + return nil, p.parseError("expected equal", t) + } + if t := p.nextToken(); t != tLValue { + return nil, p.parseError("expected label value", t) + } + if !utf8.Valid(p.l.buf()) { + return nil, fmt.Errorf("invalid UTF-8 label value: %q", p.l.buf()) + } + + // The openMetricsLexer ensures the value string is quoted. Strip first + // and last character. + offsets = append(offsets, p.l.start+1, p.l.i-1) + + // Free trailing commas are allowed. + t = p.nextToken() + if t == tComma { + t = p.nextToken() + } else if t != tBraceClose { + return nil, p.parseError("expected comma or brace close", t) + } + } +} + +// parseSeriesEndOfLine parses the series end of the line (value, optional +// timestamp, commentary, etc.) after the metric name and labels. +// It starts parsing with the provided token. +func (p *OpenMetrics2Parser) parseSeriesEndOfLine(t token) (e Entry, err error) { + if p.offsets[0] == -1 { + return EntryInvalid, fmt.Errorf("metric name not set while parsing: %q", p.l.b[p.start:p.l.i]) + } + switch p.l.state { + case sTimestamp: + p.val, err = p.parseFloatValue(t, "metric") + if err != nil { + return EntryInvalid, err + } + e = EntrySeries + case sCValue: + e, err = p.parseComplexValue(t, "metric") + if err != nil { + return EntryInvalid, err + } + default: + return EntryInvalid, p.parseError(fmt.Sprintf("unexpected parser state %v, expect float or complex value", p.l.state), t) + } + + p.hasTS = false + switch t2 := p.nextToken(); t2 { + case tEOF: + return EntryInvalid, errors.New("data does not end with # EOF") + case tLinebreak: + break + case tComment: + if err := p.parseComment(); err != nil { + return EntryInvalid, err + } + case tTimestamp: + p.hasTS = true + var ts float64 + // A float is enough to hold what we need for millisecond resolution. 
+ if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil { + return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) + } + if math.IsNaN(ts) || math.IsInf(ts, 0) { + return EntryInvalid, fmt.Errorf("invalid timestamp %f", ts) + } + p.ts = int64(ts * 1000) + switch t3 := p.nextToken(); t3 { + case tLinebreak: + case tComment: + if err := p.parseComment(); err != nil { + return EntryInvalid, err + } + default: + return EntryInvalid, p.parseError("expected next entry after timestamp", t3) + } + } + return e, nil +} + +// parseFloatValue parses the current token t as a float sample or exemplar value. +// It returns a parse error mentioning after if t is not a tValue token, and +// replaces any NaN result with the canonical value.NormalNaN bit pattern. +func (p *OpenMetrics2Parser) parseFloatValue(t token, after string) (float64, error) { + if t != tValue { + return 0, p.parseError(fmt.Sprintf("expected value after %v", after), t) + } + val, err := parseFloat(yoloString(p.l.buf()[1:])) + if err != nil { + return 0, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) + } + // Ensure canonical NaN value. The check must look at the value just parsed, + // not at p.exemplarVal, which may hold a previously parsed exemplar. 
+ if math.IsNaN(val) { + val = math.Float64frombits(value.NormalNaN) + } + return val, nil +} + +func (p *OpenMetrics2Parser) parseComplexValue(t token, after string) (_ Entry, err error) { + if t != tBraceOpen { + return EntryInvalid, p.parseError(fmt.Sprintf("expected brace open after %v", after), t) + } + + switch p.mtype { + default: + return EntryInvalid, p.parseError("unexpected parser type", t) + case model.MetricTypeSummary: + return EntryInvalid, p.parseError("summary complex value parsing not yet implemented", t) + case model.MetricTypeHistogram: + defer p.tempHist.Reset() + + if err := p.parseComplexValueHistogram(); err != nil { + return EntryInvalid, err + } + p.h, p.fh, err = p.tempHist.Convert() + if err != nil { + return EntryInvalid, p.parseError(fmt.Sprintf("histogram complex value parsing failed: %v", err), t) + } + if p.h != nil { + if err := p.h.Validate(); err != nil { + return EntryInvalid, p.parseError(fmt.Sprintf("invalid histogram: %v", err), t) + } + } else if p.fh != nil { + if err := p.fh.Validate(); err != nil { + return
EntryInvalid, p.parseError(fmt.Sprintf("invalid float histogram: %v", err), t) + } + } + return EntryHistogram, nil + } +} + +// parseComplexValueHistogram parses the key:value pairs of a histogram complex value +// (count, sum and bucket) into p.tempHist, up to and including the closing brace. +// The opening brace must already have been consumed by the caller. +func (p *OpenMetrics2Parser) parseComplexValueHistogram() (err error) { + // The opening brace has already been consumed. + t := p.nextToken() + + // Handle empty complex value, e.g., {}. + if t == tBraceClose { + return nil + } + + for { + // Expect a key (e.g., "count", "sum", "bucket"). + if t != tLName { + return p.parseError("expected key in complex value", t) + } + key := yoloString(p.l.buf()) + + if t2 := p.nextToken(); t2 != tColon { + return p.parseError("expected colon after complex value key", t2) + } + + // Handle the value based on the key. + switch key { + case "count": + if t3 := p.nextToken(); t3 != tValue { + return p.parseError("expected count value", t3) + } + val, err := parseFloat(yoloString(p.l.buf())) + if err != nil { + return fmt.Errorf("%w while parsing count: %q", err, p.l.b[p.start:p.l.i]) + } + if err := p.tempHist.SetCount(val); err != nil { + // Use %q so the offending input is printed as text rather than as a list of byte values. 
+ return fmt.Errorf("%w while parsing count for histogram: %q", err, p.l.b[p.start:p.l.i]) + } + case "sum": + if t3 := p.nextToken(); t3 != tValue { + return p.parseError("expected sum value", t3) + } + val, err := parseFloat(yoloString(p.l.buf())) + if err != nil { + return fmt.Errorf("%w while parsing sum: %q", err, p.l.b[p.start:p.l.i]) + } + if err := p.tempHist.SetSum(val); err != nil { + return fmt.Errorf("%w while parsing sum for histogram: %q", err, p.l.b[p.start:p.l.i]) + } + case "bucket": + if t3 := p.nextToken(); t3 != tBracketOpen { + return p.parseError("expected opening bracket for buckets", t3) + } + if err := p.parseBuckets(); err != nil { + return err + } + default: + return fmt.Errorf("unknown key in complex value: %q", key) + } + + // After a key-value pair, expect a comma or the closing brace.
+ t = p.nextToken() + if t == tBraceClose { + return nil + } + if t != tComma { + return p.parseError("expected comma or closing brace after value", t) + } + + // If we saw a comma, get the next token, which should be the next key. + t = p.nextToken() + } +} + +// parseBuckets parses the content of a bucket list, e.g., [bound1:count1,bound2:count2]. +func (p *OpenMetrics2Parser) parseBuckets() error { + // Handle empty bucket list, e.g., []. + t := p.nextToken() + if t == tBracketClose { + return nil + } + + for { + // Expect a bucket definition like "bound:count". + if t != tValue { + return p.parseError("expected bucket bound", t) + } + bound, err := parseFloat(yoloString(p.l.buf())) + if err != nil { + return fmt.Errorf("%w while parsing bucket bound: %q", err, p.l.b[p.start:p.l.i]) + } + if t2 := p.nextToken(); t2 != tColon { + return p.parseError("expected colon after bucket bound", t2) + } + if t3 := p.nextToken(); t3 != tValue { + return p.parseError("expected bucket count", t3) + } + // The bucket count must be an integer. + count, err := parseFloat(yoloString(p.l.buf())) + if err != nil { + return fmt.Errorf("%w while parsing bucket count: %q", err, p.l.b[p.start:p.l.i]) + } + if err := p.tempHist.SetBucketCount(bound, count); err != nil { + return fmt.Errorf("%w while parsing bucket bound and count: %q", err, p.l.b[p.start:p.l.i]) + } + + // Check for a comma or the closing bracket. + t = p.nextToken() + if t == tBracketClose { + return nil + } + if t != tComma { + return p.parseError("expected comma or closing bracket in bucket list", t) + } + // If we saw a comma, get the next token for the start of the next bucket. 
+ t = p.nextToken() + } +} diff --git a/model/textparse/promlex.l b/model/textparse/promlex.l index e9fa1fb71c..904bdcf6e7 100644 --- a/model/textparse/promlex.l +++ b/model/textparse/promlex.l @@ -30,6 +30,7 @@ const ( sExemplar sEValue sETimestamp + sCValue // Complex value samples (NHCB, NH, NS) ) // Lex is called by the parser generated by "go tool yacc" to obtain each diff --git a/model/textparse/promlex.l.go b/model/textparse/promlex.l.go index a083e5549b..24590eee32 100644 --- a/model/textparse/promlex.l.go +++ b/model/textparse/promlex.l.go @@ -31,6 +31,7 @@ const ( sExemplar sEValue sETimestamp + sCValue // Complex value samples (NHCB, NH, NS) ) // Lex is called by the parser generated by "go tool yacc" to obtain each diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index 2b4b750b4d..bfe4b8d530 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -68,6 +68,9 @@ const ( tEqual tTimestamp tValue + tColon + tBracketOpen + tBracketClose ) func (t token) String() string { diff --git a/model/textparse/testdata/1histogram.om.txt b/model/textparse/testdata/1histogram.om.txt index 1876168355..bd49d872e8 100644 --- a/model/textparse/testdata/1histogram.om.txt +++ b/model/textparse/testdata/1histogram.om.txt @@ -22,11 +22,11 @@ golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.25"} 0 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.5"} 0 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="1.0"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="2.5"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="2.5"} 1 
+golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="5.0"} 1 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="10.0"} 1 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="+Inf"} 1 -golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5002"} 10.0 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5002"} 10.1 golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5002"} 1 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.005"} 0 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.01"} 0 @@ -34,12 +34,12 @@ golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.05"} 0 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.1"} 0 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.25"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.5"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="1.0"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="2.5"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="5.0"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="10.0"} 1 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="+Inf"} 1 -golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5003"} 10.0 -golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5003"} 1 -# EOF \ No newline at end of file 
+golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.5"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="1.0"} 2 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="2.5"} 3 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="5.0"} 4 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="10.0"} 5 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="+Inf"} 6 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5003"} 20.04 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5003"} 6 +# EOF diff --git a/model/textparse/testdata/1histogram.om2.txt b/model/textparse/testdata/1histogram.om2.txt new file mode 100644 index 0000000000..9278f57976 --- /dev/null +++ b/model/textparse/testdata/1histogram.om2.txt @@ -0,0 +1,6 @@ +# HELP golang_manual_histogram_seconds This is a histogram with manually selected parameters +# TYPE golang_manual_histogram_seconds histogram +golang_manual_histogram_seconds{address="0.0.0.0",generation="20",port="5001"} {count:1,sum:10.0,bucket:[0.005:0,0.01:0,0.025:0,0.05:0,0.1:0,0.25:0,0.5:0,1.0:0,2.5:0,5.0:0,10.0:1,+Inf:1]} +golang_manual_histogram_seconds{address="0.0.0.0",generation="20",port="5002"} {count:1,sum:10.1,bucket:[0.005:0,0.01:0,0.025:0,0.05:0,0.1:0,0.25:0,0.5:0,1.0:0,2.5:1,5.0:1,10.0:1,+Inf:1]} +golang_manual_histogram_seconds{address="0.0.0.0",generation="20",port="5003"} {count:6,sum:20.04,bucket:[0.005:0,0.01:0,0.025:0,0.05:0,0.1:0,0.25:0,0.5:1,1.0:2,2.5:3,5.0:4,10.0:5,+Inf:6]} +# EOF diff --git a/scripts/gentextlex.sh b/scripts/gentextlex.sh new file mode 100755 index 0000000000..bd7dd10856 --- /dev/null +++ b/scripts/gentextlex.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# +# Generate the textparse lexer Go code from the .l files using golex. 
+# Run from repository root.
+set -e +set -u + +if ! [[ "$0" =~ "scripts/gentextlex.sh" ]]; then + echo "must be run from repository root" + exit 255 +fi + +pushd "internal/tools" +INSTALL_PKGS="modernc.org/golex" +for pkg in ${INSTALL_PKGS}; do + GO111MODULE=on go install "$pkg" +done +popd + +echo "generating lex code" +pushd model/textparse + golex -o=promlex.l.go promlex.l + golex -o=openmetricslex.l.go openmetricslex.l + golex -o=openmetrics2lex.l.go openmetrics2lex.l +popd