From adc41505970f6208e16b14b27a380a33c7195cf0 Mon Sep 17 00:00:00 2001 From: bwplotka Date: Tue, 7 Oct 2025 09:33:04 +0100 Subject: [PATCH] om2: add om2 complex type format for PoC This change is for demo purposes, exploring the benefits (and downsides) for the complex type format for OM2 captured in https://github.com/prometheus/docs/pull/2679. This assumes Prometheus stores NS and NHCB (and NH) going forward (for best case efficiency), but is expected to work for classic mode too with little overhead (benchmarks will tell us). Part of the PromCon talk we do with @krajorama Signed-off-by: bwplotka --- internal/tools/go.mod | 4 + internal/tools/go.sum | 13 + model/textparse/README.md | 9 +- model/textparse/benchmark_test.go | 69 +- model/textparse/interface_test.go | 2 +- model/textparse/openmetrics2lex.l | 95 ++ model/textparse/openmetrics2lex.l.go | 1029 +++++++++++++++++++ model/textparse/openmetrics2parse.go | 755 ++++++++++++++ model/textparse/promlex.l | 1 + model/textparse/promlex.l.go | 1 + model/textparse/promparse.go | 3 + model/textparse/testdata/1histogram.om.txt | 24 +- model/textparse/testdata/1histogram.om2.txt | 6 + scripts/gentextlex.sh | 25 + 14 files changed, 2010 insertions(+), 26 deletions(-) create mode 100644 model/textparse/openmetrics2lex.l create mode 100644 model/textparse/openmetrics2lex.l.go create mode 100644 model/textparse/openmetrics2parse.go create mode 100644 model/textparse/testdata/1histogram.om2.txt create mode 100755 scripts/gentextlex.sh diff --git a/internal/tools/go.mod b/internal/tools/go.mod index f5cc5dcf24..ef6f8993d1 100644 --- a/internal/tools/go.mod +++ b/internal/tools/go.mod @@ -111,5 +111,9 @@ require ( google.golang.org/grpc v1.73.0 // indirect google.golang.org/protobuf v1.36.6 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + modernc.org/fileutil v1.2.0 // indirect + modernc.org/golex v1.1.0 // indirect + modernc.org/lex v1.1.1 // indirect + modernc.org/lexer v1.0.5 // indirect pluginrpc.com/pluginrpc v0.5.0 // 
indirect ) diff --git a/internal/tools/go.sum b/internal/tools/go.sum index 544b227c79..9ea1a3ffc2 100644 --- a/internal/tools/go.sum +++ b/internal/tools/go.sum @@ -186,6 +186,7 @@ github.com/quic-go/qpack v0.5.1 h1:giqksBPnT/HDtZ6VhtFKgoLOWmlyo9Ei6u9PqzIMbhI= github.com/quic-go/qpack v0.5.1/go.mod h1:+PC4XFrEskIVkcLzpEkbLqq1uCoxPhQuvK5rH1ZgaEg= github.com/quic-go/quic-go v0.50.1 h1:unsgjFIUqW8a2oopkY7YNONpV1gYND6Nt9hnt1PN94Q= github.com/quic-go/quic-go v0.50.1/go.mod h1:Vim6OmUvlYdwBhXP9ZVrtGmCMWa3wEqhq3NgYrI8b4E= +github.com/remyoudompheng/bigfft v0.0.0-20200410134404-eec4a21b6bb0/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/rs/cors v1.11.1 h1:eU3gRzXLRK57F5rKMGMZURNdIG4EoAmX8k94r9wXWHA= @@ -262,6 +263,7 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= +golang.org/x/exp v0.0.0-20181106170214-d68db9428509/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20250228200357-dead58393ab7 h1:aWwlzYV971S4BXRS9AmqwDLAD85ouC6X+pocatKY58c= golang.org/x/exp v0.0.0-20250228200357-dead58393ab7/go.mod h1:BHOTPb3L19zxehTsLoJXVaTktb06DFgmdW6Wb9s8jqk= golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -324,5 +326,16 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools/v3 v3.0.3 h1:4AuOwCGf4lLR9u3YOe2awrHygurzhO/HeQ6laiA6Sx0= gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8= 
+modernc.org/fileutil v1.1.2/go.mod h1:HdjlliqRHrMAI4nVOvvpYVzVgvRSK7WnoCiG0GUWJNo= +modernc.org/fileutil v1.2.0 h1:c7fsfzHf9WfUFXvv/RY9sStAr+VAKXYGKiAhBQQNoT4= +modernc.org/fileutil v1.2.0/go.mod h1:0rLMFc17WSz6Bm/GtHeme7TOX8pNRhFN2NkfBlOZhrQ= +modernc.org/golex v1.1.0 h1:dmSaksHMd+y6NkBsRsCShNPRaSNCNH+abrVm5/gZic8= +modernc.org/golex v1.1.0/go.mod h1:2pVlfqApurXhR1m0N+WDYu6Twnc4QuvO4+U8HnwoiRA= +modernc.org/lex v1.1.1 h1:prSCNTLw1R4rn7M/RzwsuMtAuOytfyR3cnyM07P+Pas= +modernc.org/lex v1.1.1/go.mod h1:6r8o8DLJkAnOsQaGi8fMoi+Vt6LTbDaCrkUK729D8xM= +modernc.org/lexer v1.0.4/go.mod h1:tOajb8S4sdfOYitzCgXDFmbVJ/LE0v1fNJ7annTw36U= +modernc.org/lexer v1.0.5 h1:NiKuv6LaU6+D2zra31y6FewnAU8LfrtSwHckwdnDSCg= +modernc.org/lexer v1.0.5/go.mod h1:8npHn3u/NxCEtlC/tRSY77x5+WB3HvHMzMVElQ76ayI= +modernc.org/mathutil v1.5.0/go.mod h1:mZW8CKdRPY1v87qxC/wUdX5O1qDzXMP5TH3wjfpga6E= pluginrpc.com/pluginrpc v0.5.0 h1:tOQj2D35hOmvHyPu8e7ohW2/QvAnEtKscy2IJYWQ2yo= pluginrpc.com/pluginrpc v0.5.0/go.mod h1:UNWZ941hcVAoOZUn8YZsMmOZBzbUjQa3XMns8RQLp9o= diff --git a/model/textparse/README.md b/model/textparse/README.md index 697966f097..663835f6de 100644 --- a/model/textparse/README.md +++ b/model/textparse/README.md @@ -1,6 +1,7 @@ # Making changes to textparse lexers -In the rare case that you need to update the textparse lexers, edit promlex.l or openmetricslex.l and then run the following command: -`golex -o=promlex.l.go promlex.l` -Note that you need golex installed: -`go get -u modernc.org/golex` \ No newline at end of file +Run from the repo root: + +```bash +bash ./scripts/gentextlex.sh +``` diff --git a/model/textparse/benchmark_test.go b/model/textparse/benchmark_test.go index a6fbd4ccd1..96bef15f74 100644 --- a/model/textparse/benchmark_test.go +++ b/model/textparse/benchmark_test.go @@ -23,12 +23,14 @@ import ( "strings" "testing" + "github.com/google/go-cmp/cmp" "github.com/prometheus/common/expfmt" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" 
"github.com/prometheus/prometheus/model/exemplar" "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/util/testutil" ) // BenchmarkParse... set of benchmarks analyze efficiency of parsing various @@ -138,32 +140,81 @@ func BenchmarkParseOpenMetricsNHCB(b *testing.B) { } } -func benchParse(b *testing.B, data []byte, parser string) { - type newParser func([]byte, *labels.SymbolTable) Parser +// BenchmarkParseOpenMetricsNHCB_OM1vs2 is for demo of the benefit for the complex +// type format for OM2, assuming Prometheus stores NS and NHCB (and NH) going forward. +// Format draft: https://github.com/prometheus/docs/pull/2679 +/* + export bench=out && go test ./model/textparse/... \ + -run '^$' -bench '^BenchmarkParseOpenMetricsNHCB_OM1vs2' \ + -benchtime 2s -count 6 -cpu 2 -benchmem -timeout 999m \ + | tee ${bench}.txt +*/ +func BenchmarkParseOpenMetricsNHCB_OM1vs2(b *testing.B) { + parseCases := []struct { + parser string + data []byte + }{ + { + parser: "omtext_with_nhcb", // Measure NHCB over OM parser. + data: readTestdataFile(b, "1histogram.om.txt"), + }, + { + parser: "om2text_with_nhcb", // https://github.com/prometheus/docs/pull/2679 with NHCB output. + data: readTestdataFile(b, "1histogram.om2.txt"), + }, + } + + // Before we go, test parsing works as expected. + gotA := testParse(b, newParser(b, parseCases[0].parser)(parseCases[0].data, labels.NewSymbolTable())) + gotB := testParse(b, newParser(b, parseCases[1].parser)(parseCases[1].data, labels.NewSymbolTable())) + testutil.RequireEqualWithOptions(b, gotA, gotB, []cmp.Option{cmp.AllowUnexported(parsedEntry{})}) + + // For fun, OM2 parser should work with classic histogram too (TODO add separate tests). 
+ _ = testParse(b, newParser(b, parseCases[1].parser)(parseCases[0].data, labels.NewSymbolTable())) + + for _, c := range parseCases { + b.Run(fmt.Sprintf("parser=%v", c.parser), func(b *testing.B) { + benchParse(b, c.data, c.parser) + }) + } +} + +func newParser(t testing.TB, parser string) func([]byte, *labels.SymbolTable) Parser { + t.Helper() - var newParserFn newParser switch parser { case "promtext": - newParserFn = func(b []byte, st *labels.SymbolTable) Parser { + return func(b []byte, st *labels.SymbolTable) Parser { return NewPromParser(b, st, false) } case "promproto": - newParserFn = func(b []byte, st *labels.SymbolTable) Parser { + return func(b []byte, st *labels.SymbolTable) Parser { return NewProtobufParser(b, true, false, false, st) } case "omtext": - newParserFn = func(b []byte, st *labels.SymbolTable) Parser { + return func(b []byte, st *labels.SymbolTable) Parser { return NewOpenMetricsParser(b, st, WithOMParserCTSeriesSkipped()) } case "omtext_with_nhcb": - newParserFn = func(buf []byte, st *labels.SymbolTable) Parser { + return func(buf []byte, st *labels.SymbolTable) Parser { p, err := New(buf, "application/openmetrics-text", st, ParserOptions{ConvertClassicHistogramsToNHCB: true}) - require.NoError(b, err) + require.NoError(t, err) return p } + case "om2text_with_nhcb": + return func(b []byte, st *labels.SymbolTable) Parser { + return NewOpenMetrics2Parser(b, st, func(options *openMetrics2ParserOptions) { + options.unrollComplexTypes = false + }) + } default: - b.Fatal("unknown parser", parser) + t.Fatal("unknown parser", parser) } + return nil +} + +func benchParse(b *testing.B, data []byte, parser string) { + newParserFn := newParser(b, parser) var ( res labels.Labels diff --git a/model/textparse/interface_test.go b/model/textparse/interface_test.go index 532c474845..5dc8bf7044 100644 --- a/model/textparse/interface_test.go +++ b/model/textparse/interface_test.go @@ -226,7 +226,7 @@ func requireEntries(t *testing.T, exp, got []parsedEntry) 
{ }) } -func testParse(t *testing.T, p Parser) (ret []parsedEntry) { +func testParse(t testing.TB, p Parser) (ret []parsedEntry) { t.Helper() for { diff --git a/model/textparse/openmetrics2lex.l b/model/textparse/openmetrics2lex.l new file mode 100644 index 0000000000..811237125b --- /dev/null +++ b/model/textparse/openmetrics2lex.l @@ -0,0 +1,95 @@ +%{ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "fmt" +) + +// Lex is called by the parser generated by "go tool yacc" to obtain each +// token. The method is opened before the matching rules block and closed at +// the end of the file. +func (l *openMetrics2Lexer) Lex() token { + if l.i >= len(l.b) { + return tEOF + } + c := l.b[l.i] + l.start = l.i + +%} + +D [0-9] +L [a-zA-Z_] +M [a-zA-Z_:] +C [^\n] +S [ ] + +%x sComment sMeta1 sMeta2 sLabels sLValue sValue sTimestamp sExemplar sEValue sETimestamp sCValue + +%yyc c +%yyn c = l.next() +%yyt l.state + + +%% + +#{S} l.state = sComment +HELP{S} l.state = sMeta1; return tHelp +TYPE{S} l.state = sMeta1; return tType +UNIT{S} l.state = sMeta1; return tUnit +"EOF"\n? 
l.state = sInit; return tEOFWord +\"(\\.|[^\\"])*\" l.state = sMeta2; return tMName +{M}({M}|{D})* l.state = sMeta2; return tMName +{S}{C}*\n l.state = sInit; return tText + +{M}({M}|{D})* l.state = sValue; return tMName +\{ l.state = sLabels; return tBraceOpen +\{ l.state = sLabels; return tBraceOpen +{L}({L}|{D})* return tLName +\"(\\.|[^\\"])*\" l.state = sLabels; return tQString +\} l.state = sValue; return tBraceClose += l.state = sLValue; return tEqual +, return tComma +\"(\\.|[^\\"\n])*\" l.state = sLabels; return tLValue + +{S}\{ l.state = sCValue; return tBraceOpen +{L}({L}|{D})* return tLName +: return tColon +\[ return tBracketOpen +\] return tBracketClose +, return tComma +[ \t]+ // Skip whitespace inside the block +[^ \n\t,\[\]{}:]+ return tValue +\} l.state = sTimestamp; return tBraceClose + +{S}[^{ \n]+ l.state = sTimestamp; return tValue +{S}[^ \n]+ return tTimestamp +\n l.state = sInit; return tLinebreak +{S}#{S}\{ l.state = sExemplar; return tComment + +{L}({L}|{D})* return tLName +\"(\\.|[^\\"\n])*\" l.state = sExemplar; return tQString +\} l.state = sEValue; return tBraceClose += l.state = sEValue; return tEqual +\"(\\.|[^\\"\n])*\" l.state = sExemplar; return tLValue +, return tComma +{S}[^ \n]+ l.state = sETimestamp; return tValue +{S}[^ \n]+ return tTimestamp +\n l.state = sInit; return tLinebreak + +%% + + return tInvalid +} diff --git a/model/textparse/openmetrics2lex.l.go b/model/textparse/openmetrics2lex.l.go new file mode 100644 index 0000000000..6b4bf3ee3e --- /dev/null +++ b/model/textparse/openmetrics2lex.l.go @@ -0,0 +1,1029 @@ +// Code generated by golex. DO NOT EDIT. + +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package textparse + +import ( + "fmt" +) + +// Lex is called by the parser generated by "go tool yacc" to obtain each +// token. The method is opened before the matching rules block and closed at +// the end of the file. +func (l *openMetrics2Lexer) Lex() token { + if l.i >= len(l.b) { + return tEOF + } + c := l.b[l.i] + l.start = l.i + +yystate0: + + switch yyt := l.state; yyt { + default: + panic(fmt.Errorf(`invalid start condition %d`, yyt)) + case 0: // start condition: INITIAL + goto yystart1 + case 1: // start condition: sComment + goto yystart6 + case 2: // start condition: sMeta1 + goto yystart26 + case 3: // start condition: sMeta2 + goto yystart31 + case 4: // start condition: sLabels + goto yystart34 + case 5: // start condition: sLValue + goto yystart42 + case 6: // start condition: sValue + goto yystart46 + case 7: // start condition: sTimestamp + goto yystart51 + case 8: // start condition: sExemplar + goto yystart58 + case 9: // start condition: sEValue + goto yystart66 + case 10: // start condition: sETimestamp + goto yystart72 + case 11: // start condition: sCValue + goto yystart76 + } + +yystate1: + c = l.next() +yystart1: + switch { + default: + goto yyabort + case c == '#': + goto yystate2 + case c == ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate4 + case c == '{': + goto yystate5 + } + +yystate2: + c = l.next() + switch { + default: + goto yyabort + case c == ' ': + goto yystate3 + } + +yystate3: + c = l.next() + goto yyrule1 + +yystate4: + c = l.next() + switch { + default: + goto yyrule9 + 
case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate4 + } + +yystate5: + c = l.next() + goto yyrule11 + +yystate6: + c = l.next() +yystart6: + switch { + default: + goto yyabort + case c == 'E': + goto yystate7 + case c == 'H': + goto yystate11 + case c == 'T': + goto yystate16 + case c == 'U': + goto yystate21 + } + +yystate7: + c = l.next() + switch { + default: + goto yyabort + case c == 'O': + goto yystate8 + } + +yystate8: + c = l.next() + switch { + default: + goto yyabort + case c == 'F': + goto yystate9 + } + +yystate9: + c = l.next() + switch { + default: + goto yyrule5 + case c == '\n': + goto yystate10 + } + +yystate10: + c = l.next() + goto yyrule5 + +yystate11: + c = l.next() + switch { + default: + goto yyabort + case c == 'E': + goto yystate12 + } + +yystate12: + c = l.next() + switch { + default: + goto yyabort + case c == 'L': + goto yystate13 + } + +yystate13: + c = l.next() + switch { + default: + goto yyabort + case c == 'P': + goto yystate14 + } + +yystate14: + c = l.next() + switch { + default: + goto yyabort + case c == ' ': + goto yystate15 + } + +yystate15: + c = l.next() + goto yyrule2 + +yystate16: + c = l.next() + switch { + default: + goto yyabort + case c == 'Y': + goto yystate17 + } + +yystate17: + c = l.next() + switch { + default: + goto yyabort + case c == 'P': + goto yystate18 + } + +yystate18: + c = l.next() + switch { + default: + goto yyabort + case c == 'E': + goto yystate19 + } + +yystate19: + c = l.next() + switch { + default: + goto yyabort + case c == ' ': + goto yystate20 + } + +yystate20: + c = l.next() + goto yyrule3 + +yystate21: + c = l.next() + switch { + default: + goto yyabort + case c == 'N': + goto yystate22 + } + +yystate22: + c = l.next() + switch { + default: + goto yyabort + case c == 'I': + goto yystate23 + } + +yystate23: + c = l.next() + switch { + default: + goto yyabort + case c == 'T': + goto yystate24 + } + +yystate24: + c = l.next() + switch { + 
default: + goto yyabort + case c == ' ': + goto yystate25 + } + +yystate25: + c = l.next() + goto yyrule4 + +yystate26: + c = l.next() +yystart26: + switch { + default: + goto yyabort + case c == '"': + goto yystate27 + case c == ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate30 + } + +yystate27: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate28 + case c == '\\': + goto yystate29 + case c >= '\x01' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate27 + } + +yystate28: + c = l.next() + goto yyrule6 + +yystate29: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate27 + } + +yystate30: + c = l.next() + switch { + default: + goto yyrule7 + case c >= '0' && c <= ':' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate30 + } + +yystate31: + c = l.next() +yystart31: + switch { + default: + goto yyabort + case c == ' ': + goto yystate32 + } + +yystate32: + c = l.next() + switch { + default: + goto yyabort + case c == '\n': + goto yystate33 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate32 + } + +yystate33: + c = l.next() + goto yyrule8 + +yystate34: + c = l.next() +yystart34: + switch { + default: + goto yyabort + case c == '"': + goto yystate35 + case c == ',': + goto yystate38 + case c == '=': + goto yystate39 + case c == '}': + goto yystate41 + case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate40 + } + +yystate35: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate36 + case c == '\\': + goto yystate37 + case c >= '\x01' && c <= '!' 
|| c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate35 + } + +yystate36: + c = l.next() + goto yyrule13 + +yystate37: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate35 + } + +yystate38: + c = l.next() + goto yyrule16 + +yystate39: + c = l.next() + goto yyrule15 + +yystate40: + c = l.next() + switch { + default: + goto yyrule12 + case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate40 + } + +yystate41: + c = l.next() + goto yyrule14 + +yystate42: + c = l.next() +yystart42: + switch { + default: + goto yyabort + case c == '"': + goto yystate43 + } + +yystate43: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate44 + case c == '\\': + goto yystate45 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate43 + } + +yystate44: + c = l.next() + goto yyrule17 + +yystate45: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate43 + } + +yystate46: + c = l.next() +yystart46: + switch { + default: + goto yyabort + case c == ' ': + goto yystate47 + case c == '{': + goto yystate50 + } + +yystate47: + c = l.next() + switch { + default: + goto yyabort + case c == '{': + goto yystate49 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'z' || c >= '|' && c <= 'ÿ': + goto yystate48 + } + +yystate48: + c = l.next() + switch { + default: + goto yyrule27 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' 
&& c <= 'z' || c >= '|' && c <= 'ÿ': + goto yystate48 + } + +yystate49: + c = l.next() + goto yyrule18 + +yystate50: + c = l.next() + goto yyrule10 + +yystate51: + c = l.next() +yystart51: + switch { + default: + goto yyabort + case c == ' ': + goto yystate53 + case c == '\n': + goto yystate52 + } + +yystate52: + c = l.next() + goto yyrule29 + +yystate53: + c = l.next() + switch { + default: + goto yyabort + case c == '#': + goto yystate55 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c == '!' || c == '"' || c >= '$' && c <= 'ÿ': + goto yystate54 + } + +yystate54: + c = l.next() + switch { + default: + goto yyrule28 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate54 + } + +yystate55: + c = l.next() + switch { + default: + goto yyrule28 + case c == ' ': + goto yystate56 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate54 + } + +yystate56: + c = l.next() + switch { + default: + goto yyabort + case c == '{': + goto yystate57 + } + +yystate57: + c = l.next() + goto yyrule30 + +yystate58: + c = l.next() +yystart58: + switch { + default: + goto yyabort + case c == '"': + goto yystate59 + case c == ',': + goto yystate62 + case c == '=': + goto yystate63 + case c == '}': + goto yystate65 + case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate64 + } + +yystate59: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate60 + case c == '\\': + goto yystate61 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' 
|| c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate59 + } + +yystate60: + c = l.next() + goto yyrule32 + +yystate61: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate59 + } + +yystate62: + c = l.next() + goto yyrule36 + +yystate63: + c = l.next() + goto yyrule34 + +yystate64: + c = l.next() + switch { + default: + goto yyrule31 + case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate64 + } + +yystate65: + c = l.next() + goto yyrule33 + +yystate66: + c = l.next() +yystart66: + switch { + default: + goto yyabort + case c == ' ': + goto yystate67 + case c == '"': + goto yystate69 + } + +yystate67: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate68 + } + +yystate68: + c = l.next() + switch { + default: + goto yyrule37 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate68 + } + +yystate69: + c = l.next() + switch { + default: + goto yyabort + case c == '"': + goto yystate70 + case c == '\\': + goto yystate71 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '!' || c >= '#' && c <= '[' || c >= ']' && c <= 'ÿ': + goto yystate69 + } + +yystate70: + c = l.next() + goto yyrule35 + +yystate71: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= 'ÿ': + goto yystate69 + } + +yystate72: + c = l.next() +yystart72: + switch { + default: + goto yyabort + case c == ' ': + goto yystate74 + case c == '\n': + goto yystate73 + } + +yystate73: + c = l.next() + goto yyrule39 + +yystate74: + c = l.next() + switch { + default: + goto yyabort + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' 
&& c <= 'ÿ': + goto yystate75 + } + +yystate75: + c = l.next() + switch { + default: + goto yyrule38 + case c >= '\x01' && c <= '\t' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= 'ÿ': + goto yystate75 + } + +yystate76: + c = l.next() +yystart76: + switch { + default: + goto yyabort + case c == ',': + goto yystate79 + case c == ':': + goto yystate80 + case c == '[': + goto yystate82 + case c == '\t' || c == ' ': + goto yystate78 + case c == ']': + goto yystate83 + case c == '}': + goto yystate84 + case c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate81 + case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= '+' || c >= '-' && c <= '9' || c >= ';' && c <= '@' || c == '\\' || c == '^' || c == '`' || c == '|' || c >= '~' && c <= 'ÿ': + goto yystate77 + } + +yystate77: + c = l.next() + switch { + default: + goto yyrule25 + case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' && c <= '+' || c >= '-' && c <= '9' || c >= ';' && c <= 'Z' || c == '\\' || c >= '^' && c <= 'z' || c == '|' || c >= '~' && c <= 'ÿ': + goto yystate77 + } + +yystate78: + c = l.next() + switch { + default: + goto yyrule24 + case c == '\t' || c == ' ': + goto yystate78 + } + +yystate79: + c = l.next() + goto yyrule23 + +yystate80: + c = l.next() + goto yyrule20 + +yystate81: + c = l.next() + switch { + default: + goto yyrule19 + case c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c == '_' || c >= 'a' && c <= 'z': + goto yystate81 + case c >= '\x01' && c <= '\b' || c >= '\v' && c <= '\x1f' || c >= '!' 
&& c <= '+' || c >= '-' && c <= '/' || c >= ';' && c <= '@' || c == '\\' || c == '^' || c == '`' || c == '|' || c >= '~' && c <= 'ÿ': + goto yystate77 + } + +yystate82: + c = l.next() + goto yyrule21 + +yystate83: + c = l.next() + goto yyrule22 + +yystate84: + c = l.next() + goto yyrule26 + +yyrule1: // #{S} + { + l.state = sComment + goto yystate0 + } +yyrule2: // HELP{S} + { + l.state = sMeta1 + return tHelp + goto yystate0 + } +yyrule3: // TYPE{S} + { + l.state = sMeta1 + return tType + goto yystate0 + } +yyrule4: // UNIT{S} + { + l.state = sMeta1 + return tUnit + goto yystate0 + } +yyrule5: // "EOF"\n? + { + l.state = sInit + return tEOFWord + goto yystate0 + } +yyrule6: // \"(\\.|[^\\"])*\" + { + l.state = sMeta2 + return tMName + goto yystate0 + } +yyrule7: // {M}({M}|{D})* + { + l.state = sMeta2 + return tMName + goto yystate0 + } +yyrule8: // {S}{C}*\n + { + l.state = sInit + return tText + goto yystate0 + } +yyrule9: // {M}({M}|{D})* + { + l.state = sValue + return tMName + goto yystate0 + } +yyrule10: // \{ + { + l.state = sLabels + return tBraceOpen + goto yystate0 + } +yyrule11: // \{ + { + l.state = sLabels + return tBraceOpen + goto yystate0 + } +yyrule12: // {L}({L}|{D})* + { + return tLName + } +yyrule13: // \"(\\.|[^\\"])*\" + { + l.state = sLabels + return tQString + goto yystate0 + } +yyrule14: // \} + { + l.state = sValue + return tBraceClose + goto yystate0 + } +yyrule15: // = + { + l.state = sLValue + return tEqual + goto yystate0 + } +yyrule16: // , + { + return tComma + } +yyrule17: // \"(\\.|[^\\"\n])*\" + { + l.state = sLabels + return tLValue + goto yystate0 + } +yyrule18: // {S}\{ + { + l.state = sCValue + return tBraceOpen + goto yystate0 + } +yyrule19: // {L}({L}|{D})* + { + return tLName + } +yyrule20: // : + { + return tColon + } +yyrule21: // \[ + { + return tBracketOpen + } +yyrule22: // \] + { + return tBracketClose + } +yyrule23: // , + { + return tComma + } +yyrule24: // [ \t]+ + { + // Skip whitespace inside the block + goto 
yystate0 + } +yyrule25: // [^ \n\t,\[\]{}:]+ + { + return tValue + } +yyrule26: // \} + { + l.state = sTimestamp + return tBraceClose + goto yystate0 + } +yyrule27: // {S}[^{ \n]+ + { + l.state = sTimestamp + return tValue + goto yystate0 + } +yyrule28: // {S}[^ \n]+ + { + return tTimestamp + } +yyrule29: // \n + { + l.state = sInit + return tLinebreak + goto yystate0 + } +yyrule30: // {S}#{S}\{ + { + l.state = sExemplar + return tComment + goto yystate0 + } +yyrule31: // {L}({L}|{D})* + { + return tLName + } +yyrule32: // \"(\\.|[^\\"\n])*\" + { + l.state = sExemplar + return tQString + goto yystate0 + } +yyrule33: // \} + { + l.state = sEValue + return tBraceClose + goto yystate0 + } +yyrule34: // = + { + l.state = sEValue + return tEqual + goto yystate0 + } +yyrule35: // \"(\\.|[^\\"\n])*\" + { + l.state = sExemplar + return tLValue + goto yystate0 + } +yyrule36: // , + { + return tComma + } +yyrule37: // {S}[^ \n]+ + { + l.state = sETimestamp + return tValue + goto yystate0 + } +yyrule38: // {S}[^ \n]+ + { + return tTimestamp + } +yyrule39: // \n + if true { // avoid go vet determining the below panic will not be reached + l.state = sInit + return tLinebreak + goto yystate0 + } + panic("unreachable") + +yyabort: // no lexem recognized + // silence unused label errors for build and satisfy go vet reachability analysis + { + if false { + goto yyabort + } + if false { + goto yystate0 + } + if false { + goto yystate1 + } + if false { + goto yystate6 + } + if false { + goto yystate26 + } + if false { + goto yystate31 + } + if false { + goto yystate34 + } + if false { + goto yystate42 + } + if false { + goto yystate46 + } + if false { + goto yystate51 + } + if false { + goto yystate58 + } + if false { + goto yystate66 + } + if false { + goto yystate72 + } + if false { + goto yystate76 + } + } + + return tInvalid +} diff --git a/model/textparse/openmetrics2parse.go b/model/textparse/openmetrics2parse.go new file mode 100644 index 0000000000..6090160f3d --- /dev/null 
+++ b/model/textparse/openmetrics2parse.go @@ -0,0 +1,755 @@ +// Copyright 2025 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:generate go get -u modernc.org/golex +//go:generate golex -o=openmetrics2lex.l.go openmetrics2lex.l + +package textparse + +import ( + "bytes" + "errors" + "fmt" + "io" + "math" + "strings" + "unicode/utf8" + + "github.com/prometheus/common/model" + + "github.com/prometheus/prometheus/model/exemplar" + "github.com/prometheus/prometheus/model/histogram" + "github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/model/value" + "github.com/prometheus/prometheus/schema" + "github.com/prometheus/prometheus/util/convertnhcb" +) + +type openMetrics2Lexer struct { + b []byte + i int + start int + err error + state int +} + +// buf returns the buffer of the current token. +func (l *openMetrics2Lexer) buf() []byte { + return l.b[l.start:l.i] +} + +// next advances the openMetrics2Lexer to the next character. +func (l *openMetrics2Lexer) next() byte { + l.i++ + if l.i >= len(l.b) { + l.err = io.EOF + return byte(tEOF) + } + // Lex struggles with null bytes. If we are in a label value or help string, where + // they are allowed, consume them here immediately. 
+ for l.b[l.i] == 0 && (l.state == sLValue || l.state == sMeta2 || l.state == sComment) { + l.i++ + if l.i >= len(l.b) { + l.err = io.EOF + return byte(tEOF) + } + } + return l.b[l.i] +} + +func (l *openMetrics2Lexer) Error(es string) { + l.err = errors.New(es) +} + +// OpenMetrics2Parser is a parser for the OpenMetrics 2.0 text exposition format. +// Specification can be found at https://prometheus.io/docs/specs/om/open_metrics_spec_2_0/ +type OpenMetrics2Parser struct { + l *openMetrics2Lexer + builder labels.ScratchBuilder + series []byte + mfNameLen int // length of metric family name to get from series. + text []byte + mtype model.MetricType + unit string + + val float64 + tempHist convertnhcb.TempHistogram + h *histogram.Histogram + fh *histogram.FloatHistogram + // TODO: Implement summary complex type. + + ts int64 + hasTS bool + start int + // offsets is a list of offsets into series that describe the positions + // of the metric name and label names and values for this series. + // p.offsets[0] is the start character of the metric name. + // p.offsets[1] is the end of the metric name. + // Subsequently, p.offsets holds a pair of offset pairs per label for the positions + // of the label name and value start and end characters. + offsets []int + + eOffsets []int + exemplar []byte + exemplarVal float64 + exemplarTs int64 + hasExemplarTs bool + + // ignoreExemplar instructs the parser to not overwrite exemplars (to keep them while peeking ahead). + ignoreExemplar bool + enableTypeAndUnitLabels bool +} + +type openMetrics2ParserOptions struct { + enableTypeAndUnitLabels bool + // TODO: Probably this option should be per metric name (: + unrollComplexTypes bool +} + +type OpenMetrics2Option func(*openMetrics2ParserOptions) + +// WithOM2ParserTypeAndUnitLabels enables type-and-unit-labels mode +// in which parser injects __type__ and __unit__ into labels. 
+func WithOM2ParserTypeAndUnitLabels() OpenMetrics2Option { + return func(o *openMetrics2ParserOptions) { + o.enableTypeAndUnitLabels = true + } +} + +// NewOpenMetrics2Parser returns a new parser for the byte slice with option to skip CT series parsing. +func NewOpenMetrics2Parser(b []byte, st *labels.SymbolTable, opts ...OpenMetrics2Option) Parser { + options := &openMetrics2ParserOptions{} + + for _, opt := range opts { + opt(options) + } + + if options.unrollComplexTypes { + // TODO: Implement this. + panic("not implemented") + } + + parser := &OpenMetrics2Parser{ + l: &openMetrics2Lexer{b: b}, + builder: labels.NewScratchBuilderWithSymbolTable(st, 16), + enableTypeAndUnitLabels: options.enableTypeAndUnitLabels, + tempHist: convertnhcb.NewTempHistogram(), + } + return parser +} + +// Series returns the bytes of the series, the timestamp if set, and the value +// of the current sample. +func (p *OpenMetrics2Parser) Series() ([]byte, *int64, float64) { + if p.hasTS { + ts := p.ts + return p.series, &ts, p.val + } + return p.series, nil, p.val +} + +// Histogram returns the bytes of the series, the timestamp if set, and one of +// the value (float of integer histogram) of the current complex sample representing histogram. +func (p *OpenMetrics2Parser) Histogram() ([]byte, *int64, *histogram.Histogram, *histogram.FloatHistogram) { + if p.hasTS { + ts := p.ts + return p.series, &ts, p.h, p.fh + } + return p.series, nil, p.h, p.fh +} + +// Help returns the metric name and help text in the current entry. +// Must only be called after Next returned a help entry. +// The returned byte slices become invalid after the next call to Next. +func (p *OpenMetrics2Parser) Help() ([]byte, []byte) { + m := p.l.b[p.offsets[0]:p.offsets[1]] + + // Replacer causes allocations. Replace only when necessary. + if bytes.IndexByte(p.text, byte('\\')) >= 0 { + // OpenMetrics always uses the Prometheus format label value escaping. 
+ return m, []byte(lvalReplacer.Replace(string(p.text))) + } + return m, p.text +} + +// Type returns the metric name and type in the current entry. +// Must only be called after Next returned a type entry. +// The returned byte slices become invalid after the next call to Next. +func (p *OpenMetrics2Parser) Type() ([]byte, model.MetricType) { + return p.l.b[p.offsets[0]:p.offsets[1]], p.mtype +} + +// Unit returns the metric name and unit in the current entry. +// Must only be called after Next returned a unit entry. +// The returned byte slices become invalid after the next call to Next. +func (p *OpenMetrics2Parser) Unit() ([]byte, []byte) { + return p.l.b[p.offsets[0]:p.offsets[1]], []byte(p.unit) +} + +// Comment returns the text of the current comment. +// Must only be called after Next returned a comment entry. +// The returned byte slice becomes invalid after the next call to Next. +func (p *OpenMetrics2Parser) Comment() []byte { + return p.text +} + +// Labels writes the labels of the current sample into the passed labels. +func (p *OpenMetrics2Parser) Labels(l *labels.Labels) { + // Defensive copy in case the following keeps a reference. + // See https://github.com/prometheus/prometheus/issues/16490 + s := string(p.series) + + p.builder.Reset() + metricName := unreplace(s[p.offsets[0]-p.start : p.offsets[1]-p.start]) + + m := schema.Metadata{ + Name: metricName, + Type: p.mtype, + Unit: p.unit, + } + if p.enableTypeAndUnitLabels { + m.AddToLabels(&p.builder) + } else { + p.builder.Add(labels.MetricName, metricName) + } + for i := 2; i < len(p.offsets); i += 4 { + a := p.offsets[i] - p.start + b := p.offsets[i+1] - p.start + label := unreplace(s[a:b]) + if p.enableTypeAndUnitLabels && !m.IsEmptyFor(label) { + // Dropping user provided metadata labels, if found in the OM metadata. 
+ continue + } + c := p.offsets[i+2] - p.start + d := p.offsets[i+3] - p.start + value := normalizeFloatsInLabelValues(p.mtype, label, unreplace(s[c:d])) + p.builder.Add(label, value) + } + + p.builder.Sort() + *l = p.builder.Labels() +} + +// Exemplar writes the exemplar of the current sample into the passed exemplar. +// It returns whether an exemplar exists. As OpenMetrics only ever has one +// exemplar per sample, every call after the first (for the same sample) will +// always return false. +func (p *OpenMetrics2Parser) Exemplar(e *exemplar.Exemplar) bool { + if len(p.exemplar) == 0 { + return false + } + + // Allocate the full immutable string immediately, so we just + // have to create references on it below. + s := string(p.exemplar) + + e.Value = p.exemplarVal + if p.hasExemplarTs { + e.HasTs = true + e.Ts = p.exemplarTs + } + + p.builder.Reset() + for i := 0; i < len(p.eOffsets); i += 4 { + a := p.eOffsets[i] - p.start + b := p.eOffsets[i+1] - p.start + c := p.eOffsets[i+2] - p.start + d := p.eOffsets[i+3] - p.start + + p.builder.Add(s[a:b], s[c:d]) + } + + p.builder.Sort() + e.Labels = p.builder.Labels() + + // Wipe exemplar so that future calls return false. + p.exemplar = p.exemplar[:0] + return true +} + +// CreatedTimestamp returns the created timestamp for a current Metric if exists or nil. +func (*OpenMetrics2Parser) CreatedTimestamp() int64 { + // TODO: Implement. + return 0 +} + +// nextToken returns the next token from the openMetricsLexer. +func (p *OpenMetrics2Parser) nextToken() token { + t := p.l.Lex() + return t +} + +func (p *OpenMetrics2Parser) parseError(exp string, got token) error { + e := min(len(p.l.b), p.l.i+1) + return fmt.Errorf("%s, got %q (%q) while parsing: %q", exp, p.l.b[p.l.start:e], got, p.l.b[p.start:e]) +} + +// Next advances the parser to the next sample. +// It returns (EntryInvalid, io.EOF) if no samples were read. 
+func (p *OpenMetrics2Parser) Next() (Entry, error) { + var err error + + p.start = p.l.i + p.offsets = p.offsets[:0] + if !p.ignoreExemplar { + p.eOffsets = p.eOffsets[:0] + p.exemplar = p.exemplar[:0] + p.exemplarVal = 0 + p.hasExemplarTs = false + } + + switch t := p.nextToken(); t { + case tEOFWord: + if t := p.nextToken(); t != tEOF { + return EntryInvalid, errors.New("unexpected data after # EOF") + } + return EntryInvalid, io.EOF + case tEOF: + return EntryInvalid, errors.New("data does not end with # EOF") + case tHelp, tType, tUnit: + switch t2 := p.nextToken(); t2 { + case tMName: + mStart := p.l.start + mEnd := p.l.i + if p.l.b[mStart] == '"' && p.l.b[mEnd-1] == '"' { + mStart++ + mEnd-- + } + p.mfNameLen = mEnd - mStart + p.offsets = append(p.offsets, mStart, mEnd) + default: + return EntryInvalid, p.parseError("expected metric name after "+t.String(), t2) + } + switch t2 := p.nextToken(); t2 { + case tText: + if len(p.l.buf()) > 1 { + p.text = p.l.buf()[1 : len(p.l.buf())-1] + } else { + p.text = []byte{} + } + default: + return EntryInvalid, fmt.Errorf("expected text in %s", t.String()) + } + switch t { + case tType: + switch s := yoloString(p.text); s { + case "counter": + p.mtype = model.MetricTypeCounter + case "gauge": + p.mtype = model.MetricTypeGauge + case "histogram": + p.mtype = model.MetricTypeHistogram + case "gaugehistogram": + p.mtype = model.MetricTypeGaugeHistogram + case "summary": + p.mtype = model.MetricTypeSummary + case "info": + p.mtype = model.MetricTypeInfo + case "stateset": + p.mtype = model.MetricTypeStateset + case "unknown": + p.mtype = model.MetricTypeUnknown + default: + return EntryInvalid, fmt.Errorf("invalid metric type %q", s) + } + case tHelp: + if !utf8.Valid(p.text) { + return EntryInvalid, fmt.Errorf("help text %q is not a valid utf8 string", p.text) + } + } + switch t { + case tHelp: + return EntryHelp, nil + case tType: + return EntryType, nil + case tUnit: + p.unit = string(p.text) + m := 
yoloString(p.l.b[p.offsets[0]:p.offsets[1]]) + if len(p.unit) > 0 { + if !strings.HasSuffix(m, p.unit) || len(m) < len(p.unit)+1 || p.l.b[p.offsets[1]-len(p.unit)-1] != '_' { + return EntryInvalid, fmt.Errorf("unit %q not a suffix of metric %q", p.unit, m) + } + } + return EntryUnit, nil + } + + case tBraceOpen: + // We found a brace, so make room for the eventual metric name. If these + // values aren't updated, then the metric name was not set inside the + // braces and we can return an error. + if len(p.offsets) == 0 { + p.offsets = []int{-1, -1} + } + if p.offsets, err = p.parseLVals(p.offsets, false); err != nil { + return EntryInvalid, err + } + + p.series = p.l.b[p.start:p.l.i] + return p.parseSeriesEndOfLine(p.nextToken()) + case tMName: + p.offsets = append(p.offsets, p.start, p.l.i) + p.series = p.l.b[p.start:p.l.i] + + t2 := p.nextToken() + if t2 == tBraceOpen { + p.offsets, err = p.parseLVals(p.offsets, false) + if err != nil { + return EntryInvalid, err + } + p.series = p.l.b[p.start:p.l.i] + t2 = p.nextToken() + } + + return p.parseSeriesEndOfLine(t2) + default: + err = p.parseError("expected a valid start token", t) + } + return EntryInvalid, err +} + +func (p *OpenMetrics2Parser) parseComment() error { + var err error + + if p.ignoreExemplar { + for t := p.nextToken(); t != tLinebreak; t = p.nextToken() { + if t == tEOF { + return errors.New("data does not end with # EOF") + } + } + return nil + } + + // Parse the labels. + p.eOffsets, err = p.parseLVals(p.eOffsets, true) + if err != nil { + return err + } + p.exemplar = p.l.b[p.start:p.l.i] + + // Get the value. + p.exemplarVal, err = p.parseFloatValue(p.nextToken(), "exemplar labels") + if err != nil { + return err + } + + // Read the optional timestamp. 
+ p.hasExemplarTs = false + switch t2 := p.nextToken(); t2 { + case tEOF: + return errors.New("data does not end with # EOF") + case tLinebreak: + break + case tTimestamp: + p.hasExemplarTs = true + var ts float64 + // A float is enough to hold what we need for millisecond resolution. + if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil { + return fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) + } + if math.IsNaN(ts) || math.IsInf(ts, 0) { + return fmt.Errorf("invalid exemplar timestamp %f", ts) + } + p.exemplarTs = int64(ts * 1000) + switch t3 := p.nextToken(); t3 { + case tLinebreak: + default: + return p.parseError("expected next entry after exemplar timestamp", t3) + } + default: + return p.parseError("expected timestamp or comment", t2) + } + return nil +} + +func (p *OpenMetrics2Parser) parseLVals(offsets []int, isExemplar bool) ([]int, error) { + t := p.nextToken() + for { + curTStart := p.l.start + curTI := p.l.i + switch t { + case tBraceClose: + return offsets, nil + case tLName: + case tQString: + default: + return nil, p.parseError("expected label name", t) + } + + t = p.nextToken() + // A quoted string followed by a comma or brace is a metric name. Set the + // offsets and continue processing. If this is an exemplar, this format + // is not allowed. + if t == tComma || t == tBraceClose { + if isExemplar { + return nil, p.parseError("expected label name", t) + } + if offsets[0] != -1 || offsets[1] != -1 { + return nil, fmt.Errorf("metric name already set while parsing: %q", p.l.b[p.start:p.l.i]) + } + offsets[0] = curTStart + 1 + offsets[1] = curTI - 1 + if t == tBraceClose { + return offsets, nil + } + t = p.nextToken() + continue + } + // We have a label name, and it might be quoted. 
+ if p.l.b[curTStart] == '"' { + curTStart++ + curTI-- + } + offsets = append(offsets, curTStart, curTI) + + if t != tEqual { + return nil, p.parseError("expected equal", t) + } + if t := p.nextToken(); t != tLValue { + return nil, p.parseError("expected label value", t) + } + if !utf8.Valid(p.l.buf()) { + return nil, fmt.Errorf("invalid UTF-8 label value: %q", p.l.buf()) + } + + // The openMetricsLexer ensures the value string is quoted. Strip first + // and last character. + offsets = append(offsets, p.l.start+1, p.l.i-1) + + // Free trailing commas are allowed. + t = p.nextToken() + if t == tComma { + t = p.nextToken() + } else if t != tBraceClose { + return nil, p.parseError("expected comma or brace close", t) + } + } +} + +// parseSeriesEndOfLine parses the series end of the line (value, optional +// timestamp, commentary, etc.) after the metric name and labels. +// It starts parsing with the provided token. +func (p *OpenMetrics2Parser) parseSeriesEndOfLine(t token) (e Entry, err error) { + if p.offsets[0] == -1 { + return EntryInvalid, fmt.Errorf("metric name not set while parsing: %q", p.l.b[p.start:p.l.i]) + } + switch p.l.state { + case sTimestamp: + p.val, err = p.parseFloatValue(t, "metric") + if err != nil { + return EntryInvalid, err + } + e = EntrySeries + case sCValue: + e, err = p.parseComplexValue(t, "metric") + if err != nil { + return EntryInvalid, err + } + default: + return EntryInvalid, p.parseError(fmt.Sprintf("unexpected parser state %v, expect float or complex value", p.l.state), t) + } + + p.hasTS = false + switch t2 := p.nextToken(); t2 { + case tEOF: + return EntryInvalid, errors.New("data does not end with # EOF") + case tLinebreak: + break + case tComment: + if err := p.parseComment(); err != nil { + return EntryInvalid, err + } + case tTimestamp: + p.hasTS = true + var ts float64 + // A float is enough to hold what we need for millisecond resolution. 
+ if ts, err = parseFloat(yoloString(p.l.buf()[1:])); err != nil { + return EntryInvalid, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) + } + if math.IsNaN(ts) || math.IsInf(ts, 0) { + return EntryInvalid, fmt.Errorf("invalid timestamp %f", ts) + } + p.ts = int64(ts * 1000) + switch t3 := p.nextToken(); t3 { + case tLinebreak: + case tComment: + if err := p.parseComment(); err != nil { + return EntryInvalid, err + } + default: + return EntryInvalid, p.parseError("expected next entry after timestamp", t3) + } + } + return e, nil +} + +func (p *OpenMetrics2Parser) parseFloatValue(t token, after string) (float64, error) { + if t != tValue { + return 0, p.parseError(fmt.Sprintf("expected value after %v", after), t) + } + val, err := parseFloat(yoloString(p.l.buf()[1:])) + if err != nil { + return 0, fmt.Errorf("%w while parsing: %q", err, p.l.b[p.start:p.l.i]) + } + // Ensure canonical NaN value. + if math.IsNaN(p.exemplarVal) { + val = math.Float64frombits(value.NormalNaN) + } + return val, nil +} + +func (p *OpenMetrics2Parser) parseComplexValue(t token, after string) (_ Entry, err error) { + if t != tBraceOpen { + return EntryInvalid, p.parseError(fmt.Sprintf("expected brace open after %v", after), t) + } + + switch p.mtype { + default: + return EntryInvalid, p.parseError("unexpected parser type", t) + case model.MetricTypeSummary: + return EntryInvalid, p.parseError("summary complex value parsing not yet implemented", t) + case model.MetricTypeHistogram: + defer p.tempHist.Reset() + + if err := p.parseComplexValueHistogram(); err != nil { + return EntryInvalid, err + } + p.h, p.fh, err = p.tempHist.Convert() + if err != nil { + return EntryInvalid, p.parseError(fmt.Sprintf("histogram complex value parsing failed: %v", err), t) + } + if p.h != nil { + if err := p.h.Validate(); err != nil { + return EntryInvalid, p.parseError(fmt.Sprintf("invalid histogram: %v", err), t) + } + } else if p.fh != nil { + if err := p.fh.Validate(); err != nil { + return 
EntryInvalid, p.parseError(fmt.Sprintf("invalid float histogram: %v", err), t) + } + } + return EntryHistogram, nil + } +} + +func (p *OpenMetrics2Parser) parseComplexValueHistogram() (err error) { + // The opening brace has already been consumed. + t := p.nextToken() + + // Handle empty complex value, e.g., {}. + if t == tBraceClose { + return nil + } + + for { + // Expect a key (e.g., "count", "sum", "bucket"). + if t != tLName { + return p.parseError("expected key in complex value", t) + } + key := yoloString(p.l.buf()) + + if t2 := p.nextToken(); t2 != tColon { + return p.parseError("expected colon after complex value key", t2) + } + + // Handle the value based on the key. + switch key { + case "count": + if t3 := p.nextToken(); t3 != tValue { + return p.parseError("expected count value", t3) + } + val, err := parseFloat(yoloString(p.l.buf())) + if err != nil { + return fmt.Errorf("%w while parsing count: %q", err, p.l.b[p.start:p.l.i]) + } + if err := p.tempHist.SetCount(val); err != nil { + return fmt.Errorf("%w while parsing count for histogram: %v", err, p.l.b[p.start:p.l.i]) + } + case "sum": + if t3 := p.nextToken(); t3 != tValue { + return p.parseError("expected sum value", t3) + } + val, err := parseFloat(yoloString(p.l.buf())) + if err != nil { + return fmt.Errorf("%w while parsing sum: %q", err, p.l.b[p.start:p.l.i]) + } + if err := p.tempHist.SetSum(val); err != nil { + return fmt.Errorf("%w while parsing sum for histogram: %v", err, p.l.b[p.start:p.l.i]) + } + case "bucket": + if t3 := p.nextToken(); t3 != tBracketOpen { + return p.parseError("expected opening bracket for buckets", t3) + } + if err := p.parseBuckets(); err != nil { + return err + } + default: + return fmt.Errorf("unknown key in complex value: %q", key) + } + + // After a key-value pair, expect a comma or the closing brace. 
+ t = p.nextToken() + if t == tBraceClose { + return nil + } + if t != tComma { + return p.parseError("expected comma or closing brace after value", t) + } + + // If we saw a comma, get the next token, which should be the next key. + t = p.nextToken() + } +} + +// parseBuckets parses the content of a bucket list, e.g., [bound1:count1,bound2:count2]. +func (p *OpenMetrics2Parser) parseBuckets() error { + // Handle empty bucket list, e.g., []. + t := p.nextToken() + if t == tBracketClose { + return nil + } + + for { + // Expect a bucket definition like "bound:count". + if t != tValue { + return p.parseError("expected bucket bound", t) + } + bound, err := parseFloat(yoloString(p.l.buf())) + if err != nil { + return fmt.Errorf("%w while parsing bucket bound: %q", err, p.l.b[p.start:p.l.i]) + } + if t2 := p.nextToken(); t2 != tColon { + return p.parseError("expected colon after bucket bound", t2) + } + if t3 := p.nextToken(); t3 != tValue { + return p.parseError("expected bucket count", t3) + } + // The bucket count must be an integer. + count, err := parseFloat(yoloString(p.l.buf())) + if err != nil { + return fmt.Errorf("%w while parsing bucket count: %q", err, p.l.b[p.start:p.l.i]) + } + if err := p.tempHist.SetBucketCount(bound, count); err != nil { + return fmt.Errorf("%w while parsing bucket bound and count: %q", err, p.l.b[p.start:p.l.i]) + } + + // Check for a comma or the closing bracket. + t = p.nextToken() + if t == tBracketClose { + return nil + } + if t != tComma { + return p.parseError("expected comma or closing bracket in bucket list", t) + } + // If we saw a comma, get the next token for the start of the next bucket. 
+ t = p.nextToken() + } +} diff --git a/model/textparse/promlex.l b/model/textparse/promlex.l index e9fa1fb71c..904bdcf6e7 100644 --- a/model/textparse/promlex.l +++ b/model/textparse/promlex.l @@ -30,6 +30,7 @@ const ( sExemplar sEValue sETimestamp + sCValue // Complex value samples (NHCB, NH, NS) ) // Lex is called by the parser generated by "go tool yacc" to obtain each diff --git a/model/textparse/promlex.l.go b/model/textparse/promlex.l.go index a083e5549b..24590eee32 100644 --- a/model/textparse/promlex.l.go +++ b/model/textparse/promlex.l.go @@ -31,6 +31,7 @@ const ( sExemplar sEValue sETimestamp + sCValue // Complex value samples (NHCB, NH, NS) ) // Lex is called by the parser generated by "go tool yacc" to obtain each diff --git a/model/textparse/promparse.go b/model/textparse/promparse.go index 2b4b750b4d..bfe4b8d530 100644 --- a/model/textparse/promparse.go +++ b/model/textparse/promparse.go @@ -68,6 +68,9 @@ const ( tEqual tTimestamp tValue + tColon + tBracketOpen + tBracketClose ) func (t token) String() string { diff --git a/model/textparse/testdata/1histogram.om.txt b/model/textparse/testdata/1histogram.om.txt index 1876168355..bd49d872e8 100644 --- a/model/textparse/testdata/1histogram.om.txt +++ b/model/textparse/testdata/1histogram.om.txt @@ -22,11 +22,11 @@ golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.25"} 0 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="0.5"} 0 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="1.0"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="2.5"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="5.0"} 0 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="2.5"} 1 
+golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="5.0"} 1 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="10.0"} 1 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5002",le="+Inf"} 1 -golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5002"} 10.0 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5002"} 10.1 golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5002"} 1 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.005"} 0 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.01"} 0 @@ -34,12 +34,12 @@ golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.05"} 0 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.1"} 0 golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.25"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.5"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="1.0"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="2.5"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="5.0"} 0 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="10.0"} 1 -golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="+Inf"} 1 -golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5003"} 10.0 -golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5003"} 1 -# EOF \ No newline at end of file 
+golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="0.5"} 1 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="1.0"} 2 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="2.5"} 3 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="5.0"} 4 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="10.0"} 5 +golang_manual_histogram_seconds_bucket{address="0.0.0.0",generation="20",port="5003",le="+Inf"} 6 +golang_manual_histogram_seconds_sum{address="0.0.0.0",generation="20",port="5003"} 20.04 +golang_manual_histogram_seconds_count{address="0.0.0.0",generation="20",port="5003"} 6 +# EOF diff --git a/model/textparse/testdata/1histogram.om2.txt b/model/textparse/testdata/1histogram.om2.txt new file mode 100644 index 0000000000..9278f57976 --- /dev/null +++ b/model/textparse/testdata/1histogram.om2.txt @@ -0,0 +1,6 @@ +# HELP golang_manual_histogram_seconds This is a histogram with manually selected parameters +# TYPE golang_manual_histogram_seconds histogram +golang_manual_histogram_seconds{address="0.0.0.0",generation="20",port="5001"} {count:1,sum:10.0,bucket:[0.005:0,0.01:0,0.025:0,0.05:0,0.1:0,0.25:0,0.5:0,1.0:0,2.5:0,5.0:0,10.0:1,+Inf:1]} +golang_manual_histogram_seconds{address="0.0.0.0",generation="20",port="5002"} {count:1,sum:10.1,bucket:[0.005:0,0.01:0,0.025:0,0.05:0,0.1:0,0.25:0,0.5:0,1.0:0,2.5:1,5.0:1,10.0:1,+Inf:1]} +golang_manual_histogram_seconds{address="0.0.0.0",generation="20",port="5003"} {count:6,sum:20.04,bucket:[0.005:0,0.01:0,0.025:0,0.05:0,0.1:0,0.25:0,0.5:1,1.0:2,2.5:3,5.0:4,10.0:5,+Inf:6]} +# EOF diff --git a/scripts/gentextlex.sh b/scripts/gentextlex.sh new file mode 100755 index 0000000000..bd7dd10856 --- /dev/null +++ b/scripts/gentextlex.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# +# Generate the textparse lexers (*.l.go) from their golex definitions. +# Run from repository root.
+set -e +set -u + +if ! [[ "$0" =~ "scripts/gentextlex.sh" ]]; then + echo "must be run from repository root" + exit 255 +fi + +pushd "internal/tools" +INSTALL_PKGS="modernc.org/golex" +for pkg in ${INSTALL_PKGS}; do + GO111MODULE=on go install "$pkg" +done +popd + +echo "generating lex code" +pushd model/textparse + golex -o=promlex.l.go promlex.l + golex -o=openmetricslex.l.go openmetricslex.l + golex -o=openmetrics2lex.l.go openmetrics2lex.l +popd