| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | // Copyright 2020 The Prometheus Authors
 | 
					
						
							|  |  |  | // Licensed under the Apache License, Version 2.0 (the "License");
 | 
					
						
							|  |  |  | // you may not use this file except in compliance with the License.
 | 
					
						
							|  |  |  | // You may obtain a copy of the License at
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // http://www.apache.org/licenses/LICENSE-2.0
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // Unless required by applicable law or agreed to in writing, software
 | 
					
						
							|  |  |  | // distributed under the License is distributed on an "AS IS" BASIS,
 | 
					
						
							|  |  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
					
						
							|  |  |  | // See the License for the specific language governing permissions and
 | 
					
						
							|  |  |  | // limitations under the License.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | package labels | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							| 
									
										
										
										
											2024-03-25 20:20:18 +08:00
										 |  |  | 	"slices" | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 	"strings" | 
					
						
							| 
									
										
										
										
											2024-06-11 06:31:41 +08:00
										 |  |  | 	"unicode" | 
					
						
							| 
									
										
										
										
											2024-05-07 22:33:37 +08:00
										 |  |  | 	"unicode/utf8" | 
					
						
							| 
									
										
										
										
											2022-02-08 18:03:20 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	"github.com/grafana/regexp" | 
					
						
							|  |  |  | 	"github.com/grafana/regexp/syntax" | 
					
						
							| 
									
										
										
										
											2024-06-11 06:31:41 +08:00
										 |  |  | 	"golang.org/x/text/unicode/norm" | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | const ( | 
					
						
							|  |  |  | 	maxSetMatches = 256 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// The minimum number of alternate values a regex should have to trigger
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	// the optimization done by optimizeEqualOrPrefixStringMatchers() and so use a map
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	// to match values instead of iterating over a list. This value has
 | 
					
						
							|  |  |  | 	// been computed running BenchmarkOptimizeEqualStringMatchers.
 | 
					
						
							|  |  |  | 	minEqualMultiStringMatcherMapThreshold = 16 | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type FastRegexMatcher struct { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	// Under some conditions, re is nil because the expression is never parsed.
 | 
					
						
							|  |  |  | 	// We store the original string to be able to return it in GetRegexString().
 | 
					
						
							|  |  |  | 	reString string | 
					
						
							| 
									
										
										
										
											2020-07-07 16:38:04 +08:00
										 |  |  | 	re       *regexp.Regexp | 
					
						
							| 
									
										
										
										
											2023-06-08 04:54:30 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	setMatches    []string | 
					
						
							|  |  |  | 	stringMatcher StringMatcher | 
					
						
							|  |  |  | 	prefix        string | 
					
						
							|  |  |  | 	suffix        string | 
					
						
							| 
									
										
										
										
											2024-05-31 21:35:58 +08:00
										 |  |  | 	contains      []string | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	// matchString is the "compiled" function to run by MatchString().
 | 
					
						
							|  |  |  | 	matchString func(string) bool | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func NewFastRegexMatcher(v string) (*FastRegexMatcher, error) { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	m := &FastRegexMatcher{ | 
					
						
							|  |  |  | 		reString: v, | 
					
						
							| 
									
										
										
										
											2023-06-08 04:54:30 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	m.stringMatcher, m.setMatches = optimizeAlternatingLiterals(v) | 
					
						
							|  |  |  | 	if m.stringMatcher != nil { | 
					
						
							|  |  |  | 		// If we already have a string matcher, we don't need to parse the regex
 | 
					
						
							|  |  |  | 		// or compile the matchString function. This also avoids the behavior in
 | 
					
						
							|  |  |  | 		// compileMatchStringFunction where it prefers to use setMatches when
 | 
					
						
							|  |  |  | 		// available, even if the string matcher is faster.
 | 
					
						
							|  |  |  | 		m.matchString = m.stringMatcher.Matches | 
					
						
							|  |  |  | 	} else { | 
					
						
							| 
									
										
										
										
											2024-07-24 17:49:10 +08:00
										 |  |  | 		parsed, err := syntax.Parse(v, syntax.Perl|syntax.DotNL) | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return nil, err | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		// Simplify the syntax tree to run faster.
 | 
					
						
							|  |  |  | 		parsed = parsed.Simplify() | 
					
						
							| 
									
										
										
										
											2024-07-24 17:49:10 +08:00
										 |  |  | 		m.re, err = regexp.Compile("^(?s:" + parsed.String() + ")$") | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		if err != nil { | 
					
						
							|  |  |  | 			return nil, err | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if parsed.Op == syntax.OpConcat { | 
					
						
							|  |  |  | 			m.prefix, m.suffix, m.contains = optimizeConcatRegex(parsed) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if matches, caseSensitive := findSetMatches(parsed); caseSensitive { | 
					
						
							|  |  |  | 			m.setMatches = matches | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		m.stringMatcher = stringMatcherFromRegexp(parsed) | 
					
						
							|  |  |  | 		m.matchString = m.compileMatchStringFunction() | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	return m, nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // compileMatchStringFunction returns the function to run by MatchString().
 | 
					
						
							|  |  |  | func (m *FastRegexMatcher) compileMatchStringFunction() func(string) bool { | 
					
						
							|  |  |  | 	// If the only optimization available is the string matcher, then we can just run it.
 | 
					
						
							| 
									
										
										
										
											2024-05-31 21:35:58 +08:00
										 |  |  | 	if len(m.setMatches) == 0 && m.prefix == "" && m.suffix == "" && len(m.contains) == 0 && m.stringMatcher != nil { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		return m.stringMatcher.Matches | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	return func(s string) bool { | 
					
						
							|  |  |  | 		if len(m.setMatches) != 0 { | 
					
						
							| 
									
										
										
										
											2025-04-30 10:43:26 +08:00
										 |  |  | 			return slices.Contains(m.setMatches, s) | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		} | 
					
						
							|  |  |  | 		if m.prefix != "" && !strings.HasPrefix(s, m.prefix) { | 
					
						
							|  |  |  | 			return false | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if m.suffix != "" && !strings.HasSuffix(s, m.suffix) { | 
					
						
							|  |  |  | 			return false | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2024-05-31 21:35:58 +08:00
										 |  |  | 		if len(m.contains) > 0 && !containsInOrder(s, m.contains) { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 			return false | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if m.stringMatcher != nil { | 
					
						
							|  |  |  | 			return m.stringMatcher.Matches(s) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		return m.re.MatchString(s) | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // IsOptimized returns true if any fast-path optimization is applied to the
 | 
					
						
							|  |  |  | // regex matcher.
 | 
					
						
							|  |  |  | func (m *FastRegexMatcher) IsOptimized() bool { | 
					
						
							| 
									
										
										
										
											2024-05-31 21:35:58 +08:00
										 |  |  | 	return len(m.setMatches) > 0 || m.stringMatcher != nil || m.prefix != "" || m.suffix != "" || len(m.contains) > 0 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // findSetMatches extract equality matches from a regexp.
 | 
					
						
							|  |  |  | // Returns nil if we can't replace the regexp by only equality matchers or the regexp contains
 | 
					
						
							|  |  |  | // a mix of case sensitive and case insensitive matchers.
 | 
					
						
							|  |  |  | func findSetMatches(re *syntax.Regexp) (matches []string, caseSensitive bool) { | 
					
						
							|  |  |  | 	clearBeginEndText(re) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return findSetMatchesInternal(re, "") | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | func findSetMatchesInternal(re *syntax.Regexp, base string) (matches []string, caseSensitive bool) { | 
					
						
							|  |  |  | 	switch re.Op { | 
					
						
							|  |  |  | 	case syntax.OpBeginText: | 
					
						
							|  |  |  | 		// Correctly handling the begin text operator inside a regex is tricky,
 | 
					
						
							|  |  |  | 		// so in this case we fallback to the regex engine.
 | 
					
						
							|  |  |  | 		return nil, false | 
					
						
							|  |  |  | 	case syntax.OpEndText: | 
					
						
							|  |  |  | 		// Correctly handling the end text operator inside a regex is tricky,
 | 
					
						
							|  |  |  | 		// so in this case we fallback to the regex engine.
 | 
					
						
							|  |  |  | 		return nil, false | 
					
						
							|  |  |  | 	case syntax.OpLiteral: | 
					
						
							|  |  |  | 		return []string{base + string(re.Rune)}, isCaseSensitive(re) | 
					
						
							|  |  |  | 	case syntax.OpEmptyMatch: | 
					
						
							|  |  |  | 		if base != "" { | 
					
						
							|  |  |  | 			return []string{base}, isCaseSensitive(re) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	case syntax.OpAlternate: | 
					
						
							|  |  |  | 		return findSetMatchesFromAlternate(re, base) | 
					
						
							|  |  |  | 	case syntax.OpCapture: | 
					
						
							|  |  |  | 		clearCapture(re) | 
					
						
							|  |  |  | 		return findSetMatchesInternal(re, base) | 
					
						
							|  |  |  | 	case syntax.OpConcat: | 
					
						
							|  |  |  | 		return findSetMatchesFromConcat(re, base) | 
					
						
							|  |  |  | 	case syntax.OpCharClass: | 
					
						
							|  |  |  | 		if len(re.Rune)%2 != 0 { | 
					
						
							|  |  |  | 			return nil, false | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		var matches []string | 
					
						
							|  |  |  | 		var totalSet int | 
					
						
							|  |  |  | 		for i := 0; i+1 < len(re.Rune); i += 2 { | 
					
						
							|  |  |  | 			totalSet += int(re.Rune[i+1]-re.Rune[i]) + 1 | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		// limits the total characters that can be used to create matches.
 | 
					
						
							|  |  |  | 		// In some case like negation [^0-9] a lot of possibilities exists and that
 | 
					
						
							|  |  |  | 		// can create thousands of possible matches at which points we're better off using regexp.
 | 
					
						
							|  |  |  | 		if totalSet > maxSetMatches { | 
					
						
							|  |  |  | 			return nil, false | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		for i := 0; i+1 < len(re.Rune); i += 2 { | 
					
						
							|  |  |  | 			lo, hi := re.Rune[i], re.Rune[i+1] | 
					
						
							|  |  |  | 			for c := lo; c <= hi; c++ { | 
					
						
							|  |  |  | 				matches = append(matches, base+string(c)) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		return matches, isCaseSensitive(re) | 
					
						
							|  |  |  | 	default: | 
					
						
							|  |  |  | 		return nil, false | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	return nil, false | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | func findSetMatchesFromConcat(re *syntax.Regexp, base string) (matches []string, matchesCaseSensitive bool) { | 
					
						
							|  |  |  | 	if len(re.Sub) == 0 { | 
					
						
							|  |  |  | 		return nil, false | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	clearCapture(re.Sub...) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	matches = []string{base} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for i := 0; i < len(re.Sub); i++ { | 
					
						
							|  |  |  | 		var newMatches []string | 
					
						
							|  |  |  | 		for j, b := range matches { | 
					
						
							|  |  |  | 			m, caseSensitive := findSetMatchesInternal(re.Sub[i], b) | 
					
						
							|  |  |  | 			if m == nil { | 
					
						
							|  |  |  | 				return nil, false | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if tooManyMatches(newMatches, m...) { | 
					
						
							|  |  |  | 				return nil, false | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			// All matches must have the same case sensitivity. If it's the first set of matches
 | 
					
						
							|  |  |  | 			// returned, we store its sensitivity as the expected case, and then we'll check all
 | 
					
						
							|  |  |  | 			// other ones.
 | 
					
						
							|  |  |  | 			if i == 0 && j == 0 { | 
					
						
							|  |  |  | 				matchesCaseSensitive = caseSensitive | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			if matchesCaseSensitive != caseSensitive { | 
					
						
							|  |  |  | 				return nil, false | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			newMatches = append(newMatches, m...) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		matches = newMatches | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return matches, matchesCaseSensitive | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | func findSetMatchesFromAlternate(re *syntax.Regexp, base string) (matches []string, matchesCaseSensitive bool) { | 
					
						
							|  |  |  | 	for i, sub := range re.Sub { | 
					
						
							|  |  |  | 		found, caseSensitive := findSetMatchesInternal(sub, base) | 
					
						
							|  |  |  | 		if found == nil { | 
					
						
							|  |  |  | 			return nil, false | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if tooManyMatches(matches, found...) { | 
					
						
							|  |  |  | 			return nil, false | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// All matches must have the same case sensitivity. If it's the first set of matches
 | 
					
						
							|  |  |  | 		// returned, we store its sensitivity as the expected case, and then we'll check all
 | 
					
						
							|  |  |  | 		// other ones.
 | 
					
						
							|  |  |  | 		if i == 0 { | 
					
						
							|  |  |  | 			matchesCaseSensitive = caseSensitive | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if matchesCaseSensitive != caseSensitive { | 
					
						
							|  |  |  | 			return nil, false | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		matches = append(matches, found...) | 
					
						
							| 
									
										
										
										
											2023-06-08 04:54:30 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	return matches, matchesCaseSensitive | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // clearCapture removes capture operation as they are not used for matching.
 | 
					
						
							|  |  |  | func clearCapture(regs ...*syntax.Regexp) { | 
					
						
							|  |  |  | 	for _, r := range regs { | 
					
						
							|  |  |  | 		// Iterate on the regexp because capture groups could be nested.
 | 
					
						
							|  |  |  | 		for r.Op == syntax.OpCapture { | 
					
						
							|  |  |  | 			*r = *r.Sub[0] | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // clearBeginEndText removes the begin and end text from the regexp. Prometheus regexp are anchored to the beginning and end of the string.
 | 
					
						
							|  |  |  | func clearBeginEndText(re *syntax.Regexp) { | 
					
						
							|  |  |  | 	// Do not clear begin/end text from an alternate operator because it could
 | 
					
						
							|  |  |  | 	// change the actual regexp properties.
 | 
					
						
							|  |  |  | 	if re.Op == syntax.OpAlternate { | 
					
						
							|  |  |  | 		return | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	if len(re.Sub) == 0 { | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if len(re.Sub) == 1 { | 
					
						
							|  |  |  | 		if re.Sub[0].Op == syntax.OpBeginText || re.Sub[0].Op == syntax.OpEndText { | 
					
						
							|  |  |  | 			// We need to remove this element. Since it's the only one, we convert into a matcher of an empty string.
 | 
					
						
							|  |  |  | 			// OpEmptyMatch is regexp's nop operator.
 | 
					
						
							|  |  |  | 			re.Op = syntax.OpEmptyMatch | 
					
						
							|  |  |  | 			re.Sub = nil | 
					
						
							|  |  |  | 			return | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2020-07-07 16:38:04 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	if re.Sub[0].Op == syntax.OpBeginText { | 
					
						
							|  |  |  | 		re.Sub = re.Sub[1:] | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if re.Sub[len(re.Sub)-1].Op == syntax.OpEndText { | 
					
						
							|  |  |  | 		re.Sub = re.Sub[:len(re.Sub)-1] | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // isCaseInsensitive tells if a regexp is case insensitive.
 | 
					
						
							|  |  |  | // The flag should be check at each level of the syntax tree.
 | 
					
						
							|  |  |  | func isCaseInsensitive(reg *syntax.Regexp) bool { | 
					
						
							|  |  |  | 	return (reg.Flags & syntax.FoldCase) != 0 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // isCaseSensitive tells if a regexp is case sensitive.
 | 
					
						
							|  |  |  | // The flag should be check at each level of the syntax tree.
 | 
					
						
							|  |  |  | func isCaseSensitive(reg *syntax.Regexp) bool { | 
					
						
							|  |  |  | 	return !isCaseInsensitive(reg) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // tooManyMatches guards against creating too many set matches.
 | 
					
						
							|  |  |  | func tooManyMatches(matches []string, added ...string) bool { | 
					
						
							|  |  |  | 	return len(matches)+len(added) > maxSetMatches | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *FastRegexMatcher) MatchString(s string) bool { | 
					
						
							|  |  |  | 	return m.matchString(s) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *FastRegexMatcher) SetMatches() []string { | 
					
						
							|  |  |  | 	// IMPORTANT: always return a copy, otherwise if the caller manipulate this slice it will
 | 
					
						
							|  |  |  | 	// also get manipulated in the cached FastRegexMatcher instance.
 | 
					
						
							|  |  |  | 	return slices.Clone(m.setMatches) | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *FastRegexMatcher) GetRegexString() string { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	return m.reString | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | // optimizeAlternatingLiterals optimizes a regex of the form
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | //	`literal1|literal2|literal3|...`
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // this function returns an optimized StringMatcher or nil if the regex
 | 
					
						
							|  |  |  | // cannot be optimized in this way, and a list of setMatches up to maxSetMatches.
 | 
					
						
							|  |  |  | func optimizeAlternatingLiterals(s string) (StringMatcher, []string) { | 
					
						
							|  |  |  | 	if len(s) == 0 { | 
					
						
							|  |  |  | 		return emptyStringMatcher{}, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	estimatedAlternates := strings.Count(s, "|") + 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// If there are no alternates, check if the string is a literal
 | 
					
						
							|  |  |  | 	if estimatedAlternates == 1 { | 
					
						
							|  |  |  | 		if regexp.QuoteMeta(s) == s { | 
					
						
							|  |  |  | 			return &equalStringMatcher{s: s, caseSensitive: true}, []string{s} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		return nil, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	multiMatcher := newEqualMultiStringMatcher(true, estimatedAlternates, 0, 0) | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	for end := strings.IndexByte(s, '|'); end > -1; end = strings.IndexByte(s, '|') { | 
					
						
							|  |  |  | 		// Split the string into the next literal and the remainder
 | 
					
						
							|  |  |  | 		subMatch := s[:end] | 
					
						
							|  |  |  | 		s = s[end+1:] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// break if any of the submatches are not literals
 | 
					
						
							|  |  |  | 		if regexp.QuoteMeta(subMatch) != subMatch { | 
					
						
							|  |  |  | 			return nil, nil | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		multiMatcher.add(subMatch) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// break if the remainder is not a literal
 | 
					
						
							|  |  |  | 	if regexp.QuoteMeta(s) != s { | 
					
						
							|  |  |  | 		return nil, nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	multiMatcher.add(s) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return multiMatcher, multiMatcher.setMatches() | 
					
						
							| 
									
										
										
										
											2023-06-08 04:54:30 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | // optimizeConcatRegex returns literal prefix/suffix text that can be safely
 | 
					
						
							|  |  |  | // checked against the label value before running the regexp matcher.
 | 
					
						
							| 
									
										
										
										
											2024-05-31 21:35:58 +08:00
										 |  |  | func optimizeConcatRegex(r *syntax.Regexp) (prefix, suffix string, contains []string) { | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 	sub := r.Sub | 
					
						
							| 
									
										
										
										
											2024-05-31 21:35:58 +08:00
										 |  |  | 	clearCapture(sub...) | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	// We can safely remove begin and end text matchers respectively
 | 
					
						
							|  |  |  | 	// at the beginning and end of the regexp.
 | 
					
						
							|  |  |  | 	if len(sub) > 0 && sub[0].Op == syntax.OpBeginText { | 
					
						
							|  |  |  | 		sub = sub[1:] | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if len(sub) > 0 && sub[len(sub)-1].Op == syntax.OpEndText { | 
					
						
							|  |  |  | 		sub = sub[:len(sub)-1] | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if len(sub) == 0 { | 
					
						
							|  |  |  | 		return | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Given Prometheus regex matchers are always anchored to the begin/end
 | 
					
						
							|  |  |  | 	// of the text, if the first/last operations are literals, we can safely
 | 
					
						
							|  |  |  | 	// treat them as prefix/suffix.
 | 
					
						
							| 
									
										
										
										
											2020-10-06 20:16:26 +08:00
										 |  |  | 	if sub[0].Op == syntax.OpLiteral && (sub[0].Flags&syntax.FoldCase) == 0 { | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 		prefix = string(sub[0].Rune) | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2020-10-06 20:16:26 +08:00
										 |  |  | 	if last := len(sub) - 1; sub[last].Op == syntax.OpLiteral && (sub[last].Flags&syntax.FoldCase) == 0 { | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 		suffix = string(sub[last].Rune) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-31 21:35:58 +08:00
										 |  |  | 	// If contains any literal which is not a prefix/suffix, we keep track of
 | 
					
						
							| 
									
										
										
										
											2024-06-03 17:05:20 +08:00
										 |  |  | 	// all the ones which are case-sensitive.
 | 
					
						
							| 
									
										
										
										
											2020-07-07 16:38:04 +08:00
										 |  |  | 	for i := 1; i < len(sub)-1; i++ { | 
					
						
							| 
									
										
										
										
											2020-10-06 20:16:26 +08:00
										 |  |  | 		if sub[i].Op == syntax.OpLiteral && (sub[i].Flags&syntax.FoldCase) == 0 { | 
					
						
							| 
									
										
										
										
											2024-05-31 21:35:58 +08:00
										 |  |  | 			contains = append(contains, string(sub[i].Rune)) | 
					
						
							| 
									
										
										
										
											2020-07-07 16:38:04 +08:00
										 |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-26 17:49:09 +08:00
										 |  |  | 	return | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | // StringMatcher is a matcher that matches a string in place of a regular expression.
 | 
					
						
							|  |  |  | type StringMatcher interface { | 
					
						
							|  |  |  | 	Matches(s string) bool | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // stringMatcherFromRegexp attempts to replace a common regexp with a string matcher.
 | 
					
						
							|  |  |  | // It returns nil if the regexp is not supported.
 | 
					
						
							|  |  |  | func stringMatcherFromRegexp(re *syntax.Regexp) StringMatcher { | 
					
						
							|  |  |  | 	clearBeginEndText(re) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	m := stringMatcherFromRegexpInternal(re) | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	m = optimizeEqualOrPrefixStringMatchers(m, minEqualMultiStringMatcherMapThreshold) | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	return m | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func stringMatcherFromRegexpInternal(re *syntax.Regexp) StringMatcher { | 
					
						
							|  |  |  | 	clearCapture(re) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	switch re.Op { | 
					
						
							|  |  |  | 	case syntax.OpBeginText: | 
					
						
							|  |  |  | 		// Correctly handling the begin text operator inside a regex is tricky,
 | 
					
						
							|  |  |  | 		// so in this case we fallback to the regex engine.
 | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	case syntax.OpEndText: | 
					
						
							|  |  |  | 		// Correctly handling the end text operator inside a regex is tricky,
 | 
					
						
							|  |  |  | 		// so in this case we fallback to the regex engine.
 | 
					
						
							|  |  |  | 		return nil | 
					
						
							|  |  |  | 	case syntax.OpPlus: | 
					
						
							|  |  |  | 		if re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL { | 
					
						
							|  |  |  | 			return nil | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		return &anyNonEmptyStringMatcher{ | 
					
						
							|  |  |  | 			matchNL: re.Sub[0].Op == syntax.OpAnyChar, | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	case syntax.OpStar: | 
					
						
							|  |  |  | 		if re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL { | 
					
						
							|  |  |  | 			return nil | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// If the newline is valid, than this matcher literally match any string (even empty).
 | 
					
						
							|  |  |  | 		if re.Sub[0].Op == syntax.OpAnyChar { | 
					
						
							|  |  |  | 			return trueMatcher{} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Any string is fine (including an empty one), as far as it doesn't contain any newline.
 | 
					
						
							|  |  |  | 		return anyStringWithoutNewlineMatcher{} | 
					
						
							|  |  |  | 	case syntax.OpQuest: | 
					
						
							|  |  |  | 		// Only optimize for ".?".
 | 
					
						
							|  |  |  | 		if len(re.Sub) != 1 || (re.Sub[0].Op != syntax.OpAnyChar && re.Sub[0].Op != syntax.OpAnyCharNotNL) { | 
					
						
							|  |  |  | 			return nil | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		return &zeroOrOneCharacterStringMatcher{ | 
					
						
							|  |  |  | 			matchNL: re.Sub[0].Op == syntax.OpAnyChar, | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	case syntax.OpEmptyMatch: | 
					
						
							|  |  |  | 		return emptyStringMatcher{} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	case syntax.OpLiteral: | 
					
						
							|  |  |  | 		return &equalStringMatcher{ | 
					
						
							|  |  |  | 			s:             string(re.Rune), | 
					
						
							|  |  |  | 			caseSensitive: !isCaseInsensitive(re), | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	case syntax.OpAlternate: | 
					
						
							|  |  |  | 		or := make([]StringMatcher, 0, len(re.Sub)) | 
					
						
							|  |  |  | 		for _, sub := range re.Sub { | 
					
						
							|  |  |  | 			m := stringMatcherFromRegexpInternal(sub) | 
					
						
							|  |  |  | 			if m == nil { | 
					
						
							|  |  |  | 				return nil | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			or = append(or, m) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		return orStringMatcher(or) | 
					
						
							|  |  |  | 	case syntax.OpConcat: | 
					
						
							|  |  |  | 		clearCapture(re.Sub...) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if len(re.Sub) == 0 { | 
					
						
							|  |  |  | 			return emptyStringMatcher{} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if len(re.Sub) == 1 { | 
					
						
							|  |  |  | 			return stringMatcherFromRegexpInternal(re.Sub[0]) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		var left, right StringMatcher | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Let's try to find if there's a first and last any matchers.
 | 
					
						
							|  |  |  | 		if re.Sub[0].Op == syntax.OpPlus || re.Sub[0].Op == syntax.OpStar || re.Sub[0].Op == syntax.OpQuest { | 
					
						
							|  |  |  | 			left = stringMatcherFromRegexpInternal(re.Sub[0]) | 
					
						
							|  |  |  | 			if left == nil { | 
					
						
							|  |  |  | 				return nil | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			re.Sub = re.Sub[1:] | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if re.Sub[len(re.Sub)-1].Op == syntax.OpPlus || re.Sub[len(re.Sub)-1].Op == syntax.OpStar || re.Sub[len(re.Sub)-1].Op == syntax.OpQuest { | 
					
						
							|  |  |  | 			right = stringMatcherFromRegexpInternal(re.Sub[len(re.Sub)-1]) | 
					
						
							|  |  |  | 			if right == nil { | 
					
						
							|  |  |  | 				return nil | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			re.Sub = re.Sub[:len(re.Sub)-1] | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		matches, matchesCaseSensitive := findSetMatchesInternal(re, "") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		if len(matches) == 0 && len(re.Sub) == 2 { | 
					
						
							|  |  |  | 			// We have not find fixed set matches. We look for other known cases that
 | 
					
						
							|  |  |  | 			// we can optimize.
 | 
					
						
							|  |  |  | 			switch { | 
					
						
							|  |  |  | 			// Prefix is literal.
 | 
					
						
							|  |  |  | 			case right == nil && re.Sub[0].Op == syntax.OpLiteral: | 
					
						
							|  |  |  | 				right = stringMatcherFromRegexpInternal(re.Sub[1]) | 
					
						
							|  |  |  | 				if right != nil { | 
					
						
							|  |  |  | 					matches = []string{string(re.Sub[0].Rune)} | 
					
						
							|  |  |  | 					matchesCaseSensitive = !isCaseInsensitive(re.Sub[0]) | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			// Suffix is literal.
 | 
					
						
							|  |  |  | 			case left == nil && re.Sub[1].Op == syntax.OpLiteral: | 
					
						
							|  |  |  | 				left = stringMatcherFromRegexpInternal(re.Sub[0]) | 
					
						
							|  |  |  | 				if left != nil { | 
					
						
							|  |  |  | 					matches = []string{string(re.Sub[1].Rune)} | 
					
						
							|  |  |  | 					matchesCaseSensitive = !isCaseInsensitive(re.Sub[1]) | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Ensure we've found some literals to match (optionally with a left and/or right matcher).
 | 
					
						
							|  |  |  | 		// If not, then this optimization doesn't trigger.
 | 
					
						
							|  |  |  | 		if len(matches) == 0 { | 
					
						
							|  |  |  | 			return nil | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Use the right (and best) matcher based on what we've found.
 | 
					
						
							|  |  |  | 		switch { | 
					
						
							|  |  |  | 		// No left and right matchers (only fixed set matches).
 | 
					
						
							|  |  |  | 		case left == nil && right == nil: | 
					
						
							|  |  |  | 			// if there's no any matchers on both side it's a concat of literals
 | 
					
						
							|  |  |  | 			or := make([]StringMatcher, 0, len(matches)) | 
					
						
							|  |  |  | 			for _, match := range matches { | 
					
						
							|  |  |  | 				or = append(or, &equalStringMatcher{ | 
					
						
							|  |  |  | 					s:             match, | 
					
						
							|  |  |  | 					caseSensitive: matchesCaseSensitive, | 
					
						
							|  |  |  | 				}) | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 			return orStringMatcher(or) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// Right matcher with 1 fixed set match.
 | 
					
						
							|  |  |  | 		case left == nil && len(matches) == 1: | 
					
						
							| 
									
										
										
										
											2024-07-01 16:12:50 +08:00
										 |  |  | 			return newLiteralPrefixStringMatcher(matches[0], matchesCaseSensitive, right) | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 		// Left matcher with 1 fixed set match.
 | 
					
						
							|  |  |  | 		case right == nil && len(matches) == 1: | 
					
						
							|  |  |  | 			return &literalSuffixStringMatcher{ | 
					
						
							|  |  |  | 				left:                left, | 
					
						
							|  |  |  | 				suffix:              matches[0], | 
					
						
							|  |  |  | 				suffixCaseSensitive: matchesCaseSensitive, | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		// We found literals in the middle. We can trigger the fast path only if
 | 
					
						
							|  |  |  | 		// the matches are case sensitive because containsStringMatcher doesn't
 | 
					
						
							|  |  |  | 		// support case insensitive.
 | 
					
						
							|  |  |  | 		case matchesCaseSensitive: | 
					
						
							|  |  |  | 			return &containsStringMatcher{ | 
					
						
							|  |  |  | 				substrings: matches, | 
					
						
							|  |  |  | 				left:       left, | 
					
						
							|  |  |  | 				right:      right, | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // containsStringMatcher matches a string if it contains any of the substrings.
 | 
					
						
							|  |  |  | // If left and right are not nil, it's a contains operation where left and right must match.
 | 
					
						
							|  |  |  | // If left is nil, it's a hasPrefix operation and right must match.
 | 
					
						
							|  |  |  | // Finally, if right is nil it's a hasSuffix operation and left must match.
 | 
					
						
							|  |  |  | type containsStringMatcher struct { | 
					
						
							|  |  |  | 	// The matcher that must match the left side. Can be nil.
 | 
					
						
							|  |  |  | 	left StringMatcher | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// At least one of these strings must match in the "middle", between left and right matchers.
 | 
					
						
							|  |  |  | 	substrings []string | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// The matcher that must match the right side. Can be nil.
 | 
					
						
							|  |  |  | 	right StringMatcher | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *containsStringMatcher) Matches(s string) bool { | 
					
						
							|  |  |  | 	for _, substr := range m.substrings { | 
					
						
							|  |  |  | 		switch { | 
					
						
							|  |  |  | 		case m.right != nil && m.left != nil: | 
					
						
							|  |  |  | 			searchStartPos := 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			for { | 
					
						
							|  |  |  | 				pos := strings.Index(s[searchStartPos:], substr) | 
					
						
							|  |  |  | 				if pos < 0 { | 
					
						
							|  |  |  | 					break | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				// Since we started searching from searchStartPos, we have to add that offset
 | 
					
						
							|  |  |  | 				// to get the actual position of the substring inside the text.
 | 
					
						
							|  |  |  | 				pos += searchStartPos | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				// If both the left and right matchers match, then we can stop searching because
 | 
					
						
							|  |  |  | 				// we've found a match.
 | 
					
						
							|  |  |  | 				if m.left.Matches(s[:pos]) && m.right.Matches(s[pos+len(substr):]) { | 
					
						
							|  |  |  | 					return true | 
					
						
							|  |  |  | 				} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 				// Continue searching for another occurrence of the substring inside the text.
 | 
					
						
							|  |  |  | 				searchStartPos = pos + 1 | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		case m.left != nil: | 
					
						
							|  |  |  | 			// If we have to check for characters on the left then we need to match a suffix.
 | 
					
						
							|  |  |  | 			if strings.HasSuffix(s, substr) && m.left.Matches(s[:len(s)-len(substr)]) { | 
					
						
							|  |  |  | 				return true | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		case m.right != nil: | 
					
						
							|  |  |  | 			if strings.HasPrefix(s, substr) && m.right.Matches(s[len(substr):]) { | 
					
						
							|  |  |  | 				return true | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return false | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-01 16:12:50 +08:00
										 |  |  | func newLiteralPrefixStringMatcher(prefix string, prefixCaseSensitive bool, right StringMatcher) StringMatcher { | 
					
						
							|  |  |  | 	if prefixCaseSensitive { | 
					
						
							|  |  |  | 		return &literalPrefixSensitiveStringMatcher{ | 
					
						
							|  |  |  | 			prefix: prefix, | 
					
						
							|  |  |  | 			right:  right, | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return &literalPrefixInsensitiveStringMatcher{ | 
					
						
							|  |  |  | 		prefix: prefix, | 
					
						
							|  |  |  | 		right:  right, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // literalPrefixSensitiveStringMatcher matches a string with the given literal case-sensitive prefix and right side matcher.
 | 
					
						
							|  |  |  | type literalPrefixSensitiveStringMatcher struct { | 
					
						
							|  |  |  | 	prefix string | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	// The matcher that must match the right side. Can be nil.
 | 
					
						
							|  |  |  | 	right StringMatcher | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-01 16:12:50 +08:00
										 |  |  | func (m *literalPrefixSensitiveStringMatcher) Matches(s string) bool { | 
					
						
							|  |  |  | 	if !strings.HasPrefix(s, m.prefix) { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		return false | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-07-01 16:12:50 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	// Ensure the right side matches.
 | 
					
						
							|  |  |  | 	return m.right.Matches(s[len(m.prefix):]) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // literalPrefixInsensitiveStringMatcher matches a string with the given literal case-insensitive prefix and right side matcher.
 | 
					
						
							|  |  |  | type literalPrefixInsensitiveStringMatcher struct { | 
					
						
							|  |  |  | 	prefix string | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// The matcher that must match the right side. Can be nil.
 | 
					
						
							|  |  |  | 	right StringMatcher | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *literalPrefixInsensitiveStringMatcher) Matches(s string) bool { | 
					
						
							|  |  |  | 	if !hasPrefixCaseInsensitive(s, m.prefix) { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		return false | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Ensure the right side matches.
 | 
					
						
							|  |  |  | 	return m.right.Matches(s[len(m.prefix):]) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // literalSuffixStringMatcher matches a string with the given literal suffix and left side matcher.
 | 
					
						
							|  |  |  | type literalSuffixStringMatcher struct { | 
					
						
							|  |  |  | 	// The matcher that must match the left side. Can be nil.
 | 
					
						
							|  |  |  | 	left StringMatcher | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	suffix              string | 
					
						
							|  |  |  | 	suffixCaseSensitive bool | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *literalSuffixStringMatcher) Matches(s string) bool { | 
					
						
							|  |  |  | 	// Ensure the suffix matches.
 | 
					
						
							|  |  |  | 	if m.suffixCaseSensitive && !strings.HasSuffix(s, m.suffix) { | 
					
						
							|  |  |  | 		return false | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if !m.suffixCaseSensitive && !hasSuffixCaseInsensitive(s, m.suffix) { | 
					
						
							|  |  |  | 		return false | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Ensure the left side matches.
 | 
					
						
							|  |  |  | 	return m.left.Matches(s[:len(s)-len(m.suffix)]) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // emptyStringMatcher matches an empty string.
 | 
					
						
							|  |  |  | type emptyStringMatcher struct{} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-08-04 14:20:57 +08:00
										 |  |  | func (emptyStringMatcher) Matches(s string) bool { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	return len(s) == 0 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // orStringMatcher matches any of the sub-matchers.
 | 
					
						
							|  |  |  | type orStringMatcher []StringMatcher | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m orStringMatcher) Matches(s string) bool { | 
					
						
							|  |  |  | 	for _, matcher := range m { | 
					
						
							|  |  |  | 		if matcher.Matches(s) { | 
					
						
							|  |  |  | 			return true | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return false | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // equalStringMatcher matches a string exactly and support case insensitive.
 | 
					
						
							|  |  |  | type equalStringMatcher struct { | 
					
						
							|  |  |  | 	s             string | 
					
						
							|  |  |  | 	caseSensitive bool | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *equalStringMatcher) Matches(s string) bool { | 
					
						
							|  |  |  | 	if m.caseSensitive { | 
					
						
							|  |  |  | 		return m.s == s | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return strings.EqualFold(m.s, s) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type multiStringMatcherBuilder interface { | 
					
						
							|  |  |  | 	StringMatcher | 
					
						
							|  |  |  | 	add(s string) | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	addPrefix(prefix string, prefixCaseSensitive bool, matcher StringMatcher) | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	setMatches() []string | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | func newEqualMultiStringMatcher(caseSensitive bool, estimatedSize, estimatedPrefixes, minPrefixLength int) multiStringMatcherBuilder { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	// If the estimated size is low enough, it's faster to use a slice instead of a map.
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	if estimatedSize < minEqualMultiStringMatcherMapThreshold && estimatedPrefixes == 0 { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		return &equalMultiStringSliceMatcher{caseSensitive: caseSensitive, values: make([]string, 0, estimatedSize)} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return &equalMultiStringMapMatcher{ | 
					
						
							|  |  |  | 		values:        make(map[string]struct{}, estimatedSize), | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 		prefixes:      make(map[string][]StringMatcher, estimatedPrefixes), | 
					
						
							|  |  |  | 		minPrefixLen:  minPrefixLength, | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		caseSensitive: caseSensitive, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // equalMultiStringSliceMatcher matches a string exactly against a slice of valid values.
 | 
					
						
							|  |  |  | type equalMultiStringSliceMatcher struct { | 
					
						
							|  |  |  | 	values []string | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	caseSensitive bool | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *equalMultiStringSliceMatcher) add(s string) { | 
					
						
							|  |  |  | 	m.values = append(m.values, s) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-08-04 14:20:57 +08:00
										 |  |  | func (*equalMultiStringSliceMatcher) addPrefix(string, bool, StringMatcher) { | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	panic("not implemented") | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | func (m *equalMultiStringSliceMatcher) setMatches() []string { | 
					
						
							|  |  |  | 	return m.values | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *equalMultiStringSliceMatcher) Matches(s string) bool { | 
					
						
							|  |  |  | 	if m.caseSensitive { | 
					
						
							| 
									
										
										
										
											2025-04-30 10:43:26 +08:00
										 |  |  | 		return slices.Contains(m.values, s) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	for _, v := range m.values { | 
					
						
							|  |  |  | 		if strings.EqualFold(s, v) { | 
					
						
							|  |  |  | 			return true | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return false | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | // equalMultiStringMapMatcher matches a string exactly against a map of valid values
 | 
					
						
							|  |  |  | // or against a set of prefix matchers.
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | type equalMultiStringMapMatcher struct { | 
					
						
							|  |  |  | 	// values contains values to match a string against. If the matching is case insensitive,
 | 
					
						
							|  |  |  | 	// the values here must be lowercase.
 | 
					
						
							|  |  |  | 	values map[string]struct{} | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	// prefixes maps strings, all of length minPrefixLen, to sets of matchers to check the rest of the string.
 | 
					
						
							|  |  |  | 	// If the matching is case insensitive, prefixes are all lowercase.
 | 
					
						
							|  |  |  | 	prefixes map[string][]StringMatcher | 
					
						
							|  |  |  | 	// minPrefixLen can be zero, meaning there are no prefix matchers.
 | 
					
						
							|  |  |  | 	minPrefixLen  int | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	caseSensitive bool | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *equalMultiStringMapMatcher) add(s string) { | 
					
						
							|  |  |  | 	if !m.caseSensitive { | 
					
						
							| 
									
										
										
										
											2024-10-24 00:00:02 +08:00
										 |  |  | 		s = toNormalisedLower(s, nil) // Don't pass a stack buffer here - it will always escape to heap.
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	m.values[s] = struct{}{} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | func (m *equalMultiStringMapMatcher) addPrefix(prefix string, prefixCaseSensitive bool, matcher StringMatcher) { | 
					
						
							|  |  |  | 	if m.minPrefixLen == 0 { | 
					
						
							|  |  |  | 		panic("addPrefix called when no prefix length defined") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if len(prefix) < m.minPrefixLen { | 
					
						
							|  |  |  | 		panic("addPrefix called with a too short prefix") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	if m.caseSensitive != prefixCaseSensitive { | 
					
						
							|  |  |  | 		panic("addPrefix called with a prefix whose case sensitivity is different than the expected one") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	s := prefix[:m.minPrefixLen] | 
					
						
							|  |  |  | 	if !m.caseSensitive { | 
					
						
							|  |  |  | 		s = strings.ToLower(s) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	m.prefixes[s] = append(m.prefixes[s], matcher) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | func (m *equalMultiStringMapMatcher) setMatches() []string { | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	if len(m.values) >= maxSetMatches || len(m.prefixes) > 0 { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		return nil | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	matches := make([]string, 0, len(m.values)) | 
					
						
							|  |  |  | 	for s := range m.values { | 
					
						
							|  |  |  | 		matches = append(matches, s) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return matches | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *equalMultiStringMapMatcher) Matches(s string) bool { | 
					
						
							| 
									
										
										
										
											2024-10-24 00:00:02 +08:00
										 |  |  | 	if len(m.values) > 0 { | 
					
						
							|  |  |  | 		sNorm := s | 
					
						
							|  |  |  | 		var a [32]byte | 
					
						
							|  |  |  | 		if !m.caseSensitive { | 
					
						
							|  |  |  | 			sNorm = toNormalisedLower(s, a[:]) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if _, ok := m.values[sNorm]; ok { | 
					
						
							|  |  |  | 			return true | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	if m.minPrefixLen > 0 && len(s) >= m.minPrefixLen { | 
					
						
							| 
									
										
										
										
											2024-10-24 00:00:02 +08:00
										 |  |  | 		prefix := s[:m.minPrefixLen] | 
					
						
							|  |  |  | 		var a [32]byte | 
					
						
							|  |  |  | 		if !m.caseSensitive { | 
					
						
							|  |  |  | 			prefix = toNormalisedLower(s[:m.minPrefixLen], a[:]) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		for _, matcher := range m.prefixes[prefix] { | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 			if matcher.Matches(s) { | 
					
						
							|  |  |  | 				return true | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	return false | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-11 06:31:41 +08:00
										 |  |  | // toNormalisedLower normalise the input string using "Unicode Normalization Form D" and then convert
 | 
					
						
							|  |  |  | // it to lower case.
 | 
					
						
							| 
									
										
										
										
											2024-10-24 00:00:02 +08:00
										 |  |  | func toNormalisedLower(s string, a []byte) string { | 
					
						
							| 
									
										
										
										
											2024-06-11 06:31:41 +08:00
										 |  |  | 	for i := 0; i < len(s); i++ { | 
					
						
							|  |  |  | 		c := s[i] | 
					
						
							| 
									
										
										
										
											2024-06-18 17:57:37 +08:00
										 |  |  | 		if c >= utf8.RuneSelf { | 
					
						
							|  |  |  | 			return strings.Map(unicode.ToLower, norm.NFKD.String(s)) | 
					
						
							| 
									
										
										
										
											2024-06-11 06:31:41 +08:00
										 |  |  | 		} | 
					
						
							|  |  |  | 		if 'A' <= c && c <= 'Z' { | 
					
						
							| 
									
										
										
										
											2024-10-24 00:00:02 +08:00
										 |  |  | 			return toNormalisedLowerSlow(s, i, a) | 
					
						
							| 
									
										
										
										
											2024-06-11 06:31:41 +08:00
										 |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-10-24 00:00:02 +08:00
										 |  |  | 	return s | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // toNormalisedLowerSlow is split from toNormalisedLower because having a call
 | 
					
						
							|  |  |  | // to `copy` slows it down even when it is not called.
 | 
					
						
							|  |  |  | func toNormalisedLowerSlow(s string, i int, a []byte) string { | 
					
						
							|  |  |  | 	var buf []byte | 
					
						
							|  |  |  | 	if cap(a) > len(s) { | 
					
						
							|  |  |  | 		buf = a[:len(s)] | 
					
						
							|  |  |  | 		copy(buf, s) | 
					
						
							|  |  |  | 	} else { | 
					
						
							|  |  |  | 		buf = []byte(s) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	for ; i < len(s); i++ { | 
					
						
							|  |  |  | 		c := s[i] | 
					
						
							|  |  |  | 		if c >= utf8.RuneSelf { | 
					
						
							|  |  |  | 			return strings.Map(unicode.ToLower, norm.NFKD.String(s)) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if 'A' <= c && c <= 'Z' { | 
					
						
							|  |  |  | 			buf[i] = c + 'a' - 'A' | 
					
						
							|  |  |  | 		} | 
					
						
							| 
									
										
										
										
											2024-06-11 06:31:41 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-06-18 17:57:37 +08:00
										 |  |  | 	return yoloString(buf) | 
					
						
							| 
									
										
										
										
											2024-06-11 06:31:41 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | // anyStringWithoutNewlineMatcher is a stringMatcher which matches any string
 | 
					
						
							|  |  |  | // (including an empty one) as far as it doesn't contain any newline character.
 | 
					
						
							|  |  |  | type anyStringWithoutNewlineMatcher struct{} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-08-04 14:20:57 +08:00
										 |  |  | func (anyStringWithoutNewlineMatcher) Matches(s string) bool { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	// We need to make sure it doesn't contain a newline. Since the newline is
 | 
					
						
							|  |  |  | 	// an ASCII character, we can use strings.IndexByte().
 | 
					
						
							|  |  |  | 	return strings.IndexByte(s, '\n') == -1 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // anyNonEmptyStringMatcher is a stringMatcher which matches any non-empty string.
 | 
					
						
							|  |  |  | type anyNonEmptyStringMatcher struct { | 
					
						
							|  |  |  | 	matchNL bool | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *anyNonEmptyStringMatcher) Matches(s string) bool { | 
					
						
							|  |  |  | 	if m.matchNL { | 
					
						
							|  |  |  | 		// It's OK if the string contains a newline so we just need to make
 | 
					
						
							|  |  |  | 		// sure it's non-empty.
 | 
					
						
							|  |  |  | 		return len(s) > 0 | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// We need to make sure it non-empty and doesn't contain a newline.
 | 
					
						
							|  |  |  | 	// Since the newline is an ASCII character, we can use strings.IndexByte().
 | 
					
						
							|  |  |  | 	return len(s) > 0 && strings.IndexByte(s, '\n') == -1 | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // zeroOrOneCharacterStringMatcher is a StringMatcher which matches zero or one occurrence
 | 
					
						
							|  |  |  | // of any character. The newline character is matches only if matchNL is set to true.
 | 
					
						
							|  |  |  | type zeroOrOneCharacterStringMatcher struct { | 
					
						
							|  |  |  | 	matchNL bool | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (m *zeroOrOneCharacterStringMatcher) Matches(s string) bool { | 
					
						
							| 
									
										
										
										
											2024-05-13 21:41:00 +08:00
										 |  |  | 	// If there's more than one rune in the string, then it can't match.
 | 
					
						
							| 
									
										
										
										
											2024-05-13 23:44:07 +08:00
										 |  |  | 	if r, size := utf8.DecodeRuneInString(s); r == utf8.RuneError { | 
					
						
							|  |  |  | 		// Size is 0 for empty strings, 1 for invalid rune.
 | 
					
						
							|  |  |  | 		// Empty string matches, invalid rune matches if there isn't anything else.
 | 
					
						
							|  |  |  | 		return size == len(s) | 
					
						
							|  |  |  | 	} else if size < len(s) { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		return false | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// No need to check for the newline if the string is empty or matching a newline is OK.
 | 
					
						
							|  |  |  | 	if m.matchNL || len(s) == 0 { | 
					
						
							|  |  |  | 		return true | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return s[0] != '\n' | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // trueMatcher is a stringMatcher which matches any string (always returns true).
 | 
					
						
							|  |  |  | type trueMatcher struct{} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-08-04 14:20:57 +08:00
										 |  |  | func (trueMatcher) Matches(string) bool { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	return true | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | // optimizeEqualOrPrefixStringMatchers optimize a specific case where all matchers are made by an
 | 
					
						
							|  |  |  | // alternation (orStringMatcher) of strings checked for equality (equalStringMatcher) or
 | 
					
						
							|  |  |  | // with a literal prefix (literalPrefixSensitiveStringMatcher or literalPrefixInsensitiveStringMatcher).
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // In this specific case, when we have many strings to match against we can use a map instead
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | // of iterating over the list of strings.
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | func optimizeEqualOrPrefixStringMatchers(input StringMatcher, threshold int) StringMatcher { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	var ( | 
					
						
							|  |  |  | 		caseSensitive    bool | 
					
						
							|  |  |  | 		caseSensitiveSet bool | 
					
						
							|  |  |  | 		numValues        int | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 		numPrefixes      int | 
					
						
							|  |  |  | 		minPrefixLength  int | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Analyse the input StringMatcher to count the number of occurrences
 | 
					
						
							|  |  |  | 	// and ensure all of them have the same case sensitivity.
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	analyseEqualMatcherCallback := func(matcher *equalStringMatcher) bool { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		// Ensure we don't have mixed case sensitivity.
 | 
					
						
							|  |  |  | 		if caseSensitiveSet && caseSensitive != matcher.caseSensitive { | 
					
						
							|  |  |  | 			return false | 
					
						
							|  |  |  | 		} else if !caseSensitiveSet { | 
					
						
							|  |  |  | 			caseSensitive = matcher.caseSensitive | 
					
						
							|  |  |  | 			caseSensitiveSet = true | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		numValues++ | 
					
						
							|  |  |  | 		return true | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-02-10 15:06:58 +08:00
										 |  |  | 	analysePrefixMatcherCallback := func(prefix string, prefixCaseSensitive bool, _ StringMatcher) bool { | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 		// Ensure we don't have mixed case sensitivity.
 | 
					
						
							|  |  |  | 		if caseSensitiveSet && caseSensitive != prefixCaseSensitive { | 
					
						
							|  |  |  | 			return false | 
					
						
							|  |  |  | 		} else if !caseSensitiveSet { | 
					
						
							|  |  |  | 			caseSensitive = prefixCaseSensitive | 
					
						
							|  |  |  | 			caseSensitiveSet = true | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		if numPrefixes == 0 || len(prefix) < minPrefixLength { | 
					
						
							|  |  |  | 			minPrefixLength = len(prefix) | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		numPrefixes++ | 
					
						
							|  |  |  | 		return true | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	if !findEqualOrPrefixStringMatchers(input, analyseEqualMatcherCallback, analysePrefixMatcherCallback) { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		return input | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	// If the number of values and prefixes found is less than the threshold, then we should skip the optimization.
 | 
					
						
							|  |  |  | 	if (numValues + numPrefixes) < threshold { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		return input | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Parse again the input StringMatcher to extract all values and storing them.
 | 
					
						
							|  |  |  | 	// We can skip the case sensitivity check because we've already checked it and
 | 
					
						
							|  |  |  | 	// if the code reach this point then it means all matchers have the same case sensitivity.
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	multiMatcher := newEqualMultiStringMatcher(caseSensitive, numValues, numPrefixes, minPrefixLength) | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	// Ignore the return value because we already iterated over the input StringMatcher
 | 
					
						
							|  |  |  | 	// and it was all good.
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 	findEqualOrPrefixStringMatchers(input, func(matcher *equalStringMatcher) bool { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 		multiMatcher.add(matcher.s) | 
					
						
							|  |  |  | 		return true | 
					
						
							| 
									
										
										
										
											2025-02-10 15:06:58 +08:00
										 |  |  | 	}, func(prefix string, _ bool, matcher StringMatcher) bool { | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 		multiMatcher.addPrefix(prefix, caseSensitive, matcher) | 
					
						
							|  |  |  | 		return true | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return multiMatcher | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | // findEqualOrPrefixStringMatchers analyze the input StringMatcher and calls the equalMatcherCallback for each
 | 
					
						
							|  |  |  | // equalStringMatcher found, and prefixMatcherCallback for each literalPrefixSensitiveStringMatcher and literalPrefixInsensitiveStringMatcher found.
 | 
					
						
							|  |  |  | //
 | 
					
						
							|  |  |  | // Returns true if and only if the input StringMatcher is *only* composed by an alternation of equalStringMatcher and/or
 | 
					
						
							|  |  |  | // literal prefix matcher. Returns false if prefixMatcherCallback is nil and a literal prefix matcher is encountered.
 | 
					
						
							|  |  |  | func findEqualOrPrefixStringMatchers(input StringMatcher, equalMatcherCallback func(matcher *equalStringMatcher) bool, prefixMatcherCallback func(prefix string, prefixCaseSensitive bool, matcher StringMatcher) bool) bool { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 	orInput, ok := input.(orStringMatcher) | 
					
						
							|  |  |  | 	if !ok { | 
					
						
							|  |  |  | 		return false | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for _, m := range orInput { | 
					
						
							|  |  |  | 		switch casted := m.(type) { | 
					
						
							|  |  |  | 		case orStringMatcher: | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 			if !findEqualOrPrefixStringMatchers(m, equalMatcherCallback, prefixMatcherCallback) { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 				return false | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		case *equalStringMatcher: | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 			if !equalMatcherCallback(casted) { | 
					
						
							|  |  |  | 				return false | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		case *literalPrefixSensitiveStringMatcher: | 
					
						
							|  |  |  | 			if prefixMatcherCallback == nil || !prefixMatcherCallback(casted.prefix, true, casted) { | 
					
						
							|  |  |  | 				return false | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		case *literalPrefixInsensitiveStringMatcher: | 
					
						
							|  |  |  | 			if prefixMatcherCallback == nil || !prefixMatcherCallback(casted.prefix, false, casted) { | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 				return false | 
					
						
							|  |  |  | 			} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		default: | 
					
						
							| 
									
										
										
										
											2024-07-04 01:45:36 +08:00
										 |  |  | 			// It's not an equal or prefix string matcher, so we have to stop searching
 | 
					
						
							| 
									
										
										
										
											2024-01-25 17:40:57 +08:00
										 |  |  | 			// cause this optimization can't be applied.
 | 
					
						
							|  |  |  | 			return false | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return true | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func hasPrefixCaseInsensitive(s, prefix string) bool { | 
					
						
							|  |  |  | 	return len(s) >= len(prefix) && strings.EqualFold(s[0:len(prefix)], prefix) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func hasSuffixCaseInsensitive(s, suffix string) bool { | 
					
						
							|  |  |  | 	return len(s) >= len(suffix) && strings.EqualFold(s[len(s)-len(suffix):], suffix) | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2024-05-31 21:35:58 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | func containsInOrder(s string, contains []string) bool { | 
					
						
							| 
									
										
										
										
											2024-06-04 16:24:36 +08:00
										 |  |  | 	// Optimization for the case we only have to look for 1 substring.
 | 
					
						
							|  |  |  | 	if len(contains) == 1 { | 
					
						
							|  |  |  | 		return strings.Contains(s, contains[0]) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return containsInOrderMulti(s, contains) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func containsInOrderMulti(s string, contains []string) bool { | 
					
						
							| 
									
										
										
										
											2024-05-31 21:35:58 +08:00
										 |  |  | 	offset := 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	for _, substr := range contains { | 
					
						
							|  |  |  | 		at := strings.Index(s[offset:], substr) | 
					
						
							|  |  |  | 		if at == -1 { | 
					
						
							|  |  |  | 			return false | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 		offset += at + len(substr) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	return true | 
					
						
							|  |  |  | } |