From ff2af660cf3e57a873107dcb18e1811bbe0806cf Mon Sep 17 00:00:00 2001 From: Andy Clement Date: Fri, 9 Jun 2017 11:44:32 -0700 Subject: [PATCH] PathPatternParser encodes patterns as it parses them Before this commit there was no special handling for URL encoding of the path pattern string coming into the path pattern parser. No assumptions were made about it being in an encoded form or not. With this change it is assumed incoming path patterns are not encoded and as part of parsing the parser builds PathPattern objects that include encoded elements. For example parsing "/f o" will create a path pattern of the form "/f%20o". In this form it can then be used to match against encoded paths. Handling encoded characters is not trivial and has resulted in some loss in matching speed but care has been taken to avoid unnecessary creation of additional heap objects. When matching variables the variable values are return in a decoded form. It is hoped the speed can be recovered, at least for the common case of non-encoded incoming paths. Issue: SPR-15640 --- .../pattern/CaptureTheRestPathElement.java | 4 +- .../pattern/CaptureVariablePathElement.java | 21 +++- .../pattern/InternalPathPatternParser.java | 45 ++++++-- .../web/util/pattern/LiteralPathElement.java | 6 +- .../web/util/pattern/PathElement.java | 52 ++++++++- .../web/util/pattern/RegexPathElement.java | 36 +++++- .../SingleCharWildcardedPathElement.java | 24 +++- .../util/pattern/PathPatternMatcherTests.java | 105 +++++++++++++++++- .../util/pattern/PathPatternParserTests.java | 83 +++++++++++--- 9 files changed, 332 insertions(+), 44 deletions(-) diff --git a/spring-web/src/main/java/org/springframework/web/util/pattern/CaptureTheRestPathElement.java b/spring-web/src/main/java/org/springframework/web/util/pattern/CaptureTheRestPathElement.java index 3ac6bdaf081..f5529188210 100644 --- a/spring-web/src/main/java/org/springframework/web/util/pattern/CaptureTheRestPathElement.java +++ b/spring-web/src/main/java/org/springframework/web/util/pattern/CaptureTheRestPathElement.java @@ -56,8 +56,8 @@ class CaptureTheRestPathElement extends PathElement { matchingContext.remainingPathIndex = matchingContext.candidateLength; } if (matchingContext.extractingVariables) { - matchingContext.set(variableName, new String(matchingContext.candidate, candidateIndex, - matchingContext.candidateLength - candidateIndex)); + matchingContext.set(variableName, decode(new String(matchingContext.candidate, candidateIndex, + matchingContext.candidateLength - candidateIndex))); } return true; } diff --git a/spring-web/src/main/java/org/springframework/web/util/pattern/CaptureVariablePathElement.java b/spring-web/src/main/java/org/springframework/web/util/pattern/CaptureVariablePathElement.java index 43a3c85f2a8..090bbc06a04 100644 --- a/spring-web/src/main/java/org/springframework/web/util/pattern/CaptureVariablePathElement.java +++ b/spring-web/src/main/java/org/springframework/web/util/pattern/CaptureVariablePathElement.java @@ -16,9 +16,13 @@ package org.springframework.web.util.pattern; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.springframework.web.util.UriUtils; + /** * A path element representing capturing a piece of the path as a variable. In the pattern * '/foo/{bar}/goo' the {bar} is represented as a {@link CaptureVariablePathElement}. There @@ -74,10 +78,22 @@ class CaptureVariablePathElement extends PathElement { return false; } + String substringForDecoding = null; CharSequence candidateCapture = null; if (this.constraintPattern != null) { // TODO possible optimization - only regex match if rest of pattern matches? Benefit likely to vary pattern to pattern - candidateCapture = new SubSequence(matchingContext.candidate, candidateIndex, nextPos); + if (includesPercent(matchingContext.candidate, candidateIndex, nextPos)) { + substringForDecoding = new String(matchingContext.candidate, candidateIndex, nextPos); + try { + candidateCapture = UriUtils.decode(substringForDecoding,StandardCharsets.UTF_8.name()); + } + catch (UnsupportedEncodingException e) { + throw new IllegalStateException(e); + } + } + else { + candidateCapture = new SubSequence(matchingContext.candidate, candidateIndex, nextPos); + } Matcher matcher = constraintPattern.matcher(candidateCapture); if (matcher.groupCount() != 0) { throw new IllegalArgumentException( @@ -115,7 +131,8 @@ class CaptureVariablePathElement extends PathElement { if (match && matchingContext.extractingVariables) { matchingContext.set(this.variableName, - new String(matchingContext.candidate, candidateIndex, nextPos - candidateIndex)); + candidateCapture != null ? candidateCapture.toString(): + decode(new String(matchingContext.candidate, candidateIndex, nextPos - candidateIndex))); } return match; } diff --git a/spring-web/src/main/java/org/springframework/web/util/pattern/InternalPathPatternParser.java b/spring-web/src/main/java/org/springframework/web/util/pattern/InternalPathPatternParser.java index 5223254b89e..6a5c19ee7c4 100644 --- a/spring-web/src/main/java/org/springframework/web/util/pattern/InternalPathPatternParser.java +++ b/spring-web/src/main/java/org/springframework/web/util/pattern/InternalPathPatternParser.java @@ -16,10 +16,13 @@ package org.springframework.web.util.pattern; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.regex.PatternSyntaxException; +import org.springframework.web.util.UriUtils; import org.springframework.web.util.pattern.PatternParseException.PatternMessage; /** @@ -162,7 +165,7 @@ class InternalPathPatternParser { this.variableCaptureCount++; } else if (ch == ':') { - if (this.insideVariableCapture) { + if (this.insideVariableCapture && !this.isCaptureTheRestVariable) { skipCaptureRegex(); this.insideVariableCapture = false; this.variableCaptureCount++; @@ -304,6 +307,26 @@ class InternalPathPatternParser { resetPathElementState(); } + + private char[] getPathElementText(boolean encodeElement) { + char[] pathElementText = new char[this.pos - this.pathElementStart]; + if (encodeElement) { + try { + String unencoded = new String(this.pathPatternData, this.pathElementStart, this.pos - this.pathElementStart); + String encoded = UriUtils.encodeFragment(unencoded, StandardCharsets.UTF_8.name()); + pathElementText = encoded.toCharArray(); + } + catch (UnsupportedEncodingException ex) { + // Should never happen... + throw new IllegalStateException(ex); + } + } + else { + System.arraycopy(this.pathPatternData, this.pathElementStart, pathElementText, 0, + this.pos - this.pathElementStart); + } + return pathElementText; + } /** * Used the knowledge built up whilst processing since the last path element to determine what kind of path @@ -314,10 +337,7 @@ class InternalPathPatternParser { if (this.insideVariableCapture) { throw new PatternParseException(this.pos, this.pathPatternData, PatternMessage.MISSING_CLOSE_CAPTURE); } - - char[] pathElementText = new char[this.pos - this.pathElementStart]; - System.arraycopy(this.pathPatternData, this.pathElementStart, pathElementText, 0, - this.pos - this.pathElementStart); + PathElement newPE = null; if (this.variableCaptureCount > 0) { @@ -325,12 +345,12 @@ class InternalPathPatternParser { this.pathPatternData[this.pos - 1] == '}') { if (this.isCaptureTheRestVariable) { // It is {*....} - newPE = new CaptureTheRestPathElement(pathElementStart, pathElementText, separator); + newPE = new CaptureTheRestPathElement(pathElementStart, getPathElementText(false), separator); } else { // It is a full capture of this element (possibly with constraint), for example: /foo/{abc}/ try { - newPE = new CaptureVariablePathElement(this.pathElementStart, pathElementText, + newPE = new CaptureVariablePathElement(this.pathElementStart, getPathElementText(false), this.caseSensitive, this.separator); } catch (PatternSyntaxException pse) { @@ -347,8 +367,9 @@ class InternalPathPatternParser { throw new PatternParseException(this.pathElementStart, this.pathPatternData, PatternMessage.CAPTURE_ALL_IS_STANDALONE_CONSTRUCT); } - RegexPathElement newRegexSection = new RegexPathElement(this.pathElementStart, pathElementText, - this.caseSensitive, this.pathPatternData, this.separator); + RegexPathElement newRegexSection = new RegexPathElement(this.pathElementStart, + getPathElementText(false), this.caseSensitive, + this.pathPatternData, this.separator); for (String variableName : newRegexSection.getVariableNames()) { recordCapturedVariable(this.pathElementStart, variableName); } @@ -361,16 +382,16 @@ class InternalPathPatternParser { newPE = new WildcardPathElement(this.pathElementStart, this.separator); } else { - newPE = new RegexPathElement(this.pathElementStart, pathElementText, + newPE = new RegexPathElement(this.pathElementStart, getPathElementText(false), this.caseSensitive, this.pathPatternData, this.separator); } } else if (this.singleCharWildcardCount != 0) { - newPE = new SingleCharWildcardedPathElement(this.pathElementStart, pathElementText, + newPE = new SingleCharWildcardedPathElement(this.pathElementStart, getPathElementText(true), this.singleCharWildcardCount, this.caseSensitive, this.separator); } else { - newPE = new LiteralPathElement(this.pathElementStart, pathElementText, + newPE = new LiteralPathElement(this.pathElementStart, getPathElementText(true), this.caseSensitive, this.separator); } } diff --git a/spring-web/src/main/java/org/springframework/web/util/pattern/LiteralPathElement.java b/spring-web/src/main/java/org/springframework/web/util/pattern/LiteralPathElement.java index c72e4550963..809dc4a467a 100644 --- a/spring-web/src/main/java/org/springframework/web/util/pattern/LiteralPathElement.java +++ b/spring-web/src/main/java/org/springframework/web/util/pattern/LiteralPathElement.java @@ -57,7 +57,11 @@ class LiteralPathElement extends PathElement { if (this.caseSensitive) { for (int i = 0; i < len; i++) { if (matchingContext.candidate[candidateIndex++] != this.text[i]) { - return false; + // TODO unfortunate performance hit here on comparison when encoded data is the less likely case + if (i < 3 || matchingContext.candidate[candidateIndex-3] != '%' || + Character.toUpperCase(matchingContext.candidate[candidateIndex-1]) != this.text[i]) { + return false; + } } } } diff --git a/spring-web/src/main/java/org/springframework/web/util/pattern/PathElement.java b/spring-web/src/main/java/org/springframework/web/util/pattern/PathElement.java index 83509a497e6..55bf05caee7 100644 --- a/spring-web/src/main/java/org/springframework/web/util/pattern/PathElement.java +++ b/spring-web/src/main/java/org/springframework/web/util/pattern/PathElement.java @@ -16,6 +16,10 @@ package org.springframework.web.util.pattern; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; + +import org.springframework.web.util.UriUtils; import org.springframework.web.util.pattern.PathPattern.MatchingContext; /** @@ -65,7 +69,7 @@ abstract class PathElement { public abstract boolean matches(int candidatePos, MatchingContext matchingContext); /** - * Return the length of the path element where captures are considered to be one character long. + * @return the length of the path element where captures are considered to be one character long. */ public abstract int getNormalizedLength(); @@ -98,4 +102,50 @@ abstract class PathElement { matchingContext.candidate[nextIndex] == this.separator); } + /** + * Decode an input CharSequence if necessary. + * @param toDecode the input char sequence that should be decoded if necessary + * @returns the decoded result + */ + protected String decode(CharSequence toDecode) { + CharSequence decoded = toDecode; + if (includesPercent(toDecode)) { + try { + decoded = UriUtils.decode(toDecode.toString(), StandardCharsets.UTF_8.name()); + } + catch (UnsupportedEncodingException e) { + throw new IllegalStateException(e); + } + } + return decoded.toString(); + } + + /** + * @param char sequence of characters + * @param from start position (included in check) + * @param to end position (excluded from check) + * @return true if the chars array includes a '%' character between the specified positions + */ + protected boolean includesPercent(char[] chars, int from, int to) { + for (int i = from; i < to; i++) { + if (chars[i] == '%') { + return true; + } + } + return false; + } + + /** + * @param chars string that may include a '%' character indicating it is encoded + * @return true if the string contains a '%' character + */ + protected boolean includesPercent(CharSequence chars) { + for (int i = 0, max = chars.length(); i < max; i++) { + if (chars.charAt(i) == '%') { + return true; + } + } + return false; + } + } diff --git a/spring-web/src/main/java/org/springframework/web/util/pattern/RegexPathElement.java b/spring-web/src/main/java/org/springframework/web/util/pattern/RegexPathElement.java index 467dee74217..603ef663f20 100644 --- a/spring-web/src/main/java/org/springframework/web/util/pattern/RegexPathElement.java +++ b/spring-web/src/main/java/org/springframework/web/util/pattern/RegexPathElement.java @@ -16,12 +16,15 @@ package org.springframework.web.util.pattern; +import java.io.UnsupportedEncodingException; +import java.nio.charset.StandardCharsets; import java.util.LinkedList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.springframework.util.AntPathMatcher; +import org.springframework.web.util.UriUtils; import org.springframework.web.util.pattern.PathPattern.MatchingContext; /** @@ -39,7 +42,7 @@ class RegexPathElement extends PathElement { private final String DEFAULT_VARIABLE_PATTERN = "(.*)"; - private final char[] regex; + private char[] regex; private final boolean caseSensitive; @@ -61,17 +64,20 @@ class RegexPathElement extends PathElement { public Pattern buildPattern(char[] regex, char[] completePattern) { StringBuilder patternBuilder = new StringBuilder(); String text = new String(regex); + StringBuilder encodedRegexBuilder = new StringBuilder(); Matcher matcher = GLOB_PATTERN.matcher(text); int end = 0; while (matcher.find()) { - patternBuilder.append(quote(text, end, matcher.start())); + patternBuilder.append(quote(text, end, matcher.start(), encodedRegexBuilder)); String match = matcher.group(); if ("?".equals(match)) { patternBuilder.append('.'); + encodedRegexBuilder.append('?'); } else if ("*".equals(match)) { patternBuilder.append(".*"); + encodedRegexBuilder.append('*'); int pos = matcher.start(); if (pos < 1 || text.charAt(pos-1) != '.') { // To be compatible with the AntPathMatcher comparator, @@ -80,6 +86,7 @@ class RegexPathElement extends PathElement { } } else if (match.startsWith("{") && match.endsWith("}")) { + encodedRegexBuilder.append(match); int colonIdx = match.indexOf(':'); if (colonIdx == -1) { patternBuilder.append(DEFAULT_VARIABLE_PATTERN); @@ -106,7 +113,8 @@ class RegexPathElement extends PathElement { end = matcher.end(); } - patternBuilder.append(quote(text, end, text.length())); + patternBuilder.append(quote(text, end, text.length(), encodedRegexBuilder)); + this.regex = encodedRegexBuilder.toString().toCharArray(); if (this.caseSensitive) { return Pattern.compile(patternBuilder.toString()); } @@ -119,17 +127,33 @@ class RegexPathElement extends PathElement { return this.variableNames; } - private String quote(String s, int start, int end) { + private String quote(String s, int start, int end, StringBuilder encodedRegexBuilder) { if (start == end) { return ""; } - return Pattern.quote(s.substring(start, end)); + String substring = s.substring(start, end); + try { + String encodedSubString = UriUtils.encodePath(substring, StandardCharsets.UTF_8.name()); + encodedRegexBuilder.append(encodedSubString); + } + catch (UnsupportedEncodingException e) { + throw new IllegalStateException(e); + } + return Pattern.quote(substring); } @Override public boolean matches(int candidateIndex, MatchingContext matchingContext) { int pos = matchingContext.scanAhead(candidateIndex); - Matcher matcher = this.pattern.matcher(new SubSequence(matchingContext.candidate, candidateIndex, pos)); + + CharSequence textToMatch = null; + if (includesPercent(matchingContext.candidate, candidateIndex, pos)) { + textToMatch = decode(new SubSequence(matchingContext.candidate, candidateIndex, pos)); + } + else { + textToMatch = new SubSequence(matchingContext.candidate, candidateIndex, pos); + } + Matcher matcher = this.pattern.matcher(textToMatch); boolean matches = matcher.matches(); if (matches) { diff --git a/spring-web/src/main/java/org/springframework/web/util/pattern/SingleCharWildcardedPathElement.java b/spring-web/src/main/java/org/springframework/web/util/pattern/SingleCharWildcardedPathElement.java index 6d15d202ce0..800311ef294 100644 --- a/spring-web/src/main/java/org/springframework/web/util/pattern/SingleCharWildcardedPathElement.java +++ b/spring-web/src/main/java/org/springframework/web/util/pattern/SingleCharWildcardedPathElement.java @@ -65,8 +65,18 @@ class SingleCharWildcardedPathElement extends PathElement { if (this.caseSensitive) { for (int i = 0; i