From bbb53d03c4f9f65b8bfaca92fbd1e88aabf6e347 Mon Sep 17 00:00:00 2001 From: rstoyanchev Date: Fri, 4 Oct 2024 14:36:59 +0100 Subject: [PATCH] Pluggable URI parsing, use RFC parser by default See gh-33639 --- .../web/util/DefaultUriBuilderFactory.java | 33 +++- .../web/util/UriComponentsBuilder.java | 146 +++++++++++++----- .../web/util/WhatWgUrlParser.java | 22 ++- .../filter/ForwardedHeaderFilterTests.java | 2 +- .../web/util/UriComponentsBuilderTests.java | 10 +- 5 files changed, 167 insertions(+), 46 deletions(-) diff --git a/spring-web/src/main/java/org/springframework/web/util/DefaultUriBuilderFactory.java b/spring-web/src/main/java/org/springframework/web/util/DefaultUriBuilderFactory.java index c90c49470e..ff7450d854 100644 --- a/spring-web/src/main/java/org/springframework/web/util/DefaultUriBuilderFactory.java +++ b/spring-web/src/main/java/org/springframework/web/util/DefaultUriBuilderFactory.java @@ -45,6 +45,9 @@ public class DefaultUriBuilderFactory implements UriBuilderFactory { @Nullable private final UriComponentsBuilder baseUri; + @Nullable + private UriComponentsBuilder.ParserType parserType; + private EncodingMode encodingMode = EncodingMode.TEMPLATE_AND_VALUES; @Nullable @@ -92,6 +95,26 @@ public class DefaultUriBuilderFactory implements UriBuilderFactory { return (this.baseUri != null); } + /** + * Set the {@link UriComponentsBuilder.ParserType} to use. + *

By default, if the parser type is not specified, + * {@link UriComponentsBuilder} uses {@link UriComponentsBuilder.ParserType#RFC}. + * @param parserType the parser type + * @since 6.2 + */ + public void setParserType(UriComponentsBuilder.ParserType parserType) { + this.parserType = parserType; + } + + /** + * Return the configured parser type. + * @since 6.2 + */ + @Nullable + public UriComponentsBuilder.ParserType getParserType() { + return this.parserType; + } + /** * Set the {@link EncodingMode encoding mode} to use. *

By default this is set to {@link EncodingMode#TEMPLATE_AND_VALUES @@ -265,12 +288,12 @@ public class DefaultUriBuilderFactory implements UriBuilderFactory { result = (baseUri != null ? baseUri.cloneBuilder() : UriComponentsBuilder.newInstance()); } else if (baseUri != null) { - UriComponentsBuilder builder = UriComponentsBuilder.fromUriString(uriTemplate); + UriComponentsBuilder builder = parseUri(uriTemplate); UriComponents uri = builder.build(); result = (uri.getHost() == null ? baseUri.cloneBuilder().uriComponents(uri) : builder); } else { - result = UriComponentsBuilder.fromUriString(uriTemplate); + result = parseUri(uriTemplate); } if (encodingMode.equals(EncodingMode.TEMPLATE_AND_VALUES)) { result.encode(); @@ -279,6 +302,12 @@ public class DefaultUriBuilderFactory implements UriBuilderFactory { return result; } + private UriComponentsBuilder parseUri(String uriTemplate) { + return (getParserType() != null ? + UriComponentsBuilder.fromUriString(uriTemplate, getParserType()) : + UriComponentsBuilder.fromUriString(uriTemplate)); + } + private void parsePathIfNecessary(UriComponentsBuilder result) { if (parsePath && encodingMode.equals(EncodingMode.URI_COMPONENT)) { UriComponents uric = result.build(); diff --git a/spring-web/src/main/java/org/springframework/web/util/UriComponentsBuilder.java b/spring-web/src/main/java/org/springframework/web/util/UriComponentsBuilder.java index 418ba0d353..6a69addda8 100644 --- a/spring-web/src/main/java/org/springframework/web/util/UriComponentsBuilder.java +++ b/spring-web/src/main/java/org/springframework/web/util/UriComponentsBuilder.java @@ -197,7 +197,19 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable { } /** - * Create a builder that is initialized with the given URI string. + * Variant of {@link #fromUriString(String, ParserType)} that defaults to + * the {@link ParserType#RFC} parsing. + */ + public static UriComponentsBuilder fromUriString(String uri) throws InvalidUrlException { + Assert.notNull(uri, "URI must not be null"); + if (uri.isEmpty()) { + return new UriComponentsBuilder(); + } + return fromUriString(uri, ParserType.RFC); + } + + /** + * Create a builder that is initialized by parsing the given URI string. *

Note: The presence of reserved characters can prevent * correct parsing of the URI string. For example if a query parameter * contains {@code '='} or {@code '&'} characters, the query string cannot @@ -208,47 +220,27 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable { * UriComponentsBuilder.fromUriString(uriString).buildAndExpand("hot&cold"); * * @param uri the URI string to initialize with + * @param parserType the parsing algorithm to use * @return the new {@code UriComponentsBuilder} * @throws InvalidUrlException if {@code uri} cannot be parsed + * @since 6.2 */ - public static UriComponentsBuilder fromUriString(String uri) throws InvalidUrlException { + public static UriComponentsBuilder fromUriString(String uri, ParserType parserType) throws InvalidUrlException { Assert.notNull(uri, "URI must not be null"); - - UriComponentsBuilder builder = new UriComponentsBuilder(); - if (!uri.isEmpty()) { - WhatWgUrlParser.UrlRecord urlRecord = WhatWgUrlParser.parse(uri, EMPTY_URL_RECORD, null, null); - if (!urlRecord.scheme().isEmpty()) { - builder.scheme(urlRecord.scheme()); - } - if (urlRecord.includesCredentials()) { - StringBuilder userInfo = new StringBuilder(urlRecord.username()); - if (!urlRecord.password().isEmpty()) { - userInfo.append(':'); - userInfo.append(urlRecord.password()); - } - builder.userInfo(userInfo.toString()); - } - if (urlRecord.host() != null && !(urlRecord.host() instanceof WhatWgUrlParser.EmptyHost)) { - builder.host(urlRecord.host().toString()); - } - if (urlRecord.port() != null) { - builder.port(urlRecord.port().toString()); - } - if (urlRecord.path().isOpaque()) { - String ssp = urlRecord.path() + urlRecord.search(); - builder.schemeSpecificPart(ssp); - } - else { - builder.path(urlRecord.path().toString()); - if (StringUtils.hasLength(urlRecord.query())) { - builder.query(urlRecord.query()); - } - } - if (StringUtils.hasLength(urlRecord.fragment())) { - builder.fragment(urlRecord.fragment()); - } + if (uri.isEmpty()) { + return new UriComponentsBuilder(); } - return builder; + UriComponentsBuilder builder = new UriComponentsBuilder(); + return switch (parserType) { + case RFC -> { + RfcUriParser.UriRecord record = RfcUriParser.parse(uri); + yield builder.rfcUriRecord(record); + } + case WHAT_WG -> { + WhatWgUrlParser.UrlRecord record = WhatWgUrlParser.parse(uri, EMPTY_URL_RECORD, null, null); + yield builder.whatWgUrlRecord(record); + } + }; } /** @@ -517,6 +509,58 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable { return this; } + /** + * Internal method to initialize this builder from an RFC {@code UriRecord}. + */ + private UriComponentsBuilder rfcUriRecord(RfcUriParser.UriRecord record) { + scheme(record.scheme()); + if (record.isOpaque()) { + if (record.path() != null) { + schemeSpecificPart(record.path()); + } + } + else { + userInfo(record.user()); + host(record.host()); + port(record.port()); + if (record.path() != null) { + path(record.path()); + } + query(record.query()); + } + fragment(record.fragment()); + return this; + } + + /** + * Internal method to initialize this builder from a WhatWG {@code UrlRecord}. + */ + private UriComponentsBuilder whatWgUrlRecord(WhatWgUrlParser.UrlRecord record) { + if (!record.scheme().isEmpty()) { + scheme(record.scheme()); + } + if (record.path().isOpaque()) { + String ssp = record.path() + record.search(); + schemeSpecificPart(ssp); + } + else { + userInfo(record.userInfo()); + String hostname = record.hostname(); + if (StringUtils.hasText(hostname)) { + host(hostname); + } + if (record.port() != null) { + port(record.portString()); + } + path(record.path().toString()); + query(record.query()); + if (StringUtils.hasText(record.fragment())) { + fragment(record.fragment()); + } + } + return this; + } + @Override public UriComponentsBuilder scheme(@Nullable String scheme) { this.scheme = scheme; @@ -790,6 +834,34 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable { } + /** + * Enum to represent different URI parsing mechanisms. + */ + public enum ParserType { + + /** + * Parser that expects URI's conforming to RFC 3986 syntax. + */ + RFC, + + /** + * Parser based on algorithm defined in the WhatWG URL Living standard. + * Browsers use this algorithm to align on lenient parsing of user typed + * URL's that may deviate from RFC syntax. + *

For more details, see: + *

+ *

Use this if you need to leniently handle URL's that don't conform + * to RFC syntax, or for alignment with browser parsing. + */ + WHAT_WG + + } + + private static class CompositePathComponentBuilder implements PathComponentBuilder { private final Deque builders = new ArrayDeque<>(); diff --git a/spring-web/src/main/java/org/springframework/web/util/WhatWgUrlParser.java b/spring-web/src/main/java/org/springframework/web/util/WhatWgUrlParser.java index 6fe9a1aad4..235e60d939 100644 --- a/spring-web/src/main/java/org/springframework/web/util/WhatWgUrlParser.java +++ b/spring-web/src/main/java/org/springframework/web/util/WhatWgUrlParser.java @@ -38,7 +38,11 @@ import org.springframework.util.Assert; /** * Implementation of the * URL parsing algorithm - * of the WhatWG URL Living standard. + * of the WhatWG URL Living standard. Browsers use this algorithm to align on + * lenient parsing of user typed URL's that may deviate from RFC syntax. + * Use this, via {@link UriComponentsBuilder.ParserType#WHAT_WG}, if you need to + * leniently handle URL's that don't confirm to RFC syntax, or for alignment + * with browser behavior. * *

Comments in this class correlate to the parsing algorithm. * The implementation differs from the spec in the following ways: @@ -1895,6 +1899,22 @@ final class WhatWgUrlParser { } } + /** + * Convenience method to return the full user info. + */ + @Nullable + public String userInfo() { + if (!includesCredentials()) { + return null; + } + StringBuilder userInfo = new StringBuilder(username()); + if (!password().isEmpty()) { + userInfo.append(':'); + userInfo.append(password()); + } + return userInfo.toString(); + } + /** * A URL’s host is {@code null} or a {@linkplain Host host}. * It is initially {@code null}. diff --git a/spring-web/src/test/java/org/springframework/web/filter/ForwardedHeaderFilterTests.java b/spring-web/src/test/java/org/springframework/web/filter/ForwardedHeaderFilterTests.java index e7b532a904..576d891533 100644 --- a/spring-web/src/test/java/org/springframework/web/filter/ForwardedHeaderFilterTests.java +++ b/spring-web/src/test/java/org/springframework/web/filter/ForwardedHeaderFilterTests.java @@ -649,7 +649,7 @@ class ForwardedHeaderFilterTests { String location = "//other.info/parent/../foo/bar"; String redirectedUrl = sendRedirect(location); - assertThat(redirectedUrl).isEqualTo(("https://other.info/foo/bar")); + assertThat(redirectedUrl).isEqualTo(("https:" + location)); } @Test diff --git a/spring-web/src/test/java/org/springframework/web/util/UriComponentsBuilderTests.java b/spring-web/src/test/java/org/springframework/web/util/UriComponentsBuilderTests.java index 4abb0f1313..d40ef9a918 100644 --- a/spring-web/src/test/java/org/springframework/web/util/UriComponentsBuilderTests.java +++ b/spring-web/src/test/java/org/springframework/web/util/UriComponentsBuilderTests.java @@ -638,11 +638,11 @@ class UriComponentsBuilderTests { void relativeUrls() { String baseUrl = "https://example.com"; assertThat(UriComponentsBuilder.fromUriString(baseUrl + "/foo/../bar").build().toString()) - .isEqualTo(baseUrl + "/bar"); + .isEqualTo(baseUrl + "/foo/../bar"); assertThat(UriComponentsBuilder.fromUriString(baseUrl + "/foo/../bar").build().toUriString()) - .isEqualTo(baseUrl + "/bar"); + .isEqualTo(baseUrl + "/foo/../bar"); assertThat(UriComponentsBuilder.fromUriString(baseUrl + "/foo/../bar").build().toUri().getPath()) - .isEqualTo("/bar"); + .isEqualTo("/foo/../bar"); assertThat(UriComponentsBuilder.fromUriString(baseUrl).path("foo/../bar").build().toString()) .isEqualTo(baseUrl + "/foo/../bar"); assertThat(UriComponentsBuilder.fromUriString(baseUrl).path("foo/../bar").build().toUriString()) @@ -741,9 +741,9 @@ class UriComponentsBuilderTests { // empty tester.accept("{}", "%7B%7D"); - tester.accept("{ \t}", "%7B%20%7D"); + tester.accept("{ \t}", "%7B%20%09%7D"); tester.accept("/a{}b", "/a%7B%7Db"); - tester.accept("/a{ \t}b", "/a%7B%20%7Db"); + tester.accept("/a{ \t}b", "/a%7B%20%09%7Db"); // nested, matching tester.accept("{foo{}}", "%7Bfoo%7B%7D%7D");