Pluggable URI parsing, use RFC parser by default

See gh-33639
This commit is contained in:
rstoyanchev 2024-10-04 14:36:59 +01:00
parent 52805da733
commit bbb53d03c4
5 changed files with 167 additions and 46 deletions

View File

@ -45,6 +45,9 @@ public class DefaultUriBuilderFactory implements UriBuilderFactory {
@Nullable
private final UriComponentsBuilder baseUri;
@Nullable
private UriComponentsBuilder.ParserType parserType;
private EncodingMode encodingMode = EncodingMode.TEMPLATE_AND_VALUES;
@Nullable
@ -92,6 +95,26 @@ public class DefaultUriBuilderFactory implements UriBuilderFactory {
return (this.baseUri != null);
}
/**
* Set the {@link UriComponentsBuilder.ParserType} to use.
* <p>By default, if the parser type is not specified,
* {@link UriComponentsBuilder} uses {@link UriComponentsBuilder.ParserType#RFC}.
* @param parserType the parser type
* @since 6.2
*/
public void setParserType(UriComponentsBuilder.ParserType parserType) {
this.parserType = parserType;
}
/**
* Return the configured parser type.
* @since 6.2
*/
@Nullable
public UriComponentsBuilder.ParserType getParserType() {
return this.parserType;
}
/**
* Set the {@link EncodingMode encoding mode} to use.
* <p>By default this is set to {@link EncodingMode#TEMPLATE_AND_VALUES
@ -265,12 +288,12 @@ public class DefaultUriBuilderFactory implements UriBuilderFactory {
result = (baseUri != null ? baseUri.cloneBuilder() : UriComponentsBuilder.newInstance());
}
else if (baseUri != null) {
UriComponentsBuilder builder = UriComponentsBuilder.fromUriString(uriTemplate);
UriComponentsBuilder builder = parseUri(uriTemplate);
UriComponents uri = builder.build();
result = (uri.getHost() == null ? baseUri.cloneBuilder().uriComponents(uri) : builder);
}
else {
result = UriComponentsBuilder.fromUriString(uriTemplate);
result = parseUri(uriTemplate);
}
if (encodingMode.equals(EncodingMode.TEMPLATE_AND_VALUES)) {
result.encode();
@ -279,6 +302,12 @@ public class DefaultUriBuilderFactory implements UriBuilderFactory {
return result;
}
private UriComponentsBuilder parseUri(String uriTemplate) {
return (getParserType() != null ?
UriComponentsBuilder.fromUriString(uriTemplate, getParserType()) :
UriComponentsBuilder.fromUriString(uriTemplate));
}
private void parsePathIfNecessary(UriComponentsBuilder result) {
if (parsePath && encodingMode.equals(EncodingMode.URI_COMPONENT)) {
UriComponents uric = result.build();

View File

@ -197,7 +197,19 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable {
}
/**
* Create a builder that is initialized with the given URI string.
* Variant of {@link #fromUriString(String, ParserType)} that defaults to
* the {@link ParserType#RFC} parsing.
*/
public static UriComponentsBuilder fromUriString(String uri) throws InvalidUrlException {
Assert.notNull(uri, "URI must not be null");
if (uri.isEmpty()) {
return new UriComponentsBuilder();
}
return fromUriString(uri, ParserType.RFC);
}
/**
* Create a builder that is initialized by parsing the given URI string.
* <p><strong>Note:</strong> The presence of reserved characters can prevent
* correct parsing of the URI string. For example if a query parameter
* contains {@code '='} or {@code '&'} characters, the query string cannot
@ -208,47 +220,27 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable {
* UriComponentsBuilder.fromUriString(uriString).buildAndExpand(&quot;hot&amp;cold&quot;);
* </pre>
* @param uri the URI string to initialize with
* @param parserType the parsing algorithm to use
* @return the new {@code UriComponentsBuilder}
* @throws InvalidUrlException if {@code uri} cannot be parsed
* @since 6.2
*/
public static UriComponentsBuilder fromUriString(String uri) throws InvalidUrlException {
public static UriComponentsBuilder fromUriString(String uri, ParserType parserType) throws InvalidUrlException {
Assert.notNull(uri, "URI must not be null");
UriComponentsBuilder builder = new UriComponentsBuilder();
if (!uri.isEmpty()) {
WhatWgUrlParser.UrlRecord urlRecord = WhatWgUrlParser.parse(uri, EMPTY_URL_RECORD, null, null);
if (!urlRecord.scheme().isEmpty()) {
builder.scheme(urlRecord.scheme());
}
if (urlRecord.includesCredentials()) {
StringBuilder userInfo = new StringBuilder(urlRecord.username());
if (!urlRecord.password().isEmpty()) {
userInfo.append(':');
userInfo.append(urlRecord.password());
}
builder.userInfo(userInfo.toString());
}
if (urlRecord.host() != null && !(urlRecord.host() instanceof WhatWgUrlParser.EmptyHost)) {
builder.host(urlRecord.host().toString());
}
if (urlRecord.port() != null) {
builder.port(urlRecord.port().toString());
}
if (urlRecord.path().isOpaque()) {
String ssp = urlRecord.path() + urlRecord.search();
builder.schemeSpecificPart(ssp);
}
else {
builder.path(urlRecord.path().toString());
if (StringUtils.hasLength(urlRecord.query())) {
builder.query(urlRecord.query());
}
}
if (StringUtils.hasLength(urlRecord.fragment())) {
builder.fragment(urlRecord.fragment());
}
if (uri.isEmpty()) {
return new UriComponentsBuilder();
}
return builder;
UriComponentsBuilder builder = new UriComponentsBuilder();
return switch (parserType) {
case RFC -> {
RfcUriParser.UriRecord record = RfcUriParser.parse(uri);
yield builder.rfcUriRecord(record);
}
case WHAT_WG -> {
WhatWgUrlParser.UrlRecord record = WhatWgUrlParser.parse(uri, EMPTY_URL_RECORD, null, null);
yield builder.whatWgUrlRecord(record);
}
};
}
/**
@ -517,6 +509,58 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable {
return this;
}
/**
* Internal method to initialize this builder from an RFC {@code UriRecord}.
*/
private UriComponentsBuilder rfcUriRecord(RfcUriParser.UriRecord record) {
scheme(record.scheme());
if (record.isOpaque()) {
if (record.path() != null) {
schemeSpecificPart(record.path());
}
}
else {
userInfo(record.user());
host(record.host());
port(record.port());
if (record.path() != null) {
path(record.path());
}
query(record.query());
}
fragment(record.fragment());
return this;
}
/**
* Internal method to initialize this builder from a WhatWG {@code UrlRecord}.
*/
private UriComponentsBuilder whatWgUrlRecord(WhatWgUrlParser.UrlRecord record) {
if (!record.scheme().isEmpty()) {
scheme(record.scheme());
}
if (record.path().isOpaque()) {
String ssp = record.path() + record.search();
schemeSpecificPart(ssp);
}
else {
userInfo(record.userInfo());
String hostname = record.hostname();
if (StringUtils.hasText(hostname)) {
host(hostname);
}
if (record.port() != null) {
port(record.portString());
}
path(record.path().toString());
query(record.query());
if (StringUtils.hasText(record.fragment())) {
fragment(record.fragment());
}
}
return this;
}
@Override
public UriComponentsBuilder scheme(@Nullable String scheme) {
this.scheme = scheme;
@ -790,6 +834,34 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable {
}
/**
* Enum to represent different URI parsing mechanisms.
*/
public enum ParserType {
/**
* Parser that expects URI's conforming to RFC 3986 syntax.
*/
RFC,
/**
* Parser based on algorithm defined in the WhatWG URL Living standard.
* Browsers use this algorithm to align on lenient parsing of user typed
* URL's that may deviate from RFC syntax.
* <p>For more details, see:
* <ul>
* <li><a href="https://url.spec.whatwg.org">URL Living Standard</a>
* <li><a href="https://url.spec.whatwg.org/#url-parsing">Section 4.4: URL parsing</a>
* <li><a href="https://github.com/web-platform-tests/wpt/tree/master/url">web-platform-tests</a>
* </ul>
* <p>Use this if you need to leniently handle URL's that don't conform
* to RFC syntax, or for alignment with browser parsing.
*/
WHAT_WG
}
private static class CompositePathComponentBuilder implements PathComponentBuilder {
private final Deque<PathComponentBuilder> builders = new ArrayDeque<>();

View File

@ -38,7 +38,11 @@ import org.springframework.util.Assert;
/**
* Implementation of the
* <a href="https://url.spec.whatwg.org/#url-parsing">URL parsing</a> algorithm
* of the WhatWG URL Living standard.
* of the WhatWG URL Living standard. Browsers use this algorithm to align on
* lenient parsing of user typed URL's that may deviate from RFC syntax.
* Use this, via {@link UriComponentsBuilder.ParserType#WHAT_WG}, if you need to
* leniently handle URL's that don't confirm to RFC syntax, or for alignment
* with browser behavior.
*
* <p>Comments in this class correlate to the parsing algorithm.
* The implementation differs from the spec in the following ways:
@ -1895,6 +1899,22 @@ final class WhatWgUrlParser {
}
}
/**
* Convenience method to return the full user info.
*/
@Nullable
public String userInfo() {
if (!includesCredentials()) {
return null;
}
StringBuilder userInfo = new StringBuilder(username());
if (!password().isEmpty()) {
userInfo.append(':');
userInfo.append(password());
}
return userInfo.toString();
}
/**
* A URLs host is {@code null} or a {@linkplain Host host}.
* It is initially {@code null}.

View File

@ -649,7 +649,7 @@ class ForwardedHeaderFilterTests {
String location = "//other.info/parent/../foo/bar";
String redirectedUrl = sendRedirect(location);
assertThat(redirectedUrl).isEqualTo(("https://other.info/foo/bar"));
assertThat(redirectedUrl).isEqualTo(("https:" + location));
}
@Test

View File

@ -638,11 +638,11 @@ class UriComponentsBuilderTests {
void relativeUrls() {
String baseUrl = "https://example.com";
assertThat(UriComponentsBuilder.fromUriString(baseUrl + "/foo/../bar").build().toString())
.isEqualTo(baseUrl + "/bar");
.isEqualTo(baseUrl + "/foo/../bar");
assertThat(UriComponentsBuilder.fromUriString(baseUrl + "/foo/../bar").build().toUriString())
.isEqualTo(baseUrl + "/bar");
.isEqualTo(baseUrl + "/foo/../bar");
assertThat(UriComponentsBuilder.fromUriString(baseUrl + "/foo/../bar").build().toUri().getPath())
.isEqualTo("/bar");
.isEqualTo("/foo/../bar");
assertThat(UriComponentsBuilder.fromUriString(baseUrl).path("foo/../bar").build().toString())
.isEqualTo(baseUrl + "/foo/../bar");
assertThat(UriComponentsBuilder.fromUriString(baseUrl).path("foo/../bar").build().toUriString())
@ -741,9 +741,9 @@ class UriComponentsBuilderTests {
// empty
tester.accept("{}", "%7B%7D");
tester.accept("{ \t}", "%7B%20%7D");
tester.accept("{ \t}", "%7B%20%09%7D");
tester.accept("/a{}b", "/a%7B%7Db");
tester.accept("/a{ \t}b", "/a%7B%20%7Db");
tester.accept("/a{ \t}b", "/a%7B%20%09%7Db");
// nested, matching
tester.accept("{foo{}}", "%7Bfoo%7B%7D%7D");