Introduce new URL parser

This commit introduces a new URL parser based on the algorithm provided
in the URL Living Standard. The new UrlParser is used by
UriComponentsBuilder::fromUriString, replacing the regular expressions.

Closes gh-32513
This commit is contained in:
Arjen Poutsma 2024-03-05 09:46:28 +01:00
parent 8727d723f3
commit f21e05a9e4
8 changed files with 2530 additions and 110 deletions

View File

@ -657,6 +657,12 @@ final class HierarchicalUriComponents extends UriComponents {
public boolean isAllowed(int c) {
return isUnreserved(c);
}
},
C0 {
/**
 * Accepts every value except those in the range {@code 0x00}-{@code 0x1F}
 * and those greater than {@code '~'}.
 */
@Override
public boolean isAllowed(int c) {
	// Negative values lie outside both rejected ranges and are accepted.
	return (c < 0 || c > 0x1f) && c <= '~';
}
};
/**

View File

@ -0,0 +1,47 @@
/*
* Copyright 2002-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.web.util;
/**
 * Exception signalling that a URL string could not be parsed.
 *
 * <p>Extends {@link IllegalArgumentException}, so code that catches
 * {@code IllegalArgumentException} also catches this exception.
 *
 * @author Arjen Poutsma
 * @since 6.2
 */
public class InvalidUrlException extends IllegalArgumentException {

	private static final long serialVersionUID = 7409308391039105562L;


	/**
	 * Create a new {@code InvalidUrlException} carrying only a detail message.
	 * @param msg the detail message
	 */
	public InvalidUrlException(String msg) {
		super(msg);
	}

	/**
	 * Create a new {@code InvalidUrlException} carrying a detail message
	 * together with the exception that caused it.
	 * @param msg the detail message
	 * @param cause the nested exception
	 */
	public InvalidUrlException(String msg, Throwable cause) {
		super(msg, cause);
	}

}

View File

@ -97,12 +97,10 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable {
"^(" + SCHEME_PATTERN + ")?" + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN +
")?" + ")?" + PATH_PATTERN + "(\\?" + QUERY_PATTERN + ")?" + "(#" + LAST_PATTERN + ")?");
private static final Pattern HTTP_URL_PATTERN = Pattern.compile(
"^" + HTTP_PATTERN + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN + ")?" + ")?" +
PATH_PATTERN + "(\\?" + QUERY_PATTERN + ")?" + "(#" + LAST_PATTERN + ")?");
private static final Object[] EMPTY_VALUES = new Object[0];
private static final UrlParser.UrlRecord EMPTY_URL_RECORD = new UrlParser.UrlRecord();
@Nullable
private String scheme;
@ -214,52 +212,45 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable {
* </pre>
* @param uri the URI string to initialize with
* @return the new {@code UriComponentsBuilder}
* @throws InvalidUrlException if {@code uri} cannot be parsed
*/
public static UriComponentsBuilder fromUriString(String uri) {
public static UriComponentsBuilder fromUriString(String uri) throws InvalidUrlException {
Assert.notNull(uri, "URI must not be null");
Matcher matcher = URI_PATTERN.matcher(uri);
if (matcher.matches()) {
UriComponentsBuilder builder = new UriComponentsBuilder();
String scheme = matcher.group(2);
String userInfo = matcher.group(5);
String host = matcher.group(6);
String port = matcher.group(8);
String path = matcher.group(9);
String query = matcher.group(11);
String fragment = matcher.group(13);
boolean opaque = false;
if (StringUtils.hasLength(scheme)) {
String rest = uri.substring(scheme.length());
if (!rest.startsWith(":/")) {
opaque = true;
}
UriComponentsBuilder builder = new UriComponentsBuilder();
if (!uri.isEmpty()) {
UrlParser.UrlRecord urlRecord = UrlParser.parse(uri, EMPTY_URL_RECORD, null, null);
if (!urlRecord.scheme().isEmpty()) {
builder.scheme(urlRecord.scheme());
}
builder.scheme(scheme);
if (opaque) {
String ssp = uri.substring(scheme.length() + 1);
if (StringUtils.hasLength(fragment)) {
ssp = ssp.substring(0, ssp.length() - (fragment.length() + 1));
if (urlRecord.includesCredentials()) {
StringBuilder userInfo = new StringBuilder(urlRecord.username());
if (!urlRecord.password().isEmpty()) {
userInfo.append(':');
userInfo.append(urlRecord.password());
}
builder.schemeSpecificPart(ssp);
builder.userInfo(userInfo.toString());
}
if (urlRecord.host() != null && !(urlRecord.host() instanceof UrlParser.EmptyHost)) {
builder.host(urlRecord.host().toString());
}
if (urlRecord.port() != null) {
builder.port(urlRecord.port());
}
if (urlRecord.path().isOpaque()) {
builder.schemeSpecificPart(urlRecord.path().toString());
}
else {
checkSchemeAndHost(uri, scheme, host);
builder.userInfo(userInfo);
builder.host(host);
if (StringUtils.hasLength(port)) {
builder.port(port);
builder.path(urlRecord.path().toString());
if (StringUtils.hasLength(urlRecord.query())) {
builder.query(urlRecord.query());
}
builder.path(path);
builder.query(query);
}
if (StringUtils.hasText(fragment)) {
builder.fragment(fragment);
if (StringUtils.hasLength(urlRecord.fragment())) {
builder.fragment(urlRecord.fragment());
}
return builder;
}
else {
throw new IllegalArgumentException("[" + uri + "] is not a valid URI");
}
return builder;
}
/**
@ -275,33 +266,11 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable {
* </pre>
* @param httpUrl the source URI
* @return the URI components of the URI
* @deprecated as of 6.2, in favor of {@link #fromUriString(String)}
*/
public static UriComponentsBuilder fromHttpUrl(String httpUrl) {
Assert.notNull(httpUrl, "HTTP URL must not be null");
Matcher matcher = HTTP_URL_PATTERN.matcher(httpUrl);
if (matcher.matches()) {
UriComponentsBuilder builder = new UriComponentsBuilder();
String scheme = matcher.group(1);
builder.scheme(scheme != null ? scheme.toLowerCase() : null);
builder.userInfo(matcher.group(4));
String host = matcher.group(5);
checkSchemeAndHost(httpUrl, scheme, host);
builder.host(host);
String port = matcher.group(7);
if (StringUtils.hasLength(port)) {
builder.port(port);
}
builder.path(matcher.group(8));
builder.query(matcher.group(10));
String fragment = matcher.group(12);
if (StringUtils.hasText(fragment)) {
builder.fragment(fragment);
}
return builder;
}
else {
throw new IllegalArgumentException("[" + httpUrl + "] is not a valid HTTP URL");
}
@Deprecated(since = "6.2")
public static UriComponentsBuilder fromHttpUrl(String httpUrl) throws InvalidUrlException {
return fromUriString(httpUrl);
}
private static void checkSchemeAndHost(String uri, @Nullable String scheme, @Nullable String host) {

File diff suppressed because it is too large Load Diff

View File

@ -639,7 +639,7 @@ class ForwardedHeaderFilterTests {
String location = "//other.info/parent/../foo/bar";
String redirectedUrl = sendRedirect(location);
assertThat(redirectedUrl).isEqualTo(("https:" + location));
assertThat(redirectedUrl).isEqualTo(("https://other.info/foo/bar"));
}
@Test

View File

@ -33,7 +33,6 @@ import org.springframework.util.MultiValueMap;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatIllegalArgumentException;
import static org.assertj.core.api.Assertions.assertThatIllegalStateException;
/**
* Tests for {@link UriComponentsBuilder}.
@ -228,15 +227,11 @@ class UriComponentsBuilderTests {
void fromUriStringIPv6Host() {
UriComponents result = UriComponentsBuilder
.fromUriString("http://[1abc:2abc:3abc::5ABC:6abc]:8080/resource").build().encode();
assertThat(result.getHost()).isEqualTo("[1abc:2abc:3abc::5ABC:6abc]");
UriComponents resultWithScopeId = UriComponentsBuilder
.fromUriString("http://[1abc:2abc:3abc::5ABC:6abc%eth0]:8080/resource").build().encode();
assertThat(resultWithScopeId.getHost()).isEqualTo("[1abc:2abc:3abc::5ABC:6abc%25eth0]");
assertThat(result.getHost()).isEqualToIgnoringCase("[1abc:2abc:3abc::5ABC:6abc]");
UriComponents resultIPv4compatible = UriComponentsBuilder
.fromUriString("http://[::192.168.1.1]:8080/resource").build().encode();
assertThat(resultIPv4compatible.getHost()).isEqualTo("[::192.168.1.1]");
assertThat(resultIPv4compatible.getHost()).isEqualTo("[::c0a8:101]");
}
@Test
@ -257,27 +252,27 @@ class UriComponentsBuilderTests {
@Test // SPR-14828
void fromHttpUrlQueryParamEncodedAndContainingPlus() {
String httpUrl = "http://localhost:8080/test/print?value=%EA%B0%80+%EB%82%98";
URI uri = UriComponentsBuilder.fromHttpUrl(httpUrl).build(true).toUri();
URI uri = UriComponentsBuilder.fromUriString(httpUrl).build(true).toUri();
assertThat(uri.toString()).isEqualTo(httpUrl);
}
@Test // SPR-10779
void fromHttpUrlCaseInsensitiveScheme() {
assertThat(UriComponentsBuilder.fromHttpUrl("HTTP://www.google.com").build().getScheme()).isEqualTo("http");
assertThat(UriComponentsBuilder.fromHttpUrl("HTTPS://www.google.com").build().getScheme()).isEqualTo("https");
assertThat(UriComponentsBuilder.fromUriString("HTTP://www.google.com").build().getScheme()).isEqualTo("http");
assertThat(UriComponentsBuilder.fromUriString("HTTPS://www.google.com").build().getScheme()).isEqualTo("https");
}
@Test // SPR-10539
void fromHttpUrlInvalidIPv6Host() {
assertThatIllegalArgumentException().isThrownBy(() ->
UriComponentsBuilder.fromHttpUrl("http://[1abc:2abc:3abc::5ABC:6abc:8080/resource"));
UriComponentsBuilder.fromUriString("http://[1abc:2abc:3abc::5ABC:6abc:8080/resource"));
}
@Test
void fromHttpUrlWithoutFragment() {
String httpUrl = "http://localhost:8080/test/print";
UriComponents uriComponents = UriComponentsBuilder.fromHttpUrl(httpUrl).build();
UriComponents uriComponents = UriComponentsBuilder.fromUriString(httpUrl).build();
assertThat(uriComponents.getScheme()).isEqualTo("http");
assertThat(uriComponents.getUserInfo()).isNull();
assertThat(uriComponents.getHost()).isEqualTo("localhost");
@ -289,7 +284,7 @@ class UriComponentsBuilderTests {
assertThat(uriComponents.toUri().toString()).isEqualTo(httpUrl);
httpUrl = "http://user:test@localhost:8080/test/print?foo=bar";
uriComponents = UriComponentsBuilder.fromHttpUrl(httpUrl).build();
uriComponents = UriComponentsBuilder.fromUriString(httpUrl).build();
assertThat(uriComponents.getScheme()).isEqualTo("http");
assertThat(uriComponents.getUserInfo()).isEqualTo("user:test");
assertThat(uriComponents.getHost()).isEqualTo("localhost");
@ -301,7 +296,7 @@ class UriComponentsBuilderTests {
assertThat(uriComponents.toUri().toString()).isEqualTo(httpUrl);
httpUrl = "http://localhost:8080/test/print?foo=bar";
uriComponents = UriComponentsBuilder.fromHttpUrl(httpUrl).build();
uriComponents = UriComponentsBuilder.fromUriString(httpUrl).build();
assertThat(uriComponents.getScheme()).isEqualTo("http");
assertThat(uriComponents.getUserInfo()).isNull();
assertThat(uriComponents.getHost()).isEqualTo("localhost");
@ -315,20 +310,20 @@ class UriComponentsBuilderTests {
@Test // gh-25300
void fromHttpUrlWithFragment() {
String httpUrl = "https://example.com#baz";
UriComponents uriComponents = UriComponentsBuilder.fromHttpUrl(httpUrl).build();
String httpUrl = "https://example.com/#baz";
UriComponents uriComponents = UriComponentsBuilder.fromUriString(httpUrl).build();
assertThat(uriComponents.getScheme()).isEqualTo("https");
assertThat(uriComponents.getUserInfo()).isNull();
assertThat(uriComponents.getHost()).isEqualTo("example.com");
assertThat(uriComponents.getPort()).isEqualTo(-1);
assertThat(uriComponents.getPath()).isNullOrEmpty();
assertThat(uriComponents.getPath()).isEqualTo("/");
assertThat(uriComponents.getPathSegments()).isEmpty();
assertThat(uriComponents.getQuery()).isNull();
assertThat(uriComponents.getFragment()).isEqualTo("baz");
assertThat(uriComponents.toUri().toString()).isEqualTo(httpUrl);
httpUrl = "http://localhost:8080/test/print#baz";
uriComponents = UriComponentsBuilder.fromHttpUrl(httpUrl).build();
uriComponents = UriComponentsBuilder.fromUriString(httpUrl).build();
assertThat(uriComponents.getScheme()).isEqualTo("http");
assertThat(uriComponents.getUserInfo()).isNull();
assertThat(uriComponents.getHost()).isEqualTo("localhost");
@ -340,7 +335,7 @@ class UriComponentsBuilderTests {
assertThat(uriComponents.toUri().toString()).isEqualTo(httpUrl);
httpUrl = "http://localhost:8080/test/print?foo=bar#baz";
uriComponents = UriComponentsBuilder.fromHttpUrl(httpUrl).build();
uriComponents = UriComponentsBuilder.fromUriString(httpUrl).build();
assertThat(uriComponents.getScheme()).isEqualTo("http");
assertThat(uriComponents.getUserInfo()).isNull();
assertThat(uriComponents.getHost()).isEqualTo("localhost");
@ -637,17 +632,11 @@ class UriComponentsBuilderTests {
void relativeUrls() {
String baseUrl = "https://example.com";
assertThat(UriComponentsBuilder.fromUriString(baseUrl + "/foo/../bar").build().toString())
.isEqualTo(baseUrl + "/foo/../bar");
.isEqualTo(baseUrl + "/bar");
assertThat(UriComponentsBuilder.fromUriString(baseUrl + "/foo/../bar").build().toUriString())
.isEqualTo(baseUrl + "/foo/../bar");
.isEqualTo(baseUrl + "/bar");
assertThat(UriComponentsBuilder.fromUriString(baseUrl + "/foo/../bar").build().toUri().getPath())
.isEqualTo("/foo/../bar");
assertThat(UriComponentsBuilder.fromUriString("../../").build().toString())
.isEqualTo("../../");
assertThat(UriComponentsBuilder.fromUriString("../../").build().toUriString())
.isEqualTo("../../");
assertThat(UriComponentsBuilder.fromUriString("../../").build().toUri().getPath())
.isEqualTo("../../");
.isEqualTo("/bar");
assertThat(UriComponentsBuilder.fromUriString(baseUrl).path("foo/../bar").build().toString())
.isEqualTo(baseUrl + "/foo/../bar");
assertThat(UriComponentsBuilder.fromUriString(baseUrl).path("foo/../bar").build().toUriString())
@ -746,9 +735,9 @@ class UriComponentsBuilderTests {
// empty
tester.accept("{}", "%7B%7D");
tester.accept("{ \t}", "%7B%20%09%7D");
tester.accept("{ \t}", "%7B%20%7D");
tester.accept("/a{}b", "/a%7B%7Db");
tester.accept("/a{ \t}b", "/a%7B%20%09%7Db");
tester.accept("/a{ \t}b", "/a%7B%20%7Db");
// nested, matching
tester.accept("{foo{}}", "%7Bfoo%7B%7D%7D");
@ -815,19 +804,17 @@ class UriComponentsBuilderTests {
@Test
void verifyInvalidPort() {
String url = "http://localhost:XXX/path";
assertThatIllegalStateException()
.isThrownBy(() -> UriComponentsBuilder.fromUriString(url).build().toUri())
.withMessage("The port must be an integer: XXX");
assertThatIllegalStateException()
.isThrownBy(() -> UriComponentsBuilder.fromHttpUrl(url).build().toUri())
.withMessage("The port must be an integer: XXX");
assertThatIllegalArgumentException()
.isThrownBy(() -> UriComponentsBuilder.fromUriString(url).build().toUri());
assertThatIllegalArgumentException()
.isThrownBy(() -> UriComponentsBuilder.fromUriString(url).build().toUri());
}
@Test // gh-27039
void expandPortAndPathWithoutSeparator() {
URI uri = UriComponentsBuilder
.fromUriString("ws://localhost:{port}{path}")
.buildAndExpand(7777, "/test")
.fromUriString("ws://localhost:{port}/{path}")
.buildAndExpand(7777, "test")
.toUri();
assertThat(uri.toString()).isEqualTo("ws://localhost:7777/test");
}

View File

@ -27,9 +27,9 @@ import java.util.Collections;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatExceptionOfType;
import static org.assertj.core.api.Assertions.assertThatIllegalArgumentException;
import static org.assertj.core.api.Assertions.assertThatIllegalStateException;
import static org.springframework.web.util.UriComponentsBuilder.fromHttpUrl;
import static org.springframework.web.util.UriComponentsBuilder.fromUriString;
/**
@ -159,10 +159,9 @@ class UriComponentsTests {
@Test // gh-28521
void invalidPort() {
assertExceptionsForInvalidPort(fromUriString("https://example.com:XXX/bar").build());
assertThatExceptionOfType(InvalidUrlException.class)
.isThrownBy(() -> fromUriString("https://example.com:XXX/bar"));
assertExceptionsForInvalidPort(fromUriString("https://example.com/bar").port("XXX").build());
assertExceptionsForInvalidPort(fromHttpUrl("https://example.com:XXX/bar").build());
assertExceptionsForInvalidPort(fromHttpUrl("https://example.com/bar").port("XXX").build());
}
private void assertExceptionsForInvalidPort(UriComponents uriComponents) {
@ -243,7 +242,6 @@ class UriComponentsTests {
UriComponents uric2 = UriComponentsBuilder.fromUriString(baseUrl + "/foo/bar").build();
UriComponents uric3 = UriComponentsBuilder.fromUriString(baseUrl + "/foo/bin").build();
assertThat(uric1).isInstanceOf(OpaqueUriComponents.class);
assertThat(uric1).isEqualTo(uric1);
assertThat(uric1).isEqualTo(uric2);
assertThat(uric1).isNotEqualTo(uric3);

View File

@ -0,0 +1,62 @@
/*
* Copyright 2002-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.web.util;
import org.junit.jupiter.api.Test;
import org.springframework.lang.Nullable;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Tests for {@link UrlParser}.
 *
 * @author Arjen Poutsma
 */
class UrlParserTests {

	private static final UrlParser.UrlRecord EMPTY_URL_RECORD = new UrlParser.UrlRecord();


	@Test
	void parse() {
		// Inputs with scheme and host
		testParse("https://example.com", "https", "example.com", null, "", null, null);
		testParse("https://example.com/", "https", "example.com", null, "/", null, null);
		testParse("https://example.com/foo", "https", "example.com", null, "/foo", null, null);
		testParse("https://example.com/foo/", "https", "example.com", null, "/foo/", null, null);
		testParse("https://example.com:81/foo", "https", "example.com", "81", "/foo", null, null);

		// Path-only inputs: empty scheme, no host; "/foo/.." is expected to be collapsed
		testParse("/foo", "", null, null, "/foo", null, null);
		testParse("/foo/", "", null, null, "/foo/", null, null);
		testParse("/foo/../bar", "", null, null, "/bar", null, null);
		testParse("/foo/../bar/", "", null, null, "/bar/", null, null);

		// Inputs starting with "//": empty scheme, but a host is expected
		testParse("//other.info/foo/bar", "", "other.info", null, "/foo/bar", null, null);
		testParse("//other.info/parent/../foo/bar", "", "other.info", null, "/foo/bar", null, null);
	}

	/**
	 * Parse {@code input} against {@link #EMPTY_URL_RECORD} and compare each
	 * component of the resulting record with the given expectation.
	 * A {@code null} expected host asserts that the parsed host is {@code null}.
	 */
	private void testParse(String input, String scheme, @Nullable String host, @Nullable String port,
			String path, @Nullable String query, @Nullable String fragment) {

		UrlParser.UrlRecord parsed = UrlParser.parse(input, EMPTY_URL_RECORD, null, null);

		assertThat(parsed.scheme()).as("Invalid scheme").isEqualTo(scheme);

		if (host == null) {
			assertThat(parsed.host()).as("Host is not null").isNull();
		}
		else {
			assertThat(parsed.host()).as("Host is null").isNotNull();
			// The host is compared through its string form
			assertThat(parsed.host().toString()).as("Invalid host").isEqualTo(host);
		}

		assertThat(parsed.port()).as("Invalid port").isEqualTo(port);
		assertThat(parsed.path().toString()).as("Invalid path").isEqualTo(path);
		assertThat(parsed.query()).as("Invalid query").isEqualTo(query);
		assertThat(parsed.fragment()).as("Invalid fragment").isEqualTo(fragment);
	}

}