Add support for state override in new URL parser

See gh-32513
This commit is contained in:
Arjen Poutsma 2024-04-17 16:33:53 +02:00
parent f21e05a9e4
commit af23cc233e
3 changed files with 163 additions and 41 deletions

View File

@ -235,7 +235,7 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable {
builder.host(urlRecord.host().toString()); builder.host(urlRecord.host().toString());
} }
if (urlRecord.port() != null) { if (urlRecord.port() != null) {
builder.port(urlRecord.port()); builder.port(urlRecord.port().toString());
} }
if (urlRecord.path().isOpaque()) { if (urlRecord.path().isOpaque()) {
builder.schemeSpecificPart(urlRecord.path().toString()); builder.schemeSpecificPart(urlRecord.path().toString());

View File

@ -85,6 +85,9 @@ final class UrlParser {
@Nullable @Nullable
private State previousState; private State previousState;
@Nullable
private State stateOverride;
private boolean atSignSeen; private boolean atSignSeen;
private boolean passwordTokenSeen; private boolean passwordTokenSeen;
@ -142,6 +145,7 @@ final class UrlParser {
sanitizeInput(); sanitizeInput();
// Let state be state override if given, or scheme start state otherwise. // Let state be state override if given, or scheme start state otherwise.
this.state = stateOverride != null ? stateOverride : State.SCHEME_START; this.state = stateOverride != null ? stateOverride : State.SCHEME_START;
this.stateOverride = stateOverride;
// Keep running the following state machine by switching on state. // Keep running the following state machine by switching on state.
// If after a run pointer points to the EOF code point, go to the next step. // If after a run pointer points to the EOF code point, go to the next step.
@ -279,7 +283,7 @@ final class UrlParser {
(ch >= 'a' && ch <= 'z'); (ch >= 'a' && ch <= 'z');
} }
private static boolean containsOnlyAsciiDigits(String string) { private static boolean containsOnlyAsciiDigits(CharSequence string) {
for (int i=0; i< string.length(); i++ ) { for (int i=0; i< string.length(); i++ ) {
char ch = string.charAt(i); char ch = string.charAt(i);
if (!isAsciiDigit(ch)) { if (!isAsciiDigit(ch)) {
@ -330,6 +334,16 @@ final class UrlParser {
(ch >= 0x00A0 && ch <= 0x10FFFD && !Character.isSurrogate((char) ch) && !isNonCharacter(ch)); (ch >= 0x00A0 && ch <= 0x10FFFD && !Character.isSurrogate((char) ch) && !isNonCharacter(ch));
} }
/**
 * Determine whether the given scheme is one of the special schemes:
 * {@code ftp}, {@code file}, {@code http}, {@code https}, {@code ws},
 * or {@code wss}. The comparison is exact (case-sensitive).
 * @param scheme the scheme to check; a {@code null} value is not special
 * @return {@code true} if the scheme is special, {@code false} otherwise
 */
private static boolean isSpecialScheme(String scheme) {
	// A switch on a null String would throw, so rule that case out first.
	if (scheme == null) {
		return false;
	}
	return switch (scheme) {
		case "ftp", "file", "http", "https", "ws", "wss" -> true;
		default -> false;
	};
}
private static int defaultPort(@Nullable String scheme) { private static int defaultPort(@Nullable String scheme) {
if (scheme != null) { if (scheme != null) {
return switch (scheme) { return switch (scheme) {
@ -483,11 +497,15 @@ final class UrlParser {
p.previousState = SCHEME; p.previousState = SCHEME;
p.state = URL_TEMPLATE; p.state = URL_TEMPLATE;
} }
// Otherwise, set state to no scheme state and decrease pointer by 1. // Otherwise, if state override is not given, set state to no scheme state and decrease pointer by 1.
else { else if (p.stateOverride == null) {
p.setState(NO_SCHEME); p.setState(NO_SCHEME);
p.pointer--; p.pointer--;
} }
// Otherwise, return failure.
else {
p.failure(null);
}
} }
}, },
SCHEME { SCHEME {
@ -504,8 +522,40 @@ final class UrlParser {
} }
// Otherwise, if c is U+003A (:), then: // Otherwise, if c is U+003A (:), then:
else if (c == ':') { else if (c == ':') {
// If state override is given, then:
if (p.stateOverride != null) {
boolean urlSpecialScheme = url.isSpecial();
String bufferString = p.buffer.toString();
boolean bufferSpecialScheme = isSpecialScheme(bufferString);
// If url's scheme is a special scheme and buffer is not a special scheme, then return.
if (urlSpecialScheme && !bufferSpecialScheme) {
return;
}
// If url's scheme is not a special scheme and buffer is a special scheme, then return.
if (!urlSpecialScheme && bufferSpecialScheme) {
return;
}
// If url includes credentials or has a non-null port, and buffer is "file", then return.
if ((url.includesCredentials() || url.port() != null) && "file".equals(bufferString)) {
return;
}
// If url's scheme is "file" and its host is an empty host, then return.
if ("file".equals(url.scheme()) && (url.host() == null || url.host() == EmptyHost.INSTANCE)) {
return;
}
}
// Set urls scheme to buffer. // Set urls scheme to buffer.
url.scheme = p.buffer.toString(); url.scheme = p.buffer.toString();
// If state override is given, then:
if (p.stateOverride != null) {
// If url's port is url's scheme's default port, then set url's port to null.
if (url.port instanceof IntPort intPort &&
intPort.value() == defaultPort(url.scheme)) {
url.port = null;
// Return.
return;
}
}
// Set buffer to the empty string. // Set buffer to the empty string.
p.emptyBuffer(); p.emptyBuffer();
// If urls scheme is "file", then: // If urls scheme is "file", then:
@ -539,12 +589,16 @@ final class UrlParser {
p.setState(OPAQUE_PATH); p.setState(OPAQUE_PATH);
} }
} }
// Otherwise, set buffer to the empty string, state to no scheme state, and start over (from the first code point in input). // Otherwise, if state override is not given, set buffer to the empty string, state to no scheme state, and start over (from the first code point in input).
else { else if (p.stateOverride == null) {
p.emptyBuffer(); p.emptyBuffer();
p.setState(NO_SCHEME); p.setState(NO_SCHEME);
p.pointer = -1; p.pointer = -1;
} }
// Otherwise, return failure.
else {
p.failure(null);
}
} }
}, },
@ -798,12 +852,21 @@ final class UrlParser {
HOST { HOST {
@Override @Override
public void handle(int c, UrlRecord url, UrlParser p) { public void handle(int c, UrlRecord url, UrlParser p) {
// If state override is given and url's scheme is "file", then decrease pointer by 1 and set state to file host state.
if (p.stateOverride != null && "file".equals(url.scheme())) {
p.pointer--;
p.setState(FILE_HOST);
}
// Otherwise, if c is U+003A (:) and insideBrackets is false, then: // Otherwise, if c is U+003A (:) and insideBrackets is false, then:
if (c == ':' && !p.insideBrackets) { else if (c == ':' && !p.insideBrackets) {
// If buffer is the empty string, host-missing validation error, return failure. // If buffer is the empty string, host-missing validation error, return failure.
if (p.buffer.isEmpty()) { if (p.buffer.isEmpty()) {
p.failure("Missing host."); p.failure("Missing host.");
} }
// If state override is given and state override is hostname state, then return.
if (p.stateOverride == HOST) {
return;
}
// Let host be the result of host parsing buffer with url is not special. // Let host be the result of host parsing buffer with url is not special.
Host host = Host.parse(p.buffer.toString(), false, p.validationErrorHandler); Host host = Host.parse(p.buffer.toString(), false, p.validationErrorHandler);
// Set urls host to host, buffer to the empty string, and state to port state. // Set urls host to host, buffer to the empty string, and state to port state.
@ -822,6 +885,12 @@ final class UrlParser {
if (url.isSpecial() && p.buffer.isEmpty()) { if (url.isSpecial() && p.buffer.isEmpty()) {
p.failure("The input has a special scheme, but does not contain a host."); p.failure("The input has a special scheme, but does not contain a host.");
} }
// Otherwise, if state override is given, buffer is the empty string, and either url includes credentials or url's port is non-null, return.
else if (p.stateOverride != null && p.buffer.isEmpty() &&
(url.includesCredentials() || url.port() != null )) {
return;
}
// EXTRA: if buffer is not empty
if (!p.buffer.isEmpty()) { if (!p.buffer.isEmpty()) {
// Let host be the result of host parsing buffer with url is not special. // Let host be the result of host parsing buffer with url is not special.
Host host = Host.parse(p.buffer.toString(), false, p.validationErrorHandler); Host host = Host.parse(p.buffer.toString(), false, p.validationErrorHandler);
@ -833,6 +902,10 @@ final class UrlParser {
} }
p.emptyBuffer(); p.emptyBuffer();
p.setState(PATH_START); p.setState(PATH_START);
// If state override is given, then return.
if (p.stateOverride != null) {
return;
}
} }
// Otherwise: // Otherwise:
else { else {
@ -864,19 +937,14 @@ final class UrlParser {
// Otherwise, if one of the following is true: // Otherwise, if one of the following is true:
// - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#) // - c is the EOF code point, U+002F (/), U+003F (?), or U+0023 (#)
// - url is special and c is U+005C (\) // - url is special and c is U+005C (\)
// - state override is given
else if (c == EOF || c == '/' || c == '?' || c == '#' || else if (c == EOF || c == '/' || c == '?' || c == '#' ||
(url.isSpecial() && c == '\\')) { (url.isSpecial() && c == '\\') ||
(p.stateOverride != null)) {
// If buffer is not the empty string, then: // If buffer is not the empty string, then:
if (!p.buffer.isEmpty()) { if (!p.buffer.isEmpty()) {
boolean isNumber = true;
for (int i=0; i < p.buffer.length(); i++) {
if (!isAsciiDigit(p.buffer.charAt(i))) {
isNumber = false;
break;
}
}
// EXTRA: if buffer contains only ASCII digits, then // EXTRA: if buffer contains only ASCII digits, then
if (isNumber) { if (containsOnlyAsciiDigits(p.buffer)) {
try { try {
// Let port be the mathematical integer value that is represented by buffer in radix-10 using ASCII digits for digits with values 0 through 9. // Let port be the mathematical integer value that is represented by buffer in radix-10 using ASCII digits for digits with values 0 through 9.
int port = Integer.parseInt(p.buffer, 0, p.buffer.length(), 10); int port = Integer.parseInt(p.buffer, 0, p.buffer.length(), 10);
@ -890,7 +958,7 @@ final class UrlParser {
url.port = null; url.port = null;
} }
else { else {
url.port = Integer.toString(port); url.port = new IntPort(port);
} }
} }
catch (NumberFormatException ex) { catch (NumberFormatException ex) {
@ -899,11 +967,15 @@ final class UrlParser {
} }
// EXTRA: otherwise, set url's port to buffer // EXTRA: otherwise, set url's port to buffer
else { else {
url.port = p.buffer.toString(); url.port = new StringPort(p.buffer.toString());
} }
// Set buffer to the empty string. // Set buffer to the empty string.
p.emptyBuffer(); p.emptyBuffer();
} }
// If state override is given, then return.
if (p.stateOverride != null) {
return;
}
// Set state to path start state and decrease pointer by 1. // Set state to path start state and decrease pointer by 1.
p.setState(PATH_START); p.setState(PATH_START);
p.pointer--; p.pointer--;
@ -1016,8 +1088,8 @@ final class UrlParser {
// If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by 1 and then: // If c is the EOF code point, U+002F (/), U+005C (\), U+003F (?), or U+0023 (#), then decrease pointer by 1 and then:
if (c == EOF || c == '/' || c == '\\' || c == '?' || c == '#') { if (c == EOF || c == '/' || c == '\\' || c == '?' || c == '#') {
p.pointer--; p.pointer--;
// If buffer is a Windows drive letter, file-invalid-Windows-drive-letter-host validation error, set state to path state. // If state override is not given and buffer is a Windows drive letter, file-invalid-Windows-drive-letter-host validation error, set state to path state.
if (isWindowsDriveLetter(p.buffer, false)) { if (p.stateOverride == null && isWindowsDriveLetter(p.buffer, false)) {
p.validationError("A file: URLs host is a Windows drive letter."); p.validationError("A file: URLs host is a Windows drive letter.");
p.setState(PATH); p.setState(PATH);
} }
@ -1025,10 +1097,14 @@ final class UrlParser {
else if (p.buffer.isEmpty()) { else if (p.buffer.isEmpty()) {
// Set urls host to the empty string. // Set urls host to the empty string.
url.host = EmptyHost.INSTANCE; url.host = EmptyHost.INSTANCE;
// If state override is given, then return.
if (p.stateOverride != null) {
return;
}
// Set state to path start state. // Set state to path start state.
p.setState(PATH_START); p.setState(PATH_START);
} }
// Otherwise, basicUrlParser these steps: // Otherwise, run these steps:
else { else {
// Let host be the result of host parsing buffer with url is not special. // Let host be the result of host parsing buffer with url is not special.
Host host = Host.parse(p.buffer.toString(), false, p.validationErrorHandler); Host host = Host.parse(p.buffer.toString(), false, p.validationErrorHandler);
@ -1038,6 +1114,10 @@ final class UrlParser {
} }
// Set urls host to host. // Set urls host to host.
url.host = host; url.host = host;
// If state override is given, then return.
if (p.stateOverride != null) {
return;
}
// Set buffer to the empty string and state to path start state. // Set buffer to the empty string and state to path start state.
p.emptyBuffer(); p.emptyBuffer();
p.setState(PATH_START); p.setState(PATH_START);
@ -1068,13 +1148,13 @@ final class UrlParser {
p.append('/'); p.append('/');
} }
} }
// Otherwise, if c is U+003F (?), set urls query to the empty string and state to query state. // Otherwise, if state override is not given and if c is U+003F (?), set urls query to the empty string and state to query state.
else if (c == '?') { else if (p.stateOverride == null && c == '?') {
url.query = ""; url.query = "";
p.setState(QUERY); p.setState(QUERY);
} }
// Otherwise, if c is U+0023 (#), set urls fragment to the empty string and state to fragment state. // Otherwise, if state override is not given and if c is U+0023 (#), set urls fragment to the empty string and state to fragment state.
else if (c =='#') { else if (p.stateOverride == null && c =='#') {
url.fragment = ""; url.fragment = "";
p.setState(FRAGMENT); p.setState(FRAGMENT);
} }
@ -1090,10 +1170,10 @@ final class UrlParser {
else { else {
p.append('/'); p.append('/');
} }
} }
else { // Otherwise, if state override is given and urls host is null, append the empty string to urls path.
throw new IllegalStateException(); else if (p.stateOverride != null && url.host() == null) {
url.path().append("");
} }
} }
}, },
@ -1103,11 +1183,11 @@ final class UrlParser {
// If one of the following is true: // If one of the following is true:
// - c is the EOF code point or U+002F (/) // - c is the EOF code point or U+002F (/)
// - url is special and c is U+005C (\) // - url is special and c is U+005C (\)
// - c is U+003F (?) or U+0023 (#) // - state override is not given and c is U+003F (?) or U+0023 (#)
// then: // then:
if (c == EOF || c == '/' || if (c == EOF || c == '/' ||
url.isSpecial() && c == '\\' || url.isSpecial() && c == '\\' ||
c == '?' || c == '#') { (p.stateOverride == null && (c == '?' || c == '#'))) {
// If url is special and c is U+005C (\), invalid-reverse-solidus validation error. // If url is special and c is U+005C (\), invalid-reverse-solidus validation error.
if (p.validate() && url.isSpecial() && c == '\\') { if (p.validate() && url.isSpecial() && c == '\\') {
p.validationError("URL uses \"\\\" instead of \"/\""); p.validationError("URL uses \"\\\" instead of \"/\"");
@ -1238,9 +1318,9 @@ final class UrlParser {
p.encoding = StandardCharsets.UTF_8; p.encoding = StandardCharsets.UTF_8;
} }
// If one of the following is true: // If one of the following is true:
// - c is U+0023 (#) // - state override is not given and c is U+0023 (#)
// - c is the EOF code point // - c is the EOF code point
if (c == '#' || c == EOF) { if ( (p.stateOverride == null && c == '#') || c == EOF) {
// Let queryPercentEncodeSet be the special-query percent-encode set if url is special; otherwise the query percent-encode set. // Let queryPercentEncodeSet be the special-query percent-encode set if url is special; otherwise the query percent-encode set.
// Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, and append the result to urls query. // Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, and append the result to urls query.
String encoded = p.percentEncode(p.buffer.toString(), HierarchicalUriComponents.Type.QUERY); String encoded = p.percentEncode(p.buffer.toString(), HierarchicalUriComponents.Type.QUERY);
@ -1346,7 +1426,7 @@ final class UrlParser {
private Host host = null; private Host host = null;
@Nullable @Nullable
private String port = null; private Port port = null;
private Path path = new PathSegments(); private Path path = new PathSegments();
@ -1364,12 +1444,7 @@ final class UrlParser {
* A URL is special if its scheme is a special scheme. A URL is not special if its scheme is not a special scheme. * A URL is special if its scheme is a special scheme. A URL is not special if its scheme is not a special scheme.
*/ */
public boolean isSpecial() { public boolean isSpecial() {
return "ftp".equals(this.scheme) || return isSpecialScheme(this.scheme);
"file".equals(this.scheme) ||
"http".equals(this.scheme) ||
"https".equals(this.scheme) ||
"ws".equals(this.scheme) ||
"wss".equals(this.scheme);
} }
@ -1423,7 +1498,7 @@ final class UrlParser {
* port, or a string containing a uri template . It is initially {@code null}. * port, or a string containing a uri template . It is initially {@code null}.
*/ */
@Nullable @Nullable
public String port() { public Port port() {
return this.port; return this.port;
} }
@ -2175,6 +2250,47 @@ final class UrlParser {
} }
} }
/**
 * Sealed type for the port held by a URL record. It declares no methods of
 * its own; the only permitted implementations are {@link StringPort} and
 * {@link IntPort}.
 */
sealed interface Port permits StringPort, IntPort {
}
/**
 * {@link Port} implementation that keeps the port as a string. The parser
 * uses it when the port buffer is not made up solely of ASCII digits.
 */
static final class StringPort implements Port {

	private final String value;

	/**
	 * Create a new instance wrapping the given port text.
	 * @param port the port text, stored as-is
	 */
	public StringPort(String port) {
		this.value = port;
	}

	/**
	 * Return the port text exactly as it was supplied to the constructor.
	 */
	public String value() {
		return this.value;
	}

	/**
	 * Return the same text as {@link #value()}.
	 */
	@Override
	public String toString() {
		return value();
	}
}
/**
 * {@link Port} implementation that keeps the port as an {@code int}. The
 * parser uses it for ports it has parsed as a radix-10 integer.
 */
static final class IntPort implements Port {

	private final int value;

	/**
	 * Create a new instance wrapping the given port number.
	 * @param port the port number, stored as-is
	 */
	public IntPort(int port) {
		this.value = port;
	}

	/**
	 * Return the port number exactly as it was supplied to the constructor.
	 */
	public int value() {
		return this.value;
	}

	/**
	 * Return the decimal string representation of the port number.
	 */
	@Override
	public String toString() {
		return String.valueOf(this.value);
	}
}
sealed interface Path permits PathSegment, PathSegments { sealed interface Path permits PathSegment, PathSegments {
void append(String s); void append(String s);

View File

@ -54,7 +54,13 @@ class UrlParserTests {
else { else {
assertThat(result.host()).as("Host is not null").isNull(); assertThat(result.host()).as("Host is not null").isNull();
} }
assertThat(result.port()).as("Invalid port").isEqualTo(port); if (port != null) {
assertThat(result.port()).as("Port is null").isNotNull();
assertThat(result.port().toString()).as("Invalid port").isEqualTo(port);
}
else {
assertThat(result.port()).as("Port is not null").isNull();
}
assertThat(result.path().toString()).as("Invalid path").isEqualTo(path); assertThat(result.path().toString()).as("Invalid path").isEqualTo(path);
assertThat(result.query()).as("Invalid query").isEqualTo(query); assertThat(result.query()).as("Invalid query").isEqualTo(query);
assertThat(result.fragment()).as("Invalid fragment").isEqualTo(fragment); assertThat(result.fragment()).as("Invalid fragment").isEqualTo(fragment);