diff --git a/spring-web/src/main/java/org/springframework/web/util/HierarchicalUriComponents.java b/spring-web/src/main/java/org/springframework/web/util/HierarchicalUriComponents.java index d00445be44..c97f84e698 100644 --- a/spring-web/src/main/java/org/springframework/web/util/HierarchicalUriComponents.java +++ b/spring-web/src/main/java/org/springframework/web/util/HierarchicalUriComponents.java @@ -657,12 +657,6 @@ final class HierarchicalUriComponents extends UriComponents { public boolean isAllowed(int c) { return isUnreserved(c); } - }, - C0 { - @Override - public boolean isAllowed(int c) { - return !(c >= 0 && c <= 0x1f) && !(c > '~'); - } }; /** diff --git a/spring-web/src/main/java/org/springframework/web/util/UrlParser.java b/spring-web/src/main/java/org/springframework/web/util/UrlParser.java index 538b952887..d09969cd4a 100644 --- a/spring-web/src/main/java/org/springframework/web/util/UrlParser.java +++ b/spring-web/src/main/java/org/springframework/web/util/UrlParser.java @@ -23,9 +23,10 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.Objects; -import java.util.StringTokenizer; import java.util.function.Consumer; +import java.util.function.IntPredicate; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -234,23 +235,70 @@ final class UrlParser { this.state = newState; } - private static List tokenize(String str, String delimiters) { - StringTokenizer st = new StringTokenizer(str, delimiters); + private static List strictSplit(String input, int delimiter) { + // Let position be a position variable for input, initially pointing at the start of input. + int position = 0; + // Let tokens be a list of strings, initially empty. List tokens = new ArrayList<>(); - while (st.hasMoreTokens()) { - tokens.add(st.nextToken()); + // Let token be the result of collecting a sequence of code points that are not equal to delimiter from input, given position. + int delIdx = input.indexOf(delimiter, position); + String token = (delIdx != EOF) ? input.substring(position, delIdx) : input.substring(position); + position = delIdx; + // Append token to tokens. + tokens.add(token); + // While position is not past the end of input: + while (position != EOF) { + // Assert: the code point at position within input is delimiter. + Assert.state(input.codePointAt(position) == delimiter, "Codepoint is not a delimiter"); + // Advance position by 1. + position++; + delIdx = input.indexOf(delimiter, position); + // Let token be the result of collecting a sequence of code points that are not equal to delimiter from input, given position. + token = (delIdx != EOF) ? input.substring(position, delIdx) : input.substring(position); + position = delIdx; + // Append token to tokens. + tokens.add(token); } return tokens; } private static String domainToAscii(String domain, boolean beStrict) { + // If beStrict is false, domain is an ASCII string, and strictly splitting domain on U+002E (.) does not produce any item that starts with an ASCII case-insensitive match for "xn--", this step is equivalent to ASCII lowercasing domain. + boolean onlyLowerCase = !beStrict; + if (!beStrict && containsOnlyAscii(domain)) { + int dotIdx = domain.indexOf('.'); + while (dotIdx != -1) { + if (domain.length() - dotIdx > 4) { + // ASCII case-insensitive match for "xn--" + char ch0 = domain.charAt(dotIdx + 1); + char ch1 = domain.charAt(dotIdx + 2); + char ch2 = domain.charAt(dotIdx + 3); + char ch3 = domain.charAt(dotIdx + 4); + if ((ch0 == 'x' || ch0 == 'X') && + (ch1 == 'n' || ch1 == 'N') && + ch2 == '-' && ch3 == '_') { + onlyLowerCase = false; + break; + } + } + dotIdx = domain.indexOf('.', dotIdx + 1); + } + } + if (onlyLowerCase) { + return domain.toLowerCase(Locale.ENGLISH); + } // Let result be the result of running Unicode ToASCII (https://www.unicode.org/reports/tr46/#ToASCII) with domain_name set to domain, UseSTD3ASCIIRules set to beStrict, CheckHyphens set to false, CheckBidi set to true, CheckJoiners set to true, Transitional_Processing set to false, and VerifyDnsLength set to beStrict. [UTS46] int flag = 0; if (beStrict) { flag |= IDN.USE_STD3_ASCII_RULES; } // Implementation note: implementing Unicode ToASCII is beyond the scope of this parser, we use java.net.IDN.toASCII - return IDN.toASCII(domain, flag); + try { + return IDN.toASCII(domain, flag); + } + catch (IllegalArgumentException ex) { + throw new InvalidUrlException("Could not convert \"" + domain + "\" to ASCII: " + ex.getMessage(), ex); + } } private boolean validate() { @@ -284,6 +332,50 @@ final class UrlParser { throw new InvalidUrlException(message.toString()); } + /** + * The C0 control percent-encode set are the C0 controls and all code points greater than U+007E (~). + */ + private static boolean c0ControlPercentEncodeSet(int ch) { + return isC0Control(ch) || Integer.compareUnsigned(ch, '~') > 0; + } + + /** + * The fragment percent-encode set is the C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+003C (<), U+003E (>), and U+0060 (`). + */ + private static boolean fragmentPercentEncodeSet(int ch) { + return c0ControlPercentEncodeSet(ch) || ch == ' ' || ch == '"' || ch == '<' || ch == '>' || ch == '`'; + } + + /** + * The query percent-encode set is the C0 control percent-encode set and U+0020 SPACE, U+0022 ("), U+0023 (#), U+003C (<), and U+003E (>). + */ + private static boolean queryPercentEncodeSet(int ch) { + return c0ControlPercentEncodeSet(ch) || ch == ' ' || ch == '"' || ch == '#' || ch == '<' || ch == '>'; + } + + /** + * The special-query percent-encode set is the query percent-encode set and U+0027 ('). + */ + private static boolean specialQueryPercentEncodeSet(int ch) { + return queryPercentEncodeSet(ch) || ch == '\''; + } + + + /** + * The path percent-encode set is the query percent-encode set and U+003F (?), U+0060 (`), U+007B ({), and U+007D (}). + */ + private static boolean pathPercentEncodeSet(int ch) { + return queryPercentEncodeSet(ch) || ch == '?' || ch == '`' || ch == '{' || ch == '}'; + } + + /** + * The userinfo percent-encode set is the path percent-encode set and U+002F (/), U+003A (:), U+003B (;), U+003D (=), U+0040 (@), U+005B ([) to U+005E (^), inclusive, and U+007C (|). + */ + private static boolean userinfoPercentEncodeSet(int ch) { + return pathPercentEncodeSet(ch) || ch == '/' || ch == ':' || ch == ';' || ch == '=' || ch == '@' || + (Integer.compareUnsigned(ch, '[') >= 0 && Integer.compareUnsigned(ch, '^') <= 0) || ch == '|'; + } + private static boolean isC0Control(int ch) { return ch >= 0 && ch <= 0x1F; } @@ -307,6 +399,21 @@ final class UrlParser { return true; } + private static boolean containsOnlyAscii(CharSequence string) { + for (int i=0; i< string.length(); i++ ) { + char ch = string.charAt(i); + if (!isAsciiCodePoint(ch)) { + return false; + } + } + return true; + } + + private static boolean isAsciiCodePoint(int ch) { + // An ASCII code point is a code point in the range U+0000 NULL to U+007F DELETE, inclusive. + return Integer.compareUnsigned(ch, 0) >= 0 && Integer.compareUnsigned(ch, 127) <= 0; + } + private static boolean isAsciiDigit(int ch) { return (ch >= '0' && ch <= '9'); } @@ -400,76 +507,151 @@ final class UrlParser { } } - private String percentEncode(int c, HierarchicalUriComponents.Type type) { - return percentEncode(Character.toString(c), type); + private static String percentDecode(String input) { + try { + return UriUtils.decode(input, StandardCharsets.UTF_8); + } + catch (IllegalArgumentException ex) { + throw new InvalidUrlException("Could not decode \"" + input + "\": " + ex.getMessage(), ex); + } } - private String percentEncode(String source, HierarchicalUriComponents.Type type) { - if (this.encoding != null) { - return HierarchicalUriComponents.encodeUriComponent(source, this.encoding, type); + private String percentEncode(int c, IntPredicate percentEncodeSet) { + return percentEncode(Character.toString(c), percentEncodeSet); + } + + private String percentEncode(String input, IntPredicate percentEncodeSet) { + if (this.encoding == null) { + return input; } else { - return source; + byte[] bytes = input.getBytes(this.encoding); + boolean original = true; + for (byte b : bytes) { + if (percentEncodeSet.test(b)) { + original = false; + break; + } + } + if (original) { + return input; + } + StringBuilder output = new StringBuilder(); + for (byte b : bytes) { + if (!percentEncodeSet.test(b)) { + output.append((char)b); + } + else { + output.append('%'); + char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); + char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16)); + output.append(hex1); + output.append(hex2); + } + } + return output.toString(); } } /** - * A single-dot URL path segment is a URL path segment that is "." or an ASCII case-insensitive match for "%2e". + * A single-dot URL path segment is a URL path segment that is "[/]." or an ASCII case-insensitive match for "[/]%2e". */ private static boolean isSingleDotPathSegment(StringBuilder b) { int len = b.length(); - if (len == 1) { - char ch0 = b.charAt(0); - return ch0 == '.'; - } - else if (len == 3) { - // ASCII case-insensitive match for "%2e". - char ch0 = b.charAt(0); - char ch1 = b.charAt(1); - char ch2 = b.charAt(2); - return ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E'); - } - else { - return false; + switch (len) { + case 1 -> { + char ch0 = b.charAt(0); + return ch0 == '.'; + } + case 2 -> { + char ch0 = b.charAt(0); + char ch1 = b.charAt(1); + return ch0 == '/' && ch1 == '.'; + } + case 3 -> { + // ASCII case-insensitive match for "%2e". + char ch0 = b.charAt(0); + char ch1 = b.charAt(1); + char ch2 = b.charAt(2); + return ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E'); + } + case 4 -> { + // ASCII case-insensitive match for "/%2e". + char ch0 = b.charAt(0); + char ch1 = b.charAt(1); + char ch2 = b.charAt(2); + char ch3 = b.charAt(3); + return ch0 == '/' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E'); + } + default -> { + return false; + } } } /** - * A double-dot URL path segment is a URL path segment that is "/.." or an ASCII case-insensitive match for "/.%2e", "/%2e.", or "/%2e%2e". + * A double-dot URL path segment is a URL path segment that is "[/].." or an ASCII case-insensitive match for "/.%2e", "/%2e.", or "/%2e%2e". */ private static boolean isDoubleDotPathSegment(StringBuilder b) { int len = b.length(); - if (len == 3) { - char ch0 = b.charAt(0); - char ch1 = b.charAt(1); - char ch2 = b.charAt(2); - return ch0 == '/' && ch1 == '.' && ch2 == '.'; - } - else if (len == 5) { - char ch0 = b.charAt(0); - char ch1 = b.charAt(1); - char ch2 = b.charAt(2); - char ch3 = b.charAt(3); - char ch4 = b.charAt(4); - // case-insensitive match for "/.%2e" or "/%2e." - return ch0 == '/' && - (ch1 == '.' && ch2 == '%' && ch3 == '2' && (ch4 == 'e' || ch4 == 'E') - || (ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') && ch4 == '.')); - } - else if (len == 7) { - char ch0 = b.charAt(0); - char ch1 = b.charAt(1); - char ch2 = b.charAt(2); - char ch3 = b.charAt(3); - char ch4 = b.charAt(4); - char ch5 = b.charAt(5); - char ch6 = b.charAt(6); - // case-insensitive match for "/%2e%2e". - return ch0 == '/' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') - && ch4 == '%' && ch5 == '2' && (ch6 == 'e' || ch6 == 'E'); - } - else { - return false; + switch (len) { + case 2 -> { + char ch0 = b.charAt(0); + char ch1 = b.charAt(1); + return ch0 == '.' && ch1 == '.'; + } + case 3 -> { + char ch0 = b.charAt(0); + char ch1 = b.charAt(1); + char ch2 = b.charAt(2); + return ch0 == '/' && ch1 == '.' && ch2 == '.'; + } + case 4 -> { + char ch0 = b.charAt(0); + char ch1 = b.charAt(1); + char ch2 = b.charAt(2); + char ch3 = b.charAt(3); + // case-insensitive match for ".%2e" or "%2e." + return (ch0 == '.' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') || + (ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E') && ch3 == '.')); + } + case 5 -> { + char ch0 = b.charAt(0); + char ch1 = b.charAt(1); + char ch2 = b.charAt(2); + char ch3 = b.charAt(3); + char ch4 = b.charAt(4); + // case-insensitive match for "/.%2e" or "/%2e." + return ch0 == '/' && + (ch1 == '.' && ch2 == '%' && ch3 == '2' && (ch4 == 'e' || ch4 == 'E') + || (ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') && ch4 == '.')); + } + case 6 -> { + char ch0 = b.charAt(0); + char ch1 = b.charAt(1); + char ch2 = b.charAt(2); + char ch3 = b.charAt(3); + char ch4 = b.charAt(4); + char ch5 = b.charAt(5); + // case-insensitive match for "%2e%2e". + return ch0 == '%' && ch1 == '2' && (ch2 == 'e' || ch2 == 'E') + && ch3 == '%' && ch4 == '2' && (ch5 == 'e' || ch5 == 'E'); + } + case 7 -> { + char ch0 = b.charAt(0); + char ch1 = b.charAt(1); + char ch2 = b.charAt(2); + char ch3 = b.charAt(3); + char ch4 = b.charAt(4); + char ch5 = b.charAt(5); + char ch6 = b.charAt(6); + // case-insensitive match for "/%2e%2e". + return ch0 == '/' && ch1 == '%' && ch2 == '2' && (ch3 == 'e' || ch3 == 'E') + && ch4 == '%' && ch5 == '2' && (ch6 == 'e' || ch6 == 'E'); + } + default -> { + return false; + } } } @@ -656,8 +838,8 @@ final class UrlParser { // If base is null, or base has an opaque path and c is not U+0023 (#), missing-scheme-non-relative-URL // validation error, return failure. if (p.base == null || p.base.path().isOpaque() && c != '#') { - p.failure("The input is missing a scheme, because it does not begin with an ASCII alpha \"" + Character.toString(c) + - "\", and no base URL was provided."); + p.failure("The input is missing a scheme, because it does not begin with an ASCII alpha \"" + + (c != EOF ? Character.toString(c) : "") + "\", and no base URL was provided."); } // Otherwise, if base has an opaque path and c is U+0023 (#), set url’s scheme to base’s scheme, url’s // path to base’s path, url’s query to base’s query, url’s fragment to the empty string, and set state to fragment state. @@ -738,8 +920,8 @@ final class UrlParser { else { // Set url’s username to base’s username, url’s password to base’s password, url’s host to base’s host, // url’s port to base’s port, url’s path to a clone of base’s path, and url’s query to base’s query. - url.username = p.base.username(); - url.password = p.base.password(); + url.username.replace(0, url.username.length(), p.base.username()); + url.password.replace(0, url.password.length(), p.base.password()); url.host = p.base.host(); url.port = p.base.port(); url.path = p.base.path().clone(); @@ -789,8 +971,8 @@ final class UrlParser { // to base’s host, url’s port to base’s port, state to path state, and then, decrease pointer by 1. else { Assert.state(p.base != null, "No base URL available"); - url.username = p.base.username(); - url.password = p.base.password(); + url.username.replace(0, url.username.length(), p.base.username()); + url.password.replace(0, url.password.length(), p.base.password()); url.host = p.base.host(); url.port = p.base.port(); p.setState(PATH); @@ -850,9 +1032,6 @@ final class UrlParser { p.atSignSeen = true; int bufferLen = p.buffer.length(); - StringBuilder username = new StringBuilder(bufferLen); - StringBuilder password = new StringBuilder(bufferLen); - // For each codePoint in buffer: for (int i = 0; i < bufferLen; i++) { int codePoint = p.buffer.codePointAt(i); @@ -862,18 +1041,16 @@ final class UrlParser { continue; } // Let encodedCodePoints be the result of running UTF-8 percent-encode codePoint using the userinfo percent-encode set. - String encodedCodePoints = p.percentEncode(codePoint,HierarchicalUriComponents.Type.USER_INFO); + String encodedCodePoints = p.percentEncode(codePoint, UrlParser::userinfoPercentEncodeSet); // If passwordTokenSeen is true, then append encodedCodePoints to url’s password. if (p.passwordTokenSeen) { - password.append(encodedCodePoints); + url.password.append(encodedCodePoints); } // Otherwise, append encodedCodePoints to url’s username. else { - username.append(encodedCodePoints); + url.username.append(encodedCodePoints); } } - url.username = username.toString(); - url.password = password.toString(); // Set buffer to the empty string. p.emptyBuffer(); } @@ -917,7 +1094,7 @@ final class UrlParser { return; } // Let host be the result of host parsing buffer with url is not special. - Host host = Host.parse(p.buffer.toString(), false, p.validationErrorHandler); + Host host = Host.parse(p.buffer.toString(), !url.isSpecial(), p); // Set url’s host to host, buffer to the empty string, and state to port state. url.host = host; p.emptyBuffer(); @@ -943,7 +1120,7 @@ final class UrlParser { // EXTRA: if buffer is not empty if (!p.buffer.isEmpty()) { // Let host be the result of host parsing buffer with url is not special. - Host host = Host.parse(p.buffer.toString(), false, p.validationErrorHandler); + Host host = Host.parse(p.buffer.toString(), !url.isSpecial(), p); // Set url’s host to host, buffer to the empty string, and state to path start state. url.host = host; } @@ -1005,7 +1182,7 @@ final class UrlParser { } int defaultPort = defaultPort(url.scheme); // Set url’s port to null, if port is url’s scheme’s default port; otherwise to port. - if (defaultPort == -1 || port == defaultPort) { + if (defaultPort != -1 && port == defaultPort) { url.port = null; } else { @@ -1160,7 +1337,7 @@ final class UrlParser { // Otherwise, run these steps: else { // Let host be the result of host parsing buffer with url is not special. - Host host = Host.parse(p.buffer.toString(), false, p.validationErrorHandler); + Host host = Host.parse(p.buffer.toString(), !url.isSpecial(), p); // If host is "localhost", then set host to the empty string. if (host instanceof Domain domain && domain.domain().equals("localhost")) { host = EmptyHost.INSTANCE; @@ -1308,7 +1485,7 @@ final class UrlParser { } } // UTF-8 percent-encode c using the path percent-encode set and append the result to buffer. - String encoded = p.percentEncode(c, HierarchicalUriComponents.Type.PATH_SEGMENT); + String encoded = p.percentEncode(c, UrlParser::pathPercentEncodeSet); p.buffer.append(encoded); } } @@ -1353,7 +1530,7 @@ final class UrlParser { } // If c is not the EOF code point, UTF-8 percent-encode c using the C0 control percent-encode set and append the result to url’s path. if (c != EOF) { - String encoded = p.percentEncode(c, HierarchicalUriComponents.Type.C0); + String encoded = p.percentEncode(c, UrlParser::c0ControlPercentEncodeSet); url.path.append(encoded); } } @@ -1376,8 +1553,9 @@ final class UrlParser { // - c is the EOF code point if ( (p.stateOverride == null && c == '#') || c == EOF) { // Let queryPercentEncodeSet be the special-query percent-encode set if url is special; otherwise the query percent-encode set. + IntPredicate queryPercentEncodeSet = url.isSpecial() ? UrlParser::specialQueryPercentEncodeSet : UrlParser::queryPercentEncodeSet; // Percent-encode after encoding, with encoding, buffer, and queryPercentEncodeSet, and append the result to url’s query. - String encoded = p.percentEncode(p.buffer.toString(), HierarchicalUriComponents.Type.QUERY); + String encoded = p.percentEncode(p.buffer.toString(), queryPercentEncodeSet); Assert.state(url.query != null, "Url's query should not be null"); url.query += encoded; // Set buffer to the empty string. @@ -1432,7 +1610,7 @@ final class UrlParser { } } // UTF-8 percent-encode c using the fragment percent-encode set and append the result to url’s fragment. - String encoded = p.percentEncode(c, HierarchicalUriComponents.Type.FRAGMENT); + String encoded = p.percentEncode(c, UrlParser::fragmentPercentEncodeSet); Assert.state(url.fragment != null, "Url's fragment should not be null"); url.fragment += encoded; } @@ -1472,9 +1650,9 @@ final class UrlParser { private String scheme = ""; - private String username = ""; + private StringBuilder username = new StringBuilder(); - private String password = ""; + private StringBuilder password = new StringBuilder(); @Nullable private Host host = null; @@ -1517,6 +1695,33 @@ final class UrlParser { } + /** + * The serialization of an origin is the string obtained by applying the following algorithm to the given origin origin: + * If origin is an opaque origin, then return "null". + * Otherwise, let result be origin's scheme. + * Append "://" to result. + * Append origin's host, serialized, to result. + * If origin's port is non-null, append a U+003A COLON character (:), and origin's port, serialized, to result. + * Return result. + */ + public String origin() { + String scheme = scheme(); + if (scheme.equals("ftp") || scheme.equals("http") || scheme.equals("https") || scheme.equals("ws") || scheme.equals("wss")) { + StringBuilder builder = new StringBuilder(scheme); + builder.append("://"); + builder.append(host()); + Port port = port(); + if (port != null) { + builder.append(':'); + builder.append(port); + } + return builder.toString(); + } + else { + return "null"; + } + } + /** * A URL’s scheme is an ASCII string that identifies the type of URL and can be used to dispatch a URL for * further processing after parsing. It is initially the empty string. @@ -1525,18 +1730,25 @@ final class UrlParser { return this.scheme; } + /** + * The protocol getter steps are to return this’s URL’s scheme, followed by U+003A (:). + */ + public String protocol() { + return scheme() + ":"; + } + /** * A URL’s username is an ASCII string identifying a username. It is initially the empty string. */ public String username() { - return this.username; + return this.username.toString(); } /** * A URL’s password is an ASCII string identifying a password. It is initially the empty string. */ public String password() { - return this.password; + return this.password.toString(); } /** @@ -1547,6 +1759,36 @@ final class UrlParser { return this.host; } + /** + *The host getter steps are: + * Let url be this’s URL. + * If url’s host is null, then return the empty string. + * If url’s port is null, return url’s host, serialized. + * Return url’s host, serialized, followed by U+003A (:) and url’s port, serialized. + */ + public String hostString() { + if (host() == null) { + return ""; + } + StringBuilder builder = new StringBuilder(hostname()); + Port port = port(); + if (port != null) { + builder.append(':'); + builder.append(port); + } + return builder.toString(); + } + + public String hostname() { + Host host = host(); + if (host == null) { + return ""; + } + else { + return host.toString(); + } + } + /** * A URL’s port is either null, a string representing a 16-bit unsigned integer that identifies a networking * port, or a string containing a uri template . It is initially {@code null}. @@ -1556,6 +1798,15 @@ final class UrlParser { return this.port; } + public String portString() { + if (port() == null) { + return ""; + } + else { + return port().toString(); + } + } + /** * A URL’s path is a URL {@linkplain Path path}, usually identifying a location. It is initially {@code « »}. */ @@ -1563,6 +1814,10 @@ final class UrlParser { return this.path; } + public String pathname() { + return path().name(); + } + /** * To shorten a url’s path: *
    @@ -1585,6 +1840,21 @@ final class UrlParser { return this.query; } + /** + * The search getter steps are: + * If this’s URL’s query is either null or the empty string, then return the empty string. + * Return U+003F (?), followed by this’s URL’s query. + */ + public String search() { + String query = query(); + if (query == null) { + return ""; + } + else { + return "?" + query; + } + } + /** * A URL’s fragment is either {@code null} or an ASCII string that can be used for further processing on the * resource the URL’s other components identify. It is initially {@code null}. @@ -1594,6 +1864,77 @@ final class UrlParser { return this.fragment; } + /** + * The hash getter steps are: + * If this’s URL’s fragment is either null or the empty string, then return the empty string. + * Return U+0023 (#), followed by this’s URL’s fragment. + */ + public String hash() { + String fragment = fragment(); + if (fragment == null || fragment.isEmpty()) { + return ""; + } + else { + return "#" + fragment; + } + } + + public String href() { + // Let output be url’s scheme and U+003A (:) concatenated. + StringBuilder output = new StringBuilder(scheme()); + output.append(':'); + Host host = host(); + // If url’s host is non-null: + if (host != null) { + // Append "//" to output. + output.append("//"); + // If url includes credentials, then: + if (includesCredentials()) { + // Append url’s username to output. + output.append(username()); + String password = password(); + // If url’s password is not the empty string, then append U+003A (:), followed by url’s password, to output. + if (!password.isEmpty()) { + output.append(':'); + output.append(password); + } + // Append U+0040 (@) to output. + output.append('@'); + } + // Append url’s host, serialized, to output. + output.append(hostname()); + Port port = port(); + // If url’s port is non-null, append U+003A (:) followed by url’s port, serialized, to output. + if (port != null) { + output.append(':'); + output.append(port()); + } + } + // If url’s host is null, url does not have an opaque path, url’s path’s size is greater than 1, and url’s path[0] is the empty string, then append U+002F (/) followed by U+002E (.) to output. + else if (!hasOpaquePath() && + path() instanceof PathSegments pathSegments && + pathSegments.size() > 1 && + pathSegments.get(0).isEmpty()) { + output.append("/."); + } + // Append the result of URL path serializing url to output. + output.append(pathname()); + // If url’s query is non-null, append U+003F (?), followed by url’s query, to output. + String query = query(); + if (query != null) { + output.append('?'); + output.append(query); + } + // If exclude fragment is false and url’s fragment is non-null, then append U+0023 (#), followed by url’s fragment, to output. + String fragment = fragment(); + if (fragment != null) { + output.append('#'); + output.append(fragment); + } + // Return output. + return output.toString(); + } + @Override public boolean equals(Object obj) { if (obj == this) { @@ -1646,7 +1987,7 @@ final class UrlParser { * The host parser takes a scalar value string input with an optional * boolean isOpaque (default false), and then runs these steps. They return failure or a host. */ - static Host parse(String input, boolean isOpaque, @Nullable Consumer validationErrorHandler) { + static Host parse(String input, boolean isOpaque, UrlParser p) { // If input starts with U+005B ([), then: if (!input.isEmpty() && input.charAt(0) == '[') { int last = input.length() - 1; @@ -1660,13 +2001,13 @@ final class UrlParser { } // If isOpaque is true, then return the result of opaque-host parsing input. if (isOpaque) { - return OpaqueHost.parse(input); + return OpaqueHost.parse(input, p); } // Assert: input is not the empty string. Assert.state(!input.isEmpty(), "Input should not be empty"); // Let domain be the result of running UTF-8 decode without BOM on the percent-decoding of input. - String domain = UriUtils.decode(input, StandardCharsets.UTF_8); + String domain = percentDecode(input); // Let asciiDomain be the result of running domain to ASCII with domain and false. String asciiDomain = domainToAscii(domain, false); @@ -1679,7 +2020,7 @@ final class UrlParser { } // If asciiDomain ends in a number, then return the result of IPv4 parsing asciiDomain. if (endsInNumber(asciiDomain)) { - Ipv4Address address = Ipv4Address.parse(asciiDomain, validationErrorHandler); + Ipv4Address address = Ipv4Address.parse(asciiDomain, p); return new IpAddressHost(address); } // Return asciiDomain. @@ -1690,8 +2031,11 @@ final class UrlParser { private static boolean endsInNumber(String input) { // Let parts be the result of strictly splitting input on U+002E (.). - List parts = tokenize(input, "."); + List parts = strictSplit(input, '.'); int lastIdx = parts.size() - 1; + if (lastIdx == -1) { + return false; + } // If the last item in parts is the empty string, then: if (parts.get(lastIdx).isEmpty()) { // If parts’s size is 1, then return false. @@ -1807,11 +2151,62 @@ final class UrlParser { } } - record OpaqueHost(String domain) implements Host { + static final class OpaqueHost implements Host { - public static OpaqueHost parse(String input) { - throw new UnsupportedOperationException("Not implemented yet"); + private final String host; + + private OpaqueHost(String host) { + this.host = host; } + + /** + * The opaque-host parser takes a scalar value string input, and then runs these steps. They return failure or + * an opaque host. + */ + public static OpaqueHost parse(String input, UrlParser p) { + for (int i = 0; i < input.length(); i++) { + char ch = input.charAt(i); + // If input contains a forbidden host code point, host-invalid-code-point validation error, return failure. + if (isForbiddenHost(ch)) { + throw new InvalidUrlException("An opaque host contains a forbidden host code point."); + } + // If input contains a code point that is not a URL code point and not U+0025 (%), invalid-URL-unit validation error. + if (p.validate() && !isUrlCodePoint(ch) && ch != '%') { + p.validationError("Code point \"" + ch + "\" is not a URL unit."); + } + //If input contains a U+0025 (%) and the two code points following it are not ASCII hex digits, invalid-URL-unit validation error. + if (p.validate() && ch == '%' && (input.length() - i < 2 || !isAsciiDigit(input.charAt(i + 1)) || !isAsciiDigit(input.charAt(i + 2)))) { + p.validationError("Code point \"" + ch + "\" is not a URL unit."); + } + } + //Return the result of running UTF-8 percent-encode on input using the C0 control percent-encode set. + String encoded = p.percentEncode(input, UrlParser::c0ControlPercentEncodeSet); + return new OpaqueHost(encoded); + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } + else if (obj instanceof OpaqueHost other) { + return this.host.equals(other.host); + } + else { + return false; + } + } + + @Override + public int hashCode() { + return this.host.hashCode(); + } + + @Override + public String toString() { + return this.host; + } + } static final class EmptyHost implements Host { @@ -1876,16 +2271,14 @@ final class UrlParser { return output.toString(); } - public static Ipv4Address parse(String input, @Nullable Consumer validationErrorHandler) { + public static Ipv4Address parse(String input, UrlParser p) { // Let parts be the result of strictly splitting input on U+002E (.). - List parts = tokenize(input, "."); + List parts = strictSplit(input, '.'); int partsSize = parts.size(); // If the last item in parts is the empty string, then: if (parts.get(partsSize - 1).isEmpty()) { // IPv4-empty-part validation error. - if (validationErrorHandler != null) { - validationErrorHandler.accept("IPv4 address ends with \".\""); - } + p.validationError("IPv4 address ends with \".\""); // If parts’s size is greater than 1, then remove the last item from parts. if (partsSize > 1) { parts.remove(partsSize - 1); @@ -1903,8 +2296,8 @@ final class UrlParser { String part = parts.get(i); // Let result be the result of parsing part. ParseIpv4NumberResult result = parseIpv4Number(part); - if (validationErrorHandler != null && result.validationError()) { - validationErrorHandler.accept("The IPv4 address contains numbers expressed using hexadecimal or octal digits."); + if (p.validate() && result.validationError()) { + p.validationError("The IPv4 address contains numbers expressed using hexadecimal or octal digits."); } // Append result to numbers. numbers.add(result.number()); @@ -1912,8 +2305,8 @@ final class UrlParser { for (Iterator iterator = numbers.iterator(); iterator.hasNext(); ) { Integer number = iterator.next(); // If any item in numbers is greater than 255, IPv4-out-of-range-part validation error. - if (validationErrorHandler != null && number > 255) { - validationErrorHandler.accept("An IPv4 address part exceeds 255."); + if (p.validate() && number > 255) { + p.validationError("An IPv4 address part exceeds 255."); } if (iterator.hasNext()) { // If any but the last item in numbers is greater than 255, then return failure. @@ -2356,6 +2749,8 @@ final class UrlParser { boolean isOpaque(); Path clone(); + + String name(); } static final class PathSegment implements Path { @@ -2384,6 +2779,15 @@ final class UrlParser { this.segment.append(s); } + @Override + public String name() { + String name = segment(); + if (name.startsWith("/")) { + name = name.substring(1); + } + return name; + } + @Override public boolean isEmpty() { return this.segment.isEmpty(); @@ -2483,6 +2887,16 @@ final class UrlParser { return new PathSegments(this.segments); } + @Override + public String name() { + StringBuilder output = new StringBuilder(); + for (PathSegment segment : this.segments) { + output.append('/'); + output.append(segment.name()); + } + return output.toString(); + } + @Override public boolean equals(Object o) { if (o == this) {