Refine StringUtils#uriDecode and update documentation
Backport Bot / build (push) Has been cancelled
Details
Build and Deploy Snapshot / Build and Deploy Snapshot (push) Has been cancelled
Details
CI / ${{ matrix.os.name}} | Java ${{ matrix.java.version}} (map[toolchain:false version:17], map[id:ubuntu-latest name:Linux]) (push) Has been cancelled
Details
CI / ${{ matrix.os.name}} | Java ${{ matrix.java.version}} (map[toolchain:true version:21], map[id:ubuntu-latest name:Linux]) (push) Has been cancelled
Details
CI / ${{ matrix.os.name}} | Java ${{ matrix.java.version}} (map[toolchain:true version:23], map[id:ubuntu-latest name:Linux]) (push) Has been cancelled
Details
Deploy Docs / Dispatch docs deployment (push) Has been cancelled
Details
Build and Deploy Snapshot / Verify (push) Has been cancelled
Details
Backport Bot / build (push) Has been cancelled
Details
Build and Deploy Snapshot / Build and Deploy Snapshot (push) Has been cancelled
Details
CI / ${{ matrix.os.name}} | Java ${{ matrix.java.version}} (map[toolchain:false version:17], map[id:ubuntu-latest name:Linux]) (push) Has been cancelled
Details
CI / ${{ matrix.os.name}} | Java ${{ matrix.java.version}} (map[toolchain:true version:21], map[id:ubuntu-latest name:Linux]) (push) Has been cancelled
Details
CI / ${{ matrix.os.name}} | Java ${{ matrix.java.version}} (map[toolchain:true version:23], map[id:ubuntu-latest name:Linux]) (push) Has been cancelled
Details
Deploy Docs / Dispatch docs deployment (push) Has been cancelled
Details
Build and Deploy Snapshot / Verify (push) Has been cancelled
Details
Refine the StringUtils#uriDecode method in the following ways:

- Use a StringBuilder instead of ByteArrayOutputStream, and only decode %-encoded sequences.
- Use HexFormat.fromHexDigits to decode hex sequences.
- Decode to a byte array that is only allocated if encoded sequences are encountered.

This commit adds another optimization mainly for the use case where there is no encoded sequence, and updates the Javadoc of both StringUtils#uriDecode and UriUtils#decode to match the implementation.

Signed-off-by: Patrick Strawderman <pstrawderman@netflix.com>
Co-Authored-by: Sebastien Deleuze <sebastien.deleuze@broadcom.com>

Closes gh-35253
This commit is contained in:
parent
f3832c7262
commit
24e66b63d1
|
@ -16,7 +16,6 @@
|
||||||
|
|
||||||
package org.springframework.util;
|
package org.springframework.util;
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.nio.charset.Charset;
|
import java.nio.charset.Charset;
|
||||||
import java.util.ArrayDeque;
|
import java.util.ArrayDeque;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -25,6 +24,7 @@ import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.Deque;
|
import java.util.Deque;
|
||||||
import java.util.Enumeration;
|
import java.util.Enumeration;
|
||||||
|
import java.util.HexFormat;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.LinkedHashSet;
|
import java.util.LinkedHashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -803,54 +803,60 @@ public abstract class StringUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decode the given encoded URI component value. Based on the following rules:
|
* Decode the given encoded URI component value by replacing "<i>{@code %xy}</i>" sequences
|
||||||
* <ul>
|
* by a hexadecimal representation of the character in the specified charset, leaving other
|
||||||
* <li>Alphanumeric characters {@code "a"} through {@code "z"}, {@code "A"} through {@code "Z"},
|
* characters unchanged.
|
||||||
* and {@code "0"} through {@code "9"} stay the same.</li>
|
* @param source the encoded {@code String}
|
||||||
* <li>Special characters {@code "-"}, {@code "_"}, {@code "."}, and {@code "*"} stay the same.</li>
|
* @param charset the character encoding to use to decode the "<i>{@code %xy}</i>" sequences
|
||||||
* <li>A sequence "<i>{@code %xy}</i>" is interpreted as a hexadecimal representation of the character.</li>
|
|
||||||
* <li>For all other characters (including those already decoded), the output is undefined.</li>
|
|
||||||
* </ul>
|
|
||||||
* @param source the encoded String
|
|
||||||
* @param charset the character set
|
|
||||||
* @return the decoded value
|
* @return the decoded value
|
||||||
* @throws IllegalArgumentException when the given source contains invalid encoded sequences
|
* @throws IllegalArgumentException when the given source contains invalid encoded sequences
|
||||||
* @since 5.0
|
* @since 5.0
|
||||||
* @see java.net.URLDecoder#decode(String, String)
|
* @see java.net.URLDecoder#decode(String, String) java.net.URLDecoder#decode for HTML form decoding
|
||||||
*/
|
*/
|
||||||
public static String uriDecode(String source, Charset charset) {
|
public static String uriDecode(String source, Charset charset) {
|
||||||
int length = source.length();
|
int length = source.length();
|
||||||
if (length == 0) {
|
int firstPercentIndex = source.indexOf('%');
|
||||||
|
if (length == 0 || firstPercentIndex < 0) {
|
||||||
return source;
|
return source;
|
||||||
}
|
}
|
||||||
Assert.notNull(charset, "Charset must not be null");
|
|
||||||
|
|
||||||
ByteArrayOutputStream baos = new ByteArrayOutputStream(length);
|
StringBuilder output = new StringBuilder(length);
|
||||||
boolean changed = false;
|
output.append(source, 0, firstPercentIndex);
|
||||||
for (int i = 0; i < length; i++) {
|
byte[] bytes = null;
|
||||||
int ch = source.charAt(i);
|
int i = firstPercentIndex;
|
||||||
|
while (i < length) {
|
||||||
|
char ch = source.charAt(i);
|
||||||
if (ch == '%') {
|
if (ch == '%') {
|
||||||
if (i + 2 < length) {
|
try {
|
||||||
char hex1 = source.charAt(i + 1);
|
if (bytes == null) {
|
||||||
char hex2 = source.charAt(i + 2);
|
bytes = new byte[(length - i) / 3];
|
||||||
int u = Character.digit(hex1, 16);
|
|
||||||
int l = Character.digit(hex2, 16);
|
|
||||||
if (u == -1 || l == -1) {
|
|
||||||
throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
|
|
||||||
}
|
}
|
||||||
baos.write((char) ((u << 4) + l));
|
|
||||||
i += 2;
|
int pos = 0;
|
||||||
changed = true;
|
while (i + 2 < length && ch == '%') {
|
||||||
|
bytes[pos++] = (byte) HexFormat.fromHexDigits(source, i + 1, i + 3);
|
||||||
|
i += 3;
|
||||||
|
if (i < length) {
|
||||||
|
ch = source.charAt(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i < length && ch == '%') {
|
||||||
|
throw new IllegalArgumentException("Incomplete trailing escape (%) pattern");
|
||||||
|
}
|
||||||
|
|
||||||
|
output.append(new String(bytes, 0, pos, charset));
|
||||||
}
|
}
|
||||||
else {
|
catch (NumberFormatException ex) {
|
||||||
throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
|
throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
baos.write(ch);
|
output.append(ch);
|
||||||
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return (changed ? StreamUtils.copyToString(baos, charset) : source);
|
return output.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -373,15 +373,16 @@ public abstract class UriUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decode the given encoded URI component.
|
* Decode the given encoded URI component value by replacing "<i>{@code %xy}</i>" sequences
|
||||||
* <p>See {@link StringUtils#uriDecode(String, Charset)} for the decoding rules.
|
* by a hexadecimal representation of the character in the specified charset, leaving other
|
||||||
* @param source the encoded String
|
* characters unchanged.
|
||||||
* @param charset the character encoding to use
|
* @param source the encoded {@code String}
|
||||||
|
* @param charset the character encoding to use to decode the "<i>{@code %xy}</i>" sequences
|
||||||
* @return the decoded value
|
* @return the decoded value
|
||||||
* @throws IllegalArgumentException when the given source contains invalid encoded sequences
|
* @throws IllegalArgumentException when the given source contains invalid encoded sequences
|
||||||
* @since 5.0
|
* @since 5.0
|
||||||
* @see StringUtils#uriDecode(String, Charset)
|
* @see StringUtils#uriDecode(String, Charset)
|
||||||
* @see java.net.URLDecoder#decode(String, String)
|
* @see java.net.URLDecoder#decode(String, String) java.net.URLDecoder#decode for HTML form decoding
|
||||||
*/
|
*/
|
||||||
public static String decode(String source, Charset charset) {
|
public static String decode(String source, Charset charset) {
|
||||||
return StringUtils.uriDecode(source, charset);
|
return StringUtils.uriDecode(source, charset);
|
||||||
|
|
|
@ -107,12 +107,21 @@ class UriUtilsTests {
|
||||||
assertThat(UriUtils.decode("T%C5%8Dky%C5%8D", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d");
|
assertThat(UriUtils.decode("T%C5%8Dky%C5%8D", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d");
|
||||||
assertThat(UriUtils.decode("/Z%C3%BCrich", CHARSET)).as("Invalid encoded result").isEqualTo("/Z\u00fcrich");
|
assertThat(UriUtils.decode("/Z%C3%BCrich", CHARSET)).as("Invalid encoded result").isEqualTo("/Z\u00fcrich");
|
||||||
assertThat(UriUtils.decode("T\u014dky\u014d", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d");
|
assertThat(UriUtils.decode("T\u014dky\u014d", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d");
|
||||||
|
assertThat(UriUtils.decode("%20\u2019", CHARSET)).as("Invalid encoded result").isEqualTo(" \u2019");
|
||||||
|
assertThat(UriUtils.decode("\u015bp\u0159\u00ec\u0144\u0121", CHARSET)).as("Invalid encoded result").isEqualTo("śpřìńġ");
|
||||||
|
assertThat(UriUtils.decode("%20\u015bp\u0159\u00ec\u0144\u0121", CHARSET)).as("Invalid encoded result").isEqualTo(" śpřìńġ");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void decodeInvalidSequence() {
|
void decodeInvalidSequence() {
|
||||||
assertThatIllegalArgumentException().isThrownBy(() ->
|
assertThatIllegalArgumentException().isThrownBy(() ->
|
||||||
UriUtils.decode("foo%2", CHARSET));
|
UriUtils.decode("foo%2", CHARSET));
|
||||||
|
assertThatIllegalArgumentException().isThrownBy(() ->
|
||||||
|
UriUtils.decode("foo%", CHARSET));
|
||||||
|
assertThatIllegalArgumentException().isThrownBy(() ->
|
||||||
|
UriUtils.decode("%", CHARSET));
|
||||||
|
assertThatIllegalArgumentException().isThrownBy(() ->
|
||||||
|
UriUtils.decode("%zz", CHARSET));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
Loading…
Reference in New Issue