Refine StringUtils#uriDecode and update documentation
Backport Bot / build (push) Has been cancelled Details
Build and Deploy Snapshot / Build and Deploy Snapshot (push) Has been cancelled Details
CI / ${{ matrix.os.name}} | Java ${{ matrix.java.version}} (map[toolchain:false version:17], map[id:ubuntu-latest name:Linux]) (push) Has been cancelled Details
CI / ${{ matrix.os.name}} | Java ${{ matrix.java.version}} (map[toolchain:true version:21], map[id:ubuntu-latest name:Linux]) (push) Has been cancelled Details
CI / ${{ matrix.os.name}} | Java ${{ matrix.java.version}} (map[toolchain:true version:23], map[id:ubuntu-latest name:Linux]) (push) Has been cancelled Details
Deploy Docs / Dispatch docs deployment (push) Has been cancelled Details
Build and Deploy Snapshot / Verify (push) Has been cancelled Details

Refine the StringUtils#uriDecode method in the following ways:
- Use a StringBuilder instead of ByteArrayOutputStream, and only decode
  %-encoded sequences.
- Use HexFormat.fromHexDigits to decode hex sequences.
- Decode to a byte array that is only allocated if encoded sequences are
  encountered.

This commit adds another optimization mainly for the use case where
there is no encoded sequence, and updates the Javadoc of both
StringUtils#uriDecode and UriUtils#decode to match the implementation.

Signed-off-by: Patrick Strawderman <pstrawderman@netflix.com>
Co-Authored-by: Sebastien Deleuze <sebastien.deleuze@broadcom.com>

Closes gh-35253
This commit is contained in:
Patrick Strawderman 2025-03-28 11:54:16 -07:00 committed by Brian Clozel
parent f3832c7262
commit 24e66b63d1
3 changed files with 52 additions and 36 deletions

View File

@ -16,7 +16,6 @@
package org.springframework.util;
import java.io.ByteArrayOutputStream;
import java.nio.charset.Charset;
import java.util.ArrayDeque;
import java.util.ArrayList;
@ -25,6 +24,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Deque;
import java.util.Enumeration;
import java.util.HexFormat;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
@ -803,54 +803,60 @@ public abstract class StringUtils {
}
/**
* Decode the given encoded URI component value. Based on the following rules:
* <ul>
* <li>Alphanumeric characters {@code "a"} through {@code "z"}, {@code "A"} through {@code "Z"},
* and {@code "0"} through {@code "9"} stay the same.</li>
* <li>Special characters {@code "-"}, {@code "_"}, {@code "."}, and {@code "*"} stay the same.</li>
* <li>A sequence "<i>{@code %xy}</i>" is interpreted as a hexadecimal representation of the character.</li>
* <li>For all other characters (including those already decoded), the output is undefined.</li>
* </ul>
* @param source the encoded String
* @param charset the character set
* Decode the given encoded URI component value by replacing "<i>{@code %xy}</i>" sequences
* by an hexadecimal representation of the character in the specified charset, letting other
* characters unchanged.
* @param source the encoded {@code String}
* @param charset the character encoding to use to decode the "<i>{@code %xy}</i>" sequences
* @return the decoded value
* @throws IllegalArgumentException when the given source contains invalid encoded sequences
* @since 5.0
* @see java.net.URLDecoder#decode(String, String)
* @see java.net.URLDecoder#decode(String, String) java.net.URLDecoder#decode for HTML form decoding
*/
public static String uriDecode(String source, Charset charset) {
int length = source.length();
if (length == 0) {
int firstPercentIndex = source.indexOf('%');
if (length == 0 || firstPercentIndex < 0) {
return source;
}
Assert.notNull(charset, "Charset must not be null");
ByteArrayOutputStream baos = new ByteArrayOutputStream(length);
boolean changed = false;
for (int i = 0; i < length; i++) {
int ch = source.charAt(i);
StringBuilder output = new StringBuilder(length);
output.append(source, 0, firstPercentIndex);
byte[] bytes = null;
int i = firstPercentIndex;
while (i < length) {
char ch = source.charAt(i);
if (ch == '%') {
if (i + 2 < length) {
char hex1 = source.charAt(i + 1);
char hex2 = source.charAt(i + 2);
int u = Character.digit(hex1, 16);
int l = Character.digit(hex2, 16);
if (u == -1 || l == -1) {
throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
try {
if (bytes == null) {
bytes = new byte[(length - i) / 3];
}
baos.write((char) ((u << 4) + l));
i += 2;
changed = true;
int pos = 0;
while (i + 2 < length && ch == '%') {
bytes[pos++] = (byte) HexFormat.fromHexDigits(source, i + 1, i + 3);
i += 3;
if (i < length) {
ch = source.charAt(i);
}
else {
}
if (i < length && ch == '%') {
throw new IllegalArgumentException("Incomplete trailing escape (%) pattern");
}
output.append(new String(bytes, 0, pos, charset));
}
catch (NumberFormatException ex) {
throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
}
}
else {
baos.write(ch);
output.append(ch);
i++;
}
}
return (changed ? StreamUtils.copyToString(baos, charset) : source);
return output.toString();
}
/**

View File

@ -373,15 +373,16 @@ public abstract class UriUtils {
}
/**
* Decode the given encoded URI component.
* <p>See {@link StringUtils#uriDecode(String, Charset)} for the decoding rules.
* @param source the encoded String
* @param charset the character encoding to use
* Decode the given encoded URI component value by replacing "<i>{@code %xy}</i>" sequences
* by an hexadecimal representation of the character in the specified charset, letting other
* characters unchanged.
* @param source the encoded {@code String}
* @param charset the character encoding to use to decode the "<i>{@code %xy}</i>" sequences
* @return the decoded value
* @throws IllegalArgumentException when the given source contains invalid encoded sequences
* @since 5.0
* @see StringUtils#uriDecode(String, Charset)
* @see java.net.URLDecoder#decode(String, String)
* @see java.net.URLDecoder#decode(String, String) java.net.URLDecoder#decode for HTML form decoding
*/
public static String decode(String source, Charset charset) {
return StringUtils.uriDecode(source, charset);

View File

@ -107,12 +107,21 @@ class UriUtilsTests {
assertThat(UriUtils.decode("T%C5%8Dky%C5%8D", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d");
assertThat(UriUtils.decode("/Z%C3%BCrich", CHARSET)).as("Invalid encoded result").isEqualTo("/Z\u00fcrich");
assertThat(UriUtils.decode("T\u014dky\u014d", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d");
assertThat(UriUtils.decode("%20\u2019", CHARSET)).as("Invalid encoded result").isEqualTo(" \u2019");
assertThat(UriUtils.decode("\u015bp\u0159\u00ec\u0144\u0121", CHARSET)).as("Invalid encoded result").isEqualTo("śpřìńġ");
assertThat(UriUtils.decode("%20\u015bp\u0159\u00ec\u0144\u0121", CHARSET)).as("Invalid encoded result").isEqualTo(" śpřìńġ");
}
@Test
void decodeInvalidSequence() {
assertThatIllegalArgumentException().isThrownBy(() ->
UriUtils.decode("foo%2", CHARSET));
assertThatIllegalArgumentException().isThrownBy(() ->
UriUtils.decode("foo%", CHARSET));
assertThatIllegalArgumentException().isThrownBy(() ->
UriUtils.decode("%", CHARSET));
assertThatIllegalArgumentException().isThrownBy(() ->
UriUtils.decode("%zz", CHARSET));
}
@Test