From 24e66b63d1d12c45e4af333d6271451083f81e90 Mon Sep 17 00:00:00 2001 From: Patrick Strawderman Date: Fri, 28 Mar 2025 11:54:16 -0700 Subject: [PATCH] Refine StringUtils#uriDecode and update documentation Refine the StringUtils#uriDecode method in the following ways: - Use a StringBuilder instead of ByteArrayOutputStream, and only decode %-encoded sequences. - Use HexFormat.fromHexDigits to decode hex sequences. - Decode to a byte array that is only allocated if encoded sequences are encountered. This commit adds another optimization mainly for the use case where there is no encoded sequence, and updates the Javadoc of both StringUtils#uriDecode and UriUtils#decode to match the implementation. Signed-off-by: Patrick Strawderman Co-Authored-by: Sebastien Deleuze Closes gh-35253 --- .../org/springframework/util/StringUtils.java | 68 ++++++++++--------- .../springframework/web/util/UriUtils.java | 11 +-- .../web/util/UriUtilsTests.java | 9 +++ 3 files changed, 52 insertions(+), 36 deletions(-) diff --git a/spring-core/src/main/java/org/springframework/util/StringUtils.java b/spring-core/src/main/java/org/springframework/util/StringUtils.java index 8b2553ce276..760fc3da87a 100644 --- a/spring-core/src/main/java/org/springframework/util/StringUtils.java +++ b/spring-core/src/main/java/org/springframework/util/StringUtils.java @@ -16,7 +16,6 @@ package org.springframework.util; -import java.io.ByteArrayOutputStream; import java.nio.charset.Charset; import java.util.ArrayDeque; import java.util.ArrayList; @@ -25,6 +24,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Deque; import java.util.Enumeration; +import java.util.HexFormat; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; @@ -803,54 +803,60 @@ public abstract class StringUtils { } /** - * Decode the given encoded URI component value. Based on the following rules: - *
    - *
  • Alphanumeric characters {@code "a"} through {@code "z"}, {@code "A"} through {@code "Z"}, - * and {@code "0"} through {@code "9"} stay the same.
  • - *
  • Special characters {@code "-"}, {@code "_"}, {@code "."}, and {@code "*"} stay the same.
  • - *
  • A sequence "{@code %xy}" is interpreted as a hexadecimal representation of the character.
  • - *
  • For all other characters (including those already decoded), the output is undefined.
  • - *
- * @param source the encoded String - * @param charset the character set + * Decode the given encoded URI component value by replacing "{@code %xy}" sequences + * by an hexadecimal representation of the character in the specified charset, letting other + * characters unchanged. + * @param source the encoded {@code String} + * @param charset the character encoding to use to decode the "{@code %xy}" sequences * @return the decoded value * @throws IllegalArgumentException when the given source contains invalid encoded sequences * @since 5.0 - * @see java.net.URLDecoder#decode(String, String) + * @see java.net.URLDecoder#decode(String, String) java.net.URLDecoder#decode for HTML form decoding */ public static String uriDecode(String source, Charset charset) { int length = source.length(); - if (length == 0) { + int firstPercentIndex = source.indexOf('%'); + if (length == 0 || firstPercentIndex < 0) { return source; } - Assert.notNull(charset, "Charset must not be null"); - ByteArrayOutputStream baos = new ByteArrayOutputStream(length); - boolean changed = false; - for (int i = 0; i < length; i++) { - int ch = source.charAt(i); + StringBuilder output = new StringBuilder(length); + output.append(source, 0, firstPercentIndex); + byte[] bytes = null; + int i = firstPercentIndex; + while (i < length) { + char ch = source.charAt(i); if (ch == '%') { - if (i + 2 < length) { - char hex1 = source.charAt(i + 1); - char hex2 = source.charAt(i + 2); - int u = Character.digit(hex1, 16); - int l = Character.digit(hex2, 16); - if (u == -1 || l == -1) { - throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\""); + try { + if (bytes == null) { + bytes = new byte[(length - i) / 3]; } - baos.write((char) ((u << 4) + l)); - i += 2; - changed = true; + + int pos = 0; + while (i + 2 < length && ch == '%') { + bytes[pos++] = (byte) HexFormat.fromHexDigits(source, i + 1, i + 3); + i += 3; + if (i < length) { + ch = source.charAt(i); + } + } + + if (i < length && ch == '%') { + throw new IllegalArgumentException("Incomplete trailing escape (%) pattern"); + } + + output.append(new String(bytes, 0, pos, charset)); } - else { + catch (NumberFormatException ex) { throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\""); } } else { - baos.write(ch); + output.append(ch); + i++; } } - return (changed ? StreamUtils.copyToString(baos, charset) : source); + return output.toString(); } /** diff --git a/spring-web/src/main/java/org/springframework/web/util/UriUtils.java b/spring-web/src/main/java/org/springframework/web/util/UriUtils.java index 0b89fb3bd06..7711470fa99 100644 --- a/spring-web/src/main/java/org/springframework/web/util/UriUtils.java +++ b/spring-web/src/main/java/org/springframework/web/util/UriUtils.java @@ -373,15 +373,16 @@ public abstract class UriUtils { } /** - * Decode the given encoded URI component. - *

See {@link StringUtils#uriDecode(String, Charset)} for the decoding rules. - * @param source the encoded String - * @param charset the character encoding to use + * Decode the given encoded URI component value by replacing "{@code %xy}" sequences + * by an hexadecimal representation of the character in the specified charset, letting other + * characters unchanged. + * @param source the encoded {@code String} + * @param charset the character encoding to use to decode the "{@code %xy}" sequences * @return the decoded value * @throws IllegalArgumentException when the given source contains invalid encoded sequences * @since 5.0 * @see StringUtils#uriDecode(String, Charset) - * @see java.net.URLDecoder#decode(String, String) + * @see java.net.URLDecoder#decode(String, String) java.net.URLDecoder#decode for HTML form decoding */ public static String decode(String source, Charset charset) { return StringUtils.uriDecode(source, charset); diff --git a/spring-web/src/test/java/org/springframework/web/util/UriUtilsTests.java b/spring-web/src/test/java/org/springframework/web/util/UriUtilsTests.java index 53587c51bac..ff3159a5e20 100644 --- a/spring-web/src/test/java/org/springframework/web/util/UriUtilsTests.java +++ b/spring-web/src/test/java/org/springframework/web/util/UriUtilsTests.java @@ -107,12 +107,21 @@ class UriUtilsTests { assertThat(UriUtils.decode("T%C5%8Dky%C5%8D", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d"); assertThat(UriUtils.decode("/Z%C3%BCrich", CHARSET)).as("Invalid encoded result").isEqualTo("/Z\u00fcrich"); assertThat(UriUtils.decode("T\u014dky\u014d", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d"); + assertThat(UriUtils.decode("%20\u2019", CHARSET)).as("Invalid encoded result").isEqualTo(" \u2019"); + assertThat(UriUtils.decode("\u015bp\u0159\u00ec\u0144\u0121", CHARSET)).as("Invalid encoded result").isEqualTo("śpřìńġ"); + assertThat(UriUtils.decode("%20\u015bp\u0159\u00ec\u0144\u0121", CHARSET)).as("Invalid encoded result").isEqualTo(" śpřìńġ"); } @Test void decodeInvalidSequence() { assertThatIllegalArgumentException().isThrownBy(() -> UriUtils.decode("foo%2", CHARSET)); + assertThatIllegalArgumentException().isThrownBy(() -> + UriUtils.decode("foo%", CHARSET)); + assertThatIllegalArgumentException().isThrownBy(() -> + UriUtils.decode("%", CHARSET)); + assertThatIllegalArgumentException().isThrownBy(() -> + UriUtils.decode("%zz", CHARSET)); } @Test