Refine StringUtils#uriDecode and update documentation

Refine the StringUtils#uriDecode method in the following ways: - Use a StringBuilder instead of ByteArrayOutputStream, and only decode %-encoded sequences. - Use HexFormat.fromHexDigits to decode hex sequences. - Decode to a byte array that is only allocated if encoded sequences are encountered. This commit adds another optimization mainly for the use case where there is no encoded sequence, and updates the Javadoc of both StringUtils#uriDecode and UriUtils#decode to match the implementation. Signed-off-by: Patrick Strawderman <pstrawderman@netflix.com> Co-Authored-by: Sebastien Deleuze <sebastien.deleuze@broadcom.com> Closes gh-35253
2025-03-28 11:54:16 -07:00 · 2025-03-28 11:54:16 -07:00 · 24e66b63d1
parent f3832c7262
commit 24e66b63d1
3 changed files with 52 additions and 36 deletions
--- a/spring-core/src/main/java/org/springframework/util/StringUtils.java
+++ b/spring-core/src/main/java/org/springframework/util/StringUtils.java
@ -16,7 +16,6 @@

 package org.springframework.util;

-import java.io.ByteArrayOutputStream;
 import java.nio.charset.Charset;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
@ -25,6 +24,7 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.Deque;
 import java.util.Enumeration;
+import java.util.HexFormat;
 import java.util.Iterator;
 import java.util.LinkedHashSet;
 import java.util.List;
@ -803,54 +803,60 @@ public abstract class StringUtils {
 	}

 	/**
-	 * Decode the given encoded URI component value. Based on the following rules:
-	 * <ul>
-	 * <li>Alphanumeric characters {@code "a"} through {@code "z"}, {@code "A"} through {@code "Z"},
-	 * and {@code "0"} through {@code "9"} stay the same.</li>
-	 * <li>Special characters {@code "-"}, {@code "_"}, {@code "."}, and {@code "*"} stay the same.</li>
-	 * <li>A sequence "<i>{@code %xy}</i>" is interpreted as a hexadecimal representation of the character.</li>
-	 * <li>For all other characters (including those already decoded), the output is undefined.</li>
-	 * </ul>
-	 * @param source the encoded String
-	 * @param charset the character set
+	 * Decode the given encoded URI component value by replacing each "<i>{@code %xy}</i>" sequence
+	 * with the character it represents, interpreting the hexadecimal byte values in the specified
+	 * charset and leaving all other characters unchanged.
+	 * @param source the encoded {@code String}
+	 * @param charset the character encoding to use to decode the "<i>{@code %xy}</i>" sequences
 	 * @return the decoded value
 	 * @throws IllegalArgumentException when the given source contains invalid encoded sequences
 	 * @since 5.0
-	 * @see java.net.URLDecoder#decode(String, String)
+	 * @see java.net.URLDecoder#decode(String, String) java.net.URLDecoder#decode for HTML form decoding
 	 */
 	public static String uriDecode(String source, Charset charset) {
 		int length = source.length();
-		if (length == 0) {
+		int firstPercentIndex = source.indexOf('%');
+		if (length == 0 || firstPercentIndex < 0) {
 			return source;
 		}
-		Assert.notNull(charset, "Charset must not be null");

-		ByteArrayOutputStream baos = new ByteArrayOutputStream(length);
-		boolean changed = false;
-		for (int i = 0; i < length; i++) {
-			int ch = source.charAt(i);
+		StringBuilder output = new StringBuilder(length);
+		output.append(source, 0, firstPercentIndex);
+		byte[] bytes = null;
+		int i = firstPercentIndex;
+		while (i < length) {
+			char ch = source.charAt(i);
 			if (ch == '%') {
-				if (i + 2 < length) {
-					char hex1 = source.charAt(i + 1);
-					char hex2 = source.charAt(i + 2);
-					int u = Character.digit(hex1, 16);
-					int l = Character.digit(hex2, 16);
-					if (u == -1 || l == -1) {
-						throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
+				try {
+					if (bytes == null) {
+						bytes = new byte[(length - i) / 3];
 					}
-					baos.write((char) ((u << 4) + l));
-					i += 2;
-					changed = true;
+
+					int pos = 0;
+					while (i + 2 < length && ch == '%') {
+						bytes[pos++] = (byte) HexFormat.fromHexDigits(source, i + 1, i + 3);
+						i += 3;
+						if (i < length) {
+							ch = source.charAt(i);
 						}
-				else {
+					}
+
+					if (i < length && ch == '%') {
+						throw new IllegalArgumentException("Incomplete trailing escape (%) pattern");
+					}
+
+					output.append(new String(bytes, 0, pos, charset));
+				}
+				catch (NumberFormatException ex) {
 					throw new IllegalArgumentException("Invalid encoded sequence \"" + source.substring(i) + "\"");
 				}
 			}
 			else {
-				baos.write(ch);
+				output.append(ch);
+				i++;
 			}
 		}
-		return (changed ? StreamUtils.copyToString(baos, charset) : source);
+		return output.toString();
 	}

 	/**
--- a/spring-web/src/main/java/org/springframework/web/util/UriUtils.java
+++ b/spring-web/src/main/java/org/springframework/web/util/UriUtils.java
@ -373,15 +373,16 @@ public abstract class UriUtils {
 	}

 	/**
-	 * Decode the given encoded URI component.
-	 * <p>See {@link StringUtils#uriDecode(String, Charset)} for the decoding rules.
-	 * @param source the encoded String
-	 * @param charset the character encoding to use
+	 * Decode the given encoded URI component value by replacing each "<i>{@code %xy}</i>" sequence
+	 * with the character it represents, interpreting the hexadecimal byte values in the specified
+	 * charset and leaving all other characters unchanged.
+	 * @param source the encoded {@code String}
+	 * @param charset the character encoding to use to decode the "<i>{@code %xy}</i>" sequences
 	 * @return the decoded value
 	 * @throws IllegalArgumentException when the given source contains invalid encoded sequences
 	 * @since 5.0
 	 * @see StringUtils#uriDecode(String, Charset)
-	 * @see java.net.URLDecoder#decode(String, String)
+	 * @see java.net.URLDecoder#decode(String, String) java.net.URLDecoder#decode for HTML form decoding
 	 */
 	public static String decode(String source, Charset charset) {
 		return StringUtils.uriDecode(source, charset);
--- a/spring-web/src/test/java/org/springframework/web/util/UriUtilsTests.java
+++ b/spring-web/src/test/java/org/springframework/web/util/UriUtilsTests.java
@ -107,12 +107,21 @@ class UriUtilsTests {
 		assertThat(UriUtils.decode("T%C5%8Dky%C5%8D", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d");
 		assertThat(UriUtils.decode("/Z%C3%BCrich", CHARSET)).as("Invalid encoded result").isEqualTo("/Z\u00fcrich");
 		assertThat(UriUtils.decode("T\u014dky\u014d", CHARSET)).as("Invalid encoded result").isEqualTo("T\u014dky\u014d");
+		assertThat(UriUtils.decode("%20\u2019", CHARSET)).as("Invalid encoded result").isEqualTo(" \u2019");
+		assertThat(UriUtils.decode("\u015bp\u0159\u00ec\u0144\u0121", CHARSET)).as("Invalid encoded result").isEqualTo("śpřìńġ");
+		assertThat(UriUtils.decode("%20\u015bp\u0159\u00ec\u0144\u0121", CHARSET)).as("Invalid encoded result").isEqualTo(" śpřìńġ");
 	}

 	@Test
 	void decodeInvalidSequence() {
 		assertThatIllegalArgumentException().isThrownBy(() ->
 				UriUtils.decode("foo%2", CHARSET));
+		assertThatIllegalArgumentException().isThrownBy(() ->
+				UriUtils.decode("foo%", CHARSET));
+		assertThatIllegalArgumentException().isThrownBy(() ->
+				UriUtils.decode("%", CHARSET));
+		assertThatIllegalArgumentException().isThrownBy(() ->
+				UriUtils.decode("%zz", CHARSET));
 	}

 	@Test