Update variable detection in UriComponentsBuilder#encode

This commit better aligns how URI variable placeholders are detected
in UriComponentsBuilder#encode (i.e. the pre-encoding of the literal
parts of a URI template) and how they are expanded later on.
The latter relies on a pattern that stops at the first closing '}'
which excludes the possibility for well-formed, nested placeholders
other than variables with regex syntax, e.g. "{year:\d{1,4}}".

UriComponentsBuilder#encode now also stops at the first closing '}' and
further ensures the placeholder is not empty and that it starts with '{'
before deciding to treat it as a URI variable.

Closes gh-26466
This commit is contained in:
Rossen Stoyanchev 2021-02-10 21:10:03 +00:00
parent c9147c4281
commit 8791928f61
5 changed files with 90 additions and 26 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright 2002-2020 the original author or authors.
* Copyright 2002-2021 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -767,44 +767,47 @@ final class HierarchicalUriComponents extends UriComponents {
private final StringBuilder output = new StringBuilder();
private boolean variableWithNameAndRegex;
/**
 * Create an encoder that keeps the given charset for the
 * {@code encodeUriComponent} calls made while applying it.
 * @param charset the charset to store in this encoder
 */
public UriTemplateEncoder(Charset charset) {
this.charset = charset;
}
@Override
public String apply(String source, Type type) {
// Only URI variable (nothing to encode)..
if (source.length() > 1 && source.charAt(0) == '{' && source.charAt(source.length() -1) == '}') {
// URI variable only?
if (isUriVariable(source)) {
return source;
}
// Only literal (encode full source)..
// Literal template only?
if (source.indexOf('{') == -1) {
return encodeUriComponent(source, this.charset, type);
}
// Mixed literal parts and URI variables, maybe (encode literal parts only)..
int level = 0;
clear(this.currentLiteral);
clear(this.currentVariable);
clear(this.output);
for (int i = 0; i < source.length(); i++) {
char c = source.charAt(i);
if (c == ':' && level == 1) {
this.variableWithNameAndRegex = true;
}
if (c == '{') {
level++;
if (level == 1) {
encodeAndAppendCurrentLiteral(type);
append(this.currentLiteral, true, type);
}
}
if (c == '}' && level > 0) {
level--;
this.currentVariable.append('}');
if (level == 0) {
this.output.append(this.currentVariable);
clear(this.currentVariable);
boolean encode = !isUriVariable(this.currentVariable);
append(this.currentVariable, encode, type);
}
else if (!this.variableWithNameAndRegex) {
append(this.currentVariable, true, type);
level = 0;
}
}
else if (level > 0) {
@ -817,13 +820,38 @@ final class HierarchicalUriComponents extends UriComponents {
if (level > 0) {
this.currentLiteral.append(this.currentVariable);
}
encodeAndAppendCurrentLiteral(type);
append(this.currentLiteral, true, type);
return this.output.toString();
}
private void encodeAndAppendCurrentLiteral(Type type) {
this.output.append(encodeUriComponent(this.currentLiteral.toString(), this.charset, type));
clear(this.currentLiteral);
/**
 * Determine whether the given text is, in its entirety, a single URI
 * variable placeholder.
 * <p>The text must begin with '{' and end with '}'. The enclosed text must
 * contain at least one non-whitespace character and no further '{' or '}'.
 * As an exception, once a ':' is found past the first enclosed character,
 * the text is accepted without inspecting the remainder, which allows for
 * variables with regex syntax, e.g. {@code "{year:\d{1,4}}"}.
 * @param source the candidate placeholder text
 * @return {@code true} if the text qualifies as a URI variable
 */
private boolean isUriVariable(CharSequence source) {
    int last = source.length() - 1;
    if (last < 1 || source.charAt(0) != '{' || source.charAt(last) != '}') {
        return false;
    }
    boolean sawNonWhitespace = false;
    for (int pos = 1; pos < last; pos++) {
        char ch = source.charAt(pos);
        if (ch == ':') {
            // A ':' in the very first enclosed position does not count as
            // a name/regex separator; it is treated like any other char.
            if (pos > 1) {
                return true;
            }
        }
        else if (ch == '{' || ch == '}') {
            return false;
        }
        if (!Character.isWhitespace(ch)) {
            sawNonWhitespace = true;
        }
    }
    return sawNonWhitespace;
}
/**
 * Move the content of the given buffer to the output and reset state.
 * <p>The content is passed through {@code encodeUriComponent} first when
 * {@code encode} is set, or appended as is otherwise. Afterwards the
 * buffer is cleared and the name-and-regex flag is reset.
 * @param sb the buffer to flush to the output
 * @param encode whether to encode the buffer content before appending
 * @param type the URI component type to encode for
 */
private void append(StringBuilder sb, boolean encode, Type type) {
    if (encode) {
        String encoded = encodeUriComponent(sb.toString(), this.charset, type);
        this.output.append(encoded);
    }
    else {
        this.output.append(sb);
    }
    clear(sb);
    this.variableWithNameAndRegex = false;
}
private void clear(StringBuilder sb) {

View File

@ -1,5 +1,5 @@
/*
* Copyright 2002-2020 the original author or authors.
* Copyright 2002-2021 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -201,10 +201,11 @@ public abstract class UriComponents implements Serializable {
/**
* Concatenate all URI components to return the fully formed URI String.
* <p>This method does nothing more than a simple concatenation based on
* current values. That means it could produce different results if invoked
* before vs after methods that can change individual values such as
* {@code encode}, {@code expand}, or {@code normalize}.
* <p>This method amounts to simple String concatenation of the current
* URI component values and as such the result may contain illegal URI
* characters, for example if URI variables have not been expanded or if
* encoding has not been applied via {@link UriComponentsBuilder#encode()}
* or {@link #encode()}.
*/
public abstract String toUriString();

View File

@ -1,5 +1,5 @@
/*
* Copyright 2002-2020 the original author or authors.
* Copyright 2002-2021 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -412,12 +412,15 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable {
* also escaping characters with reserved meaning.
* <p>For most cases, this method is more likely to give the expected result
* because it treats URI variables as opaque data to be fully encoded, while
* {@link UriComponents#encode()} is useful only if intentionally expanding
* URI variables that contain reserved characters.
* {@link UriComponents#encode()} is useful when intentionally expanding URI
* variables that contain reserved characters.
* <p>For example ';' is legal in a path but has reserved meaning. This
* method replaces ";" with "%3B" in URI variables but not in the URI
* template. By contrast, {@link UriComponents#encode()} never replaces ";"
* since it is a legal character in a path.
* <p>When not expanding URI variables at all, prefer use of
* {@link UriComponents#encode()} since that will also encode anything that
* incidentally looks like a URI variable.
* @since 5.0.8
*/
public final UriComponentsBuilder encode() {

View File

@ -25,6 +25,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.BiConsumer;
import org.junit.jupiter.api.Test;
@ -1037,6 +1038,35 @@ class UriComponentsBuilderTests {
assertThat(result1.getSchemeSpecificPart()).isEqualTo(null);
}
@Test // gh-26466
void encodeTemplateWithInvalidPlaceholderSyntax() {

    // Pre-encode the template via the builder and compare the URI string.
    BiConsumer<String, String> verify = (template, expected) -> {
        String actual = UriComponentsBuilder.fromUriString(template).encode().toUriString();
        assertThat(actual).isEqualTo(expected);
    };

    // Placeholders without text (empty or whitespace only) get encoded
    verify.accept("{}", "%7B%7D");
    verify.accept("{ \t}", "%7B%20%09%7D");
    verify.accept("/a{}b", "/a%7B%7Db");
    verify.accept("/a{ \t}b", "/a%7B%20%09%7Db");

    // Nested placeholders with matching braces get encoded
    verify.accept("{foo{}}", "%7Bfoo%7B%7D%7D");
    verify.accept("{foo{bar}baz}", "%7Bfoo%7Bbar%7Dbaz%7D");
    verify.accept("/a{foo{}}b", "/a%7Bfoo%7B%7D%7Db");
    verify.accept("/a{foo{bar}baz}b", "/a%7Bfoo%7Bbar%7Dbaz%7Db");

    // Mismatched braces: only a well-formed "{foo}" is left untouched
    verify.accept("{foo{{}", "%7Bfoo%7B%7B%7D");
    verify.accept("{foo}}", "{foo}%7D");
    verify.accept("/a{foo{{}bar", "/a%7Bfoo%7B%7B%7Dbar");
    verify.accept("/a{foo}}b", "/a{foo}%7Db");

    // A variable with regex syntax is kept as is
    verify.accept("{year:\\d{1,4}}", "{year:\\d{1,4}}");
    verify.accept("/a{year:\\d{1,4}}b", "/a{year:\\d{1,4}}b");
}
@Test // SPR-11856
void fromHttpRequestForwardedHeader() {
MockHttpServletRequest request = new MockHttpServletRequest();

View File

@ -214,8 +214,10 @@ TIP: Consider ";", which is legal in a path but has reserved meaning. The first
replaces ";", since it is a legal character in a path.
For most cases, the first option is likely to give the expected result, because it treats URI
variables as opaque data to be fully encoded, while option 2 is useful only if
URI variables intentionally contain reserved characters.
variables as opaque data to be fully encoded, while the second option is useful if URI
variables do intentionally contain reserved characters. The second option is also useful
when not expanding URI variables at all since that will also encode anything that
incidentally looks like a URI variable.
The following example uses the first option: