Conditionally htmlEscape chars based on encoding
This commit adds new htmlEscape methods that take the character encoding as a parameter. According to the specs and recommendations, the set of characters that must be HTML-escaped depends on the encoding used in the response: if the current character encoding supports a character natively, it should not be escaped. Reserved characters (<, >, ', ", &) must, of course, always be escaped.
See: http://www.w3.org/TR/html4/sgml/entities.html#h-24.3
See: spring-projects/spring-framework#385 by @candrews
Issue: SPR-9293
This commit is contained in:
parent
4d3ade563a
commit
369cabf064
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright 2002-2012 the original author or authors.
|
||||
* Copyright 2002-2014 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -107,14 +107,42 @@ class HtmlCharacterEntityReferences {
|
|||
* Return true if the given character is mapped to a supported entity reference.
|
||||
*/
|
||||
public boolean isMappedToReference(char character) {
|
||||
return (convertToReference(character) != null);
|
||||
return isMappedToReference(character, WebUtils.DEFAULT_CHARACTER_ENCODING);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if the given character is mapped to a supported entity reference.
|
||||
*/
|
||||
public boolean isMappedToReference(char character, String encoding) {
|
||||
return (convertToReference(character, encoding) != null);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the reference mapped to the given character or {@code null}.
|
||||
*/
|
||||
public String convertToReference(char character) {
|
||||
if (character < 1000 || (character >= 8000 && character < 10000)) {
|
||||
return convertToReference(character, WebUtils.DEFAULT_CHARACTER_ENCODING);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the reference mapped to the given character or {@code null}.
|
||||
*/
|
||||
public String convertToReference(char character, String encoding) {
|
||||
if(encoding.startsWith("UTF-")){
|
||||
switch(character){
|
||||
case '<':
|
||||
return "&lt;";
|
||||
case '>':
|
||||
return "&gt;";
|
||||
case '"':
|
||||
return "&quot;";
|
||||
case '&':
|
||||
return "&amp;";
|
||||
case '\'':
|
||||
return "&#39;";
|
||||
}
|
||||
}
|
||||
else if (character < 1000 || (character >= 8000 && character < 10000)) {
|
||||
int index = (character < 1000 ? character : character - 7000);
|
||||
String entityReference = this.characterToEntityReferenceMap[index];
|
||||
if (entityReference != null) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright 2002-2012 the original author or authors.
|
||||
* Copyright 2002-2014 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -16,6 +16,8 @@
|
|||
|
||||
package org.springframework.web.util;
|
||||
|
||||
import org.springframework.util.Assert;
|
||||
|
||||
/**
|
||||
* Utility class for HTML escaping. Escapes and unescapes
|
||||
* based on the W3C HTML 4.01 recommendation, handling
|
||||
|
@ -57,13 +59,33 @@ public abstract class HtmlUtils {
|
|||
* @return the escaped string
|
||||
*/
|
||||
public static String htmlEscape(String input) {
|
||||
return htmlEscape(input, WebUtils.DEFAULT_CHARACTER_ENCODING);
|
||||
}
|
||||
|
||||
/**
|
||||
* Turn special characters into HTML character references.
|
||||
* Handles complete character set defined in HTML 4.01 recommendation.
|
||||
* <p>Escapes all special characters to their corresponding
|
||||
* entity reference (e.g. {@code &lt;}) at least as required by the
|
||||
* specified encoding. In other words, if a special character does
|
||||
* not have to be escaped for the given encoding, it may not be.
|
||||
* <p>Reference:
|
||||
* <a href="http://www.w3.org/TR/html4/sgml/entities.html">
|
||||
* http://www.w3.org/TR/html4/sgml/entities.html
|
||||
* </a>
|
||||
* @param input the (unescaped) input string
|
||||
* @param encoding The name of a supported {@link java.nio.charset.Charset charset}
|
||||
* @return the escaped string
|
||||
*/
|
||||
public static String htmlEscape(String input, String encoding) {
|
||||
Assert.notNull(encoding, "encoding is required");
|
||||
if (input == null) {
|
||||
return null;
|
||||
}
|
||||
StringBuilder escaped = new StringBuilder(input.length() * 2);
|
||||
for (int i = 0; i < input.length(); i++) {
|
||||
char character = input.charAt(i);
|
||||
String reference = characterEntityReferences.convertToReference(character);
|
||||
String reference = characterEntityReferences.convertToReference(character, encoding);
|
||||
if (reference != null) {
|
||||
escaped.append(reference);
|
||||
}
|
||||
|
@ -87,13 +109,33 @@ public abstract class HtmlUtils {
|
|||
* @return the escaped string
|
||||
*/
|
||||
public static String htmlEscapeDecimal(String input) {
|
||||
return htmlEscapeDecimal(input, WebUtils.DEFAULT_CHARACTER_ENCODING);
|
||||
}
|
||||
|
||||
/**
|
||||
* Turn special characters into HTML character references.
|
||||
* Handles complete character set defined in HTML 4.01 recommendation.
|
||||
* <p>Escapes all special characters to their corresponding numeric
|
||||
* reference in decimal format (&amp;#<i>Decimal</i>;) at least as required by the
|
||||
* specified encoding. In other words, if a special character does
|
||||
* not have to be escaped for the given encoding, it may not be.
|
||||
* <p>Reference:
|
||||
* <a href="http://www.w3.org/TR/html4/sgml/entities.html">
|
||||
* http://www.w3.org/TR/html4/sgml/entities.html
|
||||
* </a>
|
||||
* @param input the (unescaped) input string
|
||||
* @param encoding The name of a supported {@link java.nio.charset.Charset charset}
|
||||
* @return the escaped string
|
||||
*/
|
||||
public static String htmlEscapeDecimal(String input, String encoding) {
|
||||
Assert.notNull(encoding, "encoding is required");
|
||||
if (input == null) {
|
||||
return null;
|
||||
}
|
||||
StringBuilder escaped = new StringBuilder(input.length() * 2);
|
||||
for (int i = 0; i < input.length(); i++) {
|
||||
char character = input.charAt(i);
|
||||
if (characterEntityReferences.isMappedToReference(character)) {
|
||||
if (characterEntityReferences.isMappedToReference(character, encoding)) {
|
||||
escaped.append(HtmlCharacterEntityReferences.DECIMAL_REFERENCE_START);
|
||||
escaped.append((int) character);
|
||||
escaped.append(HtmlCharacterEntityReferences.REFERENCE_END);
|
||||
|
@ -118,13 +160,33 @@ public abstract class HtmlUtils {
|
|||
* @return the escaped string
|
||||
*/
|
||||
public static String htmlEscapeHex(String input) {
|
||||
return htmlEscapeHex(input, WebUtils.DEFAULT_CHARACTER_ENCODING);
|
||||
}
|
||||
|
||||
/**
|
||||
* Turn special characters into HTML character references.
|
||||
* Handles complete character set defined in HTML 4.01 recommendation.
|
||||
* <p>Escapes all special characters to their corresponding numeric
|
||||
* reference in hex format (&amp;#x<i>Hex</i>;) at least as required by the
|
||||
* specified encoding. In other words, if a special character does
|
||||
* not have to be escaped for the given encoding, it may not be.
|
||||
* <p>Reference:
|
||||
* <a href="http://www.w3.org/TR/html4/sgml/entities.html">
|
||||
* http://www.w3.org/TR/html4/sgml/entities.html
|
||||
* </a>
|
||||
* @param input the (unescaped) input string
|
||||
* @param encoding The name of a supported {@link java.nio.charset.Charset charset}
|
||||
* @return the escaped string
|
||||
*/
|
||||
public static String htmlEscapeHex(String input, String encoding) {
|
||||
Assert.notNull(encoding, "encoding is required");
|
||||
if (input == null) {
|
||||
return null;
|
||||
}
|
||||
StringBuilder escaped = new StringBuilder(input.length() * 2);
|
||||
for (int i = 0; i < input.length(); i++) {
|
||||
char character = input.charAt(i);
|
||||
if (characterEntityReferences.isMappedToReference(character)) {
|
||||
if (characterEntityReferences.isMappedToReference(character, encoding)) {
|
||||
escaped.append(HtmlCharacterEntityReferences.HEX_REFERENCE_START);
|
||||
escaped.append(Integer.toString(character, 16));
|
||||
escaped.append(HtmlCharacterEntityReferences.REFERENCE_END);
|
||||
|
|
|
@ -76,6 +76,20 @@ public class HtmlCharacterEntityReferencesTests {
|
|||
(char) -1, entityReferences.convertToCharacter("invalid"));
|
||||
}
|
||||
|
||||
// SPR-9293
|
||||
@Test
|
||||
public void testConvertToReferenceUTF8() {
|
||||
HtmlCharacterEntityReferences entityReferences = new HtmlCharacterEntityReferences();
|
||||
String utf8 = "UTF-8";
|
||||
assertEquals("<", entityReferences.convertToReference('<', utf8));
|
||||
assertEquals(">", entityReferences.convertToReference('>', utf8));
|
||||
assertEquals("&", entityReferences.convertToReference('&', utf8));
|
||||
assertEquals(""", entityReferences.convertToReference('"', utf8));
|
||||
assertEquals("'", entityReferences.convertToReference('\'', utf8));
|
||||
assertNull(entityReferences.convertToReference((char) 233, utf8));
|
||||
assertNull(entityReferences.convertToReference((char) 934, utf8));
|
||||
}
|
||||
|
||||
private Map<Integer, String> getReferenceCharacterMap() {
|
||||
CharacterEntityResourceIterator entityIterator = new CharacterEntityResourceIterator();
|
||||
Map<Integer, String> referencedCharactersMap = new HashMap<Integer, String>();
|
||||
|
|
|
@ -71,6 +71,28 @@ public class HtmlUtilsTests {
|
|||
"ϑ", HtmlUtils.htmlEscapeDecimal("" + (char) 977));
|
||||
}
|
||||
|
||||
// SPR-9293
|
||||
@Test
|
||||
public void testEncodeIntoHtmlCharacterSetFromUtf8() {
|
||||
String utf8 = ("UTF-8");
|
||||
assertNull("A null string should be converted to a null string",
|
||||
HtmlUtils.htmlEscape(null, utf8));
|
||||
assertEquals("An empty string should be converted to an empty string",
|
||||
"", HtmlUtils.htmlEscape("", utf8));
|
||||
assertEquals("A string containing no special characters should not be affected",
|
||||
"A sentence containing no special characters.",
|
||||
HtmlUtils.htmlEscape("A sentence containing no special characters."));
|
||||
|
||||
assertEquals("'< >' should be encoded to '< >'",
|
||||
"< >", HtmlUtils.htmlEscape("< >", utf8));
|
||||
assertEquals("'< >' should be encoded to '< >'",
|
||||
"< >", HtmlUtils.htmlEscapeDecimal("< >", utf8));
|
||||
|
||||
assertEquals("UTF-8 supported chars should not be escaped",
|
||||
"Μερικοί Ελληνικοί "χαρακτήρες"",
|
||||
HtmlUtils.htmlEscape("Μερικοί Ελληνικοί \"χαρακτήρες\"", utf8));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDecodeFromHtmlCharacterSet() {
|
||||
assertNull("A null string should be converted to a null string",
|
||||
|
|
Loading…
Reference in New Issue