Conditionally htmlEscape chars based on encoding

This commit adds new htmlEscape methods that take the character encoding
as a parameter. According to specs and recommendations, the set of
characters that must be HTML-escaped depends on the encoding used in the
response. If the given character encoding can represent a character
natively, that character should not be escaped; the reserved characters
(<, >, ', ", &) must always be escaped regardless of the encoding.

See: http://www.w3.org/TR/html4/sgml/entities.html#h-24.3
See: spring-projects/spring-framework#385 by @candrews

Issue: SPR-9293
This commit is contained in:
Brian Clozel 2014-10-17 16:19:55 +02:00
parent 4d3ade563a
commit 369cabf064
4 changed files with 133 additions and 7 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright 2002-2012 the original author or authors.
* Copyright 2002-2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -107,14 +107,42 @@ class HtmlCharacterEntityReferences {
* Return true if the given character is mapped to a supported entity reference.
*/
public boolean isMappedToReference(char character) {
return (convertToReference(character) != null);
return isMappedToReference(character, WebUtils.DEFAULT_CHARACTER_ENCODING);
}
/**
 * Determine whether the given character has a supported entity reference
 * when escaping for the given encoding.
 * @param character the character to look up
 * @param encoding the name of the character encoding being escaped for
 * @return {@code true} if {@link #convertToReference(char, String)} yields
 * a non-null reference for the character, {@code false} otherwise
 */
public boolean isMappedToReference(char character, String encoding) {
    String reference = convertToReference(character, encoding);
    return (reference != null);
}
/**
* Return the reference mapped to the given character or {@code null}.
*/
public String convertToReference(char character) {
if (character < 1000 || (character >= 8000 && character < 10000)) {
return convertToReference(character, WebUtils.DEFAULT_CHARACTER_ENCODING);
}
/**
* Return the reference mapped to the given character or {@code null}.
*/
public String convertToReference(char character, String encoding) {
if(encoding.startsWith("UTF-")){
switch(character){
case '<':
return "&lt;";
case '>':
return "&gt;";
case '"':
return "&quot;";
case '&':
return "&amp;";
case '\'':
return "&#39;";
}
}
else if (character < 1000 || (character >= 8000 && character < 10000)) {
int index = (character < 1000 ? character : character - 7000);
String entityReference = this.characterToEntityReferenceMap[index];
if (entityReference != null) {

View File

@ -1,5 +1,5 @@
/*
* Copyright 2002-2012 the original author or authors.
* Copyright 2002-2014 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -16,6 +16,8 @@
package org.springframework.web.util;
import org.springframework.util.Assert;
/**
* Utility class for HTML escaping. Escapes and unescapes
* based on the W3C HTML 4.01 recommendation, handling
@ -57,13 +59,33 @@ public abstract class HtmlUtils {
* @return the escaped string
*/
public static String htmlEscape(String input) {
    // No encoding supplied: delegate with WebUtils.DEFAULT_CHARACTER_ENCODING.
    String defaultEncoding = WebUtils.DEFAULT_CHARACTER_ENCODING;
    return htmlEscape(input, defaultEncoding);
}
/**
* Turn special characters into HTML character references.
* Handles complete character set defined in HTML 4.01 recommendation.
* <p>Escapes all special characters to their corresponding
* entity reference (e.g. {@code &lt;}) at least as required by the
* specified encoding. In other words, a special character that does
* not have to be escaped for the given encoding may be left unescaped.
* <p>Reference:
* <a href="http://www.w3.org/TR/html4/sgml/entities.html">
* http://www.w3.org/TR/html4/sgml/entities.html
* </a>
* @param input the (unescaped) input string
* @param encoding The name of a supported {@link java.nio.charset.Charset charset}
* @return the escaped string
*/
public static String htmlEscape(String input, String encoding) {
Assert.notNull(encoding, "encoding is required");
if (input == null) {
return null;
}
StringBuilder escaped = new StringBuilder(input.length() * 2);
for (int i = 0; i < input.length(); i++) {
char character = input.charAt(i);
String reference = characterEntityReferences.convertToReference(character);
String reference = characterEntityReferences.convertToReference(character, encoding);
if (reference != null) {
escaped.append(reference);
}
@ -87,13 +109,33 @@ public abstract class HtmlUtils {
* @return the escaped string
*/
public static String htmlEscapeDecimal(String input) {
    // No encoding supplied: delegate with WebUtils.DEFAULT_CHARACTER_ENCODING.
    String defaultEncoding = WebUtils.DEFAULT_CHARACTER_ENCODING;
    return htmlEscapeDecimal(input, defaultEncoding);
}
/**
* Turn special characters into HTML character references.
* Handles complete character set defined in HTML 4.01 recommendation.
* <p>Escapes all special characters to their corresponding numeric
* reference in decimal format (&#<i>Decimal</i>;) at least as required by the
* specified encoding. In other words, a special character that does
* not have to be escaped for the given encoding may be left unescaped.
* <p>Reference:
* <a href="http://www.w3.org/TR/html4/sgml/entities.html">
* http://www.w3.org/TR/html4/sgml/entities.html
* </a>
* @param input the (unescaped) input string
* @param encoding The name of a supported {@link java.nio.charset.Charset charset}
* @return the escaped string
*/
public static String htmlEscapeDecimal(String input, String encoding) {
Assert.notNull(encoding, "encoding is required");
if (input == null) {
return null;
}
StringBuilder escaped = new StringBuilder(input.length() * 2);
for (int i = 0; i < input.length(); i++) {
char character = input.charAt(i);
if (characterEntityReferences.isMappedToReference(character)) {
if (characterEntityReferences.isMappedToReference(character, encoding)) {
escaped.append(HtmlCharacterEntityReferences.DECIMAL_REFERENCE_START);
escaped.append((int) character);
escaped.append(HtmlCharacterEntityReferences.REFERENCE_END);
@ -118,13 +160,33 @@ public abstract class HtmlUtils {
* @return the escaped string
*/
public static String htmlEscapeHex(String input) {
    // No encoding supplied: delegate with WebUtils.DEFAULT_CHARACTER_ENCODING.
    String defaultEncoding = WebUtils.DEFAULT_CHARACTER_ENCODING;
    return htmlEscapeHex(input, defaultEncoding);
}
/**
* Turn special characters into HTML character references.
* Handles complete character set defined in HTML 4.01 recommendation.
* <p>Escapes all special characters to their corresponding numeric
* reference in hex format (&#x<i>Hex</i>;) at least as required by the
* specified encoding. In other words, a special character that does
* not have to be escaped for the given encoding may be left unescaped.
* <p>Reference:
* <a href="http://www.w3.org/TR/html4/sgml/entities.html">
* http://www.w3.org/TR/html4/sgml/entities.html
* </a>
* @param input the (unescaped) input string
* @param encoding The name of a supported {@link java.nio.charset.Charset charset}
* @return the escaped string
*/
public static String htmlEscapeHex(String input, String encoding) {
Assert.notNull(encoding, "encoding is required");
if (input == null) {
return null;
}
StringBuilder escaped = new StringBuilder(input.length() * 2);
for (int i = 0; i < input.length(); i++) {
char character = input.charAt(i);
if (characterEntityReferences.isMappedToReference(character)) {
if (characterEntityReferences.isMappedToReference(character, encoding)) {
escaped.append(HtmlCharacterEntityReferences.HEX_REFERENCE_START);
escaped.append(Integer.toString(character, 16));
escaped.append(HtmlCharacterEntityReferences.REFERENCE_END);

View File

@ -76,6 +76,20 @@ public class HtmlCharacterEntityReferencesTests {
(char) -1, entityReferences.convertToCharacter("invalid"));
}
// SPR-9293
@Test
public void testConvertToReferenceUTF8() {
    final String encoding = "UTF-8";
    final HtmlCharacterEntityReferences references = new HtmlCharacterEntityReferences();

    // The five reserved markup characters must still map to a reference under UTF-8.
    assertEquals("&lt;", references.convertToReference('<', encoding));
    assertEquals("&gt;", references.convertToReference('>', encoding));
    assertEquals("&amp;", references.convertToReference('&', encoding));
    assertEquals("&quot;", references.convertToReference('"', encoding));
    assertEquals("&#39;", references.convertToReference('\'', encoding));

    // Non-reserved characters (code points 233 and 934) must not be mapped under UTF-8.
    final char firstUnmapped = (char) 233;
    final char secondUnmapped = (char) 934;
    assertNull(references.convertToReference(firstUnmapped, encoding));
    assertNull(references.convertToReference(secondUnmapped, encoding));
}
private Map<Integer, String> getReferenceCharacterMap() {
CharacterEntityResourceIterator entityIterator = new CharacterEntityResourceIterator();
Map<Integer, String> referencedCharactersMap = new HashMap<Integer, String>();

View File

@ -71,6 +71,28 @@ public class HtmlUtilsTests {
"&#977;", HtmlUtils.htmlEscapeDecimal("" + (char) 977));
}
// SPR-9293
@Test
public void testEncodeIntoHtmlCharacterSetFromUtf8() {
    // Every assertion below goes through the encoding-aware overloads with UTF-8.
    String utf8 = "UTF-8";

    assertNull("A null string should be converted to a null string",
            HtmlUtils.htmlEscape(null, utf8));
    assertEquals("An empty string should be converted to an empty string",
            "", HtmlUtils.htmlEscape("", utf8));
    // Pass the encoding here as well, so this case exercises the UTF-8 path
    // rather than the default-encoding overload.
    assertEquals("A string containing no special characters should not be affected",
            "A sentence containing no special characters.",
            HtmlUtils.htmlEscape("A sentence containing no special characters.", utf8));

    // Reserved characters are escaped for UTF-8, as entities or decimal references.
    assertEquals("'< >' should be encoded to '&lt; &gt;'",
            "&lt; &gt;", HtmlUtils.htmlEscape("< >", utf8));
    assertEquals("'< >' should be encoded to '&#60; &#62;'",
            "&#60; &#62;", HtmlUtils.htmlEscapeDecimal("< >", utf8));

    // Greek letters pass through unchanged; only the double quotes are escaped.
    assertEquals("UTF-8 supported chars should not be escaped",
            "Μερικοί Ελληνικοί &quot;χαρακτήρες&quot;",
            HtmlUtils.htmlEscape("Μερικοί Ελληνικοί \"χαρακτήρες\"", utf8));
}
@Test
public void testDecodeFromHtmlCharacterSet() {
assertNull("A null string should be converted to a null string",