Conditionally htmlEscape chars based on encoding

This commit adds new htmlEscape methods that take the character encoding
as a parameter. According to specs and recommendations, the list of
chars to be html escaped depends on the encoding used in the response.
If the current char encoding supports chars natively, we shouldn't
escape those; of course, reserved chars (<,>,',",&) should always be
escaped.

See: http://www.w3.org/TR/html4/sgml/entities.html#h-24.3
See: spring-projects/spring-framework#385 by @candrews

Issue: SPR-9293
This commit is contained in:
Brian Clozel 2014-10-17 16:19:55 +02:00
parent 4d3ade563a
commit 369cabf064
4 changed files with 133 additions and 7 deletions

View File

@ -1,5 +1,5 @@
/* /*
* Copyright 2002-2012 the original author or authors. * Copyright 2002-2014 the original author or authors.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@ -107,14 +107,42 @@ class HtmlCharacterEntityReferences {
* Return true if the given character is mapped to a supported entity reference. * Return true if the given character is mapped to a supported entity reference.
*/ */
public boolean isMappedToReference(char character) { public boolean isMappedToReference(char character) {
return (convertToReference(character) != null); return isMappedToReference(character, WebUtils.DEFAULT_CHARACTER_ENCODING);
}
/**
* Return true if the given character is mapped to a supported entity reference
* for the given encoding.
*/
public boolean isMappedToReference(char character, String encoding) {
return (convertToReference(character, encoding) != null);
} }
/** /**
* Return the reference mapped to the given character or {@code null}. * Return the reference mapped to the given character or {@code null}.
*/ */
public String convertToReference(char character) { public String convertToReference(char character) {
if (character < 1000 || (character >= 8000 && character < 10000)) { return convertToReference(character, WebUtils.DEFAULT_CHARACTER_ENCODING);
}
/**
* Return the reference mapped to the given character for the given encoding,
* or {@code null}.
*/
public String convertToReference(char character, String encoding) {
if(encoding.startsWith("UTF-")){
switch(character){
case '<':
return "&lt;";
case '>':
return "&gt;";
case '"':
return "&quot;";
case '&':
return "&amp;";
case '\'':
return "&#39;";
}
}
else if (character < 1000 || (character >= 8000 && character < 10000)) {
int index = (character < 1000 ? character : character - 7000); int index = (character < 1000 ? character : character - 7000);
String entityReference = this.characterToEntityReferenceMap[index]; String entityReference = this.characterToEntityReferenceMap[index];
if (entityReference != null) { if (entityReference != null) {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright 2002-2012 the original author or authors. * Copyright 2002-2014 the original author or authors.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@ -16,6 +16,8 @@
package org.springframework.web.util; package org.springframework.web.util;
import org.springframework.util.Assert;
/** /**
* Utility class for HTML escaping. Escapes and unescapes * Utility class for HTML escaping. Escapes and unescapes
* based on the W3C HTML 4.01 recommendation, handling * based on the W3C HTML 4.01 recommendation, handling
@ -57,13 +59,33 @@ public abstract class HtmlUtils {
* @return the escaped string * @return the escaped string
*/ */
public static String htmlEscape(String input) { public static String htmlEscape(String input) {
return htmlEscape(input, WebUtils.DEFAULT_CHARACTER_ENCODING);
}
/**
* Turn special characters into HTML character references.
* Handles complete character set defined in HTML 4.01 recommendation.
* <p>Escapes all special characters to their corresponding
* entity reference (e.g. {@code &lt;}) at least as required by the
* specified encoding. In other words, a special character that does
* not have to be escaped for the given encoding may be left unescaped.
* <p>Reference:
* <a href="http://www.w3.org/TR/html4/sgml/entities.html">
* http://www.w3.org/TR/html4/sgml/entities.html
* </a>
* @param input the (unescaped) input string
* @param encoding The name of a supported {@link java.nio.charset.Charset charset}
* @return the escaped string
*/
public static String htmlEscape(String input, String encoding) {
Assert.notNull(encoding, "encoding is required");
if (input == null) { if (input == null) {
return null; return null;
} }
StringBuilder escaped = new StringBuilder(input.length() * 2); StringBuilder escaped = new StringBuilder(input.length() * 2);
for (int i = 0; i < input.length(); i++) { for (int i = 0; i < input.length(); i++) {
char character = input.charAt(i); char character = input.charAt(i);
String reference = characterEntityReferences.convertToReference(character); String reference = characterEntityReferences.convertToReference(character, encoding);
if (reference != null) { if (reference != null) {
escaped.append(reference); escaped.append(reference);
} }
@ -87,13 +109,33 @@ public abstract class HtmlUtils {
* @return the escaped string * @return the escaped string
*/ */
public static String htmlEscapeDecimal(String input) { public static String htmlEscapeDecimal(String input) {
return htmlEscapeDecimal(input, WebUtils.DEFAULT_CHARACTER_ENCODING);
}
/**
* Turn special characters into HTML character references.
* Handles complete character set defined in HTML 4.01 recommendation.
* <p>Escapes all special characters to their corresponding numeric
* reference in decimal format (&#<i>Decimal</i>;) at least as required by the
* specified encoding. In other words, a special character that does
* not have to be escaped for the given encoding may be left unescaped.
* <p>Reference:
* <a href="http://www.w3.org/TR/html4/sgml/entities.html">
* http://www.w3.org/TR/html4/sgml/entities.html
* </a>
* @param input the (unescaped) input string
* @param encoding The name of a supported {@link java.nio.charset.Charset charset}
* @return the escaped string
*/
public static String htmlEscapeDecimal(String input, String encoding) {
Assert.notNull(encoding, "encoding is required");
if (input == null) { if (input == null) {
return null; return null;
} }
StringBuilder escaped = new StringBuilder(input.length() * 2); StringBuilder escaped = new StringBuilder(input.length() * 2);
for (int i = 0; i < input.length(); i++) { for (int i = 0; i < input.length(); i++) {
char character = input.charAt(i); char character = input.charAt(i);
if (characterEntityReferences.isMappedToReference(character)) { if (characterEntityReferences.isMappedToReference(character, encoding)) {
escaped.append(HtmlCharacterEntityReferences.DECIMAL_REFERENCE_START); escaped.append(HtmlCharacterEntityReferences.DECIMAL_REFERENCE_START);
escaped.append((int) character); escaped.append((int) character);
escaped.append(HtmlCharacterEntityReferences.REFERENCE_END); escaped.append(HtmlCharacterEntityReferences.REFERENCE_END);
@ -118,13 +160,33 @@ public abstract class HtmlUtils {
* @return the escaped string * @return the escaped string
*/ */
public static String htmlEscapeHex(String input) { public static String htmlEscapeHex(String input) {
return htmlEscapeHex(input, WebUtils.DEFAULT_CHARACTER_ENCODING);
}
/**
* Turn special characters into HTML character references.
* Handles complete character set defined in HTML 4.01 recommendation.
* <p>Escapes all special characters to their corresponding numeric
* reference in hex format (&#x<i>Hex</i>;) at least as required by the
* specified encoding. In other words, a special character that does
* not have to be escaped for the given encoding may be left unescaped.
* <p>Reference:
* <a href="http://www.w3.org/TR/html4/sgml/entities.html">
* http://www.w3.org/TR/html4/sgml/entities.html
* </a>
* @param input the (unescaped) input string
* @param encoding The name of a supported {@link java.nio.charset.Charset charset}
* @return the escaped string
*/
public static String htmlEscapeHex(String input, String encoding) {
Assert.notNull(encoding, "encoding is required");
if (input == null) { if (input == null) {
return null; return null;
} }
StringBuilder escaped = new StringBuilder(input.length() * 2); StringBuilder escaped = new StringBuilder(input.length() * 2);
for (int i = 0; i < input.length(); i++) { for (int i = 0; i < input.length(); i++) {
char character = input.charAt(i); char character = input.charAt(i);
if (characterEntityReferences.isMappedToReference(character)) { if (characterEntityReferences.isMappedToReference(character, encoding)) {
escaped.append(HtmlCharacterEntityReferences.HEX_REFERENCE_START); escaped.append(HtmlCharacterEntityReferences.HEX_REFERENCE_START);
escaped.append(Integer.toString(character, 16)); escaped.append(Integer.toString(character, 16));
escaped.append(HtmlCharacterEntityReferences.REFERENCE_END); escaped.append(HtmlCharacterEntityReferences.REFERENCE_END);

View File

@ -76,6 +76,20 @@ public class HtmlCharacterEntityReferencesTests {
(char) -1, entityReferences.convertToCharacter("invalid")); (char) -1, entityReferences.convertToCharacter("invalid"));
} }
// SPR-9293
@Test
public void testConvertToReferenceUTF8() {
    final String encoding = "UTF-8";
    HtmlCharacterEntityReferences references = new HtmlCharacterEntityReferences();

    // The five reserved markup characters are still converted under UTF-8.
    assertEquals("&lt;", references.convertToReference('<', encoding));
    assertEquals("&gt;", references.convertToReference('>', encoding));
    assertEquals("&amp;", references.convertToReference('&', encoding));
    assertEquals("&quot;", references.convertToReference('"', encoding));
    assertEquals("&#39;", references.convertToReference('\'', encoding));

    // Non-reserved characters (code points 233 and 934) yield no reference under UTF-8.
    assertNull(references.convertToReference('\u00E9', encoding));
    assertNull(references.convertToReference('\u03A6', encoding));
}
private Map<Integer, String> getReferenceCharacterMap() { private Map<Integer, String> getReferenceCharacterMap() {
CharacterEntityResourceIterator entityIterator = new CharacterEntityResourceIterator(); CharacterEntityResourceIterator entityIterator = new CharacterEntityResourceIterator();
Map<Integer, String> referencedCharactersMap = new HashMap<Integer, String>(); Map<Integer, String> referencedCharactersMap = new HashMap<Integer, String>();

View File

@ -71,6 +71,28 @@ public class HtmlUtilsTests {
"&#977;", HtmlUtils.htmlEscapeDecimal("" + (char) 977)); "&#977;", HtmlUtils.htmlEscapeDecimal("" + (char) 977));
} }
// SPR-9293
@Test
public void testEncodeIntoHtmlCharacterSetFromUtf8() {
    // Every assertion goes through the encoding-aware overloads, so the
    // UTF-8 code path is the one actually exercised by this test.
    String utf8 = "UTF-8";
    assertNull("A null string should be converted to a null string",
            HtmlUtils.htmlEscape(null, utf8));
    assertEquals("An empty string should be converted to an empty string",
            "", HtmlUtils.htmlEscape("", utf8));
    // Pass the encoding explicitly: the one-argument overload would silently
    // test the default encoding instead of UTF-8.
    assertEquals("A string containing no special characters should not be affected",
            "A sentence containing no special characters.",
            HtmlUtils.htmlEscape("A sentence containing no special characters.", utf8));
    assertEquals("'< >' should be encoded to '&lt; &gt;'",
            "&lt; &gt;", HtmlUtils.htmlEscape("< >", utf8));
    assertEquals("'< >' should be encoded to '&#60; &#62;'",
            "&#60; &#62;", HtmlUtils.htmlEscapeDecimal("< >", utf8));
    // Greek letters are representable in UTF-8 and stay as-is; only the
    // reserved double quote is replaced by its entity reference.
    assertEquals("UTF-8 supported chars should not be escaped",
            "Μερικοί Ελληνικοί &quot;χαρακτήρες&quot;",
            HtmlUtils.htmlEscape("Μερικοί Ελληνικοί \"χαρακτήρες\"", utf8));
}
@Test @Test
public void testDecodeFromHtmlCharacterSet() { public void testDecodeFromHtmlCharacterSet() {
assertNull("A null string should be converted to a null string", assertNull("A null string should be converted to a null string",