Refactor media types parsing improvements

Issue: SPR-17459
This commit is contained in:
Rossen Stoyanchev 2018-11-13 22:43:29 -05:00
parent f4b05dc2e7
commit ba3fef3e8a
4 changed files with 59 additions and 136 deletions

View File

@ -28,6 +28,7 @@ import java.util.LinkedHashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Random; import java.util.Random;
import java.util.stream.Collectors;
import org.springframework.lang.Nullable; import org.springframework.lang.Nullable;
import org.springframework.util.MimeType.SpecificityComparator; import org.springframework.util.MimeType.SpecificityComparator;
@ -249,32 +250,53 @@ public abstract class MimeTypeUtils {
} }
/** /**
* Parse the given, comma-separated string into a list of {@code MimeType} objects. * Parse the comma-separated string into a list of {@code MimeType} objects.
* @param mimeTypes the string to parse * @param mimeTypes the string to parse
* @return the list of mime types * @return the list of mime types
* @throws IllegalArgumentException if the string cannot be parsed * @throws InvalidMimeTypeException if the string cannot be parsed
*/ */
public static List<MimeType> parseMimeTypes(String mimeTypes) { public static List<MimeType> parseMimeTypes(String mimeTypes) {
if (!StringUtils.hasLength(mimeTypes)) { if (!StringUtils.hasLength(mimeTypes)) {
return Collections.emptyList(); return Collections.emptyList();
} }
boolean isQuoted = false; return tokenize(mimeTypes).stream()
int nextBeginIndex = 0; .map(MimeTypeUtils::parseMimeType).collect(Collectors.toList());
List<MimeType> tokens = new ArrayList<>(); }
for(int i = 0; i < mimeTypes.length() - 1; i++) {
//tokenizing on commas that are not within double quotes /**
if(mimeTypes.charAt(i) == ',' && !isQuoted) { * Tokenize the given comma-separated string of {@code MimeType} objects
tokens.add(parseMimeType(mimeTypes.substring(nextBeginIndex,i))); * into a {@code List<String>}. Unlike simple tokenization by ",", this
nextBeginIndex = i + 1; * method takes into account quoted parameters.
//ignoring escaped double quote within double quotes * @param mimeTypes the string to tokenize
} else if(isQuoted && mimeTypes.charAt(i) == '"' && mimeTypes.charAt(i-1) == '\\') { * @return the list of tokens
continue; * @since 5.1.3
} else if(mimeTypes.charAt(i) == '"') { */
isQuoted = !isQuoted; public static List<String> tokenize(String mimeTypes) {
} if (!StringUtils.hasLength(mimeTypes)) {
return Collections.emptyList();
} }
//either the last part of the tokenization or the original string List<String> tokens = new ArrayList<>();
tokens.add(parseMimeType(mimeTypes.substring(nextBeginIndex))); boolean inQuotes = false;
int startIndex = 0;
int i = 0;
while (i < mimeTypes.length()) {
switch (mimeTypes.charAt(i)) {
case '"':
inQuotes = !inQuotes;
break;
case ',':
if (!inQuotes) {
tokens.add(mimeTypes.substring(startIndex, i));
startIndex = i + 1;
}
break;
case '\\':
i++;
break;
}
i++;
}
tokens.add(mimeTypes.substring(startIndex));
return tokens; return tokens;
} }

View File

@ -277,54 +277,23 @@ public class MimeTypeTests {
assertEquals("Invalid amount of mime types", 0, mimeTypes.size()); assertEquals("Invalid amount of mime types", 0, mimeTypes.size());
} }
// SPR-17459 @Test // SPR-17459
@Test public void parseMimeTypesWithQuotedParameters() {
public void parseMimeTypesWithOddNumberOfDoubleQuotedCommas() { testWithQuotedParameters("foo/bar;param=\",\"");
String s = "foo/bar;param=\",\""; testWithQuotedParameters("foo/bar;param=\"s,a,\"");
List<MimeType> mimeTypes = MimeTypeUtils.parseMimeTypes(s); testWithQuotedParameters("foo/bar;param=\"s,\"", "text/x-c");
assertEquals("Invalid amount of mime types", 1, mimeTypes.size()); testWithQuotedParameters("foo/bar;param=\"a\\\"b,c\"");
assertEquals("Comma should be part of the mime type", s, mimeTypes.get(0).toString()); testWithQuotedParameters("foo/bar;param=\"\\\\\"");
testWithQuotedParameters("foo/bar;param=\"\\,\\\"");
} }
// SPR-17459 private void testWithQuotedParameters(String... mimeTypes) {
@Test String s = String.join(",", mimeTypes);
public void parseMimeTypesWithEvenNumberOfDoubleQuotedCommas() { List<MimeType> actual = MimeTypeUtils.parseMimeTypes(s);
String s = "foo/bar;param=\"s,a,\""; assertEquals(mimeTypes.length, actual.size());
List<MimeType> mimeTypes = MimeTypeUtils.parseMimeTypes(s); for (int i=0; i < mimeTypes.length; i++) {
assertEquals("Invalid amount of mime types", 1, mimeTypes.size()); assertEquals(mimeTypes[i], actual.get(i).toString());
assertEquals("Comma should be part of the mime type", s, mimeTypes.get(0).toString()); }
}
// SPR-17459
@Test
public void parseMimeTypesWithAndWithoutDoubleQuotedCommas() {
String s = "foo/bar;param=\"s,\", text/x-c";
List<MimeType> mimeTypes = MimeTypeUtils.parseMimeTypes(s);
assertEquals("Invalid amount of mime types", 2, mimeTypes.size());
assertEquals("Comma should be part of the mime type", "foo/bar;param=\"s,\"", mimeTypes.get(0).toString());
}
// SPR-17459
@Test
public void parseMimeTypesIgnoreEscapedDoubleQuoteWithinDoubleQuotes() {
String s = "foo/bar;param=\"a\\\"b,c\"";
List<MimeType> mimeTypes = MimeTypeUtils.parseMimeTypes(s);
assertEquals("Invalid amount of mime types", 1, mimeTypes.size());
assertEquals("Escaped quote within quotes should be ignored when considering comma tokenization", s, mimeTypes.get(0).toString());
}
// SPR-17459
@Test
public void parseMimeTypesIgnoreEscapedBackslash() {
String s = "foo/bar;param=\"\\\\\"";
List<MimeType> mimeTypes = MimeTypeUtils.parseMimeTypes(s);
assertEquals("Invalid amount of mime types", 1, mimeTypes.size());
assertEquals("Escaped backslash should be ignored when considering comma tokenization", s, mimeTypes.get(0).toString());
s = "foo/bar;param=\"\\,\\\"";
mimeTypes = MimeTypeUtils.parseMimeTypes(s);
assertEquals("Invalid amount of mime types", 1, mimeTypes.size());
assertEquals("Escaped backslash should be ignored when considering comma tokenization", s, mimeTypes.get(0).toString());
} }
@Test @Test

View File

@ -44,7 +44,6 @@ import org.springframework.util.StringUtils;
* @author Rossen Stoyanchev * @author Rossen Stoyanchev
* @author Sebastien Deleuze * @author Sebastien Deleuze
* @author Kazuki Shimizu * @author Kazuki Shimizu
* @author Dimitrios Liapis
* @since 3.0 * @since 3.0
* @see <a href="http://tools.ietf.org/html/rfc7231#section-3.1.1.1"> * @see <a href="http://tools.ietf.org/html/rfc7231#section-3.1.1.1">
* HTTP 1.1: Semantics and Content, section 3.1.1.1</a> * HTTP 1.1: Semantics and Content, section 3.1.1.1</a>
@ -543,7 +542,7 @@ public class MediaType extends MimeType implements Serializable {
} }
/** /**
* Parse the given comma-separated string into a list of {@code MediaType} objects. * Parse the comma-separated string into a list of {@code MediaType} objects.
* <p>This method can be used to parse an Accept or Content-Type header. * <p>This method can be used to parse an Accept or Content-Type header.
* @param mediaTypes the string to parse * @param mediaTypes the string to parse
* @return the list of media types * @return the list of media types
@ -553,24 +552,8 @@ public class MediaType extends MimeType implements Serializable {
if (!StringUtils.hasLength(mediaTypes)) { if (!StringUtils.hasLength(mediaTypes)) {
return Collections.emptyList(); return Collections.emptyList();
} }
boolean isQuoted = false; return MimeTypeUtils.tokenize(mediaTypes).stream()
int nextBeginIndex = 0; .map(MediaType::parseMediaType).collect(Collectors.toList());
List<MediaType> tokens = new ArrayList<>();
for(int i = 0; i < mediaTypes.length() - 1; i++) {
//tokenizing on commas that are not within double quotes
if(mediaTypes.charAt(i) == ',' && !isQuoted) {
tokens.add(parseMediaType(mediaTypes.substring(nextBeginIndex, i)));
nextBeginIndex = i + 1;
//ignoring escaped double quote within double quotes
} else if(isQuoted && mediaTypes.charAt(i) == '"' && mediaTypes.charAt(i-1) == '\\') {
continue;
} else if(mediaTypes.charAt(i) == '"') {
isQuoted = !isQuoted;
}
}
//either the last part of the tokenization or the original string
tokens.add(parseMediaType(mediaTypes.substring(nextBeginIndex)));
return tokens;
} }
/** /**

View File

@ -1,5 +1,5 @@
/* /*
* Copyright 2002-2018 the original author or authors. * Copyright 2002-2016 the original author or authors.
* *
* Licensed under the Apache License, Version 2.0 (the "License"); * Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. * you may not use this file except in compliance with the License.
@ -32,7 +32,6 @@ import static org.junit.Assert.*;
/** /**
* @author Arjen Poutsma * @author Arjen Poutsma
* @author Juergen Hoeller * @author Juergen Hoeller
* @author Dimitrios Liapis
*/ */
public class MediaTypeTests { public class MediaTypeTests {
@ -144,56 +143,6 @@ public class MediaTypeTests {
assertEquals("Invalid amount of media types", 0, mediaTypes.size()); assertEquals("Invalid amount of media types", 0, mediaTypes.size());
} }
// SPR-17459
@Test
public void parseMediaTypesWithOddNumberOfDoubleQuotedCommas() {
String s = "foo/bar;param=\",\"";
List<MediaType> mediaTypes = MediaType.parseMediaTypes(s);
assertEquals("Invalid amount of media types", 1, mediaTypes.size());
assertEquals("Comma should be part of the media type", s, mediaTypes.get(0).toString());
}
// SPR-17459
@Test
public void parseMediaTypesWithEvenNumberOfDoubleQuotedCommas() {
String s = "foo/bar;param=\"s,a,\"";
List<MediaType> mediaTypes = MediaType.parseMediaTypes(s);
assertEquals("Invalid amount of media types", 1, mediaTypes.size());
assertEquals("Comma should be part of the media type", s, mediaTypes.get(0).toString());
}
// SPR-17459
@Test
public void parseMediaTypesWithAndWithoutDoubleQuotedCommas() {
String s = "foo/bar;param=\"s,\", text/x-c";
List<MediaType> mediaTypes = MediaType.parseMediaTypes(s);
assertEquals("Invalid amount of media types", 2, mediaTypes.size());
assertEquals("Comma should be part of the media type", "foo/bar;param=\"s,\"", mediaTypes.get(0).toString());
}
// SPR-17459
@Test
public void parseMediaTypesIgnoreEscapedDoubleQuoteWithinDoubleQuotes() {
String s = "foo/bar;param=\"a\\\"b,c\"";
List<MediaType> mediaTypes = MediaType.parseMediaTypes(s);
assertEquals("Invalid amount of media types", 1, mediaTypes.size());
assertEquals("Escaped quote within quotes should be ignored when considering comma tokenization", s, mediaTypes.get(0).toString());
}
// SPR-17459
@Test
public void parseMediaTypesIgnoreEscapedBackslash() {
String s = "foo/bar;param=\"\\\\\"";
List<MediaType> mediaTypes = MediaType.parseMediaTypes(s);
assertEquals("Invalid amount of media types", 1, mediaTypes.size());
assertEquals("Escaped quote within quotes should be ignored when considering comma tokenization", s, mediaTypes.get(0).toString());
s = "foo/bar;param=\"\\,\\\"";
mediaTypes = MediaType.parseMediaTypes(s);
assertEquals("Invalid amount of media types", 1, mediaTypes.size());
assertEquals("Escaped quote within quotes should be ignored when considering comma tokenization", s, mediaTypes.get(0).toString());
}
@Test @Test
public void compareTo() { public void compareTo() {
MediaType audioBasic = new MediaType("audio", "basic"); MediaType audioBasic = new MediaType("audio", "basic");