From 5f208936ec0bf387735cc40eff09cc8dc8228d05 Mon Sep 17 00:00:00 2001 From: Arjen Poutsma Date: Wed, 7 Sep 2011 13:58:51 +0000 Subject: [PATCH] Added parse methods --- .../springframework/web/util/UriUtils.java | 416 +++++++++++------- .../web/util/UriUtilsTests.java | 47 +- 2 files changed, 309 insertions(+), 154 deletions(-) diff --git a/org.springframework.web/src/main/java/org/springframework/web/util/UriUtils.java b/org.springframework.web/src/main/java/org/springframework/web/util/UriUtils.java index cb9e4fe4c12..559ec6efc0a 100644 --- a/org.springframework.web/src/main/java/org/springframework/web/util/UriUtils.java +++ b/org.springframework.web/src/main/java/org/springframework/web/util/UriUtils.java @@ -18,25 +18,25 @@ package org.springframework.web.util; import java.io.ByteArrayOutputStream; import java.io.UnsupportedEncodingException; +import java.util.EnumMap; +import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.springframework.util.Assert; /** - * Utility class for URI encoding and decoding based on RFC 3986. Offers encoding methods for - * the various URI components. + * Utility class for URI encoding and decoding based on RFC 3986. Offers encoding methods for the various URI + * components. * - *

All {@code encode*(String, String} methods in this class operate in a similar way: - *

+ *

All {@code encode*(String, String)} methods in this class operate in a similar way:

* * @author Arjen Poutsma - * @since 3.0 * @see RFC 3986 + * @since 3.0 */ public abstract class UriUtils { @@ -64,34 +64,50 @@ public abstract class UriUtils { ")?" + ")?" + PATH_PATTERN + "(\\?" + QUERY_PATTERN + ")?" + "(#" + LAST_PATTERN + ")?"); private static final Pattern HTTP_URL_PATTERN = Pattern.compile( - "^" + HTTP_PATTERN + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN + ")?" + - ")?" + PATH_PATTERN + "(\\?" + LAST_PATTERN + ")?"); - + "^" + HTTP_PATTERN + "(//(" + USERINFO_PATTERN + "@)?" + HOST_PATTERN + "(:" + PORT_PATTERN + ")?" + ")?" + + PATH_PATTERN + "(\\?" + LAST_PATTERN + ")?"); + + // Parsing + /** - * Encodes the given source URI into an encoded String. All various URI components - * are encoded according to their respective valid character sets. - * @param uri the URI to be encoded - * @param encoding the character encoding to encode to - * @return the encoded URI - * @throws IllegalArgumentException when the given uri parameter is not a valid URI - * @throws UnsupportedEncodingException when the given encoding parameter is not supported + * Parses the given source URI into a mapping of URI components to string values. + * + *

The returned map will contain keys for + *

+ * though the values assigned to these keys are {@code null} if they do not occur in the given source URI. + * + *

Note that the returned map will never contain mappings for {@link UriComponent#PATH_SEGMENT}, + * nor {@link UriComponent#QUERY_PARAM}, since those components can occur multiple times in the URI. + * + * @param uri the source URI + * @return the URI components of the URI */ - public static String encodeUri(String uri, String encoding) throws UnsupportedEncodingException { + public static Map parseUriComponents(String uri) { Assert.notNull(uri, "'uri' must not be null"); - Assert.hasLength(encoding, "'encoding' must not be empty"); Matcher m = URI_PATTERN.matcher(uri); if (m.matches()) { - String scheme = m.group(2); - String authority = m.group(3); - String userinfo = m.group(5); - String host = m.group(6); - String port = m.group(8); - String path = m.group(9); - String query = m.group(11); - String fragment = m.group(13); + Map result = new EnumMap(UriComponent.class); - return encodeUriComponents(scheme, authority, userinfo, host, port, path, query, fragment, encoding); + result.put(UriComponent.SCHEME, m.group(2)); + result.put(UriComponent.AUTHORITY, m.group(3)); + result.put(UriComponent.USER_INFO, m.group(5)); + result.put(UriComponent.HOST, m.group(6)); + result.put(UriComponent.PORT, m.group(8)); + result.put(UriComponent.PATH, m.group(9)); + result.put(UriComponent.QUERY, m.group(11)); + result.put(UriComponent.FRAGMENT, m.group(13)); + + return result; } else { throw new IllegalArgumentException("[" + uri + "] is not a valid URI"); @@ -99,79 +115,205 @@ public abstract class UriUtils { } /** - * Encodes the given HTTP URI into an encoded String. All various URI components - * are encoded according to their respective valid character sets. - *

Note that this method does not support fragments ({@code #}), - * as these are not supposed to be sent to the server, but retained by the client. - * @param httpUrl the HTTP URL to be encoded - * @param encoding the character encoding to encode to - * @return the encoded URL - * @throws IllegalArgumentException when the given uri parameter is not a valid URI - * @throws UnsupportedEncodingException when the given encoding parameter is not supported + * Parses the given source HTTP URL into a mapping of URI components to string values. + * + * + *

The returned map will contain keys for + *

+ * though the values assigned to these keys are {@code null} if they do not occur in the given source URI. + * + *

Note that the returned map will never contain mappings for {@link UriComponent#PATH_SEGMENT}, + * nor {@link UriComponent#QUERY_PARAM}, since those components can occur multiple times in the URI. + * + *

Note that this method does not support fragments ({@code #}), as these are not supposed to be + * sent to the server, but retained by the client. + * + * @param httpUrl the source URI + * @return the URI components of the URI */ - public static String encodeHttpUrl(String httpUrl, String encoding) throws UnsupportedEncodingException { + public static Map parseHttpUrlComponents(String httpUrl) { Assert.notNull(httpUrl, "'httpUrl' must not be null"); - Assert.hasLength(encoding, "'encoding' must not be empty"); Matcher m = HTTP_URL_PATTERN.matcher(httpUrl); if (m.matches()) { - String scheme = m.group(1); - String authority = m.group(2); - String userinfo = m.group(4); - String host = m.group(5); - String portString = m.group(7); - String path = m.group(8); - String query = m.group(10); + Map result = new EnumMap(UriComponent.class); - return encodeUriComponents(scheme, authority, userinfo, host, portString, path, query, null, encoding); + result.put(UriComponent.SCHEME, m.group(1)); + result.put(UriComponent.AUTHORITY, m.group(2)); + result.put(UriComponent.USER_INFO, m.group(4)); + result.put(UriComponent.HOST, m.group(5)); + result.put(UriComponent.PORT, m.group(7)); + result.put(UriComponent.PATH, m.group(8)); + result.put(UriComponent.QUERY, m.group(10)); + + return result; } else { throw new IllegalArgumentException("[" + httpUrl + "] is not a valid HTTP URL"); } } + // building + /** - * Encodes the given source URI components into an encoded String. - * All various URI components are optional, but encoded according - * to their respective valid character sets. - * @param scheme the scheme - * @param authority the authority - * @param userinfo the user info - * @param host the host - * @param port the port - * @param path the path - * @param query the query - * @param fragment the fragment - * @return the encoded URI - * @throws IllegalArgumentException when the given uri parameter is not a valid URI + * Builds a URI from the given URI components. 
The given map should contain at least one entry. + * + *

Note that {@link UriComponent#PATH_SEGMENT} and {@link UriComponent#QUERY_PARAM} keys (if any) + * will not be used to build the URI, in favor of {@link UriComponent#PATH} and {@link UriComponent#QUERY} + * respectively. + * + * @param uriComponents the components to build the URI out of + * @return the URI created from the given components */ - public static String encodeUriComponents(String scheme, - String authority, - String userinfo, - String host, - String port, - String path, - String query, - String fragment) { - try { - return encodeUriComponents(scheme, authority, userinfo, host, port, path, query, fragment, - DEFAULT_ENCODING); - } - catch (UnsupportedEncodingException e) { - throw new InternalError("'UTF-8' encoding not supported"); - } + public static String buildUri(Map uriComponents) { + Assert.notEmpty(uriComponents, "'uriComponents' must not be empty"); + + return buildUri(uriComponents.get(UriComponent.SCHEME), uriComponents.get(UriComponent.AUTHORITY), + uriComponents.get(UriComponent.USER_INFO), uriComponents.get(UriComponent.HOST), + uriComponents.get(UriComponent.PORT), uriComponents.get(UriComponent.PATH), + uriComponents.get(UriComponent.QUERY), uriComponents.get(UriComponent.FRAGMENT)); } /** - * Encodes the given source URI components into an encoded String. - * All various URI components are optional, but encoded according - * to their respective valid character sets. + * Builds a URI from the given URI component parameters. All parameters can be {@code null}. 
+ * * @param scheme the scheme * @param authority the authority * @param userinfo the user info * @param host the host * @param port the port * @param path the path - * @param query the query + * @param query the query + * @param fragment the fragment + * @return the URI created from the given components + */ + public static String buildUri(String scheme, + String authority, + String userinfo, + String host, + String port, + String path, + String query, + String fragment) { + StringBuilder uriBuilder = new StringBuilder(); + + if (scheme != null) { + uriBuilder.append(scheme); + uriBuilder.append(':'); + } + + if (userinfo != null || host != null || port != null) { + uriBuilder.append("//"); + if (userinfo != null) { + uriBuilder.append(userinfo); + uriBuilder.append('@'); + } + if (host != null) { + uriBuilder.append(host); + } + if (port != null) { + uriBuilder.append(':'); + uriBuilder.append(port); + } + } + else if (authority != null) { + uriBuilder.append("//"); + uriBuilder.append(authority); + } + + if (path != null) { + uriBuilder.append(path); + } + + if (query != null) { + uriBuilder.append('?'); + uriBuilder.append(query); + } + + if (fragment != null) { + uriBuilder.append('#'); + uriBuilder.append(fragment); + } + + return uriBuilder.toString(); + } + + // encoding + + /** + * Encodes the given source URI into an encoded String. All various URI components are encoded according to their + * respective valid character sets. 
+ * + * @param uri the URI to be encoded + * @param encoding the character encoding to encode to + * @return the encoded URI + * @throws IllegalArgumentException when the given uri parameter is not a valid URI + * @throws UnsupportedEncodingException when the given encoding parameter is not supported + */ + public static String encodeUri(String uri, String encoding) throws UnsupportedEncodingException { + Map uriComponents = parseUriComponents(uri); + return encodeUriComponents(uriComponents, encoding); + } + + /** + * Encodes the given HTTP URI into an encoded String. All various URI components are encoded according to their + * respective valid character sets.

Note that this method does not support fragments ({@code #}), + * as these are not supposed to be sent to the server, but retained by the client. + * + * @param httpUrl the HTTP URL to be encoded + * @param encoding the character encoding to encode to + * @return the encoded URL + * @throws IllegalArgumentException when the given uri parameter is not a valid URI + * @throws UnsupportedEncodingException when the given encoding parameter is not supported + */ + public static String encodeHttpUrl(String httpUrl, String encoding) throws UnsupportedEncodingException { + Map uriComponents = parseHttpUrlComponents(httpUrl); + return encodeUriComponents(uriComponents, encoding); + } + + /** + * Encodes the given source URI components into an encoded String. All various URI components are optional, but encoded + * according to their respective valid character sets. + * + * @param uriComponents the URI components + * @param encoding the character encoding to encode to + * @return the encoded URI + * @throws IllegalArgumentException when the given uri parameter is not a valid URI + * @throws UnsupportedEncodingException when the given encoding parameter is not supported + */ + public static String encodeUriComponents(Map uriComponents, + String encoding) throws UnsupportedEncodingException { + Assert.notEmpty(uriComponents, "'uriComponents' must not be empty"); + + Map encodedUriComponents = new EnumMap(UriComponent.class); + for (Map.Entry entry : uriComponents.entrySet()) { + if (entry.getValue() != null) { + String encodedValue = encode(entry.getValue(), encoding, entry.getKey(), false); + encodedUriComponents.put(entry.getKey(), encodedValue); + } + } + return buildUri(encodedUriComponents); + } + + /** + * Encodes the given source URI components into an encoded String. All various URI components are optional, but encoded + * according to their respective valid character sets. 
+ * + * @param scheme the scheme + * @param authority the authority + * @param userInfo the user info + * @param host the host + * @param port the port + * @param path the path + * @param query the query * @param fragment the fragment * @param encoding the character encoding to encode to * @return the encoded URI @@ -180,7 +322,7 @@ public abstract class UriUtils { */ public static String encodeUriComponents(String scheme, String authority, - String userinfo, + String userInfo , String host, String port, String path, @@ -189,50 +331,37 @@ public abstract class UriUtils { String encoding) throws UnsupportedEncodingException { Assert.hasLength(encoding, "'encoding' must not be empty"); - StringBuilder sb = new StringBuilder(); if (scheme != null) { - sb.append(encodeScheme(scheme, encoding)); - sb.append(':'); + scheme = encodeScheme(scheme, encoding); } - - if (userinfo != null || host != null || port != null) { - sb.append("//"); - if (userinfo != null) { - sb.append(encodeUserInfo(userinfo, encoding)); - sb.append('@'); - } - if (host != null) { - sb.append(encodeHost(host, encoding)); - } - if (port != null) { - sb.append(':'); - sb.append(encodePort(port, encoding)); - } - } else if (authority != null) { - sb.append("//"); - sb.append(encodeAuthority(authority, encoding)); + if (authority != null) { + authority = encodeAuthority(authority, encoding); + } + if (userInfo != null) { + userInfo = encodeUserInfo(userInfo, encoding); + } + if (host != null) { + host = encodeHost(host, encoding); + } + if (port != null) { + port = encodePort(port, encoding); } - if (path != null) { - sb.append(encodePath(path, encoding)); + path = encodePath(path, encoding); } - if (query != null) { - sb.append('?'); - sb.append(encodeQuery(query, encoding)); + query = encodeQuery(query, encoding); } - if (fragment != null) { - sb.append('#'); - sb.append(encodeFragment(fragment, encoding)); + fragment = encodeFragment(fragment, encoding); } - - return sb.toString(); + return 
buildUri(scheme, authority, userInfo, host, port, path, query, fragment); } /** * Encodes the given URI scheme with the given encoding. + * * @param scheme the scheme to be encoded * @param encoding the character encoding to encode to * @return the encoded scheme @@ -244,6 +373,7 @@ public abstract class UriUtils { /** * Encodes the given URI authority with the given encoding. + * * @param authority the authority to be encoded * @param encoding the character encoding to encode to * @return the encoded authority @@ -255,6 +385,7 @@ public abstract class UriUtils { /** * Encodes the given URI user info with the given encoding. + * * @param userInfo the user info to be encoded * @param encoding the character encoding to encode to * @return the encoded user info @@ -266,6 +397,7 @@ public abstract class UriUtils { /** * Encodes the given URI host with the given encoding. + * * @param host the host to be encoded * @param encoding the character encoding to encode to * @return the encoded host @@ -277,6 +409,7 @@ public abstract class UriUtils { /** * Encodes the given URI port with the given encoding. + * * @param port the port to be encoded * @param encoding the character encoding to encode to * @return the encoded port @@ -288,6 +421,7 @@ public abstract class UriUtils { /** * Encodes the given URI path with the given encoding. + * * @param path the path to be encoded * @param encoding the character encoding to encode to * @return the encoded path @@ -299,6 +433,7 @@ public abstract class UriUtils { /** * Encodes the given URI path segment with the given encoding. + * * @param segment the segment to be encoded * @param encoding the character encoding to encode to * @return the encoded segment @@ -310,7 +445,8 @@ public abstract class UriUtils { /** * Encodes the given URI query with the given encoding. 
- * @param query the query to be encoded + * + * @param query the query to be encoded * @param encoding the character encoding to encode to * @return the encoded query * @throws UnsupportedEncodingException when the given encoding parameter is not supported @@ -321,6 +457,7 @@ public abstract class UriUtils { /** * Encodes the given URI query parameter with the given encoding. + * * @param queryParam the query parameter to be encoded * @param encoding the character encoding to encode to * @return the encoded query parameter @@ -332,6 +469,7 @@ public abstract class UriUtils { /** * Encodes the given URI fragment with the given encoding. + * * @param fragment the fragment to be encoded * @param encoding the character encoding to encode to * @return the encoded fragment @@ -344,22 +482,13 @@ public abstract class UriUtils { /** * Encodes the given source into an encoded String using the rules specified by the given component. This method * encodes with the default encoding (i.e. UTF-8). + * * @param source the source string * @param uriComponent the URI component for the source - * @param allowTemplateVars whether URI template variables are allowed. If {@code true}, '{' and '}' characters - * are not encoded, even though they might not be valid for the component + * @param allowTemplateVars whether URI template variables are allowed. 
If {@code true}, '{' and '}' characters are not + * encoded, even though they might not be valid for the component * @return the encoded URI * @throws IllegalArgumentException when the given uri parameter is not a valid URI - * @see #SCHEME_COMPONENT - * @see #AUTHORITY_COMPONENT - * @see #USER_INFO_COMPONENT - * @see #HOST_COMPONENT - * @see #PORT_COMPONENT - * @see #PATH_COMPONENT - * @see #PATH_SEGMENT_COMPONENT - * @see #QUERY_COMPONENT - * @see #QUERY_PARAM_COMPONENT - * @see #FRAGMENT_COMPONENT */ public static String encode(String source, UriComponent uriComponent, boolean allowTemplateVars) { try { @@ -372,23 +501,14 @@ public abstract class UriUtils { /** * Encodes the given source into an encoded String using the rules specified by the given component. + * * @param source the source string * @param encoding the encoding of the source string * @param uriComponent the URI component for the source - * @param allowTemplateVars whether URI template variables are allowed. If {@code true}, '{' and '}' characters - * are not encoded, even though they might not be valid for the component + * @param allowTemplateVars whether URI template variables are allowed. If {@code true}, '{' and '}' characters are not + * encoded, even though they might not be valid for the component * @return the encoded URI * @throws IllegalArgumentException when the given uri parameter is not a valid URI - * @see #SCHEME_COMPONENT - * @see #AUTHORITY_COMPONENT - * @see #USER_INFO_COMPONENT - * @see #HOST_COMPONENT - * @see #PORT_COMPONENT - * @see #PATH_COMPONENT - * @see #PATH_SEGMENT_COMPONENT - * @see #QUERY_COMPONENT - * @see #QUERY_PARAM_COMPONENT - * @see #FRAGMENT_COMPONENT */ public static String encode(String source, String encoding, UriComponent uriComponent, boolean allowTemplateVars) throws UnsupportedEncodingException { @@ -427,22 +547,14 @@ public abstract class UriUtils { return bos.toByteArray(); } - /** - * Decodes the given encoded source String into an URI. 
Based on the following - * rules: - *

+ * Decodes the given encoded source String into an URI. Based on the following rules: + * * @param source the source string * @param encoding the encoding * @return the decoded URI diff --git a/org.springframework.web/src/test/java/org/springframework/web/util/UriUtilsTests.java b/org.springframework.web/src/test/java/org/springframework/web/util/UriUtilsTests.java index 01de9f59d5c..e3c67d92943 100644 --- a/org.springframework.web/src/test/java/org/springframework/web/util/UriUtilsTests.java +++ b/org.springframework.web/src/test/java/org/springframework/web/util/UriUtilsTests.java @@ -1,5 +1,5 @@ /* - * Copyright 2002-2010 the original author or authors. + * Copyright 2002-2011 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,9 +17,11 @@ package org.springframework.web.util; import java.io.UnsupportedEncodingException; +import java.util.Map; + +import org.junit.Test; import static org.junit.Assert.*; -import org.junit.Test; /** * @author Arjen Poutsma @@ -28,6 +30,47 @@ public class UriUtilsTests { private static final String ENC = "UTF-8"; + + @Test + public void parseUriComponents() { + Map result = UriUtils.parseUriComponents("http://www.ietf.org/rfc/rfc3986.txt"); + assertEquals("http", result.get(UriComponent.SCHEME)); + assertNull(result.get(UriComponent.USER_INFO)); + assertEquals("www.ietf.org", result.get(UriComponent.HOST)); + assertNull(result.get(UriComponent.PORT)); + assertEquals("/rfc/rfc3986.txt", result.get(UriComponent.PATH)); + assertNull(result.get(UriComponent.QUERY)); + assertNull(result.get(UriComponent.FRAGMENT)); + + result = UriUtils.parseUriComponents( + "http://arjen:foobar@java.sun.com:80/javase/6/docs/api/java/util/BitSet.html?foo=bar#and(java.util.BitSet)"); + assertEquals("http", result.get(UriComponent.SCHEME)); + assertEquals("arjen:foobar", result.get(UriComponent.USER_INFO)); + 
assertEquals("java.sun.com", result.get(UriComponent.HOST)); + assertEquals("80", result.get(UriComponent.PORT)); + assertEquals("/javase/6/docs/api/java/util/BitSet.html", result.get(UriComponent.PATH)); + assertEquals("foo=bar", result.get(UriComponent.QUERY)); + assertEquals("and(java.util.BitSet)", result.get(UriComponent.FRAGMENT)); + + result = UriUtils.parseUriComponents("mailto:java-net@java.sun.com"); + assertEquals("mailto", result.get(UriComponent.SCHEME)); + assertNull(result.get(UriComponent.USER_INFO)); + assertNull(result.get(UriComponent.HOST)); + assertNull(result.get(UriComponent.PORT)); + assertEquals("java-net@java.sun.com", result.get(UriComponent.PATH)); + assertNull(result.get(UriComponent.QUERY)); + assertNull(result.get(UriComponent.FRAGMENT)); + + result = UriUtils.parseUriComponents("docs/guide/collections/designfaq.html#28"); + assertNull(result.get(UriComponent.SCHEME)); + assertNull(result.get(UriComponent.USER_INFO)); + assertNull(result.get(UriComponent.HOST)); + assertNull(result.get(UriComponent.PORT)); + assertEquals("docs/guide/collections/designfaq.html", result.get(UriComponent.PATH)); + assertNull(result.get(UriComponent.QUERY)); + assertEquals("28", result.get(UriComponent.FRAGMENT)); + } + @Test public void encodeScheme() throws UnsupportedEncodingException { assertEquals("Invalid encoded result", "foobar+-.", UriUtils.encodeScheme("foobar+-.", ENC));