Update documentation for URI parsing types

See gh-33639
This commit is contained in:
rstoyanchev 2024-10-07 09:56:57 +01:00
parent 6c62965cbb
commit ef0a21ec7a
3 changed files with 70 additions and 40 deletions

View File

@ -234,6 +234,34 @@ Kotlin::
======
[[uri-parsing]]
= URI Parsing
[.small]#Spring MVC and Spring WebFlux#
`UriComponentsBuilder` supports two URI parser types:
1. RFC parser -- this parser type expects URI strings to conform to RFC 3986 syntax,
and treats deviations from the syntax as illegal.
2. WhatWG parser -- this parser is based on the
https://url.spec.whatwg.org/#url-parsing[URL parsing algorithm] in the
https://url.spec.whatwg.org[WhatWG URL Living Standard]. It provides lenient handling of
a wide range of cases of unexpected input. Browsers implement this in order to leniently
handle user-typed URLs. For more details, see the URL Living Standard and the URL parsing
https://github.com/web-platform-tests/wpt/tree/master/url[test cases].
By default, `RestClient`, `WebClient`, and `RestTemplate` use the RFC parser type, and
expect applications to provide URL templates that conform to RFC syntax. To change
that, you can customize the `UriBuilderFactory` on any of the clients.
Applications and frameworks may further rely on `UriComponentsBuilder` for their own needs
to parse user-provided URLs in order to inspect and possibly validate URI components
such as the scheme, host, port, path, and query. Such components can decide to use the
WhatWG parser type in order to handle URLs more leniently, and to align with the way
browsers parse URIs, in case of a redirect to the input URL or if it is included in a
response to a browser.
[[uri-encoding]]
= URI Encoding
[.small]#Spring MVC and Spring WebFlux#

View File

@ -97,10 +97,12 @@ public class DefaultUriBuilderFactory implements UriBuilderFactory {
/**
* Set the {@link UriComponentsBuilder.ParserType} to use.
* <p>By default, if the parser type is not specified,
* {@link UriComponentsBuilder} uses {@link UriComponentsBuilder.ParserType#RFC}.
* <p>By default, {@link UriComponentsBuilder} uses the
* {@link UriComponentsBuilder.ParserType#RFC parser type}.
* @param parserType the parser type
* @since 6.2
* @see UriComponentsBuilder.ParserType
* @see UriComponentsBuilder#fromUriString(String, UriComponentsBuilder.ParserType)
*/
public void setParserType(UriComponentsBuilder.ParserType parserType) {
this.parserType = parserType;

View File

@ -42,19 +42,23 @@ import org.springframework.web.util.HierarchicalUriComponents.PathComponent;
import org.springframework.web.util.UriComponents.UriTemplateVariables;
/**
* Builder for {@link UriComponents}.
*
* <p>Typical usage involves:
* Builder for {@link UriComponents}. Use as follows:
* <ol>
* <li>Create a {@code UriComponentsBuilder} with one of the static factory methods
* (such as {@link #fromPath(String)} or {@link #fromUri(URI)})</li>
* <li>Set the various URI components through the respective methods ({@link #scheme(String)},
* {@link #userInfo(String)}, {@link #host(String)}, {@link #port(int)}, {@link #path(String)},
* {@link #pathSegment(String...)}, {@link #queryParam(String, Object...)}, and
* {@link #fragment(String)}.</li>
* <li>Build the {@link UriComponents} instance with the {@link #build()} method.</li>
* <li>Create a builder through a factory method, e.g. {@link #fromUriString(String)}.
* <li>Set URI components (e.g. scheme, host, path, etc) through instance methods.
* <li>Build the {@link UriComponents}.</li>
* <li>Expand URI variables from a map or array of variable values.
* <li>Encode via {@link UriComponents#encode()}.</li>
* <li>Use {@link UriComponents#toUri()} or {@link UriComponents#toUriString()}.
* </ol>
*
* <p>By default, URI parsing is based on the {@link ParserType#RFC RFC parser type},
* which expects input strings to conform to RFC 3986 syntax. The alternative
* {@link ParserType#WHAT_WG WhatWG parser type}, based on the algorithm from
* the WhatWG <a href="https://url.spec.whatwg.org">URL Living Standard</a>
* provides more lenient handling of a wide range of cases that occur in
* user-typed URLs.
*
* @author Arjen Poutsma
* @author Rossen Stoyanchev
* @author Phillip Webb
@ -785,6 +789,30 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable {
}
/**
* Enum to provide a choice of URI parsers to use in {@link #fromUriString(String, ParserType)}.
* @since 6.2
*/
public enum ParserType {
/**
* This parser type expects URIs to conform to RFC 3986 syntax.
*/
RFC,
/**
* This parser follows the
* <a href="https://url.spec.whatwg.org/#url-parsing">URL parsing algorithm</a>
* in the WhatWG URL Living Standard that browsers implement to align on
* lenient handling of user-typed URLs that may not conform to RFC syntax.
* @see <a href="https://url.spec.whatwg.org">URL Living Standard</a>
* @see <a href="https://github.com/web-platform-tests/wpt/tree/master/url">URL tests</a>
*/
WHAT_WG
}
private interface PathComponentBuilder {
@Nullable
@ -794,34 +822,6 @@ public class UriComponentsBuilder implements UriBuilder, Cloneable {
}
/**
* Enum to represent different URI parsing mechanisms.
*/
public enum ParserType {
/**
* Parser that expects URIs conforming to RFC 3986 syntax.
*/
RFC,
/**
* Parser based on the algorithm defined in the WhatWG URL Living Standard.
* Browsers use this algorithm to align on lenient parsing of user-typed
* URLs that may deviate from RFC syntax.
* <p>For more details, see:
* <ul>
* <li><a href="https://url.spec.whatwg.org">URL Living Standard</a></li>
* <li><a href="https://url.spec.whatwg.org/#url-parsing">Section 4.4: URL parsing</a></li>
* <li><a href="https://github.com/web-platform-tests/wpt/tree/master/url">web-platform-tests</a></li>
* </ul>
* <p>Use this if you need to leniently handle URLs that don't conform
* to RFC syntax, or for alignment with browser parsing.
*/
WHAT_WG
}
private static class CompositePathComponentBuilder implements PathComponentBuilder {
private final Deque<PathComponentBuilder> builders = new ArrayDeque<>();