mirror of https://github.com/apache/jmeter.git
Bug 59885 - Optimize css parsing for embedded resources download by introducing a cache
Based on PR 219 contributed by Benoit Wiart (b.wiart at ubik-ingenierie.com) This closes #219 on github. Bugzilla Id: 59885 git-svn-id: https://svn.apache.org/repos/asf/jmeter/trunk@1754678 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
db1a75c3a3
commit
d0abd8837a
|
|
@ -726,6 +726,13 @@ HTTPResponse.parsers=htmlParser wmlParser cssParser
|
|||
# CSS Parser based on ph-css
|
||||
cssParser.className=org.apache.jmeter.protocol.http.parser.CssParser
|
||||
cssParser.types=text/css
|
||||
|
||||
# CSS parser LRU cache size
|
||||
# This cache stores the URLs found in a CSS to avoid continuously parsing the CSS
|
||||
# By default the cache size is 400
|
||||
# It can be disabled by setting its value to 0
|
||||
#css.parser.cache.size=400
|
||||
|
||||
#---------------------------------------------------------------------------
|
||||
# HTML Parser configuration
|
||||
#---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -21,9 +21,13 @@ package org.apache.jmeter.protocol.http.parser;
|
|||
import java.net.URL;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.commons.collections.map.LRUMap;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.apache.jmeter.util.JMeterUtils;
|
||||
import org.apache.jorphan.logging.LoggingManager;
|
||||
|
|
@ -50,7 +54,20 @@ import com.helger.css.reader.errorhandler.LoggingCSSParseErrorHandler;
|
|||
public class CssParser implements LinkExtractorParser {
|
||||
private static final boolean IGNORE_UNRECOVERABLE_PARSING_ERROR = JMeterUtils.getPropDefault("httpsampler.ignore_failed_embedded_resource", false); //$NON-NLS-1$
|
||||
private static final Logger LOG = LoggingManager.getLoggerForClass();
|
||||
|
||||
/**
|
||||
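* Maximum number of entries kept in the CSS URL cache, read from property css.parser.cache.size (default 400); a value of 0 disables the cache.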
|
||||
*/
|
||||
private static final int CSS_URL_CACHE_MAX_SIZE = JMeterUtils.getPropDefault("css.parser.cache.size", 400);
|
||||
|
||||
/**
|
||||
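* Synchronized LRU cache of the URL collections extracted from CSS content, keyed by the MD5 hex digest of the CSS bytes; null when the cache is disabled.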
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
private static Map<String, URLCollection> CSS_URL_CACHE =
|
||||
CSS_URL_CACHE_MAX_SIZE > 0 ? Collections.synchronizedMap(new LRUMap(CSS_URL_CACHE_MAX_SIZE)) : null;
|
||||
|
||||
|
||||
private static final class CustomLoggingCSSParseExceptionCallback extends LoggingCSSParseExceptionCallback {
|
||||
/**
|
||||
*
|
||||
|
|
@ -76,6 +93,7 @@ public class CssParser implements LinkExtractorParser {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
|
@ -93,40 +111,55 @@ public class CssParser implements LinkExtractorParser {
|
|||
public Iterator<URL> getEmbeddedResourceURLs(String userAgent, byte[] data,
|
||||
final URL baseUrl, String encoding) throws LinkExtractorParseException {
|
||||
try {
|
||||
String cssContent = new String(data, encoding);
|
||||
final CascadingStyleSheet aCSS = CSSReader.readFromStringStream(cssContent,
|
||||
new CSSReaderSettings()
|
||||
.setBrowserCompliantMode(true)
|
||||
.setFallbackCharset(Charset.forName(encoding))
|
||||
.setCSSVersion (ECSSVersion.CSS30)
|
||||
.setCustomErrorHandler(new LoggingCSSParseErrorHandler())
|
||||
.setCustomExceptionHandler (new CustomLoggingCSSParseExceptionCallback(baseUrl)));
|
||||
final List<URLString> list = new ArrayList<>();
|
||||
final URLCollection urlCollection = new URLCollection(list);
|
||||
if(aCSS != null) {
|
||||
CSSVisitor.visitCSSUrl(aCSS, new DefaultCSSUrlVisitor() {
|
||||
@Override
|
||||
public void onImport(final CSSImportRule importRule) {
|
||||
String location = importRule.getLocationString();
|
||||
if(!StringUtils.isEmpty(location)) {
|
||||
urlCollection.addURL(location, baseUrl);
|
||||
}
|
||||
}
|
||||
// Call for URLs outside of URLs
|
||||
@Override
|
||||
public void onUrlDeclaration(
|
||||
final ICSSTopLevelRule aTopLevelRule,
|
||||
final CSSDeclaration aDeclaration,
|
||||
final CSSExpressionMemberTermURI aURITerm) {
|
||||
// NOOP
|
||||
// Browser fetch such urls only when CSS rule matches
|
||||
// so we disable this code
|
||||
//urlCollection.addURL(aURITerm.getURIString(), baseUrl);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
LOG.warn("Failed parsing url:"+baseUrl+", got null CascadingStyleSheet");
|
||||
boolean cacheEnabled = CSS_URL_CACHE_MAX_SIZE > 0;
|
||||
String md5Key = null;
|
||||
URLCollection urlCollection = null;
|
||||
if(cacheEnabled) {
|
||||
md5Key = DigestUtils.md5Hex(data);
|
||||
urlCollection = CSS_URL_CACHE.get(md5Key);
|
||||
}
|
||||
|
||||
if(urlCollection == null) {
|
||||
String cssContent = new String(data, encoding);
|
||||
final CascadingStyleSheet aCSS = CSSReader.readFromStringStream(cssContent,
|
||||
new CSSReaderSettings()
|
||||
.setBrowserCompliantMode(true)
|
||||
.setFallbackCharset(Charset.forName(encoding))
|
||||
.setCSSVersion (ECSSVersion.CSS30)
|
||||
.setCustomErrorHandler(new LoggingCSSParseErrorHandler())
|
||||
.setCustomExceptionHandler (new CustomLoggingCSSParseExceptionCallback(baseUrl)));
|
||||
final List<URLString> list = new ArrayList<>();
|
||||
urlCollection = new URLCollection(list);
|
||||
final URLCollection localCollection = urlCollection;
|
||||
if(aCSS != null) {
|
||||
CSSVisitor.visitCSSUrl(aCSS, new DefaultCSSUrlVisitor() {
|
||||
@Override
|
||||
public void onImport(final CSSImportRule importRule) {
|
||||
String location = importRule.getLocationString();
|
||||
if(!StringUtils.isEmpty(location)) {
|
||||
localCollection.addURL(location, baseUrl);
|
||||
}
|
||||
}
|
||||
// Call for URLs outside of URLs
|
||||
@Override
|
||||
public void onUrlDeclaration(
|
||||
final ICSSTopLevelRule aTopLevelRule,
|
||||
final CSSDeclaration aDeclaration,
|
||||
final CSSExpressionMemberTermURI aURITerm) {
|
||||
// NOOP
|
||||
// Browser fetch such urls only when CSS rule matches
|
||||
// so we disable this code
|
||||
//urlCollection.addURL(aURITerm.getURIString(), baseUrl);
|
||||
}
|
||||
});
|
||||
if(cacheEnabled) {
|
||||
CSS_URL_CACHE.put(md5Key, urlCollection);
|
||||
}
|
||||
} else {
|
||||
LOG.warn("Failed parsing url:"+baseUrl+", got null CascadingStyleSheet");
|
||||
}
|
||||
}
|
||||
|
||||
if(LOG.isDebugEnabled()) {
|
||||
StringBuilder builder = new StringBuilder();
|
||||
for (Iterator<URL> iterator = urlCollection.iterator(); iterator.hasNext();) {
|
||||
|
|
@ -135,6 +168,7 @@ public class CssParser implements LinkExtractorParser {
|
|||
}
|
||||
LOG.debug("Parsed:"+baseUrl+", got:"+builder.toString());
|
||||
}
|
||||
|
||||
return urlCollection.iterator();
|
||||
} catch (Exception e) {
|
||||
throw new LinkExtractorParseException(e);
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ Summary
|
|||
<ch_section>Incompatible changes</ch_section>
|
||||
|
||||
<ul>
|
||||
<li>Sample change...</li>
|
||||
<li>A cache for CSS Parsing of URLs has been introduced in this version and is enabled by default. It is controlled by the property <code>css.parser.cache.size</code> and can be disabled by setting that property to 0. See <bugzilla>59885</bugzilla></li>
|
||||
</ul>
|
||||
|
||||
<h3>Deprecated and removed elements</h3>
|
||||
|
|
@ -80,6 +80,7 @@ Summary
|
|||
<h3>HTTP Samplers and Test Script Recorder</h3>
|
||||
<ul>
|
||||
<li><bug>59882</bug>Reduce memory allocations for better throughput. Contributed by Benoit Wiart (b.wiart at ubik-ingenierie.com) through <pr>217</pr></li>
|
||||
<li><bug>59885</bug>Optimize css parsing for embedded resources download by introducing a cache. Contributed by Benoit Wiart (b.wiart at ubik-ingenierie.com) through <pr>219</pr></li>
|
||||
</ul>
|
||||
|
||||
<h3>Other samplers</h3>
|
||||
|
|
|
|||
|
|
@ -445,14 +445,18 @@ Uncomment this line if you put anything in httpclient.parameters file</property>
|
|||
</section>
|
||||
<section name="&sect-num;.24 HTML Parser configuration" anchor="parser_config">
|
||||
<properties>
|
||||
<property name="HTTPResponse.parsers"> Space-separated list of parser groups<br/>, defaults to:htmlParser wmlParser cssParser</property>
|
||||
<property name="cssParser.className"> for each parser, there should be a parser.types and a parser.className property<br/> CSS Parser based on ph-css<br/>, defaults to:org.apache.jmeter.protocol.http.parser.CssParser</property>
|
||||
<property name="cssParser.types">, defaults to:text/css</property>
|
||||
<property name=" see https://bz.apache.org/bugzilla/show_bug.cgi?id"> Define the HTML parser to be used.<br/> Default parser:<br/> This new parser (since 2.10) should perform better than all others<br/>, defaults to:55632</property>
|
||||
<property name="htmlParser.className"> Do not comment this property<br/>, defaults to:org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser</property>
|
||||
<property name="htmlParser.className"> Other parsers:<br/> Default parser before 2.10<br/>, defaults to:org.apache.jmeter.protocol.http.parser.JTidyHTMLParser</property>
|
||||
<property name="htmlParser.className"> Note that Regexp extractor may detect references that have been commented out.<br/> In many cases it will work OK, but you should be aware that it may generate <br/> additional references.<br/>, defaults to:org.apache.jmeter.protocol.http.parser.RegexpHTMLParser</property>
|
||||
<property name="htmlParser.className"> This parser is based on JSoup, it should be the most accurate but less performant<br/> than LagartoBasedHtmlParser<br/>, defaults to:org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser</property>
|
||||
<property name="HTTPResponse.parsers">Space-separated list of parser groups<br/>, defaults to:htmlParser wmlParser cssParser. For each parser, there should be a parser.types and a parser.className property</property>
|
||||
<property name="cssParser.className"> CSS Parser based on ph-css<br/>, defaults to:org.apache.jmeter.protocol.http.parser.CssParser</property>
|
||||
<property name="cssParser.types">content types handled by cssParser, defaults to:text/css</property>
|
||||
<property name="css.parser.cache.size">CSS parser LRU cache size. This cache stores the URLs found in a CSS so that the same CSS does not have to be parsed again on every request. The default cache size is 400. The cache can be disabled by setting this value to 0.</property>
|
||||
<property name="htmlParser.className">Defines the HTML parser to be used. This parser (new since 2.10) should perform better than all the others; see https://bz.apache.org/bugzilla/show_bug.cgi?id=55632. Do not comment out this property.<br/>, defaults to:org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser</property>
|
||||
Other parsers:<br/>
|
||||
<ul>
|
||||
<li>org.apache.jmeter.protocol.http.parser.JTidyHTMLParser: Default parser before 2.10.</li>
|
||||
<li>org.apache.jmeter.protocol.http.parser.RegexpHTMLParser: Note that the Regexp extractor may detect references that have been commented out.<br/> In many cases it will work OK, but you should be aware that it may generate additional references.</li>
|
||||
<li>org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser: This parser is based on JSoup; it should be the most accurate but is less performant than LagartoBasedHtmlParser.</li>
|
||||
|
||||
</ul>
|
||||
<property name="htmlParser.types">Used by HTTPSamplerBase to associate htmlParser with content types below <br/>, defaults to:text/html application/xhtml+xml application/xml text/xml</property>
|
||||
<property name="wmlParser.className">, defaults to:org.apache.jmeter.protocol.http.parser.RegexpHTMLParser</property>
|
||||
<property name="wmlParser.types">Used by HTTPSamplerBase to associate wmlParser with content types below <br/>, defaults to:text/vnd.wap.wml </property>
|
||||
|
|
|
|||
Loading…
Reference in New Issue