Bug 59885 - Optimize css parsing for embedded resources download by introducing a cache

Based on PR 219 contributed by Benoit Wiart (b.wiart at ubik-ingenierie.com) This closes #219 on github. Bugzilla Id: 59885 git-svn-id: https://svn.apache.org/repos/asf/jmeter/trunk@1754678 13f79535-47bb-0310-9956-ffa450edef68
2016-07-31 18:50:01 +00:00 · 2016-07-31 18:50:01 +00:00 · d0abd8837a
parent db1a75c3a3
commit d0abd8837a
4 changed files with 88 additions and 42 deletions
--- a/bin/jmeter.properties
+++ b/bin/jmeter.properties
@ -726,6 +726,13 @@ HTTPResponse.parsers=htmlParser wmlParser cssParser
 # CSS Parser based on ph-css
 cssParser.className=org.apache.jmeter.protocol.http.parser.CssParser
 cssParser.types=text/css
+
+# CSS parser LRU cache size
+# This cache stores the URLs found in a CSS so that the same CSS is not parsed again each time it is downloaded
+# By default the cache size is 400
+# It can be disabled by setting its value to 0
+#css.parser.cache.size=400
+
 #---------------------------------------------------------------------------
 # HTML Parser configuration
 #---------------------------------------------------------------------------
--- a/src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java
+++ b/src/protocol/http/org/apache/jmeter/protocol/http/parser/CssParser.java
@ -21,9 +21,13 @@ package org.apache.jmeter.protocol.http.parser;
 import java.net.URL;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;

+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.commons.collections.map.LRUMap;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.jmeter.util.JMeterUtils;
 import org.apache.jorphan.logging.LoggingManager;
@ -50,7 +54,20 @@ import com.helger.css.reader.errorhandler.LoggingCSSParseErrorHandler;
 public class CssParser implements LinkExtractorParser {
    private static final boolean IGNORE_UNRECOVERABLE_PARSING_ERROR = JMeterUtils.getPropDefault("httpsampler.ignore_failed_embedded_resource", false); //$NON-NLS-1$
    private static final Logger LOG = LoggingManager.getLoggerForClass();
+    
+    /**
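+     * Maximum number of entries in the CSS URL cache, read from the "css.parser.cache.size" property (default 400); a value that is not positive disables the cache.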
+     */
+    private static final int CSS_URL_CACHE_MAX_SIZE = JMeterUtils.getPropDefault("css.parser.cache.size", 400);
+    
+    /**
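+     * Synchronized LRU cache of the URLs extracted from a CSS, keyed by the MD5 hex digest of the CSS bytes; null when CSS_URL_CACHE_MAX_SIZE is not positive.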
+     */
+    @SuppressWarnings("unchecked")
+    private static Map<String, URLCollection> CSS_URL_CACHE = 
+            CSS_URL_CACHE_MAX_SIZE > 0 ? Collections.synchronizedMap(new LRUMap(CSS_URL_CACHE_MAX_SIZE)) : null;

+    
    private static final class CustomLoggingCSSParseExceptionCallback extends LoggingCSSParseExceptionCallback {
        /**
         * 
@ -76,6 +93,7 @@ public class CssParser implements LinkExtractorParser {
            }
        }
    }
+    
    /**
     * 
     */
@ -93,40 +111,55 @@ public class CssParser implements LinkExtractorParser {
    public Iterator<URL> getEmbeddedResourceURLs(String userAgent, byte[] data,
            final URL baseUrl, String encoding) throws LinkExtractorParseException {
        try {
-            String cssContent = new String(data, encoding);
-            final CascadingStyleSheet aCSS = CSSReader.readFromStringStream(cssContent,
-                        new CSSReaderSettings()
-                            .setBrowserCompliantMode(true)
-                            .setFallbackCharset(Charset.forName(encoding))
-                            .setCSSVersion (ECSSVersion.CSS30)
-                            .setCustomErrorHandler(new LoggingCSSParseErrorHandler())
-                            .setCustomExceptionHandler (new CustomLoggingCSSParseExceptionCallback(baseUrl)));
-            final List<URLString> list = new ArrayList<>();
-            final URLCollection urlCollection = new URLCollection(list);
-            if(aCSS != null) {
-                CSSVisitor.visitCSSUrl(aCSS, new DefaultCSSUrlVisitor() {
-                    @Override
-                    public void onImport(final CSSImportRule importRule) {
-                        String location = importRule.getLocationString();
-                        if(!StringUtils.isEmpty(location)) {
-                            urlCollection.addURL(location, baseUrl);
-                        }
-                    }
-                    // Call for URLs outside of URLs
-                    @Override
-                    public void onUrlDeclaration(
-                            final ICSSTopLevelRule aTopLevelRule,
-                            final CSSDeclaration aDeclaration,
-                            final CSSExpressionMemberTermURI aURITerm) {
-                        // NOOP
-                        // Browser fetch such urls only when CSS rule matches
-                        // so we disable this code
-                        //urlCollection.addURL(aURITerm.getURIString(), baseUrl);
-                    }
-                });
-            } else {
-               LOG.warn("Failed parsing url:"+baseUrl+", got null CascadingStyleSheet");
+            boolean cacheEnabled = CSS_URL_CACHE_MAX_SIZE > 0;
+            String md5Key = null;
+            URLCollection urlCollection = null;
+            if(cacheEnabled) {
+                md5Key = DigestUtils.md5Hex(data);
+                urlCollection = CSS_URL_CACHE.get(md5Key);                
            }
+            
+            if(urlCollection == null) {
+                String cssContent = new String(data, encoding);
+                final CascadingStyleSheet aCSS = CSSReader.readFromStringStream(cssContent,
+                            new CSSReaderSettings()
+                                .setBrowserCompliantMode(true)
+                                .setFallbackCharset(Charset.forName(encoding))
+                                .setCSSVersion (ECSSVersion.CSS30)
+                                .setCustomErrorHandler(new LoggingCSSParseErrorHandler())
+                                .setCustomExceptionHandler (new CustomLoggingCSSParseExceptionCallback(baseUrl)));
+                final List<URLString> list = new ArrayList<>();
+                urlCollection = new URLCollection(list);
+                final URLCollection localCollection = urlCollection;
+                if(aCSS != null) {
+                    CSSVisitor.visitCSSUrl(aCSS, new DefaultCSSUrlVisitor() {
+                        @Override
+                        public void onImport(final CSSImportRule importRule) {
+                            String location = importRule.getLocationString();
+                            if(!StringUtils.isEmpty(location)) {
+                                localCollection.addURL(location, baseUrl);
+                            }
+                        }
+                        // Called for URLs found in declarations (i.e. outside of import rules)
+                        @Override
+                        public void onUrlDeclaration(
+                                final ICSSTopLevelRule aTopLevelRule,
+                                final CSSDeclaration aDeclaration,
+                                final CSSExpressionMemberTermURI aURITerm) {
+                            // NOOP
+                            // Browser fetch such urls only when CSS rule matches
+                            // so we disable this code
+                            //urlCollection.addURL(aURITerm.getURIString(), baseUrl);
+                        }
+                    });
+                    if(cacheEnabled) {
+                        CSS_URL_CACHE.put(md5Key, urlCollection);
+                    }
+                } else {
+                   LOG.warn("Failed parsing url:"+baseUrl+", got null CascadingStyleSheet");
+                }
+            }
+            
            if(LOG.isDebugEnabled()) {
                StringBuilder builder = new StringBuilder();
                for (Iterator<URL> iterator = urlCollection.iterator(); iterator.hasNext();) {
@ -135,6 +168,7 @@ public class CssParser implements LinkExtractorParser {
                }
                LOG.debug("Parsed:"+baseUrl+", got:"+builder.toString());
            }
+            
            return urlCollection.iterator();
        } catch (Exception e) {
            throw new LinkExtractorParseException(e);
--- a/xdocs/changes.xml
+++ b/xdocs/changes.xml
@ -65,7 +65,7 @@ Summary
 <ch_section>Incompatible changes</ch_section>

 <ul>
-    <li>Sample change...</li>
+    <li>A cache for CSS Parsing of URLs has been introduced in this version; it is enabled by default. It is controlled by property <code>css.parser.cache.size</code>. It can be disabled by setting its value to 0. See <bugzilla>59885</bugzilla></li>
 </ul>

 <h3>Deprecated and removed elements</h3>
@ -80,6 +80,7 @@ Summary
 <h3>HTTP Samplers and Test Script Recorder</h3>
 <ul>
    <li><bug>59882</bug>Reduce memory allocations for better throughput. Contributed by Benoit Wiart (b.wiart at ubik-ingenierie.com) through <pr>217</pr></li>
+    <li><bug>59885</bug>Optimize css parsing for embedded resources download by introducing a cache. Contributed by Benoit Wiart (b.wiart at ubik-ingenierie.com) through <pr>219</pr></li>
 </ul>

 <h3>Other samplers</h3>
--- a/xdocs/usermanual/properties_reference.xml
+++ b/xdocs/usermanual/properties_reference.xml
@ -445,14 +445,18 @@ Uncomment this line if you put anything in httpclient.parameters file</property>
 </section>
 <section name="&sect-num;.24 HTML Parser configuration" anchor="parser_config">
 <properties>
-<property name="HTTPResponse.parsers"> Space-separated list of parser groups<br/>, defaults to:htmlParser wmlParser cssParser</property>
-<property name="cssParser.className"> for each parser, there should be a parser.types and a parser.className property<br/> CSS Parser based on ph-css<br/>, defaults to:org.apache.jmeter.protocol.http.parser.CssParser</property>
-<property name="cssParser.types">, defaults to:text/css</property>
-<property name=" see https://bz.apache.org/bugzilla/show_bug.cgi?id"> Define the HTML parser to be used.<br/> Default parser:<br/> This new parser (since 2.10) should perform better than all others<br/>, defaults to:55632</property>
-<property name="htmlParser.className"> Do not comment this property<br/>, defaults to:org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser</property>
-<property name="htmlParser.className"> Other parsers:<br/> Default parser before 2.10<br/>, defaults to:org.apache.jmeter.protocol.http.parser.JTidyHTMLParser</property>
-<property name="htmlParser.className"> Note that Regexp extractor may detect references that have been commented out.<br/> In many cases it will work OK, but you should be aware that it may generate <br/> additional references.<br/>, defaults to:org.apache.jmeter.protocol.http.parser.RegexpHTMLParser</property>
-<property name="htmlParser.className"> This parser is based on JSoup, it should be the most accurate but less performant<br/> than LagartoBasedHtmlParser<br/>, defaults to:org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser</property>
+<property name="HTTPResponse.parsers">Space-separated list of parser groups<br/>, defaults to:htmlParser wmlParser cssParser. For each parser, there should be a parser.types and a parser.className property</property>
+<property name="cssParser.className"> CSS Parser based on ph-css<br/>, defaults to:org.apache.jmeter.protocol.http.parser.CssParser</property>
+<property name="cssParser.types">content types handled by cssParser, defaults to:text/css</property>
+<property name="css.parser.cache.size">CSS parser LRU cache size. This cache stores the URLs found in a CSS to avoid continuously parsing the CSS. By default the cache size is 400. It can be disabled by setting its value to 0.</property>
+<property name="htmlParser.className">Define the HTML parser to be used. This new parser (since 2.10) should perform better than all others. See https://bz.apache.org/bugzilla/show_bug.cgi?id=55632. Do not comment out this property<br/>, defaults to:org.apache.jmeter.protocol.http.parser.LagartoBasedHtmlParser</property>
+Other parsers:<br/>
+<ul> 
+<li>org.apache.jmeter.protocol.http.parser.JTidyHTMLParser : Default parser before 2.10</li>
+<li>org.apache.jmeter.protocol.http.parser.RegexpHTMLParser : Note that Regexp extractor may detect references that have been commented out.<br/> In many cases it will work OK, but you should be aware that it may generate additional references.</li>
+<li>org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser : This parser is based on JSoup, it should be the most accurate but less performant than LagartoBasedHtmlParser, defaults to:org.apache.jmeter.protocol.http.parser.JsoupBasedHtmlParser
+</li>
+</ul>
 <property name="htmlParser.types">Used by HTTPSamplerBase to associate htmlParser with content types below <br/>, defaults to:text/html application/xhtml+xml application/xml text/xml</property>
 <property name="wmlParser.className">, defaults to:org.apache.jmeter.protocol.http.parser.RegexpHTMLParser</property>
 <property name="wmlParser.types">Used by HTTPSamplerBase to associate wmlParser with content types below <br/>, defaults to:text/vnd.wap.wml </property>