From 129abc4d59f9b8802d70d2db0ae8996f84b62fd4 Mon Sep 17 00:00:00 2001 From: Philippe Mouawad Date: Sat, 2 Mar 2013 21:29:48 +0000 Subject: [PATCH] Bug 54629 - HTMLParser does not extract tag urls Bugzilla Id: 54629 git-svn-id: https://svn.apache.org/repos/asf/jmeter/trunk@1451946 13f79535-47bb-0310-9956-ffa450edef68 Former-commit-id: b8a912a15e84b139c4c53ca3a4abe0f3fa11f1ed --- .../jmeter/protocol/http/parser/HTMLParser.java | 1 + .../http/parser/HtmlParserHTMLParser.java | 15 +++++++++++++++ .../protocol/http/parser/JTidyHTMLParser.java | 15 +++++++++++++++ .../protocol/http/parser/RegexpHTMLParser.java | 2 +- xdocs/changes.xml | 1 + 5 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java b/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java index 49c2043481..9735bd4f4b 100644 --- a/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java +++ b/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java @@ -53,6 +53,7 @@ public abstract class HTMLParser { protected static final String TAG_IMAGE = "img";// $NON-NLS-1$ protected static final String TAG_INPUT = "input";// $NON-NLS-1$ protected static final String TAG_LINK = "link";// $NON-NLS-1$ + protected static final String TAG_OBJECT = "object";// $NON-NLS-1$ protected static final String TAG_SCRIPT = "script";// $NON-NLS-1$ protected static final String STYLESHEET = "stylesheet";// $NON-NLS-1$ diff --git a/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java b/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java index bc865416ef..bf6ec47bcd 100644 --- a/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java +++ b/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java @@ -22,6 +22,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.Iterator; 
+import org.apache.commons.lang3.StringUtils; import org.apache.jmeter.protocol.http.util.ConversionUtils; import org.apache.jorphan.logging.LoggingManager; import org.apache.log.Logger; @@ -35,6 +36,7 @@ import org.htmlparser.tags.CompositeTag; import org.htmlparser.tags.FrameTag; import org.htmlparser.tags.ImageTag; import org.htmlparser.tags.InputTag; +import org.htmlparser.tags.ObjectTag; import org.htmlparser.tags.ScriptTag; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.ParserException; @@ -146,6 +148,19 @@ class HtmlParserHTMLParser extends HTMLParser { // and archives (.jar and .zip) files as well. AppletTag applet = (AppletTag) tag; binUrlStr = applet.getAppletClass(); + } else if (tag instanceof ObjectTag) { + // look for Objects + ObjectTag applet = (ObjectTag) tag; + String data = applet.getAttribute("codebase"); + if(!StringUtils.isEmpty(data)) { + binUrlStr = data; + } + + data = applet.getAttribute("data"); + if(!StringUtils.isEmpty(data)) { + binUrlStr = data; + } + } else if (tag instanceof InputTag) { // we check the input tag type for image if (ATT_IS_IMAGE.equalsIgnoreCase(tag.getAttribute(ATT_TYPE))) { diff --git a/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java b/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java index 5e28035501..801624fa0d 100644 --- a/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java +++ b/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java @@ -23,6 +23,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.Iterator; +import org.apache.commons.lang3.StringUtils; import org.apache.jmeter.protocol.http.util.ConversionUtils; import org.apache.jorphan.logging.LoggingManager; import org.apache.log.Logger; @@ -119,6 +120,20 @@ class JTidyHTMLParser extends HTMLParser { urls.addURL(getValue(attrs, "code"), baseUrl); break; } + + if (name.equalsIgnoreCase(TAG_OBJECT)) { + 
String data = getValue(attrs, "codebase"); + if(!StringUtils.isEmpty(data)) { + urls.addURL(data, baseUrl); + } + + data = getValue(attrs, "data"); + if(!StringUtils.isEmpty(data)) { + urls.addURL(data, baseUrl); + } + break; + } + if (name.equalsIgnoreCase(TAG_INPUT)) { String src = getValue(attrs, ATT_SRC); String typ = getValue(attrs, ATT_TYPE); diff --git a/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java b/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java index b3210b6b91..14e1e316ac 100644 --- a/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java +++ b/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java @@ -98,7 +98,7 @@ class RegexpHTMLParser extends HTMLParser { + "|BASE" + SEP + "HREF" + VALUE + "|(?:IMG|SCRIPT|FRAME|IFRAME|BGSOUND)" + SEP + "SRC" + VALUE + "|APPLET" + SEP + "CODE(?:BASE)?" + VALUE - + "|(?:EMBED|OBJECT)" + SEP + "(?:SRC|CODEBASE)" + VALUE + + "|(?:EMBED|OBJECT)" + SEP + "(?:SRC|CODEBASE|DATA)" + VALUE + "|(?:BODY|TABLE|TR|TD)" + SEP + "BACKGROUND" + VALUE + "|[^<]+?STYLE\\s*=['\"].*?URL\\(\\s*['\"](.+?)['\"]\\s*\\)" + "|INPUT(?:" + SEP + "(?:SRC" + VALUE diff --git a/xdocs/changes.xml b/xdocs/changes.xml index b7c1035b02..e5a0a1541e 100644 --- a/xdocs/changes.xml +++ b/xdocs/changes.xml @@ -94,6 +94,7 @@ This does not affect JMeter operation.
  • 54293 - JMeter rejects html tags '<' in query params as invalid when they are accepted by the browser
  • 54142 - HTTP Proxy Server throws an exception when path contains "|" character
  • 54627 - JMeter Proxy GUI: Type of sampler settings takes the whole screen width when there are samplers with long names
  • +
  • 54629 - HTMLParser does not extract <object> tag urls
  • Other Samplers