mirror of https://github.com/webpack/webpack.git
add HtmlParser
This commit is contained in:
parent
8295202bc8
commit
7b762de9f0
|
@ -217,7 +217,9 @@
|
|||
"gitter",
|
||||
"codecov",
|
||||
"opencollective",
|
||||
"dependabot"
|
||||
"dependabot",
|
||||
"domelementtype",
|
||||
"domhandler"
|
||||
],
|
||||
"ignoreRegExpList": ["/Author.+/", "/data:.*/", "/\"mappings\":\".+\"/"],
|
||||
"ignorePaths": ["**/dist/**", "examples/**/README.md"]
|
||||
|
|
|
@ -0,0 +1,209 @@
|
|||
/*
|
||||
MIT License http://www.opensource.org/licenses/mit-license.php
|
||||
Author Ivan Kopeykin @vankop
|
||||
*/
|
||||
|
||||
"use strict";
|
||||
|
||||
const ElementType = require("domelementtype");
|
||||
const { DomHandler } = require("domhandler");
|
||||
const { Parser: HtmlParser2 } = require("htmlparser2");
|
||||
const { SyncBailHook, HookMap } = require("tapable");
|
||||
const Parser = require("../Parser");
|
||||
|
||||
/** @typedef {import("htmlparser2").ParserOptions} HtmlParserOptions */
|
||||
/** @typedef {import("htmlparser2").DomHandlerOptions} HtmlParserDomHandlerOptions */
|
||||
/** @typedef {import("domhandler").Node} DomNode */
|
||||
/** @typedef {import("domhandler").Element} DomHandlerElement */
|
||||
/** @typedef {import("domhandler").DataNode} DomDataNode */
|
||||
/** @typedef {import("../Parser").ParserState} ParserState */
|
||||
/** @typedef {import("../Parser").PreparsedAst} PreparsedAst */
|
||||
/** @typedef {{[k: string]: {value: string, range: Readonly<[number, number]>}}} TagAttributes */
|
||||
/** @typedef {Omit<DomHandlerElement, 'attribs'> & {attribs: TagAttributes}} DomElement */
|
||||
|
||||
/**
 * DomHandler that additionally records where each attribute value sits in
 * the parsed source. Webpack needs a custom handler for this because the
 * stock DOM structure only carries attribute values, not their indexes.
 *
 * Once an opening tag has been handled, every attribute reported for it is
 * stored on the element as `{ value, range }` (see `TagAttributes`).
 */
class CustomDomHandler extends DomHandler {
	/**
	 * @param {Function=} cb forwarded unchanged to DomHandler
	 * @param {HtmlParserDomHandlerOptions=} options forwarded unchanged to DomHandler
	 * @param {Function=} elementCb forwarded unchanged to DomHandler
	 * @param {(function(Error): void)=} errorCb installed as this handler's `onerror`
	 */
	constructor(cb, options, elementCb, errorCb) {
		super(cb, options, elementCb);
		/**
		 * Ranges collected by `onattribute` for the tag currently being opened;
		 * `undefined` while no attribute has been reported.
		 * @type {{[k: string]: [number, number]} | undefined}
		 */
		this._attributes = undefined;
		// process errors, if any
		this.onerror = errorCb;
	}

	// cspell:word onattribute
	/**
	 * Remembers the source range of one attribute value.
	 * @param {string} name attribute name
	 * @param {string} value attribute value
	 * @returns {void}
	 */
	onattribute(name, value) {
		if (!this._attributes) this._attributes = {};
		//@ts-expect-error
		const tokenizer = this._parser._tokenizer;
		const html = tokenizer._buffer;
		// the tokenizer position is taken as the end of the attribute value
		const endIndex = tokenizer._index;
		// NOTE(review): assumes `value.length` equals the length of the value in
		// the source text; presumably not true when entities get decoded - confirm
		const startIndex = endIndex - value.length;
		const unquoted = html[endIndex] !== '"' && html[endIndex] !== "'";
		// for a quoted value the range start is moved back by one so that it
		// points at the opening quote
		this._attributes[name] = [unquoted ? startIndex : startIndex - 1, endIndex];
	}

	// cspell:word onopentag
	/**
	 * Lets DomHandler create the element, then replaces each recorded
	 * attribute with `{ value, range }`.
	 * @param {string} name tag name
	 * @param {{[k: string]: string}} attributes attributes as passed by the parser
	 * @returns {void}
	 */
	onopentag(name, attributes) {
		super.onopentag(name, attributes);

		// `_attributes` is only created by `onattribute`; when no attribute was
		// reported for this tag it is still undefined and `Object.keys` would throw
		const ranges = this._attributes;
		if (!ranges) return;
		this._attributes = undefined;

		//@ts-expect-error
		const attribs = this._tagStack[this._tagStack.length - 1].attribs;

		for (const attributeName of Object.keys(ranges)) {
			attribs[attributeName] = {
				value: attribs[attributeName],
				range: ranges[attributeName]
			};
		}
	}
}
|
||||
|
||||
/**
 * Parser for html sources: builds a DOM with htmlparser2 and reports the
 * top-level nodes to plugins through `hooks`.
 */
class HtmlParser extends Parser {
	/**
	 * @param {HtmlParserOptions=} options htmlparser2 parser options
	 */
	constructor(options) {
		super();

		this._options = options;
		this.hooks = Object.freeze({
			/** @type {HookMap<SyncBailHook<[DomElement], true | void | undefined | null>>} */
			tag: new HookMap(() => new SyncBailHook(["tag"])),
			/** @type {SyncBailHook<[DomDataNode], void | undefined | null>} */
			text: new SyncBailHook(["text"]),
			/** @type {SyncBailHook<[DomDataNode], void | undefined | null>} */
			directive: new SyncBailHook(["directive"]),
			/** @type {SyncBailHook<[DomDataNode], void | undefined | null>} */
			comment: new SyncBailHook(["comment"])
		});

		/**
		 * State of the parse call currently in progress (undefined outside of `parse`)
		 * @type {ParserState}
		 */
		this.state = undefined;
	}

	/**
	 * Walks every node of the list in order.
	 * @param {DomNode[]} nodes nodes
	 * @returns {void}
	 */
	walkNodes(nodes) {
		for (const node of nodes) this.walkNode(node);
	}

	/**
	 * Dispatches a node to the walker matching its type. CDATA and doctype
	 * nodes are ignored.
	 * @param {DomNode} node node
	 * @returns {void}
	 */
	walkNode(node) {
		// NOTE(review): children of an element are not walked here or in
		// walkElement, so only the nodes passed to walkNodes trigger hooks -
		// confirm this is intended
		switch (node.type) {
			case ElementType.Script:
			case ElementType.Style:
			case ElementType.Tag:
				this.walkElement(/** @type {DomElement} */ (node));
				break;
			case ElementType.Comment:
				this.walkComment(/** @type {DomDataNode} */ (node));
				break;
			case ElementType.Directive:
				this.walkDirective(/** @type {DomDataNode} */ (node));
				break;
			case ElementType.Text:
				this.walkText(/** @type {DomDataNode} */ (node));
				break;
			case ElementType.CDATA:
			case ElementType.Doctype:
				break;
		}
	}

	/**
	 * Calls the tag hook registered for the element's tag name.
	 * @param {DomElement} element element
	 * @returns {void}
	 */
	walkElement(element) {
		const name = element.tagName;
		this.hooks.tag.for(name).call(element);
	}

	/**
	 * Calls the text hook.
	 * @param {DomDataNode} node text node
	 * @returns {void}
	 */
	walkText(node) {
		this.hooks.text.call(node);
	}

	/**
	 * Calls the directive hook.
	 * @param {DomDataNode} node directive node
	 * @returns {void}
	 */
	walkDirective(node) {
		this.hooks.directive.call(node);
	}

	/**
	 * Calls the comment hook.
	 * @param {DomDataNode} node comment node
	 * @returns {void}
	 */
	walkComment(node) {
		this.hooks.comment.call(node);
	}

	/**
	 * Parses the source and walks the resulting nodes. While walking,
	 * `this.state` is the passed state; the previous value is put back
	 * afterwards, also when a hook throws.
	 * @param {string | Buffer | PreparsedAst} source the source to parse
	 * @param {ParserState} state the parser state
	 * @returns {ParserState} the parser state
	 * @throws {Error} when source is null or the html could not be parsed
	 */
	parse(source, state) {
		if (source === null) {
			throw new Error("source must not be null");
		}
		if (Buffer.isBuffer(source)) {
			source = source.toString("utf-8");
		}

		const dom = HtmlParser._parse(
			/** @type {string} */ (source),
			this._options
		);

		const oldState = this.state;
		// expose the state of this parse call to the hooks
		this.state = state;
		try {
			this.walkNodes(dom);
		} finally {
			this.state = oldState;
		}

		return state;
	}

	/**
	 * Builds the DOM for the given code. Start and end indices are always
	 * requested from the handler, whatever `options` says.
	 * @param {string} code code
	 * @param {HtmlParserOptions} options options
	 * @private
	 * @returns {DomNode[]} dom
	 * @throws {Error} the first error reported by the handler, or the error thrown while parsing
	 */
	static _parse(code, options) {
		/** @type {HtmlParserOptions & HtmlParserDomHandlerOptions} */
		const htmlParserOptions = {
			...options,
			withStartIndices: true,
			withEndIndices: true
		};

		let dom;
		let errors = [];

		try {
			const handler = new CustomDomHandler(
				undefined,
				htmlParserOptions,
				undefined,
				e => errors.push(e)
			);
			new HtmlParser2(handler, options).end(code);
			dom = handler.dom;
		} catch (e) {
			// a thrown error takes precedence over errors reported so far
			errors = [e];
		}

		if (errors.length > 0) throw errors[0];

		return dom;
	}
}
|
||||
|
||||
module.exports = HtmlParser;
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
MIT License http://www.opensource.org/licenses/mit-license.php
|
||||
Author Ivan Kopeykin @vankop
|
||||
*/
|
||||
|
||||
"use strict";
|
||||
|
||||
/** @typedef {import("domhandler").NodeWithChildren} DomNodeWithChildren */
|
||||
|
||||
/**
 * Source range covered by the children of a node: from the start index of
 * its first child to the end index of its last child.
 * @param {DomNodeWithChildren} node node with children
 * @returns {[number, number]|null} range, or null when the node has no first child
 * @example
 * for
 * <script> void 0;</script>
 * range of text node " void 0;" will be returned
 */
function childrenRange(node) {
	const head = node.firstChild;
	return head ? [head.startIndex, node.lastChild.endIndex] : null;
}
|
||||
|
||||
exports.childrenRange = childrenRange;
|
|
@ -307,6 +307,12 @@ module.exports = mergeExports(fn, {
|
|||
}
|
||||
},
|
||||
|
||||
html: {
|
||||
get HtmlParser() {
|
||||
return require("./html/HtmlParser");
|
||||
}
|
||||
},
|
||||
|
||||
javascript: {
|
||||
get EnableChunkLoadingPlugin() {
|
||||
return require("./javascript/EnableChunkLoadingPlugin");
|
||||
|
|
|
@ -13,11 +13,14 @@
|
|||
"@webassemblyjs/wasm-parser": "1.9.0",
|
||||
"acorn": "^7.4.0",
|
||||
"chrome-trace-event": "^1.0.2",
|
||||
"domelementtype": "^2.0.2",
|
||||
"domhandler": "^3.0.0",
|
||||
"enhanced-resolve": "^5.0.0",
|
||||
"eslint-scope": "^5.1.0",
|
||||
"events": "^3.2.0",
|
||||
"glob-to-regexp": "^0.4.1",
|
||||
"graceful-fs": "^4.2.4",
|
||||
"htmlparser2": "^4.1.0",
|
||||
"json-parse-better-errors": "^1.0.2",
|
||||
"loader-runner": "^4.0.0",
|
||||
"mime-types": "^2.1.27",
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
"use strict";
|
||||
|
||||
const HtmlParser = require("../lib/html/HtmlParser");
|
||||
|
||||
// htmlparser2 options shared by every case below
const options = {
	decodeEntities: false,
	lowerCaseTags: false,
	lowerCaseAttributeNames: false,
	recognizeCDATA: true,
	recognizeSelfClosing: true
};

/**
 * Parses `code` with a fresh HtmlParser and returns the range reported for
 * the `src` attribute of the `img` tag seen by the tag hook.
 * @param {string} code html to parse
 * @returns {[number, number] | undefined} reported range, undefined when the hook never ran
 */
const srcRangeOf = code => {
	let captured;
	const parser = new HtmlParser(options);
	parser.hooks.tag.for("img").tap("Test", element => {
		captured = element.attribs.src.range;
	});
	parser.parse(code, {});
	return captured;
};

describe("correct attributes range", () => {
	const head = "<img src=";
	const tail = "/>";

	it("with quotes", () => {
		const code = `${head}"http://ok.ok"${tail}`;
		const range = srcRangeOf(code);
		expect(range).toEqual([head.length, code.length - tail.length - 1]);
	});

	it("without quotes", () => {
		const code = `${head}nosrc ${tail}`;
		const range = srcRangeOf(code);
		expect(range).toEqual([head.length, code.length - tail.length - 1]);
	});
});
|
|
@ -4,6 +4,7 @@
|
|||
* Run `yarn special-lint-fix` to update
|
||||
*/
|
||||
|
||||
import { DataNode, Element, Node as NodeImport } from "domhandler/lib/node";
|
||||
import {
|
||||
ArrayExpression,
|
||||
ArrayPattern,
|
||||
|
@ -78,6 +79,7 @@ import {
|
|||
YieldExpression
|
||||
} from "estree";
|
||||
import { Stats as FsStats, WriteStream } from "fs";
|
||||
import { ParserOptions } from "htmlparser2/lib/Parser";
|
||||
import { default as ValidationError } from "schema-utils/declarations/ValidationError";
|
||||
import {
|
||||
AsArray,
|
||||
|
@ -3581,6 +3583,73 @@ declare class HotModuleReplacementPlugin {
|
|||
apply(compiler: Compiler): void;
|
||||
static getParserHooks(parser: JavascriptParser): HMRJavascriptParserHooks;
|
||||
}
|
||||
declare class HtmlParser extends Parser {
|
||||
constructor(options?: ParserOptions);
|
||||
hooks: Readonly<{
|
||||
tag: HookMap<
|
||||
SyncBailHook<
|
||||
[
|
||||
Pick<
|
||||
Element,
|
||||
| "type"
|
||||
| "name"
|
||||
| "tagName"
|
||||
| "children"
|
||||
| "firstChild"
|
||||
| "lastChild"
|
||||
| "childNodes"
|
||||
| "parent"
|
||||
| "prev"
|
||||
| "next"
|
||||
| "startIndex"
|
||||
| "endIndex"
|
||||
| "nodeType"
|
||||
| "parentNode"
|
||||
| "previousSibling"
|
||||
| "nextSibling"
|
||||
> & {
|
||||
attribs: {
|
||||
[index: string]: { value: string; range: [number, number] };
|
||||
};
|
||||
}
|
||||
],
|
||||
true | void
|
||||
>
|
||||
>;
|
||||
text: SyncBailHook<[DataNode], void>;
|
||||
directive: SyncBailHook<[DataNode], void>;
|
||||
comment: SyncBailHook<[DataNode], void>;
|
||||
}>;
|
||||
state: Record<string, any> & ParserStateBase;
|
||||
walkNodes(nodes: NodeImport[]): void;
|
||||
walkNode(node: NodeImport): void;
|
||||
walkElement(
|
||||
element: Pick<
|
||||
Element,
|
||||
| "type"
|
||||
| "name"
|
||||
| "tagName"
|
||||
| "children"
|
||||
| "firstChild"
|
||||
| "lastChild"
|
||||
| "childNodes"
|
||||
| "parent"
|
||||
| "prev"
|
||||
| "next"
|
||||
| "startIndex"
|
||||
| "endIndex"
|
||||
| "nodeType"
|
||||
| "parentNode"
|
||||
| "previousSibling"
|
||||
| "nextSibling"
|
||||
> & {
|
||||
attribs: { [index: string]: { value: string; range: [number, number] } };
|
||||
}
|
||||
): void;
|
||||
walkText(node: DataNode): void;
|
||||
walkDirective(node: DataNode): void;
|
||||
walkComment(node: DataNode): void;
|
||||
}
|
||||
declare class HttpUriPlugin {
|
||||
constructor();
|
||||
|
||||
|
@ -9941,6 +10010,9 @@ declare namespace exports {
|
|||
HashedModuleIdsPlugin
|
||||
};
|
||||
}
|
||||
export namespace html {
|
||||
export { HtmlParser };
|
||||
}
|
||||
export namespace javascript {
|
||||
export {
|
||||
EnableChunkLoadingPlugin,
|
||||
|
|
45
yarn.lock
45
yarn.lock
|
@ -2260,6 +2260,20 @@ doctypes@^1.1.0:
|
|||
resolved "https://registry.yarnpkg.com/doctypes/-/doctypes-1.1.0.tgz#ea80b106a87538774e8a3a4a5afe293de489e0a9"
|
||||
integrity sha1-6oCxBqh1OHdOijpKWv4pPeSJ4Kk=
|
||||
|
||||
dom-serializer@^1.0.1:
|
||||
version "1.1.0"
|
||||
resolved "https://registry.yarnpkg.com/dom-serializer/-/dom-serializer-1.1.0.tgz#5f7c828f1bfc44887dc2a315ab5c45691d544b58"
|
||||
integrity sha512-ox7bvGXt2n+uLWtCRLybYx60IrOlWL/aCebWJk1T0d4m3y2tzf4U3ij9wBMUb6YJZpz06HCCYuyCDveE2xXmzQ==
|
||||
dependencies:
|
||||
domelementtype "^2.0.1"
|
||||
domhandler "^3.0.0"
|
||||
entities "^2.0.0"
|
||||
|
||||
domelementtype@^2.0.1, domelementtype@^2.0.2:
|
||||
version "2.0.2"
|
||||
resolved "https://registry.yarnpkg.com/domelementtype/-/domelementtype-2.0.2.tgz#f3b6e549201e46f588b59463dd77187131fe6971"
|
||||
integrity sha512-wFwTwCVebUrMgGeAwRL/NhZtHAUyT9n9yg4IMDwf10+6iCMxSkVq9MGCVEH+QZWo1nNidy8kNvwmv4zWHDTqvA==
|
||||
|
||||
domexception@^1.0.1:
|
||||
version "1.0.1"
|
||||
resolved "https://registry.yarnpkg.com/domexception/-/domexception-1.0.1.tgz#937442644ca6a31261ef36e3ec677fe805582c90"
|
||||
|
@ -2267,6 +2281,22 @@ domexception@^1.0.1:
|
|||
dependencies:
|
||||
webidl-conversions "^4.0.2"
|
||||
|
||||
domhandler@^3.0.0:
|
||||
version "3.0.0"
|
||||
resolved "https://registry.yarnpkg.com/domhandler/-/domhandler-3.0.0.tgz#51cd13efca31da95bbb0c5bee3a48300e333b3e9"
|
||||
integrity sha512-eKLdI5v9m67kbXQbJSNn1zjh0SDzvzWVWtX+qEI3eMjZw8daH9k8rlj1FZY9memPwjiskQFbe7vHVVJIAqoEhw==
|
||||
dependencies:
|
||||
domelementtype "^2.0.1"
|
||||
|
||||
domutils@^2.0.0:
|
||||
version "2.3.0"
|
||||
resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.3.0.tgz#6469c63a3da2de0c3016f3a59e6a969e10705bce"
|
||||
integrity sha512-xWC75PM3QF6MjE5e58OzwTX0B/rPQnlqH0YyXB/c056RtVJA+eu60da2I/bdnEHzEYC00g8QaZUlAbqOZVbOsw==
|
||||
dependencies:
|
||||
dom-serializer "^1.0.1"
|
||||
domelementtype "^2.0.1"
|
||||
domhandler "^3.0.0"
|
||||
|
||||
dot-prop@^5.2.0:
|
||||
version "5.2.0"
|
||||
resolved "https://registry.yarnpkg.com/dot-prop/-/dot-prop-5.2.0.tgz#c34ecc29556dc45f1f4c22697b6f4904e0cc4fcb"
|
||||
|
@ -2328,6 +2358,11 @@ enquirer@^2.3.6:
|
|||
dependencies:
|
||||
ansi-colors "^4.1.1"
|
||||
|
||||
entities@^2.0.0:
|
||||
version "2.0.3"
|
||||
resolved "https://registry.yarnpkg.com/entities/-/entities-2.0.3.tgz#5c487e5742ab93c15abb5da22759b8590ec03b7f"
|
||||
integrity sha512-MyoZ0jgnLvB2X3Lg5HqpFmn1kybDiIfEQmKzTb5apr51Rb+T3KdmMiqa70T+bhGnyv7bQ6WMj2QMHpGMmlrUYQ==
|
||||
|
||||
errno@^0.1.1, errno@^0.1.3:
|
||||
version "0.1.7"
|
||||
resolved "https://registry.yarnpkg.com/errno/-/errno-0.1.7.tgz#4684d71779ad39af177e3f007996f7c67c852618"
|
||||
|
@ -3202,6 +3237,16 @@ html-escaper@^2.0.0:
|
|||
resolved "https://registry.yarnpkg.com/html-escaper/-/html-escaper-2.0.2.tgz#dfd60027da36a36dfcbe236262c00a5822681453"
|
||||
integrity sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==
|
||||
|
||||
htmlparser2@^4.1.0:
|
||||
version "4.1.0"
|
||||
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-4.1.0.tgz#9a4ef161f2e4625ebf7dfbe6c0a2f52d18a59e78"
|
||||
integrity sha512-4zDq1a1zhE4gQso/c5LP1OtrhYTncXNSpvJYtWJBtXAETPlMfi3IFNjGuQbYLuVY4ZR0QMqRVvo4Pdy9KLyP8Q==
|
||||
dependencies:
|
||||
domelementtype "^2.0.1"
|
||||
domhandler "^3.0.0"
|
||||
domutils "^2.0.0"
|
||||
entities "^2.0.0"
|
||||
|
||||
http-signature@~1.2.0:
|
||||
version "1.2.0"
|
||||
resolved "https://registry.yarnpkg.com/http-signature/-/http-signature-1.2.0.tgz#9aecd925114772f3d95b65a60abb8f7c18fbace1"
|
||||
|
|
Loading…
Reference in New Issue