From a1b10a21ac479e3756db4c1a6dc4d6bbb8701c60 Mon Sep 17 00:00:00 2001 From: Evan You Date: Sun, 19 Nov 2023 10:39:11 +0800 Subject: [PATCH] wip: pass more compiler-dom tests --- .../compiler-core/src/parser/Tokenizer.ts | 30 ++++++++++--- packages/compiler-core/src/parser/index.ts | 45 ++++++++++++------- packages/compiler-dom/__tests__/parse.spec.ts | 12 +---- 3 files changed, 55 insertions(+), 32 deletions(-) diff --git a/packages/compiler-core/src/parser/Tokenizer.ts b/packages/compiler-core/src/parser/Tokenizer.ts index 2c128a670..fdbc84a10 100644 --- a/packages/compiler-core/src/parser/Tokenizer.ts +++ b/packages/compiler-core/src/parser/Tokenizer.ts @@ -178,7 +178,7 @@ export enum QuoteType { export interface Callbacks { ontext(start: number, endIndex: number): void - ontextentity(char: string, endIndex: number): void + ontextentity(char: string, start: number, endIndex: number): void oninterpolation(start: number, endIndex: number): void @@ -188,7 +188,7 @@ export interface Callbacks { onclosetag(start: number, endIndex: number): void onattribdata(start: number, endIndex: number): void - onattribentity(char: string): void + onattribentity(char: string, start: number, end: number): void onattribend(quote: QuoteType, endIndex: number): void onattribname(start: number, endIndex: number): void onattribnameend(endIndex: number): void @@ -325,6 +325,9 @@ export default class Tokenizer { } else { this.delimiterIndex++ } + } else if (this.inRCDATA) { + this.state = State.InSpecialTag + this.stateInSpecialTag(c) } else { this.state = State.Text this.stateText(c) @@ -343,7 +346,11 @@ export default class Tokenizer { if (c === this.delimiterClose[this.delimiterIndex]) { if (this.delimiterIndex === this.delimiterClose.length - 1) { this.cbs.oninterpolation(this.sectionStart, this.index + 1) - this.state = State.Text + if (this.inRCDATA) { + this.state = State.InSpecialTag + } else { + this.state = State.Text + } this.sectionStart = this.index + 1 } else { 
this.delimiterIndex++ @@ -410,6 +417,11 @@ export default class Tokenizer { // We have to parse entities in <title> and <textarea> tags. if (!__BROWSER__ && c === CharCodes.Amp) { this.startEntity() + } else if (c === this.delimiterOpen[0]) { + // We also need to handle interpolation + this.state = State.InterpolationOpen + this.delimiterIndex = 0 + this.stateInterpolationOpen(c) } } else if (this.fastForwardTo(CharCodes.Lt)) { // Outside of <title> and <textarea> tags, we can fast-forward. @@ -1077,7 +1089,11 @@ export default class Tokenizer { this.sectionStart = this.entityStart + consumed this.index = this.sectionStart - 1 - this.cbs.onattribentity(fromCodePoint(cp)) + this.cbs.onattribentity( + fromCodePoint(cp), + this.entityStart, + this.sectionStart + ) } else { if (this.sectionStart < this.entityStart) { this.cbs.ontext(this.sectionStart, this.entityStart) @@ -1085,7 +1101,11 @@ export default class Tokenizer { this.sectionStart = this.entityStart + consumed this.index = this.sectionStart - 1 - this.cbs.ontextentity(fromCodePoint(cp), this.sectionStart) + this.cbs.ontextentity( + fromCodePoint(cp), + this.entityStart, + this.sectionStart + ) } } } diff --git a/packages/compiler-core/src/parser/index.ts b/packages/compiler-core/src/parser/index.ts index 858c4aaff..981d39c3b 100644 --- a/packages/compiler-core/src/parser/index.ts +++ b/packages/compiler-core/src/parser/index.ts @@ -26,6 +26,7 @@ import { CompilerCompatOptions } from '../compat/compatConfig' import { NO, extend } from '@vue/shared' import { defaultOnError, defaultOnWarn } from '../errors' import { forAliasRE, isCoreComponent } from '../utils' +import { decodeHTML } from 'entities/lib/decode.js' type OptionalOptions = | 'decodeEntities' @@ -69,8 +70,8 @@ const tokenizer = new Tokenizer(stack, { onText(getSlice(start, end), start, end) }, - ontextentity(char, end) { - onText(char, end - 1, end) + ontextentity(char, start, end) { + onText(char, start, end) }, oninterpolation(start, end) { @@ -85,13
+86,18 @@ const tokenizer = new Tokenizer(stack, { while (isWhitespace(currentInput.charCodeAt(innerEnd - 1))) { innerEnd-- } + let exp = getSlice(innerStart, innerEnd) + // decode entities for backwards compat + if (exp.includes('&')) { + if (__BROWSER__) { + exp = currentOptions.decodeEntities!(exp, false) + } else { + exp = decodeHTML(exp) + } + } addNode({ type: NodeTypes.INTERPOLATION, - content: createSimpleExpression( - getSlice(innerStart, innerEnd), - false, - getLoc(innerStart, innerEnd) - ), + content: createSimpleExpression(exp, false, getLoc(innerStart, innerEnd)), loc: getLoc(start, end) }) }, @@ -101,7 +107,7 @@ const tokenizer = new Tokenizer(stack, { currentElement = { type: NodeTypes.ELEMENT, tag: name, - ns: currentOptions.getNamespace(name, getParent()), + ns: currentOptions.getNamespace(name, stack[0]), tagType: ElementTypes.ELEMENT, // will be refined on tag close props: [], children: [], @@ -227,8 +233,10 @@ const tokenizer = new Tokenizer(stack, { currentAttrEndIndex = end }, - onattribentity(char) { + onattribentity(char, start, end) { currentAttrValue += char + if (currentAttrStartIndex < 0) currentAttrStartIndex = start + currentAttrEndIndex = end }, onattribnameend(end) { @@ -316,7 +324,11 @@ const tokenizer = new Tokenizer(stack, { }, oncdata(start, end) { - // TODO throw error + if (stack[0].ns !== Namespaces.HTML) { + onText(getSlice(start, end), start, end) + } else { + // TODO throw error if ns is html + } } }) @@ -418,7 +430,7 @@ function onText(content: string, start: number, end: number) { // TODO do not do this in <script> or <style> content = currentOptions.decodeEntities!(content, false) } - const parent = getParent() + const parent = stack[0] || currentRoot const lastNode = parent.children[parent.children.length - 1] if (lastNode?.type === NodeTypes.TEXT) { // merge @@ -436,7 +448,10 @@ function onText(content: string, start: number, end: number) { function onCloseTag(el: ElementNode, end: number) { // attach end position let 
offset = 0 - while (currentInput.charCodeAt(end + offset) !== CharCodes.Gt) { + while ( + currentInput.charCodeAt(end + offset) !== CharCodes.Gt && + end + offset < currentInput.length + ) { offset++ } el.loc.end = tokenizer.getPos(end + offset + 1) @@ -634,11 +649,7 @@ function condense(str: string) { } function addNode(node: TemplateChildNode) { - getParent().children.push(node) -} - -function getParent() { - return stack[0] || currentRoot + ;(stack[0] || currentRoot).children.push(node) } function getLoc(start: number, end?: number): SourceLocation { diff --git a/packages/compiler-dom/__tests__/parse.spec.ts b/packages/compiler-dom/__tests__/parse.spec.ts index ed75b211a..5f15fa7c9 100644 --- a/packages/compiler-dom/__tests__/parse.spec.ts +++ b/packages/compiler-dom/__tests__/parse.spec.ts @@ -3,7 +3,6 @@ import { NodeTypes, ElementNode, TextNode, - ErrorCodes, ElementTypes, InterpolationNode, AttributeNode, @@ -302,15 +301,8 @@ describe('DOM parser', () => { test('Strict end tag detection for textarea.', () => { const ast = parse( - '<textarea>hello</textarea</textarea0></texTArea a="<>">', - { - ...parserOptions, - onError: err => { - if (err.code !== ErrorCodes.END_TAG_WITH_ATTRIBUTES) { - throw err - } - } - } + '<textarea>hello</textarea</textarea0></texTArea>', + parserOptions ) const element = ast.children[0] as ElementNode const text = element.children[0] as TextNode