wip: treat template with preprocessor as plain text

This commit is contained in:
Evan You 2023-11-20 22:49:16 +08:00
parent 5fc695be26
commit e5afca6c1e
5 changed files with 48 additions and 33 deletions

View File

@ -129,7 +129,7 @@ const enum State {
BeforeSpecialS, // Decide if we deal with `<script` or `<style`
BeforeSpecialT, // Decide if we deal with `<title` or `<textarea`
SpecialStartSequence,
InSpecialTag,
InRCDATA,
InEntity,
@ -245,6 +245,11 @@ export default class Tokenizer {
private readonly entityDecoder?: EntityDecoder
public mode = ParseMode.BASE
/**
 * Whether the tokenizer is currently at the root level of an SFC:
 * true only when `mode` is `ParseMode.SFC` and the element stack is empty.
 */
public get inSFCRoot() {
return this.mode === ParseMode.SFC && this.stack.length === 0
}
constructor(
private readonly stack: ElementNode[],
private readonly cbs: Callbacks
@ -256,8 +261,6 @@ export default class Tokenizer {
}
}
public mode = ParseMode.BASE
public reset(): void {
this.state = State.Text
this.mode = ParseMode.BASE
@ -328,8 +331,8 @@ export default class Tokenizer {
this.delimiterIndex++
}
} else if (this.inRCDATA) {
this.state = State.InSpecialTag
this.stateInSpecialTag(c)
this.state = State.InRCDATA
this.stateInRCDATA(c)
} else {
this.state = State.Text
this.stateText(c)
@ -349,7 +352,7 @@ export default class Tokenizer {
if (this.delimiterIndex === this.delimiterClose.length - 1) {
this.cbs.oninterpolation(this.sectionStart, this.index + 1)
if (this.inRCDATA) {
this.state = State.InSpecialTag
this.state = State.InRCDATA
} else {
this.state = State.Text
}
@ -386,7 +389,7 @@ export default class Tokenizer {
}
/** Look for an end tag. For <title> and <textarea>, also decode entities. */
private stateInSpecialTag(c: number): void {
private stateInRCDATA(c: number): void {
if (this.sequenceIndex === this.currentSequence.length) {
if (c === CharCodes.Gt || isWhitespace(c)) {
const endOfText = this.index - this.currentSequence.length
@ -413,8 +416,7 @@ export default class Tokenizer {
} else if (this.sequenceIndex === 0) {
if (
this.currentSequence === Sequences.TitleEnd ||
(this.currentSequence === Sequences.TextareaEnd &&
!(this.mode === ParseMode.SFC && this.stack.length === 0))
(this.currentSequence === Sequences.TextareaEnd && !this.inSFCRoot)
) {
// We have to parse entities in <title> and <textarea> tags.
if (!__BROWSER__ && c === CharCodes.Amp) {
@ -507,10 +509,14 @@ export default class Tokenizer {
}
/**
 * Start matching a special tag's opening sequence.
 * Delegates the RCDATA bookkeeping for `sequence` / `offset` to
 * `enterRCDATA`, then moves the state machine to
 * `State.SpecialStartSequence`.
 */
private startSpecial(sequence: Uint8Array, offset: number) {
this.enterRCDATA(sequence, offset)
this.state = State.SpecialStartSequence
}
/**
 * Flag the tokenizer as being inside RCDATA content.
 * Stores `sequence` as `currentSequence` and `offset` as `sequenceIndex`.
 */
public enterRCDATA(sequence: Uint8Array, offset: number) {
this.inRCDATA = true
this.currentSequence = sequence
this.sequenceIndex = offset
// NOTE(review): this listing carries no +/- diff markers. The assignment
// below repeats the one in startSpecial and looks like the pre-change line
// that was moved there; confirm whether it is still part of this method.
this.state = State.SpecialStartSequence
}
private stateBeforeTagName(c: number): void {
@ -525,7 +531,7 @@ export default class Tokenizer {
if (this.mode === ParseMode.BASE) {
// no special tags in base mode
this.state = State.InTagName
} else if (this.mode === ParseMode.SFC && this.stack.length === 0) {
} else if (this.inSFCRoot) {
// SFC mode + root level
// - everything except <template> is RAWTEXT
// - <template> with lang other than html is also RAWTEXT
@ -560,8 +566,7 @@ export default class Tokenizer {
if (isEndOfTagSection(c)) {
const tag = this.buffer.slice(this.sectionStart, this.index)
if (tag !== 'template') {
this.inRCDATA = true
this.currentSequence = toCharCodes(`</` + tag)
this.enterRCDATA(toCharCodes(`</` + tag), 0)
}
this.handleTagName(c)
}
@ -603,8 +608,7 @@ export default class Tokenizer {
if (c === CharCodes.Gt) {
this.cbs.onopentagend(this.index)
if (this.inRCDATA) {
this.state = State.InSpecialTag
this.sequenceIndex = 0
this.state = State.InRCDATA
} else {
this.state = State.Text
}
@ -827,7 +831,7 @@ export default class Tokenizer {
this.state = State.InEntity
this.entityStart = this.index
this.entityDecoder!.startEntity(
this.baseState === State.Text || this.baseState === State.InSpecialTag
this.baseState === State.Text || this.baseState === State.InRCDATA
? DecodingMode.Legacy
: DecodingMode.Attribute
)
@ -885,8 +889,8 @@ export default class Tokenizer {
this.stateSpecialStartSequence(c)
break
}
case State.InSpecialTag: {
this.stateInSpecialTag(c)
case State.InRCDATA: {
this.stateInRCDATA(c)
break
}
case State.CDATASequence: {
@ -1016,7 +1020,7 @@ export default class Tokenizer {
if (this.sectionStart !== this.index) {
if (
this.state === State.Text ||
(this.state === State.InSpecialTag && this.sequenceIndex === 0)
(this.state === State.InRCDATA && this.sequenceIndex === 0)
) {
this.cbs.ontext(this.sectionStart, this.index)
this.sectionStart = this.index
@ -1083,10 +1087,7 @@ export default class Tokenizer {
private emitCodePoint(cp: number, consumed: number): void {
if (!__BROWSER__) {
if (
this.baseState !== State.Text &&
this.baseState !== State.InSpecialTag
) {
if (this.baseState !== State.Text && this.baseState !== State.InRCDATA) {
if (this.sectionStart < this.entityStart) {
this.cbs.onattribdata(this.sectionStart, this.entityStart)
}

View File

@ -111,10 +111,9 @@ const tokenizer = new Tokenizer(stack, {
onopentagname(start, end) {
const name = getSlice(start, end)
// in SFC mode, root-level tag locations refer to their inner content.
const startIndex =
tokenizer.mode === ParseMode.SFC && stack.length === 0
? end + fastForward(end, CharCodes.Gt) + 1
: start - 1
const startIndex = tokenizer.inSFCRoot
? end + fastForward(end, CharCodes.Gt) + 1
: start - 1
currentElement = {
type: NodeTypes.ELEMENT,
tag: name,
@ -296,6 +295,16 @@ const tokenizer = new Tokenizer(stack, {
? getLoc(currentAttrStartIndex, currentAttrEndIndex)
: getLoc(currentAttrStartIndex - 1, currentAttrEndIndex + 1)
}
if (
currentAttrValue &&
tokenizer.inSFCRoot &&
currentElement.tag === 'template' &&
currentProp.name === 'lang'
) {
// SFC root template with preprocessor lang, force tokenizer to
// RCDATA mode
tokenizer.enterRCDATA(toCharCodes(`</template`), 0)
}
} else {
// directive
currentProp.rawExp = currentAttrValue
@ -464,7 +473,7 @@ function onText(content: string, start: number, end: number) {
function onCloseTag(el: ElementNode, end: number) {
// attach end position
if (tokenizer.mode === ParseMode.SFC && stack.length === 0) {
if (tokenizer.inSFCRoot) {
// SFC root tag, end position should be inner end
if (el.children.length) {
el.loc.end = extend({}, el.children[el.children.length - 1].loc.end)

View File

@ -203,13 +203,15 @@ h1 { color: red }
})
// #1120
test('alternative template lang should be treated as plain text', () => {
const content = `p(v-if="1 < 2") test`
test('template with preprocessor lang should be treated as plain text', () => {
const content = `p(v-if="1 < 2") test <div/>`
const { descriptor, errors } = parse(
`<template lang="pug">` + content + `</template>`
)
expect(errors.length).toBe(0)
expect(descriptor.template!.content).toBe(content)
// should not attempt to parse the content
expect(descriptor.template!.ast.children.length).toBe(1)
})
//#2566

View File

@ -132,7 +132,8 @@ export function compileTemplate(
try {
return doCompileTemplate({
...options,
source: preprocess(options, preprocessor)
source: preprocess(options, preprocessor),
ast: undefined // invalidate AST if template goes through preprocessor
})
} catch (e: any) {
return {

View File

@ -245,8 +245,10 @@ export function parse(
)
}
}
// no need to genMap for template as its AST already accounts for the
// position in the SFC
// only genMap for template when it needs preprocessor
if (descriptor.template && descriptor.template.lang) {
genMap(descriptor.template)
}
genMap(descriptor.script)
descriptor.styles.forEach(genMap)
descriptor.customBlocks.forEach(genMap)