wip: treat template with preprocessor as plain text

This commit is contained in:
Evan You 2023-11-20 22:49:16 +08:00
parent 5fc695be26
commit e5afca6c1e
5 changed files with 48 additions and 33 deletions

View File

@ -129,7 +129,7 @@ const enum State {
BeforeSpecialS, // Decide if we deal with `<script` or `<style` BeforeSpecialS, // Decide if we deal with `<script` or `<style`
BeforeSpecialT, // Decide if we deal with `<title` or `<textarea` BeforeSpecialT, // Decide if we deal with `<title` or `<textarea`
SpecialStartSequence, SpecialStartSequence,
InSpecialTag, InRCDATA,
InEntity, InEntity,
@ -245,6 +245,11 @@ export default class Tokenizer {
private readonly entityDecoder?: EntityDecoder private readonly entityDecoder?: EntityDecoder
public mode = ParseMode.BASE
public get inSFCRoot() {
return this.mode === ParseMode.SFC && this.stack.length === 0
}
constructor( constructor(
private readonly stack: ElementNode[], private readonly stack: ElementNode[],
private readonly cbs: Callbacks private readonly cbs: Callbacks
@ -256,8 +261,6 @@ export default class Tokenizer {
} }
} }
public mode = ParseMode.BASE
public reset(): void { public reset(): void {
this.state = State.Text this.state = State.Text
this.mode = ParseMode.BASE this.mode = ParseMode.BASE
@ -328,8 +331,8 @@ export default class Tokenizer {
this.delimiterIndex++ this.delimiterIndex++
} }
} else if (this.inRCDATA) { } else if (this.inRCDATA) {
this.state = State.InSpecialTag this.state = State.InRCDATA
this.stateInSpecialTag(c) this.stateInRCDATA(c)
} else { } else {
this.state = State.Text this.state = State.Text
this.stateText(c) this.stateText(c)
@ -349,7 +352,7 @@ export default class Tokenizer {
if (this.delimiterIndex === this.delimiterClose.length - 1) { if (this.delimiterIndex === this.delimiterClose.length - 1) {
this.cbs.oninterpolation(this.sectionStart, this.index + 1) this.cbs.oninterpolation(this.sectionStart, this.index + 1)
if (this.inRCDATA) { if (this.inRCDATA) {
this.state = State.InSpecialTag this.state = State.InRCDATA
} else { } else {
this.state = State.Text this.state = State.Text
} }
@ -386,7 +389,7 @@ export default class Tokenizer {
} }
/** Look for an end tag. For <title> and <textarea>, also decode entities. */ /** Look for an end tag. For <title> and <textarea>, also decode entities. */
private stateInSpecialTag(c: number): void { private stateInRCDATA(c: number): void {
if (this.sequenceIndex === this.currentSequence.length) { if (this.sequenceIndex === this.currentSequence.length) {
if (c === CharCodes.Gt || isWhitespace(c)) { if (c === CharCodes.Gt || isWhitespace(c)) {
const endOfText = this.index - this.currentSequence.length const endOfText = this.index - this.currentSequence.length
@ -413,8 +416,7 @@ export default class Tokenizer {
} else if (this.sequenceIndex === 0) { } else if (this.sequenceIndex === 0) {
if ( if (
this.currentSequence === Sequences.TitleEnd || this.currentSequence === Sequences.TitleEnd ||
(this.currentSequence === Sequences.TextareaEnd && (this.currentSequence === Sequences.TextareaEnd && !this.inSFCRoot)
!(this.mode === ParseMode.SFC && this.stack.length === 0))
) { ) {
// We have to parse entities in <title> and <textarea> tags. // We have to parse entities in <title> and <textarea> tags.
if (!__BROWSER__ && c === CharCodes.Amp) { if (!__BROWSER__ && c === CharCodes.Amp) {
@ -507,10 +509,14 @@ export default class Tokenizer {
} }
private startSpecial(sequence: Uint8Array, offset: number) { private startSpecial(sequence: Uint8Array, offset: number) {
this.enterRCDATA(sequence, offset)
this.state = State.SpecialStartSequence
}
public enterRCDATA(sequence: Uint8Array, offset: number) {
this.inRCDATA = true this.inRCDATA = true
this.currentSequence = sequence this.currentSequence = sequence
this.sequenceIndex = offset this.sequenceIndex = offset
this.state = State.SpecialStartSequence
} }
private stateBeforeTagName(c: number): void { private stateBeforeTagName(c: number): void {
@ -525,7 +531,7 @@ export default class Tokenizer {
if (this.mode === ParseMode.BASE) { if (this.mode === ParseMode.BASE) {
// no special tags in base mode // no special tags in base mode
this.state = State.InTagName this.state = State.InTagName
} else if (this.mode === ParseMode.SFC && this.stack.length === 0) { } else if (this.inSFCRoot) {
// SFC mode + root level // SFC mode + root level
// - everything except <template> is RAWTEXT // - everything except <template> is RAWTEXT
// - <template> with lang other than html is also RAWTEXT // - <template> with lang other than html is also RAWTEXT
@ -560,8 +566,7 @@ export default class Tokenizer {
if (isEndOfTagSection(c)) { if (isEndOfTagSection(c)) {
const tag = this.buffer.slice(this.sectionStart, this.index) const tag = this.buffer.slice(this.sectionStart, this.index)
if (tag !== 'template') { if (tag !== 'template') {
this.inRCDATA = true this.enterRCDATA(toCharCodes(`</` + tag), 0)
this.currentSequence = toCharCodes(`</` + tag)
} }
this.handleTagName(c) this.handleTagName(c)
} }
@ -603,8 +608,7 @@ export default class Tokenizer {
if (c === CharCodes.Gt) { if (c === CharCodes.Gt) {
this.cbs.onopentagend(this.index) this.cbs.onopentagend(this.index)
if (this.inRCDATA) { if (this.inRCDATA) {
this.state = State.InSpecialTag this.state = State.InRCDATA
this.sequenceIndex = 0
} else { } else {
this.state = State.Text this.state = State.Text
} }
@ -827,7 +831,7 @@ export default class Tokenizer {
this.state = State.InEntity this.state = State.InEntity
this.entityStart = this.index this.entityStart = this.index
this.entityDecoder!.startEntity( this.entityDecoder!.startEntity(
this.baseState === State.Text || this.baseState === State.InSpecialTag this.baseState === State.Text || this.baseState === State.InRCDATA
? DecodingMode.Legacy ? DecodingMode.Legacy
: DecodingMode.Attribute : DecodingMode.Attribute
) )
@ -885,8 +889,8 @@ export default class Tokenizer {
this.stateSpecialStartSequence(c) this.stateSpecialStartSequence(c)
break break
} }
case State.InSpecialTag: { case State.InRCDATA: {
this.stateInSpecialTag(c) this.stateInRCDATA(c)
break break
} }
case State.CDATASequence: { case State.CDATASequence: {
@ -1016,7 +1020,7 @@ export default class Tokenizer {
if (this.sectionStart !== this.index) { if (this.sectionStart !== this.index) {
if ( if (
this.state === State.Text || this.state === State.Text ||
(this.state === State.InSpecialTag && this.sequenceIndex === 0) (this.state === State.InRCDATA && this.sequenceIndex === 0)
) { ) {
this.cbs.ontext(this.sectionStart, this.index) this.cbs.ontext(this.sectionStart, this.index)
this.sectionStart = this.index this.sectionStart = this.index
@ -1083,10 +1087,7 @@ export default class Tokenizer {
private emitCodePoint(cp: number, consumed: number): void { private emitCodePoint(cp: number, consumed: number): void {
if (!__BROWSER__) { if (!__BROWSER__) {
if ( if (this.baseState !== State.Text && this.baseState !== State.InRCDATA) {
this.baseState !== State.Text &&
this.baseState !== State.InSpecialTag
) {
if (this.sectionStart < this.entityStart) { if (this.sectionStart < this.entityStart) {
this.cbs.onattribdata(this.sectionStart, this.entityStart) this.cbs.onattribdata(this.sectionStart, this.entityStart)
} }

View File

@ -111,10 +111,9 @@ const tokenizer = new Tokenizer(stack, {
onopentagname(start, end) { onopentagname(start, end) {
const name = getSlice(start, end) const name = getSlice(start, end)
// in SFC mode, root-level tags locations are for its inner content. // in SFC mode, root-level tags locations are for its inner content.
const startIndex = const startIndex = tokenizer.inSFCRoot
tokenizer.mode === ParseMode.SFC && stack.length === 0 ? end + fastForward(end, CharCodes.Gt) + 1
? end + fastForward(end, CharCodes.Gt) + 1 : start - 1
: start - 1
currentElement = { currentElement = {
type: NodeTypes.ELEMENT, type: NodeTypes.ELEMENT,
tag: name, tag: name,
@ -296,6 +295,16 @@ const tokenizer = new Tokenizer(stack, {
? getLoc(currentAttrStartIndex, currentAttrEndIndex) ? getLoc(currentAttrStartIndex, currentAttrEndIndex)
: getLoc(currentAttrStartIndex - 1, currentAttrEndIndex + 1) : getLoc(currentAttrStartIndex - 1, currentAttrEndIndex + 1)
} }
if (
currentAttrValue &&
tokenizer.inSFCRoot &&
currentElement.tag === 'template' &&
currentProp.name === 'lang'
) {
// SFC root template with preprocessor lang, force tokenizer to
// RCDATA mode
tokenizer.enterRCDATA(toCharCodes(`</template`), 0)
}
} else { } else {
// directive // directive
currentProp.rawExp = currentAttrValue currentProp.rawExp = currentAttrValue
@ -464,7 +473,7 @@ function onText(content: string, start: number, end: number) {
function onCloseTag(el: ElementNode, end: number) { function onCloseTag(el: ElementNode, end: number) {
// attach end position // attach end position
if (tokenizer.mode === ParseMode.SFC && stack.length === 0) { if (tokenizer.inSFCRoot) {
// SFC root tag, end position should be inner end // SFC root tag, end position should be inner end
if (el.children.length) { if (el.children.length) {
el.loc.end = extend({}, el.children[el.children.length - 1].loc.end) el.loc.end = extend({}, el.children[el.children.length - 1].loc.end)

View File

@ -203,13 +203,15 @@ h1 { color: red }
}) })
// #1120 // #1120
test('alternative template lang should be treated as plain text', () => { test('template with preprocessor lang should be treated as plain text', () => {
const content = `p(v-if="1 < 2") test` const content = `p(v-if="1 < 2") test <div/>`
const { descriptor, errors } = parse( const { descriptor, errors } = parse(
`<template lang="pug">` + content + `</template>` `<template lang="pug">` + content + `</template>`
) )
expect(errors.length).toBe(0) expect(errors.length).toBe(0)
expect(descriptor.template!.content).toBe(content) expect(descriptor.template!.content).toBe(content)
// should not attempt to parse the content
expect(descriptor.template!.ast.children.length).toBe(1)
}) })
//#2566 //#2566

View File

@ -132,7 +132,8 @@ export function compileTemplate(
try { try {
return doCompileTemplate({ return doCompileTemplate({
...options, ...options,
source: preprocess(options, preprocessor) source: preprocess(options, preprocessor),
ast: undefined // invalidate AST if template goes through preprocessor
}) })
} catch (e: any) { } catch (e: any) {
return { return {

View File

@ -245,8 +245,10 @@ export function parse(
) )
} }
} }
// no need to genMap for template as its AST already accounts for the // only genMap for template when it needs preprocessor
// position in the SFC if (descriptor.template && descriptor.template.lang) {
genMap(descriptor.template)
}
genMap(descriptor.script) genMap(descriptor.script)
descriptor.styles.forEach(genMap) descriptor.styles.forEach(genMap)
descriptor.customBlocks.forEach(genMap) descriptor.customBlocks.forEach(genMap)