wip: treat template with preprocessor as plain text

2023-11-20 22:49:16 +08:00 · 2023-11-20 22:49:16 +08:00 · e5afca6c1e
parent 5fc695be26
commit e5afca6c1e
5 changed files with 48 additions and 33 deletions
--- a/packages/compiler-core/src/parser/Tokenizer.ts
+++ b/packages/compiler-core/src/parser/Tokenizer.ts
@ -129,7 +129,7 @@ const enum State {
  BeforeSpecialS, // Decide if we deal with `<script` or `<style`
  BeforeSpecialT, // Decide if we deal with `<title` or `<textarea`
  SpecialStartSequence,
-  InSpecialTag,
+  InRCDATA,
  InEntity,
@ -245,6 +245,11 @@ export default class Tokenizer {
  private readonly entityDecoder?: EntityDecoder
  public mode = ParseMode.BASE
  /**
   * Whether the tokenizer is at the root level of an SFC: the parse mode is
   * SFC and the element stack is currently empty.
   */
  public get inSFCRoot() {
    const stackIsEmpty = this.stack.length === 0
    return this.mode === ParseMode.SFC && stackIsEmpty
  }
  constructor(
    private readonly stack: ElementNode[],
    private readonly cbs: Callbacks
@ -256,8 +261,6 @@ export default class Tokenizer {
    }
  }
  public mode = ParseMode.BASE
  public reset(): void {
    this.state = State.Text
    this.mode = ParseMode.BASE
@ -328,8 +331,8 @@ export default class Tokenizer {
        this.delimiterIndex++
      }
    } else if (this.inRCDATA) {
-      this.state = State.InSpecialTag
+      this.state = State.InRCDATA
-      this.stateInSpecialTag(c)
+      this.stateInRCDATA(c)
    } else {
      this.state = State.Text
      this.stateText(c)
@ -349,7 +352,7 @@ export default class Tokenizer {
      if (this.delimiterIndex === this.delimiterClose.length - 1) {
        this.cbs.oninterpolation(this.sectionStart, this.index + 1)
        if (this.inRCDATA) {
-          this.state = State.InSpecialTag
+          this.state = State.InRCDATA
        } else {
          this.state = State.Text
        }
@ -386,7 +389,7 @@ export default class Tokenizer {
  }
  /** Look for an end tag. For <title> and <textarea>, also decode entities. */
-  private stateInSpecialTag(c: number): void {
+  private stateInRCDATA(c: number): void {
    if (this.sequenceIndex === this.currentSequence.length) {
      if (c === CharCodes.Gt || isWhitespace(c)) {
        const endOfText = this.index - this.currentSequence.length
@ -413,8 +416,7 @@ export default class Tokenizer {
    } else if (this.sequenceIndex === 0) {
      if (
        this.currentSequence === Sequences.TitleEnd ||
-        (this.currentSequence === Sequences.TextareaEnd &&
+        (this.currentSequence === Sequences.TextareaEnd && !this.inSFCRoot)
          !(this.mode === ParseMode.SFC && this.stack.length === 0))
      ) {
        // We have to parse entities in <title> and <textarea> tags.
        if (!__BROWSER__ && c === CharCodes.Amp) {
@ -507,10 +509,14 @@ export default class Tokenizer {
  }
  /**
   * Begin matching the start sequence of a special tag.
   *
   * Records the RCDATA bookkeeping via `enterRCDATA` and then moves the
   * tokenizer into `State.SpecialStartSequence`.
   *
   * @param sequence - char codes of the sequence to match against
   * @param offset - index into `sequence` at which matching resumes
   */
  private startSpecial(sequence: Uint8Array, offset: number) {
    this.enterRCDATA(sequence, offset)
    // The state transition is owned by this method, so it is set explicitly
    // after the RCDATA bookkeeping has been recorded.
    this.state = State.SpecialStartSequence
  }
  /**
   * Flag the tokenizer as being inside RCDATA content and record the end
   * sequence that terminates it.
   *
   * This only updates bookkeeping (`inRCDATA`, `currentSequence`,
   * `sequenceIndex`); it deliberately leaves `this.state` untouched. Callers
   * invoke it while the open tag is still being tokenized (after the tag name
   * has been read, or from the parser's attribute callback), so the current
   * state must be preserved. The switch to `State.InRCDATA` happens when the
   * open tag's closing `>` is reached and `inRCDATA` is set. `startSpecial`,
   * which does need `State.SpecialStartSequence`, sets that state itself.
   *
   * @param sequence - char codes of the sequence to match against
   * @param offset - index into `sequence` at which matching resumes
   */
  public enterRCDATA(sequence: Uint8Array, offset: number) {
    this.inRCDATA = true
    this.currentSequence = sequence
    this.sequenceIndex = offset
  }
  private stateBeforeTagName(c: number): void {
@ -525,7 +531,7 @@ export default class Tokenizer {
      if (this.mode === ParseMode.BASE) {
        // no special tags in base mode
        this.state = State.InTagName
-      } else if (this.mode === ParseMode.SFC && this.stack.length === 0) {
+      } else if (this.inSFCRoot) {
        // SFC mode + root level
        // - everything except <template> is RAWTEXT
        // - <template> with lang other than html is also RAWTEXT
@ -560,8 +566,7 @@ export default class Tokenizer {
    if (isEndOfTagSection(c)) {
      const tag = this.buffer.slice(this.sectionStart, this.index)
      if (tag !== 'template') {
-        this.inRCDATA = true
+        this.enterRCDATA(toCharCodes(`</` + tag), 0)
        this.currentSequence = toCharCodes(`</` + tag)
      }
      this.handleTagName(c)
    }
@ -603,8 +608,7 @@ export default class Tokenizer {
    if (c === CharCodes.Gt) {
      this.cbs.onopentagend(this.index)
      if (this.inRCDATA) {
-        this.state = State.InSpecialTag
+        this.state = State.InRCDATA
        this.sequenceIndex = 0
      } else {
        this.state = State.Text
      }
@ -827,7 +831,7 @@ export default class Tokenizer {
      this.state = State.InEntity
      this.entityStart = this.index
      this.entityDecoder!.startEntity(
-        this.baseState === State.Text || this.baseState === State.InSpecialTag
+        this.baseState === State.Text || this.baseState === State.InRCDATA
          ? DecodingMode.Legacy
          : DecodingMode.Attribute
      )
@ -885,8 +889,8 @@ export default class Tokenizer {
          this.stateSpecialStartSequence(c)
          break
        }
-        case State.InSpecialTag: {
+        case State.InRCDATA: {
-          this.stateInSpecialTag(c)
+          this.stateInRCDATA(c)
          break
        }
        case State.CDATASequence: {
@ -1016,7 +1020,7 @@ export default class Tokenizer {
    if (this.sectionStart !== this.index) {
      if (
        this.state === State.Text ||
-        (this.state === State.InSpecialTag && this.sequenceIndex === 0)
+        (this.state === State.InRCDATA && this.sequenceIndex === 0)
      ) {
        this.cbs.ontext(this.sectionStart, this.index)
        this.sectionStart = this.index
@ -1083,10 +1087,7 @@ export default class Tokenizer {
  private emitCodePoint(cp: number, consumed: number): void {
    if (!__BROWSER__) {
-      if (
+      if (this.baseState !== State.Text && this.baseState !== State.InRCDATA) {
        this.baseState !== State.Text &&
        this.baseState !== State.InSpecialTag
      ) {
        if (this.sectionStart < this.entityStart) {
          this.cbs.onattribdata(this.sectionStart, this.entityStart)
        }
--- a/packages/compiler-core/src/parser/index.ts
+++ b/packages/compiler-core/src/parser/index.ts
@ -111,10 +111,9 @@ const tokenizer = new Tokenizer(stack, {
  onopentagname(start, end) {
    const name = getSlice(start, end)
    // in SFC mode, root-level tags locations are for its inner content.
-    const startIndex =
+    const startIndex = tokenizer.inSFCRoot
-      tokenizer.mode === ParseMode.SFC && stack.length === 0
+      ? end + fastForward(end, CharCodes.Gt) + 1
-        ? end + fastForward(end, CharCodes.Gt) + 1
+      : start - 1
        : start - 1
    currentElement = {
      type: NodeTypes.ELEMENT,
      tag: name,
@ -296,6 +295,16 @@ const tokenizer = new Tokenizer(stack, {
                ? getLoc(currentAttrStartIndex, currentAttrEndIndex)
                : getLoc(currentAttrStartIndex - 1, currentAttrEndIndex + 1)
          }
          if (
            currentAttrValue &&
            tokenizer.inSFCRoot &&
            currentElement.tag === 'template' &&
            currentProp.name === 'lang'
          ) {
            // SFC root template with preprocessor lang, force tokenizer to
            // RCDATA mode
            tokenizer.enterRCDATA(toCharCodes(`</template`), 0)
          }
        } else {
          // directive
          currentProp.rawExp = currentAttrValue
@ -464,7 +473,7 @@ function onText(content: string, start: number, end: number) {
 function onCloseTag(el: ElementNode, end: number) {
  // attach end position
-  if (tokenizer.mode === ParseMode.SFC && stack.length === 0) {
+  if (tokenizer.inSFCRoot) {
    // SFC root tag, end position should be inner end
    if (el.children.length) {
      el.loc.end = extend({}, el.children[el.children.length - 1].loc.end)
--- a/packages/compiler-sfc/tests/parse.spec.ts
+++ b/packages/compiler-sfc/tests/parse.spec.ts
@ -203,13 +203,15 @@ h1 { color: red }
  })
  // #1120
-  test('alternative template lang should be treated as plain text', () => {
+  test('template with preprocessor lang should be treated as plain text', () => {
-    const content = `p(v-if="1 < 2") test`
+    const content = `p(v-if="1 < 2") test <div/>`
    const { descriptor, errors } = parse(
      `<template lang="pug">` + content + `</template>`
    )
    expect(errors.length).toBe(0)
    expect(descriptor.template!.content).toBe(content)
    // should not attempt to parse the content
    expect(descriptor.template!.ast.children.length).toBe(1)
  })
  //#2566
--- a/packages/compiler-sfc/src/compileTemplate.ts
+++ b/packages/compiler-sfc/src/compileTemplate.ts
@ -132,7 +132,8 @@ export function compileTemplate(
    try {
      return doCompileTemplate({
        ...options,
-        source: preprocess(options, preprocessor)
+        source: preprocess(options, preprocessor),
        ast: undefined // invalidate AST if template goes through preprocessor
      })
    } catch (e: any) {
      return {
--- a/packages/compiler-sfc/src/parse.ts
+++ b/packages/compiler-sfc/src/parse.ts
@ -245,8 +245,10 @@ export function parse(
        )
      }
    }
-    // no need to genMap for template as its AST already accounts for the
+    // only genMap for template when it needs preprocessor
-    // position in the SFC
+    if (descriptor.template && descriptor.template.lang) {
      genMap(descriptor.template)
    }
    genMap(descriptor.script)
    descriptor.styles.forEach(genMap)
    descriptor.customBlocks.forEach(genMap)