feat(compiler): improve parsing tolerance for language-tools

This commit is contained in:
Evan You 2023-11-27 16:55:54 +08:00
parent 0721daf22d
commit 41ff68ea57
2 changed files with 36 additions and 8 deletions

View File

@ -1918,6 +1918,29 @@ describe('compiler: parse', () => {
expect(baz.loc.end).toEqual({ line: 2, column: 28, offset })
})
// With standard HTML parsing, the following input would ignore the slash
// and treat "<" and "template" as attributes on the open tag of "Hello",
// causing `<template>` to fail to close, and `<script>` being parsed as its
// child. This would never be intended in actual templates, but is a common
// intermediate state from user input when parsing for IDE support. We want
// the `<script>` to be at root-level to keep the SFC structure stable for
// Volar to do incremental computations.
test('tag termination handling for IDE', () => {
  // Errors are routed to a mock so that the malformed input does not throw.
  const onError = vi.fn()
  const source = `<template><Hello\n</template><script>console.log(1)</script>`
  const root = baseParse(source, { onError })

  // `<template>` and `<script>` must both end up as root-level siblings:
  // the unterminated `<Hello` open tag must not swallow `</template>`.
  expect(root.children.length).toBe(2)

  const secondRootChild = root.children[1]
  expect(secondRootChild).toMatchObject({
    type: NodeTypes.ELEMENT,
    tag: 'script'
  })
})
describe('decodeEntities option', () => {
test('use decode by default', () => {
const ast: any = baseParse('&gt;&lt;&amp;&apos;&quot;&foo;')

View File

@ -301,6 +301,10 @@ export default class Tokenizer {
}
}
/**
 * Look one position ahead of the current index without advancing it.
 *
 * @returns the char code at `this.index + 1` in `this.buffer`
 * (presumably `NaN` when that position is past the end — this assumes
 * `buffer` is a plain string).
 */
private peek() {
  const lookaheadIndex = this.index + 1
  return this.buffer.charCodeAt(lookaheadIndex)
}
private stateText(c: number): void {
if (c === CharCodes.Lt) {
if (this.index > this.sectionStart) {
@ -627,12 +631,16 @@ export default class Tokenizer {
this.sectionStart = this.index + 1
} else if (c === CharCodes.Slash) {
this.state = State.InSelfClosingTag
if (
(__DEV__ || !__BROWSER__) &&
this.buffer.charCodeAt(this.index + 1) !== CharCodes.Gt
) {
if ((__DEV__ || !__BROWSER__) && this.peek() !== CharCodes.Gt) {
this.cbs.onerr(ErrorCodes.UNEXPECTED_SOLIDUS_IN_TAG, this.index)
}
} else if (c === CharCodes.Lt && this.peek() === CharCodes.Slash) {
// special handling for </ appearing in open tag state
// this is different from standard HTML parsing but makes practical sense
// especially for parsing intermediate input state in IDEs.
this.cbs.onopentagend(this.index)
this.state = State.BeforeTagName
this.sectionStart = this.index
} else if (!isWhitespace(c)) {
if ((__DEV__ || !__BROWSER__) && c === CharCodes.Eq) {
this.cbs.onerr(
@ -644,10 +652,7 @@ export default class Tokenizer {
}
}
private handleAttrStart(c: number) {
if (
c === CharCodes.LowerV &&
this.buffer.charCodeAt(this.index + 1) === CharCodes.Dash
) {
if (c === CharCodes.LowerV && this.peek() === CharCodes.Dash) {
this.state = State.InDirName
this.sectionStart = this.index
} else if (