wip: parse error tests

This commit is contained in:
Evan You 2023-11-22 13:58:50 +08:00
parent b6886a80b1
commit 59227d4124
4 changed files with 4625 additions and 375 deletions

View File

@ -33,14 +33,23 @@ describe('compiler: parse', () => {
})
})
test.skip('simple text with invalid end tag', () => {
test('simple text with invalid end tag', () => {
const onError = vi.fn()
const ast = baseParse('some text</div>', {
onError
})
const ast = baseParse('some text</div>', { onError })
const text = ast.children[0] as TextNode
expect(onError).toBeCalled()
expect(onError.mock.calls).toMatchObject([
[
{
code: ErrorCodes.X_INVALID_END_TAG,
loc: {
start: { column: 10, line: 1, offset: 9 },
end: { column: 10, line: 1, offset: 9 }
}
}
]
])
expect(text).toStrictEqual({
type: NodeTypes.TEXT,
content: 'some text',
@ -1276,7 +1285,7 @@ describe('compiler: parse', () => {
})
})
test.skip('directive with no name', () => {
test('directive with no name', () => {
let errorCode = -1
const ast = baseParse('<div v-/>', {
onError: err => {
@ -1293,6 +1302,10 @@ describe('compiler: parse', () => {
loc: {
start: { offset: 5, line: 1, column: 6 },
end: { offset: 7, line: 1, column: 8 }
},
nameLoc: {
start: { offset: 5, line: 1, column: 6 },
end: { offset: 7, line: 1, column: 8 }
}
})
})
@ -1734,7 +1747,7 @@ describe('compiler: parse', () => {
})
})
test.skip('invalid html', () => {
test('invalid html', () => {
expect(() => {
baseParse(`<div>\n<span>\n</div>\n</span>`)
}).toThrow('Element is missing end tag.')
@ -2031,30 +2044,30 @@ describe('compiler: parse', () => {
options?: Partial<ParserOptions>
}>
} = {
ABRUPT_CLOSING_OF_EMPTY_COMMENT: [
{
code: '<template><!--></template>',
errors: [
{
type: ErrorCodes.ABRUPT_CLOSING_OF_EMPTY_COMMENT,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
{
code: '<template><!---></template>',
errors: [
{
type: ErrorCodes.ABRUPT_CLOSING_OF_EMPTY_COMMENT,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
{
code: '<template><!----></template>',
errors: []
}
],
// ABRUPT_CLOSING_OF_EMPTY_COMMENT: [
// {
// code: '<template><!--></template>',
// errors: [
// {
// type: ErrorCodes.ABRUPT_CLOSING_OF_EMPTY_COMMENT,
// loc: { offset: 10, line: 1, column: 11 }
// }
// ]
// },
// {
// code: '<template><!---></template>',
// errors: [
// {
// type: ErrorCodes.ABRUPT_CLOSING_OF_EMPTY_COMMENT,
// loc: { offset: 10, line: 1, column: 11 }
// }
// ]
// },
// {
// code: '<template><!----></template>',
// errors: []
// }
// ],
CDATA_IN_HTML_CONTENT: [
{
code: '<template><![CDATA[cdata]]></template>',
@ -2081,28 +2094,28 @@ describe('compiler: parse', () => {
]
}
],
END_TAG_WITH_ATTRIBUTES: [
{
code: '<template><div></div id=""></template>',
errors: [
{
type: ErrorCodes.END_TAG_WITH_ATTRIBUTES,
loc: { offset: 21, line: 1, column: 22 }
}
]
}
],
END_TAG_WITH_TRAILING_SOLIDUS: [
{
code: '<template><div></div/></template>',
errors: [
{
type: ErrorCodes.END_TAG_WITH_TRAILING_SOLIDUS,
loc: { offset: 20, line: 1, column: 21 }
}
]
}
],
// END_TAG_WITH_ATTRIBUTES: [
// {
// code: '<template><div></div id=""></template>',
// errors: [
// {
// type: ErrorCodes.END_TAG_WITH_ATTRIBUTES,
// loc: { offset: 21, line: 1, column: 22 }
// }
// ]
// }
// ],
// END_TAG_WITH_TRAILING_SOLIDUS: [
// {
// code: '<template><div></div/></template>',
// errors: [
// {
// type: ErrorCodes.END_TAG_WITH_TRAILING_SOLIDUS,
// loc: { offset: 20, line: 1, column: 21 }
// }
// ]
// }
// ],
EOF_BEFORE_TAG_NAME: [
{
code: '<template><',
@ -2193,73 +2206,73 @@ describe('compiler: parse', () => {
loc: { offset: 0, line: 1, column: 1 }
}
]
},
// Bogus comments don't throw eof-in-comment error.
// https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
{
code: '<template><!',
errors: [
{
type: ErrorCodes.INCORRECTLY_OPENED_COMMENT,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
}
]
},
{
code: '<template><!-',
errors: [
{
type: ErrorCodes.INCORRECTLY_OPENED_COMMENT,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
}
]
},
{
code: '<template><!abc',
errors: [
{
type: ErrorCodes.INCORRECTLY_OPENED_COMMENT,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
}
]
}
],
EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT: [
{
code: "<script><!--console.log('hello')",
errors: [
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
},
{
type: ErrorCodes.EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT,
loc: { offset: 32, line: 1, column: 33 }
}
]
},
{
code: "<script>console.log('hello')",
errors: [
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
}
]
}
// // Bogus comments don't throw eof-in-comment error.
// // https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
// {
// code: '<template><!',
// errors: [
// {
// type: ErrorCodes.INCORRECTLY_OPENED_COMMENT,
// loc: { offset: 10, line: 1, column: 11 }
// },
// {
// type: ErrorCodes.X_MISSING_END_TAG,
// loc: { offset: 0, line: 1, column: 1 }
// }
// ]
// },
// {
// code: '<template><!-',
// errors: [
// {
// type: ErrorCodes.INCORRECTLY_OPENED_COMMENT,
// loc: { offset: 10, line: 1, column: 11 }
// },
// {
// type: ErrorCodes.X_MISSING_END_TAG,
// loc: { offset: 0, line: 1, column: 1 }
// }
// ]
// },
// {
// code: '<template><!abc',
// errors: [
// {
// type: ErrorCodes.INCORRECTLY_OPENED_COMMENT,
// loc: { offset: 10, line: 1, column: 11 }
// },
// {
// type: ErrorCodes.X_MISSING_END_TAG,
// loc: { offset: 0, line: 1, column: 1 }
// }
// ]
// }
],
// EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT: [
// {
// code: "<script><!--console.log('hello')",
// errors: [
// {
// type: ErrorCodes.X_MISSING_END_TAG,
// loc: { offset: 0, line: 1, column: 1 }
// },
// {
// type: ErrorCodes.EOF_IN_SCRIPT_HTML_COMMENT_LIKE_TEXT,
// loc: { offset: 32, line: 1, column: 33 }
// }
// ]
// },
// {
// code: "<script>console.log('hello')",
// errors: [
// {
// type: ErrorCodes.X_MISSING_END_TAG,
// loc: { offset: 0, line: 1, column: 1 }
// }
// ]
// }
// ],
EOF_IN_TAG: [
{
code: '<template><div',
@ -2268,10 +2281,6 @@ describe('compiler: parse', () => {
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 14, line: 1, column: 15 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2285,10 +2294,6 @@ describe('compiler: parse', () => {
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 15, line: 1, column: 16 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2302,10 +2307,6 @@ describe('compiler: parse', () => {
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 17, line: 1, column: 18 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2319,10 +2320,6 @@ describe('compiler: parse', () => {
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 18, line: 1, column: 19 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2332,18 +2329,14 @@ describe('compiler: parse', () => {
{
code: '<template><div id =',
errors: [
{
type: ErrorCodes.MISSING_ATTRIBUTE_VALUE,
loc: { offset: 19, line: 1, column: 20 }
},
// {
// type: ErrorCodes.MISSING_ATTRIBUTE_VALUE,
// loc: { offset: 19, line: 1, column: 20 }
// },
{
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 19, line: 1, column: 20 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2357,10 +2350,6 @@ describe('compiler: parse', () => {
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 22, line: 1, column: 23 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2374,10 +2363,6 @@ describe('compiler: parse', () => {
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 22, line: 1, column: 23 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2391,10 +2376,6 @@ describe('compiler: parse', () => {
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 23, line: 1, column: 24 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2408,10 +2389,6 @@ describe('compiler: parse', () => {
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 23, line: 1, column: 24 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2425,10 +2402,6 @@ describe('compiler: parse', () => {
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 21, line: 1, column: 22 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2446,10 +2419,6 @@ describe('compiler: parse', () => {
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 24, line: 1, column: 25 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2467,10 +2436,6 @@ describe('compiler: parse', () => {
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 24, line: 1, column: 25 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2488,10 +2453,6 @@ describe('compiler: parse', () => {
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 23, line: 1, column: 24 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 10, line: 1, column: 11 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
@ -2504,102 +2465,106 @@ describe('compiler: parse', () => {
{
type: ErrorCodes.EOF_IN_TAG,
loc: { offset: 10, line: 1, column: 11 }
}
]
}
],
INCORRECTLY_CLOSED_COMMENT: [
{
code: '<template><!--comment--!></template>',
errors: [
{
type: ErrorCodes.INCORRECTLY_CLOSED_COMMENT,
loc: { offset: 10, line: 1, column: 11 }
}
]
}
],
INCORRECTLY_OPENED_COMMENT: [
{
code: '<template><!></template>',
errors: [
{
type: ErrorCodes.INCORRECTLY_OPENED_COMMENT,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
{
code: '<template><!-></template>',
errors: [
{
type: ErrorCodes.INCORRECTLY_OPENED_COMMENT,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
{
code: '<template><!ELEMENT br EMPTY></template>',
errors: [
{
type: ErrorCodes.INCORRECTLY_OPENED_COMMENT,
loc: { offset: 10, line: 1, column: 11 }
}
]
},
// Just ignore doctype.
{
code: '<!DOCTYPE html>',
errors: []
}
],
INVALID_FIRST_CHARACTER_OF_TAG_NAME: [
{
code: '<template>a < b</template>',
errors: [
{
type: ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
loc: { offset: 13, line: 1, column: 14 }
}
]
},
{
code: '<template><<3C>></template>',
errors: [
{
type: ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
loc: { offset: 11, line: 1, column: 12 }
}
]
},
{
code: '<template>a </ b</template>',
errors: [
{
type: ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
loc: { offset: 14, line: 1, column: 15 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
}
]
},
{
code: '<template></<2F>></template>',
errors: [
{
type: ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
loc: { offset: 12, line: 1, column: 13 }
}
]
},
// Don't throw invalid-first-character-of-tag-name in interpolation
{
code: '<template>{{a < b}}</template>',
errors: []
}
],
// INCORRECTLY_CLOSED_COMMENT: [
// {
// code: '<template><!--comment--!></template>',
// errors: [
// {
// type: ErrorCodes.INCORRECTLY_CLOSED_COMMENT,
// loc: { offset: 10, line: 1, column: 11 }
// }
// ]
// }
// ],
// INCORRECTLY_OPENED_COMMENT: [
// {
// code: '<template><!></template>',
// errors: [
// {
// type: ErrorCodes.INCORRECTLY_OPENED_COMMENT,
// loc: { offset: 10, line: 1, column: 11 }
// }
// ]
// },
// {
// code: '<template><!-></template>',
// errors: [
// {
// type: ErrorCodes.INCORRECTLY_OPENED_COMMENT,
// loc: { offset: 10, line: 1, column: 11 }
// }
// ]
// },
// {
// code: '<template><!ELEMENT br EMPTY></template>',
// errors: [
// {
// type: ErrorCodes.INCORRECTLY_OPENED_COMMENT,
// loc: { offset: 10, line: 1, column: 11 }
// }
// ]
// },
// // Just ignore doctype.
// {
// code: '<!DOCTYPE html>',
// errors: []
// }
// ],
// INVALID_FIRST_CHARACTER_OF_TAG_NAME: [
// {
// code: '<template>a < b</template>',
// errors: [
// {
// type: ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
// loc: { offset: 13, line: 1, column: 14 }
// }
// ]
// },
// {
// code: '<template><<3C>></template>',
// errors: [
// {
// type: ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
// loc: { offset: 11, line: 1, column: 12 }
// }
// ]
// },
// {
// code: '<template>a </ b</template>',
// errors: [
// {
// type: ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
// loc: { offset: 14, line: 1, column: 15 }
// },
// {
// type: ErrorCodes.X_MISSING_END_TAG,
// loc: { offset: 0, line: 1, column: 1 }
// }
// ]
// },
// {
// code: '<template></<2F>></template>',
// errors: [
// {
// type: ErrorCodes.INVALID_FIRST_CHARACTER_OF_TAG_NAME,
// loc: { offset: 12, line: 1, column: 13 }
// }
// ]
// },
// // Don't throw invalid-first-character-of-tag-name in interpolation
// {
// code: '<template>{{a < b}}</template>',
// errors: []
// }
// ],
MISSING_ATTRIBUTE_VALUE: [
{
code: '<template><div id=></div></template>',
@ -2635,73 +2600,73 @@ describe('compiler: parse', () => {
]
}
],
MISSING_WHITESPACE_BETWEEN_ATTRIBUTES: [
{
code: '<template><div id="foo"class="bar"></div></template>',
errors: [
{
type: ErrorCodes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
loc: { offset: 23, line: 1, column: 24 }
}
]
},
// CR doesn't appear in tokenization phase, but all CR are removed in preprocessing.
// https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
{
code: '<template><div id="foo"\r\nclass="bar"></div></template>',
errors: []
}
],
NESTED_COMMENT: [
{
code: '<template><!--a<!--b--></template>',
errors: [
{
type: ErrorCodes.NESTED_COMMENT,
loc: { offset: 15, line: 1, column: 16 }
}
]
},
{
code: '<template><!--a<!--b<!--c--></template>',
errors: [
{
type: ErrorCodes.NESTED_COMMENT,
loc: { offset: 15, line: 1, column: 16 }
},
{
type: ErrorCodes.NESTED_COMMENT,
loc: { offset: 20, line: 1, column: 21 }
}
]
},
{
code: '<template><!--a<!--b<!----></template>',
errors: [
{
type: ErrorCodes.NESTED_COMMENT,
loc: { offset: 15, line: 1, column: 16 }
}
]
},
{
code: '<template><!--a<!--></template>',
errors: []
},
{
code: '<template><!--a<!--',
errors: [
{
type: ErrorCodes.EOF_IN_COMMENT,
loc: { offset: 19, line: 1, column: 20 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
}
]
}
],
// MISSING_WHITESPACE_BETWEEN_ATTRIBUTES: [
// {
// code: '<template><div id="foo"class="bar"></div></template>',
// errors: [
// {
// type: ErrorCodes.MISSING_WHITESPACE_BETWEEN_ATTRIBUTES,
// loc: { offset: 23, line: 1, column: 24 }
// }
// ]
// },
// // CR doesn't appear in tokenization phase, but all CR are removed in preprocessing.
// // https://html.spec.whatwg.org/multipage/parsing.html#preprocessing-the-input-stream
// {
// code: '<template><div id="foo"\r\nclass="bar"></div></template>',
// errors: []
// }
// ],
// NESTED_COMMENT: [
// {
// code: '<template><!--a<!--b--></template>',
// errors: [
// {
// type: ErrorCodes.NESTED_COMMENT,
// loc: { offset: 15, line: 1, column: 16 }
// }
// ]
// },
// {
// code: '<template><!--a<!--b<!--c--></template>',
// errors: [
// {
// type: ErrorCodes.NESTED_COMMENT,
// loc: { offset: 15, line: 1, column: 16 }
// },
// {
// type: ErrorCodes.NESTED_COMMENT,
// loc: { offset: 20, line: 1, column: 21 }
// }
// ]
// },
// {
// code: '<template><!--a<!--b<!----></template>',
// errors: [
// {
// type: ErrorCodes.NESTED_COMMENT,
// loc: { offset: 15, line: 1, column: 16 }
// }
// ]
// },
// {
// code: '<template><!--a<!--></template>',
// errors: []
// },
// {
// code: '<template><!--a<!--',
// errors: [
// {
// type: ErrorCodes.EOF_IN_COMMENT,
// loc: { offset: 19, line: 1, column: 20 }
// },
// {
// type: ErrorCodes.X_MISSING_END_TAG,
// loc: { offset: 0, line: 1, column: 1 }
// }
// ]
// }
// ],
UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME: [
{
code: "<template><div a\"bc=''></div></template>",
@ -2843,6 +2808,19 @@ describe('compiler: parse', () => {
}
]
},
{
code: '<template>a </ b</template>',
errors: [
{
type: ErrorCodes.X_INVALID_END_TAG,
loc: { offset: 12, line: 1, column: 13 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
}
]
},
{
code: "<template>{{'</div>'}}</template>",
errors: []
@ -2903,6 +2881,19 @@ describe('compiler: parse', () => {
}
]
},
{
code: '<div>{{ foo</div>',
errors: [
{
type: ErrorCodes.X_MISSING_INTERPOLATION_END,
loc: { offset: 5, line: 1, column: 6 }
},
{
type: ErrorCodes.X_MISSING_END_TAG,
loc: { offset: 0, line: 1, column: 1 }
}
]
},
{
code: '{{}}',
errors: []
@ -2924,7 +2915,7 @@ describe('compiler: parse', () => {
for (const key of Object.keys(patterns)) {
describe(key, () => {
for (const { code, errors, options } of patterns[key]) {
test.skip(
test(
code.replace(
/[\r\n]/g,
c => `\\x0${c.codePointAt(0)!.toString(16)};`
@ -2933,6 +2924,8 @@ describe('compiler: parse', () => {
const spy = vi.fn()
const ast = baseParse(code, {
parseMode: 'html',
getNamespace: tag =>
tag === 'svg' ? Namespaces.SVG : Namespaces.HTML,
...options,
onError: spy
})

View File

@ -22,6 +22,7 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.
*/
import { ErrorCodes } from '../errors'
import { ElementNode, Position } from '../ast'
/**
@ -54,6 +55,7 @@ export const enum CharCodes {
Amp = 0x26, // "&"
SingleQuote = 0x27, // "'"
DoubleQuote = 0x22, // '"'
GraveAccent = 96, // "`"
Dash = 0x2d, // "-"
Slash = 0x2f, // "/"
Zero = 0x30, // "0"
@ -83,7 +85,7 @@ const defaultDelimitersOpen = new Uint8Array([123, 123]) // "{{"
const defaultDelimitersClose = new Uint8Array([125, 125]) // "}}"
/** All the states the tokenizer can be in. */
const enum State {
export const enum State {
Text = 1,
// interpolation
@ -200,9 +202,10 @@ export interface Callbacks {
oncomment(start: number, endIndex: number): void
oncdata(start: number, endIndex: number): void
// onprocessinginstruction(start: number, endIndex: number): void
onprocessinginstruction(start: number, endIndex: number): void
// ondeclaration(start: number, endIndex: number): void
onend(): void
onerr(code: ErrorCodes, index: number): void
}
/**
@ -211,7 +214,7 @@ export interface Callbacks {
* We don't have `Script`, `Style`, or `Title` here. Instead, we re-use the *End
* sequences with an increased offset.
*/
const Sequences = {
export const Sequences = {
Cdata: new Uint8Array([0x43, 0x44, 0x41, 0x54, 0x41, 0x5b]), // CDATA[
CdataEnd: new Uint8Array([0x5d, 0x5d, 0x3e]), // ]]>
CommentEnd: new Uint8Array([0x2d, 0x2d, 0x3e]), // `-->`
@ -225,11 +228,11 @@ const Sequences = {
export default class Tokenizer {
/** The current state the tokenizer is in. */
private state = State.Text
public state = State.Text
/** The read buffer. */
private buffer = ''
/** The beginning of the section that is currently being read. */
private sectionStart = 0
public sectionStart = 0
/** The index within the buffer that we are currently looking at. */
private index = 0
/** The start of the last entity. */
@ -366,7 +369,7 @@ export default class Tokenizer {
}
}
private currentSequence: Uint8Array = undefined!
public currentSequence: Uint8Array = undefined!
private sequenceIndex = 0
private stateSpecialStartSequence(c: number): void {
const isEnd = this.sequenceIndex === this.currentSequence.length
@ -581,7 +584,12 @@ export default class Tokenizer {
if (isWhitespace(c)) {
// Ignore
} else if (c === CharCodes.Gt) {
if (__DEV__ || !__BROWSER__) {
this.cbs.onerr(ErrorCodes.MISSING_END_TAG_NAME, this.index)
}
this.state = State.Text
// Ignore
this.sectionStart = this.index + 1
} else {
this.state = isTagStartChar(c)
? State.InClosingTagName
@ -599,7 +607,7 @@ export default class Tokenizer {
}
private stateAfterClosingTagName(c: number): void {
// Skip everything until ">"
if (c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)) {
if (c === CharCodes.Gt) {
this.state = State.Text
this.sectionStart = this.index + 1
}
@ -615,7 +623,19 @@ export default class Tokenizer {
this.sectionStart = this.index + 1
} else if (c === CharCodes.Slash) {
this.state = State.InSelfClosingTag
if (
(__DEV__ || !__BROWSER__) &&
this.buffer.charCodeAt(this.index + 1) !== CharCodes.Gt
) {
this.cbs.onerr(ErrorCodes.UNEXPECTED_SOLIDUS_IN_TAG, this.index)
}
} else if (!isWhitespace(c)) {
if ((__DEV__ || !__BROWSER__) && c === CharCodes.Eq) {
this.cbs.onerr(
ErrorCodes.UNEXPECTED_EQUALS_SIGN_BEFORE_ATTRIBUTE_NAME,
this.index
)
}
this.handleAttributeStart(c)
}
}
@ -655,6 +675,16 @@ export default class Tokenizer {
if (c === CharCodes.Eq || isEndOfTagSection(c)) {
this.cbs.onattribname(this.sectionStart, this.index)
this.handleAttributeNameEnd(c)
} else if (
(__DEV__ || !__BROWSER__) &&
(c === CharCodes.DoubleQuote ||
c === CharCodes.SingleQuote ||
c === CharCodes.Lt)
) {
this.cbs.onerr(
ErrorCodes.UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME,
this.index
)
}
}
private stateInDirectiveName(c: number): void {
@ -687,7 +717,14 @@ export default class Tokenizer {
if (c === CharCodes.RightSquare) {
this.state = State.InDirectiveArg
} else if (c === CharCodes.Eq || isEndOfTagSection(c)) {
// TODO emit error
this.cbs.ondirarg(this.sectionStart, this.index + 1)
this.handleAttributeNameEnd(c)
if (__DEV__ || !__BROWSER__) {
this.cbs.onerr(
ErrorCodes.X_MISSING_DYNAMIC_DIRECTIVE_ARGUMENT_END,
this.index
)
}
}
}
private stateInDirectiveModifier(c: number): void {
@ -757,6 +794,17 @@ export default class Tokenizer {
this.cbs.onattribend(QuoteType.Unquoted, this.index)
this.state = State.BeforeAttributeName
this.stateBeforeAttributeName(c)
} else if (
  // The dev/non-browser guard must cover the whole character test, not just
  // the double-quote comparison; otherwise production browser builds still
  // report this error for `'`, `<`, `=` and "`". This mirrors how the
  // UNEXPECTED_CHARACTER_IN_ATTRIBUTE_NAME check is guarded.
  (__DEV__ || !__BROWSER__) &&
  (c === CharCodes.DoubleQuote ||
    c === CharCodes.SingleQuote ||
    c === CharCodes.Lt ||
    c === CharCodes.Eq ||
    c === CharCodes.GraveAccent)
) {
  this.cbs.onerr(
    ErrorCodes.UNEXPECTED_CHARACTER_IN_UNQUOTED_ATTRIBUTE_VALUE,
    this.index
  )
} else if (!__BROWSER__ && c === CharCodes.Amp) {
this.startEntity()
}
@ -779,7 +827,7 @@ export default class Tokenizer {
}
private stateInProcessingInstruction(c: number): void {
  // Stay in this state until the current char is ">" or fastForwardTo()
  // reports that it reached one.
  const reachedEnd = c === CharCodes.Gt || this.fastForwardTo(CharCodes.Gt)
  if (!reachedEnd) {
    return
  }
  // Report the instruction span, then resume plain text right after ">".
  this.cbs.onprocessinginstruction(this.sectionStart, this.index)
  this.state = State.Text
  this.sectionStart = this.index + 1
}

View File

@ -19,6 +19,8 @@ import Tokenizer, {
CharCodes,
ParseMode,
QuoteType,
Sequences,
State,
isWhitespace,
toCharCodes
} from './Tokenizer'
@ -72,6 +74,8 @@ let currentVPreBoundary: ElementNode | null = null
const stack: ElementNode[] = []
const tokenizer = new Tokenizer(stack, {
onerr: emitError,
ontext(start, end) {
onText(getSlice(start, end), start, end)
},
@ -121,7 +125,7 @@ const tokenizer = new Tokenizer(stack, {
tagType: ElementTypes.ELEMENT, // will be refined on tag close
props: [],
children: [],
loc: getLoc(startIndex),
loc: getLoc(startIndex, end),
codegenNode: undefined
}
},
@ -133,15 +137,24 @@ const tokenizer = new Tokenizer(stack, {
onclosetag(start, end) {
const name = getSlice(start, end)
if (!currentOptions.isVoidTag(name)) {
let found = false
for (let i = 0; i < stack.length; i++) {
const e = stack[i]
if (e.tag.toLowerCase() === name.toLowerCase()) {
found = true
if (i > 0) {
emitError(ErrorCodes.X_MISSING_END_TAG, stack[0].loc.start.offset)
}
for (let j = 0; j <= i; j++) {
onCloseTag(stack.shift()!, end)
const el = stack.shift()!
onCloseTag(el, end, j < i)
}
break
}
}
if (!found) {
emitError(ErrorCodes.X_INVALID_END_TAG, backTrack(start, CharCodes.Lt))
}
}
},
@ -166,7 +179,20 @@ const tokenizer = new Tokenizer(stack, {
ondirname(start, end) {
const raw = getSlice(start, end)
if (inVPre) {
const name =
raw === '.' || raw === ':'
? 'bind'
: raw === '@'
? 'on'
: raw === '#'
? 'slot'
: raw.slice(2)
if (!inVPre && name === '') {
emitError(ErrorCodes.X_MISSING_DIRECTIVE_NAME, start)
}
if (inVPre || name === '') {
currentProp = {
type: NodeTypes.ATTRIBUTE,
name: raw,
@ -175,14 +201,6 @@ const tokenizer = new Tokenizer(stack, {
loc: getLoc(start)
}
} else {
const name =
raw === '.' || raw === ':'
? 'bind'
: raw === '@'
? 'on'
: raw === '#'
? 'slot'
: raw.slice(2)
currentProp = {
type: NodeTypes.DIRECTIVE,
name,
@ -265,7 +283,7 @@ const tokenizer = new Tokenizer(stack, {
p => (p.type === NodeTypes.DIRECTIVE ? p.rawName : p.name) === name
)
) {
// TODO duplicate
emitError(ErrorCodes.DUPLICATE_ATTRIBUTE, start)
}
},
@ -287,6 +305,10 @@ const tokenizer = new Tokenizer(stack, {
currentAttrValue = condense(currentAttrValue).trim()
}
if (quote === QuoteType.Unquoted && !currentAttrValue) {
emitError(ErrorCodes.MISSING_ATTRIBUTE_VALUE, end)
}
currentProp!.value = {
type: NodeTypes.TEXT,
content: currentAttrValue,
@ -342,16 +364,52 @@ const tokenizer = new Tokenizer(stack, {
},
onend() {
if (stack.length > 0) {
// has unclosed tag
currentOptions.onError(
// TODO loc info
createCompilerError(ErrorCodes.MISSING_END_TAG_NAME)
)
const end = currentInput.length
// EOF ERRORS
if ((__DEV__ || !__BROWSER__) && tokenizer.state !== State.Text) {
switch (tokenizer.state) {
case State.BeforeTagName:
case State.BeforeClosingTagName:
emitError(ErrorCodes.EOF_BEFORE_TAG_NAME, end)
break
case State.Interpolation:
case State.InterpolationClose:
emitError(
ErrorCodes.X_MISSING_INTERPOLATION_END,
tokenizer.sectionStart
)
break
case State.InCommentLike:
if (tokenizer.currentSequence === Sequences.CdataEnd) {
emitError(ErrorCodes.EOF_IN_CDATA, end)
} else {
emitError(ErrorCodes.EOF_IN_COMMENT, end)
}
break
case State.InTagName:
case State.InSelfClosingTag:
case State.InClosingTagName:
case State.BeforeAttributeName:
case State.InAttributeName:
case State.InDirectiveName:
case State.InDirectiveArg:
case State.InDirectiveDynamicArg:
case State.InDirectiveModifier:
case State.AfterAttributeName:
case State.BeforeAttributeValue:
case State.InAttributeValueDq: // "
case State.InAttributeValueSq: // '
case State.InAttributeValueNq:
emitError(ErrorCodes.EOF_IN_TAG, end)
break
default:
// console.log(tokenizer.state)
break
}
}
const end = currentInput.length - 1
for (let index = 0; index < stack.length; index++) {
onCloseTag(stack[index], end)
onCloseTag(stack[index], end - 1)
emitError(ErrorCodes.X_MISSING_END_TAG, stack[index].loc.start.offset)
}
},
@ -359,7 +417,17 @@ const tokenizer = new Tokenizer(stack, {
if (stack[0].ns !== Namespaces.HTML) {
onText(getSlice(start, end), start, end)
} else {
// TODO throw error if ns is html
emitError(ErrorCodes.CDATA_IN_HTML_CONTENT, start - 9)
}
},
onprocessinginstruction(start) {
  // Processing instructions produce no AST node (there is no runtime
  // handling for them); this callback only reports an error in HTML content.
  const parent = stack[0]
  // Use the innermost open element's namespace, or the configured root
  // namespace when no element is open.
  const ns = parent ? parent.ns : currentOptions.ns
  if (ns !== Namespaces.HTML) {
    return
  }
  // NOTE(review): `start - 1` presumably points at the "<" that opens the
  // instruction - confirm against the tokenizer's sectionStart.
  emitError(ErrorCodes.UNEXPECTED_QUESTION_MARK_INSTEAD_OF_TAG_NAME, start - 1)
}
})
@ -453,9 +521,11 @@ function endOpenTag(end: number) {
}
function onText(content: string, start: number, end: number) {
if (__BROWSER__ && content.includes('&')) {
// TODO do not do this in <script> or <style>
content = currentOptions.decodeEntities!(content, false)
if (__BROWSER__) {
const tag = stack[0]?.tag
if (tag !== 'script' && tag !== 'style' && content.includes('&')) {
content = currentOptions.decodeEntities!(content, false)
}
}
const parent = stack[0] || currentRoot
const lastNode = parent.children[parent.children.length - 1]
@ -472,7 +542,7 @@ function onText(content: string, start: number, end: number) {
}
}
function onCloseTag(el: ElementNode, end: number) {
function onCloseTag(el: ElementNode, end: number, isImplied = false) {
// attach end position
if (tokenizer.inSFCRoot) {
// SFC root tag, end position should be inner end
@ -481,6 +551,9 @@ function onCloseTag(el: ElementNode, end: number) {
} else {
el.loc.end = extend({}, el.loc.start)
}
} else if (isImplied) {
// implied close, end should be backtracked to close
el.loc.end = tokenizer.getPos(backTrack(end, CharCodes.Lt))
} else {
el.loc.end = tokenizer.getPos(end + fastForward(end, CharCodes.Gt) + 1)
}
@ -527,6 +600,12 @@ function fastForward(start: number, c: number) {
return offset
}
/**
 * Scans `currentInput` leftwards from `index` (inclusive) for the char code
 * `c`.
 *
 * @param index - position to start scanning from
 * @param c - char code to look for
 * @returns the position of the nearest match at or before `index`, or -1 when
 * the scan runs off the start of the input; an `index` that is already
 * negative is returned unchanged.
 */
function backTrack(index: number, c: number) {
  let pos = index
  for (; pos >= 0; pos--) {
    if (currentInput.charCodeAt(pos) === c) {
      break
    }
  }
  return pos
}
const specialTemplateDir = new Set(['if', 'else', 'else-if', 'for', 'slot'])
function isFragmentTemplate({ tag, props }: ElementNode): boolean {
if (tag === 'template') {
@ -734,6 +813,10 @@ function dirToAttr(dir: DirectiveNode): AttributeNode {
return attr
}
/**
 * Reports a compiler error located at a single source offset through the
 * active `onError` option.
 *
 * @param code - the error code to report
 * @param index - source offset used as both start and end of the location
 */
function emitError(code: ErrorCodes, index: number) {
  const loc = getLoc(index, index)
  const error = createCompilerError(code, loc)
  currentOptions.onError(error)
}
function reset() {
tokenizer.reset()
currentElement = null