vue3-core/packages/compiler-core/src/parser/index.ts

631 lines
16 KiB
TypeScript
Raw Normal View History

2023-11-13 21:03:39 +08:00
import { fromCodePoint } from 'entities/lib/decode.js'
import {
2023-11-14 18:03:00 +08:00
AttributeNode,
2023-11-14 20:39:08 +08:00
ConstantTypes,
2023-11-14 18:03:00 +08:00
DirectiveNode,
2023-11-13 21:03:39 +08:00
ElementNode,
ElementTypes,
2023-11-14 20:39:08 +08:00
Namespaces,
2023-11-13 21:03:39 +08:00
NodeTypes,
RootNode,
2023-11-16 01:31:52 +08:00
SimpleExpressionNode,
2023-11-15 17:45:42 +08:00
SourceLocation,
2023-11-13 21:03:39 +08:00
TemplateChildNode,
createRoot
} from '../ast'
2023-11-12 16:58:24 +08:00
import { ParserOptions } from '../options'
2023-11-17 09:22:12 +08:00
import Tokenizer, {
CharCodes,
ParseMode,
QuoteType,
isWhitespace,
toCharCodes
} from './Tokenizer'
2023-11-14 20:39:08 +08:00
import { CompilerCompatOptions } from '../compat/compatConfig'
2023-11-14 21:55:16 +08:00
import { NO, extend } from '@vue/shared'
2023-11-14 20:39:08 +08:00
import { defaultOnError, defaultOnWarn } from '../errors'
2023-11-16 10:54:54 +08:00
import { isCoreComponent } from '../utils'
2023-11-13 21:03:39 +08:00
2023-11-14 20:39:08 +08:00
/**
 * Keys of `ParserOptions` that remain optional after the user options have
 * been merged with the defaults (including every compat option key).
 */
type OptionalOptions =
  | 'whitespace'
  | 'isNativeTag'
  | 'isBuiltInComponent'
  | 'getTextMode'
  | keyof CompilerCompatOptions

/**
 * Shape of the options object the parser works with: every option is
 * required except for the keys listed in `OptionalOptions`, which keep their
 * original (optional) declaration.
 */
type MergedParserOptions = Pick<ParserOptions, OptionalOptions> &
  Omit<Required<ParserOptions>, OptionalOptions>
// Fallback entity decoding: only the five named entities that escape
// characters reserved by the template syntax are recognised. This decoder is
// used only when the custom renderer does not supply a platform-specific one.
const decodeRE = /&(gt|lt|amp|apos|quot);/g

// Entity name (without `&` and `;`) -> decoded character.
const decodeMap: Record<string, string> = {
  gt: '>',
  lt: '<',
  amp: '&',
  apos: "'",
  quot: '"'
}
/**
 * Baseline parser options. `baseParse` merges the caller's options on top of
 * this object for every parse.
 */
export const defaultParserOptions: MergedParserOptions = {
  parseMode: 'base',
  delimiters: [`{{`, `}}`],
  getNamespace: () => Namespaces.HTML,
  isVoidTag: NO,
  isPreTag: NO,
  isCustomElement: NO,
  // TODO handle entities
  decodeEntities(rawText: string): string {
    // replace each recognised named entity with its decoded character
    return rawText.replace(decodeRE, (_, p1) => decodeMap[p1])
  },
  onError: defaultOnError,
  onWarn: defaultOnWarn,
  comments: __DEV__
}
// ---------------------------------------------------------------------------
// Module-level parser state. All of it is shared by the tokenizer callbacks
// and the helper functions in this file.
// ---------------------------------------------------------------------------

// options / root of the parse currently in progress
let currentOptions: MergedParserOptions = defaultParserOptions
let currentRoot: RootNode = createRoot([])

// the template source being parsed
let currentInput = ''

// element whose open tag is being read, and the attribute/directive being read
let currentElement: ElementNode | null = null
let currentProp: AttributeNode | DirectiveNode | null = null

// accumulated attribute value and the source offsets it spans (-1 = unset)
let currentAttrValue = ''
let currentAttrStartIndex = -1
let currentAttrEndIndex = -1

// raw attribute names already seen on the current open tag (duplicate check)
let currentAttrs = new Set<string>()

// counter: bumped when a pre tag is opened, decremented when it is closed
let inPre = 0
// true while parsing inside an element that carries `v-pre`
let inVPre = false
let currentElementIsVPreBoundary = false

// open elements; index 0 is the innermost one (see unshift/shift usage)
const stack: ElementNode[] = []
2023-11-13 21:03:39 +08:00
2023-11-17 09:22:12 +08:00
/**
 * The tokenizer instance shared by every parse. Each callback receives
 * offsets into `currentInput` and translates the token into AST nodes or
 * into updates of the module-level parser state.
 */
const tokenizer = new Tokenizer(stack, {
  ontext(start, end) {
    onText(getSlice(start, end), start, end)
  },

  ontextentity(cp, end) {
    // the decoded character is appended to the surrounding text
    onText(fromCodePoint(cp), end - 1, end)
  },

  oninterpolation(start, end) {
    if (inVPre) {
      // inside v-pre the interpolation is kept as literal text
      return onText(getSlice(start, end), start, end)
    }
    // drop the delimiters, then trim whitespace around the expression
    let exprStart = start + tokenizer.delimiterOpen.length
    let exprEnd = end - tokenizer.delimiterClose.length
    while (isWhitespace(currentInput.charCodeAt(exprStart))) {
      exprStart++
    }
    while (isWhitespace(currentInput.charCodeAt(exprEnd - 1))) {
      exprEnd--
    }
    addNode({
      type: NodeTypes.INTERPOLATION,
      content: {
        type: NodeTypes.SIMPLE_EXPRESSION,
        isStatic: false,
        // Set `isConstant` to false by default and will decide in transformExpression
        constType: ConstantTypes.NOT_CONSTANT,
        content: getSlice(exprStart, exprEnd),
        loc: getLoc(exprStart, exprEnd)
      },
      loc: getLoc(start, end)
    })
  },

  onopentagname(start, end) {
    const tagName = getSlice(start, end)
    currentElement = {
      type: NodeTypes.ELEMENT,
      tag: tagName,
      ns: currentOptions.getNamespace(tagName, getParent()),
      tagType: ElementTypes.ELEMENT, // will be refined on tag close
      props: [],
      children: [],
      loc: {
        // one character before the name — presumably the opening `<`
        start: tokenizer.getPos(start - 1),
        // @ts-expect-error to be attached on tag close
        end: undefined,
        source: ''
      },
      codegenNode: undefined
    }
    // a new open tag starts with a clean duplicate-attribute record
    currentAttrs.clear()
  },

  onopentagend(end) {
    endOpenTag(end)
  },

  onclosetag(start, end) {
    const tagName = getSlice(start, end)
    if (currentOptions.isVoidTag(tagName)) {
      return
    }
    // find the nearest open element with this tag; close it and everything
    // that was opened inside it
    const matchIndex = stack.findIndex(open => open.tag === tagName)
    if (matchIndex === -1) {
      return
    }
    let toClose = matchIndex + 1
    while (toClose-- > 0) {
      onCloseTag(stack.shift()!, end)
    }
  },

  onselfclosingtag(end) {
    closeCurrentTag(end)
  },

  onattribname(start, end) {
    // plain attribute
    currentProp = {
      type: NodeTypes.ATTRIBUTE,
      name: getSlice(start, end),
      value: undefined,
      loc: getLoc(start)
    }
  },

  ondirname(start, end) {
    const raw = getSlice(start, end)
    if (inVPre) {
      // directives inside v-pre are recorded as plain attributes
      currentProp = {
        type: NodeTypes.ATTRIBUTE,
        name: raw,
        value: undefined,
        loc: getLoc(start)
      }
      return
    }

    // resolve shorthands; anything else has its first two characters removed
    let name: string
    switch (raw) {
      case '.':
      case ':':
        name = 'bind'
        break
      case '@':
        name = 'on'
        break
      case '#':
        name = 'slot'
        break
      default:
        name = raw.slice(2)
    }
    currentProp = {
      type: NodeTypes.DIRECTIVE,
      name,
      raw,
      exp: undefined,
      arg: undefined,
      modifiers: [],
      loc: getLoc(start)
    }

    if (name === 'pre') {
      inVPre = true
      currentElementIsVPreBoundary = true
      // convert dirs before this one to attributes
      const seenProps = currentElement!.props
      for (let i = 0; i < seenProps.length; i++) {
        const existing = seenProps[i]
        if (existing.type === NodeTypes.DIRECTIVE) {
          seenProps[i] = dirToAttr(existing as DirectiveNode)
        }
      }
    }
  },

  ondirarg(start, end) {
    const arg = getSlice(start, end)
    if (inVPre) {
      // in v-pre the argument is just part of the attribute name
      const attr = currentProp as AttributeNode
      attr.name += arg
      return
    }
    // an argument starting with `[` is dynamic
    const isStatic = arg[0] !== `[`
    const dir = currentProp as DirectiveNode
    dir.arg = {
      type: NodeTypes.SIMPLE_EXPRESSION,
      content: arg,
      isStatic,
      constType: isStatic
        ? ConstantTypes.CAN_STRINGIFY
        : ConstantTypes.NOT_CONSTANT,
      loc: getLoc(start, end)
    }
  },

  ondirmodifier(start, end) {
    const mod = getSlice(start, end)
    if (inVPre) {
      const attr = currentProp as AttributeNode
      attr.name += '.' + mod
    } else {
      const dir = currentProp as DirectiveNode
      dir.modifiers.push(mod)
    }
  },

  onattribdata(start, end) {
    currentAttrValue += getSlice(start, end)
    // remember where the value starts (first chunk only) and where it ends
    if (currentAttrStartIndex < 0) currentAttrStartIndex = start
    currentAttrEndIndex = end
  },

  onattribentity(codepoint) {
    currentAttrValue += fromCodePoint(codepoint)
  },

  onattribnameend(end) {
    // check duplicate attrs
    const prop = currentProp!
    const nameStart = prop.loc.start.offset
    const fullName = getSlice(nameStart, end)
    if (prop.type === NodeTypes.DIRECTIVE) {
      // keep the complete source text of the directive name
      prop.raw = fullName
    }
    if (currentAttrs.has(fullName)) {
      currentProp = null
      // TODO emit error DUPLICATE_ATTRIBUTE
      throw new Error(`duplicate attr ${fullName}`)
    }
    currentAttrs.add(fullName)
  },

  onattribend(quote, end) {
    const el = currentElement
    const prop = currentProp
    if (el && prop) {
      if (currentAttrValue) {
        if (prop.type === NodeTypes.ATTRIBUTE) {
          // assign value; for quoted values the location is widened by one
          // character on each side to cover the quotes
          prop.value = {
            type: NodeTypes.TEXT,
            content: currentAttrValue,
            loc:
              quote === QuoteType.Unquoted
                ? getLoc(currentAttrStartIndex, currentAttrEndIndex)
                : getLoc(currentAttrStartIndex - 1, currentAttrEndIndex + 1)
          }
        } else {
          // directive
          prop.exp = {
            type: NodeTypes.SIMPLE_EXPRESSION,
            content: currentAttrValue,
            isStatic: false,
            // Treat as non-constant by default. This can be potentially set
            // to other values by `transformExpression` to make it eligible
            // for hoisting.
            constType: ConstantTypes.NOT_CONSTANT,
            loc: getLoc(currentAttrStartIndex, currentAttrEndIndex)
          }
        }
      }
      prop.loc.end = tokenizer.getPos(end)
      // the `pre` directive itself is not stored on the element
      const isVPreDirective =
        prop.type === NodeTypes.DIRECTIVE && prop.name === 'pre'
      if (!isVPreDirective) {
        el.props.push(prop)
      }
    }
    // clear the per-attribute value state for the next attribute
    currentAttrValue = ''
    currentAttrStartIndex = currentAttrEndIndex = -1
  },

  oncomment(start, end) {
    if (!currentOptions.comments) {
      return
    }
    addNode({
      type: NodeTypes.COMMENT,
      content: getSlice(start, end),
      // widened by 4 / 3 characters — presumably the `<!--` and `-->` markers
      loc: getLoc(start - 4, end + 3)
    })
  },

  onend() {
    // close every element that is still open at the end of the input
    const end = currentInput.length - 1
    for (const open of stack) {
      onCloseTag(open, end)
    }
  },

  oncdata(start, end) {
    // TODO throw error
  }
})
2023-11-13 21:03:39 +08:00
/**
 * Returns the part of the current template source between two offsets
 * (`start` inclusive, `end` exclusive).
 */
function getSlice(start: number, end: number) {
  const text = currentInput.slice(start, end)
  return text
}
2023-11-14 20:39:08 +08:00
/**
 * Finishes the open tag of `currentElement`: attaches the element to its
 * parent, tracks pre tags, and either closes it right away (void tag) or
 * makes it the innermost open element.
 */
function endOpenTag(end: number) {
  const el = currentElement!
  addNode(el)

  const { tag } = el
  if (currentOptions.isPreTag(tag)) {
    inPre++
  }

  if (currentOptions.isVoidTag(tag)) {
    // void tags have no children and no closing tag
    onCloseTag(el, end)
  } else {
    stack.unshift(el)
  }

  currentElement = null
}
2023-11-14 18:03:00 +08:00
/**
 * Handles a self-closing tag: finishes the open tag and then immediately
 * closes the element if it was pushed onto the stack.
 *
 * @param end - source offset passed through to `endOpenTag` / `onCloseTag`
 */
function closeCurrentTag(end: number) {
  const name = currentElement!.tag
  endOpenTag(end)
  // `endOpenTag` does not push void tags, so the stack may be empty here
  // (e.g. a self-closing void tag at the root). Guard against that instead
  // of dereferencing `stack[0]` unconditionally.
  if (stack[0]?.tag === name) {
    onCloseTag(stack.shift()!, end)
  }
}
2023-11-14 01:14:33 +08:00
/**
 * Adds text to the current parent. If the parent's last child is already a
 * text node the content is appended to it and its end position is moved;
 * otherwise a new text node is created.
 */
function onText(content: string, start: number, end: number) {
  const siblings = getParent().children
  const previous = siblings[siblings.length - 1]

  if (previous?.type === NodeTypes.TEXT) {
    // merge
    previous.content += content
    previous.loc.end = tokenizer.getPos(end)
    return
  }

  siblings.push({
    type: NodeTypes.TEXT,
    content,
    loc: {
      start: tokenizer.getPos(start),
      end: tokenizer.getPos(end),
      source: ''
    }
  })
}
2023-11-14 18:03:00 +08:00
/**
 * Finalizes an element when it is closed: attaches its end position, refines
 * its `tagType`, condenses whitespace in its children and unwinds the
 * pre / v-pre state.
 *
 * @param el - the element being closed
 * @param end - source offset at or before the `>` that ends the closing tag
 */
function onCloseTag(el: ElementNode, end: number) {
  // attach end position: scan forward to the next `>`. The scan is bounded by
  // the input length because `charCodeAt` returns NaN past the end, which
  // would otherwise never match and loop forever (e.g. elements that are
  // implicitly closed at the end of input that does not end with `>`).
  let closeIndex = end
  const lastIndex = currentInput.length - 1
  while (
    closeIndex < lastIndex &&
    currentInput.charCodeAt(closeIndex) !== CharCodes.Gt
  ) {
    closeIndex++
  }
  el.loc.end = tokenizer.getPos(closeIndex + 1)

  // refine element type
  const tag = el.tag
  if (!inVPre) {
    if (tag === 'slot') {
      el.tagType = ElementTypes.SLOT
    } else if (isFragmentTemplate(el)) {
      el.tagType = ElementTypes.TEMPLATE
    } else if (isComponent(el)) {
      el.tagType = ElementTypes.COMPONENT
    }
  }

  // whitespace management (must run before `inPre` is decremented so the
  // children of a pre tag are still treated as preformatted)
  el.children = condenseWhitespace(el.children)
  if (currentOptions.isPreTag(tag)) {
    inPre--
  }

  // NOTE(review): the boundary flag is not tied to a specific element, so it
  // looks like closing any descendant of the v-pre element ends v-pre mode
  // early — confirm and track the boundary element if so.
  if (currentElementIsVPreBoundary) {
    inVPre = false
    currentElementIsVPreBoundary = false
  }
}
2023-11-16 10:54:54 +08:00
// directive names that are checked for on a <template> element
const specialTemplateDir = new Set(['if', 'else', 'else-if', 'for', 'slot'])

/**
 * Returns true for a `<template>` element that carries at least one of the
 * directives listed in `specialTemplateDir`.
 */
function isFragmentTemplate({ tag, props }: ElementNode): boolean {
  if (tag !== 'template') {
    return false
  }
  return props.some(
    prop =>
      prop.type === NodeTypes.DIRECTIVE && specialTemplateDir.has(prop.name)
  )
}
2023-11-14 01:14:33 +08:00
2023-11-16 10:54:54 +08:00
/**
 * Decides whether an element should be treated as a component.
 *
 * Custom elements are never components. Otherwise an element is a component
 * when its tag is `component`, starts with an uppercase ASCII letter, is a
 * core or built-in component, or — when an `isNativeTag` option is provided —
 * is not a native tag. Finally, a plain `is="vue:..."` attribute casts a
 * native element into a component.
 */
function isComponent({ tag, props }: ElementNode): boolean {
  if (currentOptions.isCustomElement(tag)) {
    return false
  }
  if (
    tag === 'component' ||
    isUpperCase(tag.charCodeAt(0)) ||
    isCoreComponent(tag) ||
    currentOptions.isBuiltInComponent?.(tag) ||
    // Only consult `isNativeTag` when it is actually provided. Negating an
    // optional call (`!isNativeTag?.(tag)`) evaluates to true when the option
    // is absent, which would classify every element as a component.
    (currentOptions.isNativeTag && !currentOptions.isNativeTag(tag))
  ) {
    return true
  }
  // at this point the tag should be a native tag, but check for potential "is"
  // casting
  for (let i = 0; i < props.length; i++) {
    const p = props[i]
    if (p.type === NodeTypes.ATTRIBUTE) {
      if (p.name === 'is' && p.value) {
        if (p.value.content.startsWith('vue:')) {
          return true
        }
        // TODO else if (
        //   __COMPAT__ &&
        //   checkCompatEnabled(
        //     CompilerDeprecationTypes.COMPILER_IS_ON_ELEMENT,
        //     context,
        //     p.loc
        //   )
        // ) {
        //   return true
        // }
      }
    }
    // TODO else if (
    //   __COMPAT__ &&
    //   // :is on plain element - only treat as component in compat mode
    //   p.name === 'bind' &&
    //   isStaticArgOf(p.arg, 'is') &&
    //   checkCompatEnabled(
    //     CompilerDeprecationTypes.COMPILER_IS_ON_ELEMENT,
    //     context,
    //     p.loc
    //   )
    // ) {
    //   return true
    // }
  }
  return false
}
/**
 * Tells whether a character code is an ASCII uppercase letter (`A`-`Z`).
 */
function isUpperCase(c: number) {
  const beforeA = 64 // 'A' is 65
  const afterZ = 91 // 'Z' is 90
  return beforeA < c && c < afterZ
}
const windowsNewlineRE = /\r\n/g

/**
 * Applies the whitespace strategy to a list of sibling nodes.
 *
 * Outside of pre tags, whitespace-only text nodes are either dropped or
 * collapsed to a single space, and (unless `whitespace` is `'preserve'`)
 * runs of whitespace inside other text nodes are collapsed. Inside pre tags
 * only Windows newlines are normalized.
 *
 * @returns the same array when nothing was removed, otherwise a filtered copy
 */
function condenseWhitespace(nodes: TemplateChildNode[]): TemplateChildNode[] {
  const shouldCondense = currentOptions.whitespace !== 'preserve'
  let removedWhitespace = false

  for (let i = 0; i < nodes.length; i++) {
    const node = nodes[i]
    if (node.type !== NodeTypes.TEXT) {
      continue
    }

    if (inPre) {
      // #6410 normalize windows newlines in <pre>:
      // in SSR, browsers normalize server-rendered \r\n into a single \n
      // in the DOM
      node.content = node.content.replace(windowsNewlineRE, '\n')
      continue
    }

    if (!isAllWhitespace(node.content)) {
      if (shouldCondense) {
        // in condense mode, consecutive whitespaces in text are condensed
        // down to a single space.
        node.content = condense(node.content)
      }
      continue
    }

    // whitespace-only text node: look at the neighbouring node types
    const prev = nodes[i - 1]?.type
    const next = nodes[i + 1]?.type

    // Remove if:
    // - the whitespace is the first or last node, or:
    // - (condense mode) the whitespace is between two comments, or:
    // - (condense mode) the whitespace is between comment and element, or:
    // - (condense mode) the whitespace is between two elements AND contains newline
    let remove = !prev || !next
    if (!remove && shouldCondense) {
      if (prev === NodeTypes.COMMENT) {
        remove = next === NodeTypes.COMMENT || next === NodeTypes.ELEMENT
      } else if (prev === NodeTypes.ELEMENT) {
        remove =
          next === NodeTypes.COMMENT ||
          (next === NodeTypes.ELEMENT && hasNewlineChar(node.content))
      }
    }

    if (remove) {
      removedWhitespace = true
      // leave a hole for now; holes are filtered out once at the end
      nodes[i] = null as any
    } else {
      // Otherwise, the whitespace is condensed into a single space
      node.content = ' '
    }
  }

  return removedWhitespace ? nodes.filter(Boolean) : nodes
}
2023-11-14 21:55:16 +08:00
/**
 * Returns true when every character of `str` is whitespace according to the
 * tokenizer's `isWhitespace` (an empty string yields true).
 */
function isAllWhitespace(str: string) {
  let i = 0
  // advance over leading whitespace; reaching the end means nothing else was found
  while (i < str.length && isWhitespace(str.charCodeAt(i))) {
    i++
  }
  return i === str.length
}
/**
 * Returns true when `str` contains a newline or carriage-return character.
 */
function hasNewlineChar(str: string) {
  let i = 0
  while (i < str.length) {
    const code = str.charCodeAt(i++)
    if (code === CharCodes.NewLine || code === CharCodes.CarriageReturn) {
      return true
    }
  }
  return false
}
/**
 * Collapses every run of consecutive whitespace characters in `str` into a
 * single space; all other characters are kept as they are.
 */
function condense(str: string) {
  const pieces: string[] = []
  let insideRun = false
  for (let i = 0; i < str.length; i++) {
    const isSpace = isWhitespace(str.charCodeAt(i))
    if (!isSpace) {
      pieces.push(str[i])
    } else if (!insideRun) {
      // only the first whitespace of a run produces output
      pieces.push(' ')
    }
    insideRun = isSpace
  }
  return pieces.join('')
}
2023-11-13 21:03:39 +08:00
/**
 * Appends `node` to the children of the current parent (innermost open
 * element, or the root when no element is open).
 */
function addNode(node: TemplateChildNode) {
  const parent = getParent()
  parent.children.push(node)
}
/**
 * Returns the node new children are attached to: the innermost open element
 * (front of the stack), falling back to the root node.
 */
function getParent() {
  const innermost = stack[0]
  return innermost ? innermost : currentRoot
}
2023-11-15 17:45:42 +08:00
/**
 * Builds a source location from offsets into the current input.
 *
 * @param start - offset of the first character
 * @param end - offset just past the last character; when omitted, `end` is
 *   left unset so the caller can attach it later
 * @returns a location with `start` (and `end` if given) resolved through the
 *   tokenizer. NOTE(review): `source` is not populated here.
 */
function getLoc(start: number, end?: number): SourceLocation {
  return {
    start: tokenizer.getPos(start),
    // Compare against null/undefined explicitly: a truthiness check would
    // treat the valid offset `0` as "missing" and store the number itself.
    // @ts-expect-error allow late attachment
    end: end == null ? end : tokenizer.getPos(end)
  }
}
2023-11-16 01:31:52 +08:00
/**
 * Converts an already-parsed directive into a plain attribute, using the
 * directive's raw name as the attribute name and its expression (if any) as
 * the text value. The directive's location objects are reused, and the
 * expression location is adjusted in place.
 */
function dirToAttr(dir: DirectiveNode): AttributeNode {
  const { exp, loc: dirLoc } = dir
  const attr: AttributeNode = {
    type: NodeTypes.ATTRIBUTE,
    name: dir.raw!,
    value: undefined,
    loc: dirLoc
  }
  if (!exp) {
    return attr
  }

  // account for quotes: if the expression ends before the directive does,
  // widen its location by one character on each side
  const valueLoc = exp.loc
  if (valueLoc.end.offset < dirLoc.end.offset) {
    valueLoc.start.offset -= 1
    valueLoc.start.column -= 1
    valueLoc.end.offset += 1
    valueLoc.end.column += 1
  }

  attr.value = {
    type: NodeTypes.TEXT,
    content: (exp as SimpleExpressionNode).content,
    loc: valueLoc
  }
  return attr
}
2023-11-13 21:03:39 +08:00
/**
 * Restores the tokenizer and all module-level parser state to their initial
 * values so a new parse starts clean.
 */
function reset() {
  tokenizer.reset()
  currentElement = null
  currentProp = null
  currentAttrs.clear()
  currentAttrValue = ''
  currentAttrStartIndex = -1
  currentAttrEndIndex = -1
  // The pre / v-pre trackers are normally unwound as elements close, but a
  // parse that aborts with an exception (e.g. the duplicate attribute error)
  // leaves them set. Clear them so they cannot leak into the next parse.
  inPre = 0
  inVPre = false
  currentElementIsVPreBoundary = false
  stack.length = 0
}
2023-11-12 21:42:27 +08:00
2023-11-14 20:39:08 +08:00
/**
 * Parses a template string into a root AST node.
 *
 * @param input - the template source
 * @param options - parser options; merged on top of `defaultParserOptions`
 * @returns the root node, with whitespace in its direct children condensed
 */
export function baseParse(input: string, options?: ParserOptions): RootNode {
  reset()
  currentInput = input
  currentOptions = extend({}, defaultParserOptions, options)

  tokenizer.mode =
    currentOptions.parseMode === 'html'
      ? ParseMode.HTML
      : currentOptions.parseMode === 'sfc'
        ? ParseMode.SFC
        : ParseMode.BASE

  // The tokenizer instance is shared between calls, so assign the delimiters
  // on every parse (falling back to the defaults) rather than only when the
  // caller overrides them; otherwise custom delimiters from an earlier parse
  // could still be in effect here.
  const delimiters = options?.delimiters ?? defaultParserOptions.delimiters
  tokenizer.delimiterOpen = toCharCodes(delimiters[0])
  tokenizer.delimiterClose = toCharCodes(delimiters[1])

  const root = (currentRoot = createRoot([]))
  tokenizer.parse(currentInput)
  root.loc.end = tokenizer.getPos(input.length)
  root.children = condenseWhitespace(root.children)
  return root
}