vue3-core/packages/compiler-core/src/parser/index.ts

426 lines
11 KiB
TypeScript
Raw Normal View History

2023-11-13 21:03:39 +08:00
import { fromCodePoint } from 'entities/lib/decode.js'
import {
2023-11-14 18:03:00 +08:00
AttributeNode,
2023-11-14 20:39:08 +08:00
ConstantTypes,
2023-11-14 18:03:00 +08:00
DirectiveNode,
2023-11-13 21:03:39 +08:00
ElementNode,
ElementTypes,
2023-11-14 20:39:08 +08:00
Namespaces,
2023-11-13 21:03:39 +08:00
NodeTypes,
RootNode,
2023-11-15 17:45:42 +08:00
SourceLocation,
2023-11-13 21:03:39 +08:00
TemplateChildNode,
createRoot
} from '../ast'
2023-11-12 16:58:24 +08:00
import { ParserOptions } from '../options'
2023-11-15 17:57:28 +08:00
import Tokenizer, { CharCodes, QuoteType, isWhitespace } from './Tokenizer'
2023-11-14 20:39:08 +08:00
import { CompilerCompatOptions } from '../compat/compatConfig'
2023-11-14 21:55:16 +08:00
import { NO, extend } from '@vue/shared'
2023-11-14 20:39:08 +08:00
import { defaultOnError, defaultOnWarn } from '../errors'
2023-11-13 21:03:39 +08:00
2023-11-14 20:39:08 +08:00
// Names of the parser options that are NOT forced to be required once user
// options have been merged with the defaults. Includes every key of
// `CompilerCompatOptions`.
type OptionalOptions =
| 'getTextMode' // TODO
| 'whitespace'
| 'isNativeTag'
| 'isBuiltInComponent'
| keyof CompilerCompatOptions
// Shape of the resolved options object: every `ParserOptions` key is required,
// except the keys listed in `OptionalOptions`, which keep whatever optionality
// `ParserOptions` itself declares for them.
type MergedParserOptions = Omit<Required<ParserOptions>, OptionalOptions> &
Pick<ParserOptions, OptionalOptions>
// Fallback entity decoding: used only when the custom renderer does not supply
// a platform-specific decoder. It covers nothing beyond the escapes for
// characters that are reserved by the template syntax itself.
const decodeMap: Record<string, string> = {
  gt: '>',
  lt: '<',
  amp: '&',
  apos: "'",
  quot: '"'
}
const decodeRE = /&(gt|lt|amp|apos|quot);/g

/**
 * Baseline parser options. `baseParse` layers the caller's options on top of
 * these for every parse.
 */
export const defaultParserOptions: MergedParserOptions = {
  delimiters: [`{{`, `}}`],
  getNamespace: () => Namespaces.HTML,
  // getTextMode: () => TextModes.DATA,
  isVoidTag: NO,
  isPreTag: NO,
  isCustomElement: NO,
  decodeEntities(rawText: string): string {
    // Replace each recognised named entity with its literal character.
    return rawText.replace(
      decodeRE,
      (_match, entityName) => decodeMap[entityName]
    )
  },
  onError: defaultOnError,
  onWarn: defaultOnWarn,
  comments: __DEV__
}
// ---------------------------------------------------------------------------
// Module-level parser state. A single parse is driven through these shared
// variables; `reset()` clears most of them before each `baseParse` call.
// ---------------------------------------------------------------------------

// Options in effect for the current parse (defaults until `baseParse` runs).
let currentOptions: MergedParserOptions = defaultParserOptions
// Root node that top-level children are attached to.
let currentRoot: RootNode = createRoot([])

// parser state
// Full template source currently being parsed.
let currentInput = ''
// Element whose open tag is being read (between tag name and tag end).
let currentElement: ElementNode | null = null
// Attribute or directive currently being read on `currentElement`.
let currentProp: AttributeNode | DirectiveNode | null = null
// Accumulated value text of the attribute being read.
let currentAttrValue = ''
// Source offsets spanned by the attribute value data seen so far; -1 = none.
let currentAttrStartIndex = -1
let currentAttrEndIndex = -1
// Attribute names already seen on the current open tag (duplicate detection).
let currentAttrs = new Set<string>()
// Non-zero disables whitespace condensing (see `condenseWhitespace`).
let inPre = 0
// let inVPre = 0
// Open elements, innermost first: index 0 is the current parent.
const stack: ElementNode[] = []
2023-11-13 21:03:39 +08:00
/**
 * Shared tokenizer instance. Its callbacks translate low-level token events
 * (given as source offsets) into AST nodes by mutating the module-level parser
 * state and delegating to the helper functions defined later in this file.
 */
const tokenizer = new Tokenizer(
  // TODO handle entities
  { decodeEntities: true },
  {
    // Plain text run: forward the raw slice.
    ontext(start, end) {
      onText(getSlice(start, end), start, end)
    },

    // Decoded entity inside text: forward the decoded character with a
    // one-character span ending at `end`.
    ontextentity(cp, end) {
      onText(fromCodePoint(cp), end - 1, end)
    },

    onopentagname(start, end) {
      emitOpenTag(getSlice(start, end), start)
    },

    onopentagend(end) {
      endOpenTag(end)
    },

    // Closing tag: close every open element from the innermost one up to and
    // including the nearest element with a matching tag name. Void tags and
    // unmatched closing tags are ignored.
    onclosetag(start, end) {
      const tagName = getSlice(start, end)
      if (currentOptions.isVoidTag(tagName)) {
        return
      }
      const matchIndex = stack.findIndex(open => open.tag === tagName)
      if (matchIndex === -1) {
        return
      }
      let remaining = matchIndex + 1
      while (remaining-- > 0) {
        onCloseTag(stack.shift()!, end)
      }
    },

    onselfclosingtag(end) {
      closeCurrentTag(end)
    },

    onattribname(start, end) {
      // plain attribute
      currentProp = {
        type: NodeTypes.ATTRIBUTE,
        name: getSlice(start, end),
        value: undefined,
        loc: getLoc(start)
      }
    },

    // Directive name: shorthand prefixes map to their directive, anything
    // else has its first two characters stripped.
    ondirname(start, end) {
      const raw = getSlice(start, end)
      let name: string
      switch (raw) {
        case '.':
        case ':':
          name = 'bind'
          break
        case '@':
          name = 'on'
          break
        case '#':
          name = 'slot'
          break
        default:
          name = raw.slice(2)
      }
      currentProp = {
        type: NodeTypes.DIRECTIVE,
        name,
        exp: undefined,
        arg: undefined,
        modifiers: [],
        loc: getLoc(start)
      }
    },

    // Directive argument: treated as dynamic when it starts with `[`.
    ondirarg(start, end) {
      const directive = currentProp as DirectiveNode
      const content = getSlice(start, end)
      const isStatic = content[0] !== `[`
      directive.arg = {
        type: NodeTypes.SIMPLE_EXPRESSION,
        content,
        isStatic,
        constType: isStatic
          ? ConstantTypes.CAN_STRINGIFY
          : ConstantTypes.NOT_CONSTANT,
        loc: getLoc(start, end)
      }
    },

    ondirmodifier(start, end) {
      const directive = currentProp as DirectiveNode
      directive.modifiers.push(getSlice(start, end))
    },

    // Attribute value chunk: append it and widen the tracked value span.
    onattribdata(start, end) {
      currentAttrValue += getSlice(start, end)
      if (currentAttrStartIndex < 0) {
        currentAttrStartIndex = start
      }
      currentAttrEndIndex = end
    },

    // Decoded entity inside an attribute value (tracked span is not updated).
    onattribentity(codepoint) {
      currentAttrValue += fromCodePoint(codepoint)
    },

    onattribnameend(end) {
      // check duplicate attrs
      const nameStart = currentProp!.loc.start.offset
      const fullName = getSlice(nameStart, end)
      if (!currentAttrs.has(fullName)) {
        currentAttrs.add(fullName)
        return
      }
      currentProp = null
      // TODO emit error DUPLICATE_ATTRIBUTE
      throw new Error(`duplicate attr ${fullName}`)
    },

    // End of an attribute: attach the collected value (if any), finalize the
    // prop's location, push it onto the current element and clear the
    // per-attribute scratch state.
    onattribend(quote, end) {
      const el = currentElement
      const prop = currentProp
      if (el && prop) {
        if (currentAttrValue) {
          if (prop.type === NodeTypes.ATTRIBUTE) {
            // assign value; quoted values widen the span by one character on
            // each side
            prop.value = {
              type: NodeTypes.TEXT,
              content: currentAttrValue,
              loc:
                quote === QuoteType.Unquoted
                  ? getLoc(currentAttrStartIndex, currentAttrEndIndex)
                  : getLoc(currentAttrStartIndex - 1, currentAttrEndIndex + 1)
            }
          } else {
            // directive
            prop.exp = {
              type: NodeTypes.SIMPLE_EXPRESSION,
              content: currentAttrValue,
              isStatic: false,
              // Treat as non-constant by default. This can be potentially set
              // to other values by `transformExpression` to make it eligible
              // for hoisting.
              constType: ConstantTypes.NOT_CONSTANT,
              loc: getLoc(currentAttrStartIndex, currentAttrEndIndex)
            }
          }
        }
        prop.loc.end = tokenizer.getPos(end)
        el.props.push(prop)
      }
      currentAttrValue = ''
      currentAttrStartIndex = -1
      currentAttrEndIndex = -1
    },

    oncomment(start, end, offset) {
      // TODO oncomment
    },

    // End of input: close every element still on the stack, using the last
    // index of the input as the end position.
    onend() {
      const lastIndex = currentInput.length - 1
      for (const openElement of stack) {
        onCloseTag(openElement, lastIndex)
      }
    },

    oncdata(start, end, offset) {
      // TODO throw error
    }
  }
)
/**
 * Returns the part of the current input between `start` (inclusive) and
 * `end` (exclusive).
 */
function getSlice(start: number, end: number) {
  const source = currentInput
  return source.slice(start, end)
}
2023-11-14 18:03:00 +08:00
/**
 * Starts a new element for an open tag and makes it the current element.
 * Also clears the set of attribute names seen so far, since duplicate
 * detection is per open tag.
 *
 * @param name - tag name
 * @param start - source offset where the tag name starts
 */
function emitOpenTag(name: string, start: number) {
  const ns = currentOptions.getNamespace(name, getParent())
  // The element's location begins one character before the tag name
  // (presumably the opening `<`).
  const openPos = tokenizer.getPos(start - 1)

  currentElement = {
    type: NodeTypes.ELEMENT,
    tag: name,
    ns,
    // TODO refine tag type
    tagType: ElementTypes.ELEMENT,
    props: [],
    children: [],
    loc: {
      start: openPos,
      // @ts-expect-error to be attached on tag close
      end: undefined,
      source: ''
    },
    codegenNode: undefined
  }

  currentAttrs.clear()
}
2023-11-14 20:39:08 +08:00
/**
 * Finishes the open tag of the current element: attaches the element to its
 * parent, then either closes it right away (void tag) or pushes it onto the
 * stack so it becomes the parent of subsequent nodes.
 *
 * @param end - source offset reported by the tokenizer for the end of the tag
 */
function endOpenTag(end: number) {
  const el = currentElement!
  addNode(el)
  if (currentOptions.isVoidTag(el.tag)) {
    // void tags never have children, so they are closed immediately
    onCloseTag(el, end)
  } else {
    stack.unshift(el)
  }
  currentElement = null
}
2023-11-14 18:03:00 +08:00
/**
 * Handles a self-closing tag (`<foo/>`): ends the open tag and, if the
 * element was pushed onto the stack by `endOpenTag`, closes it right away.
 *
 * `endOpenTag` does not push void tags, so after it runs the stack may be
 * empty (e.g. a self-closing void tag at the root). The stack head is
 * therefore read with optional chaining instead of assuming it exists.
 *
 * @param end - source offset reported by the tokenizer for the end of the tag
 */
function closeCurrentTag(end: number) {
  const name = currentElement!.tag
  endOpenTag(end)
  if (stack[0]?.tag === name) {
    onCloseTag(stack.shift()!, end)
  }
}
2023-11-14 01:14:33 +08:00
/**
 * Adds text to the current parent. Consecutive text is merged into the
 * previous text node instead of creating a new one; when merging, the node's
 * location is extended so that `loc.end` and `loc.source` keep covering the
 * whole merged content.
 *
 * @param content - text to add (already decoded when it comes from an entity)
 * @param start - source offset where the text starts
 * @param end - source offset where the text ends
 */
function onText(content: string, start: number, end: number) {
  const parent = getParent()
  const lastNode = parent.children[parent.children.length - 1]
  if (lastNode?.type === NodeTypes.TEXT) {
    // merge
    lastNode.content += content
    // keep the location in sync with the merged content
    lastNode.loc.end = tokenizer.getPos(end)
    lastNode.loc.source += content
  } else {
    parent.children.push({
      type: NodeTypes.TEXT,
      content,
      loc: {
        start: tokenizer.getPos(start),
        end: tokenizer.getPos(end),
        source: content
      }
    })
  }
}
2023-11-14 18:03:00 +08:00
/**
 * Finalizes an element: attaches its end position and condenses the
 * whitespace of its children.
 *
 * The end position is one past the first `>` found at or after `end`. The
 * scan is bounded by the input length: when no `>` follows (for example an
 * element left unclosed at end of input), the end position falls back to the
 * end of the input instead of scanning forever.
 *
 * @param el - element being closed
 * @param end - source offset from which to look for the closing `>`
 */
function onCloseTag(el: ElementNode, end: number) {
  // attach end position
  const lastIndex = currentInput.length - 1
  let closeIndex = end
  while (
    closeIndex < lastIndex &&
    currentInput.charCodeAt(closeIndex) !== CharCodes.Gt
  ) {
    closeIndex++
  }
  el.loc.end = tokenizer.getPos(
    Math.min(closeIndex + 1, currentInput.length)
  )
  // whitespace management
  el.children = condenseWhitespace(el.children)
}
const windowsNewlineRE = /\r\n/g

/**
 * Applies whitespace handling to a list of sibling nodes.
 *
 * Text nodes are edited in place; whitespace-only text nodes may be dropped,
 * in which case a new filtered array is returned. Otherwise the same array is
 * returned.
 *
 * @param nodes - sibling nodes to process (mutated)
 * @returns the sibling list with removable whitespace nodes taken out
 */
function condenseWhitespace(nodes: TemplateChildNode[]): TemplateChildNode[] {
  const shouldCondense = currentOptions.whitespace !== 'preserve'
  let droppedAny = false

  for (let i = 0; i < nodes.length; i++) {
    const node = nodes[i]
    if (node.type !== NodeTypes.TEXT) {
      continue
    }

    if (inPre) {
      // #6410 normalize windows newlines in <pre>:
      // in SSR, browsers normalize server-rendered \r\n into a single \n
      // in the DOM
      node.content = node.content.replace(windowsNewlineRE, '\n')
      continue
    }

    if (!isAllWhitespace(node.content)) {
      // in condense mode, consecutive whitespaces in text are condensed
      // down to a single space.
      if (shouldCondense) {
        node.content = condense(node.content)
      }
      continue
    }

    // Whitespace-only text node. Remove if:
    // - the whitespace is the first or last node, or:
    // - (condense mode) the whitespace is between two comments, or:
    // - (condense mode) the whitespace is between comment and element, or:
    // - (condense mode) the whitespace is between two elements AND contains newline
    const prev = nodes[i - 1]?.type
    const next = nodes[i + 1]?.type
    let remove: boolean
    if (!prev || !next) {
      remove = true
    } else if (!shouldCondense) {
      remove = false
    } else if (prev === NodeTypes.COMMENT) {
      remove = next === NodeTypes.COMMENT || next === NodeTypes.ELEMENT
    } else if (prev === NodeTypes.ELEMENT) {
      remove =
        next === NodeTypes.COMMENT ||
        (next === NodeTypes.ELEMENT && hasNewlineChar(node.content))
    } else {
      remove = false
    }

    if (remove) {
      droppedAny = true
      // mark the slot; marked slots are filtered out before returning
      nodes[i] = null as any
    } else {
      // Otherwise, the whitespace is condensed into a single space
      node.content = ' '
    }
  }

  return droppedAny ? nodes.filter(Boolean) : nodes
}
2023-11-14 21:55:16 +08:00
/**
 * Returns `true` when every character of `str` is whitespace according to
 * `isWhitespace` (vacuously `true` for the empty string).
 */
function isAllWhitespace(str: string) {
  let index = 0
  while (index < str.length) {
    if (!isWhitespace(str.charCodeAt(index))) {
      return false
    }
    index++
  }
  return true
}
/**
 * Returns `true` when `str` contains at least one newline or carriage-return
 * character.
 */
function hasNewlineChar(str: string) {
  let index = 0
  while (index < str.length) {
    const code = str.charCodeAt(index++)
    if (code === CharCodes.NewLine || code === CharCodes.CarriageReturn) {
      return true
    }
  }
  return false
}
/**
 * Collapses every run of consecutive whitespace characters in `str` into a
 * single space; all other characters are copied through unchanged.
 */
function condense(str: string) {
  let out = ''
  let lastWasSpace = false
  for (let i = 0, len = str.length; i < len; i++) {
    if (!isWhitespace(str.charCodeAt(i))) {
      out += str[i]
      lastWasSpace = false
      continue
    }
    // only the first whitespace character of a run emits a space
    if (!lastWasSpace) {
      out += ' '
    }
    lastWasSpace = true
  }
  return out
}
2023-11-13 21:03:39 +08:00
/** Appends `node` to the children of the current parent. */
function addNode(node: TemplateChildNode) {
  const parent = getParent()
  parent.children.push(node)
}
/**
 * Returns the node new children should be attached to: the element at the
 * head of the stack, or the root when no element is open.
 */
function getParent() {
  const innermost = stack[0]
  return innermost || currentRoot
}
2023-11-15 17:45:42 +08:00
/**
 * Builds a source location starting at `start`.
 *
 * When `end` is omitted the location's `end` is left unset so it can be
 * attached later. A nullish check is used rather than a truthiness check so
 * that an `end` offset of `0` still produces a real position.
 *
 * @param start - start offset in the current input
 * @param end - optional end offset in the current input
 */
function getLoc(start: number, end?: number): SourceLocation {
  return {
    start: tokenizer.getPos(start),
    // @ts-expect-error allow late attachment
    end: end == null ? end : tokenizer.getPos(end)
  }
}
2023-11-13 21:03:39 +08:00
/**
 * Resets the tokenizer and clears the per-parse scratch state (open-element
 * stack, current element/prop and attribute bookkeeping). It does not touch
 * `currentInput`, `currentOptions`, `currentRoot` or `inPre`.
 */
function reset() {
  tokenizer.reset()

  // open elements
  stack.length = 0
  currentElement = null

  // attribute bookkeeping
  currentProp = null
  currentAttrs.clear()
  currentAttrValue = ''
  currentAttrStartIndex = currentAttrEndIndex = -1
}
2023-11-12 21:42:27 +08:00
2023-11-14 20:39:08 +08:00
/**
 * Parses a template string into a root AST node.
 *
 * Resets the shared parser state, merges `options` over the default parser
 * options, runs the tokenizer over `input`, then sets the root's end position
 * and condenses whitespace among the root's children.
 *
 * @param input - template source
 * @param options - optional parser options overriding the defaults
 * @returns the root node of the parsed template
 */
export function baseParse(input: string, options?: ParserOptions): RootNode {
  reset()
  currentInput = input
  currentOptions = extend({}, defaultParserOptions, options)

  const root = createRoot([])
  currentRoot = root

  tokenizer.parse(input)

  root.loc.end = tokenizer.getPos(input.length)
  root.children = condenseWhitespace(root.children)
  return root
}