vue3-core/packages/compiler-core/src/parser/index.ts

343 lines
8.5 KiB
TypeScript
Raw Normal View History

2023-11-13 21:03:39 +08:00
import { fromCodePoint } from 'entities/lib/decode.js'
import {
2023-11-14 18:03:00 +08:00
AttributeNode,
DirectiveNode,
2023-11-13 21:03:39 +08:00
ElementNode,
ElementTypes,
NodeTypes,
RootNode,
TemplateChildNode,
createRoot
} from '../ast'
2023-11-12 16:58:24 +08:00
import { ParserOptions } from '../options'
2023-11-14 18:03:00 +08:00
import Tokenizer, { CharCodes } from './Tokenizer'
2023-11-13 21:03:39 +08:00
/**
 * Tag names that are never pushed onto the open-element stack: the parser
 * skips the stack push for them on open and ignores their closing tags.
 */
const voidElements = new Set<string>([
  'area', 'base', 'basefont', 'br', 'col',
  'command', 'embed', 'frame', 'hr', 'img',
  'input', 'isindex', 'keygen', 'link', 'meta',
  'param', 'source', 'track', 'wbr'
])

/** Tags that push `true` onto `foreignContext` when opened in html mode. */
const foreignContextElements = new Set<string>(['math', 'svg'])

/** Tags that push `false` onto `foreignContext` when opened in html mode. */
const htmlIntegrationElements = new Set<string>([
  'mi', 'mo', 'mn', 'ms', 'mtext',
  'annotation-xml', 'foreignobject', 'desc', 'title'
])
// Options and root node of the parse in progress; both are replaced on every
// `baseParse` call.
let currentOptions: ParserOptions = {}
let currentRoot: RootNode = createRoot([])

// parser state

// Whether the html-specific handling (foreign context, implicit `<p>`) is on.
let htmlMode = false
// The template string being parsed; all slices are taken from it.
let currentInput = ''
// Element whose open tag is being read; null once the open tag has ended.
let currentElement: ElementNode | null = null
// Attribute being read, plus its value accumulated across data/entity chunks.
let currentProp: AttributeNode | DirectiveNode | null = null
let currentAttrValue = ''
// Truthy switches whitespace handling to the `<pre>` rules.
let inPre = 0
// let inVPre = 0
// Open elements, innermost first (elements are unshifted on open).
const stack: ElementNode[] = []
// Foreign-context flags, innermost first; the trailing `false` is the base.
const foreignContext: boolean[] = [false]
/**
 * The single tokenizer instance shared by every parse. Its callbacks turn
 * token events into AST nodes by mutating the module-level parser state.
 */
const tokenizer = new Tokenizer(
  // TODO handle entities
  { decodeEntities: true },
  {
    // Raw text run: [start, end) of the input.
    ontext(start, end) {
      onText(getSlice(start, end), start, end)
    },

    // Decoded entity inside text; only `end` is known, so the start position
    // is approximated as the index right before it.
    ontextentity(cp, end) {
      onText(fromCodePoint(cp), end - 1, end)
    },

    onopentagname(start, end) {
      emitOpenTag(getSlice(start, end), start)
    },

    onopentagend(end) {
      endOpenTag()
    },

    onclosetag(start, end) {
      const tagName = getSlice(start, end)

      if (
        htmlMode &&
        (foreignContextElements.has(tagName) ||
          htmlIntegrationElements.has(tagName))
      ) {
        foreignContext.shift()
      }

      if (voidElements.has(tagName)) {
        // Closing tags of void elements are ignored.
        // TODO (html mode, `</br>`):
        // We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
        // this.cbs.onopentag?.('br', {}, true)
        // this.cbs.onclosetag?.('br', false)
        return
      }

      const depth = stack.findIndex(open => open.tag === tagName)
      if (depth === -1) {
        if (htmlMode && tagName === 'p') {
          // Implicit open before close
          emitOpenTag('p', start)
          closeCurrentTag(end)
        }
        return
      }

      // Close the matching element and everything opened inside it.
      for (let remaining = depth + 1; remaining > 0; remaining--) {
        onCloseTag(stack.shift()!, end)
      }
    },

    onselfclosingtag(end) {
      closeCurrentTag(end)
    },

    onattribname(start, end) {
      // TODO directives
      const attrName = getSlice(start, end)
      const startPos = tokenizer.getPositionForIndex(start)
      currentProp = {
        type: NodeTypes.ATTRIBUTE,
        name: attrName,
        value: undefined,
        loc: {
          start: startPos,
          // @ts-expect-error to be attached on attribute end
          end: undefined,
          source: ''
        }
      }
    },

    // Attribute values may arrive in several chunks; accumulate them.
    onattribdata(start, end) {
      currentAttrValue += getSlice(start, end)
    },

    onattribentity(codepoint) {
      currentAttrValue += fromCodePoint(codepoint)
    },

    onattribend(_quote, end) {
      if (currentElement) {
        const prop = currentProp!
        if (prop.type === NodeTypes.ATTRIBUTE) {
          // assign value
          prop.value = {
            type: NodeTypes.TEXT,
            content: currentAttrValue,
            // @ts-expect-error TODO
            loc: {}
          }
        } else {
          // TODO
        }
        prop.loc.end = tokenizer.getPositionForIndex(end)
        currentElement.props.push(prop)
      }
      // The accumulator is cleared whether or not the attribute was attached.
      currentAttrValue = ''
    },

    oncomment(start, end, offset) {
      // TODO oncomment
    },

    // End of input: finalize every element that is still open.
    onend() {
      const inputEnd = currentInput.length
      for (const openElement of stack) {
        onCloseTag(openElement, inputEnd)
      }
    },

    oncdata(start, end, offset) {
      // TODO throw error
    }
  }
)
/**
 * Returns the part of the input currently being parsed between `start`
 * (inclusive) and `end` (exclusive).
 */
function getSlice(start: number, end: number): string {
  const text = currentInput.slice(start, end)
  return text
}
2023-11-14 18:03:00 +08:00
/**
 * Starts a new element for an open tag and stores it in `currentElement`.
 * The element is not attached to the tree here; that happens in `endOpenTag`.
 *
 * @param name - tag name
 * @param start - index of the tag name in the input; the element's start
 *   position is taken one character earlier (presumably the `<`)
 */
function emitOpenTag(name: string, start: number) {
  const startPosition = tokenizer.getPositionForIndex(start - 1)
  currentElement = {
    type: NodeTypes.ELEMENT,
    tag: name,
    // TODO refine namespace
    ns: 0,
    // TODO refine tag type
    tagType: ElementTypes.ELEMENT,
    props: [],
    children: [],
    loc: {
      start: startPosition,
      // @ts-expect-error to be attached on tag close
      end: undefined,
      source: ''
    },
    codegenNode: undefined
  }
}
/**
 * Finishes the open tag of `currentElement`: attaches the element to its
 * parent, pushes it onto the open-element stack unless it is a void element,
 * records the foreign-context flag in html mode, and clears `currentElement`.
 */
function endOpenTag() {
  const el = currentElement!
  addNode(el)
  const tag = el.tag
  const isVoid = voidElements.has(tag)
  if (!isVoid) {
    stack.unshift(el)
  }
  if (!isVoid && htmlMode) {
    if (foreignContextElements.has(tag)) {
      foreignContext.unshift(true)
    } else if (htmlIntegrationElements.has(tag)) {
      foreignContext.unshift(false)
    }
  }
  currentElement = null
}
2023-11-14 18:03:00 +08:00
/**
 * Ends the open tag of `currentElement` and immediately closes the element.
 * Used for self-closing tags and for the implicit `<p>` created by a stray
 * `</p>` in html mode.
 *
 * @param end - input index forwarded to `onCloseTag` for the end position
 */
function closeCurrentTag(end: number) {
  const name = currentElement!.tag
  endOpenTag()
  // `endOpenTag` does not push void elements, so the stack may be empty here
  // (e.g. a root-level `<br/>`); the optional chain avoids reading `.tag` of
  // `undefined` in that case.
  if (stack[0]?.tag === name) {
    onCloseTag(stack.shift()!, end)
  }
}
2023-11-14 01:14:33 +08:00
/**
 * Adds text to the current parent (innermost open element, or the root).
 * If the parent's last child is already a text node the new content is merged
 * into it; otherwise a new text node is created.
 *
 * @param content - text to add (already decoded for entities)
 * @param start - input index where the text starts
 * @param end - input index where the text ends
 */
function onText(content: string, start: number, end: number) {
  const parent = getParent()
  const lastNode = parent.children[parent.children.length - 1]
  if (lastNode?.type === NodeTypes.TEXT) {
    // merge
    lastNode.content += content
    // Keep the location in step with the merged content: the node now ends
    // where the appended chunk ends, and `source` grows the same way it was
    // initialized (from the chunk content).
    lastNode.loc.end = tokenizer.getPositionForIndex(end)
    lastNode.loc.source += content
  } else {
    parent.children.push({
      type: NodeTypes.TEXT,
      content,
      loc: {
        start: tokenizer.getPositionForIndex(start),
        end: tokenizer.getPositionForIndex(end),
        source: content
      }
    })
  }
}
2023-11-14 18:03:00 +08:00
/**
 * Finalizes an element: attaches its end position and applies whitespace
 * management to its children.
 *
 * @param el - the element being closed
 * @param end - input index at or before the `>` that terminates the tag; for
 *   elements still open at end of input this is the input length
 */
function onCloseTag(el: ElementNode, end: number) {
  // attach end position
  // Scan forward to the closing `>`, but never past the end of the input:
  // `charCodeAt` returns NaN out of range, so an unbounded scan would never
  // terminate for elements left unclosed at end of input.
  const inputLength = currentInput.length
  let closeIndex = end
  while (
    closeIndex < inputLength &&
    currentInput.charCodeAt(closeIndex) !== CharCodes.Gt
  ) {
    closeIndex++
  }
  // Include the `>` when one was found; otherwise clamp to the end of input.
  el.loc.end = tokenizer.getPositionForIndex(
    Math.min(closeIndex + 1, inputLength)
  )
  // whitespace management
  el.children = condenseWhitespace(el.children)
}
// Matches every Windows-style line break.
const windowsNewlineRE = /\r\n/g
// Matches each run of tab / CR / LF / form feed / space characters.
const consecutiveWhitespaceRE = /[\t\r\n\f ]+/g
// Matches the first character that is not one of the above.
const nonWhitespaceRE = /[^\t\r\n\f ]/

/**
 * Tells whether `content` consists only of tab, CR, LF, form feed and space
 * characters. The empty string counts as empty text.
 */
function isEmptyText(content: string) {
  return content.search(nonWhitespaceRE) === -1
}
/**
 * Applies whitespace handling to a list of sibling nodes.
 *
 * Text nodes are rewritten in place. Whitespace-only text nodes may be
 * removed, in which case a new filtered array is returned; otherwise the
 * input array itself is returned.
 *
 * @param nodes - sibling nodes to process (mutated)
 * @returns the sibling list without the removed whitespace nodes
 */
function condenseWhitespace(nodes: TemplateChildNode[]): TemplateChildNode[] {
  const condenseMode = currentOptions.whitespace !== 'preserve'
  let droppedAny = false

  for (let i = 0; i < nodes.length; i++) {
    const node = nodes[i]
    if (node.type !== NodeTypes.TEXT) {
      continue
    }

    if (inPre) {
      // #6410 normalize windows newlines in <pre>:
      // in SSR, browsers normalize server-rendered \r\n into a single \n
      // in the DOM
      node.content = node.content.replace(windowsNewlineRE, '\n')
      continue
    }

    if (!isEmptyText(node.content)) {
      if (condenseMode) {
        // in condense mode, consecutive whitespaces in text are condensed
        // down to a single space.
        node.content = node.content.replace(consecutiveWhitespaceRE, ' ')
      }
      continue
    }

    // Whitespace-only text node. Remove it if:
    // - it is the first or last node, or:
    // - (condense mode) it sits between two comments, or:
    // - (condense mode) it sits between a comment and an element, or:
    // - (condense mode) it sits between two elements AND contains a newline
    const before = nodes[i - 1]
    const after = nodes[i + 1]
    let drop = !before || !after
    if (!drop && condenseMode) {
      if (before.type === NodeTypes.COMMENT) {
        drop =
          after.type === NodeTypes.COMMENT || after.type === NodeTypes.ELEMENT
      } else if (before.type === NodeTypes.ELEMENT) {
        drop =
          after.type === NodeTypes.COMMENT ||
          (after.type === NodeTypes.ELEMENT && /[\r\n]/.test(node.content))
      }
    }

    if (drop) {
      droppedAny = true
      // Mark the slot; marked slots are filtered out on return.
      nodes[i] = null as any
    } else {
      // Otherwise, the whitespace is condensed into a single space
      node.content = ' '
    }
  }

  return droppedAny ? nodes.filter(Boolean) : nodes
}
/** Appends `node` to the children of the current parent (see `getParent`). */
function addNode(node: TemplateChildNode) {
  const siblings = getParent().children
  siblings.push(node)
}
/**
 * Returns the node that new children are attached to: the element at the
 * front of the open-element stack, or the root when no element is open.
 */
function getParent() {
  const innermost = stack[0]
  return innermost || currentRoot
}
/**
 * Resets the tokenizer and the per-parse state (pending element / attribute,
 * open-element stack, foreign-context flags) to their initial values.
 */
function reset() {
  tokenizer.reset()
  stack.length = 0
  // Back to the single base entry: [false].
  foreignContext.splice(0, foreignContext.length, false)
  currentAttrValue = ''
  currentProp = null
  currentElement = null
}
2023-11-12 21:42:27 +08:00
2023-11-12 16:58:24 +08:00
/**
 * Parses a template string into a root AST node.
 *
 * The parser keeps its state in module-level variables, which are reset at
 * the start of every call.
 *
 * @param input - template source to parse
 * @param options - parser options; `htmlMode` and `whitespace` are read here
 *   and in the whitespace handling
 * @returns the root node, with whitespace handling applied to its children
 */
export function baseParse(
  input: string,
  options: ParserOptions = {}
): RootNode {
  reset()
  currentOptions = options
  currentInput = input
  htmlMode = Boolean(options.htmlMode)

  currentRoot = createRoot([])
  const root = currentRoot

  tokenizer.parse(input)
  root.children = condenseWhitespace(root.children)
  return root
}