2023-11-13 21:03:39 +08:00
|
|
|
import { fromCodePoint } from 'entities/lib/decode.js'
|
|
|
|
import {
|
2023-11-14 18:03:00 +08:00
|
|
|
AttributeNode,
|
|
|
|
DirectiveNode,
|
2023-11-13 21:03:39 +08:00
|
|
|
ElementNode,
|
|
|
|
ElementTypes,
|
|
|
|
NodeTypes,
|
|
|
|
RootNode,
|
|
|
|
TemplateChildNode,
|
|
|
|
createRoot
|
|
|
|
} from '../ast'
|
2023-11-12 16:58:24 +08:00
|
|
|
import { ParserOptions } from '../options'
|
2023-11-14 18:03:00 +08:00
|
|
|
import Tokenizer, { CharCodes } from './Tokenizer'
|
2023-11-13 21:03:39 +08:00
|
|
|
|
|
|
|
/**
 * Tag names treated as void: `endOpenTag` never pushes them onto the open
 * element stack and the `onclosetag` handler ignores their end tags.
 */
const voidElements = new Set<string>([
  'area', 'base', 'basefont', 'br', 'col',
  'command', 'embed', 'frame', 'hr', 'img',
  'input', 'isindex', 'keygen', 'link', 'meta',
  'param', 'source', 'track', 'wbr'
])
|
|
|
|
|
|
|
|
/**
 * Tags that, in html mode, push `true` onto `foreignContext` when opened
 * (see `endOpenTag`).
 */
const foreignContextElements = new Set<string>(['math', 'svg'])
|
|
|
|
|
|
|
|
/**
 * Tags that, in html mode, push `false` onto `foreignContext` when opened
 * (see `endOpenTag`).
 */
const htmlIntegrationElements = new Set<string>([
  'mi', 'mo', 'mn', 'ms', 'mtext',
  'annotation-xml', 'foreignobject', 'desc', 'title'
])
|
|
|
|
|
|
|
|
// Options and root of the parse currently in progress; both are replaced at
// the start of every `baseParse` call.
let currentOptions: ParserOptions = {}
let currentRoot: RootNode = createRoot([])

// parser state

// Mirrors `options.htmlMode`; enables the foreign-context bookkeeping and the
// implicit `<p>` handling for a stray `</p>`.
let htmlMode = false
// Source string being parsed; tokenizer offsets index into it.
let currentInput = ''
// Element whose open tag is being read (set by `emitOpenTag`, cleared by
// `endOpenTag`).
let currentElement: ElementNode | null = null
// Attribute being read, and the value text accumulated for it so far.
let currentProp: AttributeNode | DirectiveNode | null = null
let currentAttrValue = ''
// A non-zero value makes `condenseWhitespace` skip condensing and only
// normalize `\r\n`. It is not assigned anywhere in the code visible here.
let inPre = 0
// let inVPre = 0
// Open elements, innermost first (`unshift` on open, `shift` on close).
const stack: ElementNode[] = []
// Stack of foreign-context flags, innermost first; `reset` restores it to a
// single `false`.
const foreignContext: boolean[] = [false]
|
|
|
|
|
|
|
|
/**
 * Shared tokenizer instance. Its callbacks receive offsets into
 * `currentInput` and turn them into AST mutations on the module-level parser
 * state.
 */
const tokenizer = new Tokenizer(
  // TODO handle entities
  { decodeEntities: true },
  {
    // Raw text run: forward the source slice together with its offsets.
    ontext(start, end) {
      const raw = getSlice(start, end)
      onText(raw, start, end)
    },

    // Decoded entity in text. Only an end offset is supplied, so the text is
    // located at the single offset preceding it.
    ontextentity(cp, end) {
      const decoded = fromCodePoint(cp)
      onText(decoded, end - 1, end)
    },

    // Tag name of an open tag has been read: start building the element.
    onopentagname(start, end) {
      const tagName = getSlice(start, end)
      emitOpenTag(tagName, start)
    },

    // End of an open tag: attach the element to the tree.
    onopentagend(end) {
      endOpenTag()
    },

    // Close tag: pop every open element down to (and including) the nearest
    // one with a matching tag name.
    onclosetag(start, end) {
      const tagName = getSlice(start, end)

      const leavesForeignScope =
        htmlMode &&
        (foreignContextElements.has(tagName) ||
          htmlIntegrationElements.has(tagName))
      if (leavesForeignScope) {
        foreignContext.shift()
      }

      if (voidElements.has(tagName)) {
        // TODO (htmlMode && tagName === 'br')
        // We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
        // this.cbs.onopentag?.('br', {}, true)
        // this.cbs.onclosetag?.('br', false)
        return
      }

      const matchIndex = stack.findIndex(open => open.tag === tagName)
      if (matchIndex !== -1) {
        // the stack is innermost-first, so the match plus everything in
        // front of it gets closed
        let remaining = matchIndex + 1
        while (remaining-- > 0) {
          onCloseTag(stack.shift()!, end)
        }
        return
      }

      if (htmlMode && tagName === 'p') {
        // Implicit open before close
        emitOpenTag('p', start)
        closeCurrentTag(end)
      }
    },

    // Self-closing tag: finish the open tag and close it right away.
    onselfclosingtag(end) {
      closeCurrentTag(end)
    },

    // Attribute name read: start a new attribute node without a value.
    onattribname(start, end) {
      // TODO directives
      const attrName = getSlice(start, end)
      const startPos = tokenizer.getPositionForIndex(start)
      currentProp = {
        type: NodeTypes.ATTRIBUTE,
        name: attrName,
        value: undefined,
        loc: {
          start: startPos,
          // @ts-expect-error to be attached on attribute end
          end: undefined,
          source: ''
        }
      }
    },

    // Attribute value chunks (raw text and decoded entities) are accumulated
    // until `onattribend`.
    onattribdata(start, end) {
      const chunk = getSlice(start, end)
      currentAttrValue += chunk
    },
    onattribentity(codepoint) {
      const decoded = fromCodePoint(codepoint)
      currentAttrValue += decoded
    },

    // Attribute finished: attach the accumulated value and end position, then
    // add the attribute to the element being built.
    onattribend(_quote, end) {
      const owner = currentElement
      if (owner) {
        if (currentProp!.type === NodeTypes.ATTRIBUTE) {
          // assign value
          currentProp!.value = {
            type: NodeTypes.TEXT,
            content: currentAttrValue,
            // @ts-expect-error TODO
            loc: {}
          }
        } else {
          // TODO
        }
        const endPos = tokenizer.getPositionForIndex(end)
        currentProp!.loc.end = endPos
        owner.props.push(currentProp!)
      }
      // the buffer is cleared even when no element is being built
      currentAttrValue = ''
    },

    oncomment(start, end, offset) {
      // TODO oncomment
    },

    // End of input: close every element that is still open.
    onend() {
      const inputEnd = currentInput.length
      for (const open of stack) {
        onCloseTag(open, inputEnd)
      }
    },

    oncdata(start, end, offset) {
      // TODO throw error
    }
  }
)
|
|
|
|
|
|
|
|
/**
 * Returns the part of the current input between two offsets.
 *
 * @param start - offset of the first character (inclusive)
 * @param end - offset just past the last character (exclusive)
 */
function getSlice(start: number, end: number) {
  const text = currentInput.slice(start, end)
  return text
}
|
|
|
|
|
2023-11-14 18:03:00 +08:00
|
|
|
/**
 * Starts a new element node and stores it in `currentElement`. The node is
 * attached to the tree later by `endOpenTag`.
 *
 * @param name - tag name of the element
 * @param start - offset of the tag name; the node's start position is taken
 *   one offset earlier (presumably the `<` — confirm against the tokenizer)
 */
function emitOpenTag(name: string, start: number) {
  const startPos = tokenizer.getPositionForIndex(start - 1)
  currentElement = {
    type: NodeTypes.ELEMENT,
    tag: name,
    // TODO refine namespace
    ns: 0,
    // TODO refine tag type
    tagType: ElementTypes.ELEMENT,
    props: [],
    children: [],
    loc: {
      start: startPos,
      // @ts-expect-error to be attached on tag close
      end: undefined,
      source: ''
    },
    codegenNode: undefined
  }
}
|
|
|
|
|
|
|
|
/**
 * Finishes the open tag of `currentElement`: appends the element to its
 * parent, pushes it onto the open element stack unless it is a void element,
 * records foreign-context changes in html mode, and clears `currentElement`.
 */
function endOpenTag() {
  const el = currentElement!
  // must run before the element is pushed, otherwise it would become its own
  // parent
  addNode(el)

  const tagName = el.tag
  const isVoid = voidElements.has(tagName)
  if (!isVoid) {
    stack.unshift(el)
    if (htmlMode && foreignContextElements.has(tagName)) {
      foreignContext.unshift(true)
    } else if (htmlMode && htmlIntegrationElements.has(tagName)) {
      foreignContext.unshift(false)
    }
  }

  currentElement = null
}
|
|
|
|
|
2023-11-14 18:03:00 +08:00
|
|
|
/**
 * Finishes the open tag currently being built and closes the element right
 * away. Called for self-closing tags and for the implicit `<p>` opened by a
 * stray `</p>` in html mode.
 *
 * @param end - source offset handed on to `onCloseTag` for the end position
 */
function closeCurrentTag(end: number) {
  const el = currentElement!
  endOpenTag()
  // `endOpenTag` does not push void elements, so the stack may be empty (for
  // example a root-level `<br/>`) or still topped by the parent. Close only
  // when the element that was just opened is the one on top.
  if (stack[0] === el) {
    onCloseTag(stack.shift()!, end)
  }
}
|
|
|
|
|
2023-11-14 01:14:33 +08:00
|
|
|
/**
 * Adds text to the current parent. If the parent's last child is already a
 * text node, the new content is merged into it; otherwise a new text node is
 * appended.
 *
 * @param content - text to add (already decoded when it comes from an entity)
 * @param start - source offset where the text starts
 * @param end - source offset just past the text
 */
function onText(content: string, start: number, end: number) {
  const parent = getParent()
  const lastNode = parent.children[parent.children.length - 1]
  if (lastNode?.type === NodeTypes.TEXT) {
    // merge
    lastNode.content += content
    // keep the location in step with the merged content, otherwise the node
    // would keep reporting the range of its first fragment only
    lastNode.loc.end = tokenizer.getPositionForIndex(end)
    lastNode.loc.source += content
  } else {
    parent.children.push({
      type: NodeTypes.TEXT,
      content,
      loc: {
        start: tokenizer.getPositionForIndex(start),
        end: tokenizer.getPositionForIndex(end),
        source: content
      }
    })
  }
}
|
|
|
|
|
2023-11-14 18:03:00 +08:00
|
|
|
/**
 * Finalizes an element that is being closed: attaches its end position and
 * applies whitespace handling to its children.
 *
 * @param el - element being closed
 * @param end - source offset at or before the `>` that ends the closing tag;
 *   `onend` passes the input length for elements that were never closed
 */
function onCloseTag(el: ElementNode, end: number) {
  // attach end position: scan forward to the next `>`. The scan is bounded by
  // the input length because `charCodeAt` yields NaN past the end, which
  // would never equal `>` and would loop forever for unclosed elements.
  const inputLength = currentInput.length
  let index = end
  while (
    index < inputLength &&
    currentInput.charCodeAt(index) !== CharCodes.Gt
  ) {
    index++
  }
  // one past the `>` when found, otherwise clamped to the end of the input
  el.loc.end = tokenizer.getPositionForIndex(Math.min(index + 1, inputLength))
  // whitespace management
  el.children = condenseWhitespace(el.children)
}
|
|
|
|
|
|
|
|
// Matches every `\r\n` pair (used to normalize newlines when `inPre` is set).
const windowsNewlineRE = /\r\n/g
// Matches each run of tab, CR, LF, form feed or space characters.
const consecutiveWhitespaceRE = /[\t\r\n\f ]+/g
// Matches a single character that is not tab, CR, LF, form feed or space.
// Deliberately non-global, so `test` carries no `lastIndex` state.
const nonWhitespaceRE = /[^\t\r\n\f ]/
|
|
|
|
|
|
|
|
/**
 * Tells whether `content` contains nothing but whitespace (tab, CR, LF, form
 * feed, space). The empty string counts as empty.
 */
function isEmptyText(content: string) {
  const firstNonWhitespace = content.search(nonWhitespaceRE)
  return firstNonWhitespace === -1
}
|
|
|
|
|
|
|
|
/**
 * Applies whitespace handling to a list of sibling nodes. Text nodes are
 * modified in place; the returned array is either `nodes` itself or, when
 * whitespace-only nodes were dropped, a filtered copy.
 *
 * Outside of `inPre`:
 * - a whitespace-only text node is removed when it is the first or last
 *   sibling, or (unless `whitespace` is `'preserve'`) when it sits between
 *   two comments, between a comment and an element, or between two elements
 *   and contains a newline; otherwise it is collapsed to a single space
 * - any other text node gets its whitespace runs collapsed to a single space
 *   (unless `whitespace` is `'preserve'`)
 *
 * With `inPre` set, only `\r\n` is normalized to `\n`.
 */
function condenseWhitespace(nodes: TemplateChildNode[]): TemplateChildNode[] {
  const shouldCondense = currentOptions.whitespace !== 'preserve'
  let removedWhitespace = false

  for (let index = 0; index < nodes.length; index++) {
    const current = nodes[index]
    if (current.type !== NodeTypes.TEXT) {
      continue
    }

    if (inPre) {
      // #6410 normalize windows newlines in <pre>:
      // in SSR, browsers normalize server-rendered \r\n into a single \n
      // in the DOM
      current.content = current.content.replace(windowsNewlineRE, '\n')
      continue
    }

    if (!isEmptyText(current.content)) {
      if (shouldCondense) {
        // in condense mode, consecutive whitespaces in text are condensed
        // down to a single space.
        current.content = current.content.replace(consecutiveWhitespaceRE, ' ')
      }
      continue
    }

    // whitespace-only text: decide between dropping it and collapsing it
    const before = nodes[index - 1]
    const after = nodes[index + 1]

    // the whitespace is the first or last node
    let remove = !before || !after
    if (!remove && shouldCondense) {
      const afterComment = before.type === NodeTypes.COMMENT
      const afterElement = before.type === NodeTypes.ELEMENT
      const beforeComment = after.type === NodeTypes.COMMENT
      const beforeElement = after.type === NodeTypes.ELEMENT
      remove =
        // between two comments, or between a comment and an element
        (afterComment && (beforeComment || beforeElement)) ||
        (afterElement &&
          // between an element and a comment
          (beforeComment ||
            // between two elements AND contains newline
            (beforeElement && /[\r\n]/.test(current.content))))
    }

    if (remove) {
      removedWhitespace = true
      nodes[index] = null as any
    } else {
      // Otherwise, the whitespace is condensed into a single space
      current.content = ' '
    }
  }

  return removedWhitespace ? nodes.filter(Boolean) : nodes
}
|
|
|
|
|
|
|
|
/**
 * Appends `node` to the children of the current parent (the innermost open
 * element, or the root when no element is open).
 */
function addNode(node: TemplateChildNode) {
  const parent = getParent()
  parent.children.push(node)
}
|
|
|
|
|
|
|
|
/**
 * Returns the node that new children should be attached to: the innermost
 * open element, or the current root when the stack is empty.
 */
function getParent() {
  const innermost = stack[0]
  return innermost ? innermost : currentRoot
}
|
|
|
|
|
|
|
|
/**
 * Restores the tokenizer and the per-parse state (element/attribute under
 * construction, open element stack, foreign-context stack) to their initial
 * values. The arrays are emptied in place rather than replaced.
 */
function reset() {
  tokenizer.reset()

  // nothing is under construction
  currentElement = null
  currentProp = null
  currentAttrValue = ''

  // no open elements; foreign context back to a single `false`
  stack.splice(0, stack.length)
  foreignContext.splice(0, foreignContext.length, false)
}
|
2023-11-12 21:42:27 +08:00
|
|
|
|
2023-11-12 16:58:24 +08:00
|
|
|
/**
 * Parses a template string into an AST.
 *
 * The parser keeps its state in module-level variables: every call resets
 * that state first and then installs the given input and options.
 *
 * @param input - template source
 * @param options - parser options; `htmlMode` and `whitespace` are the ones
 *   read by the code in this file
 * @returns the root node, with whitespace handling applied to its direct
 *   children
 */
export function baseParse(
  input: string,
  options: ParserOptions = {}
): RootNode {
  reset()

  // install per-parse state
  currentInput = input
  currentOptions = options
  htmlMode = Boolean(options.htmlMode)
  currentRoot = createRoot([])
  const root = currentRoot

  tokenizer.parse(currentInput)

  // elements are condensed when they are closed; the root is done last
  root.children = condenseWhitespace(root.children)
  return root
}
|