wip: loc for elements

This commit is contained in:
Evan You 2023-11-14 18:03:00 +08:00
parent f7db16b9cf
commit f131a4723b
2 changed files with 87 additions and 171 deletions

View File

@ -29,7 +29,7 @@ import {
} from 'entities/lib/decode.js' } from 'entities/lib/decode.js'
import { Position } from '../ast' import { Position } from '../ast'
const enum CharCodes { export const enum CharCodes {
Tab = 0x9, // "\t" Tab = 0x9, // "\t"
NewLine = 0xa, // "\n" NewLine = 0xa, // "\n"
FormFeed = 0xc, // "\f" FormFeed = 0xc, // "\f"
@ -216,9 +216,9 @@ export default class Tokenizer {
} }
} }
return { return {
offset: index, column,
line, line,
column offset: index
} }
} }

View File

@ -1,5 +1,7 @@
import { fromCodePoint } from 'entities/lib/decode.js' import { fromCodePoint } from 'entities/lib/decode.js'
import { import {
AttributeNode,
DirectiveNode,
ElementNode, ElementNode,
ElementTypes, ElementTypes,
NodeTypes, NodeTypes,
@ -8,72 +10,7 @@ import {
createRoot createRoot
} from '../ast' } from '../ast'
import { ParserOptions } from '../options' import { ParserOptions } from '../options'
import Tokenizer from './Tokenizer' import Tokenizer, { CharCodes } from './Tokenizer'
import { hasOwn } from '@vue/shared'
const formTags = new Set([
'input',
'option',
'optgroup',
'select',
'button',
'datalist',
'textarea'
])
const pTag = new Set(['p'])
const tableSectionTags = new Set(['thead', 'tbody'])
const ddtTags = new Set(['dd', 'dt'])
const rtpTags = new Set(['rt', 'rp'])
const openImpliesClose = new Map<string, Set<string>>([
['tr', new Set(['tr', 'th', 'td'])],
['th', new Set(['th'])],
['td', new Set(['thead', 'th', 'td'])],
['body', new Set(['head', 'link', 'script'])],
['li', new Set(['li'])],
['p', pTag],
['h1', pTag],
['h2', pTag],
['h3', pTag],
['h4', pTag],
['h5', pTag],
['h6', pTag],
['select', formTags],
['input', formTags],
['output', formTags],
['button', formTags],
['datalist', formTags],
['textarea', formTags],
['option', new Set(['option'])],
['optgroup', new Set(['optgroup', 'option'])],
['dd', ddtTags],
['dt', ddtTags],
['address', pTag],
['article', pTag],
['aside', pTag],
['blockquote', pTag],
['details', pTag],
['div', pTag],
['dl', pTag],
['fieldset', pTag],
['figcaption', pTag],
['figure', pTag],
['footer', pTag],
['form', pTag],
['header', pTag],
['hr', pTag],
['main', pTag],
['nav', pTag],
['ol', pTag],
['pre', pTag],
['section', pTag],
['table', pTag],
['ul', pTag],
['rt', rtpTags],
['rp', rtpTags],
['tbody', tableSectionTags],
['tfoot', tableSectionTags]
])
const voidElements = new Set([ const voidElements = new Set([
'area', 'area',
@ -113,21 +50,16 @@ const htmlIntegrationElements = new Set([
let currentOptions: ParserOptions = {} let currentOptions: ParserOptions = {}
let currentRoot: RootNode = createRoot([]) let currentRoot: RootNode = createRoot([])
let elementStack: ElementNode[] = []
// parser state // parser state
let htmlMode = false let htmlMode = false
let currentInput = '' let currentInput = ''
let openTagStart = 0 let currentElement: ElementNode | null = null
let tagname = '' let currentProp: AttributeNode | DirectiveNode | null = null
let attribname = '' let currentAttrValue = ''
let attribvalue = ''
let attribs: Record<string, string> | null = null
let startIndex = 0
let endIndex = 0
let inPre = 0 let inPre = 0
// let inVPre = 0 // let inVPre = 0
const stack: string[] = [] const stack: ElementNode[] = []
const foreignContext: boolean[] = [false] const foreignContext: boolean[] = [false]
const tokenizer = new Tokenizer( const tokenizer = new Tokenizer(
@ -143,17 +75,14 @@ const tokenizer = new Tokenizer(
}, },
onopentagname(start, end) { onopentagname(start, end) {
emitOpenTag(getSlice(start, (endIndex = end))) emitOpenTag(getSlice(start, end), start)
}, },
onopentagend(end) { onopentagend(end) {
endIndex = end endOpenTag()
endOpenTag(false)
startIndex = end + 1
}, },
onclosetag(start, end) { onclosetag(start, end) {
endIndex = end
const name = getSlice(start, end) const name = getSlice(start, end)
if ( if (
@ -164,16 +93,15 @@ const tokenizer = new Tokenizer(
} }
if (!voidElements.has(name)) { if (!voidElements.has(name)) {
const pos = stack.indexOf(name) const pos = stack.findIndex(e => e.tag === name)
if (pos !== -1) { if (pos !== -1) {
for (let index = 0; index <= pos; index++) { for (let index = 0; index <= pos; index++) {
stack.shift() onCloseTag(stack.shift()!, end)
onCloseTag()
} }
} else if (htmlMode && name === 'p') { } else if (htmlMode && name === 'p') {
// Implicit open before close // Implicit open before close
emitOpenTag('p') emitOpenTag('p', start)
closeCurrentTag(true) closeCurrentTag(end)
} }
} else if (htmlMode && name === 'br') { } else if (htmlMode && name === 'br') {
// TODO // TODO
@ -181,53 +109,64 @@ const tokenizer = new Tokenizer(
// this.cbs.onopentag?.('br', {}, true) // this.cbs.onopentag?.('br', {}, true)
// this.cbs.onclosetag?.('br', false) // this.cbs.onclosetag?.('br', false)
} }
// Set `startIndex` for next node
startIndex = end + 1
}, },
onselfclosingtag(end) { onselfclosingtag(end) {
endIndex = end closeCurrentTag(end)
closeCurrentTag(false)
startIndex = end + 1
}, },
onattribname(start, end) { onattribname(start, end) {
attribname = getSlice((startIndex = start), end) // TODO directives
currentProp = {
type: NodeTypes.ATTRIBUTE,
name: getSlice(start, end),
value: undefined,
loc: {
start: tokenizer.getPositionForIndex(start),
// @ts-expect-error to be attached on attribute end
end: undefined,
source: ''
}
}
}, },
onattribdata(start, end) { onattribdata(start, end) {
attribvalue += getSlice(start, end) currentAttrValue += getSlice(start, end)
}, },
onattribentity(codepoint) { onattribentity(codepoint) {
attribvalue += fromCodePoint(codepoint) currentAttrValue += fromCodePoint(codepoint)
}, },
onattribend(_quote, end) { onattribend(_quote, end) {
endIndex = end if (currentElement) {
if (attribs && !hasOwn(attribs, attribname)) { if (currentProp!.type === NodeTypes.ATTRIBUTE) {
// TODO gen attributes AST nodes // assign value
attribs[attribname] = attribvalue currentProp!.value = {
type: NodeTypes.TEXT,
content: currentAttrValue,
// @ts-expect-error TODO
loc: {}
} }
attribvalue = '' } else {
// TODO
}
currentProp!.loc.end = tokenizer.getPositionForIndex(end)
currentElement.props.push(currentProp!)
}
currentAttrValue = ''
}, },
oncomment(start, end, offset) { oncomment(start, end, offset) {
endIndex = end
// TODO oncomment // TODO oncomment
startIndex = end + 1
}, },
onend() { onend() {
// Set the end index for all remaining tags const end = currentInput.length
endIndex = startIndex
for (let index = 0; index < stack.length; index++) { for (let index = 0; index < stack.length; index++) {
onCloseTag() onCloseTag(stack[index], end)
} }
}, },
oncdata(start, end, offset) { oncdata(start, end, offset) {
endIndex = end
// TODO throw error // TODO throw error
startIndex = end + 1
} }
} }
) )
@ -236,18 +175,31 @@ function getSlice(start: number, end: number) {
return currentInput.slice(start, end) return currentInput.slice(start, end)
} }
function emitOpenTag(name: string) { function emitOpenTag(name: string, start: number) {
openTagStart = startIndex currentElement = {
tagname = name type: NodeTypes.ELEMENT,
const impliesClose = htmlMode && openImpliesClose.get(name) tag: name,
if (impliesClose) { // TODO refine namespace
while (stack.length > 0 && impliesClose.has(stack[0])) { ns: 0,
stack.shift() // TODO refine tag type
onCloseTag() tagType: ElementTypes.ELEMENT,
props: [],
children: [],
loc: {
start: tokenizer.getPositionForIndex(start - 1),
// @ts-expect-error to be attached on tag close
end: undefined,
source: ''
},
codegenNode: undefined
} }
} }
function endOpenTag() {
addNode(currentElement!)
const name = currentElement!.tag
if (!voidElements.has(name)) { if (!voidElements.has(name)) {
stack.unshift(name) stack.unshift(currentElement!)
if (htmlMode) { if (htmlMode) {
if (foreignContextElements.has(name)) { if (foreignContextElements.has(name)) {
foreignContext.unshift(true) foreignContext.unshift(true)
@ -256,30 +208,17 @@ function emitOpenTag(name: string) {
} }
} }
} }
attribs = {} currentElement = null
} }
function closeCurrentTag(isOpenImplied: boolean) { function closeCurrentTag(end: number) {
const name = tagname const name = currentElement!.tag
endOpenTag(isOpenImplied) endOpenTag()
if (stack[0] === name) { if (stack[0].tag === name) {
onCloseTag() onCloseTag(stack.shift()!, end)
stack.shift()
} }
} }
function endOpenTag(isImplied: boolean) {
startIndex = openTagStart
if (attribs) {
onOpenTag(tagname)
attribs = null
}
if (voidElements.has(tagname)) {
onCloseTag()
}
tagname = ''
}
function onText(content: string, start: number, end: number) { function onText(content: string, start: number, end: number) {
const parent = getParent() const parent = getParent()
const lastNode = parent.children[parent.children.length - 1] const lastNode = parent.children[parent.children.length - 1]
@ -300,32 +239,13 @@ function onText(content: string, start: number, end: number) {
} }
} }
function onOpenTag(tag: string) { function onCloseTag(el: ElementNode, end: number) {
const el: ElementNode = { // attach end position
type: NodeTypes.ELEMENT, let offset = 0
tag, while (currentInput.charCodeAt(end + offset) !== CharCodes.Gt) {
// TODO namespace offset++
ns: 0,
// TODO refine tag type
tagType: ElementTypes.ELEMENT,
// TODO props
props: [],
children: [],
loc: {
// @ts-expect-error TODO
start: {},
// @ts-expect-error TODO
end: { offset: endIndex },
source: ''
},
codegenNode: undefined
} }
addNode(el) el.loc.end = tokenizer.getPositionForIndex(end + offset + 1)
elementStack.push(el)
}
function onCloseTag() {
const el = elementStack.pop()!
// whitespace management // whitespace management
el.children = condenseWhitespace(el.children) el.children = condenseWhitespace(el.children)
} }
@ -394,19 +314,15 @@ function addNode(node: TemplateChildNode) {
} }
function getParent() { function getParent() {
return elementStack[elementStack.length - 1] || currentRoot return stack[0] || currentRoot
} }
function reset() { function reset() {
tokenizer.reset() tokenizer.reset()
tagname = '' currentElement = null
attribname = '' currentProp = null
attribvalue = '' currentAttrValue = ''
attribs = null
startIndex = 0
endIndex = 0
stack.length = 0 stack.length = 0
elementStack.length = 0
foreignContext.length = 1 foreignContext.length = 1
foreignContext[0] = false foreignContext[0] = false
} }