mirror of https://github.com/vuejs/core.git

wip: port parser

parent 2a6292e37f
commit 19bd714239
@@ -1,5 +1,5 @@
 import { ParserOptions } from '../src/options'
-import { baseParse, TextModes } from '../src/parse'
+import { TextModes } from '../src/parse'
 import { ErrorCodes } from '../src/errors'
 import {
   CommentNode,
@@ -14,6 +14,8 @@ import {
   DirectiveNode
 } from '../src/ast'
 
+import { baseParse } from '../src/parser/index'
+
 describe('compiler: parse', () => {
   describe('Text', () => {
     test('simple text', () => {
@@ -128,9 +128,9 @@ export interface BaseElementNode extends Node {
   ns: Namespace
   tag: string
   tagType: ElementTypes
-  isSelfClosing: boolean
   props: Array<AttributeNode | DirectiveNode>
   children: TemplateChildNode[]
+  isSelfClosing?: boolean
 }
 
 export interface PlainElementNode extends BaseElementNode {
@@ -70,3 +70,5 @@ export {
  warnDeprecation,
  CompilerDeprecationTypes
 } from './compat/compatConfig'
+
+export { baseParse as newParse } from './parser/index'
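
Note (not part of the diff): the hunk above exposes the WIP parser as `newParse` next to the existing `baseParse`, so both entry points can be exercised side by side while the port is in progress. A minimal sketch under that assumption; the template string and the package-level import are illustrative only.

import { baseParse, newParse } from '@vue/compiler-core'

const template = '<div id="app">hello</div>'

// Compare the output of the current parser and the WIP tokenizer-based port.
const oldAst = baseParse(template)
const newAst = newParse(template)
console.log(oldAst.children.length === newAst.children.length)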
@@ -17,6 +17,10 @@ export interface ErrorHandlingOptions {
 export interface ParserOptions
   extends ErrorHandlingOptions,
     CompilerCompatOptions {
+  /**
+   * Parse as HTML. Default: false
+   */
+  htmlMode?: boolean
   /**
    * e.g. platform native elements, e.g. `<div>` for browsers
    */
@@ -40,6 +40,7 @@ import {
 } from './compat/compatConfig'
 
 type OptionalOptions =
+  | 'htmlMode'
   | 'whitespace'
   | 'isNativeTag'
   | 'isBuiltInComponent'
@@ -108,22 +108,6 @@ export interface ParserOptions {
    * @default true
    */
  decodeEntities?: boolean
-
-  /**
-   * If set to true, CDATA sections will be recognized as text even if the xmlMode option is not enabled.
-   * NOTE: If xmlMode is set to `true` then CDATA sections will always be recognized as text.
-   *
-   * @default false
-   */
-  recognizeCDATA?: boolean
-
-  /**
-   * If set to `true`, self-closing tags will trigger the onclosetag event even if xmlMode is not set to `true`.
-   * NOTE: If xmlMode is set to `true` then self-closing tags will always be recognized.
-   *
-   * @default false
-   */
-  recognizeSelfClosing?: boolean
 }
 
 export interface Handler {
@@ -186,7 +170,6 @@ export class Parser implements Callbacks {
   /** Determines whether self-closing tags are recognized. */
   private readonly foreignContext: boolean[]
   private readonly cbs: Partial<Handler>
-  private readonly recognizeSelfClosing: boolean
   private readonly tokenizer: Tokenizer
 
   private buffer: string = ''
@@ -196,7 +179,6 @@ export class Parser implements Callbacks {
     private readonly options: ParserOptions = {}
   ) {
     this.cbs = cbs ?? {}
-    this.recognizeSelfClosing = options.recognizeSelfClosing ?? false
     this.tokenizer = new Tokenizer(this.options, this)
     this.foreignContext = [false]
     this.cbs.onparserinit?.(this)
@@ -307,15 +289,9 @@ export class Parser implements Callbacks {
   /** @internal */
   onselfclosingtag(endIndex: number): void {
     this.endIndex = endIndex
-    if (this.recognizeSelfClosing || this.foreignContext[0]) {
     this.closeCurrentTag(false)
 
     // Set `startIndex` for next node
     this.startIndex = endIndex + 1
-    } else {
-      // Ignore the fact that the tag is self-closing.
-      this.onopentagend(endIndex)
-    }
   }
 
   private closeCurrentTag(isOpenImplied: boolean) {
@@ -417,17 +393,9 @@ export class Parser implements Callbacks {
   /** @internal */
   oncdata(start: number, endIndex: number, offset: number): void {
     this.endIndex = endIndex
-    const value = this.getSlice(start, endIndex - offset)
-
-    if (this.options.recognizeCDATA) {
     this.cbs.oncdatastart?.()
-    this.cbs.ontext?.(value)
+    this.cbs.ontext?.(this.getSlice(start, endIndex - offset))
     this.cbs.oncdataend?.()
-    } else {
-      this.cbs.oncomment?.(`[CDATA[${value}]]`)
-      this.cbs.oncommentend?.()
-    }
-
     // Set `startIndex` for next node
     this.startIndex = endIndex + 1
   }
@@ -456,8 +424,7 @@ export class Parser implements Callbacks {
   public parse(input: string): void {
     this.reset()
     this.buffer = input
-    this.tokenizer.write(input)
-    this.tokenizer.end()
+    this.tokenizer.parse(input)
   }
 
   /**
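
Note (not part of the diff): this hunk, together with the Tokenizer changes below, collapses htmlparser2's streaming `write()`/`end()` API into a single synchronous `parse(input)` call, which is why the `offset`, `running`, `pause()` and `resume()` members disappear. A rough sketch of the call-site difference, with a simplified shape declared purely for illustration.

// Simplified, assumed shape for illustration only.
declare const tokenizer: {
  write(chunk: string): void
  end(): void
  parse(input: string): void
}

// Before: chunked input, with offset bookkeeping across write() calls.
tokenizer.write('<div>hello')
tokenizer.write(' world</div>')
tokenizer.end()

// After: a template is always one complete string, so a single call suffices.
tokenizer.parse('<div>hello world</div>')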
@@ -1,3 +1,27 @@
+/**
+ * This Tokenizer is adapted from htmlparser2 under the MIT License listed at
+ * https://github.com/fb55/htmlparser2/blob/master/LICENSE
+
+Copyright 2010, 2011, Chris Winberry <chris@winberry.net>. All rights reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+IN THE SOFTWARE.
+*/
+
 import {
   EntityDecoder,
   DecodingMode,
@@ -143,10 +167,6 @@ export default class Tokenizer {
   private baseState = State.Text
   /** For special parsing behavior inside of script and style tags. */
   private isSpecial = false
-  /** Indicates whether the tokenizer has been paused. */
-  public running = true
-  /** The offset of the current buffer. */
-  private offset = 0
 
   private readonly decodeEntities: boolean
   private readonly entityDecoder: EntityDecoder
@@ -168,29 +188,6 @@ export default class Tokenizer {
     this.index = 0
     this.baseState = State.Text
     this.currentSequence = undefined!
-    this.running = true
-    this.offset = 0
-  }
-
-  public write(chunk: string): void {
-    this.offset += this.buffer.length
-    this.buffer = chunk
-    this.parse()
-  }
-
-  public end(): void {
-    if (this.running) this.finish()
-  }
-
-  public pause(): void {
-    this.running = false
-  }
-
-  public resume(): void {
-    this.running = true
-    if (this.index < this.buffer.length + this.offset) {
-      this.parse()
-    }
   }
 
   private stateText(c: number): void {
@@ -293,8 +290,8 @@ export default class Tokenizer {
    * @returns Whether the character was found.
    */
   private fastForwardTo(c: number): boolean {
-    while (++this.index < this.buffer.length + this.offset) {
-      if (this.buffer.charCodeAt(this.index - this.offset) === c) {
+    while (++this.index < this.buffer.length) {
+      if (this.buffer.charCodeAt(this.index) === c) {
         return true
       }
     }
@@ -305,7 +302,7 @@ export default class Tokenizer {
      *
      * TODO: Refactor `parse` to increment index before calling states.
      */
-    this.index = this.buffer.length + this.offset - 1
+    this.index = this.buffer.length - 1
 
     return false
   }
@@ -577,10 +574,7 @@ export default class Tokenizer {
   }
 
   private stateInEntity(): void {
-    const length = this.entityDecoder.write(
-      this.buffer,
-      this.index - this.offset
-    )
+    const length = this.entityDecoder.write(this.buffer, this.index)
 
     // If `length` is positive, we are done with the entity.
     if (length >= 0) {
@@ -591,45 +585,19 @@ export default class Tokenizer {
       }
     } else {
       // Mark buffer as consumed.
-      this.index = this.offset + this.buffer.length - 1
+      this.index = this.buffer.length - 1
     }
   }
 
-  /**
-   * Remove data that has already been consumed from the buffer.
-   */
-  private cleanup() {
-    // If we are inside of text or attributes, emit what we already have.
-    if (this.running && this.sectionStart !== this.index) {
-      if (
-        this.state === State.Text ||
-        (this.state === State.InSpecialTag && this.sequenceIndex === 0)
-      ) {
-        this.cbs.ontext(this.sectionStart, this.index)
-        this.sectionStart = this.index
-      } else if (
-        this.state === State.InAttributeValueDq ||
-        this.state === State.InAttributeValueSq ||
-        this.state === State.InAttributeValueNq
-      ) {
-        this.cbs.onattribdata(this.sectionStart, this.index)
-        this.sectionStart = this.index
-      }
-    }
-  }
-
-  private shouldContinue() {
-    return this.index < this.buffer.length + this.offset && this.running
-  }
-
   /**
    * Iterates through the buffer, calling the function corresponding to the current state.
    *
    * States that are more likely to be hit are higher up, as a performance improvement.
    */
-  private parse() {
-    while (this.shouldContinue()) {
-      const c = this.buffer.charCodeAt(this.index - this.offset)
+  public parse(input: string) {
+    this.buffer = input
+    while (this.index < this.buffer.length) {
+      const c = this.buffer.charCodeAt(this.index)
       switch (this.state) {
         case State.Text: {
           this.stateText(c)
@@ -735,6 +703,30 @@ export default class Tokenizer {
       this.index++
     }
     this.cleanup()
+    this.finish()
+  }
+
+  /**
+   * Remove data that has already been consumed from the buffer.
+   */
+  private cleanup() {
+    // If we are inside of text or attributes, emit what we already have.
+    if (this.sectionStart !== this.index) {
+      if (
+        this.state === State.Text ||
+        (this.state === State.InSpecialTag && this.sequenceIndex === 0)
+      ) {
+        this.cbs.ontext(this.sectionStart, this.index)
+        this.sectionStart = this.index
+      } else if (
+        this.state === State.InAttributeValueDq ||
+        this.state === State.InAttributeValueSq ||
+        this.state === State.InAttributeValueNq
+      ) {
+        this.cbs.onattribdata(this.sectionStart, this.index)
+        this.sectionStart = this.index
+      }
+    }
   }
 
   private finish() {
@@ -750,7 +742,7 @@ export default class Tokenizer {
 
   /** Handle any trailing data. */
   private handleTrailingData() {
-    const endIndex = this.buffer.length + this.offset
+    const endIndex = this.buffer.length
 
     // If there is no remaining data, we are done.
     if (this.sectionStart >= endIndex) {
@@ -1,16 +1,429 @@
-import { RootNode, createRoot } from '../ast'
+import { fromCodePoint } from 'entities/lib/decode.js'
+import {
+  ElementNode,
+  ElementTypes,
+  NodeTypes,
+  RootNode,
+  TemplateChildNode,
+  createRoot
+} from '../ast'
 import { ParserOptions } from '../options'
-import { Parser } from './Parser'
+import Tokenizer from './Tokenizer'
+import { hasOwn } from '@vue/shared'
 
-const parser = new Parser({
+const formTags = new Set([
+  'input',
+  'option',
+  'optgroup',
+  'select',
+  'button',
+  'datalist',
+  'textarea'
+])
+const pTag = new Set(['p'])
+const tableSectionTags = new Set(['thead', 'tbody'])
+const ddtTags = new Set(['dd', 'dt'])
+const rtpTags = new Set(['rt', 'rp'])
+
+const openImpliesClose = new Map<string, Set<string>>([
+  ['tr', new Set(['tr', 'th', 'td'])],
+  ['th', new Set(['th'])],
+  ['td', new Set(['thead', 'th', 'td'])],
+  ['body', new Set(['head', 'link', 'script'])],
+  ['li', new Set(['li'])],
+  ['p', pTag],
+  ['h1', pTag],
+  ['h2', pTag],
+  ['h3', pTag],
+  ['h4', pTag],
+  ['h5', pTag],
+  ['h6', pTag],
+  ['select', formTags],
+  ['input', formTags],
+  ['output', formTags],
+  ['button', formTags],
+  ['datalist', formTags],
+  ['textarea', formTags],
+  ['option', new Set(['option'])],
+  ['optgroup', new Set(['optgroup', 'option'])],
+  ['dd', ddtTags],
+  ['dt', ddtTags],
+  ['address', pTag],
+  ['article', pTag],
+  ['aside', pTag],
+  ['blockquote', pTag],
+  ['details', pTag],
+  ['div', pTag],
+  ['dl', pTag],
+  ['fieldset', pTag],
+  ['figcaption', pTag],
+  ['figure', pTag],
+  ['footer', pTag],
+  ['form', pTag],
+  ['header', pTag],
+  ['hr', pTag],
+  ['main', pTag],
+  ['nav', pTag],
+  ['ol', pTag],
+  ['pre', pTag],
+  ['section', pTag],
+  ['table', pTag],
+  ['ul', pTag],
+  ['rt', rtpTags],
+  ['rp', rtpTags],
+  ['tbody', tableSectionTags],
+  ['tfoot', tableSectionTags]
+])
+
+const voidElements = new Set([
+  'area',
+  'base',
+  'basefont',
+  'br',
+  'col',
+  'command',
+  'embed',
+  'frame',
+  'hr',
+  'img',
+  'input',
+  'isindex',
+  'keygen',
+  'link',
+  'meta',
+  'param',
+  'source',
+  'track',
+  'wbr'
+])
+
+const foreignContextElements = new Set(['math', 'svg'])
+
+const htmlIntegrationElements = new Set([
+  'mi',
+  'mo',
+  'mn',
+  'ms',
+  'mtext',
+  'annotation-xml',
+  'foreignobject',
+  'desc',
+  'title'
+])
+
+let currentOptions: ParserOptions = {}
+let currentRoot: RootNode = createRoot([])
+let elementStack: ElementNode[] = []
+
+// parser state
+let htmlMode = false
+let currentInput = ''
+let openTagStart = 0
+let tagname = ''
+let attribname = ''
+let attribvalue = ''
+let attribs: Record<string, string> | null = null
+let startIndex = 0
+let endIndex = 0
+let inPre = 0
+// let inVPre = 0
+const stack: string[] = []
+const foreignContext: boolean[] = [false]
+
+const tokenizer = new Tokenizer(
+  // TODO handle entities
+  { decodeEntities: true },
+  {
+    ontext(start, end) {
+      const content = getSlice(start, end)
+      endIndex = end - 1
+      onText(content)
+      startIndex = end
+    },
+
+    ontextentity(cp, end) {
+      endIndex = end - 1
+      onText(fromCodePoint(cp))
+      startIndex = end
+    },
+
+    onopentagname(start, end) {
+      emitOpenTag(getSlice(start, (endIndex = end)))
+    },
+
+    onopentagend(end) {
+      endIndex = end
+      endOpenTag(false)
+      startIndex = end + 1
+    },
+
+    onclosetag(start, end) {
+      endIndex = end
+      const name = getSlice(start, end)
+
+      if (
+        htmlMode &&
+        (foreignContextElements.has(name) || htmlIntegrationElements.has(name))
+      ) {
+        foreignContext.shift()
+      }
+
+      if (!voidElements.has(name)) {
+        const pos = stack.indexOf(name)
+        if (pos !== -1) {
+          for (let index = 0; index <= pos; index++) {
+            stack.shift()
+            onCloseTag()
+          }
+        } else if (htmlMode && name === 'p') {
+          // Implicit open before close
+          emitOpenTag('p')
+          closeCurrentTag(true)
+        }
+      } else if (htmlMode && name === 'br') {
         // TODO
+        // We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed.
+        // this.cbs.onopentag?.('br', {}, true)
+        // this.cbs.onclosetag?.('br', false)
+      }
+
+      // Set `startIndex` for next node
+      startIndex = end + 1
+    },
+
+    onselfclosingtag(end) {
+      endIndex = end
+      closeCurrentTag(false)
+      startIndex = end + 1
+    },
+
+    onattribname(start, end) {
+      attribname = getSlice((startIndex = start), end)
+    },
+    onattribdata(start, end) {
+      attribvalue += getSlice(start, end)
+    },
+    onattribentity(codepoint) {
+      attribvalue += fromCodePoint(codepoint)
+    },
+    onattribend(quote, end) {
+      endIndex = end
+      if (attribs && !hasOwn(attribs, attribname)) {
+        // TODO gen attributes AST nodes
+        attribs[attribname] = attribvalue
+      }
+      attribvalue = ''
+    },
+
+    oncomment(start, end, offset) {
+      endIndex = end
+      // TODO oncomment
+      startIndex = end + 1
+    },
+
+    onend() {
+      // Set the end index for all remaining tags
+      endIndex = startIndex
+      for (let index = 0; index < stack.length; index++) {
+        onCloseTag()
+      }
+    },
+
+    oncdata(start, end, offset) {
+      endIndex = end
+      // TODO throw error
+      startIndex = end + 1
+    },
+
+    // TODO ignore
+    ondeclaration(start, end) {
+      endIndex = end
+      // TODO onprocessinginstruction
+      startIndex = end + 1
+    },
+
+    // TODO ignore
+    onprocessinginstruction(start, end) {
+      endIndex = end
+      // TODO onprocessinginstruction
+      startIndex = end + 1
+    }
+  }
+)
+
+function getSlice(start: number, end: number) {
+  return currentInput.slice(start, end)
+}
+
+function emitOpenTag(name: string) {
+  openTagStart = startIndex
+  tagname = name
+  const impliesClose = htmlMode && openImpliesClose.get(name)
+  if (impliesClose) {
+    while (stack.length > 0 && impliesClose.has(stack[0])) {
+      stack.shift()
+      onCloseTag()
+    }
+  }
+  if (!voidElements.has(name)) {
+    stack.unshift(name)
+    if (htmlMode) {
+      if (foreignContextElements.has(name)) {
+        foreignContext.unshift(true)
+      } else if (htmlIntegrationElements.has(name)) {
+        foreignContext.unshift(false)
+      }
+    }
+  }
+  attribs = {}
+}
+
+function closeCurrentTag(isOpenImplied: boolean) {
+  const name = tagname
+  endOpenTag(isOpenImplied)
+  if (stack[0] === name) {
+    onCloseTag()
+    stack.shift()
+  }
+}
+
+function endOpenTag(isImplied: boolean) {
+  startIndex = openTagStart
+  if (attribs) {
+    onOpenTag(tagname)
+    attribs = null
+  }
+  if (voidElements.has(tagname)) {
+    onCloseTag()
+  }
+  tagname = ''
+}
+
+function onText(content: string) {
+  const parent = getParent()
+  const lastNode = parent.children[parent.children.length - 1]
+  if (lastNode?.type === NodeTypes.TEXT) {
+    // merge
+    lastNode.content += content
+    // TODO update loc
+  } else {
+    parent.children.push({
+      type: NodeTypes.TEXT,
+      content,
+      // @ts-ignore TODO
+      loc: {}
     })
+  }
+}
+
+function onOpenTag(tag: string) {
+  const el: ElementNode = {
+    type: NodeTypes.ELEMENT,
+    tag,
+    // TODO namespace
+    ns: 0,
+    // TODO refine tag type
+    tagType: ElementTypes.ELEMENT,
+    // TODO props
+    props: [],
+    children: [],
+    // @ts-ignore TODO
+    loc: {},
+    codegenNode: undefined
+  }
+  addNode(el)
+  elementStack.push(el)
+}
+
+function onCloseTag() {
+  const el = elementStack.pop()!
+  // whitepsace management
+  const nodes = el.children
+  const shouldCondense = currentOptions.whitespace !== 'preserve'
+  let removedWhitespace = false
+  for (let i = 0; i < nodes.length; i++) {
+    const node = nodes[i]
+    if (node.type === NodeTypes.TEXT) {
+      if (!inPre) {
+        if (!/[^\t\r\n\f ]/.test(node.content)) {
+          const prev = nodes[i - 1]
+          const next = nodes[i + 1]
+          // Remove if:
+          // - the whitespace is the first or last node, or:
+          // - (condense mode) the whitespace is between twos comments, or:
+          // - (condense mode) the whitespace is between comment and element, or:
+          // - (condense mode) the whitespace is between two elements AND contains newline
+          if (
+            !prev ||
+            !next ||
+            (shouldCondense &&
+              ((prev.type === NodeTypes.COMMENT &&
+                next.type === NodeTypes.COMMENT) ||
+                (prev.type === NodeTypes.COMMENT &&
+                  next.type === NodeTypes.ELEMENT) ||
+                (prev.type === NodeTypes.ELEMENT &&
+                  next.type === NodeTypes.COMMENT) ||
+                (prev.type === NodeTypes.ELEMENT &&
+                  next.type === NodeTypes.ELEMENT &&
+                  /[\r\n]/.test(node.content))))
+          ) {
+            removedWhitespace = true
+            nodes[i] = null as any
+          } else {
+            // Otherwise, the whitespace is condensed into a single space
+            node.content = ' '
+          }
+        } else if (shouldCondense) {
+          // in condense mode, consecutive whitespaces in text are condensed
+          // down to a single space.
+          node.content = node.content.replace(/[\t\r\n\f ]+/g, ' ')
+        }
+      } else {
+        // #6410 normalize windows newlines in <pre>:
+        // in SSR, browsers normalize server-rendered \r\n into a single \n
+        // in the DOM
+        node.content = node.content.replace(/\r\n/g, '\n')
+      }
+    }
+  }
+  if (removedWhitespace) {
+    el.children = nodes.filter(Boolean)
+  }
+}
+
+function addNode(node: TemplateChildNode) {
+  getParent().children.push(node)
+}
+
+function getParent() {
+  return elementStack[elementStack.length - 1] || currentRoot
+}
+
+function reset() {
+  tokenizer.reset()
+  tagname = ''
+  attribname = ''
+  attribvalue = ''
+  attribs = null
+  startIndex = 0
+  endIndex = 0
+  stack.length = 0
+  elementStack.length = 0
+  foreignContext.length = 1
+  foreignContext[0] = false
+}
 
 export function baseParse(
-  content: string,
+  input: string,
   options: ParserOptions = {}
 ): RootNode {
-  const root = createRoot([])
-  parser.parse(content)
+  reset()
+  currentInput = input.trim()
+  currentOptions = options
+  htmlMode = !!options.htmlMode
+  const root = (currentRoot = createRoot([]))
+  tokenizer.parse(currentInput)
+  // temp hack for ts
+  console.log(endIndex)
   return root
 }
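
Note (not part of the diff): a minimal usage sketch of the WIP entry point added above. `htmlMode` and `whitespace` are the only options this code path consumes so far; the relative import assumes a caller inside compiler-core.

import { baseParse } from './parser/index'

const ast = baseParse('<p>hi<p>there', {
  // enables the implied-close rules above (a second <p> closes the first)
  htmlMode: true,
  // whitespace handling applied in onCloseTag()
  whitespace: 'condense'
})
console.log(ast.children.length) // two <p> element nodes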