From 08038a938c7991272ae29f85534abf607e1a8e49 Mon Sep 17 00:00:00 2001 From: Evan You Date: Wed, 15 Nov 2023 01:14:36 +0800 Subject: [PATCH] wip: parse directive in tokenizer --- packages/compiler-core/src/parser/Parser.ts | 446 ------------------ .../compiler-core/src/parser/Tokenizer.ts | 107 ++++- packages/compiler-core/src/parser/index.ts | 78 +-- 3 files changed, 149 insertions(+), 482 deletions(-) delete mode 100644 packages/compiler-core/src/parser/Parser.ts diff --git a/packages/compiler-core/src/parser/Parser.ts b/packages/compiler-core/src/parser/Parser.ts deleted file mode 100644 index be6ff0b41..000000000 --- a/packages/compiler-core/src/parser/Parser.ts +++ /dev/null @@ -1,446 +0,0 @@ -import Tokenizer, { Callbacks, QuoteType } from './Tokenizer.js' -import { fromCodePoint } from 'entities/lib/decode.js' - -const formTags = new Set([ - 'input', - 'option', - 'optgroup', - 'select', - 'button', - 'datalist', - 'textarea' -]) -const pTag = new Set(['p']) -const tableSectionTags = new Set(['thead', 'tbody']) -const ddtTags = new Set(['dd', 'dt']) -const rtpTags = new Set(['rt', 'rp']) - -const openImpliesClose = new Map>([ - ['tr', new Set(['tr', 'th', 'td'])], - ['th', new Set(['th'])], - ['td', new Set(['thead', 'th', 'td'])], - ['body', new Set(['head', 'link', 'script'])], - ['li', new Set(['li'])], - ['p', pTag], - ['h1', pTag], - ['h2', pTag], - ['h3', pTag], - ['h4', pTag], - ['h5', pTag], - ['h6', pTag], - ['select', formTags], - ['input', formTags], - ['output', formTags], - ['button', formTags], - ['datalist', formTags], - ['textarea', formTags], - ['option', new Set(['option'])], - ['optgroup', new Set(['optgroup', 'option'])], - ['dd', ddtTags], - ['dt', ddtTags], - ['address', pTag], - ['article', pTag], - ['aside', pTag], - ['blockquote', pTag], - ['details', pTag], - ['div', pTag], - ['dl', pTag], - ['fieldset', pTag], - ['figcaption', pTag], - ['figure', pTag], - ['footer', pTag], - ['form', pTag], - ['header', pTag], - ['hr', pTag], - ['main', pTag], - ['nav', pTag], - ['ol', pTag], - ['pre', pTag], - ['section', pTag], - ['table', pTag], - ['ul', pTag], - ['rt', rtpTags], - ['rp', rtpTags], - ['tbody', tableSectionTags], - ['tfoot', tableSectionTags] -]) - -const voidElements = new Set([ - 'area', - 'base', - 'basefont', - 'br', - 'col', - 'command', - 'embed', - 'frame', - 'hr', - 'img', - 'input', - 'isindex', - 'keygen', - 'link', - 'meta', - 'param', - 'source', - 'track', - 'wbr' -]) - -const foreignContextElements = new Set(['math', 'svg']) - -const htmlIntegrationElements = new Set([ - 'mi', - 'mo', - 'mn', - 'ms', - 'mtext', - 'annotation-xml', - 'foreignobject', - 'desc', - 'title' -]) - -export interface ParserOptions { - /** - * Decode entities within the document. - * - * @default true - */ - decodeEntities?: boolean -} - -export interface Handler { - onparserinit(parser: Parser): void - - /** - * Resets the handler back to starting state - */ - onreset(): void - - /** - * Signals the handler that parsing is done - */ - onend(): void - onerror(error: Error): void - onclosetag(name: string, isImplied: boolean): void - onopentagname(name: string): void - /** - * - * @param name Name of the attribute - * @param value Value of the attribute. - * @param quote Quotes used around the attribute. `null` if the attribute has no quotes around the value, `undefined` if the attribute has no value. - */ - onattribute( - name: string, - value: string, - quote?: string | undefined | null - ): void - onopentag( - name: string, - attribs: { [s: string]: string }, - isImplied: boolean - ): void - ontext(data: string): void - oncomment(data: string): void - oncdatastart(): void - oncdataend(): void - oncommentend(): void - onprocessinginstruction(name: string, data: string): void -} - -const reNameEnd = /\s|\// - -export class Parser implements Callbacks { - /** The start index of the last event. */ - public startIndex = 0 - /** The end index of the last event. */ - public endIndex = 0 - /** - * Store the start index of the current open tag, - * so we can update the start index for attributes. - */ - private openTagStart = 0 - - private tagname = '' - private attribname = '' - private attribvalue = '' - private attribs: null | { [key: string]: string } = null - private readonly stack: string[] = [] - /** Determines whether self-closing tags are recognized. */ - private readonly foreignContext: boolean[] - private readonly cbs: Partial - private readonly tokenizer: Tokenizer - - private buffer: string = '' - - constructor( - cbs?: Partial | null, - private readonly options: ParserOptions = {} - ) { - this.cbs = cbs ?? {} - this.tokenizer = new Tokenizer(this.options, this) - this.foreignContext = [false] - this.cbs.onparserinit?.(this) - } - - // Tokenizer event handlers - - /** @internal */ - ontext(start: number, endIndex: number): void { - const data = this.getSlice(start, endIndex) - this.endIndex = endIndex - 1 - this.cbs.ontext?.(data) - this.startIndex = endIndex - } - - /** @internal */ - ontextentity(cp: number, endIndex: number): void { - this.endIndex = endIndex - 1 - this.cbs.ontext?.(fromCodePoint(cp)) - this.startIndex = endIndex - } - - /** @internal */ - onopentagname(start: number, endIndex: number): void { - this.emitOpenTag(this.getSlice(start, (this.endIndex = endIndex))) - } - - private emitOpenTag(name: string) { - this.openTagStart = this.startIndex - this.tagname = name - - const impliesClose = openImpliesClose.get(name) - - if (impliesClose) { - while (this.stack.length > 0 && impliesClose.has(this.stack[0])) { - const element = this.stack.shift()! - this.cbs.onclosetag?.(element, true) - } - } - if (!voidElements.has(name)) { - this.stack.unshift(name) - - if (foreignContextElements.has(name)) { - this.foreignContext.unshift(true) - } else if (htmlIntegrationElements.has(name)) { - this.foreignContext.unshift(false) - } - } - this.cbs.onopentagname?.(name) - if (this.cbs.onopentag) this.attribs = {} - } - - private endOpenTag(isImplied: boolean) { - this.startIndex = this.openTagStart - - if (this.attribs) { - this.cbs.onopentag?.(this.tagname, this.attribs, isImplied) - this.attribs = null - } - if (this.cbs.onclosetag && voidElements.has(this.tagname)) { - this.cbs.onclosetag(this.tagname, true) - } - - this.tagname = '' - } - - /** @internal */ - onopentagend(endIndex: number): void { - this.endIndex = endIndex - this.endOpenTag(false) - - // Set `startIndex` for next node - this.startIndex = endIndex + 1 - } - - /** @internal */ - onclosetag(start: number, endIndex: number): void { - const name = this.getSlice(start, (this.endIndex = endIndex)) - - if (foreignContextElements.has(name) || htmlIntegrationElements.has(name)) { - this.foreignContext.shift() - } - - if (!voidElements.has(name)) { - const pos = this.stack.indexOf(name) - if (pos !== -1) { - for (let index = 0; index <= pos; index++) { - const element = this.stack.shift()! - // We know the stack has sufficient elements. - this.cbs.onclosetag?.(element, index !== pos) - } - } else if (name === 'p') { - // Implicit open before close - this.emitOpenTag('p') - this.closeCurrentTag(true) - } - } else if (name === 'br') { - // We can't use `emitOpenTag` for implicit open, as `br` would be implicitly closed. - this.cbs.onopentagname?.('br') - this.cbs.onopentag?.('br', {}, true) - this.cbs.onclosetag?.('br', false) - } - - // Set `startIndex` for next node - this.startIndex = endIndex + 1 - } - - /** @internal */ - onselfclosingtag(endIndex: number): void { - this.endIndex = endIndex - this.closeCurrentTag(false) - // Set `startIndex` for next node - this.startIndex = endIndex + 1 - } - - private closeCurrentTag(isOpenImplied: boolean) { - const name = this.tagname - this.endOpenTag(isOpenImplied) - - // Self-closing tags will be on the top of the stack - if (this.stack[0] === name) { - // If the opening tag isn't implied, the closing tag has to be implied. - this.cbs.onclosetag?.(name, !isOpenImplied) - this.stack.shift() - } - } - - /** @internal */ - onattribname(start: number, endIndex: number): void { - this.attribname = this.getSlice((this.startIndex = start), endIndex) - } - - /** @internal */ - onattribdata(start: number, endIndex: number): void { - this.attribvalue += this.getSlice(start, endIndex) - } - - /** @internal */ - onattribentity(cp: number): void { - this.attribvalue += fromCodePoint(cp) - } - - /** @internal */ - onattribend(quote: QuoteType, endIndex: number): void { - this.endIndex = endIndex - - this.cbs.onattribute?.( - this.attribname, - this.attribvalue, - quote === QuoteType.Double - ? '"' - : quote === QuoteType.Single - ? "'" - : quote === QuoteType.NoValue - ? undefined - : null - ) - - if ( - this.attribs && - !Object.prototype.hasOwnProperty.call(this.attribs, this.attribname) - ) { - this.attribs[this.attribname] = this.attribvalue - } - this.attribvalue = '' - } - - private getInstructionName(value: string) { - const index = value.search(reNameEnd) - return index < 0 ? value : value.slice(0, index) - } - - /** @internal */ - ondeclaration(start: number, endIndex: number): void { - this.endIndex = endIndex - const value = this.getSlice(start, endIndex) - - if (this.cbs.onprocessinginstruction) { - const name = this.getInstructionName(value) - this.cbs.onprocessinginstruction(`!${name}`, `!${value}`) - } - - // Set `startIndex` for next node - this.startIndex = endIndex + 1 - } - - /** @internal */ - onprocessinginstruction(start: number, endIndex: number): void { - this.endIndex = endIndex - const value = this.getSlice(start, endIndex) - - if (this.cbs.onprocessinginstruction) { - const name = this.getInstructionName(value) - this.cbs.onprocessinginstruction(`?${name}`, `?${value}`) - } - - // Set `startIndex` for next node - this.startIndex = endIndex + 1 - } - - /** @internal */ - oncomment(start: number, endIndex: number, offset: number): void { - this.endIndex = endIndex - - this.cbs.oncomment?.(this.getSlice(start, endIndex - offset)) - this.cbs.oncommentend?.() - - // Set `startIndex` for next node - this.startIndex = endIndex + 1 - } - - /** @internal */ - oncdata(start: number, endIndex: number, offset: number): void { - this.endIndex = endIndex - this.cbs.oncdatastart?.() - this.cbs.ontext?.(this.getSlice(start, endIndex - offset)) - this.cbs.oncdataend?.() - // Set `startIndex` for next node - this.startIndex = endIndex + 1 - } - - /** @internal */ - onend(): void { - if (this.cbs.onclosetag) { - // Set the end index for all remaining tags - this.endIndex = this.startIndex - for (let index = 0; index < this.stack.length; index++) { - this.cbs.onclosetag(this.stack[index], true) - } - } - this.cbs.onend?.() - } - - private getSlice(start: number, end: number) { - return this.buffer.slice(start, end) - } - - /** - * Parses a chunk of data and calls the corresponding callbacks. - * - * @param input string to parse. - */ - public parse(input: string): void { - this.reset() - this.buffer = input - this.tokenizer.parse(input) - } - - /** - * Resets the parser to a blank state, ready to parse a new HTML document - */ - public reset(): void { - this.cbs.onreset?.() - this.tokenizer.reset() - this.tagname = '' - this.attribname = '' - this.attribs = null - this.stack.length = 0 - this.startIndex = 0 - this.endIndex = 0 - this.cbs.onparserinit?.(this) - this.foreignContext.length = 0 - this.foreignContext.unshift(false) - } -} diff --git a/packages/compiler-core/src/parser/Tokenizer.ts b/packages/compiler-core/src/parser/Tokenizer.ts index 05bf9eea3..f8dd287bb 100644 --- a/packages/compiler-core/src/parser/Tokenizer.ts +++ b/packages/compiler-core/src/parser/Tokenizer.ts @@ -56,7 +56,13 @@ export const enum CharCodes { UpperZ = 0x5a, // "Z" LowerZ = 0x7a, // "z" LowerX = 0x78, // "x" - OpeningSquareBracket = 0x5b // "[" + OpeningSquareBracket = 0x5b, // "[" + LowerV = 0x76, // "v" + Dot = 0x2e, // "." + Colon = 0x3a, // ":" + At = 0x40, // "@" + LeftSqaure = 91, // "[" + RightSquare = 93 // "]" } /** All the states the tokenizer can be in. */ @@ -72,6 +78,10 @@ const enum State { // Attributes BeforeAttributeName, InAttributeName, + InDirectiveName, + InDirectiveArg, + InDirectiveDynamicArg, + InDirectiveModifier, AfterAttributeName, BeforeAttributeValue, InAttributeValueDq, // " @@ -134,6 +144,10 @@ export interface Callbacks { onattribend(quote: QuoteType, endIndex: number): void onattribname(start: number, endIndex: number): void + ondirname(start: number, endIndex: number): void + ondirarg(start: number, endIndex: number): void + ondirmodifier(start: number, endIndex: number): void + oncomment(start: number, endIndex: number, endOffset: number): void oncdata(start: number, endIndex: number, endOffset: number): void @@ -461,6 +475,26 @@ export default class Tokenizer { } else if (c === CharCodes.Slash) { this.state = State.InSelfClosingTag } else if (!isWhitespace(c)) { + this.enterAttribute(c) + } + } + private enterAttribute(c: number) { + if ( + c === CharCodes.LowerV && + this.buffer.charCodeAt(this.index + 1) === CharCodes.Dash + ) { + this.state = State.InDirectiveName + this.sectionStart = this.index + } else if ( + c === CharCodes.Dot || + c === CharCodes.Colon || + c === CharCodes.At || + c === CharCodes.Number + ) { + this.cbs.ondirname(this.index, this.index + 1) + this.state = State.InDirectiveArg + this.sectionStart = this.index + 1 + } else { this.state = State.InAttributeName this.sectionStart = this.index } @@ -484,6 +518,54 @@ export default class Tokenizer { this.stateAfterAttributeName(c) } } + private stateInDirectiveName(c: number): void { + if (c === CharCodes.Eq || isEndOfTagSection(c)) { + this.cbs.ondirname(this.sectionStart, this.index) + this.sectionStart = this.index + this.state = State.AfterAttributeName + this.stateAfterAttributeName(c) + } else if (c === CharCodes.Colon) { + this.cbs.ondirname(this.sectionStart, this.index) + this.state = State.InDirectiveArg + this.sectionStart = this.index + 1 + } else if (c === CharCodes.Dot) { + this.cbs.ondirname(this.sectionStart, this.index) + this.state = State.InDirectiveModifier + this.sectionStart = this.index + 1 + } + } + private stateInDirectiveArg(c: number): void { + if (c === CharCodes.Eq || isEndOfTagSection(c)) { + this.cbs.ondirarg(this.sectionStart, this.index) + this.sectionStart = this.index + this.state = State.AfterAttributeName + this.stateAfterAttributeName(c) + } else if (c === CharCodes.LeftSqaure) { + this.state = State.InDirectiveDynamicArg + } else if (c === CharCodes.Dot) { + this.cbs.ondirarg(this.sectionStart, this.index) + this.state = State.InDirectiveModifier + this.sectionStart = this.index + 1 + } + } + private stateInDynamicDirectiveArg(c: number): void { + if (c === CharCodes.RightSquare) { + this.state = State.InDirectiveArg + } else if (c === CharCodes.Eq || isEndOfTagSection(c)) { + // TODO emit error + } + } + private stateInDirectiveModifier(c: number): void { + if (c === CharCodes.Eq || isEndOfTagSection(c)) { + this.cbs.ondirmodifier(this.sectionStart, this.index) + this.sectionStart = this.index + this.state = State.AfterAttributeName + this.stateAfterAttributeName(c) + } else if (c === CharCodes.Dot) { + this.cbs.ondirmodifier(this.sectionStart, this.index) + this.sectionStart = this.index + 1 + } + } private stateAfterAttributeName(c: number): void { if (c === CharCodes.Eq) { this.state = State.BeforeAttributeValue @@ -494,8 +576,7 @@ export default class Tokenizer { this.stateBeforeAttributeName(c) } else if (!isWhitespace(c)) { this.cbs.onattribend(QuoteType.NoValue, this.sectionStart) - this.state = State.InAttributeName - this.sectionStart = this.index + this.enterAttribute(c) } } private stateBeforeAttributeValue(c: number): void { @@ -655,6 +736,22 @@ export default class Tokenizer { this.stateInAttributeName(c) break } + case State.InDirectiveName: { + this.stateInDirectiveName(c) + break + } + case State.InDirectiveArg: { + this.stateInDirectiveArg(c) + break + } + case State.InDirectiveDynamicArg: { + this.stateInDynamicDirectiveArg(c) + break + } + case State.InDirectiveModifier: { + this.stateInDirectiveModifier(c) + break + } case State.InCommentLike: { this.stateInCommentLike(c) break @@ -796,6 +893,10 @@ export default class Tokenizer { this.state === State.BeforeAttributeValue || this.state === State.AfterAttributeName || this.state === State.InAttributeName || + this.state === State.InDirectiveName || + this.state === State.InDirectiveArg || + this.state === State.InDirectiveDynamicArg || + this.state === State.InDirectiveModifier || this.state === State.InAttributeValueSq || this.state === State.InAttributeValueDq || this.state === State.InAttributeValueNq || diff --git a/packages/compiler-core/src/parser/index.ts b/packages/compiler-core/src/parser/index.ts index 2e6606086..450ae2263 100644 --- a/packages/compiler-core/src/parser/index.ts +++ b/packages/compiler-core/src/parser/index.ts @@ -142,11 +142,6 @@ const tokenizer = new Tokenizer( onattribname(start, end) { const name = getSlice(start, end) - if (currentAttrs.has(name)) { - // TODO emit error DUPLICATE_ATTRIBUTE - } else { - currentAttrs.add(name) - } if (!inVPre && isDirective(name)) { // directive const match = directiveParseRE.exec(name)! @@ -259,42 +254,59 @@ const tokenizer = new Tokenizer( currentAttrValue += fromCodePoint(codepoint) }, onattribend(_quote, end) { - if (currentElement) { - if (currentAttrValue) { - if (currentProp!.type === NodeTypes.ATTRIBUTE) { - // assign value - currentProp!.value = { - type: NodeTypes.TEXT, - content: currentAttrValue, - // @ts-expect-error TODO - loc: {} - } - } else { - // directive - currentProp!.exp = { - type: NodeTypes.SIMPLE_EXPRESSION, - content: currentAttrValue, - isStatic: false, - // Treat as non-constant by default. This can be potentially set to - // other values by `transformExpression` to make it eligible for hoisting. - constType: ConstantTypes.NOT_CONSTANT, - // @ts-expect-error TODO - loc: {} - } - } - } - currentProp!.loc.end = tokenizer.getPositionForIndex(end) - currentElement.props.push(currentProp!) - } + // TODO check duplicate + // if (currentAttrs.has(name)) { + // // emit error DUPLICATE_ATTRIBUTE + // } else { + // currentAttrs.add(name) + // } + // if (currentElement) { + // if (currentAttrValue) { + // if (currentProp!.type === NodeTypes.ATTRIBUTE) { + // // assign value + // currentProp!.value = { + // type: NodeTypes.TEXT, + // content: currentAttrValue, + // // @ts-expect-error TODO + // loc: {} + // } + // } else { + // // directive + // currentProp!.exp = { + // type: NodeTypes.SIMPLE_EXPRESSION, + // content: currentAttrValue, + // isStatic: false, + // // Treat as non-constant by default. This can be potentially set to + // // other values by `transformExpression` to make it eligible for hoisting. + // constType: ConstantTypes.NOT_CONSTANT, + // // @ts-expect-error TODO + // loc: {} + // } + // } + // } + // currentProp!.loc.end = tokenizer.getPositionForIndex(end) + // currentElement.props.push(currentProp!) + // } currentAttrValue = '' }, + ondirname(start, end) { + // console.log('name ' + getSlice(start, end)) + currentProp + }, + ondirarg(start, end) { + // console.log('arg ' + getSlice(start, end)) + }, + ondirmodifier(start, end) { + // console.log('.' + getSlice(start, end)) + }, + oncomment(start, end, offset) { // TODO oncomment }, onend() { - const end = currentInput.length + const end = currentInput.length - 1 for (let index = 0; index < stack.length; index++) { onCloseTag(stack[index], end) }