mirror of https://github.com/vuejs/core.git
wip: entities parsing in browser
This commit is contained in:
parent
caabba7590
commit
1912af04e3
|
@ -50,7 +50,8 @@ export interface ParserOptions
|
|||
*/
|
||||
whitespace?: 'preserve' | 'condense'
|
||||
/**
|
||||
* Only needed for DOM compilers
|
||||
* Only used for DOM compilers that run in the browser.
|
||||
* In non-browser builds, this option is ignored.
|
||||
*/
|
||||
decodeEntities?: (rawText: string, asAttr: boolean) => string
|
||||
/**
|
||||
|
|
|
@ -22,12 +22,20 @@ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|||
IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import { ElementNode, Position } from '../ast'
|
||||
|
||||
/**
|
||||
* Note: entities is a non-browser-build-only dependency.
|
||||
* In the browser, we use an HTML element to do the decoding.
|
||||
* Make sure all imports from entities are only used in non-browser branches
|
||||
* so that it can be properly treeshaken.
|
||||
*/
|
||||
import {
|
||||
EntityDecoder,
|
||||
DecodingMode,
|
||||
htmlDecodeTree
|
||||
htmlDecodeTree,
|
||||
fromCodePoint
|
||||
} from 'entities/lib/decode.js'
|
||||
import { ElementNode, Position } from '../ast'
|
||||
|
||||
export const enum ParseMode {
|
||||
BASE,
|
||||
|
@ -170,7 +178,7 @@ export enum QuoteType {
|
|||
|
||||
export interface Callbacks {
|
||||
ontext(start: number, endIndex: number): void
|
||||
ontextentity(codepoint: number, endIndex: number): void
|
||||
ontextentity(char: string, endIndex: number): void
|
||||
|
||||
oninterpolation(start: number, endIndex: number): void
|
||||
|
||||
|
@ -180,7 +188,7 @@ export interface Callbacks {
|
|||
onclosetag(start: number, endIndex: number): void
|
||||
|
||||
onattribdata(start: number, endIndex: number): void
|
||||
onattribentity(codepoint: number): void
|
||||
onattribentity(char: string): void
|
||||
onattribend(quote: QuoteType, endIndex: number): void
|
||||
onattribname(start: number, endIndex: number): void
|
||||
onattribnameend(endIndex: number): void
|
||||
|
@ -233,15 +241,17 @@ export default class Tokenizer {
|
|||
/** Record newline positions for fast line / column calculation */
|
||||
private newlines: number[] = []
|
||||
|
||||
private readonly entityDecoder: EntityDecoder
|
||||
private readonly entityDecoder?: EntityDecoder
|
||||
|
||||
constructor(
|
||||
private readonly stack: ElementNode[],
|
||||
private readonly cbs: Callbacks
|
||||
) {
|
||||
this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) =>
|
||||
this.emitCodePoint(cp, consumed)
|
||||
)
|
||||
if (!__BROWSER__) {
|
||||
this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) =>
|
||||
this.emitCodePoint(cp, consumed)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
public mode = ParseMode.BASE
|
||||
|
@ -290,7 +300,7 @@ export default class Tokenizer {
|
|||
}
|
||||
this.state = State.BeforeTagName
|
||||
this.sectionStart = this.index
|
||||
} else if (c === CharCodes.Amp) {
|
||||
} else if (!__BROWSER__ && c === CharCodes.Amp) {
|
||||
this.startEntity()
|
||||
} else if (c === this.delimiterOpen[0]) {
|
||||
this.state = State.InterpolationOpen
|
||||
|
@ -398,7 +408,7 @@ export default class Tokenizer {
|
|||
!(this.mode === ParseMode.SFC && this.stack.length === 0))
|
||||
) {
|
||||
// We have to parse entities in <title> and <textarea> tags.
|
||||
if (c === CharCodes.Amp) {
|
||||
if (!__BROWSER__ && c === CharCodes.Amp) {
|
||||
this.startEntity()
|
||||
}
|
||||
} else if (this.fastForwardTo(CharCodes.Lt)) {
|
||||
|
@ -702,7 +712,7 @@ export default class Tokenizer {
|
|||
}
|
||||
}
|
||||
private handleInAttributeValue(c: number, quote: number) {
|
||||
if (c === quote) {
|
||||
if (c === quote || (__BROWSER__ && this.fastForwardTo(quote))) {
|
||||
this.cbs.onattribdata(this.sectionStart, this.index)
|
||||
this.sectionStart = -1
|
||||
this.cbs.onattribend(
|
||||
|
@ -710,7 +720,7 @@ export default class Tokenizer {
|
|||
this.index + 1
|
||||
)
|
||||
this.state = State.BeforeAttributeName
|
||||
} else if (c === CharCodes.Amp) {
|
||||
} else if (!__BROWSER__ && c === CharCodes.Amp) {
|
||||
this.startEntity()
|
||||
}
|
||||
}
|
||||
|
@ -727,7 +737,7 @@ export default class Tokenizer {
|
|||
this.cbs.onattribend(QuoteType.Unquoted, this.index)
|
||||
this.state = State.BeforeAttributeName
|
||||
this.stateBeforeAttributeName(c)
|
||||
} else if (c === CharCodes.Amp) {
|
||||
} else if (!__BROWSER__ && c === CharCodes.Amp) {
|
||||
this.startEntity()
|
||||
}
|
||||
}
|
||||
|
@ -796,29 +806,33 @@ export default class Tokenizer {
|
|||
}
|
||||
|
||||
private startEntity() {
|
||||
this.baseState = this.state
|
||||
this.state = State.InEntity
|
||||
this.entityStart = this.index
|
||||
this.entityDecoder.startEntity(
|
||||
this.baseState === State.Text || this.baseState === State.InSpecialTag
|
||||
? DecodingMode.Legacy
|
||||
: DecodingMode.Attribute
|
||||
)
|
||||
if (!__BROWSER__) {
|
||||
this.baseState = this.state
|
||||
this.state = State.InEntity
|
||||
this.entityStart = this.index
|
||||
this.entityDecoder!.startEntity(
|
||||
this.baseState === State.Text || this.baseState === State.InSpecialTag
|
||||
? DecodingMode.Legacy
|
||||
: DecodingMode.Attribute
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private stateInEntity(): void {
|
||||
const length = this.entityDecoder.write(this.buffer, this.index)
|
||||
if (!__BROWSER__) {
|
||||
const length = this.entityDecoder!.write(this.buffer, this.index)
|
||||
|
||||
// If `length` is non-negative, we are done with the entity.
|
||||
if (length >= 0) {
|
||||
this.state = this.baseState
|
||||
// If `length` is non-negative, we are done with the entity.
|
||||
if (length >= 0) {
|
||||
this.state = this.baseState
|
||||
|
||||
if (length === 0) {
|
||||
this.index = this.entityStart
|
||||
if (length === 0) {
|
||||
this.index = this.entityStart
|
||||
}
|
||||
} else {
|
||||
// Mark buffer as consumed.
|
||||
this.index = this.buffer.length - 1
|
||||
}
|
||||
} else {
|
||||
// Mark buffer as consumed.
|
||||
this.index = this.buffer.length - 1
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1002,8 +1016,8 @@ export default class Tokenizer {
|
|||
}
|
||||
|
||||
private finish() {
|
||||
if (this.state === State.InEntity) {
|
||||
this.entityDecoder.end()
|
||||
if (!__BROWSER__ && this.state === State.InEntity) {
|
||||
this.entityDecoder!.end()
|
||||
this.state = this.baseState
|
||||
}
|
||||
|
||||
|
@ -1052,25 +1066,27 @@ export default class Tokenizer {
|
|||
}
|
||||
|
||||
private emitCodePoint(cp: number, consumed: number): void {
|
||||
if (
|
||||
this.baseState !== State.Text &&
|
||||
this.baseState !== State.InSpecialTag
|
||||
) {
|
||||
if (this.sectionStart < this.entityStart) {
|
||||
this.cbs.onattribdata(this.sectionStart, this.entityStart)
|
||||
}
|
||||
this.sectionStart = this.entityStart + consumed
|
||||
this.index = this.sectionStart - 1
|
||||
if (!__BROWSER__) {
|
||||
if (
|
||||
this.baseState !== State.Text &&
|
||||
this.baseState !== State.InSpecialTag
|
||||
) {
|
||||
if (this.sectionStart < this.entityStart) {
|
||||
this.cbs.onattribdata(this.sectionStart, this.entityStart)
|
||||
}
|
||||
this.sectionStart = this.entityStart + consumed
|
||||
this.index = this.sectionStart - 1
|
||||
|
||||
this.cbs.onattribentity(cp)
|
||||
} else {
|
||||
if (this.sectionStart < this.entityStart) {
|
||||
this.cbs.ontext(this.sectionStart, this.entityStart)
|
||||
}
|
||||
this.sectionStart = this.entityStart + consumed
|
||||
this.index = this.sectionStart - 1
|
||||
this.cbs.onattribentity(fromCodePoint(cp))
|
||||
} else {
|
||||
if (this.sectionStart < this.entityStart) {
|
||||
this.cbs.ontext(this.sectionStart, this.entityStart)
|
||||
}
|
||||
this.sectionStart = this.entityStart + consumed
|
||||
this.index = this.sectionStart - 1
|
||||
|
||||
this.cbs.ontextentity(cp, this.sectionStart)
|
||||
this.cbs.ontextentity(fromCodePoint(cp), this.sectionStart)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import { fromCodePoint } from 'entities/lib/decode.js'
|
||||
import {
|
||||
AttributeNode,
|
||||
ConstantTypes,
|
||||
|
@ -29,6 +28,7 @@ import { defaultOnError, defaultOnWarn } from '../errors'
|
|||
import { forAliasRE, isCoreComponent } from '../utils'
|
||||
|
||||
type OptionalOptions =
|
||||
| 'decodeEntities'
|
||||
| 'whitespace'
|
||||
| 'isNativeTag'
|
||||
| 'isBuiltInComponent'
|
||||
|
@ -37,18 +37,6 @@ type OptionalOptions =
|
|||
type MergedParserOptions = Omit<Required<ParserOptions>, OptionalOptions> &
|
||||
Pick<ParserOptions, OptionalOptions>
|
||||
|
||||
// The default decoder only provides escapes for characters reserved as part of
|
||||
// the template syntax, and is only used if the custom renderer did not provide
|
||||
// a platform-specific decoder.
|
||||
const decodeRE = /&(gt|lt|amp|apos|quot);/g
|
||||
const decodeMap: Record<string, string> = {
|
||||
gt: '>',
|
||||
lt: '<',
|
||||
amp: '&',
|
||||
apos: "'",
|
||||
quot: '"'
|
||||
}
|
||||
|
||||
export const defaultParserOptions: MergedParserOptions = {
|
||||
parseMode: 'base',
|
||||
delimiters: [`{{`, `}}`],
|
||||
|
@ -56,9 +44,6 @@ export const defaultParserOptions: MergedParserOptions = {
|
|||
isVoidTag: NO,
|
||||
isPreTag: NO,
|
||||
isCustomElement: NO,
|
||||
// TODO handle entities
|
||||
decodeEntities: (rawText: string): string =>
|
||||
rawText.replace(decodeRE, (_, p1) => decodeMap[p1]),
|
||||
onError: defaultOnError,
|
||||
onWarn: defaultOnWarn,
|
||||
comments: __DEV__
|
||||
|
@ -84,8 +69,8 @@ const tokenizer = new Tokenizer(stack, {
|
|||
onText(getSlice(start, end), start, end)
|
||||
},
|
||||
|
||||
ontextentity(cp, end) {
|
||||
onText(fromCodePoint(cp), end - 1, end)
|
||||
ontextentity(char, end) {
|
||||
onText(char, end - 1, end)
|
||||
},
|
||||
|
||||
oninterpolation(start, end) {
|
||||
|
@ -242,8 +227,8 @@ const tokenizer = new Tokenizer(stack, {
|
|||
currentAttrEndIndex = end
|
||||
},
|
||||
|
||||
onattribentity(codepoint) {
|
||||
currentAttrValue += fromCodePoint(codepoint)
|
||||
onattribentity(char) {
|
||||
currentAttrValue += char
|
||||
},
|
||||
|
||||
onattribnameend(end) {
|
||||
|
@ -265,6 +250,13 @@ const tokenizer = new Tokenizer(stack, {
|
|||
onattribend(quote, end) {
|
||||
if (currentElement && currentProp) {
|
||||
if (quote !== QuoteType.NoValue) {
|
||||
if (__BROWSER__ && currentAttrValue.includes('&')) {
|
||||
// TODO should not do this in <script> or <style>
|
||||
currentAttrValue = currentOptions.decodeEntities!(
|
||||
currentAttrValue,
|
||||
true
|
||||
)
|
||||
}
|
||||
if (currentProp.type === NodeTypes.ATTRIBUTE) {
|
||||
// assign value
|
||||
|
||||
|
@ -422,6 +414,10 @@ function closeCurrentTag(end: number) {
|
|||
}
|
||||
|
||||
function onText(content: string, start: number, end: number) {
|
||||
if (__BROWSER__ && content.includes('&')) {
|
||||
// TODO do not do this in <script> or <style>
|
||||
content = currentOptions.decodeEntities!(content, false)
|
||||
}
|
||||
const parent = getParent()
|
||||
const lastNode = parent.children[parent.children.length - 1]
|
||||
if (lastNode?.type === NodeTypes.TEXT) {
|
||||
|
@ -697,6 +693,19 @@ export function baseParse(input: string, options?: ParserOptions): RootNode {
|
|||
currentInput = input
|
||||
currentOptions = extend({}, defaultParserOptions, options)
|
||||
|
||||
if (__DEV__) {
|
||||
if (!__BROWSER__ && currentOptions.decodeEntities) {
|
||||
console.warn(
|
||||
`[@vue/compiler-core] decodeEntities option is passed but will be ` +
|
||||
`ignored in non-browser builds.`
|
||||
)
|
||||
} else if (__BROWSER__ && !currentOptions.decodeEntities) {
|
||||
throw new Error(
|
||||
`[@vue/compiler-core] decodeEntities option is required in browser builds.`
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
tokenizer.mode =
|
||||
currentOptions.parseMode === 'html'
|
||||
? ParseMode.HTML
|
||||
|
|
|
@ -1,133 +0,0 @@
|
|||
import { ParserOptions } from '@vue/compiler-core'
|
||||
import namedCharacterReferences from './namedChars.json'
|
||||
|
||||
// lazy compute this to make this file tree-shakable for browser
|
||||
let maxCRNameLength: number
|
||||
|
||||
export const decodeHtml: ParserOptions['decodeEntities'] = (
|
||||
rawText,
|
||||
asAttr
|
||||
) => {
|
||||
let offset = 0
|
||||
const end = rawText.length
|
||||
let decodedText = ''
|
||||
|
||||
function advance(length: number) {
|
||||
offset += length
|
||||
rawText = rawText.slice(length)
|
||||
}
|
||||
|
||||
while (offset < end) {
|
||||
const head = /&(?:#x?)?/i.exec(rawText)
|
||||
if (!head || offset + head.index >= end) {
|
||||
const remaining = end - offset
|
||||
decodedText += rawText.slice(0, remaining)
|
||||
advance(remaining)
|
||||
break
|
||||
}
|
||||
|
||||
// Advance to the "&".
|
||||
decodedText += rawText.slice(0, head.index)
|
||||
advance(head.index)
|
||||
|
||||
if (head[0] === '&') {
|
||||
// Named character reference.
|
||||
let name = ''
|
||||
let value: string | undefined = undefined
|
||||
if (/[0-9a-z]/i.test(rawText[1])) {
|
||||
if (!maxCRNameLength) {
|
||||
maxCRNameLength = Object.keys(namedCharacterReferences).reduce(
|
||||
(max, name) => Math.max(max, name.length),
|
||||
0
|
||||
)
|
||||
}
|
||||
for (let length = maxCRNameLength; !value && length > 0; --length) {
|
||||
name = rawText.slice(1, 1 + length)
|
||||
value = (namedCharacterReferences as Record<string, string>)[name]
|
||||
}
|
||||
if (value) {
|
||||
const semi = name.endsWith(';')
|
||||
if (
|
||||
asAttr &&
|
||||
!semi &&
|
||||
/[=a-z0-9]/i.test(rawText[name.length + 1] || '')
|
||||
) {
|
||||
decodedText += '&' + name
|
||||
advance(1 + name.length)
|
||||
} else {
|
||||
decodedText += value
|
||||
advance(1 + name.length)
|
||||
}
|
||||
} else {
|
||||
decodedText += '&' + name
|
||||
advance(1 + name.length)
|
||||
}
|
||||
} else {
|
||||
decodedText += '&'
|
||||
advance(1)
|
||||
}
|
||||
} else {
|
||||
// Numeric character reference.
|
||||
const hex = head[0] === '&#x'
|
||||
const pattern = hex ? /^&#x([0-9a-f]+);?/i : /^&#([0-9]+);?/
|
||||
const body = pattern.exec(rawText)
|
||||
if (!body) {
|
||||
decodedText += head[0]
|
||||
advance(head[0].length)
|
||||
} else {
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
|
||||
let cp = Number.parseInt(body[1], hex ? 16 : 10)
|
||||
if (cp === 0) {
|
||||
cp = 0xfffd
|
||||
} else if (cp > 0x10ffff) {
|
||||
cp = 0xfffd
|
||||
} else if (cp >= 0xd800 && cp <= 0xdfff) {
|
||||
cp = 0xfffd
|
||||
} else if ((cp >= 0xfdd0 && cp <= 0xfdef) || (cp & 0xfffe) === 0xfffe) {
|
||||
// noop
|
||||
} else if (
|
||||
(cp >= 0x01 && cp <= 0x08) ||
|
||||
cp === 0x0b ||
|
||||
(cp >= 0x0d && cp <= 0x1f) ||
|
||||
(cp >= 0x7f && cp <= 0x9f)
|
||||
) {
|
||||
cp = CCR_REPLACEMENTS[cp] || cp
|
||||
}
|
||||
decodedText += String.fromCodePoint(cp)
|
||||
advance(body[0].length)
|
||||
}
|
||||
}
|
||||
}
|
||||
return decodedText
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#numeric-character-reference-end-state
|
||||
const CCR_REPLACEMENTS: Record<number, number | undefined> = {
|
||||
0x80: 0x20ac,
|
||||
0x82: 0x201a,
|
||||
0x83: 0x0192,
|
||||
0x84: 0x201e,
|
||||
0x85: 0x2026,
|
||||
0x86: 0x2020,
|
||||
0x87: 0x2021,
|
||||
0x88: 0x02c6,
|
||||
0x89: 0x2030,
|
||||
0x8a: 0x0160,
|
||||
0x8b: 0x2039,
|
||||
0x8c: 0x0152,
|
||||
0x8e: 0x017d,
|
||||
0x91: 0x2018,
|
||||
0x92: 0x2019,
|
||||
0x93: 0x201c,
|
||||
0x94: 0x201d,
|
||||
0x95: 0x2022,
|
||||
0x96: 0x2013,
|
||||
0x97: 0x2014,
|
||||
0x98: 0x02dc,
|
||||
0x99: 0x2122,
|
||||
0x9a: 0x0161,
|
||||
0x9b: 0x203a,
|
||||
0x9c: 0x0153,
|
||||
0x9e: 0x017e,
|
||||
0x9f: 0x0178
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -1,7 +1,6 @@
|
|||
import { ParserOptions, ElementNode, NodeTypes } from '@vue/compiler-core'
|
||||
import { isVoidTag, isHTMLTag, isSVGTag } from '@vue/shared'
|
||||
import { TRANSITION, TRANSITION_GROUP } from './runtimeHelpers'
|
||||
import { decodeHtml } from './decodeHtml'
|
||||
import { decodeHtmlBrowser } from './decodeHtmlBrowser'
|
||||
|
||||
export const enum DOMNamespaces {
|
||||
|
@ -15,7 +14,7 @@ export const parserOptions: ParserOptions = {
|
|||
isVoidTag,
|
||||
isNativeTag: tag => isHTMLTag(tag) || isSVGTag(tag),
|
||||
isPreTag: tag => tag === 'pre',
|
||||
decodeEntities: __BROWSER__ ? decodeHtmlBrowser : decodeHtml,
|
||||
decodeEntities: __BROWSER__ ? decodeHtmlBrowser : undefined,
|
||||
|
||||
isBuiltInComponent: (tag: string): symbol | undefined => {
|
||||
if (tag === 'Transition' || tag === 'transition') {
|
||||
|
|
|
@ -215,7 +215,12 @@ function createConfig(format, output, plugins = []) {
|
|||
}
|
||||
|
||||
function resolveExternal() {
|
||||
const treeShakenDeps = ['source-map-js', '@babel/parser', 'estree-walker']
|
||||
const treeShakenDeps = [
|
||||
'source-map-js',
|
||||
'@babel/parser',
|
||||
'estree-walker',
|
||||
'entities/lib/decode.js'
|
||||
]
|
||||
|
||||
if (isGlobalBuild || isBrowserESMBuild || isCompatPackage) {
|
||||
if (!packageOptions.enableNonBrowserBranches) {
|
||||
|
|
Loading…
Reference in New Issue