Skip to content

Commit e5b0843

Browse files
authored
perf(plugins): optimize highlight to use codeToTokens fast path to skip hast intermediary (#170)
1 parent c691c39 commit e5b0843

2 files changed

Lines changed: 100 additions & 64 deletions

File tree

packages/comark/src/plugins/highlight.ts

Lines changed: 100 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import type { ComarkElement, ComarkNode, ComarkTree, ComarkElementAttributes } f
33
import { defineComarkPlugin } from '../utils/helpers.ts'
44
import { createShikiPrimitive } from 'shiki'
55
import { createJavaScriptRegexEngine } from 'shiki/engine/javascript'
6-
import { codeToHast } from 'shiki/core'
6+
import { codeToHast, codeToTokens, getTokenStyleObject, stringifyTokenStyle } from 'shiki/core'
77
import comakLanguage from '../utils/comark.tmLanguage.ts'
88

99
export interface HighlightOptions {
@@ -255,69 +255,111 @@ export async function highlightCodeBlocks(tree: ComarkTree, options: HighlightOp
255255
dark: lightTheme !== darkTheme ? darkTheme : undefined,
256256
}
257257

258-
// eslint-disable-next-line unicorn/no-new-array -- pre-allocated for perf
259-
const highlightedResults: Array<{ nodes: ComarkNode[]; language: string }> = new Array(codeBlocks.length)
260-
for (let i = 0; i < codeBlocks.length; i++) {
261-
const { node } = codeBlocks[i]
262-
const code = (node[2] as any)[2] as string
263-
const attrs = node[1] as CodeBlockAttributes
264-
const language: string = (attrs as any)?.language
265-
try {
266-
const result = codeToHast(hl, code, {
267-
lang: language,
268-
transformers: options.transformers,
269-
themes: themeOptions,
270-
meta: {
271-
__raw: attrs.meta,
272-
},
273-
})
274-
highlightedResults[i] = {
275-
nodes: result.children.map(hastToComarkNode) as ComarkNode[],
276-
language,
277-
}
278-
} catch {
279-
highlightedResults[i] = { nodes: [code], language }
280-
}
281-
}
282-
258+
const hasTransformers = options.transformers && options.transformers.length > 0
283259
const darkClassSuffix = options.themes?.dark?.name ? ` dark:${options.themes.dark.name}` : ''
284260

285261
// Build new nodes array, spine-copying only paths to modified <pre> nodes
286262
const newNodes = [...tree.nodes] as ComarkNode[]
287263
for (let i = 0; i < codeBlocks.length; i++) {
288264
const { node, path } = codeBlocks[i]
289-
const preAttrs = node[1] as Record<string, any>
290-
const result = highlightedResults[i]
265+
const code = (node[2] as any)[2] as string
266+
const attrs = node[1] as CodeBlockAttributes
267+
const preAttrs = attrs as Record<string, any>
268+
const language: string = (attrs as any)?.language
291269

292-
const preNode = result.nodes[0]
293270
let classStr: string
294-
if (typeof preNode === 'string') {
295-
classStr = 'shiki' + (options.themes?.light?.name ? ` ${options.themes.light.name}` : '')
296-
} else {
297-
const cls = (preNode[1] as ComarkElementAttributes).class
298-
classStr = Array.isArray(cls) ? cls.join(' ') : String(cls)
299-
}
300-
if (darkClassSuffix) classStr += darkClassSuffix
271+
let codeChildren: ComarkNode[]
301272

302-
const codeChildren =
303-
typeof preNode === 'string' ? preNode : ((preNode[2] as ComarkElement).slice(2) as ComarkNode[])
304-
305-
if (Array.isArray(codeChildren)) {
306-
const highlightSet = Array.isArray(preAttrs.highlights) ? new Set<number>(preAttrs.highlights) : null
307-
let line = 1
308-
for (const child of codeChildren) {
309-
if (Array.isArray(child)) {
310-
if (highlightSet !== null && highlightSet.has(line)) {
311-
child[1].class = `${child[1].class ?? ''} highlight`.trim()
312-
// TODO: (enforcing default style) once we unify all ecosystem styles we can remove this
313-
child[1].style = 'display: inline-block'
314-
} else {
315-
// TODO: (enforcing default style) once we unify all ecosystem styles we can remove this
316-
child[1].style = 'display: inline'
273+
try {
274+
if (hasTransformers) {
275+
// Transformers operate on hast, so we must go through codeToHast
276+
const result = codeToHast(hl, code, {
277+
lang: language,
278+
transformers: options.transformers,
279+
themes: themeOptions,
280+
meta: { __raw: attrs.meta },
281+
})
282+
const preNode = result.children.map(hastToComarkNode)[0] as ComarkElement
283+
const cls = (preNode[1] as ComarkElementAttributes).class
284+
classStr = Array.isArray(cls) ? cls.join(' ') : String(cls)
285+
codeChildren = (preNode[2] as ComarkElement).slice(2) as ComarkNode[]
286+
} else {
287+
// Fast path: build ComarkNodes directly from tokens, skipping hast
288+
const result = codeToTokens(hl, code, {
289+
lang: language,
290+
themes: themeOptions,
291+
})
292+
classStr = `shiki ${result.themeName || ''}`
293+
294+
// Replicate shiki's mergeWhitespaceTokens: merge pure-whitespace tokens
295+
// into the following token (unless underline/strikethrough styled)
296+
const tokenLines = result.tokens
297+
codeChildren = []
298+
for (let li = 0; li < tokenLines.length; li++) {
299+
const line = tokenLines[li]
300+
const spanCount = line.length
301+
302+
// Merge whitespace tokens inline while building spans
303+
let carry = ''
304+
const spans: ComarkNode[] = []
305+
for (let t = 0; t < spanCount; t++) {
306+
const tk = line[t]
307+
const canMerge = !(
308+
(tk.fontStyle && (tk.fontStyle & 8 /* Strikethrough */ || tk.fontStyle & 4)) /* Underline */
309+
)
310+
if (canMerge && /^\s+$/.test(tk.content) && t + 1 < spanCount) {
311+
carry += tk.content
312+
} else if (carry) {
313+
const style = stringifyTokenStyle(tk.htmlStyle || getTokenStyleObject(tk))
314+
if (canMerge) {
315+
spans.push(style ? ['span', { style }, carry + tk.content] : ['span', {}, carry + tk.content])
316+
} else {
317+
spans.push(['span', {}, carry])
318+
spans.push(style ? ['span', { style }, tk.content] : ['span', {}, tk.content])
319+
}
320+
carry = ''
321+
} else {
322+
const style = stringifyTokenStyle(tk.htmlStyle || getTokenStyleObject(tk))
323+
spans.push(style ? ['span', { style }, tk.content] : ['span', {}, tk.content])
324+
}
317325
}
326+
// If trailing whitespace wasn't merged, emit it
327+
if (carry) {
328+
spans.push(['span', {}, carry])
329+
}
330+
331+
// eslint-disable-next-line unicorn/no-new-array -- pre-allocated for perf
332+
const lineNode = new Array(spans.length + 2) as ComarkElement
333+
lineNode[0] = 'span'
334+
lineNode[1] = { class: 'line' }
335+
for (let s = 0; s < spans.length; s++) lineNode[s + 2] = spans[s]
336+
337+
codeChildren.push(lineNode as ComarkNode)
338+
if (li < tokenLines.length - 1) codeChildren.push('\n')
339+
}
340+
}
341+
} catch {
342+
classStr = 'shiki'
343+
codeChildren = [code]
344+
}
345+
346+
if (darkClassSuffix) classStr += darkClassSuffix
318347

319-
line += 1
348+
// Apply line highlights
349+
const highlightSet = Array.isArray(preAttrs.highlights) ? new Set<number>(preAttrs.highlights) : null
350+
let line = 1
351+
for (const child of codeChildren) {
352+
if (Array.isArray(child)) {
353+
if (highlightSet !== null && highlightSet.has(line)) {
354+
child[1].class = `${child[1].class ?? ''} highlight`.trim()
355+
// TODO: (enforcing default style) once we unify all ecosystem styles we can remove this
356+
child[1].style = 'display: inline-block'
357+
} else {
358+
// TODO: (enforcing default style) once we unify all ecosystem styles we can remove this
359+
child[1].style = 'display: inline'
320360
}
361+
362+
line += 1
321363
}
322364
}
323365

@@ -351,17 +393,12 @@ export async function highlightCodeBlocks(tree: ComarkTree, options: HighlightOp
351393

352394
const codeEl = node[2] as ComarkElement
353395
const codeAttrs = (codeEl[1] as Record<string, any>) || {}
354-
let newPreNode: ComarkNode
355-
if (Array.isArray(codeChildren)) {
356-
// eslint-disable-next-line unicorn/no-new-array -- pre-allocated for perf
357-
const codeNode = new Array(codeChildren.length + 2) as ComarkElement
358-
codeNode[0] = 'code'
359-
codeNode[1] = codeAttrs
360-
for (let j = 0; j < codeChildren.length; j++) codeNode[j + 2] = codeChildren[j]
361-
newPreNode = ['pre', newPreAttrs, codeNode]
362-
} else {
363-
newPreNode = ['pre', newPreAttrs, ['code', codeAttrs, codeChildren]]
364-
}
396+
// eslint-disable-next-line unicorn/no-new-array -- pre-allocated for perf
397+
const codeNode = new Array(codeChildren.length + 2) as ComarkElement
398+
codeNode[0] = 'code'
399+
codeNode[1] = codeAttrs
400+
for (let j = 0; j < codeChildren.length; j++) codeNode[j + 2] = codeChildren[j]
401+
const newPreNode: ComarkNode = ['pre', newPreAttrs, codeNode]
365402

366403
if (path.length === 1) {
367404
newNodes[path[0]] = newPreNode

packages/comark/test/utils/index.ts

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
// @ts-expect-error - ignore @nuxtjs/mdc types
21
import type { MDCRoot } from '@nuxtjs/mdc'
32
import type { ComarkTree } from 'comark'
43
import remarkGFM from 'remark-gfm'

0 commit comments

Comments
 (0)