Skip to content

Commit

Permalink
feat: introduce lazy embedded, more efficient bundle (#59)
Browse files Browse the repository at this point in the history
  • Loading branch information
antfu authored Dec 21, 2023
1 parent 9cc01bc commit 90fa148
Show file tree
Hide file tree
Showing 7 changed files with 200 additions and 91 deletions.
24 changes: 16 additions & 8 deletions packages/shikiji-core/src/registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,28 +60,36 @@ export class Registry extends TextMateRegistry {
if (this.getGrammar(lang.name))
return

const embeddedLazilyBy = new Set(Object.values(this._langMap).filter(i => i.embeddedLangsLazy?.includes(lang.name)))

this._resolver.addLanguage(lang)
const embeddedLanguages = lang.embeddedLangs?.reduce(async (acc, l, idx) => {
if (!this.getLoadedLanguages().includes(l) && this._resolver.getLangRegistration(l)) {
await this._resolver.loadGrammar(this._resolver.getLangRegistration(l).scopeName)
acc[this._resolver.getLangRegistration(l).scopeName] = idx + 2
return acc
}
}, {} as any)

const grammarConfig: IGrammarConfiguration = {
embeddedLanguages,
balancedBracketSelectors: lang.balancedBracketSelectors || ['*'],
unbalancedBracketSelectors: lang.unbalancedBracketSelectors || [],
}

// @ts-expect-error Private members, set this to override the previous grammar (that can be a stub)
this._syncRegistry._rawGrammars.set(lang.scopeName, lang)
const g = await this.loadGrammarWithConfiguration(lang.scopeName, 1, grammarConfig)
this._resolvedGrammars[lang.name] = g!
if (lang.aliases) {
lang.aliases.forEach((alias) => {
this.alias[alias] = lang.name
})
}

// If there is a language that embeds this language lazily, we need to reload it
if (embeddedLazilyBy.size) {
for (const e of embeddedLazilyBy) {
delete this._resolvedGrammars[e.name]
// @ts-expect-error clear cache
this._syncRegistry?._injectionGrammars?.delete(e.scopeName)
// @ts-expect-error clear cache
this._syncRegistry?._grammars?.delete(e.scopeName)
await this.loadLanguage(this._langMap[e.name])
}
}
}

async init() {
Expand Down
5 changes: 5 additions & 0 deletions packages/shikiji-core/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,11 @@ export interface LanguageRegistration extends RawGrammar {
* languages for each parent language.
*/
embeddedLangs?: string[]
/**
* A list of languages that the current language embeds lazily.
* Unlike `embeddedLangs`, these languages are not loaded automatically;
* when one of them is loaded later, the embedding language is reloaded to pick it up.
*/
embeddedLangsLazy?: string[]
balancedBracketSelectors?: string[]
unbalancedBracketSelectors?: string[]

Expand Down
49 changes: 37 additions & 12 deletions packages/shikiji/scripts/prepare/langs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@ import fg from 'fast-glob'
import type { LanguageRegistration } from 'shikiji-core'
import { COMMENT_HEAD } from './constants'

/**
* Languages that embed a large number of other languages.
* Their embedded languages are not emitted as eager dependencies;
* they are recorded under `embeddedLangsLazy` and only loaded on demand.
*/
const LANGS_LAZY_EMBEDDED = [
'markdown',
'mdx',
]

export async function prepareLangs() {
const allLangFiles = await fg('*.json', {
cwd: './node_modules/tm-grammars/grammars',
Expand All @@ -30,13 +39,13 @@ export async function prepareLangs() {
aliases: lang.aliases,
}

// F# and Markdown has circular dependency
if (lang.name === 'fsharp' && json.embeddedLangs)
json.embeddedLangs = json.embeddedLangs.filter((i: string) => i !== 'markdown')
// Lazily-embedding langs (markdown, mdx) keep their embedded langs as lazy references instead of eager dependencies
if (LANGS_LAZY_EMBEDDED.includes(lang.name)) {
json.embeddedLangsLazy = json.embeddedLangs
json.embeddedLangs = []
}

const deps: string[] = [
...(json.embeddedLangs || []),
]
const deps: string[] = json.embeddedLangs || []

await fs.writeFile(`./src/assets/langs/${lang.name}.ts`, `${COMMENT_HEAD}
import type { LanguageRegistration } from 'shikiji-core'
Expand All @@ -51,15 +60,34 @@ ${[
' lang',
].join(',\n') || ''}
]
`, 'utf-8')
`.replace(/\n\n+/g, '\n\n'), 'utf-8')
}

async function writeLanguageBundleIndex(
fileName: string,
ids: string[],
exclude: string[] = [],
) {
const bundled = ids.map(id => grammars.find(i => i.name === id)!).filter(i => !exclude.includes(i.name))
// We flatten all the embedded langs
const bundledIds = new Set<string>(ids)
let changed = true
while (changed) {
changed = false
for (const id of bundledIds) {
if (LANGS_LAZY_EMBEDDED.includes(id))
continue
const lang = grammars.find(i => i.name === id)
if (!lang)
continue
for (const e of lang.embedded || []) {
if (!bundledIds.has(e)) {
bundledIds.add(e)
changed = true
}
}
}
}

const bundled = Array.from(bundledIds).map(id => grammars.find(i => i.name === id)!).filter(Boolean)

const info = bundled.map(i => ({
id: i.name,
Expand Down Expand Up @@ -103,8 +131,5 @@ export const bundledLanguages = {
...grammars.filter(i => i.categories?.includes('web')).map(i => i.name),
'shellscript',
],
[
'coffee',
],
)
}
95 changes: 94 additions & 1 deletion packages/shikiji/src/assets/langs-bundle-web.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,37 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [
'name': 'Blade',
'import': (() => import('./langs/blade')) as DynamicImportLanguageRegistration
},
{
'id': 'c',
'name': 'C',
'import': (() => import('./langs/c')) as DynamicImportLanguageRegistration
},
{
'id': 'coffee',
'name': 'CoffeeScript',
'aliases': [
'coffeescript'
],
'import': (() => import('./langs/coffee')) as DynamicImportLanguageRegistration
},
{
'id': 'cpp',
'name': 'C++',
'aliases': [
'c++'
],
'import': (() => import('./langs/cpp')) as DynamicImportLanguageRegistration
},
{
'id': 'css',
'name': 'CSS',
'import': (() => import('./langs/css')) as DynamicImportLanguageRegistration
},
{
'id': 'glsl',
'name': 'GLSL',
'import': (() => import('./langs/glsl')) as DynamicImportLanguageRegistration
},
{
'id': 'graphql',
'name': 'GraphQL',
Expand Down Expand Up @@ -55,6 +81,11 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [
'name': 'Imba',
'import': (() => import('./langs/imba')) as DynamicImportLanguageRegistration
},
{
'id': 'java',
'name': 'Java',
'import': (() => import('./langs/java')) as DynamicImportLanguageRegistration
},
{
'id': 'javascript',
'name': 'JavaScript',
Expand All @@ -78,6 +109,11 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [
'name': 'JSON',
'import': (() => import('./langs/json')) as DynamicImportLanguageRegistration
},
{
'id': 'json5',
'name': 'JSON5',
'import': (() => import('./langs/json5')) as DynamicImportLanguageRegistration
},
{
'id': 'jsonc',
'name': 'JSON with Comments',
Expand All @@ -103,6 +139,19 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [
'name': 'Less',
'import': (() => import('./langs/less')) as DynamicImportLanguageRegistration
},
{
'id': 'lua',
'name': 'Lua',
'import': (() => import('./langs/lua')) as DynamicImportLanguageRegistration
},
{
'id': 'markdown',
'name': 'Markdown',
'aliases': [
'md'
],
'import': (() => import('./langs/markdown')) as DynamicImportLanguageRegistration
},
{
'id': 'marko',
'name': 'Marko',
Expand Down Expand Up @@ -136,6 +185,27 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [
],
'import': (() => import('./langs/pug')) as DynamicImportLanguageRegistration
},
{
'id': 'python',
'name': 'Python',
'aliases': [
'py'
],
'import': (() => import('./langs/python')) as DynamicImportLanguageRegistration
},
{
'id': 'r',
'name': 'R',
'import': (() => import('./langs/r')) as DynamicImportLanguageRegistration
},
{
'id': 'ruby',
'name': 'Ruby',
'aliases': [
'rb'
],
'import': (() => import('./langs/ruby')) as DynamicImportLanguageRegistration
},
{
'id': 'sass',
'name': 'Sass',
Expand All @@ -157,6 +227,11 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [
],
'import': (() => import('./langs/shellscript')) as DynamicImportLanguageRegistration
},
{
'id': 'sql',
'name': 'SQL',
'import': (() => import('./langs/sql')) as DynamicImportLanguageRegistration
},
{
'id': 'stylus',
'name': 'Stylus',
Expand All @@ -170,6 +245,11 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [
'name': 'Svelte',
'import': (() => import('./langs/svelte')) as DynamicImportLanguageRegistration
},
{
'id': 'toml',
'name': 'TOML',
'import': (() => import('./langs/toml')) as DynamicImportLanguageRegistration
},
{
'id': 'tsx',
'name': 'TSX',
Expand Down Expand Up @@ -202,14 +282,27 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [
'id': 'wgsl',
'name': 'WGSL',
'import': (() => import('./langs/wgsl')) as DynamicImportLanguageRegistration
},
{
'id': 'xml',
'name': 'XML',
'import': (() => import('./langs/xml')) as DynamicImportLanguageRegistration
},
{
'id': 'yaml',
'name': 'YAML',
'aliases': [
'yml'
],
'import': (() => import('./langs/yaml')) as DynamicImportLanguageRegistration
}
]

export const bundledLanguagesBase = Object.fromEntries(bundledLanguagesInfo.map(i => [i.id, i.import]))

export const bundledLanguagesAlias = Object.fromEntries(bundledLanguagesInfo.flatMap(i => i.aliases?.map(a => [a, i.import]) || []))

export type BundledLanguage = 'astro' | 'bash' | 'blade' | 'css' | 'gql' | 'graphql' | 'haml' | 'handlebars' | 'hbs' | 'html' | 'http' | 'imba' | 'jade' | 'javascript' | 'jinja' | 'jison' | 'js' | 'json' | 'jsonc' | 'jsonl' | 'jsx' | 'julia' | 'less' | 'marko' | 'mdc' | 'mdx' | 'php' | 'postcss' | 'pug' | 'sass' | 'scss' | 'sh' | 'shell' | 'shellscript' | 'styl' | 'stylus' | 'svelte' | 'ts' | 'tsx' | 'typescript' | 'vue' | 'vue-html' | 'wasm' | 'wgsl' | 'zsh'
export type BundledLanguage = 'astro' | 'bash' | 'blade' | 'c' | 'c++' | 'coffee' | 'coffeescript' | 'cpp' | 'css' | 'glsl' | 'gql' | 'graphql' | 'haml' | 'handlebars' | 'hbs' | 'html' | 'http' | 'imba' | 'jade' | 'java' | 'javascript' | 'jinja' | 'jison' | 'js' | 'json' | 'json5' | 'jsonc' | 'jsonl' | 'jsx' | 'julia' | 'less' | 'lua' | 'markdown' | 'marko' | 'md' | 'mdc' | 'mdx' | 'php' | 'postcss' | 'pug' | 'py' | 'python' | 'r' | 'rb' | 'ruby' | 'sass' | 'scss' | 'sh' | 'shell' | 'shellscript' | 'sql' | 'styl' | 'stylus' | 'svelte' | 'toml' | 'ts' | 'tsx' | 'typescript' | 'vue' | 'vue-html' | 'wasm' | 'wgsl' | 'xml' | 'yaml' | 'yml' | 'zsh'

export const bundledLanguages = {
...bundledLanguagesBase,
Expand Down
19 changes: 19 additions & 0 deletions packages/shikiji/test/bundle.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { expect, it } from 'vitest'

// Loads every language exposed by the full bundle and pins how many end up registered.
it('bundle-full', async () => {
  const fullBundle = await import('shikiji/bundle/full')
  const allLangIds = Object.keys(fullBundle.bundledLanguages)
  const highlighter = await fullBundle.getHighlighter({ langs: allLangIds })

  const loadedCount = highlighter.getLoadedLanguages().length
  expect(loadedCount)
    .toMatchInlineSnapshot(`233`)
})

// Loads every language exposed by the web bundle and pins how many end up registered.
it('bundle-web', async () => {
  const webBundle = await import('shikiji/bundle/web')
  const allLangIds = Object.keys(webBundle.bundledLanguages)
  const highlighter = await webBundle.getHighlighter({ langs: allLangIds })

  const loadedCount = highlighter.getLoadedLanguages().length
  expect(loadedCount)
    .toMatchInlineSnapshot(`72`)
})
Loading

0 comments on commit 90fa148

Please sign in to comment.