sitegen/src/file-viewer/highlight.ts
2025-07-07 20:58:02 -07:00

200 lines
5.1 KiB
TypeScript

/**
 * Language identifiers that can be highlighted.
 *
 * Each identifier doubles as the base name of its grammar file
 * (`highlight-grammar/<id>.plist`) and as the suffix that requested scope
 * names are matched against. The grammar loader tries the entries in the
 * order listed here and uses the first one whose `.<id>` suffix matches, so
 * keep the order stable.
 */
const languages = [
  "ts",
  "tsx",
  "zig",
  "json",
  "css",
  "astro",
  "mdx",
  "lua",
  "shell",
  "dosbatch",
  "powershell",
  "yaml",
  "toml",
  "xml",
  "python",
  "php",
  "diff",
] as const;
/**
 * Root scope names for languages whose grammar is not requested as
 * `source.<language>`. Languages without an entry here fall back to
 * `"source." + language` when a grammar is loaded.
 */
const altScopes: Record<string, string> = {
  astro: "text.html.astro",
  xml: "text.xml",
  php: "text.html.php",
};
/** Union of the string literals listed in `languages`. */
export type Language = (typeof languages)[number];
/**
 * Ordered rules that map a TextMate scope prefix to a style class.
 *
 * Each entry is `[scopePrefix, className, language?]`:
 * - `scopePrefix` is compared with `startsWith` against a token's scopes.
 * - `className` is the class written into the emitted `<span>`; `null` means
 *   the token is emitted without a span.
 * - `language`, when present, restricts the rule to that language.
 *
 * `getStyle` walks this list from top to bottom and stops at the first rule
 * that matches, so language-specific rules and longer prefixes must stay
 * above the shorter, general prefixes they would otherwise be hidden by.
 */
const scopes = [
  // CSS
  ["punctuation.definition.keyword", "keyword", "css"],
  ["entity.name.tag.css", "class", "css"],
  ["meta.selector.css", "method", "css"],
  ["entity.other.attribute-name.class.css", "builtin", "css"],
  ["punctuation.definition.entity", "builtin", "css"],
  ["variable.css", "parameter", "css"],

  // JSON
  ["support.type.property-name.json", "variable", "json"],
  ["constant.numeric", "method", "json"],
  ["constant", "class", "json"],

  // Lua
  ["entity.name", "class", "lua"],

  // Diff
  ["punctuation.definition.deleted", "variable", "diff"],
  ["markup.deleted", "variable", "diff"],
  ["punctuation.definition.inserted", "method", "diff"],
  ["markup.inserted", "method", "diff"],
  ["meta.diff.range", "string", "diff"],
  ["punctuation.definition.range", "string", "diff"],
  ["meta.toc-list-line.number", "keyword", "diff"],
  ["meta.diff", "comment", "diff"],

  // General (applies to every language)
  ["meta.object-literal.key", "property"],
  ["comment", "comment"],
  ["string", "string"],
  ["storage", "keyword"],
  ["keyword", "keyword"],
  ["variable.parameter", "parameter"],
  ["entity.name.function", "method"],
  ["support.type.primitive", "builtin"],
  ["entity.name.type", "class"],
  ["support.type", "class"],
  ["support.class", "class"],
  ["constant.language", "builtin"],
  ["constant", "constant"],
  ["support.constant", "constant"],
  ["meta.parameters", "parameter"],
  ["support.function", "method"],
  ["variable", "variable"],
  // Punctuation is deliberately left unstyled; this rule also shadows the
  // `meta.function-call` rule below for punctuation tokens.
  ["punctuation", null],
  ["meta.function-call", "method"],
] as const;
/** Arguments accepted by `highlightLines`. */
interface HighlightLinesOptions {
  /** Source text, already split into individual lines. */
  lines: string[];
  /** Grammar whose `tokenizeLine` is called for every line. */
  grammar: textmate.IGrammar;
  /** Tokenizer state passed to `tokenizeLine` for the first line. */
  state: textmate.StateStack;
  /** Language forwarded to `getStyle` to select language-specific rules. */
  language: Language;
}
/**
 * Picks the style class for a token from the scopes attached to it.
 *
 * The rules in `scopes` are tried from top to bottom. A rule is skipped when
 * it is tagged with a language other than `language`; otherwise it matches
 * if any entry of `scopesToCheck` starts with the rule's prefix. The first
 * matching rule decides the result, even if its class is `null`.
 *
 * NOTE: matching is a plain string prefix test, not a dot-segment test, so a
 * prefix such as `constant` also matches any scope that merely begins with
 * those characters.
 *
 * @param scopesToCheck - Scope names of the token being styled.
 * @param language - Language being highlighted.
 * @returns The class of the first matching rule, or `null` when no rule
 *   matches or the matching rule maps to `null`.
 */
export function getStyle(scopesToCheck: readonly string[], language: Language) {
  for (const rule of scopes) {
    // Rules with a third element only apply to that one language.
    if (rule[2] && rule[2] !== language) continue;
    const matches = scopesToCheck.some((s) => s.startsWith(rule[0]));
    if (matches) {
      return rule[1];
    }
  }
  return null;
}
/**
 * Tokenizes `lines` in order and renders them as HTML.
 *
 * Consecutive tokens that resolve to the same style share one
 * `<span class='…'>`; a span is closed only when the style changes or when
 * all lines have been processed, so a span may run across line breaks.
 * Every input line is followed by `"\n"` in the output.
 *
 * @returns The tokenizer state after the last line and the generated HTML.
 * @throws Error when the tokenizer reports `stoppedEarly` for a line.
 */
function highlightLines(options: HighlightLinesOptions) {
  const { lines, grammar, language } = options;
  const chunks: string[] = [];
  // Class of the span that is currently open, or null when none is open.
  let openStyle: string | null = null;
  let ruleState = options.state;

  for (const line of lines) {
    const tokenized = grammar.tokenizeLine(line, ruleState);
    if (tokenized.stoppedEarly) {
      throw new Error("TODO: Tokenization stopped early?");
    }
    ruleState = tokenized.ruleStack;

    for (const { startIndex, endIndex, scopes: tokenScopes } of tokenized.tokens) {
      const text = line.slice(startIndex, endIndex);
      // Whitespace-only tokens are emitted inside whatever span is open;
      // they never open or close one.
      if (text.trim().length !== 0) {
        const style = getStyle(tokenScopes, language);
        if (style !== openStyle) {
          if (openStyle) chunks.push("</span>");
          if (style) chunks.push(`<span class='${style}'>`);
        }
        openStyle = style;
      }
      chunks.push(ssr.escapeHtml(text));
    }
    chunks.push("\n");
  }

  if (openStyle) chunks.push("</span>");
  return { state: ruleState, html: chunks.join("") };
}
/**
 * Builds the TextMate registry used for highlighting.
 *
 * Loads the Oniguruma WASM binary, then creates a registry whose
 * `loadGrammar` resolves a scope name to the first entry of `languages`
 * whose `.<language>` suffix matches, and parses
 * `highlight-grammar/<language>.plist` from this module's directory.
 * Scope names that match no language resolve to `null`.
 *
 * Wrapped in `async.once` — presumably so initialization runs a single time
 * and later calls reuse the result; confirm against `#sitegen/async`.
 */
export const getRegistry = async.once(async () => {
  const wasmPath = require.resolve("vscode-oniguruma/release/onig.wasm");
  await oniguruma.loadWASM(await fs.readFile(wasmPath));

  return new textmate.Registry({
    onigLib: Promise.resolve({
      createOnigScanner: (patterns) => new oniguruma.OnigScanner(patterns),
      createOnigString: (s) => new oniguruma.OnigString(s),
    }),
    loadGrammar: async (scopeName: string) => {
      const lang = languages.find((candidate) =>
        scopeName.endsWith(`.${candidate}`),
      );
      if (lang === undefined) return null;

      const grammarPath = path.join(
        import.meta.dirname,
        `highlight-grammar/${lang}.plist`,
      );
      const source = await fs.readFile(grammarPath, "utf-8");
      return textmate.parseRawGrammar(source);
    },
  });
});
/**
 * Highlights `code` as `language` and returns the resulting HTML.
 *
 * The grammar is requested under the language's entry in `altScopes`, or
 * under `source.<language>` when it has none. The code is split on `"\n"`
 * and tokenized starting from `textmate.INITIAL`.
 *
 * @param code - Source text to highlight.
 * @param language - Language whose grammar should be used.
 * @returns HTML produced by `highlightLines`.
 * @throws Error when the registry yields no grammar for the language.
 */
export async function highlightCode(code: string, language: Language) {
  const scopeName = altScopes[language] ?? "source." + language;
  const registry = await getRegistry();
  const grammar = await registry.loadGrammar(scopeName);
  if (!grammar) {
    throw new Error(`No grammar found for language: ${language}`);
  }

  const result = highlightLines({
    lines: code.split("\n"),
    grammar,
    state: textmate.INITIAL,
    language,
  });
  return result.html;
}
/**
 * Validates the grammar set: checks that every entry of `languages` has a
 * `highlight-grammar/<language>.plist` file next to this module and that a
 * short sample can be highlighted with it.
 *
 * Every missing grammar is logged. The sanity check is skipped for a
 * language whose grammar file is missing, because highlighting would try to
 * load that same file; this lets the remaining languages still be validated
 * instead of stopping at the first missing one.
 *
 * @throws Error listing all missing grammars once every language has been
 *   checked; errors raised by the sanity check itself propagate unchanged.
 */
export async function main() {
  const missing: string[] = [];

  // validate exts
  for (const ext of languages) {
    const grammarPath = path.join(
      import.meta.dirname,
      `highlight-grammar/${ext}.plist`,
    );
    if (!fs.existsSync(grammarPath)) {
      console.error(`Missing grammar for ${ext}`);
      missing.push(ext);
      continue;
    }
    // Sanity check
    await highlightCode("wwwwwwwwwwwaaaaaaaaaaaaaaaa", ext);
  }

  // Still fail the run so callers relying on a rejection keep seeing one.
  if (missing.length > 0) {
    throw new Error(`Missing grammars: ${missing.join(", ")}`);
  }
}
import * as async from "#sitegen/async";
import * as fs from "#sitegen/fs";
import * as path from "node:path";
import * as oniguruma from "vscode-oniguruma";
import * as textmate from "vscode-textmate";
import * as ssr from "#ssr";