start the markdown parser

This commit is contained in:
chloe caruso 2025-07-08 01:09:55 -07:00
parent ea5f2bc325
commit 4c2a4f7578
2 changed files with 168 additions and 0 deletions

167
framework/lib/markdown.tsx Normal file
View file

@ -0,0 +1,167 @@
/* Implementation of CommonMark specification for markdown with support
 * for custom syntax extensions via the parser options. Instead of
 * returning an AST that has a second conversion pass to JSX, the
 * returned value of 'parse' is 'engine.Node' which can be stringified
 * via clover's SSR engine. This way, generation optimizations, async
 * components, and other features are gained for free here.
 */
function parse(src: string, options: Partial<ParseOpts> = {}) {
  // Not implemented yet: the body is empty, so nothing is returned for now.
}
/* Component form of 'parse' for use in JSX. A component receives a single
 * props object and needs a capitalized name, so this wrapper separates
 * 'src' from the remaining props and hands both to 'parse'. */
export function Markdown(props: { src: string } & Partial<ParseOpts>) {
  const { src, ...options } = props;
  return parse(src, options);
}
/* Parse inline-level markdown in 'src' into a list of 'engine.Node' parts:
 * plain strings interleaved with whatever the rules produce.
 *
 * Every rule that has a 'match' pattern is searched in the remaining source
 * and the match closest to the start is tried first; when two rules match at
 * the same position, the one declared first wins. When a rule's 'parse'
 * rejects a match, that rule resumes searching after the rejected text, so a
 * later occurrence (for example the inner '[' of nested brackets) can still
 * be parsed. Text that no rule accepts is emitted unchanged.
 *
 * options.rules - rules to apply; defaults to 'inlineRules'.
 * options.links - link reference definitions handed to the rules.
 */
function parseInline(src: string, options: Partial<InlineOpts> = {}) {
  const { rules = inlineRules, links = new Map() } = options;
  const opts: InlineOpts = { rules, links };
  const parts: engine.Node[] = [];
  // Rules without a 'match' are unfinished placeholders and are ignored.
  // Each pattern is cloned with the global flag so that a search can resume
  // from an offset through 'lastIndex' instead of slicing the string, which
  // would hide the preceding characters from lookbehind assertions.
  const scanners = Object.values(rules)
    .filter((rule) => rule.match)
    .map((rule) => ({
      rule,
      pattern: new RegExp(
        rule.match.source,
        rule.match.flags.includes("g") ? rule.match.flags : rule.match.flags + "g",
      ),
      // Offset in 'src' before which this rule's matches were already rejected.
      resumeAt: 0,
    }));
  while (true) {
    // Find the earliest pending match across all rules. The strict '<' keeps
    // the first-declared rule when several match at the same position.
    let next: { scanner: (typeof scanners)[number]; found: RegExpExecArray } | null = null;
    for (const scanner of scanners) {
      scanner.pattern.lastIndex = scanner.resumeAt;
      const found = scanner.pattern.exec(src);
      if (found && (!next || found.index < next.found.index)) {
        next = { scanner, found };
      }
    }
    if (!next) break;
    const { scanner, found } = next;
    const after = src.slice(found.index + found[0].length);
    const parsed = scanner.rule.parse({ after, match: found[0], opts });
    if (!parsed) {
      // Skip the whole rejected match (at least one character, so an empty
      // match cannot stall the scan) and let this rule look further ahead.
      scanner.resumeAt = found.index + Math.max(found[0].length, 1);
      continue;
    }
    parts.push(src.slice(0, found.index), parsed.result);
    // A rule that does not report 'rest' consumed only the matched text.
    src = parsed.rest ?? after;
    // 'src' changed, so previously rejected offsets no longer apply.
    for (const other of scanners) other.resumeAt = 0;
  }
  parts.push(src);
  return parts;
}
// -- interfaces --
/* Options accepted (all optional) by 'parse' and, as props, by 'Markdown'. */
interface ParseOpts {
  // Block-level rules, keyed by rule name.
  blockRules: Record<string, BlockRule>;
  // Inline-level rules, keyed by rule name.
  inlineRules: Record<string, InlineRule>;
}
/* Options for 'parseInline'; the resolved object is also passed to every
 * rule's 'parse' as 'opts'. */
interface InlineOpts {
  // Rules to apply, keyed by rule name. 'parseInline' falls back to the
  // exported 'inlineRules' when this is not given.
  rules: Record<string, InlineRule>;
  // Link reference definitions keyed by label; the 'link' rule looks
  // reference-style links up in this map.
  links: Map<string, LinkRef>;
}
/* One inline syntax construct that 'parseInline' can recognize. */
interface InlineRule {
  // Pattern searched in the remaining source to find a possible start of
  // the construct.
  match: RegExp;
  // Called for a match of the pattern above. 'match' is the matched text,
  // 'after' is the source following it, and 'opts' are the active inline
  // options. Returning null rejects the match.
  parse(opts: {
    after: string;
    match: string;
    opts: InlineOpts;
  }): InlineParse | null;
}
/* Successful outcome of an 'InlineRule.parse' call. */
interface InlineParse {
  // Node to emit in place of the consumed source.
  result: engine.Node;
  // Source still left to parse after the construct. When omitted,
  // 'parseInline' continues with the 'after' text it passed to the rule.
  rest?: string;
}
/* Target of a link. The 'link' rule spreads both fields onto the '<a>'
 * element it creates. */
interface LinkRef {
  href: string;
  // NOTE(review): null presumably means "no title given" - confirm once
  // link titles are actually parsed.
  title: string | null;
}
/* Shape of a block-level rule. Nothing visible in this file calls it yet, so
 * the 'parse' argument and return type are still placeholders. */
interface BlockRule {
  match: RegExp;
  parse(opts: {}): unknown;
}
/* Built-in inline rules, keyed by name. 'parseInline' uses this table when
 * no 'rules' option is given. Entries that are still '{}' are placeholders
 * without a 'match' pattern; 'parseInline' skips them.
 */
export const inlineRules: Record<string, InlineRule> = {
  code: {
    match: /`+/,
    // 6.1 - code spans
    parse({ after, match }) {
      // The closing backtick string has to be exactly as long as the opening
      // one; shorter or longer backtick runs in between are content.
      const runs = /`+/g;
      let end = -1;
      for (let run = runs.exec(after); run; run = runs.exec(after)) {
        if (run[0].length === match.length) {
          end = run.index;
          break;
        }
      }
      if (end === -1) return null;
      // First, line endings are converted to spaces.
      let inner = after.slice(0, end).replace(/\r\n?|\n/g, " ");
      const rest = after.slice(end + match.length);
      // If the resulting string both begins and ends with a space
      // character, but does not consist entirely of space characters,
      // a single space character is removed from the front and back.
      if (inner.startsWith(" ") && inner.endsWith(" ") && /[^ ]/.test(inner)) {
        inner = inner.slice(1, -1);
      }
      return { result: <code>{inner}</code>, rest };
    },
  },
  emphasis: {},
  link: {
    // An opening bracket that is not part of the image marker '!['.
    match: /(?<!!)\[/,
    parse({ after, opts }) {
      // Match '[' to let the inner-most link win.
      const splitText = splitFirst(after, /[[\]]/);
      if (!splitText) return null;
      if (splitText.delim !== "]") return null;
      const { first: textSrc, rest: afterText } = splitText;
      let href: string, title: string | null = null, rest: string;
      if (afterText[0] === "(") {
        // Inline link
        const splitTarget = splitFirst(afterText.slice(1), /\)/);
        if (!splitTarget) return null;
        ({ rest } = splitTarget);
        const target = parseLinkTarget(splitTarget.first);
        if (!target) return null;
        ({ href, title } = target);
      } else if (afterText[0] === "[") {
        const splitTarget = splitFirst(afterText.slice(1), /]/);
        if (!splitTarget) return null;
        const name = splitTarget.first.trim().length === 0
          // Collapsed reference link
          ? textSrc.trim()
          // Full Reference Link
          : splitTarget.first.trim();
        const target = opts.links.get(name);
        if (!target) return null;
        ({ href, title } = target);
        ({ rest } = splitTarget);
      } else {
        // Shortcut reference link
        const target = opts.links.get(textSrc);
        if (!target) return null;
        ({ href, title } = target);
        rest = afterText;
      }
      return {
        // The link text is inline content itself, so it is parsed recursively.
        result: <a {...{ href, title }}>{parseInline(textSrc, opts)}</a>,
        rest,
      };
    },
  },
  image: {},
  autolink: {},
  html: {},
  br: {
    // One or more spaces before a newline, or a backslash before a newline.
    match: / +\n|\\\n/,
    parse() {
      return { result: <br /> };
    },
  },
};
/* Turn the raw text found between the parentheses of an inline link into a
 * destination and a title. For now the whole text is used as the destination
 * and the title is always null. */
function parseLinkTarget(src: string) {
  const href: string = src;
  const title: string | null = null;
  return { href, title };
}
/* Find a delimiter while considering backslash escapes.
 *
 * Searches 'text' for the first occurrence of 'match' that is not escaped,
 * i.e. not directly preceded by an odd number of backslashes. Returns null
 * when there is none. Otherwise 'first' is the text before the delimiter,
 * 'delim' is the delimiter text that was matched, and 'rest' is everything
 * after it. An escaped delimiter stays in 'first' as literal text, with the
 * backslash that escaped it removed.
 */
function splitFirst(text: string, match: RegExp) {
  let first = "";
  while (true) {
    const find = text.match(match);
    if (!find) return null;
    const delim = find[0];
    const index = UNWRAP(find.index);
    // Count the backslashes directly in front of the delimiter. Pairs of
    // backslashes escape each other, so only an odd count escapes the
    // delimiter.
    let backslashes = 0;
    while (index - backslashes > 0 && text[index - backslashes - 1] === "\\") {
      backslashes += 1;
    }
    const tail = text.slice(index + delim.length);
    if (backslashes % 2 === 0) {
      return { first: first + text.slice(0, index), delim, rest: tail };
    }
    // Escaped: drop the escaping backslash but keep the delimiter itself as
    // part of the text, then keep looking in the remainder.
    first += text.slice(0, index - 1) + delim;
    text = tail;
  }
}
// Ad-hoc smoke test: parses a sample containing several code spans and prints
// whatever 'engine.ssrSync' returns for it. It sits at module top level, so it
// runs every time this file is loaded.
// NOTE(review): looks like leftover debugging output - confirm and remove
// before this module is imported by real pages.
console.log(engine.ssrSync(parseInline("meow `bwaa` `` ` `` `` `z``")));
import * as engine from "#ssr";import type { ParseOptions } from "node:querystring";

View file

@ -120,6 +120,7 @@ export const imagePresets = [
"-effort",
"9",
"-update",
"1",
"-frames:v",
"1",
],