start the markdown parser

2025-07-08 01:09:55 -07:00 · 2025-07-08 01:09:55 -07:00 · 4c2a4f7578
commit 4c2a4f7578
parent ea5f2bc325
2 changed files with 168 additions and 0 deletions
--- a/framework/lib/markdown.tsx
+++ b/framework/lib/markdown.tsx
@ -0,0 +1,167 @@
 /* Implementation of the CommonMark specification for markdown, with
 * support for custom syntax extensions via the parser options. Instead of
 * returning an AST that has a second conversion pass to JSX, the
 * returned value of 'parse' is 'engine.Node' which can be stringified
 * via clover's SSR engine. This way, generation optimizations, async
 * components, and other features are gained for free here.
 *
 * NOTE: the body below is still empty, so 'parse' currently returns
 * 'undefined' for every input and ignores both arguments.
 */
 function parse(src: string, options: Partial<ParseOpts> = {}) {
 }
 /* JSX-facing wrapper around 'parse'. Components receive a single props
 * object and need a capitalized name, so this pulls 'src' out of the
 * props and forwards every remaining prop to 'parse' as its options. */
 export function Markdown(props: { src: string } & Partial<ParseOpts>) {
  const { src, ...options } = props;
  return parse(src, options);
 }
 /* Parse inline markdown into a flat list of nodes: literal text segments
 * interleaved with the results produced by the inline rules.
 *
 * - src: the inline source text.
 * - options.rules: rule table to use, defaults to 'inlineRules'. Entries
 *   without a 'match' pattern (placeholders) are ignored.
 * - options.links: reference definitions handed to every rule.
 *
 * Scanning always continues from the construct that starts EARLIEST in
 * the remaining text; table order only breaks ties at the same offset.
 * When a rule declines a match (its 'parse' returns null) the matched
 * text stays literal and that rule resumes searching after it, so a
 * later valid construct (e.g. an inner link) is still found.
 *
 * Returns the node list; the final element is always the trailing text
 * (possibly an empty string).
 */
 function parseInline(src: string, options: Partial<InlineOpts> = {}) {
  const { rules = inlineRules, links = new Map() } = options;
  const opts: InlineOpts = { rules, links };
  const parts: engine.Node[] = [];
  // One scanner per usable rule. The pattern is cloned with the 'g' flag
  // so 'lastIndex' can move the search start without slicing 'src',
  // which keeps lookbehind assertions working on the preceding text.
  const scanners = Object.values(rules)
    .filter((rule) => rule.match)
    .map((rule) => ({
      rule,
      pattern: new RegExp(
        rule.match.source,
        rule.match.flags.replace(/[gy]/g, "") + "g",
      ),
      // Offset in 'src' where this rule's next search begins.
      from: 0,
    }));
  while (true) {
    // Find the earliest pending match across all rules. The strict '<'
    // keeps the first rule in table order when two start together.
    let next:
      | { scanner: (typeof scanners)[number]; found: RegExpExecArray }
      | null = null;
    for (const scanner of scanners) {
      scanner.pattern.lastIndex = scanner.from;
      const found = scanner.pattern.exec(src);
      if (found && (!next || found.index < next.found.index)) {
        next = { scanner, found };
      }
    }
    if (!next) break;
    const { scanner, found } = next;
    const after = src.slice(found.index + found[0].length);
    const parsed = scanner.rule.parse({ after, match: found[0], opts });
    if (!parsed) {
      // Declined: leave the text literal and let this rule look further
      // along. Always advance by at least one character.
      scanner.from = found.index + Math.max(found[0].length, 1);
      continue;
    }
    parts.push(src.slice(0, found.index), parsed.result);
    src = parsed.rest ?? after;
    // 'src' was replaced, so every rule starts over from its beginning.
    for (const each of scanners) each.from = 0;
  }
  parts.push(src);
  return parts;
 }
 // -- interfaces --
 /* Options accepted (as a Partial) by 'parse' and by the 'Markdown'
 * component. NOTE(review): 'parse' has no body yet, so nothing in this
 * file reads these fields; the descriptions below are inferred from the
 * field types and should be confirmed once 'parse' is implemented. */
 interface ParseOpts {
  // Named block-level rules (see 'BlockRule').
  blockRules: Record<string, BlockRule>;
  // Named inline rules (see 'InlineRule').
  inlineRules: Record<string, InlineRule>;
 }
 /* Options for 'parseInline'. The same object is also passed to every
 * rule's 'parse' callback as 'opts'. */
 interface InlineOpts {
  // Rule table that 'parseInline' walks via Object.values().
  rules: Record<string, InlineRule>;
  // Link reference definitions; the 'link' rule looks labels up here.
  links: Map<string, LinkRef>;
 }
 /* A single inline syntax rule used by 'parseInline'. */
 interface InlineRule {
  // Pattern that 'parseInline' matches against the remaining source to
  // find where this rule might apply.
  match: RegExp;
  // Called with the text that 'match' matched ('match'), the source
  // following that text ('after'), and the active inline options
  // ('opts'). Returns the parsed node, or null to decline this match.
  parse(opts: {
    after: string;
    match: string;
    opts: InlineOpts;
  }): InlineParse | null;
 }
 /* Successful result of an 'InlineRule.parse' call. */
 interface InlineParse {
  // Node that 'parseInline' appends to its output.
  result: engine.Node;
  // Source that remains to be parsed after this construct. When omitted,
  // 'parseInline' continues with the text right after the match.
  rest?: string;
 }
 /* Target of a link: stored in 'InlineOpts.links' for reference links,
 * and also the shape returned by 'parseLinkTarget'. The 'link' rule
 * spreads both fields onto the rendered <a> element. */
 interface LinkRef {
  // Link destination.
  href: string;
  // Optional link title; null when there is none.
  title: string | null;
 }
 /* A block-level syntax rule. NOTE(review): nothing in this file uses
 * block rules yet; the empty 'opts' shape and the 'unknown' return type
 * look like placeholders to be filled in with the block parser. */
 interface BlockRule {
  // Pattern identifying the block construct (usage not visible here).
  match: RegExp;
  parse(opts: {}): unknown;
 }
 /* Built-in inline rules, keyed by name. This is the default rule table
 * of 'parseInline' when no 'rules' option is supplied. Entries that are
 * still '{}' are placeholders for syntax that is not implemented yet;
 * 'parseInline' skips rules that have no 'match' pattern.
 */
 export const inlineRules: Record<string, InlineRule> = {
  code: {
    match: /`+/,
    // 6.1 - code spans
    parse({ after, match }) {
      // The span is closed by a backtick string of exactly the same
      // length as the opener: a run that is part of a longer backtick
      // string does not count. 'match' consists solely of backticks, so
      // it is safe to embed in the pattern unescaped.
      const close = new RegExp("(?<!`)" + match + "(?!`)").exec(after);
      if (!close) return null;
      // Line endings inside a code span are converted to spaces.
      let inner = after.slice(0, close.index).replace(/\n/g, " ");
      const rest = after.slice(close.index + match.length);
      // If the resulting string both begins and ends with a space
      // character, but does not consist entirely of space characters,
      // a single space character is removed from the front and back.
      // (Interior spaces are allowed: "` a b `" becomes "a b".)
      if (/^ [\s\S]* $/.test(inner) && /[^ ]/.test(inner)) {
        inner = inner.slice(1, -1);
      }
      return { result: <code>{inner}</code>, rest };
    },
  },
  emphasis: {},
  link: {
    // An opening bracket that is not preceded by '!' (that is an image).
    match: /(?<!!)\[/,
    parse({ after, opts }) {
      // Match '[' to let the inner-most link win: if another '[' shows
      // up before the closing ']', this bracket is declined.
      const splitText = splitFirst(after, /[[\]]/);
      if (!splitText) return null;
      if (splitText.delim !== "]") return null;
      const { first: textSrc, rest: afterText } = splitText;
      let href: string, title: string | null = null, rest: string;
      if (afterText[0] === "(") {
        // Inline link: [text](target)
        const splitTarget = splitFirst(afterText.slice(1), /\)/);
        if (!splitTarget) return null;
        ({ rest } = splitTarget);
        const target = parseLinkTarget(splitTarget.first);
        if (!target) return null;
        ({ href, title } = target);
      } else if (afterText[0] === "[") {
        // Reference link: [text][label] or [text][]
        const splitTarget = splitFirst(afterText.slice(1), /]/);
        if (!splitTarget) return null;
        const name = splitTarget.first.trim().length === 0
          // Collapsed reference link: the text doubles as the label
          ? textSrc.trim()
          // Full Reference Link
          : splitTarget.first.trim();
        const target = opts.links.get(name);
        if (!target) return null;
        ({ href, title } = target);
        ({ rest } = splitTarget);
      } else {
        // Shortcut reference link: [label]. The label is trimmed the
        // same way as in the collapsed and full forms above.
        const target = opts.links.get(textSrc.trim());
        if (!target) return null;
        ({ href, title } = target);
        rest = afterText;
      }
      return {
        // The link text is itself inline markdown.
        result: <a {...{ href, title }}>{parseInline(textSrc, opts)}</a>,
        rest,
      };
    },
  },
  image: {},
  autolink: {},
  html: {},
  br: {
    // Hard line break: two or more spaces, or a backslash, before a
    // line ending.
    match: /  +\n|\\\n/,
    parse() {
      return { result: <br /> };
    },
  },
 };
 /* Parse the part of an inline link between the parentheses: a link
 * destination optionally followed by a link title.
 *
 * - The destination is either wrapped in '<...>' (and may then contain
 *   spaces) or is a run of non-whitespace characters. It may be empty.
 * - The title, if present, must be separated from the destination by
 *   whitespace and be delimited by "...", '...' or (...).
 * - Backslash escapes of ASCII punctuation are resolved in both parts.
 *
 * Returns null when 'src' is not a valid target (for example when
 * unquoted text follows the destination), which makes the caller treat
 * the construct as plain text.
 */
 function parseLinkTarget(
  src: string,
 ): { href: string; title: string | null } | null {
  // Resolve backslash escapes: a backslash before ASCII punctuation
  // stands for that punctuation character.
  const unescape = (text: string) =>
    text.replace(/\\([!-\/:-@\[-`{-~])/g, "$1");
  const target = src.trim();
  let href: string, afterHref: string;
  if (target.startsWith("<")) {
    // Bracketed destination: no line endings or unescaped '<' / '>'.
    const bracketed = /^<((?:\\[\s\S]|[^<>\\\n])*)>/.exec(target);
    if (!bracketed) return null;
    href = bracketed[1] ?? "";
    afterHref = target.slice(bracketed[0].length);
  } else {
    // Bare destination: everything up to the first whitespace.
    const end = target.search(/\s/);
    href = end === -1 ? target : target.slice(0, end);
    afterHref = target.slice(href.length);
  }
  const titleSrc = afterHref.trim();
  if (titleSrc.length === 0) return { href: unescape(href), title: null };
  // A title has to be separated from the destination by whitespace.
  if (!/^\s/.test(afterHref)) return null;
  const quoted =
    /^(?:"((?:\\[\s\S]|[^"\\])*)"|'((?:\\[\s\S]|[^'\\])*)'|\(((?:\\[\s\S]|[^()\\])*)\))$/
      .exec(titleSrc);
  if (!quoted) return null;
  // Exactly one of the three groups participates in the match.
  const title = quoted[1] ?? quoted[2] ?? quoted[3] ?? "";
  return { href: unescape(href), title: unescape(title) };
 }
 /* Split 'text' at the first delimiter matched by 'match' that is not
 * backslash-escaped.
 *
 * A delimiter is escaped when it is preceded by an odd number of
 * backslashes. An escaped delimiter does not end the split: its escaping
 * backslash is removed and the delimiter itself is kept as literal text
 * in 'first'.
 *
 * Returns null when no unescaped delimiter exists. Otherwise returns:
 * - first: the text before the delimiter,
 * - delim: the delimiter text that was matched,
 * - rest: the text after the delimiter.
 *
 * 'match' must not carry the 'g' flag, since the match index is needed.
 */
 function splitFirst(text: string, match: RegExp) {
  let first = "", delim: string, escaped: boolean;
  do {
    const find = text.match(match);
    if (!find) return null;
    delim = find[0];
    const index = UNWRAP(find.index);
    // Count the run of backslashes directly before the delimiter; an
    // odd count means the last one escapes the delimiter.
    let i = index - 1;
    escaped = false;
    while (i >= 0 && text[i] === "\\") escaped = !escaped, i -= 1;
    // For an escaped delimiter, drop only the escaping backslash and keep
    // the delimiter as part of the text.
    first += escaped
      ? text.slice(0, index - 1) + delim
      : text.slice(0, index);
    text = text.slice(index + delim.length);
  } while (escaped);
  return { first, delim, rest: text };
 }
 // Debug smoke test: runs every time this module is loaded and prints
 // the SSR output for a sample containing several code spans.
 // NOTE(review): looks like a development leftover; confirm whether it
 // should remain once the parser is finished.
 console.log(engine.ssrSync(parseInline("meow `bwaa` `` ` `` `` `z``")));
 import * as engine from "#ssr";import type { ParseOptions } from "node:querystring";
--- a/src/file-viewer/transcode-rules.ts
+++ b/src/file-viewer/transcode-rules.ts
@ -120,6 +120,7 @@ export const imagePresets = [
      "-effort",
      "9",
      "-update",
      "1",
      "-frames:v",
      "1",
    ],