diff --git a/flake.nix b/flake.nix
index ffee12c..dafcbdd 100644
--- a/flake.nix
+++ b/flake.nix
@@ -21,6 +21,7 @@
         withJxl = true;
         withWebp = true;
       })
+      pkgs.rsync
     ];
   };
 }
diff --git a/framework/lib/fs.ts b/framework/lib/fs.ts
index 703890d..f7b496b 100644
--- a/framework/lib/fs.ts
+++ b/framework/lib/fs.ts
@@ -58,6 +58,32 @@ export function readJsonSync<T>(file: string) {
   return JSON.parse(readFileSync(file, "utf-8")) as T;
 }
 
+export async function removeEmptyDirectories(dir: string, removeRoot = false) {
+  try {
+    const entries = await readdir(dir, { withFileTypes: true });
+    let len = entries.length;
+    for (const entry of entries) {
+      if (entry.isDirectory()) {
+        const subDirPath = path.join(dir, entry.name);
+        if (await removeEmptyDirectories(subDirPath, true)) len -= 1;
+      }
+    }
+    if (len === 0) {
+      if (removeRoot) {
+        await rmdir(dir);
+      }
+      return true;
+    }
+  } catch (error: any) {
+    if (error.code === "ENOENT") {
+      // Directory doesn't exist, ignore
+      return;
+    }
+    throw error;
+  }
+  return false;
+}
+
 import * as path from "node:path";
 import {
   createReadStream,
@@ -76,6 +102,7 @@ import {
   readdir,
   readFile,
   rm,
+  rmdir,
   stat,
   writeFile,
 } from "node:fs/promises";
diff --git a/framework/lib/markdown.tsx b/framework/lib/markdown.tsx
index d2c86e9..7959603 100644
--- a/framework/lib/markdown.tsx
+++ b/framework/lib/markdown.tsx
@@ -6,13 +6,14 @@
  * components, and other features are gained for free here.
  */
 function parse(src: string, options: Partial<ParseOpts> = {}) {
-
 }
 
 /* Render markdown content. Same function as 'parse', but JSX components
  * only take one argument and must start with a capital letter. */
-export function Markdown({ src, ...options }: { src: string } & Partial<ParseOpts>) {
-  return parse(src, options)
+export function Markdown(
+  { src, ...options }: { src: string } & Partial<ParseOpts>,
+) {
+  return parse(src, options);
 }
 
 function parseInline(src: string, options: Partial<ParseOpts> = {}) {
@@ -20,7 +21,7 @@ function parseInline(src: string, options: Partial<ParseOpts> = {}) {
   const opts: InlineOpts = { rules, links };
   const parts: engine.Node[] = [];
   const ruleList = Object.values(rules);
-  parse: while(true) {
+  parse: while (true) {
     for (const rule of ruleList) {
       if (!rule.match) continue;
       const match = src.match(rule.match);
@@ -29,18 +30,20 @@ function parseInline(src: string, options: Partial<ParseOpts> = {}) {
       const after = src.slice(index + match[0].length);
       const parse = rule.parse({ after, match: match[0], opts });
       if (!parse) continue;
+      // push the text before the match, then the parsed node
       parts.push(src.slice(0, index), parse.result);
       src = parse.rest ?? after;
       continue parse;
     }
     break;
-  }
+  }
   parts.push(src);
   return parts;
 }
 
 // -- interfaces --
 interface ParseOpts {
+  gfm: boolean;
   blockRules: Record<string, BlockRule>;
   inlineRules: Record<string, InlineRule>;
 }
@@ -84,9 +87,9 @@ export const inlineRules: Record<string, InlineRule> = {
       return { result: <code>{inner}</code>, rest };
     },
   },
-  emphasis: {},
   link: {
     match: /(? = { }; }, },
-  image: {},
+  image: {
+    match: /!\[/,
+    // 6.4 - images
+    parse({ after, opts }) {
+      // Match '[' to let the inner-most link win.
+      const splitText = splitFirst(after, /[[\]]/);
+      if (!splitText) return null;
+      if (splitText.delim !== "]") return null;
+      const { first: textSrc, rest: afterText } = splitText;
+    },
+  },
+  emphasis: {
+    // detect left-flanking delimiter runs, but this expression does not
+    // consider preceding escapes. instead, those are programmatically
+    // checked inside the parse function.
+    match: /(?:\*+|(?
= {
 function parseLinkTarget(src: string) {
   let href: string, title: string | null = null;
   href = src;
-  return { href, title };
+  return { href, title };
 }
 
 /* Find a delimiter while considering backslash escapes. */
@@ -163,5 +188,5 @@ function splitFirst(text: string, match: RegExp) {
 
 console.log(engine.ssrSync(parseInline("meow `bwaa` `` ` `` `` `z``")));
 
-import * as engine from "#ssr";import type { ParseOptions } from "node:querystring";
-
+import * as engine from "#ssr";
+import type { ParseOptions } from "node:querystring";
diff --git a/framework/lib/sqlite.ts b/framework/lib/sqlite.ts
index e2e59b6..a749254 100644
--- a/framework/lib/sqlite.ts
+++ b/framework/lib/sqlite.ts
@@ -95,7 +95,7 @@ export class Stmt {
     }
     return item;
   }
-  iter(...args: Args): Iterator<Row> {
+  iter(...args: Args): IterableIterator<Row> {
     return this.#wrap(args, () => this.array(...args)[Symbol.iterator]());
   }
   /** Get all rows */
diff --git a/readme.md b/readme.md
index c34ff2c..da47830 100644
--- a/readme.md
+++ b/readme.md
@@ -1,12 +1,12 @@
 # clover sitegen framework
 
 this repository contains clover's "sitegen" framework, which is a set of tools
-that assist building websites. these tools power https://paperclover.net.
+that assist building websites. these tools power <https://paperclover.net>.
 
 - **HTML ("Server Side Rendering") engine written from scratch.** (~500 lines)
   - A more practical JSX runtime (`class` instead of `className`, built-in
     `clsx`, `html()` helper over `dangerouslySetInnerHTML` prop, etc).
-  - Integration with [Marko][1] for concisely written components.
+  - Integration with [Marko] for concisely written components.
   - TODO: MDX-like compiler for content-heavy pages like blogs.
   - Different languages can be used at the same time. Supports `async function`
     components, `<Suspense />`, and custom extensions.
@@ -27,12 +27,16 @@ that assist building websites. these tools power <https://paperclover.net>.
   - TODO: Font subsetting tools to reduce bytes downloaded by fonts.
 - **Built on the battle-tested Node.js runtime.**
 
-[1]: https://next.markojs.com
+None of these tools are complex or revolutionary. Rather, this project is the
+sum of many years of experience managing content-heavy websites, and an
+example of how other frameworks over-complicate things.
+
+[Marko]: https://next.markojs.com
 
 Included is `src`, which contains `paperclover.net`. Website highlights:
 
 - [Question/Answer board, custom markdown parser and components][q+a].
-- [File viewer with prefetching + client-side navigation][file].
+- [File viewer with fast ui/ux + optimized media streaming][file].
 - [Personal, friends-only blog with password protection][friends].
 
 [q+a]: https://paperclover.net/q+a
@@ -49,16 +53,14 @@ minimum system requirements:
 
 my development machine, for example, is Dell Inspiron 7348 with Core i7
 
-```
-npm install
+    npm install
 
-# production generation
-node run generate
-node .clover/out/server
+    # production generation
+    node run generate
+    node .clover/out/server
 
-# "development" watch mode
-node run watch
-```
+    # "development" watch mode
+    node run watch
diff --git a/src/file-viewer/bin/scan3.ts b/src/file-viewer/bin/scan3.ts
index e67c20e..71d8c7f 100644
--- a/src/file-viewer/bin/scan3.ts
+++ b/src/file-viewer/bin/scan3.ts
@@ -11,7 +11,7 @@
 // Remember that any software you want to be maintainable and high
 // quality cannot be written with AI.
 const root = path.resolve("/Volumes/clover/Published");
-const workDir = path.resolve(".clover/file-assets");
+const workDir = path.resolve(".clover/derived");
 
 export async function main() {
   const start = performance.now();
@@ -27,7 +27,7 @@ export async function main() {
   // Read a directory or file stat and queue up changed files.
   using qList = new async.Queue({
     name: "Discover Tree",
-    async fn(absPath: string, spin) {
+    async fn(absPath: string) {
       const stat = await fs.stat(absPath);
 
       const publicPath = toPublicPath(absPath);
@@ -129,7 +129,7 @@ export async function main() {
         await processor.run({ absPath, stat, mediaFile, spin });
         mediaFile.setProcessed(mediaFile.processed | (1 << (16 + index)));
         for (const dependantJob of after) {
-          ASSERT(dependantJob.needs > 0);
+          ASSERT(dependantJob.needs > 0, `dependantJob.needs > 0, ${dependantJob.needs}`);
           dependantJob.needs -= 1;
           if (dependantJob.needs == 0) qProcess.add(dependantJob);
         }
@@ -152,12 +152,14 @@ export async function main() {
   async function queueProcessors(
     { absPath, stat, mediaFile }: Omit,
   ) {
-    const ext = mediaFile.extension.toLowerCase();
-    let possible = processors.filter((p) => p.include.has(ext));
+    const ext = mediaFile.extensionNonEmpty.toLowerCase();
+    let possible = processors.filter((p) =>
+      p.include ? p.include.has(ext) : !p.exclude?.has(ext)
+    );
     if (possible.length === 0) return;
 
     const hash = possible.reduce((a, b) => a ^ b.hash, 0) | 1;
-    ASSERT(hash <= 0xFFFF);
+    ASSERT(hash <= 0xFFFF, `${hash.toString(16)} has no bits above 16 set`);
     let processed = mediaFile.processed;
 
     // If the hash has changed, migrate the bitfield over.
@@ -213,7 +215,7 @@ export async function main() {
       if (dependJob) {
        dependJob.after.push(job);
       } else {
-        ASSERT(job.needs > 0);
+        ASSERT(job.needs > 0, `job.needs > 0, ${job.needs}`);
         job.needs -= 1;
         if (job.needs === 0) qProcess.add(job);
       }
@@ -241,16 +243,114 @@ export async function main() {
   await qMeta.done();
   await qProcess.done();
 
+  // Update directory metadata
+  const dirs = MediaFile.getDirectoriesToReindex()
+    .sort((a, b) => b.path.length - a.path.length);
+  for (const dir of dirs) {
+    const children = dir.getChildren();
+
+    // readme.txt
+    const readmeContent = children.find((x) =>
+      x.basename === "readme.txt"
+    )?.contents ?? "";
+
+    // dirsort
+    let dirsort: string[] | null = null;
+    const dirSortRaw =
+      children.find((x) => x.basename === ".dirsort")?.contents ?? "";
+    if (dirSortRaw) {
+      dirsort = dirSortRaw.split("\n")
+        .map((x) => x.trim())
+        .filter(Boolean);
+    }
+
+    // Permissions
+    if (children.some((x) => x.basename === ".friends")) {
+      FilePermissions.setPermissions(dir.path, 1);
+    } else {
+      FilePermissions.setPermissions(dir.path, 0);
+    }
+
+    // Recursive stats.
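+    // Aggregation note: since `dirs` is sorted by descending path length,
+    // child directories are processed before their parents, so by the time
+    // a parent aggregates its direct children below, each child directory's
+    // size/hash/date already cover its whole subtree.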
+    let totalSize = 0;
+    let newestDate = new Date(0);
+    let allHashes = "";
+    for (const child of children) {
+      totalSize += child.size;
+      allHashes += child.hash;
+
+      if (child.basename !== "readme.txt" && child.date > newestDate) {
+        newestDate = child.date;
+      }
+    }
+
+    const dirHash = crypto.createHash("sha1")
+      .update(dir.path + allHashes)
+      .digest("hex");
+
+    MediaFile.markDirectoryProcessed({
+      id: dir.id,
+      timestamp: newestDate,
+      contents: readmeContent,
+      size: totalSize,
+      hash: dirHash,
+      dirsort,
+    });
+  }
+
+  // Sync to remote
+  if ((await fs.readdir(workDir)).length > 0) {
+    await rsync.spawn({
+      args: [
+        "--links",
+        "--recursive",
+        "--times",
+        "--partial",
+        "--progress",
+        "--remove-source-files",
+        "--delay-updates",
+        workDir + "/",
+        "clo@zenith:/mnt/storage1/clover/Documents/Config/clover_file/derived/",
+      ],
+      title: "Uploading Derived Assets",
+      cwd: process.cwd(),
+    });
+
+    await fs.removeEmptyDirectories(workDir);
+  } else {
+    console.info("No new derived assets");
+  }
+
   console.info(
-    "Updated file viewer index in " +
-      ((performance.now() - start) / 1000).toFixed(1) + "s",
+    "Updated file viewer index in \x1b[1m" +
+      ((performance.now() - start) / 1000).toFixed(1) + "s\x1b[0m",
+  );
+
+  MediaFile.db.prepare("VACUUM").run();
+  const { duration, count } = MediaFile.db.prepare<
+    [],
+    { count: number; duration: number }
+  >(`
+    select
+      count(*) as count,
+      sum(duration) as duration
+    from media_files
+  `).getNonNull();
+
+  console.info();
+  console.info(
+    "Global Stats:\n" +
+      `- File Count: \x1b[1m${count}\x1b[0m\n` +
+      `- Canonical Size: \x1b[1m${formatSize(MediaFile.getByPath("/")!.size)}\x1b[0m\n` +
+      `- Media Duration: \x1b[1m${formatDurationLong(duration)}\x1b[0m\n`,
   );
 }
 
 interface Process {
   name: string;
   enable?: boolean;
-  include: Set<string>;
+  include?: Set<string>;
+  exclude?: Set<string>;
   depends?: string[];
   version?: number;
   /* Perform an action. */
@@ -508,6 +609,26 @@ const procVideos = transcodeRules.videoFormats.map((preset) => ({
   },
 }));
 
+const procCompression = [
+  { name: "gzip", fn: () => zlib.createGzip({ level: 9 }) },
+  { name: "zstd", fn: () => zlib.createZstdCompress() },
+].map(({ name, fn }) => ({
+  name: `compress ${name}`,
+  exclude: rules.extsPreCompressed,
+  async run({ absPath, mediaFile }) {
+    if ((mediaFile.size ?? 
0) < 10) return;
+    await produceAsset(`${mediaFile.hash}/${name}`, async (base) => {
+      fs.mkdirSync(path.dirname(base), { recursive: true });
+      await stream.promises.pipeline(
+        fs.createReadStream(absPath),
+        fn(),
+        fs.createWriteStream(base),
+      );
+      return [base];
+    });
+  },
+} satisfies Process as Process));
+
 const processors = [
   procDimensions,
   procDuration,
@@ -515,6 +636,7 @@ const processors = [
   procHighlightCode,
   procImageSubsets,
   ...procVideos,
+  ...procCompression,
 ]
   .map((process, id, all) => {
     const strIndex = (id: number) =>
@@ -619,7 +741,7 @@ export function skipBasename(basename: string): boolean {
 }
 
 export function toPublicPath(absPath: string) {
-  ASSERT(path.isAbsolute(absPath));
+  ASSERT(path.isAbsolute(absPath), "non-absolute " + absPath);
   if (absPath === root) return "/";
   return "/" + path.relative(root, absPath).replaceAll("\\", "/");
 }
@@ -642,14 +764,22 @@ import * as async from "#sitegen/async";
 import * as fs from "#sitegen/fs";
 
 import * as path from "node:path";
+import * as zlib from "node:zlib";
 import * as child_process from "node:child_process";
 import * as util from "node:util";
 import * as crypto from "node:crypto";
+import * as stream from "node:stream";
 
 import { MediaFile, MediaFileKind } from "@/file-viewer/models/MediaFile.ts";
 import { AssetRef } from "@/file-viewer/models/AssetRef.ts";
-import { formatDate } from "@/file-viewer/format.ts";
+import { FilePermissions } from "@/file-viewer/models/FilePermissions.ts";
+import {
+  formatDate,
+  formatDurationLong,
+  formatSize,
+} from "@/file-viewer/format.ts";
 import * as rules from "@/file-viewer/rules.ts";
 import * as highlight from "@/file-viewer/highlight.ts";
 import * as ffmpeg from "@/file-viewer/ffmpeg.ts";
+import * as rsync from "@/file-viewer/rsync.ts";
 import * as transcodeRules from "@/file-viewer/transcode-rules.ts";
diff --git a/src/file-viewer/cotyledon.tsx b/src/file-viewer/cotyledon.tsx
index 8a704b5..2132a13 100644
--- a/src/file-viewer/cotyledon.tsx
+++ b/src/file-viewer/cotyledon.tsx
@@ -70,31 +70,31 @@ export function Speedbump() {
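Aside on the `processed` bitfield that `scan3.ts` relies on above: the low 16 bits hold a nonzero hash identifying which processors apply to a file (their `hash` values XORed together, forced nonzero by `| 1`), and bit `16 + index` records that the processor at `index` has finished. A standalone sketch of that layout, with illustrative hash values that are not part of the diff:

```ts
// Sketch of the `processed` bitfield layout used by scan3.ts.
const processorHashes = [0x1a2b, 0x3c4d, 0x5e6f]; // illustrative values

// Low 16 bits: XOR of the applicable processors' hashes, `| 1` so a
// stored hash is never zero (the diff asserts it fits in 16 bits).
const combinedHash = processorHashes.reduce((a, b) => a ^ b, 0) | 1;

// Bits 16 and up: one "has run" flag per processor index.
const hasRun = (processed: number, index: number) =>
  (processed & (1 << (16 + index))) !== 0;
const markRun = (processed: number, index: number) =>
  processed | (1 << (16 + index));

let processed = combinedHash; // fresh file: hash stored, no flags set
processed = markRun(processed, 1);
console.log(hasRun(processed, 0), hasRun(processed, 1)); // false true
// If the low 16 bits stop matching combinedHash, the processor set has
// changed, and scan3.ts migrates the old flags before reusing them.
```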
diff --git a/src/file-viewer/format.ts b/src/file-viewer/format.ts
index 04debed..e28b214 100644
--- a/src/file-viewer/format.ts
+++ b/src/file-viewer/format.ts
@@ -25,6 +25,13 @@ export function formatDuration(seconds: number) {
   return `${minutes}:${remainingSeconds.toString().padStart(2, "0")}`;
 }
 
+export function formatDurationLong(seconds: number) {
+  const hours = Math.floor(seconds / 3600);
+  const minutes = Math.floor((seconds % 3600) / 60);
+  const remainingSeconds = Math.floor(seconds % 60);
+  return `${hours}:${minutes.toString().padStart(2, "0")}:${remainingSeconds.toString().padStart(2, "0")}`;
+}
+
 export function escapeUri(uri: string) {
   return encodeURIComponent(uri)
     .replace(/%2F/gi, "/")
diff --git a/src/file-viewer/models/MediaFile.ts b/src/file-viewer/models/MediaFile.ts
index 9d092a5..d084331 100644
--- a/src/file-viewer/models/MediaFile.ts
+++ b/src/file-viewer/models/MediaFile.ts
@@ -104,6 +104,12 @@ export class MediaFile {
   get extension() {
     return path.extname(this.path);
   }
+  get extensionNonEmpty() {
+    const { basename } = this;
+    const ext = path.extname(basename);
+    if (ext === "") return basename;
+    return ext;
+  }
   getChildren() {
     return MediaFile.getChildren(this.id)
       .filter((file) => !file.basename.startsWith("."));
   }
@@ -267,6 +273,9 @@ export class MediaFile {
   static getChildren(id: number) {
     return getChildrenQuery.array(id);
   }
+  static getDirectoriesToReindex() {
+    return getDirectoriesToReindexQuery.array();
+  }
 
   static db = db;
 }
@@ -430,6 +439,21 @@ const deleteCascadeQuery = db.prepare<[{ id: number }]>(/* SQL */ `
   delete from media_files where id in (select id from items)
 `);
 
+const getDirectoriesToReindexQuery = db.prepare(`
+  with recursive directory_chain as (
+    -- base case
+    select id, parent_id, path from media_files
+    where kind = 0 and processed = 0
+    -- recurse to find all parents so that size/hash can be updated
+    union
+    select m.id, m.parent_id, m.path
+    from media_files m
+    inner join directory_chain d on m.id = d.parent_id
+  )
+  select distinct id, parent_id, path
+  from directory_chain
+  order by path;
+`).as(MediaFile);
 
 import { getDb } from "#sitegen/sqlite";
 import * as path from "node:path/posix";
diff --git a/src/file-viewer/rsync.ts b/src/file-viewer/rsync.ts
new file mode 100644
index 0000000..8b5f86b
--- /dev/null
+++ b/src/file-viewer/rsync.ts
@@ -0,0 +1,184 @@
+// Utilities for spawning rsync and consuming its output as a `Progress`.
+// A headless parser is available with `Parse`.
+
+export type Line =
+  | { kind: "ignore" }
+  | { kind: "log"; level: "info" | "warn" | "error"; message: string }
+  | { kind: "count"; files: number }
+  | {
+    kind: "progress";
+    currentFile: string;
+    bytesTransferred: number;
+    percentage: number;
+    timeElapsed: string;
+    transferNumber: number;
+    filesToCheck: number;
+    totalFiles: number;
+    speed: string | null;
+  };
+
+export const defaultExtraOptions = [
+  "--progress",
+];
+
+export interface SpawnOptions {
+  args: string[];
+  title: string;
+  rsync?: string;
+  progress?: Progress;
+  cwd: string;
+}
+
+export async function spawn(options: SpawnOptions) {
+  const { rsync = "rsync", args, title, cwd } = options;
+  const proc = child_process.spawn(rsync, [...defaultExtraOptions, ...args], {
+    stdio: ["ignore", "pipe", "pipe"],
+    cwd,
+  });
+  const parser = new Parse();
+  const bar = options.progress ??
 new Progress({ text: title });
+  let running = true;
+
+  const stdoutSplitter = readline.createInterface({ input: proc.stdout });
+  const stderrSplitter = readline.createInterface({ input: proc.stderr });
+
+  const handleLine = (line: string) => {
+    const result = parser.onLine(line);
+    if (result.kind === "ignore") {
+      return;
+    } else if (result.kind === "log") {
+      console[result.level](result.message);
+    } else if (result.kind === "count") {
+      if (!running) return;
+      bar.text = `${result.files} files...`;
+    } else if (result.kind === "progress") {
+      if (!running) return;
+      const {
+        transferNumber,
+        bytesTransferred,
+        totalFiles,
+        filesToCheck,
+        currentFile,
+        speed,
+      } = result;
+      bar.value = transferNumber;
+      bar.total = totalFiles;
+      const extras = [
+        formatSize(bytesTransferred),
+        (totalFiles > filesToCheck)
+          ? `${totalFiles - filesToCheck} unchecked`
+          : null,
+        speed,
+      ].filter(Boolean).join(", ");
+      const fileName = currentFile.length > 20
+        ? `${currentFile.slice(0, 3)}..${currentFile.slice(-15)}`
+        : currentFile;
+      bar.text = `[${transferNumber}/${totalFiles}] ${fileName} ${
+        extras.length > 0 ? `(${extras})` : ""
+      }`;
+    } else result satisfies never;
+  };
+
+  stdoutSplitter.on("line", handleLine);
+  stderrSplitter.on("line", handleLine);
+
+  const [code, signal] = await events.once(proc, "close");
+  running = false;
+  if (code !== 0) {
+    const fmt = code ? `code ${code}` : `signal ${signal}`;
+    const e: any = new Error(`rsync failed with ${fmt}`);
+    e.args = [rsync, ...args].join(" ");
+    e.code = code;
+    e.signal = signal;
+    bar.error(e.message);
+    throw e;
+  }
+  bar.success(title);
+}
+
+export class Parse {
+  totalFiles = 0;
+  currentTransfer = 0;
+  toCheck = 0;
+
+  onLine(line: string): Line {
+    line = line.trimEnd();
+
+    // Parse progress lines like:
+    // 20c83c16735608fc3de4aac61e36770d7774e0c6/au26.m4s
+    //     238,377 100%  460.06kB/s  0:00:00 (xfr#557, to-chk=194111/194690)
+    const progressMatch = line.match(
+      /^\s+([\d,]+)\s+(\d+)%\s+(\S+)\s+(?:(\S+)\s+)?(?:\(xfr#(\d+), to-chk=(\d+)\/(\d+)\))?/,
+    );
+    if (progressMatch) {
+      const [
+        ,
+        bytesStr,
+        percentageStr,
+        speed,
+        timeElapsed,
+        transferStr,
+        toCheckStr,
+        totalStr,
+      ] = progressMatch;
+
+      this.currentTransfer = Number(transferStr);
+
+      return {
+        kind: "progress",
+        currentFile: this.lastSeenFile || "",
+        bytesTransferred: Number(bytesStr.replaceAll(",", "")),
+        percentage: Number(percentageStr),
+        timeElapsed,
+        transferNumber: this.currentTransfer,
+        filesToCheck: toCheckStr
+          ? this.toCheck = Number(toCheckStr)
+          : this.toCheck,
+        totalFiles: totalStr ?
 this.totalFiles = Number(totalStr) : this.totalFiles,
+        speed: speed || null,
+      };
+    }
+
+    // Skip common rsync info lines
+    if (!line.startsWith(" ") && !line.startsWith("rsync")) {
+      if (
+        line.startsWith("sending incremental file list") ||
+        line.startsWith("sent ") ||
+        line.startsWith("total size is ") ||
+        line.includes("speedup is ") ||
+        line.startsWith("building file list")
+      ) {
+        return { kind: "ignore" };
+      }
+      if (line.trim().length > 0) {
+        this.lastSeenFile = line;
+      }
+      return { kind: "ignore" };
+    }
+    if (line.startsWith(" ")) {
+      const match = line.match(/ (\d+) files\.\.\./);
+      if (match) {
+        return { kind: "count", files: Number(match[1]) };
+      }
+    }
+    if (
+      line.toLowerCase().includes("error") ||
+      line.toLowerCase().includes("failed")
+    ) {
+      return { kind: "log", level: "error", message: line };
+    }
+    if (
+      line.toLowerCase().includes("warning") ||
+      line.toLowerCase().includes("skipping")
+    ) {
+      return { kind: "log", level: "warn", message: line };
+    }
+    return { kind: "log", level: "info", message: line };
+  }
+
+  private lastSeenFile: string | null = null;
+}
+
+import * as child_process from "node:child_process";
+import * as readline from "node:readline";
+import events from "node:events";
+import { Progress } from "@paperclover/console/Progress";
+import { formatSize } from "@/file-viewer/format.ts";
diff --git a/src/file-viewer/rules.ts b/src/file-viewer/rules.ts
index 675e5b9..7f0a154 100644
--- a/src/file-viewer/rules.ts
+++ b/src/file-viewer/rules.ts
@@ -73,7 +73,7 @@ export const extsDuration = new Set([...extsAudio, ...extsVideo]);
 export const extsDimensions = new Set([...extsImage, ...extsVideo]);
 
 /** These files read file contents into `contents`, as-is */
-export const extsReadContents = new Set([".txt", ".chat"]);
+export const extsReadContents = new Set([".txt", ".chat", ".dirsort"]);
 
 export const extsArchive = new Set([
   ".zip",
@@ -92,10 +92,20 @@ export const extsArchive = new Set([
 export const extsPreCompressed = new Set([
   ...extsAudio,
   ...extsVideo,
-  ...extsImage,
+  ...extsImageLike,
   ...extsArchive,
-  // TODO: are any of these NOT good for compression
+  ".docx",
+  ".xlsx",
+  ".pptx",
+  ".psd",
+  ".sketch",
+  ".ai",
+  ".3ds",
+  ".fbx",
+  ".blend",
+  ".dng",
 ]);
+extsPreCompressed.delete(".svg");
 
 export function fileIcon(
   file: Pick,
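For a sense of how the new rsync `Parse` class behaves, here is a minimal driver; the sample lines mirror the `--progress` output quoted in the parser's own comments, and the import path assumes the same `@/` alias used by `scan3.ts`:

```ts
import { Parse } from "@/file-viewer/rsync.ts";

const parser = new Parse();

// rsync prints the filename on its own line, then indented progress lines.
const lines = [
  "20c83c16735608fc3de4aac61e36770d7774e0c6/au26.m4s",
  "        238,377 100%  460.06kB/s    0:00:00 (xfr#557, to-chk=194111/194690)",
];

for (const line of lines) {
  const result = parser.onLine(line);
  if (result.kind === "progress") {
    // The filename line itself parses as "ignore" but is remembered, so the
    // progress line that follows can report which file is transferring.
    console.log(
      `${result.currentFile}: ${result.transferNumber}/${result.totalFiles}`,
    );
  }
}
```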