finish scan3

This commit is contained in:
chloe caruso 2025-07-08 20:48:30 -07:00
parent 4c2a4f7578
commit 7ef08faaec
11 changed files with 457 additions and 47 deletions

View file

@ -21,6 +21,7 @@
withJxl = true;
withWebp = true;
})
pkgs.rsync
];
};
}

View file

@ -58,6 +58,32 @@ export function readJsonSync<T>(file: string) {
return JSON.parse(readFileSync(file, "utf-8")) as T;
}
/**
 * Recursively removes empty directories under `dir`.
 *
 * Returns `true` when `dir` contains no files (after pruning empty
 * subdirectories) — and removes `dir` itself when `removeRoot` is set.
 * Returns `false` when the directory still has contents or does not exist.
 */
export async function removeEmptyDirectories(dir: string, removeRoot = false) {
  try {
    const entries = await readdir(dir, { withFileTypes: true });
    let len = entries.length;
    for (const entry of entries) {
      if (entry.isDirectory()) {
        const subDirPath = path.join(dir, entry.name);
        // A subdirectory that was pruned no longer counts as contents.
        if (await removeEmptyDirectories(subDirPath, true)) len -= 1;
      }
    }
    if (len === 0) {
      if (removeRoot) {
        await rmdir(dir);
      }
      return true;
    }
  } catch (error: any) {
    if (error.code === "ENOENT") {
      // Directory doesn't exist; report "nothing removed" with a consistent
      // boolean instead of the previous bare `return` (undefined).
      return false;
    }
    throw error;
  }
  return false;
}
import * as path from "node:path";
import {
createReadStream,
@ -76,6 +102,7 @@ import {
readdir,
readFile,
rm,
rmdir,
stat,
writeFile,
} from "node:fs/promises";

View file

@ -6,13 +6,14 @@
* components, and other features are gained for free here.
*/
function parse(src: string, options: Partial<ParseOpts> = {}) {
}
/* Render markdown content. Same function as 'parse', but JSX components
* only take one argument and must start with a capital letter. */
export function Markdown({ src, ...options }: { src: string } & Partial<ParseOpts>) {
return parse(src, options)
export function Markdown(
{ src, ...options }: { src: string } & Partial<ParseOpts>,
) {
return parse(src, options);
}
function parseInline(src: string, options: Partial<InlineOpts> = {}) {
@ -20,7 +21,7 @@ function parseInline(src: string, options: Partial<InlineOpts> = {}) {
const opts: InlineOpts = { rules, links };
const parts: engine.Node[] = [];
const ruleList = Object.values(rules);
parse: while(true) {
parse: while (true) {
for (const rule of ruleList) {
if (!rule.match) continue;
const match = src.match(rule.match);
@ -29,6 +30,7 @@ function parseInline(src: string, options: Partial<InlineOpts> = {}) {
const after = src.slice(index + match[0].length);
const parse = rule.parse({ after, match: match[0], opts });
if (!parse) continue;
// parse before
parts.push(src.slice(0, index), parse.result);
src = parse.rest ?? after;
continue parse;
@ -41,6 +43,7 @@ function parseInline(src: string, options: Partial<InlineOpts> = {}) {
// -- interfaces --
interface ParseOpts {
gfm: boolean;
blockRules: Record<string, BlockRule>;
inlineRules: Record<string, InlineRule>;
}
@ -84,9 +87,9 @@ export const inlineRules: Record<string, InlineRule> = {
return { result: <code>{inner}</code>, rest };
},
},
emphasis: {},
link: {
match: /(?<!!)\[/,
// 6.3 - links
parse({ after, opts }) {
// Match '[' to let the inner-most link win.
const splitText = splitFirst(after, /[[\]]/);
@ -127,7 +130,29 @@ export const inlineRules: Record<string, InlineRule> = {
};
},
},
image: {},
// Inline image rule (CommonMark 6.4). NOTE(review): parse() is unfinished —
// it extracts the alt-text span but falls through without returning a
// result, so the caller treats it as "no match" and image syntax is not
// rendered yet. Confirm this is intentional work-in-progress.
image: {
match: /!\[/,
// 6.4 - images
parse({ after, opts }) {
// Match '[' to let the inner-most link win.
const splitText = splitFirst(after, /[[\]]/);
if (!splitText) return null;
// A nested '[' before the closing ']' means an inner bracket should win.
if (splitText.delim !== "]") return null;
// textSrc: raw alt text; afterText: source following the ']'.
const { first: textSrc, rest: afterText } = splitText;
},
},
emphasis: {
// detect left-flanking delimiter runs, but this expression does not
// consider preceding escapes. instead, those are programmatically
// checked inside the parse function.
match: /(?:\*+|(?<!\p{P})_+)(?!\s|\p{P}|$)/u,
// 6.2 - emphasis and strong emphasis
parse({ before, match, after, opts }) {
// find out how long the delim sequence is
// look for 'ends'
// NOTE(review): body is empty — parse() returns undefined, so the caller
// skips emphasis entirely for now; appears to be work in progress.
},
},
autolink: {},
html: {},
br: {
@ -163,5 +188,5 @@ function splitFirst(text: string, match: RegExp) {
console.log(engine.ssrSync(parseInline("meow `bwaa` `` ` `` `` `z``")));
import * as engine from "#ssr";import type { ParseOptions } from "node:querystring";
import * as engine from "#ssr";
import type { ParseOptions } from "node:querystring";

View file

@ -95,7 +95,7 @@ export class Stmt<Args extends unknown[] = unknown[], Row = unknown> {
}
return item;
}
iter(...args: Args): Iterator<Row> {
iter(...args: Args): IterableIterator<Row> {
return this.#wrap(args, () => this.array(...args)[Symbol.iterator]());
}
/** Get all rows */

View file

@ -1,12 +1,12 @@
# clover sitegen framework
this repository contains clover's "sitegen" framework, which is a set of tools
that assist building websites. these tools power https://paperclover.net.
that assist building websites. these tools power <https://paperclover.net>.
- **HTML "Server Side Rendering" engine written from scratch.** (~500 lines)
- A more practical JSX runtime (`class` instead of `className`, built-in
`clsx`, `html()` helper over `dangerouslySetInnerHTML` prop, etc).
- Integration with [Marko][1] for concisely written components.
- Integration with [Marko] for concisely written components.
- TODO: MDX-like compiler for content-heavy pages like blogs.
- Different languages can be used at the same time. Supports `async function`
components, `<Suspense />`, and custom extensions.
@ -27,12 +27,16 @@ that assist building websites. these tools power https://paperclover.net.
- TODO: Font subsetting tools to reduce bytes downloaded by fonts.
- **Built on the battle-tested Node.js runtime.**
[1]: https://next.markojs.com
None of these tools are complex or revolutionary. Rather, this project is the
sum of many years of experience in managing content-heavy websites, and an
example of how other frameworks over-complicate things.
[Marko]: https://next.markojs.com
Included is `src`, which contains `paperclover.net`. Website highlights:
- [Question/Answer board, custom markdown parser and components][q+a].
- [File viewer with prefetching + client-side navigation][file].
- [File viewer with fast ui/ux + optimized media streaming][file].
- [Personal, friends-only blog with password protection][friends].
[q+a]: https://paperclover.net/q+a
@ -49,16 +53,14 @@ minimum system requirements:
my development machine, for example, is Dell Inspiron 7348 with Core i7
```
npm install
npm install
# production generation
node run generate
node .clover/out/server
# production generation
node run generate
node .clover/out/server
# "development" watch mode
node run watch
```
# "development" watch mode
node run watch
<!-- `repl.js` will open a read-eval-print-loop where plugin state is cached (on my -->
<!-- 2014 dev laptop, startup time is 600-1000ms). every file in `framework` and -->

View file

@ -11,7 +11,7 @@
// Remember that any software you want to be maintainable and high
// quality cannot be written with AI.
const root = path.resolve("/Volumes/clover/Published");
const workDir = path.resolve(".clover/file-assets");
const workDir = path.resolve(".clover/derived");
export async function main() {
const start = performance.now();
@ -27,7 +27,7 @@ export async function main() {
// Read a directory or file stat and queue up changed files.
using qList = new async.Queue({
name: "Discover Tree",
async fn(absPath: string, spin) {
async fn(absPath: string) {
const stat = await fs.stat(absPath);
const publicPath = toPublicPath(absPath);
@ -129,7 +129,7 @@ export async function main() {
await processor.run({ absPath, stat, mediaFile, spin });
mediaFile.setProcessed(mediaFile.processed | (1 << (16 + index)));
for (const dependantJob of after) {
ASSERT(dependantJob.needs > 0);
ASSERT(dependantJob.needs > 0, `dependantJob.needs > 0, ${dependantJob.needs}`);
dependantJob.needs -= 1;
if (dependantJob.needs == 0) qProcess.add(dependantJob);
}
@ -152,12 +152,14 @@ export async function main() {
async function queueProcessors(
{ absPath, stat, mediaFile }: Omit<ProcessFileArgs, "spin">,
) {
const ext = mediaFile.extension.toLowerCase();
let possible = processors.filter((p) => p.include.has(ext));
const ext = mediaFile.extensionNonEmpty.toLowerCase();
let possible = processors.filter((p) =>
p.include ? p.include.has(ext) : !p.exclude?.has(ext)
);
if (possible.length === 0) return;
const hash = possible.reduce((a, b) => a ^ b.hash, 0) | 1;
ASSERT(hash <= 0xFFFF);
ASSERT(hash <= 0xFFFF, `${hash.toString(16)} has no bits above 16 set`);
let processed = mediaFile.processed;
// If the hash has changed, migrate the bitfield over.
@ -213,7 +215,7 @@ export async function main() {
if (dependJob) {
dependJob.after.push(job);
} else {
ASSERT(job.needs > 0);
ASSERT(job.needs > 0, `job.needs !== 0, ${job.needs}`);
job.needs -= 1;
if (job.needs === 0) qProcess.add(job);
}
@ -241,16 +243,114 @@ export async function main() {
await qMeta.done();
await qProcess.done();
// Update directory metadata
const dirs = MediaFile.getDirectoriesToReindex()
.sort((a, b) => b.path.length - a.path.length);
for (const dir of dirs) {
const children = dir.getChildren();
// readme.txt
const readmeContent = children.find((x) =>
x.basename === "readme.txt"
)?.contents ?? "";
// dirsort
let dirsort: string[] | null = null;
const dirSortRaw =
children.find((x) => x.basename === ".dirsort")?.contents ?? "";
if (dirSortRaw) {
dirsort = dirSortRaw.split("\n")
.map((x) => x.trim())
.filter(Boolean);
}
// Permissions
if (children.some((x) => x.basename === ".friends")) {
FilePermissions.setPermissions(dir.path, 1);
} else {
FilePermissions.setPermissions(dir.path, 0);
}
// Recursive stats.
let totalSize = 0;
let newestDate = new Date(0);
let allHashes = "";
for (const child of children) {
totalSize += child.size;
allHashes += child.hash;
if (child.basename !== "/readme.txt" && child.date > newestDate) {
newestDate = child.date;
}
}
const dirHash = crypto.createHash("sha1")
.update(dir.path + allHashes)
.digest("hex");
MediaFile.markDirectoryProcessed({
id: dir.id,
timestamp: newestDate,
contents: readmeContent,
size: totalSize,
hash: dirHash,
dirsort,
});
}
// Sync to remote
if ((await fs.readdir(workDir)).length > 0) {
await rsync.spawn({
args: [
"--links",
"--recursive",
"--times",
"--partial",
"--progress",
"--remove-source-files",
"--delay-updates",
workDir + "/",
"clo@zenith:/mnt/storage1/clover/Documents/Config/clover_file/derived/",
],
title: "Uploading Derived Assets",
cwd: process.cwd(),
});
await fs.removeEmptyDirectories(workDir);
} else {
console.info("No new derived assets");
}
console.info(
"Updated file viewer index in " +
((performance.now() - start) / 1000).toFixed(1) + "s",
"Updated file viewer index in \x1b[1m" +
((performance.now() - start) / 1000).toFixed(1) + "s\x1b[0m",
);
MediaFile.db.prepare("VACUUM").run();
const { duration, count } = MediaFile.db.prepare<
[],
{ count: number; duration: number }
>(`
select
count(*) as count,
sum(duration) as duration
from media_files
`).getNonNull();
console.info();
console.info(
"Global Stats:\n" +
`- File Count: \x1b[1m${count}\x1b[0m\n` +
`- Canonical Size: \x1b[1m${formatSize(MediaFile.getByPath("/")!.size)}\x1b[0m\n` +
`- Media Duration: \x1b[1m${formatDurationLong(duration)}\x1b[0m\n`,
);
}
interface Process {
name: string;
enable?: boolean;
include: Set<string>;
include?: Set<string>;
exclude?: Set<string>;
depends?: string[];
version?: number;
/* Perform an action. */
@ -350,6 +450,7 @@ const procLoadTextContents: Process = {
// Reads small text files into the media index's `contents` column.
// Files over 1 MB are skipped to keep the database compact.
// (Removed a leftover `console.log({ text })` that dumped the entire file
// contents to stdout for every text file processed.)
async run({ absPath, mediaFile, stat }) {
  if (stat.size > 1_000_000) return;
  const text = await fs.readFile(absPath, "utf-8");
  mediaFile.setContents(text);
},
};
@ -508,6 +609,26 @@ const procVideos = transcodeRules.videoFormats.map<Process>((preset) => ({
},
}));
// Pre-compress assets with gzip and zstd so the server can answer
// `Content-Encoding` requests without compressing on the fly. Extensions in
// `rules.extsPreCompressed` (media, archives, ...) are excluded since they
// are already compressed.
const procCompression = [
  { name: "gzip", fn: () => zlib.createGzip({ level: 9 }) },
  { name: "zstd", fn: () => zlib.createZstdCompress() },
].map(({ name, fn }) => ({
  name: `compress ${name}`,
  exclude: rules.extsPreCompressed,
  async run({ absPath, mediaFile }) {
    // Tiny payloads gain nothing from compression overhead.
    if ((mediaFile.size ?? 0) < 10) return;
    await produceAsset(`${mediaFile.hash}/${name}`, async (base) => {
      // `recursive: true` creates missing parents AND tolerates the
      // directory already existing — both compressors write into the same
      // `hash/` directory, so the second one would otherwise throw EEXIST.
      fs.mkdirSync(path.dirname(base), { recursive: true });
      await stream.promises.pipeline(
        fs.createReadStream(absPath),
        fn(),
        fs.createWriteStream(base),
      );
      return [base];
    });
  },
} satisfies Process as Process));
const processors = [
procDimensions,
procDuration,
@ -515,6 +636,7 @@ const processors = [
procHighlightCode,
procImageSubsets,
...procVideos,
...procCompression,
]
.map((process, id, all) => {
const strIndex = (id: number) =>
@ -619,7 +741,7 @@ export function skipBasename(basename: string): boolean {
}
export function toPublicPath(absPath: string) {
ASSERT(path.isAbsolute(absPath));
ASSERT(path.isAbsolute(absPath), "non-absolute " + absPath);
if (absPath === root) return "/";
return "/" + path.relative(root, absPath).replaceAll("\\", "/");
}
@ -642,14 +764,22 @@ import * as async from "#sitegen/async";
import * as fs from "#sitegen/fs";
import * as path from "node:path";
import * as zlib from "node:zlib";
import * as child_process from "node:child_process";
import * as util from "node:util";
import * as crypto from "node:crypto";
import * as stream from "node:stream";
import { MediaFile, MediaFileKind } from "@/file-viewer/models/MediaFile.ts";
import { AssetRef } from "@/file-viewer/models/AssetRef.ts";
import { formatDate } from "@/file-viewer/format.ts";
import { FilePermissions } from "@/file-viewer/models/FilePermissions.ts";
import {
formatDate,
formatDurationLong,
formatSize,
} from "@/file-viewer/format.ts";
import * as rules from "@/file-viewer/rules.ts";
import * as highlight from "@/file-viewer/highlight.ts";
import * as ffmpeg from "@/file-viewer/ffmpeg.ts";
import * as rsync from "@/file-viewer/rsync.ts";
import * as transcodeRules from "@/file-viewer/transcode-rules.ts";

View file

@ -70,31 +70,31 @@ export function Speedbump() {
<div className="enter-container">
<div className="image-grid">
<button>
<img src="/captcha/image/1.jpeg" alt="a four-leaf clover" />
<img src="/captcha/1.jpeg" alt="a four-leaf clover" />
</button>
<button>
<img src="/captcha/image/2.jpeg" alt="a four-leaf clover" />
<img src="/captcha/2.jpeg" alt="a four-leaf clover" />
</button>
<button>
<img src="/captcha/image/3.jpeg" alt="a four-leaf clover" />
<img src="/captcha/3.jpeg" alt="a four-leaf clover" />
</button>
<button>
<img src="/captcha/image/4.jpeg" alt="a four-leaf clover" />
<img src="/captcha/4.jpeg" alt="a four-leaf clover" />
</button>
<button>
<img src="/captcha/image/5.jpeg" alt="a four-leaf clover" />
<img src="/captcha/5.jpeg" alt="a four-leaf clover" />
</button>
<button>
<img src="/captcha/image/6.jpeg" alt="a four-leaf clover" />
<img src="/captcha/6.jpeg" alt="a four-leaf clover" />
</button>
<button>
<img src="/captcha/image/7.jpeg" alt="a four-leaf clover" />
<img src="/captcha/7.jpeg" alt="a four-leaf clover" />
</button>
<button>
<img src="/captcha/image/8.jpeg" alt="a four-leaf clover" />
<img src="/captcha/8.jpeg" alt="a four-leaf clover" />
</button>
<button>
<img src="/captcha/image/9.jpeg" alt="a four-leaf clover" />
<img src="/captcha/9.jpeg" alt="a four-leaf clover" />
</button>
</div>
</div>

View file

@ -25,6 +25,13 @@ export function formatDuration(seconds: number) {
return `${minutes}:${remainingSeconds.toString().padStart(2, "0")}`;
}
/**
 * Formats a duration in seconds as `H:MM:SS` (hours unpadded).
 * Fractional seconds are truncated so e.g. 90.5 renders as "0:01:30"
 * instead of "0:01:30.5".
 */
export function formatDurationLong(seconds: number) {
  const hours = Math.floor(seconds / 3600);
  const minutes = Math.floor((seconds % 3600) / 60);
  const remainingSeconds = Math.floor(seconds % 60);
  return `${hours}:${minutes.toString().padStart(2, "0")}:${
    remainingSeconds.toString().padStart(2, "0")
  }`;
}
export function escapeUri(uri: string) {
return encodeURIComponent(uri)
.replace(/%2F/gi, "/")

View file

@ -104,6 +104,12 @@ export class MediaFile {
get extension() {
return path.extname(this.path);
}
/** Like `extension`, but files without an extension fall back to their
 * basename (so the result is never the empty string). */
get extensionNonEmpty() {
  const ext = path.extname(this.basename);
  return ext === "" ? this.basename : ext;
}
getChildren() {
return MediaFile.getChildren(this.id)
.filter((file) => !file.basename.startsWith("."));
@ -267,6 +273,9 @@ export class MediaFile {
static getChildren(id: number) {
return getChildrenQuery.array(id);
}
/** Directories flagged for reindexing (kind = 0, processed = 0) plus all of
 * their ancestors; see `getDirectoriesToReindexQuery`. */
static getDirectoriesToReindex() {
return getDirectoriesToReindexQuery.array();
}
static db = db;
}
@ -430,6 +439,21 @@ const deleteCascadeQuery = db.prepare<[{ id: number }]>(/* SQL */ `
delete from media_files
where id in (select id from items)
`);
// Walks from unprocessed directories (kind = 0, processed = 0) up through
// their parent chain so aggregate size/hash/date can be recomputed for every
// ancestor. Results are ordered by path; note the caller re-sorts
// deepest-first before processing.
const getDirectoriesToReindexQuery = db.prepare(`
with recursive directory_chain as (
-- base case
select id, parent_id, path from media_files
where kind = 0 and processed = 0
-- recurse to find all parents so that size/hash can be updated
union
select m.id, m.parent_id, m.path
from media_files m
inner join directory_chain d on m.id = d.parent_id
)
select distinct id, parent_id, path
from directory_chain
order by path;
`).as(MediaFile);
import { getDb } from "#sitegen/sqlite";
import * as path from "node:path/posix";

184
src/file-viewer/rsync.ts Normal file
View file

@ -0,0 +1,184 @@
// Utilities for spawning rsync and consuming its output as a `Progress`
// A headless parser is available with `Parse`
// Classification of a single line of rsync output; produced by `Parse.onLine`.
export type Line =
| { kind: "ignore" } // recognized noise; no action needed
| { kind: "log"; level: "info" | "warn" | "error"; message: string }
| { kind: "count"; files: number } // incremental file-scan counter
| {
kind: "progress";
currentFile: string;
bytesTransferred: number;
percentage: number;
// NOTE(review): the regex group producing this is optional, so it may be
// undefined at runtime despite the `string` type — confirm/tighten.
timeElapsed: string;
transferNumber: number;
filesToCheck: number;
totalFiles: number;
speed: string | null; // e.g. "460.06kB/s"; null when not reported
};
// Always passed to rsync so `Parse` receives progress lines to consume.
export const defaultExtraOptions = [
"--progress",
];
export interface SpawnOptions {
args: string[]; // rsync arguments (flags, sources, destination)
title: string; // label for the progress bar and success message
rsync?: string; // executable to spawn; defaults to "rsync" on PATH
progress?: Progress; // reuse an existing bar instead of creating one
cwd: string;
}
/**
 * Spawns rsync and renders its `--progress` output into a console Progress
 * bar. stdout and stderr are both line-split and fed through `Parse`.
 *
 * NOTE(review): on a non-zero exit the Error is *returned*, not thrown —
 * callers that ignore the return value (e.g. a bare `await spawn(...)`)
 * will continue silently after a failed transfer. Confirm this is intended.
 */
export async function spawn(options: SpawnOptions) {
const { rsync = "rsync", args, title, cwd } = options;
const proc = child_process.spawn(rsync, [...defaultExtraOptions, ...args], {
stdio: ["ignore", "pipe", "pipe"],
cwd,
});
const parser = new Parse();
const bar = options.progress ?? new Progress({ text: title });
// Once the process closes, stop mutating the bar from late-arriving lines.
let running = true;
const stdoutSplitter = readline.createInterface({ input: proc.stdout });
const stderrSplitter = readline.createInterface({ input: proc.stderr });
const handleLine = (line: string) => {
const result = parser.onLine(line);
if (result.kind === "ignore") {
return;
} else if (result.kind === "log") {
console[result.level](result.message);
} else if (result.kind === "count") {
if (!running) return;
bar.text = `${result.files} files...`;
} else if (result.kind === "progress") {
if (!running) return;
const {
transferNumber,
bytesTransferred,
totalFiles,
filesToCheck,
currentFile,
speed,
} = result;
bar.value = transferNumber;
bar.total = totalFiles;
// Parenthesized suffix: bytes moved, files skipped from checking, speed.
const extras = [
formatSize(bytesTransferred),
(totalFiles > filesToCheck)
? `${totalFiles - filesToCheck} unchecked`
: null,
speed,
].filter(Boolean).join(", ");
// Truncate long paths, keeping the start and the (more useful) tail.
const fileName = currentFile.length > 20
? `${currentFile.slice(0, 3)}..${currentFile.slice(-15)}`
: currentFile;
bar.text = `[${transferNumber}/${totalFiles}] ${fileName} ${
extras.length > 0 ? `(${extras})` : ""
}`;
} else result satisfies never;
};
stdoutSplitter.on("line", handleLine);
stderrSplitter.on("line", handleLine);
const [code, signal] = await events.once(proc, "close");
running = false;
if (code !== 0) {
// `code` is null when killed by a signal; report whichever applies.
const fmt = code ? `code ${code}` : `signal ${signal}`;
const e: any = new Error(`rsync failed with ${fmt}`);
e.args = [rsync, ...args].join(" ");
e.code = code;
e.signal = signal;
bar.error(e.message);
return e;
}
bar.success(title);
}
/**
 * Stateful line parser for rsync `--progress` output. Feed raw lines to
 * `onLine`; each is classified as progress, a file count, a log message, or
 * ignorable noise. Counters are remembered across lines because rsync only
 * prints the `(xfr#N, to-chk=A/B)` suffix on some progress lines.
 */
export class Parse {
  totalFiles = 0;
  currentTransfer = 0;
  toCheck = 0;
  // Most recent non-indented line, i.e. the file currently transferring.
  private lastSeenFile: string | null = null;

  onLine(line: string): Line {
    line = line.trimEnd();
    // Parse progress lines like:
    //             20c83c16735608fc3de4aac61e36770d7774e0c6/au26.m4s
    //     238,377 100%  460.06kB/s    0:00:00 (xfr#557, to-chk=194111/194690)
    const progressMatch = line.match(
      /^\s+([\d,]+)\s+(\d+)%\s+(\S+)\s+(?:(\S+)\s+)?(?:\(xfr#(\d+), to-chk=(\d+)\/(\d+)\))?/,
    );
    if (progressMatch) {
      const [
        ,
        bytesStr,
        percentageStr,
        speed,
        timeElapsed,
        transferStr,
        toCheckStr,
        totalStr,
      ] = progressMatch;
      // The `(xfr#...)` suffix is optional; keep the previous counter when it
      // is absent instead of clobbering it with `Number(undefined)` (NaN) —
      // matching how `toCheck`/`totalFiles` are guarded below.
      if (transferStr) this.currentTransfer = Number(transferStr);
      return {
        kind: "progress",
        currentFile: this.lastSeenFile || "",
        bytesTransferred: Number(bytesStr.replaceAll(",", "")),
        percentage: Number(percentageStr),
        timeElapsed,
        transferNumber: this.currentTransfer,
        filesToCheck: toCheckStr ? this.toCheck = Number(toCheckStr) : this.toCheck,
        totalFiles: totalStr ? this.totalFiles = Number(totalStr) : this.totalFiles,
        speed: speed || null,
      };
    }
    // Non-indented lines that aren't rsync messages are either known info
    // lines (ignored) or the name of the file about to transfer.
    if (!line.startsWith(" ") && !line.startsWith("rsync")) {
      if (
        line.startsWith("sending incremental file list") ||
        line.startsWith("sent ") ||
        line.startsWith("total size is ") ||
        line.includes("speedup is ") ||
        line.startsWith("building file list")
      ) {
        return { kind: "ignore" };
      }
      if (line.trim().length > 0) {
        this.lastSeenFile = line;
      }
      return { kind: "ignore" };
    }
    // Incremental recursion counter, e.g. "  1234 files..."
    if (line.startsWith(" ")) {
      const match = line.match(/ (\d+) files.../);
      if (match) {
        return { kind: "count", files: Number(match[1]) };
      }
    }
    // Remaining lines ("rsync ..." messages and unmatched indented lines):
    // classify by severity keywords, defaulting to info.
    if (
      line.toLowerCase().includes("error") ||
      line.toLowerCase().includes("failed")
    ) {
      return { kind: "log", level: "error", message: line };
    }
    if (
      line.toLowerCase().includes("warning") ||
      line.toLowerCase().includes("skipping")
    ) {
      return { kind: "log", level: "warn", message: line };
    }
    return { kind: "log", level: "info", message: line };
  }
}
import * as child_process from "node:child_process";
import * as readline from "node:readline";
import events from "node:events";
import { Progress } from "@paperclover/console/Progress";
import { formatSize } from "@/file-viewer/format.ts";

View file

@ -73,7 +73,7 @@ export const extsDuration = new Set([...extsAudio, ...extsVideo]);
export const extsDimensions = new Set([...extsImage, ...extsVideo]);
/** These files read file contents into `contents`, as-is */
export const extsReadContents = new Set([".txt", ".chat"]);
export const extsReadContents = new Set([".txt", ".chat", ".dirsort"]);
export const extsArchive = new Set([
".zip",
@ -92,10 +92,20 @@ export const extsArchive = new Set([
export const extsPreCompressed = new Set([
...extsAudio,
...extsVideo,
...extsImage,
...extsImageLike,
...extsArchive,
// TODO: are any of these NOT good for compression
".docx",
".xlsx",
".pptx",
".psd",
".sketch",
".ai",
".3ds",
".fbx",
".blend",
".dng",
]);
extsPreCompressed.delete(".svg");
export function fileIcon(
file: Pick<MediaFile, "kind" | "basename" | "path">,