// @ts-nocheck
// # Simple-Markdown Core
//
// This is a fork of Khan Academy's Simple-Markdown[1], initially forked in 2022
// to add Svelte support[2], and used for paper clover's q+a markdown flavor.
//
// 1: https://github.com/Khan/perseus/tree/main/packages/simple-markdown/src
// 2: https://github.com/paperclover/svelte-simple-markdown
export type Rules = Record<string, ParserRule>;
export interface ParserRule {
  name: string;
  match: MatchFunction;
  parse: ParseFunction;
  quality?: QualityFunction;
}

export class RuleList extends Array<ParserRule> {
  constructor(input?: ArrayLike<ParserRule>) {
    super();
    if (input) {
      this.push(...Array.from(input));
    }
  }

  insertBefore(rule: string, newRule: ParserRule): void {
    const index = this.findIndex((r) => r.name === rule);
    if (index === -1) {
      throw new Error(`Rule ${rule} not found`);
    }
    this.splice(index, 0, newRule);
  }

  insertAfter(rule: string, newRule: ParserRule): void {
    const index = this.findIndex((r) => r.name === rule);
    if (index === -1) {
      throw new Error(`Rule ${rule} not found`);
    }
    this.splice(index + 1, 0, newRule);
  }

  toRuleObject(): Record<string, ParserRule> {
    const result: Record<string, ParserRule> = {};
    this.forEach((rule) => {
      result[rule.name] = rule;
    });
    return result;
  }

  add(rule: ParserRule): void {
    this.push(rule);
  }

  get(rule: string): ParserRule | undefined {
    return this.find((r) => r.name === rule);
  }

  remove(rule: string): void {
    const index = this.findIndex((r) => r.name === rule);
    if (index === -1) {
      throw new Error(`Rule ${rule} not found`);
    }
    this.splice(index, 1);
  }

  clone() {
    return new RuleList(this);
  }
}

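// Usage sketch (illustrative only; the "spoiler" rule below is hypothetical and
// not part of this module). Because a RuleList is an ordered array, extensions
// are positional edits, usually on a clone of an existing list:
//
//   const rules = defaultRules.clone();
//   rules.insertBefore("em", {
//     name: "spoiler",
//     match: inlineRegex(/^\|\|([\s\S]+?)\|\|/),
//     parse: (capture, parse, state) => ({ content: parse(capture[1], state) }),
//   });
//   rules.remove("refimage");
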
/**
 * Creates a parser for a given list of rules, with the precedence
 * specified by the order of the rules in the list.
 *
 * @param ruleListInput
 *     a RuleList of {name, match, parse, quality} rules
 *     (earlier rules take precedence; an optional `quality`
 *     function lets competing matches be compared)
 * @param [defaultState]
 *     default fields merged into the state on each top-level parse
 *
 * @returns
 *     The resulting parse function, with the following parameters:
 *     @source: the input source string to be parsed
 *     @state: an optional object to be threaded through parse
 *         calls. Allows clients to add stateful operations to
 *         parsing, such as keeping track of how many levels deep
 *         some nesting is. For an example use-case, see passage-ref
 *         parsing in src/widgets/passage/passage-markdown.jsx
 */
export function createParser(
  ruleListInput: RuleList,
  defaultState: Partial<ParserState> = {},
) {
  let rules = ruleListInput.toRuleObject();
  let ruleList = Object.keys(rules);

  let latestState: ParserState;

  let nestedParse = function (source: string, state?: ParserState) {
    let result: ASTNode[] = [];
    state = state || latestState;
    latestState = state;

    while (source) {
      // store the best match, its rule, and quality:
      let ruleType = null;
      let rule = null;
      let capture = null;
      let quality = NaN;

      // loop control variables:
      let i = 0;
      let currRuleType = ruleList[0];
      let currRule = rules[currRuleType];

      do {
        let currCapture = currRule.match(source, state);

        if (currCapture) {
          let currQuality = currRule.quality
            ? currRule.quality(currCapture, state)
            : 0;

          // This should always be true the first time because
          // the initial quality is NaN (that's why there's the
          // condition negation).
          if (!(currQuality <= quality)) {
            ruleType = currRuleType;
            rule = currRule;
            capture = currCapture;
            quality = currQuality;
          }
        }

        // Move on to the next item.
        // Note that this makes `currRule` be the next item
        i++;
        currRuleType = ruleList[i];
        currRule = rules[currRuleType];
      } while (
        // keep looping while we're still within the ruleList
        currRule &&
        // if we don't have a match yet, continue
        (!capture ||
          // or if we have a match, but the next rule is
          // at the same order, and has a quality measurement
          // function, then this rule must have a quality
          // measurement function (since they are sorted before
          // those without), and we need to check if there is
          // a better quality match
          currRule.quality)
      );

      if (!rule || !capture || !ruleType) {
        throw new Error(
          "Could not find a matching rule for the below " +
            "content. The last rule in the rule list should " +
            "always match content provided to it. Check " +
            "the definition of `match` for '" +
            ruleList[ruleList.length - 1] +
            "'. It seems to not match the following source:\n" +
            source,
        );
      }

      if (capture.index) {
        // If present and non-zero, i.e. a non-^ regexp result:
        throw new Error(
          "`match` must return a capture starting at index 0 " +
            "(the current parse index). Did you forget a ^ at the " +
            "start of the RegExp?",
        );
      }

      let parsed = rule.parse(capture, nestedParse, state);

      // We maintain the same object here so that rules can
      // store references to the objects they return and
      // modify them later. (oops sorry! but this adds a lot
      // of power--see reflinks.)

      // We also let rules override the default type of
      // their parsed node if they would like to, so that
      // there can be a single output function for all links,
      // even if there are several rules to parse them.
      if (!parsed.type) {
        parsed.type = ruleType;
      }

      // Collapse text nodes
      if (
        parsed.type === "text" && result[result.length - 1]?.type === "text"
      ) {
        result[result.length - 1].content += parsed.content;
      } else {
        result.push(parsed as ASTNode);
      }

      state.prevCapture = capture;
      source = source.substring(state.prevCapture[0].length);
    }

    return result;
  };

  let outerParse = function (
    source: string,
    state: ParserState = { inline: false },
  ) {
    latestState = populateInitialState(state, defaultState);

    if (!latestState.inline && !latestState.disableAutoBlockNewlines) {
      source = source + "\n\n";
    }

    // We store the previous capture so that match functions can
    // use some limited amount of lookbehind. Lists use this to
    // ensure they don't match arbitrary '- ' or '* ' in inline
    // text (see the list rule for more information). This stores
    // the full regex capture object, if there is one.
    latestState.prevCapture = undefined;
    return nestedParse(preprocess(source), latestState);
  };

  return outerParse;
}

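// Usage sketch (assumes the `defaultRules` list defined at the bottom of this
// file). Block parsing is the default; pass `inline: true` in the initial state
// to parse inline content only:
//
//   const parse = createParser(defaultRules.clone());
//   const blockAst = parse("# Hello\n\nSome *text*.");
//   const inlineAst = parse("Some *text*.", { inline: true });
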
type Multiple<T> = T | T[];
type Nullable<T> = T | null | undefined;

export type MatchFunction = (
  source: string,
  state: ParserState,
) => Nullable<RegExpMatchArray>;

export type Parser = (source: string, state?: ParserState) => ASTNode[];

export type ParseFunction = (
  capture: RegExpMatchArray,
  nestedParse: Parser,
  state: ParserState,
) => TypeOptionalASTNode;

export type QualityFunction = (
  capture: RegExpMatchArray,
  state: ParserState,
) => number;

export interface ParserState {
  inline: boolean;
  prevCapture?: RegExpMatchArray;
  [key: string]: any;
}

export interface ASTNode {
  type: string;
  content?: ASTNode[] | string;
  [key: string]: any;
}

export type TypeOptionalASTNode = Omit<ASTNode, "type"> & { type?: string };

export interface RefNode {
  type: string;
  content?: Multiple<ASTNode>;
  target?: string;
  title?: string;
  alt?: string;
}

/** Creates a match function for an inline scoped element from a regex */
export function inlineRegex(regex: RegExp): MatchFunction {
  return (source, state) => {
    if (state.inline) {
      return regex.exec(source);
    } else {
      return null;
    }
  };
}

/** Creates a match function for a block scoped element from a regex */
export function blockRegex(regex: RegExp): MatchFunction {
  return (source, state) => {
    if (state.inline) {
      return null;
    } else {
      return regex.exec(source);
    }
  };
}

/** Creates a match function from a regex, ignoring block/inline scope */
export function anyScopeRegex(regex: RegExp): MatchFunction {
  return (source) => {
    return regex.exec(source);
  };
}

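// Sketch of how these helpers are typically combined into a rule (the "mention"
// rule here is hypothetical, not something this module defines):
//
//   const mention: ParserRule = {
//     name: "mention",
//     match: inlineRegex(/^@([A-Za-z0-9_]+)/),
//     parse: (capture) => ({ content: capture[1] }),
//   };
//
// blockRegex is used the same way for rules that should only match at block
// scope, and anyScopeRegex for rules like `br` that match in either scope.
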
const UNESCAPE_URL_R = /\\([^0-9A-Za-z\s])/g;
export function unescapeUrl(rawUrlString: string) {
  return rawUrlString.replace(UNESCAPE_URL_R, "$1");
}

/**
 * Parse some content with the parser `parse`, with state.inline
 * set to true. Useful for block elements; not generally necessary
 * to be used by inline elements (where state.inline is already true).
 */
export function parseInline(
  parse: Parser,
  content: string,
  state: ParserState,
) {
  const isCurrentlyInline = state.inline || false;
  state.inline = true;
  const result = parse(content, state);
  state.inline = isCurrentlyInline;
  return result;
}

export function parseBlock(parse: Parser, content: string, state: ParserState) {
  const isCurrentlyInline = state.inline || false;
  state.inline = false;
  const result = parse(content + "\n\n", state);
  state.inline = isCurrentlyInline;
  return result;
}

export function parseCaptureInline(
  capture: RegExpMatchArray,
  parse: Parser,
  state: ParserState,
) {
  return {
    content: parseInline(parse, capture[1], state),
  };
}

export function ignoreCapture() {
  return {};
}

export function sanitizeUrl(url?: string) {
  if (url == null) {
    return null;
  }
  try {
    const prot = new URL(url, "https://localhost").protocol;
    if (
      prot.indexOf("javascript:") === 0 ||
      prot.indexOf("vbscript:") === 0 ||
      prot.indexOf("data:") === 0
    ) {
      return null;
    }
  } catch (e) {
    // invalid URLs should throw a TypeError
    // see for instance: `new URL("");`
    return null;
  }
  return url;
}

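// Behavior notes (derived from the checks above):
//
//   sanitizeUrl("javascript:alert(1)")   // => null
//   sanitizeUrl("https://example.com/x") // => "https://example.com/x"
//   sanitizeUrl("/relative/path")        // => "/relative/path"
//                                        //    (resolved against https://localhost,
//                                        //     so its protocol check passes)
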
const CR_NEWLINE_R = /\r\n?/g;
const TAB_R = /\t/g;
const FORMFEED_R = /\f/g;

/**
 * Turn various whitespace into easy-to-process whitespace
 */
function preprocess(source: string) {
  return source.replace(CR_NEWLINE_R, "\n").replace(FORMFEED_R, "").replace(
    TAB_R,
    "    ",
  );
}

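// For example (CRLF normalized to "\n", form feeds dropped, tabs expanded to
// spaces):
//
//   preprocess("a\r\nb\fc") // => "a\nbc"
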
function populateInitialState(
  givenState: Partial<ParserState>,
  defaultState: Partial<ParserState>,
) {
  let state = givenState || {};

  for (let prop in defaultState) {
    if (Object.prototype.hasOwnProperty.call(defaultState, prop)) {
      state[prop] = defaultState[prop];
    }
  }

  return state as ParserState;
}

// recognize a `*`, `-`, `+`, `1.`, `2.`... list bullet
const LIST_BULLET = "(?:[*+-]|\\d+\\.)";

// recognize the start of a list item:
// leading space plus a bullet plus a space (` * `)
const LIST_ITEM_PREFIX = "( *)(" + LIST_BULLET + ") +";
const LIST_ITEM_PREFIX_R = new RegExp("^" + LIST_ITEM_PREFIX);

// recognize an individual list item:
//  * hi
//    this is part of the same item
//
//    as is this, which is a new paragraph in the same item
//
//  * but this is not part of the same item
const LIST_ITEM_R = new RegExp(
  LIST_ITEM_PREFIX + "[^\\n]*(?:\\n" + "(?!\\1" + LIST_BULLET +
    " )[^\\n]*)*(\n|$)",
  "gm",
);
const BLOCK_END_R = /\n{2,}$/;
const INLINE_CODE_ESCAPE_BACKTICKS_R = /^ (?= *`)|(` *) $/g;

// recognize the end of a paragraph block inside a list item:
// two or more newlines at the end of the item
const LIST_BLOCK_END_R = BLOCK_END_R;
const LIST_ITEM_END_R = / *\n+$/;

// check whether a list item has paragraphs: if it does,
// we leave the newlines at the end
const LIST_R = new RegExp(
  "^( *)(" +
    LIST_BULLET +
    ") " +
    "[\\s\\S]+?(?:\n{2,}(?! )" +
    "(?!\\1" +
    LIST_BULLET +
    " )\\n*" +
    // the \\s*$ here is so that we can parse the inside of nested
    // lists, where our content might end before we receive two `\n`s
    "|\\s*\n*$)",
);
const LIST_LOOKBEHIND_R = /(?:^|\n)( *)$/;

const TABLES = (function () {
  const TABLE_ROW_SEPARATOR_TRIM = /^ *\| *| *\| *$/g;
  const TABLE_CELL_END_TRIM = / *$/;
  const TABLE_RIGHT_ALIGN = /^ *-+: *$/;
  const TABLE_CENTER_ALIGN = /^ *:-+: *$/;
  const TABLE_LEFT_ALIGN = /^ *:-+ *$/;

  // TODO: This needs a real type
  const parseTableAlignCapture = (alignCapture: string) => {
    if (TABLE_RIGHT_ALIGN.test(alignCapture)) {
      return "right";
    } else if (TABLE_CENTER_ALIGN.test(alignCapture)) {
      return "center";
    } else if (TABLE_LEFT_ALIGN.test(alignCapture)) {
      return "left";
    } else {
      return null;
    }
  };

  const parseTableAlign = (source: string, trimEndSeparators: boolean) => {
    if (trimEndSeparators) {
      source = source.replace(TABLE_ROW_SEPARATOR_TRIM, "");
    }

    const alignText = source.trim().split("|");
    return alignText.map(parseTableAlignCapture);
  };

  const parseTableRow = (
    source: string,
    parse: Parser,
    state: ParserState,
    trimEndSeparators: boolean,
  ) => {
    const prevInTable = state.inTable;
    state.inTable = true;
    const tableRow = parse(source.trim(), state);
    state.inTable = prevInTable;
    const cells: ASTNode[][] = [[]];
    tableRow.forEach(function (node, i) {
      if (node.type === "tableSeparator") {
        // Filter out empty table separators at the start/end:
        if (!trimEndSeparators || (i !== 0 && i !== tableRow.length - 1)) {
          // Split the current row:
          cells.push([]);
        }
      } else {
        if (
          typeof node.content === "string" &&
          (tableRow[i + 1] == null || tableRow[i + 1].type === "tableSeparator")
        ) {
          node.content = node.content.replace(TABLE_CELL_END_TRIM, "");
        }

        cells[cells.length - 1].push(node);
      }
    });
    return cells;
  };

  /**
   * @param {string} source
   * @param {SimpleMarkdown.Parser} parse
   * @param {SimpleMarkdown.State} state
   * @param {boolean} trimEndSeparators
   * @returns {SimpleMarkdown.ASTNode[][]}
   */
  const parseTableCells = function (
    source: string,
    parse: Parser,
    state: ParserState,
    trimEndSeparators: boolean,
  ) {
    const rowsText = source.trim().split("\n");
    return rowsText.map(function (rowText) {
      return parseTableRow(rowText, parse, state, trimEndSeparators);
    });
  };

  /**
   * @param {boolean} trimEndSeparators
   * @returns {SimpleMarkdown.SingleNodeParseFunction}
   */
  const parseTable = function (trimEndSeparators: boolean) {
    return function (
      capture: RegExpMatchArray,
      parse: Parser,
      state: ParserState,
    ) {
      state.inline = true;
      const header = parseTableRow(capture[1], parse, state, trimEndSeparators);
      const align = parseTableAlign(capture[2], trimEndSeparators);
      const cells = parseTableCells(
        capture[3],
        parse,
        state,
        trimEndSeparators,
      );
      state.inline = false;
      return {
        type: "table",
        header: header,
        align: align,
        cells: cells,
      };
    };
  };

  return {
    parseTable: parseTable(true),
    parseNpTable: parseTable(false),
    TABLE_REGEX: /^ *(\|.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*/,
    NPTABLE_REGEX:
      /^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*/,
  };
})();

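// The two regexes above correspond to the two supported table forms, e.g.
// (TABLE_REGEX on the left, NPTABLE_REGEX on the right):
//
//   | a | b |        a | b
//   | - | :-: |      - | :-:
//   | 1 | 2 |        1 | 2
//
// Both parse into a node shaped roughly like
//   { type: "table", header: ASTNode[][], align: (string | null)[],
//     cells: ASTNode[][][] }.
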
const LINK_INSIDE = "(?:\\[[^\\]]*\\]|[^\\[\\]]|\\](?=[^\\[]*\\]))*";
const LINK_HREF_AND_TITLE =
  "\\s*<?((?:\\([^)]*\\)|[^\\s\\\\]|\\\\.)*?)>?(?:\\s+['\"]([\\s\\S]*?)['\"])?\\s*";
const AUTOLINK_MAILTO_CHECK_R = /mailto:/i;

function parseRef(
  capture: RegExpMatchArray,
  state: ParserState,
  refNode: RefNode,
) {
  const ref = (capture[2] || capture[1]).replace(/\s+/g, " ").toLowerCase();

  // We store information about previously seen defs on
  // state._defs (_ to deconflict with client-defined
  // state). If the def for this reflink/refimage has
  // already been seen, we can use its target/source
  // and title here:
  if (state._defs && state._defs[ref]) {
    const def = state._defs[ref];

    // `refNode` can be a link or an image. Both use
    // target and title properties.
    refNode.target = def.target;
    refNode.title = def.title;
  }

  // In case we haven't seen our def yet (or if someone
  // overwrites that def later on), we add this node
  // to the list of ref nodes for that def. Then, when
  // we find the def, we can modify this link/image AST
  // node :).
  // I'm sorry.
  state._refs = state._refs || {};
  state._refs[ref] = state._refs[ref] || [];

  state._refs[ref].push(refNode);

  return refNode;
}

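// Order of definition does not matter. For input like
//
//   [see the docs][docs]
//
//   [docs]: https://example.com "Docs"
//
// the reflink rule registers its node under state._refs["docs"], and when the
// `def` rule runs later it fills in target/title on that same node. If the def
// appears first, the lookup through state._defs above handles it instead.
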
export const defaultRules = new RuleList();
{
  defaultRules.add({
    name: "heading",
    match: blockRegex(/^ *(#{1,6})([^\n]+?)#* *(?:\n *)+\n/),
    parse: function (capture, parse, state) {
      return {
        level: capture[1].length,
        content: parseInline(parse, capture[2].trim(), state),
      };
    },
  });

  defaultRules.add({
    name: "nptable",
    match: blockRegex(TABLES.NPTABLE_REGEX),
    parse: TABLES.parseNpTable,
  });

  defaultRules.add({
    name: "lheading",
    match: blockRegex(/^([^\n]+)\n *(=|-){3,} *(?:\n *)+\n/),
    parse(capture, parse, state) {
      return {
        type: "heading",
        level: capture[2] === "=" ? 1 : 2,
        content: parseInline(parse, capture[1], state),
      };
    },
  });

  defaultRules.add({
    name: "hr",
    match: blockRegex(/^( *[-*_]){3,} *(?:\n *)+\n/),
    parse: ignoreCapture,
  });

  defaultRules.add({
    name: "codeBlock",
    match: blockRegex(/^(?: {4}[^\n]+\n*)+(?:\n *)+\n/),
    parse(capture) {
      const content = capture[0].replace(/^ {4}/gm, "").replace(/\n+$/, "");
      return {
        lang: undefined,
        content: content,
      };
    },
  });

  defaultRules.add({
    name: "fence",
    match: blockRegex(
      /^ *(`{3,}|~{3,}) *(?:(\S+) *)?\n([\s\S]+?)\n?\1 *(?:\n *)+\n/,
    ),
    parse(capture) {
      return {
        type: "codeBlock",
        lang: capture[2] || undefined,
        content: capture[3],
      };
    },
  });

  defaultRules.add({
    name: "blockQuote",
    match: blockRegex(/^( *>[^\n]+(\n[^\n]+)*\n*)+\n{2,}/),
    parse(capture, parse, state) {
      const content = capture[0].replace(/^ *> ?/gm, "");
      return {
        content: parse(content, state),
      };
    },
  });

  defaultRules.add({
    name: "list",

    match(source, state) {
      // We only want to break into a list if we are at the start of a
      // line. This is to avoid parsing "hi * there" with "* there"
      // becoming a part of a list.
      // You might wonder, "but that's inline, so of course it wouldn't
      // start a list?". You would be correct! Except that some of our
      // lists can be inline, because they might be inside another list,
      // in which case we can parse with inline scope, but need to allow
      // nested lists inside this inline scope.
      const prevCaptureStr = state.prevCapture == null
        ? ""
        : state.prevCapture[0];
      const isStartOfLineCapture = LIST_LOOKBEHIND_R.exec(prevCaptureStr);
      const isListBlock = state._list || !state.inline;

      if (isStartOfLineCapture && isListBlock) {
        source = isStartOfLineCapture[1] + source;
        return LIST_R.exec(source);
      } else {
        return null;
      }
    },
    parse(capture, parse, state) {
      const bullet = capture[2];
      const ordered = bullet.length > 1;
      const start = ordered ? +bullet : undefined;
      // We know this will match here, because of how the regexes are defined
      // eslint-disable-next-line @typescript-eslint/no-non-null-assertion
      const items = capture[0].replace(LIST_BLOCK_END_R, "\n").match(
        LIST_ITEM_R,
      )!;

      let lastItemWasAParagraph = false;
      const itemContent = items.map(function (item, i) {
        // We need to see how far indented this item is:
        const prefixCapture = LIST_ITEM_PREFIX_R.exec(item);
        const space = prefixCapture ? prefixCapture[0].length : 0;

        // And then we construct a regex to "unindent" the subsequent
        // lines of the items by that amount:
        const spaceRegex = new RegExp("^ {1," + space + "}", "gm");

        // Before processing the item, we need a couple things
        const content = item
          // remove indents on trailing lines:
          .replace(spaceRegex, "")
          // remove the bullet:
          .replace(LIST_ITEM_PREFIX_R, "");

        // I'm not sure why this is necessary again?
        // Handling "loose" lists, like:
        //
        //  * this is wrapped in a paragraph
        //
        //  * as is this
        //
        //  * as is this

        const isLastItem = i === items.length - 1;
        const containsBlocks = content.indexOf("\n\n") !== -1;

        // Any element in a list is a block if it contains multiple
        // newlines. The last element in the list can also be a block
        // if the previous item in the list was a block (this is
        // because non-last items in the list can end with \n\n, but
        // the last item can't, so we just "inherit" this property
        // from our previous element).
        const thisItemIsAParagraph = containsBlocks ||
          (isLastItem && lastItemWasAParagraph);
        lastItemWasAParagraph = thisItemIsAParagraph;

        // backup our state for restoration afterwards. We're going to
        // want to set state._list to true, and state.inline depending
        // on our list's looseness.
        const oldStateInline = state.inline;
        const oldStateList = state._list;
        state._list = true;

        // Parse inline if we're in a tight list, or block if we're in
        // a loose list.
        let adjustedContent;

        if (thisItemIsAParagraph) {
          state.inline = false;
          adjustedContent = content.replace(LIST_ITEM_END_R, "\n\n");
        } else {
          state.inline = true;
          adjustedContent = content.replace(LIST_ITEM_END_R, "");
        }

        const result = parse(adjustedContent, state);

        // Restore our state before returning
        state.inline = oldStateInline;
        state._list = oldStateList;
        return result;
      });

      return {
        ordered: ordered,
        start: start,
        content: itemContent,
      };
    },
  });

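  // Roughly, a tight list like "* a\n* b\n\n" parses (at block scope) into
  //   { type: "list", ordered: false, start: undefined,
  //     content: [[...inline nodes for "a"], [...inline nodes for "b"]] },
  // while a loose list (items separated by blank lines) parses each item at
  // block scope, so the inner arrays contain paragraph nodes instead.
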
  defaultRules.add({
    name: "def",
    // TODO: This will match without a blank line before the next
    // block element, which is inconsistent with most of the rest of
    // simple-markdown.
    match: blockRegex(
      /^ *\[([^\]]+)\]: *<?([^\s>]*)>?(?: +["(]([^\n]+)[")])? *\n(?: *\n)*/,
    ),
    parse(capture, parse, state) {
      const def = capture[1].replace(/\s+/g, " ").toLowerCase();
      const target = capture[2];
      const title = capture[3];

      // Look for previous links/images using this def
      // If any links/images using this def have already been declared,
      // they will have added themselves to the state._refs[def] list
      // (_ to deconflict with client-defined state). We look through
      // that list of reflinks for this def, and modify those AST nodes
      // with our newly found information now.
      // Sorry :(.
      if (state._refs && state._refs[def]) {
        // `refNode` can be a link or an image
        state._refs[def].forEach((refNode: RefNode) => {
          refNode.target = target;
          refNode.title = title;
        });
      }

      // Add this def to our map of defs for any future links/images
      // In case we haven't found any or all of the refs referring to
      // this def yet, we add our def to the table of known defs, so
      // that future reflinks can modify themselves appropriately with
      // this information.
      state._defs = state._defs || {};
      state._defs[def] = {
        target: target,
        title: title,
      };

      // return the relevant parsed information
      // for debugging only.
      return {
        def: def,
        target: target,
        title: title,
      };
    },
  });

  defaultRules.add({
    name: "table",
    match: blockRegex(TABLES.TABLE_REGEX),
    parse: TABLES.parseTable,
  });

  defaultRules.add({
    name: "newline",
    match: blockRegex(/^(?:\n *)*\n/),
    parse: ignoreCapture,
  });

  defaultRules.add({
    name: "paragraph",
    match: blockRegex(/^((?:[^\n]|\n(?! *\n))+)(?:\n *)+\n/),
    parse: parseCaptureInline,
  });

  defaultRules.add({
    name: "escape",
    // We don't allow escaping numbers, letters, or spaces here so that
    // backslashes used in plain text still get rendered. But allowing
    // escaping anything else provides a very flexible escape mechanism,
    // regardless of how this grammar is extended.
    match: inlineRegex(/^\\([^0-9A-Za-z\s])/),
    parse(capture) {
      return {
        type: "text",
        content: capture[1],
      };
    },
  });

  defaultRules.add({
    name: "tableSeparator",

    match(source, state) {
      if (!state.inTable) {
        return null;
      }

      return /^ *\| */.exec(source);
    },
    parse() {
      return {
        type: "tableSeparator",
      };
    },
  });

  defaultRules.add({
    name: "autolink",
    match: inlineRegex(/^<([^: >]+:\/[^ >]+)>/),
    parse(capture) {
      return {
        type: "link",
        content: [
          {
            type: "text",
            content: capture[1],
          },
        ],
        target: capture[1],
      };
    },
  });

  defaultRules.add({
    name: "mailto",
    match: inlineRegex(/^<([^ >]+@[^ >]+)>/),
    parse(capture) {
      const address = capture[1];
      let target = capture[1];

      // Check for a `mailto:` already existing in the link:
      if (!AUTOLINK_MAILTO_CHECK_R.test(target)) {
        target = "mailto:" + target;
      }

      return {
        type: "link",
        content: [
          {
            type: "text",
            content: address,
          },
        ],
        target: target,
      };
    },
  });

  defaultRules.add({
    name: "url",
    match: inlineRegex(/^(https?:\/\/[^\s<]+[^<.,:;"')\]\s])/),
    parse(capture) {
      return {
        type: "link",
        content: [
          {
            type: "text",
            content: capture[1],
          },
        ],
        target: capture[1],
        title: undefined,
      };
    },
  });

  defaultRules.add({
    name: "link",
    match: inlineRegex(
      new RegExp(
        "^\\[(" + LINK_INSIDE + ")\\]\\(" + LINK_HREF_AND_TITLE + "\\)",
      ),
    ),
    parse(capture, parse, state) {
      const link = {
        content: parse(capture[1], state),
        target: unescapeUrl(capture[2]),
        title: capture[3],
      };
      return link;
    },
  });

  defaultRules.add({
    name: "image",
    match: inlineRegex(
      new RegExp(
        "^!\\[(" + LINK_INSIDE + ")\\]\\(" + LINK_HREF_AND_TITLE + "\\)",
      ),
    ),
    parse: function (capture) {
      const image = {
        alt: capture[1],
        target: unescapeUrl(capture[2]),
        title: capture[3],
      };
      return image;
    },
  });

  defaultRules.add({
    name: "reflink",
    match: inlineRegex(
      new RegExp(
        // The first [part] of the link
        "^\\[(" + LINK_INSIDE + ")\\]" +
          // The [ref] target of the link
          "\\s*\\[([^\\]]*)\\]",
      ),
    ),
    parse(capture, parse, state) {
      return parseRef(capture, state, {
        type: "link",
        content: parse(capture[1], state),
      });
    },
  });

  defaultRules.add({
    name: "refimage",
    match: inlineRegex(
      new RegExp(
        // The first [part] of the link
        "^!\\[(" + LINK_INSIDE + ")\\]" +
          // The [ref] target of the link
          "\\s*\\[([^\\]]*)\\]",
      ),
    ),
    parse(capture, parse, state) {
      return parseRef(capture, state, {
        type: "image",
        alt: capture[1],
      });
    },
  });

  defaultRules.add({
    name: "em",
    /* same as strong/u */
    match: inlineRegex(
      new RegExp(
        // only match _s surrounding words.
        "^\\b_" +
          "((?:__|\\\\[\\s\\S]|[^\\\\_])+?)_" +
          "\\b" +
          // Or match *s:
          "|" +
          // Only match *s that are followed by a non-space:
          "^\\*(?=\\S)(" +
          // Match at least one of:
          "(?:" +
          //  - `**`: so that bolds inside italics don't close the
          //    italics
          "\\*\\*|" +
          //  - escape sequence: so escaped *s don't close us
          "\\\\[\\s\\S]|" +
          //  - whitespace: followed by a non-* (we don't
          //    want ' *' to close an italics--it might
          //    start a list)
          "\\s+(?:\\\\[\\s\\S]|[^\\s\\*\\\\]|\\*\\*)|" +
          //  - non-whitespace, non-*, non-backslash characters
          "[^\\s\\*\\\\]" +
          ")+?" +
          // followed by a non-space, non-* then *
          ")\\*(?!\\*)",
      ),
    ),
    quality(capture) {
      // precedence by length, `em` wins ties:
      return capture[0].length + 0.2;
    },
    parse(capture, parse, state) {
      return {
        content: parse(capture[2] || capture[1], state),
      };
    },
  });

  defaultRules.add({
    name: "strong",
    /* same as em */
    match: inlineRegex(/^\*\*((?:\\[\s\S]|[^\\])+?)\*\*(?!\*)/),
    quality(capture) {
      // precedence by length, wins ties vs `u`:
      return capture[0].length + 0.1;
    },
    parse: parseCaptureInline,
  });

  defaultRules.add({
    name: "u",
    /* same as em&strong; increment for next rule */
    match: inlineRegex(/^__((?:\\[\s\S]|[^\\])+?)__(?!_)/),
    quality(capture) {
      // precedence by length, loses all ties
      return capture[0].length;
    },
    parse: parseCaptureInline,
  });

  defaultRules.add({
    name: "del",
    match: inlineRegex(/^~~(?=\S)((?:\\[\s\S]|~(?!~)|[^\s~\\]|\s(?!~~))+?)~~/),
    parse: parseCaptureInline,
  });

  defaultRules.add({
    name: "inlineCode",
    match: inlineRegex(/^(`+)([\s\S]*?[^`])\1(?!`)/),
    parse(capture) {
      return {
        content: capture[2].replace(INLINE_CODE_ESCAPE_BACKTICKS_R, "$1"),
      };
    },
  });

  defaultRules.add({
    name: "br",
    match: anyScopeRegex(/^ {2,}\n/),
    parse: ignoreCapture,
  });

  defaultRules.add({
    name: "text",
    // Here we look for anything followed by non-symbols,
    // double newlines, or double-space-newlines
    // We break on any symbol characters so that this grammar
    // is easy to extend without needing to modify this regex
    match: anyScopeRegex(
      /^[\s\S]+?(?=[^0-9A-Za-z\s\u00c0-\uffff]|\n\n| {2,}\n|\w+:\S|$)/,
    ),
    parse: function (capture) {
      return {
        content: capture[0],
      };
    },
  });
}
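
// A minimal end-to-end sketch of how this module is typically consumed
// (rendering/output lives outside this file):
//
//   const parse = createParser(defaultRules.clone());
//   const ast = parse("## Title\n\nSome *emphasis* and a [link](https://example.com).");
//   // ast is an ASTNode[]; e.g. ast[0] is { type: "heading", level: 2, ... }.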