diff --git a/src/parser2.ts b/src/parser2.ts
new file mode 100644
index 0000000..47f9e9f
--- /dev/null
+++ b/src/parser2.ts
@@ -0,0 +1,479 @@
+import {
+  Command,
+  ErrorResult,
+  InterpolatedPiece,
+  Script,
+  SimplifyWord,
+  Word,
+} from "./words";
+
+/**
+ * Parse out a Notcl script into an easier-to-interpret representation.
+ * No script is actually executed yet.
+ *
+ * @param code code to parse
+ * @param offset source position of code, if embedded in a larger source document
+ *   (accepted, but not yet used by the implementation)
+ * @returns parsed list of commands, or error message on failure
+ */
+export function parse(
+  code: string,
+  offset = 0
+): [true, Script] | [false, string] {
+  try {
+    const parser = new Parser(code);
+    const script = parser.parseScript();
+    parser.expect("EOF");
+
+    return [true, script];
+  } catch (ex) {
+    // TODO: report error with error position
+    return [false, String(ex)];
+  }
+}
+
+// ---------------------------
+
+// Parser for evaluating Notcl scripts
+
+/**
+ * Error raised while tokenizing or parsing. `pos` is the offset into the
+ * parsed text at which the problem was detected.
+ */
+export class ParseError extends Error {
+  constructor(message: string, public pos: number) {
+    super(message);
+  }
+}
+
+/** Kinds of token produced by `Parser.advance`. */
+type TokenType =
+  | "newline"
+  | "whitespace"
+  | "semicolon"
+  | "{"
+  | "}"
+  | "["
+  | "]"
+  | "quote"
+  | "backslash"
+  | "comment"
+  | "text"
+  | "EOF";
+
+/** A token: its type, the characters it matched, and its start offset. */
+type Token = [TokenType, string, number];
+
+/**
+ * Token patterns, tried in order at the current position. Each is sticky
+ * (`y` flag), so it only matches starting exactly at its `lastIndex`.
+ */
+const Tokens: [TokenType, RegExp][] = [
+  ["newline", /(\n)/y],
+  ["whitespace", /([^\S\n]+)/y],
+  ["semicolon", /(;)/y],
+  ["{", /(\{)/y],
+  ["}", /(\})/y],
+  ["[", /(\[)/y],
+  ["]", /(\])/y],
+  ["quote", /(")/y],
+  ["backslash", /(\\)/y],
+  ["comment", /(\#)/y],
+  ["text", /([^\s;\{\}\[\]"\\\#]+)/y],
+];
+
+/**
+ * Accumulates the script being parsed: pieces build up a word, words build
+ * up a command, and commands build up the script.
+ */
+class WipScript {
+  script: Command[] = [];
+  wipCommand: Word[] = [];
+  wipWord: InterpolatedPiece[] = [];
+  wordPos: number | undefined = undefined;
+  endOfWordError: string | undefined = undefined;
+
+  /** True when no piece has been added to the current word yet. */
+  startOfWord(): boolean {
+    return this.wipWord.length == 0;
+  }
+
+  /** True when the current command has no words and no partial word. */
+  startOfCommand(): boolean {
+    return this.wipWord.length == 0 && this.wipCommand.length == 0;
+  }
+
+  /**
+   * Append a piece to the current word, recording `pos` as the word's
+   * position if this is its first piece.
+   *
+   * @throws ParseError if the word was frozen by `freezeWord`
+   */
+  addWordPiece(piece: InterpolatedPiece, pos: number) {
+    if (this.endOfWordError) {
+      throw new ParseError(this.endOfWordError, pos);
+    }
+    if (this.startOfWord()) {
+      this.wordPos = pos;
+    }
+    this.wipWord.push(piece);
+  }
+
+  /** Reject any further pieces for the current word, using this message. */
+  freezeWord(error: string) {
+    this.endOfWordError = error;
+  }
+
+  /** Close the current word, if it has any pieces, and reset word state. */
+  finishWord() {
+    if (this.wipWord.length > 0) {
+      this.wipCommand.push(SimplifyWord(this.wipWord, this.wordPos));
+    }
+    this.wipWord = [];
+    this.wordPos = undefined;
+    this.endOfWordError = undefined;
+  }
+
+  /** Close the current word, then the current command if it has any words. */
+  finishCommand() {
+    this.finishWord();
+    if (this.wipCommand.length > 0) {
+      this.script.push(this.wipCommand);
+      this.wipCommand = [];
+    }
+  }
+
+  /** Close the current command and return every command collected so far. */
+  finishScript(): Script {
+    this.finishCommand();
+    return this.script;
+  }
+}
+
+/** Recursive-descent parser that keeps one token of lookahead in `next`. */
+class Parser {
+  lastIndex: number = 0;
+  next: Token;
+
+  constructor(public text: string) {
+    this.next = this.advance();
+  }
+
+  /**
+   * Read the token that starts at `lastIndex` into `next` and return it.
+   *
+   * @throws ParseError if no token pattern matches
+   */
+  advance(): Token {
+    const startPos = this.lastIndex;
+    if (startPos == this.text.length) {
+      return (this.next = ["EOF", "<EOF>", startPos]);
+    }
+
+    for (const [type, regex] of Tokens) {
+      regex.lastIndex = startPos;
+      const matches = regex.exec(this.text);
+      if (matches) {
+        this.lastIndex = regex.lastIndex;
+        return (this.next = [type, matches[1], startPos]);
+      }
+    }
+
+    throw new ParseError("Token not matched", startPos);
+  }
+
+  /**
+   * Check that the lookahead token has the given type, without consuming it.
+   *
+   * @throws ParseError if the lookahead token has a different type
+   */
+  expect(type: TokenType) {
+    if (this.next[0] != type) {
+      throw new ParseError(
+        `Expected ${type}, found ${this.next[0]} (${this.next[1]})`,
+        this.next[2]
+      );
+    }
+  }
+
+  /**
+   * Parse commands until end of input or a `]`; the `]` is left as the
+   * lookahead token for the caller to check.
+   */
+  parseScript(): Script {
+    const wip = new WipScript();
+
+    while (true) {
+      const [type, chars, pos] = this.next;
+      switch (type) {
+        case "text":
+        case "}":
+          wip.addWordPiece({ bare: chars }, pos);
+          break;
+
+        case "{": {
+          if (wip.startOfWord()) {
+            this.advance();
+            const text = this.parseBrace();
+            wip.addWordPiece({ text }, pos);
+            this.expect("}");
+            wip.freezeWord("Extra characters after closing brace");
+          } else {
+            wip.addWordPiece({ bare: chars }, pos);
+          }
+          break;
+        }
+
+        case "quote": {
+          if (wip.startOfWord()) {
+            wip.addWordPiece({ text: "" }, pos);
+            this.advance();
+            this.parseQuoteWord(wip);
+            this.expect("quote");
+            wip.freezeWord("Extra characters after quoted word");
+          } else {
+            wip.addWordPiece({ bare: chars }, pos);
+          }
+          break;
+        }
+
+        case "[": {
+          this.advance();
+          const script = this.parseScript();
+          wip.addWordPiece({ script }, pos);
+          this.expect("]");
+          break;
+        }
+
+        case "whitespace":
+          wip.finishWord();
+          break;
+
+        case "newline":
+        case "semicolon":
+          wip.finishCommand();
+          break;
+
+        case "comment":
+          if (wip.startOfCommand()) {
+            // Skip to the end of the line; the token right after a backslash
+            // is skipped unexamined, so an escaped newline continues the comment.
+            skipComment: while (this.advance()) {
+              const [type, chars, pos] = this.next;
+              switch (type) {
+                case "newline":
+                case "EOF":
+                  break skipComment;
+                case "backslash":
+                  this.advance();
+                  continue;
+                default:
+                  continue;
+              }
+            }
+          } else {
+            wip.addWordPiece({ bare: chars }, pos);
+          }
+          break;
+
+        case "EOF":
+        case "]":
+          return wip.finishScript();
+
+        case "backslash": {
+          this.advance();
+          this.parseBackslashEscape(wip, pos, "bare");
+          break;
+        }
+
+        default:
+          throw new ParseError(
+            `Unhandled case: ${type satisfies never} (${chars})`,
+            pos
+          );
+      }
+
+      this.advance();
+    }
+  }
+
+  /**
+   * Parse the contents of a quoted word into `wip`, stopping at (without
+   * consuming) the closing quote.
+   *
+   * @throws ParseError if the input ends before the closing quote
+   */
+  parseQuoteWord(wip: WipScript) {
+    while (true) {
+      const [type, chars, pos] = this.next;
+      switch (type) {
+        case "text":
+        case "{":
+        case "}":
+        case "]":
+        case "whitespace":
+        case "newline":
+        case "semicolon":
+        case "comment":
+          wip.addWordPiece({ text: chars }, pos);
+          break;
+
+        case "[": {
+          this.advance();
+          const script = this.parseScript();
+          wip.addWordPiece({ script }, pos);
+          this.expect("]");
+          break;
+        }
+
+        case "EOF":
+          throw new ParseError(
+            "Reached end of input while parsing a quoted word",
+            pos
+          );
+
+        case "backslash": {
+          this.advance();
+          this.parseBackslashEscape(wip, pos, "quote");
+          break;
+        }
+
+        case "quote":
+          return;
+
+        default:
+          throw new ParseError(
+            `Unhandled case: ${type satisfies never} (${chars})`,
+            pos
+          );
+      }
+
+      this.advance();
+    }
+  }
+
+  /**
+   * Handle the token that follows a backslash; the caller has already
+   * advanced past the backslash itself. Only the first character of that
+   * token is escaped: the remainder of a multi-character token is treated
+   * as ordinary content.
+   *
+   * @param wip script under construction
+   * @param pos source position of the backslash
+   * @param wordType whether the escape occurs in a bare or a quoted word
+   * @throws ParseError on an unknown escape, or if the input ends here
+   */
+  parseBackslashEscape(
+    wip: WipScript,
+    pos: number,
+    wordType: "bare" | "quote"
+  ) {
+    const [type, chars, tokenPos] = this.next;
+    switch (type) {
+      case "newline":
+        if (wordType == "bare") {
+          wip.finishWord();
+        } else {
+          // ignore newline
+        }
+        break;
+
+      case "whitespace":
+        // A whitespace token can span several characters, but the backslash
+        // escapes only the first; the rest keeps its usual meaning.
+        wip.addWordPiece({ text: chars.charAt(0) }, pos);
+        if (chars.length > 1) {
+          if (wordType == "bare") {
+            wip.finishWord();
+          } else {
+            wip.addWordPiece({ text: chars.slice(1) }, tokenPos + 1);
+          }
+        }
+        break;
+
+      case "semicolon":
+      case "{":
+      case "}":
+      case "[":
+      case "]":
+      case "quote":
+      case "backslash":
+      case "comment":
+        wip.addWordPiece({ text: chars }, pos);
+        break;
+
+      case "text": {
+        // A text token is a whole run of characters (e.g. "nworld" in
+        // "hello\nworld"), so separate the escape character from the rest.
+        const escapeChar = chars.charAt(0);
+        const rest = chars.slice(1);
+        switch (escapeChar) {
+          case "n":
+            wip.addWordPiece({ text: "\n" }, pos);
+            break;
+          default:
+            throw new ParseError(`Unknown backslash escape: ${escapeChar}`, pos);
+        }
+        if (rest.length > 0) {
+          wip.addWordPiece(
+            wordType == "bare" ? { bare: rest } : { text: rest },
+            tokenPos + 1
+          );
+        }
+        break;
+      }
+
+      case "EOF":
+        throw new ParseError(
+          "Reached end of input while parsing a backslash escape",
+          pos
+        );
+
+      default:
+        throw new ParseError(
+          `Unhandled case: ${type satisfies never} (${chars})`,
+          pos
+        );
+    }
+  }
+
+  /**
+   * Collect the raw text of a brace word, stopping at (without consuming)
+   * its closing brace. Nested braces and backslash sequences are copied verbatim.
+   *
+   * @throws ParseError if the input ends before the closing brace
+   */
+  parseBrace(): string {
+    let wip = "";
+
+    while (true) {
+      const [type, chars, pos] = this.next;
+      switch (type) {
+        case "backslash":
+          wip += "\\";
+          this.advance();
+          wip += this.next[1];
+          break;
+        case "{": {
+          wip += "{";
+          this.advance();
+          wip += this.parseBrace();
+          this.expect("}");
+          wip += "}";
+          break;
+        }
+        case "}":
+          return wip;
+        case "EOF":
+          throw new ParseError(
+            "Reached end of input while parsing a brace word",
+            pos
+          );
+        default:
+          wip += chars;
+      }
+
+      this.advance();
+    }
+  }
+}
diff --git a/src/words.ts b/src/words.ts
index 5c8f5b7..7d70c6e 100644
--- a/src/words.ts
+++ b/src/words.ts
@@ -1,4 +1,4 @@
-import { escapeHtml } from './helpers';
+import { escapeHtml } from "./helpers";
 
 export type SourcePos = number;
 
@@ -117,7 +117,11 @@ export function SimplifyWord(
   if (consolidated.length == 0) {
     return { text: "", pos: sourcePosition };
   } else if (consolidated.length == 1 && IsTextPiece(consolidated[0])) {
-    return { ...consolidated[0], pos: sourcePosition };
+    if (pieces.every((piece) => "bare" in piece)) {
+      return { bare: AsText(consolidated[0]), pos: sourcePosition };
+    } else {
+      return { ...consolidated[0], pos: sourcePosition };
+    }
   } else {
     return { pieces: consolidated };
   }