import {
  Command,
  ErrorResult,
  InterpolatedPiece,
  Script,
  SimplifyWord,
  Word,
} from "./words";

/**
 * Parse out a Notcl script into an easier-to-interpret representation.
 * No script is actually executed yet.
 *
 * Any exception thrown while parsing is caught and returned as the failure
 * message (stringified with `String(ex)`), so this function itself does not
 * throw on malformed input.
 *
 * @param code code to parse
 * @param offset source position of code, if embedded in a larger source document.
 *   NOTE(review): this value is currently not used by the parser; positions
 *   reported on words are relative to the start of `code`.
 * @returns parsed list of commands, or error message on failure
 */
export function parse(
  code: string,
  offset = 0
): [true, Script] | [false, string] {
  try {
    const parser = new Parser(code);
    const script = parser.parseScript();
    // parseScript() also returns on a "]" token; at top level that is a stray
    // bracket, which this check turns into an error.
    parser.expect("EOF");
    return [true, script];
  } catch (ex) {
    // TODO: report error with error position
    return [false, String(ex)];
  }
}

// ---------------------------

// Parser for evaluating Notcl scripts

/** Every kind of token the tokenizer can hand to the parser. */
type TokenType =
  | "newline"
  | "whitespace"
  | "semicolon"
  | "{"
  | "}"
  | "["
  | "]"
  | "quote"
  | "backslash"
  | "comment"
  | "text"
  | "EOF"
  | "ERROR";

/** A token: its type, its text, and the index in the source where it starts. */
type Token = [TokenType, string, number];

/**
 * Token patterns, tried in order at the current position; the first match wins.
 *
 * Every pattern uses the sticky (`y`) flag so it only matches exactly at
 * `lastIndex`, and wraps the token text in capture group 1.
 *
 * The "text" pattern only excludes whitespace, backslash, semicolon and square
 * brackets, so characters such as `{`, `}`, `"` and `#` become their own
 * tokens only when they appear at the start of a token.
 */
const Tokens: [TokenType, RegExp][] = [
  ["newline", /(\n)/y],
  ["whitespace", /([^\S\n]+)/y],
  ["semicolon", /(;)/y],
  ["{", /(\{)/y],
  ["}", /(\})/y],
  ["[", /(\[)/y],
  ["]", /(\])/y],
  ["quote", /(")/y],
  ["backslash", /(\\)/y],
  ["comment", /(\#)/y],
  ["text", /([^\s\\;\[\]]+)/y],
];

/**
 * Accumulator for a script that is still being parsed: pieces build up into a
 * word, words into a command, and commands into the script.
 */
class WipScript {
  /** Commands completed so far. */
  script: Command[] = [];
  /** Words completed so far for the command in progress. */
  wipCommand: Word[] = [];
  /** Pieces collected so far for the word in progress. */
  wipWord: InterpolatedPiece[] = [];
  /** Source position recorded when the first piece of the current word was added. */
  wordPos: number | undefined = undefined;

  // TODO: thing to fail {}a & ""a

  /** @returns true when no piece has been added to the current word yet */
  startOfWord(): boolean {
    return this.wipWord.length === 0;
  }

  /**
   * Append a piece to the word in progress.
   *
   * @param piece piece to append
   * @param pos source position of the piece; only recorded when this piece
   *   is the first one of the word
   */
  addWordPiece(piece: InterpolatedPiece, pos?: number) {
    if (this.startOfWord()) {
      this.wordPos = pos;
    }
    this.wipWord.push(piece);
  }

  /** Close the word in progress, if it has any pieces, and add it to the current command. */
  finishWord() {
    if (this.wipWord.length > 0) {
      this.wipCommand.push(SimplifyWord(this.wipWord, this.wordPos));
      this.wipWord = [];
      this.wordPos = undefined;
    }
  }

  /** Close the current word and then the command in progress; empty commands are dropped. */
  finishCommand() {
    this.finishWord();
    if (this.wipCommand.length > 0) {
      this.script.push(this.wipCommand);
      this.wipCommand = [];
    }
  }

  /**
   * Close whatever is still in progress.
   *
   * @returns the accumulated list of commands
   */
  finishScript(): Script {
    this.finishCommand();
    return this.script;
  }
}

/**
 * Tokenizer plus recursive-descent parser over a single source string.
 * `next` always holds the current, not yet consumed, token.
 */
class Parser {
  /** Index in `text` just past the most recently matched token. */
  lastIndex: number = 0;
  /** The current lookahead token. */
  next: Token;

  /** @param text source text to tokenize and parse */
  constructor(public text: string) {
    this.next = this.advance();
  }

  /**
   * Read the token starting at `lastIndex`, store it in `next`, and move
   * `lastIndex` past it.
   *
   * @returns the new current token: "EOF" once the end of the text has been
   *   reached, or "ERROR" (without consuming anything) when no pattern matches
   */
  advance(): Token {
    const startPos = this.lastIndex;
    if (startPos === this.text.length) {
      return (this.next = ["EOF", "<EOF>", startPos]);
    }

    for (const [type, regex] of Tokens) {
      // The patterns are shared module-level objects, so the sticky start
      // position has to be set before every use.
      regex.lastIndex = startPos;
      const matches = regex.exec(this.text);
      if (matches) {
        this.lastIndex = regex.lastIndex;
        return (this.next = [type, matches[1], startPos]);
      }
    }

    return (this.next = ["ERROR", "Token not matched", startPos]);
  }

  /**
   * Assert that the current token has the given type. The token is not consumed.
   *
   * @param type required token type
   * @throws Error naming the expected and the actual token when they differ
   */
  expect(type: TokenType) {
    if (this.next[0] !== type) {
      throw new Error(
        `Expected ${type}, found ${this.next[0]} (${this.next[1]})`
      );
    }
  }

  /**
   * Parse commands starting at the current token, until "EOF" or "]".
   * The terminating token is left as the current token for the caller to check.
   *
   * @returns the commands parsed
   * @throws Error on token types that are not handled yet, or when a nested
   *   `[` script is not closed by `]`
   */
  parseScript(): Script {
    const wip = new WipScript();

    while (true) {
      const [type, chars, pos] = this.next;
      switch (type) {
        case "text":
          wip.addWordPiece({ bare: chars }, pos);
          break;

        case "[": {
          // Step past "[" and parse the nested script up to its closing "]".
          this.advance();
          const script = this.parseScript();
          // Pass the position of "[" so that a word which starts with a
          // bracketed script records its source position like any other word.
          wip.addWordPiece({ script }, pos);
          this.expect("]");
          // The "]" itself is consumed by the advance() after the switch.
          break;
        }

        case "whitespace":
          wip.finishWord();
          break;

        case "newline":
        case "semicolon":
          wip.finishCommand();
          break;

        case "EOF":
        case "]":
          return wip.finishScript();

        case "{":
        case "}":
        case "quote":
        case "backslash":
        case "comment":
        case "ERROR":
          throw new Error(`Unhandled case: ${type} (${chars})`);

        default:
          // Compile-time exhaustiveness check: `type` must be `never` here.
          throw new Error(`Unhandled case: ${type satisfies never} (${chars})`);
      }
      this.advance();
    }
  }
}