diff --git a/src/parser2.ts b/src/parser2.ts deleted file mode 100644 index 47f9e9f..0000000 --- a/src/parser2.ts +++ /dev/null @@ -1,382 +0,0 @@ -import { - Command, - ErrorResult, - InterpolatedPiece, - Script, - SimplifyWord, - Word, -} from "./words"; - -/** - * Parse out a Notcl script into an easier-to-interpret representation. - * No script is actually executed yet. - * - * @param code code to parse - * @param offset source position of code, if embedded in a larger source document - * @returns parsed list of commands, or error message on failure - */ -export function parse( - code: string, - offset = 0 -): [true, Script] | [false, string] { - try { - const parser = new Parser(code); - const script = parser.parseScript(); - parser.expect("EOF"); - - return [true, script]; - } catch (ex) { - // TODO: report error with error position - return [false, String(ex)]; - } -} - -// --------------------------- - -// Parser for evaluating Notcl scripts - -export class ParseError extends Error { - constructor(message: string, public pos: number) { - super(message); - } -} - -type TokenType = - | "newline" - | "whitespace" - | "semicolon" - | "{" - | "}" - | "[" - | "]" - | "quote" - | "backslash" - | "comment" - | "text" - | "EOF"; - -type Token = [TokenType, string, number]; - -const Tokens: [TokenType, RegExp][] = [ - ["newline", /(\n)/y], - ["whitespace", /([^\S\n]+)/y], - ["semicolon", /(;)/y], - ["{", /(\{)/y], - ["}", /(\})/y], - ["[", /(\[)/y], - ["]", /(\])/y], - ["quote", /(")/y], - ["backslash", /(\\)/y], - ["comment", /(\#)/y], - ["text", /([^\s;\{\}\[\]"\\\#]+)/y], -]; - -class WipScript { - script: Command[] = []; - wipCommand: Word[] = []; - wipWord: InterpolatedPiece[] = []; - wordPos: number | undefined = undefined; - endOfWordError: string | undefined = undefined; - - startOfWord(): boolean { - return this.wipWord.length == 0; - } - - startOfCommand(): boolean { - return this.wipWord.length == 0 && this.wipCommand.length == 0; - } - - addWordPiece(piece: InterpolatedPiece, pos: number) { - if (this.endOfWordError) { - throw new ParseError(this.endOfWordError, pos); - } - if (this.startOfWord()) { - this.wordPos = pos; - } - this.wipWord.push(piece); - } - freezeWord(error: string) { - this.endOfWordError = error; - } - finishWord() { - if (this.wipWord.length > 0) { - this.wipCommand.push(SimplifyWord(this.wipWord, this.wordPos)); - } - this.wipWord = []; - this.wordPos = undefined; - this.endOfWordError = undefined; - } - finishCommand() { - this.finishWord(); - if (this.wipCommand.length > 0) { - this.script.push(this.wipCommand); - this.wipCommand = []; - } - } - finishScript(): Script { - this.finishCommand(); - return this.script; - } -} - -class Parser { - lastIndex: number = 0; - next: Token; - - constructor(public text: string) { - this.next = this.advance(); - } - - advance(): Token { - const startPos = this.lastIndex; - if (startPos == this.text.length) { - return (this.next = ["EOF", "<EOF>", startPos]); - } - - for (const [type, regex] of Tokens) { - regex.lastIndex = startPos; - const matches = regex.exec(this.text); - if (matches) { - this.lastIndex = regex.lastIndex; - return (this.next = [type, matches[1], startPos]); - } - } - - throw new ParseError("Token not matched", startPos); - } - - expect(type: TokenType) { - if (this.next[0] != type) { - throw new ParseError( - `Expected ${type}, found ${this.next[0]} (${this.next[1]})`, - this.next[2] - ); - } - } - - parseScript(): Script { - const wip = new WipScript(); - - while (true) { - const [type, chars, pos] = this.next; - switch (type) { - case "text": - case "}": - wip.addWordPiece({ bare: chars }, pos); - break; - - case "{": { - if (wip.startOfWord()) { - this.advance(); - const text = this.parseBrace(); - wip.addWordPiece({ text }, pos); - this.expect("}"); - wip.freezeWord("Extra characters after closing brace"); - } else { - wip.addWordPiece({ bare: chars }, pos); - } - break; - } - - case "quote": { - if (wip.startOfWord()) { - wip.addWordPiece({ text: "" }, pos); - this.advance(); - this.parseQuoteWord(wip); - this.expect("quote"); - wip.freezeWord("Extra characters after quoted word"); - } else { - wip.addWordPiece({ bare: chars }, pos); - } - break; - } - - case "[": { - this.advance(); - const script = this.parseScript(); - wip.addWordPiece({ script }, pos); - this.expect("]"); - break; - } - - case "whitespace": - wip.finishWord(); - break; - - case "newline": - case "semicolon": - wip.finishCommand(); - break; - - case "comment": - if (wip.startOfCommand()) { - skipComment: while (this.advance()) { - const [type, chars, pos] = this.next; - switch (type) { - case "newline": - case "EOF": - break skipComment; - case "backslash": - this.advance(); - continue; - default: - continue; - } - } - } else { - wip.addWordPiece({ bare: chars }, pos); - } - break; - - case "EOF": - case "]": - return wip.finishScript(); - - case "backslash": { - this.advance(); - this.parseBackslashEscape(wip, pos, "bare"); - break; - } - - default: - throw new ParseError( - `Unhandled case: ${type satisfies never} (${chars})`, - pos - ); - } - - this.advance(); - } - } - - parseQuoteWord(wip: WipScript) { - while (true) { - const [type, chars, pos] = this.next; - switch (type) { - case "text": - case "{": - case "}": - case "]": - case "whitespace": - case "newline": - case "semicolon": - case "comment": - wip.addWordPiece({ text: chars }, pos); - break; - - case "[": { - this.advance(); - const script = this.parseScript(); - wip.addWordPiece({ script }, pos); - this.expect("]"); - break; - } - - case "EOF": - throw new ParseError( - "Reached end of input while parsing a quoted word", - pos - ); - - case "backslash": { - this.advance(); - this.parseBackslashEscape(wip, pos, "quote"); - break; - } - - case "quote": - return; - - default: - throw new ParseError( - `Unhandled case: ${type satisfies never} (${chars})`, - pos - ); - } - - this.advance(); - } - } - - parseBackslashEscape( - wip: WipScript, - pos: number, - wordType: "bare" | "quote" - ) { - const [type, chars] = this.next; - switch (type) { - case "newline": - if (wordType == "bare") { - wip.finishWord(); - } else { - // ignore newline - } - break; - - case "whitespace": - case "semicolon": - case "{": - case "}": - case "[": - case "]": - case "quote": - case "backslash": - case "comment": - wip.addWordPiece({ text: chars }, pos); - break; - case "text": - switch (chars) { - case "n": - wip.addWordPiece({ text: "\n" }, pos); - break; - default: - throw new ParseError(`Unknown backslash escape: ${chars}`, pos); - } - break; - case "EOF": - throw new ParseError( - "Reached end of input while parsing a backslash escape", - pos - ); - default: - throw new ParseError( - `Unhandled case: ${type satisfies never} (${chars})`, - pos - ); - } - } - - parseBrace(): string { - let wip = ""; - - while (true) { - const [type, chars, pos] = this.next; - switch (type) { - case "backslash": - wip += "\\"; - this.advance(); - wip += this.next[1]; - break; - case "{": { - wip += "{"; - this.advance(); - wip += this.parseBrace(); - this.expect("}"); - wip += "}"; - break; - } - case "}": - return wip; - case "EOF": - throw new ParseError( - "Reached end of input while parsing a brace word", - pos - ); - default: - wip += chars; - } - - this.advance(); - } - } -} diff --git a/src/words.ts b/src/words.ts index 7d70c6e..5c8f5b7 100644 --- a/src/words.ts +++ b/src/words.ts @@ -1,4 +1,4 @@ -import { escapeHtml } from "./helpers"; +import { escapeHtml } from './helpers'; export type SourcePos = number; @@ -117,11 +117,7 @@ export function SimplifyWord( if (consolidated.length == 0) { return { text: "", pos: sourcePosition }; } else if (consolidated.length == 1 && IsTextPiece(consolidated[0])) { - if (pieces.every((piece) => "bare" in piece)) { - return { bare: AsText(consolidated[0]), pos: sourcePosition }; - } else { - return { ...consolidated[0], pos: sourcePosition }; - } + return { ...consolidated[0], pos: sourcePosition }; } else { return { pieces: consolidated }; }