Compare commits

...

14 commits

2 changed files with 388 additions and 2 deletions

382
src/parser2.ts Normal file
View file

@ -0,0 +1,382 @@
import {
Command,
ErrorResult,
InterpolatedPiece,
Script,
SimplifyWord,
Word,
} from "./words";
/**
 * Turn Notcl source text into its parsed command list without running it.
 *
 * The whole input must be consumed: after the script has been parsed the
 * parser is required to be at end of input, otherwise the parse fails.
 * Failures never escape as exceptions; whatever is thrown while parsing is
 * caught and converted to a string with `String()`.
 *
 * @param code code to parse
 * @param offset source position of code, if embedded in a larger source
 *   document. NOTE: this value is accepted but not yet read by this function.
 * @returns `[true, script]` on success, or `[false, message]` on failure
 */
export function parse(
  code: string,
  offset = 0
): [true, Script] | [false, string] {
  let parsed: Script;
  try {
    const parser = new Parser(code);
    parsed = parser.parseScript();
    // parseScript() also stops at a stray "]", so insist on real end of input.
    parser.expect("EOF");
  } catch (failure) {
    // TODO: report error with error position
    return [false, String(failure)];
  }
  return [true, parsed];
}
// ---------------------------
// Tokenizer and parser for Notcl scripts (parsing only; nothing is evaluated here)
/**
 * Error raised when the source text cannot be tokenized or parsed.
 * Carries the source position the failure was detected at.
 */
export class ParseError extends Error {
  /** Position in the parsed text that the error refers to. */
  public pos: number;

  /**
   * @param message human-readable description of the problem
   * @param pos position in the parsed text that the error refers to
   */
  constructor(message: string, pos: number) {
    super(message);
    this.pos = pos;
  }
}
/**
 * Kinds of token the tokenizer can produce.
 *
 * Every kind except "EOF" has a matching pattern in `Tokens`; "EOF" is
 * synthesized by `Parser.advance()` once the end of the input is reached.
 */
type TokenType =
| "newline"
| "whitespace"
| "semicolon"
| "{"
| "}"
| "["
| "]"
| "quote"
| "backslash"
| "comment"
| "text"
| "EOF";
/**
 * A lexed token as a tuple: `[kind, matched text, position]`, where position
 * is the index in the source text at which the token starts.
 */
type Token = [TokenType, string, number];
/**
 * Token patterns, tried in this order by `Parser.advance()`.
 *
 * Each pattern carries the sticky (`y`) flag, so it can only match starting
 * exactly at the `lastIndex` assigned just before `exec` is called, and each
 * wraps its whole match in capture group 1, which becomes the token's text.
 * There is no entry for "EOF"; that token is produced by `advance()` itself.
 */
const Tokens: [TokenType, RegExp][] = [
["newline", /(\n)/y],
// A run of whitespace characters other than "\n".
["whitespace", /([^\S\n]+)/y],
["semicolon", /(;)/y],
["{", /(\{)/y],
["}", /(\})/y],
["[", /(\[)/y],
["]", /(\])/y],
["quote", /(")/y],
["backslash", /(\\)/y],
// Only the "#" itself; the parser decides whether it really starts a comment.
["comment", /(\#)/y],
// A maximal run of characters that are neither whitespace nor any of the
// single-character tokens above.
["text", /([^\s;\{\}\[\]"\\\#]+)/y],
];
/**
 * Work-in-progress script: accumulates pieces into a word, words into a
 * command, and commands into a script as the parser walks the token stream.
 */
class WipScript {
  /** Commands that have been completed so far. */
  script: Command[] = [];
  /** Completed words of the command currently being built. */
  wipCommand: Word[] = [];
  /** Pieces of the word currently being built. */
  wipWord: InterpolatedPiece[] = [];
  /** Position of the first piece of the current word, once it has one. */
  wordPos: number | undefined = undefined;
  /**
   * When set, the current word is closed to further pieces and this is the
   * message raised if another piece is added before the word is finished.
   */
  endOfWordError: string | undefined = undefined;

  /** @returns true while the current word has no pieces yet */
  startOfWord(): boolean {
    return this.wipWord.length === 0;
  }

  /** @returns true while neither the current word nor the current command has any content */
  startOfCommand(): boolean {
    return this.startOfWord() && this.wipCommand.length === 0;
  }

  /**
   * Append a piece to the current word, remembering the position of the
   * word's first piece.
   *
   * @param piece piece to append
   * @param pos source position of the piece
   * @throws ParseError if the current word was frozen with `freezeWord()`
   */
  addWordPiece(piece: InterpolatedPiece, pos: number) {
    if (this.endOfWordError) {
      throw new ParseError(this.endOfWordError, pos);
    }
    const isFirstPiece = this.wipWord.length === 0;
    if (isFirstPiece) {
      this.wordPos = pos;
    }
    this.wipWord.push(piece);
  }

  /**
   * Close the current word to further pieces until it is finished.
   *
   * @param error message to raise if another piece is added to this word
   */
  freezeWord(error: string) {
    this.endOfWordError = error;
  }

  /**
   * End the current word. If it has any pieces it is simplified and appended
   * to the current command; either way the per-word state is reset.
   */
  finishWord() {
    const hasPieces = this.wipWord.length > 0;
    if (hasPieces) {
      const word = SimplifyWord(this.wipWord, this.wordPos);
      this.wipCommand.push(word);
    }
    this.endOfWordError = undefined;
    this.wordPos = undefined;
    this.wipWord = [];
  }

  /**
   * End the current word and then the current command. Commands without any
   * words are not recorded.
   */
  finishCommand() {
    this.finishWord();
    if (this.wipCommand.length === 0) {
      return;
    }
    this.script.push(this.wipCommand);
    this.wipCommand = [];
  }

  /**
   * End whatever command is in progress and hand back the accumulated script.
   *
   * @returns the list of completed commands
   */
  finishScript(): Script {
    this.finishCommand();
    return this.script;
  }
}
/**
 * Tokenizer and recursive-descent parser for Notcl source text.
 *
 * The parser keeps one token of lookahead in `next`; `advance()` replaces it
 * with the following token. `parseScript`, `parseQuoteWord` and `parseBrace`
 * are entered with `next` on the first token they should look at and return
 * with `next` still on the token that ended them (end of input or a closing
 * delimiter), leaving it to the caller to check and step over that token.
 */
class Parser {
  /** Index in `text` at which the next call to `advance()` starts lexing. */
  lastIndex = 0;
  /** The current lookahead token. */
  next: Token;

  /**
   * @param text the complete source text to parse
   */
  constructor(public text: string) {
    this.next = this.advance();
  }

  /**
   * Lex the token starting at `lastIndex`, store it in `next` and return it.
   * Once the end of the text has been reached this keeps returning an "EOF"
   * token (with the placeholder text "<EOF>").
   *
   * @returns the new lookahead token
   * @throws ParseError if no token pattern matches at the current position
   */
  advance(): Token {
    const startPos = this.lastIndex;
    if (startPos >= this.text.length) {
      return (this.next = ["EOF", "<EOF>", startPos]);
    }
    for (const [type, regex] of Tokens) {
      // The patterns are sticky, so this pins the match to startPos exactly.
      regex.lastIndex = startPos;
      const matches = regex.exec(this.text);
      if (matches) {
        this.lastIndex = regex.lastIndex;
        return (this.next = [type, matches[1], startPos]);
      }
    }
    throw new ParseError("Token not matched", startPos);
  }

  /**
   * Assert that the lookahead token has the given type. Does not consume it.
   *
   * @param type the token type that must come next
   * @throws ParseError if the lookahead token has a different type
   */
  expect(type: TokenType) {
    if (this.next[0] !== type) {
      throw new ParseError(
        `Expected ${type}, found ${this.next[0]} (${this.next[1]})`,
        this.next[2]
      );
    }
  }

  /**
   * Parse a sequence of commands up to end of input or an unmatched "]".
   * The terminating token is left in `next`, not consumed.
   *
   * Every iteration handles the lookahead token and then advances once at the
   * bottom of the loop; the nested parsers stop on their closing delimiter so
   * that this shared advance is what steps over it.
   *
   * @returns the parsed commands
   * @throws ParseError on malformed input
   */
  parseScript(): Script {
    const wip = new WipScript();
    while (true) {
      const [type, chars, pos] = this.next;
      switch (type) {
        case "text":
        case "}":
          // A "}" seen here is not closing anything, so it is ordinary text.
          wip.addWordPiece({ bare: chars }, pos);
          break;
        case "{": {
          // Braces only delimit a word when they open it.
          if (wip.startOfWord()) {
            this.advance();
            const text = this.parseBrace();
            wip.addWordPiece({ text }, pos);
            this.expect("}");
            wip.freezeWord("Extra characters after closing brace");
          } else {
            wip.addWordPiece({ bare: chars }, pos);
          }
          break;
        }
        case "quote": {
          // Quotes only delimit a word when they open it.
          if (wip.startOfWord()) {
            // The empty piece makes sure even `""` produces a word.
            wip.addWordPiece({ text: "" }, pos);
            this.advance();
            this.parseQuoteWord(wip);
            this.expect("quote");
            wip.freezeWord("Extra characters after quoted word");
          } else {
            wip.addWordPiece({ bare: chars }, pos);
          }
          break;
        }
        case "[": {
          this.advance();
          const script = this.parseScript();
          wip.addWordPiece({ script }, pos);
          this.expect("]");
          break;
        }
        case "whitespace":
          wip.finishWord();
          break;
        case "newline":
        case "semicolon":
          wip.finishCommand();
          break;
        case "comment":
          // "#" only starts a comment where a command could start.
          if (wip.startOfCommand()) {
            // Discard tokens up to (not including) the next newline or EOF.
            for (;;) {
              const [skipped] = this.advance();
              if (skipped === "newline" || skipped === "EOF") {
                break;
              }
              if (skipped === "backslash") {
                // Also discard the token after a backslash, so that an
                // escaped newline does not end the comment.
                this.advance();
              }
            }
          } else {
            wip.addWordPiece({ bare: chars }, pos);
          }
          break;
        case "EOF":
        case "]":
          return wip.finishScript();
        case "backslash": {
          this.advance();
          this.parseBackslashEscape(wip, pos, "bare");
          break;
        }
        default:
          throw new ParseError(
            `Unhandled case: ${type satisfies never} (${chars})`,
            pos
          );
      }
      this.advance();
    }
  }

  /**
   * Parse the inside of a double-quoted word, adding its pieces to the word
   * in progress. Entered just after the opening quote; returns with `next` on
   * the closing quote.
   *
   * @param wip script under construction that receives the word pieces
   * @throws ParseError if the input ends before the closing quote, or on a
   *   malformed escape or nested script
   */
  parseQuoteWord(wip: WipScript) {
    while (true) {
      const [type, chars, pos] = this.next;
      switch (type) {
        case "text":
        case "{":
        case "}":
        case "]":
        case "whitespace":
        case "newline":
        case "semicolon":
        case "comment":
          // None of these are special inside quotes.
          wip.addWordPiece({ text: chars }, pos);
          break;
        case "[": {
          this.advance();
          const script = this.parseScript();
          wip.addWordPiece({ script }, pos);
          this.expect("]");
          break;
        }
        case "EOF":
          throw new ParseError(
            "Reached end of input while parsing a quoted word",
            pos
          );
        case "backslash": {
          this.advance();
          this.parseBackslashEscape(wip, pos, "quote");
          break;
        }
        case "quote":
          return;
        default:
          throw new ParseError(
            `Unhandled case: ${type satisfies never} (${chars})`,
            pos
          );
      }
      this.advance();
    }
  }

  /**
   * Handle the token that follows a backslash. Entered with `next` on that
   * token; the caller is expected to call `advance()` afterwards.
   *
   * @param wip script under construction that receives the escaped text
   * @param pos position of the backslash, used for pieces and errors
   * @param wordType whether the escape occurs in a bare or a quoted word
   * @throws ParseError on an unknown escape or if the input ends right after
   *   the backslash
   */
  parseBackslashEscape(
    wip: WipScript,
    pos: number,
    wordType: "bare" | "quote"
  ) {
    const [type, chars, tokenPos] = this.next;
    switch (type) {
      case "newline":
        if (wordType === "bare") {
          // An escaped newline separates words instead of ending the command.
          wip.finishWord();
        } else {
          // ignore newline
        }
        break;
      case "whitespace":
      case "semicolon":
      case "{":
      case "}":
      case "[":
      case "]":
      case "quote":
      case "backslash":
      case "comment":
        // The escaped token loses its special meaning and becomes plain text.
        wip.addWordPiece({ text: chars }, pos);
        break;
      case "text":
        // A text token is a maximal run of ordinary characters (e.g. "nb" in
        // `a\nb`), but only its first character belongs to the escape.
        switch (chars.charAt(0)) {
          case "n":
            wip.addWordPiece({ text: "\n" }, pos);
            break;
          default:
            throw new ParseError(`Unknown backslash escape: ${chars}`, pos);
        }
        // Rewind to just after the escape character so that the caller's
        // advance() lexes the remainder of the run as ordinary text.
        this.lastIndex = tokenPos + 1;
        break;
      case "EOF":
        throw new ParseError(
          "Reached end of input while parsing a backslash escape",
          pos
        );
      default:
        throw new ParseError(
          `Unhandled case: ${type satisfies never} (${chars})`,
          pos
        );
    }
  }

  /**
   * Collect the raw text of a brace-delimited word. Entered just after the
   * opening "{"; returns with `next` on the matching "}". Nested braces are
   * balanced and kept in the text, and backslashes are kept verbatim together
   * with the token that follows them.
   *
   * @returns the text between the braces, unmodified
   * @throws ParseError if the input ends before the closing brace
   */
  parseBrace(): string {
    let wip = "";
    while (true) {
      const [type, chars, pos] = this.next;
      switch (type) {
        case "backslash": {
          wip += "\\";
          // The token after a backslash is copied as-is, so an escaped brace
          // does not count towards nesting.
          const [escapedType, escapedChars] = this.advance();
          if (escapedType !== "EOF") {
            wip += escapedChars;
          }
          // At EOF the placeholder text is not appended; the next loop
          // iteration reports the unterminated brace word.
          break;
        }
        case "{": {
          wip += "{";
          this.advance();
          wip += this.parseBrace();
          this.expect("}");
          wip += "}";
          break;
        }
        case "}":
          return wip;
        case "EOF":
          throw new ParseError(
            "Reached end of input while parsing a brace word",
            pos
          );
        default:
          wip += chars;
      }
      this.advance();
    }
  }
}

View file

@ -1,4 +1,4 @@
import { escapeHtml } from './helpers';
import { escapeHtml } from "./helpers";
export type SourcePos = number;
@ -117,7 +117,11 @@ export function SimplifyWord(
if (consolidated.length == 0) {
return { text: "", pos: sourcePosition };
} else if (consolidated.length == 1 && IsTextPiece(consolidated[0])) {
return { ...consolidated[0], pos: sourcePosition };
if (pieces.every((piece) => "bare" in piece)) {
return { bare: AsText(consolidated[0]), pos: sourcePosition };
} else {
return { ...consolidated[0], pos: sourcePosition };
}
} else {
return { pieces: consolidated };
}