Compare commits
14 commits
7c74c9e34f
...
d220993236
Author | SHA1 | Date | |
---|---|---|---|
d220993236 | |||
6b1c2c48ef | |||
ac1a38e75f | |||
a2c8eb66b9 | |||
fa3be1e003 | |||
b536d30420 | |||
b2c5f7ea90 | |||
c61496fcc3 | |||
c1ce90fd63 | |||
ab91d71170 | |||
63d6fa836a | |||
9b81056d1d | |||
618de2ac99 | |||
2c55e38822 |
2 changed files with 388 additions and 2 deletions
382
src/parser2.ts
Normal file
382
src/parser2.ts
Normal file
|
@ -0,0 +1,382 @@
|
|||
import {
|
||||
Command,
|
||||
ErrorResult,
|
||||
InterpolatedPiece,
|
||||
Script,
|
||||
SimplifyWord,
|
||||
Word,
|
||||
} from "./words";
|
||||
|
||||
/**
 * Parse out a Notcl script into an easier-to-interpret representation.
 * No script is actually executed yet.
 *
 * @param code code to parse
 * @param offset source position of code, if embedded in a larger source document;
 *   it is added to the position reported for a parse error
 * @returns `[true, script]` with the parsed list of commands, or
 *   `[false, message]` on failure
 */
export function parse(
  code: string,
  offset = 0
): [true, Script] | [false, string] {
  try {
    const parser = new Parser(code);
    const script = parser.parseScript();
    // parseScript also returns on a stray "]", so make sure all input was consumed.
    parser.expect("EOF");

    return [true, script];
  } catch (ex) {
    if (ex instanceof ParseError) {
      // ParseError positions are relative to `code`; shift them by `offset`
      // so they point into the enclosing document.
      return [false, `${String(ex)} (at position ${offset + ex.pos})`];
    }
    // Anything else is reported as-is, without a position.
    return [false, String(ex)];
  }
}
|
||||
|
||||
// ---------------------------
|
||||
|
||||
// Parser for evaluating Notcl scripts
|
||||
|
||||
/**
 * Error thrown when a script cannot be tokenized or parsed.
 *
 * `pos` is the source position the error is associated with.
 */
export class ParseError extends Error {
  constructor(message: string, public pos: number) {
    super(message);
    // Without this the inherited name stays "Error", so a stringified or
    // logged parse failure cannot be told apart from a generic error.
    this.name = "ParseError";
  }
}
|
||||
|
||||
/** Every kind of token the tokenizer can hand to the parser. */
type TokenType =
  | "newline"
  | "whitespace"
  | "semicolon"
  | "{"
  | "}"
  | "["
  | "]"
  | "quote"
  | "backslash"
  | "comment"
  | "text"
  | "EOF";

/** A lexed token: its kind, the matched source text, and where it starts. */
type Token = [type: TokenType, chars: string, pos: number];

/**
 * Token patterns, tried in this order at the current position.
 *
 * Every pattern is sticky (`y`), so it only matches exactly at the `lastIndex`
 * it is given, and captures the token text in group 1. "EOF" has no pattern.
 */
const Tokens: [type: TokenType, pattern: RegExp][] = [
  ["newline", /(\n)/y],
  // Any run of whitespace other than a newline.
  ["whitespace", /([^\S\n]+)/y],
  ["semicolon", /(;)/y],
  ["{", /(\{)/y],
  ["}", /(\})/y],
  ["[", /(\[)/y],
  ["]", /(\])/y],
  ["quote", /(")/y],
  ["backslash", /(\\)/y],
  ["comment", /(\#)/y],
  // A run of characters that none of the patterns above would match.
  ["text", /([^\s;\{\}\[\]"\\\#]+)/y],
];
|
||||
|
||||
/**
 * Accumulator for a script that is still being parsed.
 *
 * Pieces are collected into the current word, words into the current
 * command, and commands into the script.
 */
class WipScript {
  /** Commands completed so far. */
  script: Command[] = [];
  /** Words completed so far for the command in progress. */
  wipCommand: Word[] = [];
  /** Pieces collected so far for the word in progress. */
  wipWord: InterpolatedPiece[] = [];
  /** Position of the first piece of the word in progress, if it has any. */
  wordPos: number | undefined = undefined;
  /** When set, adding another piece to the current word throws this message. */
  endOfWordError: string | undefined = undefined;

  /** True when the word in progress has no pieces yet. */
  startOfWord(): boolean {
    return this.wipWord.length === 0;
  }

  /** True when neither the current word nor the current command has content. */
  startOfCommand(): boolean {
    return this.wipCommand.length === 0 && this.wipWord.length === 0;
  }

  /**
   * Append a piece to the word in progress.
   *
   * @param piece piece to append
   * @param pos source position of the piece; remembered as the word's
   *   position when this is the word's first piece
   * @throws ParseError if the word was frozen with `freezeWord`
   */
  addWordPiece(piece: InterpolatedPiece, pos: number) {
    const frozenMessage = this.endOfWordError;
    if (frozenMessage) {
      throw new ParseError(frozenMessage, pos);
    }

    if (this.wipWord.length === 0) {
      this.wordPos = pos;
    }
    this.wipWord.push(piece);
  }

  /** Forbid further pieces on the current word; `error` is the message thrown if one arrives. */
  freezeWord(error: string) {
    this.endOfWordError = error;
  }

  /** Close the word in progress (if it has any pieces) and reset all per-word state. */
  finishWord() {
    const pieces = this.wipWord;
    if (pieces.length !== 0) {
      this.wipCommand.push(SimplifyWord(pieces, this.wordPos));
    }

    this.endOfWordError = undefined;
    this.wordPos = undefined;
    this.wipWord = [];
  }

  /** Close the current word, then the command in progress if it has any words. */
  finishCommand() {
    this.finishWord();

    if (this.wipCommand.length === 0) {
      return;
    }
    this.script.push(this.wipCommand);
    this.wipCommand = [];
  }

  /** Close whatever is still in progress and return the accumulated commands. */
  finishScript(): Script {
    this.finishCommand();
    return this.script;
  }
}
|
||||
|
||||
/**
 * Tokenizer and recursive-descent parser over a single piece of source text.
 *
 * Tokens are lexed on demand: `next` always holds the current lookahead token
 * and `advance()` replaces it with the one that follows.
 */
class Parser {
  /** Offset into `text` at which the token after `next` starts. */
  lastIndex: number = 0;

  /** Current lookahead token. */
  next: Token;

  constructor(public text: string) {
    this.next = this.advance();
  }

  /**
   * Lex the token starting at `lastIndex`, store it in `next` and return it.
   * Once the end of the text is reached every call yields an "EOF" token.
   *
   * @throws ParseError if no token pattern matches at the current position
   */
  advance(): Token {
    const startPos = this.lastIndex;
    if (startPos >= this.text.length) {
      return (this.next = ["EOF", "<EOF>", startPos]);
    }

    for (const [type, regex] of Tokens) {
      // The patterns are sticky, so each one is tried exactly at startPos.
      regex.lastIndex = startPos;
      const matches = regex.exec(this.text);
      if (matches) {
        this.lastIndex = regex.lastIndex;
        return (this.next = [type, matches[1], startPos]);
      }
    }

    throw new ParseError("Token not matched", startPos);
  }

  /**
   * Assert that the lookahead token has the given type. Does not consume it.
   *
   * @throws ParseError if the lookahead token is of a different type
   */
  expect(type: TokenType) {
    if (this.next[0] !== type) {
      throw new ParseError(
        `Expected ${type}, found ${this.next[0]} (${this.next[1]})`,
        this.next[2]
      );
    }
  }

  /**
   * Parse commands until end of input or an unconsumed "]".
   *
   * The terminating token is left in `next`, so the caller decides whether it
   * is acceptable (top level expects "EOF", a nested script expects "]").
   */
  parseScript(): Script {
    const wip = new WipScript();

    while (true) {
      const [type, chars, pos] = this.next;
      switch (type) {
        case "text":
        case "}":
          wip.addWordPiece({ bare: chars }, pos);
          break;

        case "{": {
          if (wip.startOfWord()) {
            // A brace only opens a brace word at the start of a word.
            this.advance();
            const text = this.parseBrace();
            wip.addWordPiece({ text }, pos);
            this.expect("}");
            wip.freezeWord("Extra characters after closing brace");
          } else {
            wip.addWordPiece({ bare: chars }, pos);
          }
          break;
        }

        case "quote": {
          if (wip.startOfWord()) {
            // The empty piece makes sure that "" still produces a word.
            wip.addWordPiece({ text: "" }, pos);
            this.advance();
            this.parseQuoteWord(wip);
            this.expect("quote");
            wip.freezeWord("Extra characters after quoted word");
          } else {
            wip.addWordPiece({ bare: chars }, pos);
          }
          break;
        }

        case "[": {
          this.advance();
          const script = this.parseScript();
          wip.addWordPiece({ script }, pos);
          this.expect("]");
          break;
        }

        case "whitespace":
          wip.finishWord();
          break;

        case "newline":
        case "semicolon":
          wip.finishCommand();
          break;

        case "comment":
          if (wip.startOfCommand()) {
            this.skipComment();
          } else {
            // Only a "#" at the start of a command begins a comment.
            wip.addWordPiece({ bare: chars }, pos);
          }
          break;

        case "EOF":
        case "]":
          return wip.finishScript();

        case "backslash": {
          this.advance();
          this.parseBackslashEscape(wip, pos, "bare");
          break;
        }

        default:
          throw new ParseError(
            `Unhandled case: ${type satisfies never} (${chars})`,
            pos
          );
      }

      // Every non-returning case leaves its last token in `next`; consume it.
      this.advance();
    }
  }

  /**
   * Skip the rest of a comment. On entry `next` is the "comment" token; on
   * return it is the "newline" or "EOF" token that ends the comment.
   * A backslash hides the token that follows it, so a backslash directly
   * before a newline continues the comment on the next line.
   */
  private skipComment() {
    while (true) {
      const [type] = this.advance();
      if (type === "newline" || type === "EOF") {
        return;
      }
      if (type === "backslash") {
        this.advance();
      }
    }
  }

  /**
   * Parse the inside of a quoted word, adding its pieces to `wip`.
   * On entry `next` is the first token after the opening quote; on return it
   * is the closing quote.
   *
   * @throws ParseError if the input ends before the closing quote
   */
  parseQuoteWord(wip: WipScript) {
    while (true) {
      const [type, chars, pos] = this.next;
      switch (type) {
        case "text":
        case "{":
        case "}":
        case "]":
        case "whitespace":
        case "newline":
        case "semicolon":
        case "comment":
          // Inside quotes all of these are plain text.
          wip.addWordPiece({ text: chars }, pos);
          break;

        case "[": {
          this.advance();
          const script = this.parseScript();
          wip.addWordPiece({ script }, pos);
          this.expect("]");
          break;
        }

        case "EOF":
          throw new ParseError(
            "Reached end of input while parsing a quoted word",
            pos
          );

        case "backslash": {
          this.advance();
          this.parseBackslashEscape(wip, pos, "quote");
          break;
        }

        case "quote":
          return;

        default:
          throw new ParseError(
            `Unhandled case: ${type satisfies never} (${chars})`,
            pos
          );
      }

      this.advance();
    }
  }

  /**
   * Handle the token that follows a backslash. On entry `next` is that token;
   * the caller consumes it afterwards with `advance()`.
   *
   * @param wip script under construction that receives the escaped text
   * @param pos position of the backslash, used for pieces and errors
   * @param wordType whether the escape occurs in a bare or a quoted word
   * @throws ParseError on an unknown escape or if the input ends after the backslash
   */
  parseBackslashEscape(
    wip: WipScript,
    pos: number,
    wordType: "bare" | "quote"
  ) {
    const [type, chars, tokenPos] = this.next;
    switch (type) {
      case "newline":
        if (wordType === "bare") {
          // Line continuation: ends the word but not the command.
          wip.finishWord();
        } else {
          // ignore newline
        }
        break;

      case "whitespace":
      case "semicolon":
      case "{":
      case "}":
      case "[":
      case "]":
      case "quote":
      case "backslash":
      case "comment":
        wip.addWordPiece({ text: chars }, pos);
        break;

      case "text": {
        // Text tokens are lexed greedily, so "\nworld" arrives here as the
        // single token "nworld". Only its first character is the escape.
        const [escapeChar] = chars;
        switch (escapeChar) {
          case "n":
            wip.addWordPiece({ text: "\n" }, pos);
            break;
          default:
            throw new ParseError(
              `Unknown backslash escape: ${escapeChar}`,
              pos
            );
        }
        // Rewind to just after the escape character so the caller's
        // advance() re-lexes the remainder of the token as ordinary text.
        this.lastIndex = tokenPos + escapeChar.length;
        break;
      }

      case "EOF":
        throw new ParseError(
          "Reached end of input while parsing a backslash escape",
          pos
        );

      default:
        throw new ParseError(
          `Unhandled case: ${type satisfies never} (${chars})`,
          pos
        );
    }
  }

  /**
   * Collect the raw text of a brace word. On entry `next` is the first token
   * after the opening brace; on return it is the matching closing brace.
   * Nested braces are balanced and kept in the text; a backslash and the
   * token after it are copied verbatim, so an escaped brace does not count.
   *
   * @returns the text between the braces, unprocessed
   * @throws ParseError if the input ends before the closing brace
   */
  parseBrace(): string {
    let wip = "";

    while (true) {
      const [type, chars, pos] = this.next;
      switch (type) {
        case "backslash":
          wip += "\\";
          this.advance();
          wip += this.next[1];
          break;

        case "{": {
          wip += "{";
          this.advance();
          wip += this.parseBrace();
          this.expect("}");
          wip += "}";
          break;
        }

        case "}":
          return wip;

        case "EOF":
          throw new ParseError(
            "Reached end of input while parsing a brace word",
            pos
          );

        default:
          wip += chars;
      }

      this.advance();
    }
  }
}
|
|
@ -1,4 +1,4 @@
|
|||
import { escapeHtml } from './helpers';
|
||||
import { escapeHtml } from "./helpers";
|
||||
|
||||
export type SourcePos = number;
|
||||
|
||||
|
@ -117,7 +117,11 @@ export function SimplifyWord(
|
|||
if (consolidated.length == 0) {
|
||||
return { text: "", pos: sourcePosition };
|
||||
} else if (consolidated.length == 1 && IsTextPiece(consolidated[0])) {
|
||||
return { ...consolidated[0], pos: sourcePosition };
|
||||
if (pieces.every((piece) => "bare" in piece)) {
|
||||
return { bare: AsText(consolidated[0]), pos: sourcePosition };
|
||||
} else {
|
||||
return { ...consolidated[0], pos: sourcePosition };
|
||||
}
|
||||
} else {
|
||||
return { pieces: consolidated };
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue