475 lines
17 KiB
TypeScript
475 lines
17 KiB
TypeScript
import { Cursor } from './cursor';
|
|
import { ExprScanError, exprStart, ExprStartToken, IdentifierKind, identifierScanner, isNextTokenExprStart, isNextTokenProductPatternStart, patternStart, PatternStartToken, skipWhitespaceAndComments } from './scanner';
|
|
import { char, CodePoint, SourceText, Span } from './source_text';
|
|
import { Result } from '../result';
|
|
import { Expr, ExprBinding, FieldAssignment, FieldPattern, MatchBranch, Pattern, ProductPattern } from '../expr';
|
|
|
|
// CONVENTION: Every parser is responsible for consuming the whitespace/comments that follow whatever it parsed.
// No parser is responsible for skipping whitespace/comments at its start - only the final `parse` that's exposed to the public does that.
|
|
|
|
// Code points the parser compares against: item separators first, closing characters second.
const DELIMITER_COMMA = char(',');
const DELIMITER_PIPE = char('|');

const TERMINATOR_CLOSE_PAREN = char(')');
const TERMINATOR_CLOSE_BRACE = char('}');
const TERMINATOR_DOT = char('.');
|
|
|
|
// TODO: Errors have to have better contextual information
|
|
/**
 * Every failure the parser can report.
 *
 * Besides the scanner's own `ExprScanError`, each variant is an object discriminated by `tag`
 * and carrying the `span` at which the problem was detected.
 */
export type ParseError =
  | ExprScanError
  | { tag: "UnexpectedToken"; expected: string; span: Span }
  | {
      tag: "UnexpectedTokenWhileParsingSequence";
      reason: "ExpectedDelimiterOrTerminator";
      received: CodePoint;
      expectedDelimiter: CodePoint;
      expectedTerminator: CodePoint;
      span: Span;
    }

  // === Specific Context Errors ===
  | { tag: "ExpectedExpression"; span: Span } // Expected start of expr (e.g. hit EOF or keyword)
  | { tag: "ExpectedFieldAssignmentSymbol"; span: Span } // Expected '=' in field assignment
  | { tag: "ExpectedPatternAssignmentSymbol"; span: Span } // Expected '=' in pattern assignment
  | { tag: "ExpectedPatternBindingSymbol"; span: Span } // Expected '.' in pattern binding
  | { tag: "ExpectedFunctionCallStart"; span: Span } // Expected '(' after function name
  | { tag: "ExpectedRecordOpen"; span: Span } // Expected '(' after ':'
  | { tag: "ExpectedLetBlockOpen"; span: Span } // Expected '{' after 'let'
  | { tag: "ExpectedLetBlockClose"; span: Span } // Expected '}' at end of 'let' expression
  | { tag: "ExpectedMatchBlockOpen"; span: Span } // Expected '{' after 'match'
  | { tag: "ExpectedMatchBlockClose"; span: Span } // Expected '}' at end of 'match' expression
  | { tag: "ExpectedLambdaBlockOpen"; span: Span } // Expected '{' after `fn`
  | { tag: "ExpectedLambdaBlockClose"; span: Span } // Expected '}' at end of `fn` expression
  | { tag: "ExpectedApplyStart"; span: Span } // Expected '(' after 'apply'
  | { tag: "ExpectedApplySeparator"; span: Span } // Expected '!' inside 'apply'
  | { tag: "UnexpectedTagPattern"; span: Span } // Found #tag where product pattern expected
  | { tag: "ExpectedPattern"; span: Span } // EOF or invalid start of pattern
  | { tag: "ExpectedRecordPatternOpen"; span: Span } // Expected '(' at start of record pattern
  | { tag: "ExpectedRecordField"; span: Span }; // Expected identifier in record pattern
|
|
|
|
// TODO: Delete?
|
|
/**
 * Tags of the context-specific parse errors, i.e. the `ParseError` variants that carry
 * nothing but a `tag` and a `span`.
 *
 * Includes the two lambda-block tags so the list agrees with `ParseError`.
 */
export type Expectation =
  | "ExpectedExpression"
  | "ExpectedFieldAssignmentSymbol"
  | "ExpectedPatternAssignmentSymbol"
  | "ExpectedPatternBindingSymbol"
  | "ExpectedFunctionCallStart"
  | "ExpectedRecordOpen"
  | "ExpectedLetBlockOpen"
  | "ExpectedLetBlockClose"
  | "ExpectedMatchBlockOpen"
  | "ExpectedMatchBlockClose"
  | "ExpectedLambdaBlockOpen"
  | "ExpectedLambdaBlockClose"
  | "ExpectedApplyStart"
  | "ExpectedApplySeparator"
  | "UnexpectedTagPattern"
  | "ExpectedPattern"
  | "ExpectedRecordField"
  | "ExpectedRecordPatternOpen";
|
|
|
|
/** A parser: a function that is handed the `Cursor` and produces a value of type `T`. */
export type Parser<T> = (cursor: Cursor) => T
|
|
|
|
// === Helpers ===
|
|
// Prefer this over calling `cursor.next()` directly - in most cases the whitespace/comments
// after the consumed code point should be skipped too.
//
// Returns whatever `cursor.next()` returned. When that is `undefined`, nothing is skipped.
function nextWS(cursor: Cursor): CodePoint | undefined {
  const consumed = cursor.next();
  if (consumed !== undefined) {
    skipWhitespaceAndComments(cursor);
  }
  return consumed;
}
|
|
|
|
// Consumes `expected` (plus the whitespace/comments after it) only when it is the next
// code point. The result tells the caller whether anything was consumed.
function tryConsume(cursor: Cursor, expected: CodePoint): boolean {
  const isNext = cursor.peek() === expected;
  if (isNext) {
    nextWS(cursor);
  }
  return isNext;
}
|
|
|
|
// TODO: Perhaps when it comes to terminators, we should allow the user of this function to create better contextual errors?
//
// Parses a sequence of `p` items separated by `delimiter` and closed by `terminator`.
// The terminator is consumed (together with the whitespace/comments after it) before returning.
//
// Taking `.` as the terminator and `,` as the delimiter, the accepted grammar is:
//   list(p) :=
//     | p
//     | p , p
//     | p , p , p
//     | ...
//   delimitedTerminalSequence(p) :=
//     | .
//     | optional(,) list(p) optional(,) .
//
// Every use-case has a well-defined terminator character:
//   tuples:         ( a, b, c )               -> `)`
//   records:        :( f0 = e0, f1 = e1 )     -> `)`
//   function call:  f(a, b, c)                -> `)`
//   let-binding:    let { p = e . body }      -> `.`
//   fn-abstraction: fn { p0, p1 . body }      -> `.`
//   fn-application: apply(e ! e0, e1, e2)     -> `)`
//
// Throws an "UnexpectedTokenWhileParsingSequence" error when an item is followed by
// something that is neither the delimiter nor the terminator.
function delimitedTerminalSequence<A>(cursor: Cursor, delimiter: CodePoint, terminator: CodePoint, p: Parser<A>): A[] {
  const items: A[] = [];

  // Empty sequence: the terminator is the very first thing we see.
  if (cursor.peek() === terminator) {
    nextWS(cursor);
    return items;
  }

  // A single leading delimiter is tolerated.
  if (cursor.peek() === delimiter) {
    nextWS(cursor);
  }

  for (;;) {
    // `p` is responsible for getting rid of the whitespace after it has done its work.
    items.push(p(cursor));

    const following = cursor.peek();

    if (following === terminator) {
      nextWS(cursor);
      return items;
    }

    if (following !== delimiter) {
      // NOTE(review): if `peek()` can yield `undefined` at end of input, `received` is
      // `undefined` here even though `ParseError` declares it as `CodePoint` - confirm.
      throw {
        tag: "UnexpectedTokenWhileParsingSequence",
        received: following,
        reason: "ExpectedDelimiterOrTerminator",
        expectedDelimiter: delimiter,
        expectedTerminator: terminator,
        span: cursor.makeSpan(cursor.currentLocation()),
      } as ParseError;
    }

    nextWS(cursor);

    // A delimiter directly followed by the terminator was a trailing delimiter.
    if (cursor.peek() === terminator) {
      nextWS(cursor);
      return items;
    }
    // Otherwise the delimiter was a separator: go around and parse the next item.
  }
}
|
|
|
|
// Scans the token that opens an expression, then skips the whitespace/comments after it.
function exprStartToken(cursor: Cursor): ExprStartToken {
  const startToken = exprStart(cursor);
  skipWhitespaceAndComments(cursor);
  return startToken;
}
|
|
|
|
// Scans the token that opens a pattern, then skips the whitespace/comments after it.
function patternStartToken(cursor: Cursor): PatternStartToken {
  const startToken = patternStart(cursor);
  skipWhitespaceAndComments(cursor);
  return startToken;
}
|
|
|
|
// Scans an identifier of the requested `kind`, then skips the whitespace/comments after it.
// Returns the scanner's result unchanged.
function identifier(cursor: Cursor, kind: IdentifierKind): { name: string, span: Span } {
  const scanned = identifierScanner(cursor, kind);
  skipWhitespaceAndComments(cursor);
  return scanned;
}
|
|
|
|
// === Expression Parsers ===
|
|
|
|
// Throws the `{ tag, span }` error for a symbol that is missing at the cursor's current location.
function failHere(cursor: Cursor, tag: string): never {
  throw { tag, span: cursor.makeSpan(cursor.currentLocation()) } as ParseError;
}

// Parses one expression. The start token decides which form is being parsed.
// Composite forms get a span that runs from where the expression started to the cursor's
// position once the form is complete.
// Throws a `ParseError` when the input cannot start or complete an expression.
function expr(cursor: Cursor): Expr {
  const start = cursor.currentLocation();
  const token = exprStartToken(cursor);

  // TODO: You need to include the spans and perhaps other meta-info.
  switch (token.tag) {
    case "EOF": {
      throw {
        tag: "UnexpectedToken",
        expected: "Expression",
        span: token.span
      } as ParseError;
    }

    case "number": {
      return Expr.number(token.value, token.span);
    }

    case "string": {
      return Expr.string(token.text, token.span);
    }

    case "variable_use": {
      return Expr.var_use(token.name, token.span);
    }

    case "tag": {
      // `#true` stands alone; `#foo e` wraps the expression that follows it.
      if (!isNextTokenExprStart(cursor)) {
        return Expr.tag(token.name, token.span);
      }
      const value = expr(cursor);
      return Expr.tagged(token.name, value, cursor.makeSpan(start));
    }

    case "tuple_start": {
      // e.g. (a, b, c)
      const items = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
      return Expr.tuple(items, cursor.makeSpan(start));
    }

    case "function_name": {
      // e.g. my_func(arg1, arg2)
      // After `(`, the arguments are a `,`-delimited sequence of expressions closed by `)`.
      if (!tryConsume(cursor, char('('))) {
        failHere(cursor, "ExpectedFunctionCallStart");
      }
      const args = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
      return Expr.call(token.name, args, cursor.makeSpan(start));
    }

    case "keyword": {
      switch (token.kw) {
        case ":": {
          // e.g. :( x = 1, y = 2 )
          // or   : ( x = 1, y = 2 )
          if (!tryConsume(cursor, char('('))) {
            failHere(cursor, "ExpectedRecordOpen");
          }
          const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, fieldAssignment);
          return Expr.record(fields, cursor.makeSpan(start));
        }

        case "let": {
          // let { p0 = e0, p1 = e2 . body }
          if (!tryConsume(cursor, char('{'))) {
            failHere(cursor, "ExpectedLetBlockOpen");
          }
          const bindings = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_DOT, productPatternBinding);
          const body = expr(cursor);
          if (!tryConsume(cursor, TERMINATOR_CLOSE_BRACE)) {
            failHere(cursor, "ExpectedLetBlockClose");
          }
          return Expr.let_(bindings, body, cursor.makeSpan(start));
        }

        case "fn": {
          // fn { p0, p1, p2 . body }
          if (!tryConsume(cursor, char('{'))) {
            failHere(cursor, "ExpectedLambdaBlockOpen");
          }
          const parameters = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_DOT, productPattern);
          const body = expr(cursor);
          if (!tryConsume(cursor, TERMINATOR_CLOSE_BRACE)) {
            failHere(cursor, "ExpectedLambdaBlockClose");
          }
          return Expr.lambda(parameters, body, cursor.makeSpan(start));
        }

        case "apply": {
          // apply(e ! e0, e1, e2)
          if (!tryConsume(cursor, char('('))) {
            failHere(cursor, "ExpectedApplyStart");
          }
          const callee = expr(cursor);
          if (!tryConsume(cursor, char('!'))) {
            failHere(cursor, "ExpectedApplySeparator");
          }
          const args = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
          return Expr.apply(callee, args, cursor.makeSpan(start));
        }

        case "match": {
          // match e { branch0 | branch1 | branch2 }
          const arg = expr(cursor);
          if (!tryConsume(cursor, char('{'))) {
            failHere(cursor, "ExpectedMatchBlockOpen");
          }
          const branches = delimitedTerminalSequence(cursor, DELIMITER_PIPE, TERMINATOR_CLOSE_BRACE, matchBranch);
          return Expr.match(arg, branches, cursor.makeSpan(start));
        }

        case "=":
        case "|":
        case "!": {
          // These keywords CANNOT start an expression.
          throw {
            tag: "ExpectedExpression",
            span: token.span
          } as ParseError;
        }
      }
    }
  }
}
|
|
|
|
// Parses a single branch of a `match` expression: `p . body`.
// Throws "ExpectedPatternBindingSymbol" when the pattern is not followed by `.`.
function matchBranch(cursor: Cursor): MatchBranch {
  const start = cursor.currentLocation();
  const branchPattern = pattern(cursor);

  const sawDot = tryConsume(cursor, char("."));
  if (!sawDot) {
    throw {
      tag: "ExpectedPatternBindingSymbol",
      span: cursor.makeSpan(cursor.currentLocation())
    } as ParseError;
  }

  const body = expr(cursor);
  return Expr.matchBranch(branchPattern, body, cursor.makeSpan(start));
}
|
|
|
|
// Parses one binding of the form `p = e`, where `p` is a product pattern and `e` an expression.
// Throws "ExpectedPatternAssignmentSymbol" when the pattern is not followed by `=`.
// (The separate "ExpectedPatternBindingSymbol" tag is documented as the missing-`.` error.)
function productPatternBinding(cursor: Cursor): ExprBinding {
  const start = cursor.currentLocation();

  // TODO: There's a potential here to do a lot of work on nice errors.
  //   `p = e`
  // Here there could be problems like the pattern being just a variable that uses `=` as part of its name:
  //   `x= = 123` is valid.
  // Maybe for erroneous inputs like
  //   `x=123` - which just parses as an identifier -
  // we should analyze the identifier and produce a suggestion? idk...
  // or even
  //   `x= 123` - which also just parses as an identifier,
  // or
  //   `x =123` - this one technically is a sequence of two identifiers.
  const pattern = productPattern(cursor);

  if (!tryConsume(cursor, char('='))) {
    throw {
      tag: "ExpectedPatternAssignmentSymbol",
      span: cursor.makeSpan(cursor.currentLocation())
    } as ParseError;
  }

  const e = expr(cursor);
  return Expr.exprBinding(pattern, e, cursor.makeSpan(start));
}
|
|
|
|
// Parses one record field of the form `f = e`.
// Throws "ExpectedFieldAssignmentSymbol" when the field name is not followed by `=`.
function fieldAssignment(cursor: Cursor): FieldAssignment {
  const start = cursor.currentLocation();
  const { name } = identifier(cursor, 'field_name');

  const sawEquals = tryConsume(cursor, char('='));
  if (!sawEquals) {
    throw {
      tag: "ExpectedFieldAssignmentSymbol", // Specific error for this context
      span: cursor.makeSpan(cursor.currentLocation())
    } as ParseError;
  }

  const value = expr(cursor);
  return Expr.fieldAssignment(name, value, cursor.makeSpan(start));
}
|
|
|
|
// Parses a full pattern, i.e. anything `finishProductPattern` accepts plus the tag forms:
//   #foo          - standalone tag
//   #foo x        - tag with a product-pattern payload
//   #foo (x, y)
function pattern(cursor: Cursor): Pattern {
  const start = cursor.currentLocation();
  const token = patternStartToken(cursor);

  // Anything that does not open with a tag is an ordinary product pattern.
  if (token.tag !== "tag") {
    return finishProductPattern(cursor, token);
  }

  // Lookahead: without a product pattern after the tag, it is a standalone `#foo`.
  if (!isNextTokenProductPatternStart(cursor)) {
    return Pattern.tag(token.name, cursor.makeSpan(start));
  }

  // Tagged value: the payload must be a product pattern.
  const payload = productPattern(cursor);
  return Pattern.tagged(token.name, payload, cursor.makeSpan(start));
}
|
|
|
|
// Parses a product pattern: scans the pattern's start token and lets `finishProductPattern`
// handle the rest.
//
// Tag patterns are syntax errors here, i.e. unexpected-tag-pattern (a renderer could then
// explain that tag patterns are not allowed to be deeply nested).
function productPattern(cursor: Cursor): ProductPattern {
  return finishProductPattern(cursor, patternStartToken(cursor));
}
|
|
|
|
// Completes a product pattern whose start `token` has already been scanned by the caller.
//
//   foo              - binding
//   ( p1, p2 )       - tuple of product patterns
//   :( a = p, b )    - record pattern; `(` is required after `:` and `)` closes the field list
//
// Throws:
//   "UnexpectedTagPattern"      - the token is a tag (tags are not product patterns)
//   "ExpectedRecordPatternOpen" - `:` is not followed by `(`
//   "ExpectedPattern"           - EOF, or a keyword that cannot start a pattern
function finishProductPattern(cursor: Cursor, token: PatternStartToken): ProductPattern {
  // NOTE(review): the callers in this file scan `token` (and skip the whitespace after it)
  // before calling us, so `start` appears to lie *after* the start token and the spans
  // built from it would exclude that token - confirm whether that is intended.
  const start = cursor.currentLocation();

  switch (token.tag) {
    case "pattern_binding":
      // foo
      return ProductPattern.any(token.name, cursor.makeSpan(start));

    case "tuple_start": {
      // ( p1, p2 )
      const items = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, productPattern);
      return ProductPattern.tuple(items, cursor.makeSpan(start));
    }

    case "tag":
      throw { tag: "UnexpectedTagPattern", span: token.span } as ParseError;

    case "keyword": {
      switch (token.kw) {
        case ":": {
          // :( a = p, b )
          // The opener must be `(` so that it pairs with the `)` terminator used below.
          if (!tryConsume(cursor, char('('))) {
            throw {
              tag: "ExpectedRecordPatternOpen",
              span: cursor.makeSpan(cursor.currentLocation())
            } as ParseError;
          }
          const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, recordPatternField);
          return ProductPattern.record(fields, cursor.makeSpan(start));
        }
        default:
          // These keywords CANNOT start a pattern.
          throw { tag: "ExpectedPattern", span: token.span } as ParseError;
      }
    }

    case "EOF":
      throw { tag: "ExpectedPattern", span: token.span } as ParseError;
  }
}
|
|
|
|
// Parses one field of a record pattern: either `a = p` or just `a`.
// Punning: a bare `a` is treated as `a = a`, i.e. `:( a )` -> `:( a = a )`.
function recordPatternField(cursor: Cursor): FieldPattern {
  const start = cursor.currentLocation();
  const { name, span } = identifier(cursor, 'field_name');

  const fieldValue = tryConsume(cursor, char('='))
    ? productPattern(cursor)
    : ProductPattern.any(name, span);

  return ProductPattern.fieldPattern(name, fieldValue, cursor.makeSpan(start));
}
|
|
|
|
|
|
/**
 * Public entry point: parses `source` as a single expression.
 *
 * Leading whitespace/comments are skipped here, since the individual parsers only clean up
 * after themselves. Input left over after the expression is reported as an
 * "UnexpectedToken" error expecting "EndOfFile".
 *
 * @returns `Result.ok` with the parsed expression, or `Result.error` with the failure.
 */
export function parse(source: SourceText): Result<Expr, ParseError> {
  const cursor = new Cursor(source);

  try {
    skipWhitespaceAndComments(cursor);
    const expression = expr(cursor);

    if (cursor.eof()) {
      return Result.ok(expression);
    }

    return Result.error({
      tag: "UnexpectedToken",
      expected: "EndOfFile",
      span: cursor.makeSpan(cursor.currentLocation())
    } as ParseError);
  } catch (e) {
    // TODO: This is a bit sketchy. We may be forced to have "checked" exceptions for `ParseError` by wrapping it in something that has a proper tag.
    // As written, anything thrown inside the `try` is returned as if it were a `ParseError`.
    return Result.error(e as ParseError);
  }
}
|
|
|