import { Cursor } from './cursor';
import {
  ExprScanError,
  exprStart,
  ExprStartToken,
  IdentifierKind,
  identifierScanner,
  isNextTokenExprStart,
  isNextTokenProductPatternStart,
  patternStart,
  PatternStartToken,
  skipWhitespaceAndComments,
} from './scanner';
import { char, CodePoint, SourceText, Span } from './source_text';
import { Result } from '../result';
import { Expr, ExprBinding, FieldAssignment, FieldPattern, MatchBranch, Pattern, ProductPattern } from '../expr';

// CONVENTION: Every parser is responsible for consuming whitespace/comments at its end.
// No parser is responsible for cleaning up whitespace/comments at its start - only the
// final `parse` that is exposed to the public does that.

// Delimiters and terminators recognised by the sequence parsers in this module.
const DELIMITER_COMMA = char(',');
const DELIMITER_PIPE = char('|');
const TERMINATOR_CLOSE_PAREN = char(')');
const TERMINATOR_CLOSE_BRACE = char('}');
const TERMINATOR_DOT = char('.');

// TODO: Errors have to have better contextual information
/**
 * Every error the parser can report. Scanner errors (`ExprScanError`) are part of the
 * union; all parser-level variants are discriminated by `tag` and carry the `span`
 * where the problem was detected.
 */
export type ParseError =
  | ExprScanError
  | { tag: "UnexpectedToken", expected: string, span: Span }
  // NOTE(review): `received` is filled from `cursor.peek()`; if that can be `undefined`
  // at end of input, this field should probably be `CodePoint | undefined` - confirm.
  | {
      tag: "UnexpectedTokenWhileParsingSequence",
      reason: "ExpectedDelimiterOrTerminator",
      received: CodePoint,
      expectedDelimiter: CodePoint,
      expectedTerminator: CodePoint,
      span: Span
    }
  // === Specific Context Errors ===
  | { tag: "ExpectedExpression", span: Span } // Expected start of expr (e.g. hit EOF or keyword)
  | { tag: "ExpectedFieldAssignmentSymbol", span: Span } // Expected '=' in field assignment
  | { tag: "ExpectedPatternAssignmentSymbol", span: Span } // Expected '=' in pattern assignment
  | { tag: "ExpectedPatternBindingSymbol", span: Span } // Expected '.' in pattern binding
  | { tag: "ExpectedFunctionCallStart", span: Span } // Expected '(' after function name
  | { tag: "ExpectedRecordOpen", span: Span } // Expected '(' after ':'
  | { tag: "ExpectedLetBlockOpen", span: Span } // Expected '{' after 'let'
  | { tag: "ExpectedLetBlockClose", span: Span } // Expected '}' at end of 'let' expression
  | { tag: "ExpectedMatchBlockOpen", span: Span } // Expected '{' after 'match'
  | { tag: "ExpectedMatchBlockClose", span: Span } // Expected '}' at end of 'match' expression
  | { tag: "ExpectedLambdaBlockOpen", span: Span } // Expected '{' after `fn`
  | { tag: "ExpectedLambdaBlockClose", span: Span } // Expected '}' at end of `fn` expression
  | { tag: "ExpectedApplyStart", span: Span } // Expected '(' after 'apply'
  | { tag: "ExpectedApplySeparator", span: Span } // Expected '!' inside 'apply'
  | { tag: "UnexpectedTagPattern", span: Span } // Found #tag where product pattern expected
  | { tag: "ExpectedPattern", span: Span } // EOF or invalid start of pattern
  | { tag: "ExpectedRecordPatternOpen", span: Span } // Expected '(' at start of record pattern
  | { tag: "ExpectedRecordField", span: Span }; // Expected identifier in record pattern

// TODO: Delete?
// Tags of the span-only `ParseError` variants.
// NOTE(review): "ExpectedLambdaBlockOpen" and "ExpectedLambdaBlockClose" exist in
// `ParseError` but are not listed here - confirm whether that is intentional.
export type Expectation =
  | "ExpectedExpression"
  | "ExpectedFieldAssignmentSymbol"
  | "ExpectedPatternAssignmentSymbol"
  | "ExpectedPatternBindingSymbol"
  | "ExpectedFunctionCallStart"
  | "ExpectedRecordOpen"
  | "ExpectedLetBlockOpen"
  | "ExpectedLetBlockClose"
  | "ExpectedMatchBlockOpen"
  | "ExpectedMatchBlockClose"
  | "ExpectedApplyStart"
  | "ExpectedApplySeparator"
  | "UnexpectedTagPattern"
  | "ExpectedPattern"
  | "ExpectedRecordField"
  | "ExpectedRecordPatternOpen"

/** A parser reads from (and advances) the cursor and produces a value of type `T`. */
export type Parser<T> = (cursor: Cursor) => T

// === Helpers ===

// Don't use `cursor.next()` directly - in most cases you also want to consume the WS/comments after it.
/**
 * Consumes one code point and then skips any whitespace/comments that follow it.
 *
 * @returns the consumed code point, or `undefined` when `cursor.next()` yields nothing
 *          (in which case nothing further is skipped).
 */
function nextWS(cursor: Cursor): CodePoint | undefined {
  const c = cursor.next();
  if (c === undefined) {
    return undefined;
  }
  skipWhitespaceAndComments(cursor);
  return c;
}

/**
 * Consumes `expected` (and the whitespace/comments after it) if it is the next code point.
 *
 * @returns true if the char was consumed, false if the cursor was left untouched.
 */
function tryConsume(cursor: Cursor, expected: CodePoint): boolean {
  if (cursor.peek() === expected) {
    nextWS(cursor);
    return true;
  }
  return false;
}

/**
 * Requires `expected` to be the next code point and consumes it together with the
 * whitespace/comments after it.
 *
 * @throws a `{ tag: errorTag, span }` ParseError located at the current cursor position
 *         when the next code point is something else.
 */
function expectChar(
  cursor: Cursor,
  expected: CodePoint,
  errorTag: Expectation | "ExpectedLambdaBlockOpen" | "ExpectedLambdaBlockClose",
): void {
  if (!tryConsume(cursor, expected)) {
    throw { tag: errorTag, span: cursor.makeSpan(cursor.currentLocation()) } as ParseError;
  }
}

// TODO: Perhaps when it comes to terminators, we should allow the user of this function to create better contextual errors?
/**
 * Parses a delimited sequence of `p` that is terminated by a special character.
 * The terminator itself is consumed.
 *
 * @param delimiter  code point separating items; one leading and one trailing delimiter are tolerated
 * @param terminator code point that ends the sequence
 * @param p          parser for a single item; it must consume the whitespace/comments after the item
 * @returns the parsed items in source order (empty when the terminator comes first)
 * @throws `UnexpectedTokenWhileParsingSequence` when an item is followed by neither the
 *         delimiter nor the terminator; anything `p` throws propagates unchanged.
 */
function delimitedTerminalSequence<A>(
  cursor: Cursor,
  delimiter: CodePoint,
  terminator: CodePoint,
  p: (cursor: Cursor) => A,
): A[] {
  // Let's say the terminator is `.` and delimiter is `,`. This is the grammar for valid sequences
  //   list(p) :=
  //     | p
  //     | p , p
  //     | p , p , p
  //     | ...
  //   delimitedTerminalSequence(p) :=
  //     | .
  //     | optional(,) list(p) optional(,) .
  //
  // All our use-cases always have a well-defined terminator character:
  //   tuples:         ( a, b, c )                  -> `)`
  //   records:        :( f0 = e0, f1 = e1 )        -> `)`
  //   function call:  f(a, b, c)                   -> `)`
  //   let-binding:    let { p = e . body }         -> `.`
  //   fn-abstraction: fn { p0, p1 . body }         -> `.`
  //   fn-application: apply(e ! e0, e1, e2)        -> `)`
  //   match branches: match e { b0 | b1 | b2 }     -> `}` (delimiter `|`)
  const items: A[] = [];

  // Empty sequence: the terminator comes immediately.
  if (tryConsume(cursor, terminator)) {
    return items;
  }

  // Optional leading delimiter.
  tryConsume(cursor, delimiter);

  while (true) {
    // `p` is responsible for getting rid of whitespace after it has done its work.
    items.push(p(cursor));

    const nextChar = cursor.peek();
    if (nextChar === terminator) {
      nextWS(cursor);
      return items;
    }
    if (nextChar === delimiter) {
      nextWS(cursor);
      if (tryConsume(cursor, terminator)) {
        // The previous delimiter was trailing.
        return items;
      }
      // The previous delimiter was a separator.
      continue;
    }
    throw {
      tag: "UnexpectedTokenWhileParsingSequence",
      received: nextChar,
      reason: "ExpectedDelimiterOrTerminator",
      expectedDelimiter: delimiter,
      expectedTerminator: terminator,
      span: cursor.makeSpan(cursor.currentLocation()),
    } as ParseError;
  }
}

/** Scans the token that starts an expression, then skips trailing whitespace/comments. */
function exprStartToken(cursor: Cursor): ExprStartToken {
  const token = exprStart(cursor);
  skipWhitespaceAndComments(cursor);
  return token;
}

/** Scans the token that starts a pattern, then skips trailing whitespace/comments. */
function patternStartToken(cursor: Cursor): PatternStartToken {
  const token = patternStart(cursor);
  skipWhitespaceAndComments(cursor);
  return token;
}

/** Scans an identifier of the given kind, then skips trailing whitespace/comments. */
function identifier(cursor: Cursor, kind: IdentifierKind): { name: string, span: Span } {
  const result = identifierScanner(cursor, kind);
  skipWhitespaceAndComments(cursor);
  return result;
}

// === Expression Parsers ===

/**
 * Parses a single expression starting at the cursor and consumes the whitespace/comments after it.
 *
 * @throws a ParseError when the input does not start an expression or when a construct
 *         is missing one of its required punctuation characters.
 */
function expr(cursor: Cursor): Expr {
  const start = cursor.currentLocation();
  const token = exprStartToken(cursor);
  // TODO: You need to include the spans and perhaps other meta-info.
  switch (token.tag) {
    case "EOF":
      throw { tag: "UnexpectedToken", expected: "Expression", span: token.span } as ParseError;
    case "number":
      return Expr.number(token.value, token.span);
    case "string":
      return Expr.string(token.text, token.span);
    case "variable_use":
      return Expr.var_use(token.name, token.span);
    case "tag": {
      // #true
      // #foo e
      if (isNextTokenExprStart(cursor)) {
        const value = expr(cursor);
        return Expr.tagged(token.name, value, cursor.makeSpan(start));
      }
      return Expr.tag(token.name, token.span);
    }
    case "tuple_start": {
      // e.g. (a, b, c)
      const items = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
      return Expr.tuple(items, cursor.makeSpan(start));
    }
    case "function_name": {
      // e.g. my_func(arg1, arg2)
      // The sequence parser consumes the closing `)`.
      expectChar(cursor, char('('), "ExpectedFunctionCallStart");
      const args = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
      return Expr.call(token.name, args, cursor.makeSpan(start));
    }
    case "keyword":
      switch (token.kw) {
        case ":": {
          // e.g. :( x = 1, y = 2 )
          // or   : ( x = 1, y = 2 )
          expectChar(cursor, char('('), "ExpectedRecordOpen");
          const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, fieldAssignment);
          return Expr.record(fields, cursor.makeSpan(start));
        }
        case "let": {
          // let { p0 = e0, p1 = e2 . body }
          expectChar(cursor, char('{'), "ExpectedLetBlockOpen");
          const bindings = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_DOT, productPatternBinding);
          const body = expr(cursor);
          expectChar(cursor, TERMINATOR_CLOSE_BRACE, "ExpectedLetBlockClose");
          return Expr.let_(bindings, body, cursor.makeSpan(start));
        }
        case "fn": {
          // fn { p0, p1, p2 . body }
          expectChar(cursor, char('{'), "ExpectedLambdaBlockOpen");
          const parameters = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_DOT, productPattern);
          const body = expr(cursor);
          expectChar(cursor, TERMINATOR_CLOSE_BRACE, "ExpectedLambdaBlockClose");
          return Expr.lambda(parameters, body, cursor.makeSpan(start));
        }
        case "apply": {
          // apply(e ! e0, e1, e2)
          expectChar(cursor, char('('), "ExpectedApplyStart");
          const callee = expr(cursor);
          expectChar(cursor, char('!'), "ExpectedApplySeparator");
          const args = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
          return Expr.apply(callee, args, cursor.makeSpan(start));
        }
        case "match": {
          // match e { branch0 | branch1 | branch2 }
          const arg = expr(cursor);
          expectChar(cursor, char('{'), "ExpectedMatchBlockOpen");
          const branches = delimitedTerminalSequence(cursor, DELIMITER_PIPE, TERMINATOR_CLOSE_BRACE, matchBranch);
          return Expr.match(arg, branches, cursor.makeSpan(start));
        }
        case "=":
        case "|":
        case "!":
          // These keywords CANNOT start an expression.
          throw { tag: "ExpectedExpression", span: token.span } as ParseError;
      }
  }
}

/**
 * Parses one branch of a `match`: `p . body`.
 *
 * @throws `ExpectedPatternBindingSymbol` when the `.` between pattern and body is missing.
 */
function matchBranch(cursor: Cursor): MatchBranch {
  const start = cursor.currentLocation();
  const p = pattern(cursor);
  expectChar(cursor, char("."), "ExpectedPatternBindingSymbol");
  const e = expr(cursor);
  return Expr.matchBranch(p, e, cursor.makeSpan(start));
}

/**
 * Parses one binding of a `let`: `p = e`.
 *
 * @throws `ExpectedPatternAssignmentSymbol` when the `=` between pattern and expression is missing.
 */
function productPatternBinding(cursor: Cursor): ExprBinding {
  const start = cursor.currentLocation();
  // TODO: There's a potential here to do a lot of work on nice errors.
  // `p = e`
  // Here there could be problems like the pattern being just a variable that uses `=` as its part:
  //   `x= = 123` is valid. Maybe in case of erroneous things like
  //   `x=123`  - which just parses as an identifier - we should analyze the identifier and produce a suggestion? idk...
  // or even...
  //   `x= 123` - which just parses as an identifier - we should analyze the identifier and produce a suggestion? idk...
  // or
  //   `x =123` - this one technically is a sequence of two identifiers.
  const boundPattern = productPattern(cursor);
  // A missing `=` is the pattern-*assignment* error; the pattern-*binding* error is
  // reserved for the missing `.` of a match branch.
  expectChar(cursor, char('='), "ExpectedPatternAssignmentSymbol");
  const e = expr(cursor);
  return Expr.exprBinding(boundPattern, e, cursor.makeSpan(start));
}

/**
 * Parses one field of a record expression: `f = e`.
 *
 * @throws `ExpectedFieldAssignmentSymbol` when the `=` after the field name is missing.
 */
function fieldAssignment(cursor: Cursor): FieldAssignment {
  const start = cursor.currentLocation();
  const { name } = identifier(cursor, 'field_name');
  expectChar(cursor, char('='), "ExpectedFieldAssignmentSymbol");
  const value = expr(cursor);
  return Expr.fieldAssignment(name, value, cursor.makeSpan(start));
}

/**
 * Parses a pattern: either a tag pattern (optionally carrying a product-pattern payload)
 * or a plain product pattern.
 *
 *   x
 *   (x, y, z)
 *   ((x, y), z)
 *   :( a = x, b = y )
 *   :( a, b = y )
 *   #foo
 *   #foo x
 *   #foo (x, y)
 */
function pattern(cursor: Cursor): Pattern {
  const start = cursor.currentLocation();
  const token = patternStartToken(cursor);

  // Anything that is not a tag is handled as a product pattern.
  if (token.tag !== "tag") {
    return finishProductPattern(cursor, token, start);
  }

  // === Tag Pattern logic ===
  // Lookahead: is this a tagged value? e.g. `#foo x` or `#foo (a, b)`
  if (isNextTokenProductPatternStart(cursor)) {
    // The payload must be a product pattern.
    const payload = productPattern(cursor);
    return Pattern.tagged(token.name, payload, cursor.makeSpan(start));
  }
  // Standalone tag: `#foo`
  return Pattern.tag(token.name, cursor.makeSpan(start));
}

/**
 * Parses a product pattern:
 *
 *   x
 *   (x, y, z)
 *   ((x, y), z)
 *   :( a = x, b = y )
 *   :( a, b = y )
 *
 * Tag patterns are syntax errors here, i.e. unexpected-tag-pattern (a renderer could then
 * explain that tag patterns are not allowed to be deeply nested).
 */
function productPattern(cursor: Cursor): ProductPattern {
  const start = cursor.currentLocation();
  const token = patternStartToken(cursor);
  return finishProductPattern(cursor, token, start);
}

/**
 * Completes a product pattern whose start token has already been scanned.
 *
 * @param token the already-consumed start token
 * @param start location where the pattern begins, i.e. before `token` was scanned. When
 *              omitted it falls back to the current cursor location, which lies *after*
 *              the start token, so the resulting spans will not cover that token.
 * @throws `UnexpectedTagPattern` for a tag token, `ExpectedRecordPatternOpen` when `:` is
 *         not followed by `(`, and `ExpectedPattern` for EOF or any other keyword.
 */
function finishProductPattern(
  cursor: Cursor,
  token: PatternStartToken,
  start = cursor.currentLocation(),
): ProductPattern {
  switch (token.tag) {
    case "pattern_binding":
      // foo
      return ProductPattern.any(token.name, cursor.makeSpan(start));
    case "tuple_start": {
      // ( p1, p2 )
      const items = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, productPattern);
      return ProductPattern.tuple(items, cursor.makeSpan(start));
    }
    case "tag":
      throw { tag: "UnexpectedTagPattern", span: token.span } as ParseError;
    case "keyword": {
      switch (token.kw) {
        case ":": {
          // :( a = p, b )
          // The opener must be `(` so that it pairs with the `)` terminator below.
          expectChar(cursor, char('('), "ExpectedRecordPatternOpen");
          const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, recordPatternField);
          return ProductPattern.record(fields, cursor.makeSpan(start));
        }
        default:
          // These keywords CANNOT start a pattern.
          throw { tag: "ExpectedPattern", span: token.span } as ParseError;
      }
    }
    case "EOF":
      throw { tag: "ExpectedPattern", span: token.span } as ParseError;
  }
}

/**
 * Parses one field of a record pattern: either `f = p` or the punned form `f`,
 * which binds the field to a variable of the same name.
 */
function recordPatternField(cursor: Cursor): FieldPattern {
  const start = cursor.currentLocation();
  const { name, span } = identifier(cursor, 'field_name');

  // Punning: :( a ) -> :( a = a )
  const fieldValue = tryConsume(cursor, char('='))
    ? productPattern(cursor)
    : ProductPattern.any(name, span);

  return ProductPattern.fieldPattern(name, fieldValue, cursor.makeSpan(start));
}

/**
 * Parses `source` as a single expression.
 *
 * Leading whitespace/comments are skipped, and the whole input must be consumed: any
 * leftover input is reported as an `UnexpectedToken` error expecting "EndOfFile".
 *
 * @returns `Result.ok` with the parsed expression, or `Result.error` with the failure.
 *          Nothing is thrown to the caller: every value thrown while parsing is caught
 *          and returned as the error.
 */
// NOTE(review): the type arguments are assumed to be <ok value, error> - confirm the
// parameter order against '../result'.
export function parse(source: SourceText): Result<Expr, ParseError> {
  const cursor = new Cursor(source);
  try {
    skipWhitespaceAndComments(cursor);
    const expression = expr(cursor);
    if (!cursor.eof()) {
      return Result.error({
        tag: "UnexpectedToken",
        expected: "EndOfFile",
        span: cursor.makeSpan(cursor.currentLocation()),
      } as ParseError);
    }
    return Result.ok(expression);
  } catch (e) {
    // TODO: This is a bit sketchy. We may be forced to have "checked" exceptions for `ParseError`
    // by wrapping it in something that has a proper tag. As written, *any* thrown value
    // (including unrelated runtime errors) is reported as a ParseError.
    return Result.error(e as ParseError);
  }
}