Finish parser

This commit is contained in:
Yura Dupyn 2026-02-06 18:55:21 +01:00
parent d553a05f45
commit 13a66f2d16
6 changed files with 772 additions and 58 deletions

107
src/debug/expr_show.ts Normal file
View file

@ -0,0 +1,107 @@
import { Expr, Pattern, ProductPattern, Literal, FieldAssignment, FieldPattern } from '../value';
/**
 * Renders an expression AST as source-like text for debugging.
 *
 * Each form is printed in the same shape the parser's comments describe
 * (`let { p = e . body }`, `fn { p . body }`, `apply(f ! a, b)`,
 * `match e { p . body | ... }`). Sub-expressions are never parenthesised.
 *
 * @param expr - The expression to render.
 * @returns The textual form of `expr`.
 */
export function exprToString(expr: Expr): string {
  // Comma-separated rendering shared by every list-like form.
  const commaJoined = (items: Expr[]): string =>
    items.map((item) => exprToString(item)).join(", ");

  switch (expr.tag) {
    case "literal":
      return literalToString(expr.literal);
    case "var_use":
      return expr.name;
    case "call":
      return `${expr.name}(${commaJoined(expr.args)})`;
    case "tuple":
      return `(${commaJoined(expr.exprs)})`;
    case "record": {
      const renderedFields = expr.fields.map((field) => fieldAssignmentToString(field));
      return `{ ${renderedFields.join(", ")} }`;
    }
    case "tag":
      return `#${expr.tag_name}`;
    case "tagged":
      // The payload follows the tag after a single space; it is not wrapped in parens.
      return `#${expr.tag_name} ${exprToString(expr.expr)}`;
    case "let": {
      const renderedBindings = expr.bindings.map(
        (binding) => `${productPatternToString(binding.pattern)} = ${exprToString(binding.expr)}`,
      );
      return `let { ${renderedBindings.join(", ")} . ${exprToString(expr.body)} }`;
    }
    case "lambda": {
      const renderedParams = expr.parameters.map((param) => productPatternToString(param));
      return `fn { ${renderedParams.join(", ")} . ${exprToString(expr.body)} }`;
    }
    case "apply":
      return `apply(${exprToString(expr.callee)} ! ${commaJoined(expr.args)})`;
    case "match": {
      const renderedBranches = expr.branches.map(
        (branch) => `${patternToString(branch.pattern)} . ${exprToString(branch.body)}`,
      );
      return `match ${exprToString(expr.arg)} { ${renderedBranches.join(" | ")} }`;
    }
  }
}
// === Helpers ===
/**
 * Renders a literal as text.
 *
 * Numbers use `Number.prototype.toString`. Strings are wrapped in double
 * quotes with `\`, `"`, newline, carriage return and tab written as backslash
 * escapes, so a value containing a quote no longer produces ambiguous output.
 *
 * NOTE(review): assumes the string scanner accepts these backslash escapes —
 * confirm against `scanString` before relying on exact round-tripping.
 *
 * @param lit - The literal to render.
 * @returns The textual form of `lit`.
 */
function literalToString(lit: Literal): string {
  switch (lit.tag) {
    case "number":
      return lit.value.toString();
    case "string": {
      const escaped = lit.value.replace(/[\\"\n\r\t]/g, (ch: string): string => {
        switch (ch) {
          case "\n": return "\\n";
          case "\r": return "\\r";
          case "\t": return "\\t";
          // Backslash and double quote are escaped by prefixing a backslash.
          default: return `\\${ch}`;
        }
      });
      return `"${escaped}"`;
    }
  }
}
/** Renders one record field as `name = expr`. */
function fieldAssignmentToString(f: FieldAssignment): string {
  const { name, expr } = f;
  const renderedValue = exprToString(expr);
  return `${name} = ${renderedValue}`;
}
// === Pattern Printers ===
/**
 * Renders a pattern as text.
 *
 * Bare tags print as `#name`, tagged patterns as `#name <payload>`, and every
 * other pattern is handed to `productPatternToString`.
 *
 * @param pat - The pattern to render.
 * @returns The textual form of `pat`.
 */
export function patternToString(pat: Pattern): string {
  if (pat.tag === "tag") {
    return `#${pat.tag_name}`;
  }
  if (pat.tag === "tagged") {
    const payload = patternToString(pat.pattern);
    return `#${pat.tag_name} ${payload}`;
  }
  // Anything else is a product pattern (any, tuple, record).
  return productPatternToString(pat);
}
/**
 * Renders a product pattern as text: a variable name, a parenthesised tuple,
 * or a braced record.
 *
 * @param pat - The product pattern to render.
 * @returns The textual form of `pat`.
 */
export function productPatternToString(pat: ProductPattern): string {
  switch (pat.tag) {
    case "any": {
      return pat.name;
    }
    case "tuple": {
      const members = pat.patterns.map((member) => productPatternToString(member));
      return `(${members.join(", ")})`;
    }
    case "record": {
      const members = pat.fields.map((field) => fieldPatternToString(field));
      return `{ ${members.join(", ")} }`;
    }
  }
}
/**
 * Renders one record-pattern field.
 *
 * When the sub-pattern is a plain variable with the same name as the field,
 * the punned short form (just the field name) is printed; otherwise the field
 * is printed as `fieldName = pattern`.
 */
function fieldPatternToString(f: FieldPattern): string {
  const { fieldName, pattern } = f;
  const isPun = pattern.tag === "any" && pattern.name === fieldName;
  return isPun ? fieldName : `${fieldName} = ${productPatternToString(pattern)}`;
}

76
src/debug/repl.ts Normal file
View file

@ -0,0 +1,76 @@
import * as readline from 'readline';
import { parse } from '../parser/parser';
import { exprToString } from '../debug/expr_show';
import { Result } from '../result';
/**
 * Converts an absolute offset into `text` to a 1-based line/column pair.
 *
 * Walks the characters before `offset` (never past the end of `text`): each
 * `'\n'` starts a new line and resets the column to 1, any other character
 * advances the column by one.
 *
 * @param text - The text the offset refers to.
 * @param offset - Absolute index into `text`.
 * @returns The `{ line, col }` position of `offset`.
 */
function getLineCol(text: string, offset: number): { line: number; col: number } {
  const position = { line: 1, col: 1 };
  let index = 0;
  while (index < offset && index < text.length) {
    const isNewline = text[index] === '\n';
    position.line += isNewline ? 1 : 0;
    position.col = isNewline ? 1 : position.col + 1;
    index += 1;
  }
  return position;
}
// Interactive prompt: reads one line at a time from stdin and writes to stdout.
const rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout,
  prompt: 'expr> '
});

console.log("=== Quick & Dirty Parser REPL ===");
console.log("Type an expression to parse and verify round-trip stringification.");
console.log("Ctrl+C to exit.\n");
rl.prompt();

rl.on('line', (lineInput) => {
  const trimmed = lineInput.trim();
  // Blank lines are not parsed; only the prompt is shown again.
  if (trimmed) {
    try {
      const result = parse(trimmed);
      if (result.tag === "ok") {
        const ast = result.value;
        const reconstructed = exprToString(ast);
        console.log(`\n✅ Parsed Successfully:`);
        console.log(` ${reconstructed}`);
      } else {
        const err = result.error;
        // The span offset is turned into line:col against `trimmed`, because
        // that is the exact string that was handed to `parse`.
        const { line, col } = getLineCol(trimmed, err.span.start);
        const loc = `${line}:${col}`;
        console.log(`\n❌ Parse Error [${err.tag}] at ${loc}`);
        // The `in` checks narrow the error union, so no `any` cast is needed.
        if ('expected' in err) {
          console.log(` Expected: ${err.expected}`);
        }
        if ('reason' in err) {
          // `reason` is a string for some errors and a tagged object for others
          // (e.g. InvalidIdentifier). Objects are serialised so they do not
          // print as "[object Object]".
          const reason: unknown = err.reason;
          const shownReason = typeof reason === "string" ? reason : JSON.stringify(reason);
          console.log(` Reason: ${shownReason}`);
        }
        if ('received' in err) {
          // Shows which code point was actually encountered.
          console.log(` Received CodePoint: ${err.received}`);
        }
      }
    } catch (e) {
      // Anything thrown past `parse` is a bug in the parser, not a parse error.
      console.log(`\n🔥 CRASH (Uncaught Exception):`);
      console.log(e);
    }
  }
  console.log();
  rl.prompt();
}).on('close', () => {
  console.log('Bye!');
  process.exit(0);
});

435
src/parser/parser.ts Normal file
View file

@ -0,0 +1,435 @@
import { Expr, ExprBinding, FieldAssignment, FieldPattern, MatchBranch, Pattern, ProductPattern } from '../value';
import { Cursor } from './cursor';
import { ExprScanError, exprStart, ExprStartToken, identifier, isNextTokenExprStart, isNextTokenProductPatternStart, patternStart, PatternStartToken, skipWhitespaceAndComments } from './scanner';
import { char, CodePoint, SourceText, Span } from './source_text';
import { Result } from '../result';
// CONVENTION: Every parser consumes the whitespace/comments that FOLLOW what it parsed.
// No parser skips whitespace/comments at its START - only the public entry point `parse` does that, once, before the first token.
// Code points passed to `delimitedTerminalSequence` as the item separator:
// `,` between tuple items, record fields, call arguments, let-bindings and fn parameters;
// `|` between match branches.
const DELIMITER_COMMA = char(',');
const DELIMITER_PIPE = char('|');
// Code points that close a sequence or block:
// `)` ends tuples and argument lists, `}` ends records and let/fn/match blocks,
// `.` ends the binding list of `let { ... . body }` and the parameter list of `fn { ... . body }`.
const TERMINATOR_CLOSE_PAREN = char(')');
const TERMINATOR_CLOSE_BRACE = char('}');
const TERMINATOR_DOT = char('.');
// TODO: Errors have to have better contextual information
/**
 * Every error the parser can report. Each variant carries the `span` where
 * the problem was detected; scanner errors (`ExprScanError`) pass through
 * unchanged.
 */
export type ParseError =
  | ExprScanError
  | { tag: "UnexpectedToken", expected: string, span: Span }
  // `received` is whatever `cursor.peek()` returned after an item, which is
  // `undefined` when the input ended in the middle of a sequence.
  | { tag: "UnexpectedTokenWhileParsingSequence", reason: "ExpectedDelimiterOrTerminator", received: CodePoint | undefined, expectedDelimiter: CodePoint, expectedTerminator: CodePoint, span: Span }
  // === Specific Context Errors ===
  | { tag: "ExpectedExpression", span: Span }              // Expected start of expr (e.g. hit EOF or keyword)
  | { tag: "ExpectedFieldAssignmentSymbol", span: Span }   // Expected '=' in field assignment
  | { tag: "ExpectedPatternAssignmentSymbol", span: Span } // Expected '=' in pattern assignment
  | { tag: "ExpectedPatternBindingSymbol", span: Span }    // Expected '.' in pattern binding
  | { tag: "ExpectedFunctionCallStart", span: Span }       // Expected '(' after function name
  | { tag: "ExpectedLetBlockOpen", span: Span }            // Expected '{' after 'let'
  | { tag: "ExpectedLetBlockClose", span: Span }           // Expected '}' at end of 'let' expression
  | { tag: "ExpectedMatchBlockOpen", span: Span }          // Expected '{' after 'match'
  | { tag: "ExpectedMatchBlockClose", span: Span }         // Expected '}' at end of 'match' expression
  | { tag: "ExpectedLambdaBlockOpen", span: Span }         // Expected '{' after `fn`
  | { tag: "ExpectedLambdaBlockClose", span: Span }        // Expected '}' at end of `fn` expression
  | { tag: "ExpectedApplyStart", span: Span }              // Expected '(' after 'apply'
  | { tag: "ExpectedApplySeparator", span: Span }          // Expected '!' inside 'apply'
  | { tag: "UnexpectedTagPattern", span: Span }            // Found #tag where product pattern expected
  | { tag: "ExpectedPattern", span: Span }                 // EOF or invalid start of pattern
  | { tag: "ExpectedRecordField", span: Span };            // Expected identifier in record pattern
// TODO: Delete?
/**
 * Names of the span-only "specific context" errors in `ParseError`.
 * Kept in step with that union, including the two `fn` block tags.
 */
export type Expectation =
  | "ExpectedExpression"
  | "ExpectedFieldAssignmentSymbol"
  | "ExpectedPatternAssignmentSymbol"
  | "ExpectedPatternBindingSymbol"
  | "ExpectedFunctionCallStart"
  | "ExpectedLetBlockOpen"
  | "ExpectedLetBlockClose"
  | "ExpectedMatchBlockOpen"
  | "ExpectedMatchBlockClose"
  | "ExpectedLambdaBlockOpen"
  | "ExpectedLambdaBlockClose"
  | "ExpectedApplyStart"
  | "ExpectedApplySeparator"
  | "UnexpectedTagPattern"
  | "ExpectedPattern"
  | "ExpectedRecordField"
/**
 * A parser reads from the cursor and returns the parsed value.
 * The parsers in this file report failure by throwing a `ParseError`
 * rather than through the return value.
 */
export type Parser<T> = (cursor: Cursor) => T
// === Helpers ===
/**
 * Consumes one code point and then any whitespace/comments that follow it.
 *
 * Prefer this over calling `cursor.next()` directly: in most places the
 * trailing whitespace/comments should be consumed as well.
 *
 * @returns The consumed code point, or `undefined` when `cursor.next()`
 *          yields nothing (in which case no whitespace skipping is attempted).
 */
function nextWS(cursor: Cursor): CodePoint | undefined {
  const consumed = cursor.next();
  if (consumed !== undefined) {
    skipWhitespaceAndComments(cursor);
  }
  return consumed;
}
/**
 * Consumes `expected` (plus trailing whitespace/comments) if it is the next
 * code point; otherwise leaves the cursor untouched.
 *
 * @returns `true` if the code point was consumed, `false` otherwise.
 */
function tryConsume(cursor: Cursor, expected: CodePoint): boolean {
  const isNext = cursor.peek() === expected;
  if (isNext) {
    nextWS(cursor);
  }
  return isNext;
}
// TODO: Perhaps the caller should be able to supply a more contextual error for a missing terminator?
/**
 * Parses items with `p`, separated by `delimiter` and closed by `terminator`.
 * The terminator is consumed as well.
 *
 * With `.` as terminator and `,` as delimiter the accepted shapes are:
 *
 *   list(p)     := p | p , p | p , p , p | ...
 *   sequence(p) := .
 *                | optional(,) list(p) optional(,) .
 *
 * Every caller has a well-defined terminator:
 *   tuples:         ( a, b, c )           -> `)`
 *   records:        { f0 = e0, f1 = e1 }  -> `}`
 *   function call:  f(a, b, c)            -> `)`
 *   let-binding:    let { p = e . body }  -> `.`
 *   fn-abstraction: fn { p0, p1 . body }  -> `.`
 *   fn-application: apply(e ! e0, e1, e2) -> `)`
 *
 * `p` is expected to consume the whitespace/comments after its own item.
 *
 * @returns The parsed items, in order (empty when the terminator comes first).
 * @throws ParseError `UnexpectedTokenWhileParsingSequence` when an item is
 *         followed by something that is neither the delimiter nor the terminator.
 */
function delimitedTerminalSequence<A>(cursor: Cursor, delimiter: CodePoint, terminator: CodePoint, p: Parser<A>): A[] {
  const collected: A[] = [];

  // Empty sequence: the terminator is the very first thing.
  if (cursor.peek() === terminator) {
    nextWS(cursor);
    return collected;
  }

  // A single leading delimiter is tolerated.
  if (cursor.peek() === delimiter) {
    nextWS(cursor);
  }

  for (;;) {
    collected.push(p(cursor));

    const upcoming = cursor.peek();
    if (upcoming === terminator) {
      nextWS(cursor);
      return collected;
    }
    if (upcoming !== delimiter) {
      throw {
        tag: "UnexpectedTokenWhileParsingSequence",
        received: upcoming,
        reason: "ExpectedDelimiterOrTerminator",
        expectedDelimiter: delimiter,
        expectedTerminator: terminator,
        span: cursor.makeSpan(cursor.currentLocation()),
      } as ParseError;
    }

    nextWS(cursor);
    // A delimiter directly followed by the terminator was a trailing one;
    // otherwise it separated two items and the loop parses the next item.
    if (cursor.peek() === terminator) {
      nextWS(cursor);
      return collected;
    }
  }
}
/**
 * Scans the token that starts an expression, then consumes the
 * whitespace/comments after it. If the scanner throws, nothing is skipped.
 */
function exprStartToken(cursor: Cursor): ExprStartToken {
  const scanned = exprStart(cursor);
  skipWhitespaceAndComments(cursor);
  return scanned;
}
/**
 * Scans the token that starts a pattern, then consumes the
 * whitespace/comments after it. If the scanner throws, nothing is skipped.
 */
function patternStartToken(cursor: Cursor): PatternStartToken {
  const scanned = patternStart(cursor);
  skipWhitespaceAndComments(cursor);
  return scanned;
}
// === Expression Parsers ===
// Consumes `expected` (plus trailing whitespace/comments) or throws a
// span-only ParseError carrying `errorTag`, located at the current position.
function requireChar(cursor: Cursor, expected: CodePoint, errorTag: ParseError["tag"]): void {
  if (tryConsume(cursor, expected)) {
    return;
  }
  throw {
    tag: errorTag,
    span: cursor.makeSpan(cursor.currentLocation())
  } as ParseError;
}

/**
 * Parses one expression starting at the cursor.
 *
 * The start token decides the form: literals, variables, tags (optionally
 * with a payload), tuples, records, function calls, and the keyword forms
 * `let`, `fn`, `apply` and `match`.
 *
 * @returns The parsed expression.
 * @throws ParseError when the input ends, a required symbol is missing, or a
 *         keyword that cannot start an expression (`=`, `|`, `!`) is found.
 */
function expr(cursor: Cursor): Expr {
  const token = exprStartToken(cursor);
  // TODO: You need to include the spans and perhaps other meta-info.
  switch (token.tag) {
    case "EOF": {
      throw {
        tag: "UnexpectedToken",
        expected: "Expression",
        span: token.span
      } as ParseError;
    }
    case "number": {
      return Expr.number(token.value);
    }
    case "string": {
      return Expr.string(token.text);
    }
    case "variable_use": {
      return Expr.var_use(token.name);
    }
    case "tag": {
      // `#true` is a bare tag; `#foo e` carries a payload expression.
      if (!isNextTokenExprStart(cursor)) {
        return Expr.tag(token.name);
      }
      return Expr.tagged(token.name, expr(cursor));
    }
    case "tuple_start": {
      // e.g. (a, b, c)
      const elements = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
      return Expr.tuple(elements);
    }
    case "record_start": {
      // e.g. { x = 1, y = 2 }
      const assignments = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_BRACE, fieldAssignment);
      return Expr.record(assignments);
    }
    case "function_name": {
      // e.g. my_func(arg1, arg2) - the sequence parser consumes the closing `)`.
      requireChar(cursor, char('('), "ExpectedFunctionCallStart");
      const callArgs = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
      return Expr.call(token.name, callArgs);
    }
    case "keyword":
      switch (token.kw) {
        case "let": {
          // let { p0 = e0, p1 = e2 . body }
          requireChar(cursor, char('{'), "ExpectedLetBlockOpen");
          const letBindings = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_DOT, productPatternBinding);
          const letBody = expr(cursor);
          requireChar(cursor, TERMINATOR_CLOSE_BRACE, "ExpectedLetBlockClose");
          return Expr.let_(letBindings, letBody);
        }
        case "fn": {
          // fn { p0, p1, p2 . body }
          requireChar(cursor, char('{'), "ExpectedLambdaBlockOpen");
          const lambdaParams = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_DOT, productPattern);
          const lambdaBody = expr(cursor);
          requireChar(cursor, TERMINATOR_CLOSE_BRACE, "ExpectedLambdaBlockClose");
          return Expr.lambda(lambdaParams, lambdaBody);
        }
        case "apply": {
          // apply(e ! e0, e1, e2)
          requireChar(cursor, char('('), "ExpectedApplyStart");
          const applyCallee = expr(cursor);
          requireChar(cursor, char('!'), "ExpectedApplySeparator");
          const applyArgs = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
          return Expr.apply(applyCallee, applyArgs);
        }
        case "match": {
          // match e { branch0 | branch1 | branch2 }
          const scrutinee = expr(cursor);
          requireChar(cursor, char('{'), "ExpectedMatchBlockOpen");
          const matchBranches = delimitedTerminalSequence(cursor, DELIMITER_PIPE, TERMINATOR_CLOSE_BRACE, matchBranch);
          return Expr.match(scrutinee, matchBranches);
        }
        case "=":
        case "|":
        case "!":
          // These keywords CANNOT start an expression.
          throw {
            tag: "ExpectedExpression",
            span: token.span
          } as ParseError;
      }
  }
}
/**
 * Parses one match branch of the form `p . body`.
 *
 * @throws ParseError `ExpectedPatternBindingSymbol` when the `.` after the
 *         pattern is missing.
 */
function matchBranch(cursor: Cursor): MatchBranch {
  const branchPattern = pattern(cursor);
  const sawDot = tryConsume(cursor, char("."));
  if (!sawDot) {
    throw {
      tag: "ExpectedPatternBindingSymbol",
      span: cursor.makeSpan(cursor.currentLocation())
    } as ParseError;
  }
  const branchBody = expr(cursor);
  return Expr.matchBranch(branchPattern, branchBody);
}
/**
 * Parses one let-binding of the form `p = e`.
 *
 * @returns The binding of the product pattern to its expression.
 * @throws ParseError `ExpectedPatternAssignmentSymbol` when the `=` after the
 *         pattern is missing. (`ExpectedPatternBindingSymbol` is reserved for
 *         a missing `.`, per the `ParseError` definitions.)
 */
function productPatternBinding(cursor: Cursor): ExprBinding {
  // TODO: There's a lot of room for nicer errors here, because `=` may be part
  // of an identifier:
  //   `x= = 123` is valid.
  //   `x=123`    scans as a single identifier,
  //   `x= 123`   scans as the identifier `x=` followed by a number,
  //   `x =123`   is technically a sequence of two identifiers.
  // In those cases we could inspect the identifier and produce a suggestion.
  const boundPattern = productPattern(cursor);
  if (!tryConsume(cursor, char('='))) {
    throw {
      tag: "ExpectedPatternAssignmentSymbol",
      span: cursor.makeSpan(cursor.currentLocation())
    } as ParseError;
  }
  const value = expr(cursor);
  return Expr.exprBinding(boundPattern, value);
}
/**
 * Parses one record field assignment of the form `f = e`.
 *
 * @returns The field name paired with its value expression.
 * @throws ParseError `ExpectedFieldAssignmentSymbol` when the `=` after the
 *         field name is missing.
 */
function fieldAssignment(cursor: Cursor): FieldAssignment {
  const { name } = identifier(cursor, 'identifier');
  // Per this file's convention a parser consumes the whitespace/comments after
  // what it read. This call is a no-op if the cursor is already past them.
  // NOTE(review): `identifier` looks like a raw scanner that stops right after
  // the name — confirm; without this skip `{ x = 1 }` would not find the `=`.
  skipWhitespaceAndComments(cursor);
  if (!tryConsume(cursor, char('='))) {
    throw {
      tag: "ExpectedFieldAssignmentSymbol", // Specific error for this context
      span: cursor.makeSpan(cursor.currentLocation())
    } as ParseError;
  }
  const value = expr(cursor);
  return Expr.fieldAssignment(name, value);
}
/**
 * Parses a full pattern, i.e. a product pattern or a tag pattern:
 *
 *   x
 *   (x, y, z)
 *   ((x, y), z)
 *   { a = x, b = y }
 *   { a, b = y }
 *   #foo
 *   #foo x
 *   #foo (x, y)
 */
function pattern(cursor: Cursor): Pattern {
  const first = patternStartToken(cursor);

  // Everything that does not start with a tag is a product pattern.
  if (first.tag !== "tag") {
    return finishProductPattern(cursor, first);
  }

  // Lookahead decides between a standalone tag (`#foo`) and a tag with a
  // payload (`#foo x`, `#foo (a, b)`); the payload must be a product pattern.
  if (!isNextTokenProductPatternStart(cursor)) {
    return Pattern.tag(first.name);
  }
  const payload = productPattern(cursor);
  return Pattern.tagged(first.name, payload);
}
/**
 * Parses a product pattern:
 *
 *   x
 *   (x, y, z)
 *   ((x, y), z)
 *   { a = x, b = y }
 *   { a, b = y }
 *
 * Tag patterns are a syntax error here (`UnexpectedTagPattern`); a renderer
 * could explain that tag patterns may not be deeply nested.
 */
function productPattern(cursor: Cursor): ProductPattern {
  return finishProductPattern(cursor, patternStartToken(cursor));
}
/**
 * Completes a product pattern whose start token has already been scanned.
 *
 * @param token - The already-consumed start token.
 * @throws ParseError `UnexpectedTagPattern` for a tag token and
 *         `ExpectedPattern` for an EOF token.
 */
function finishProductPattern(cursor: Cursor, token: PatternStartToken): ProductPattern {
  switch (token.tag) {
    case "pattern_binding": {
      // foo
      return ProductPattern.any(token.name);
    }
    case "tuple_start": {
      // ( p1, p2 )
      return ProductPattern.tuple(
        delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, productPattern),
      );
    }
    case "record_start": {
      // { a = p, b }
      return ProductPattern.record(
        delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_BRACE, recordPatternField),
      );
    }
    case "tag": {
      throw { tag: "UnexpectedTagPattern", span: token.span } as ParseError;
    }
    case "EOF": {
      throw { tag: "ExpectedPattern", span: token.span } as ParseError;
    }
  }
}
/**
 * Parses one field of a record pattern: either `name = pattern` or the punned
 * short form `name`, which stands for `name = name`.
 */
function recordPatternField(cursor: Cursor): FieldPattern {
  const { name } = identifier(cursor, 'identifier'); // Reuse existing identifier scanner
  // Per this file's convention a parser consumes the whitespace/comments after
  // what it read. This call is a no-op if the cursor is already past them.
  // NOTE(review): `identifier` looks like a raw scanner that stops right after
  // the name — confirm; without this skip `{ a = x }` and `{ a }` would stall
  // on the space after the field name.
  skipWhitespaceAndComments(cursor);
  if (tryConsume(cursor, char('='))) {
    const fieldValue = productPattern(cursor);
    return ProductPattern.fieldPattern(name, fieldValue);
  }
  // Punning: { a } -> { a = a }
  return ProductPattern.fieldPattern(name, ProductPattern.any(name));
}
/**
 * Public entry point: parses `input` as a single expression.
 *
 * Leading whitespace/comments are skipped once here; the individual parsers
 * only consume what follows their own token.
 *
 * @param input - The source text to parse.
 * @returns `Result.ok` with the expression, or `Result.error` with a
 *          `ParseError` (including `UnexpectedToken` / "End of File" when
 *          input remains after the expression).
 * @throws Re-throws genuine `Error` instances raised while parsing. The
 *         parsers signal parse failures by throwing plain tagged objects, so
 *         an `Error` indicates a bug and must not be reported as a `ParseError`.
 */
export function parse(input: string): Result<Expr, ParseError> {
  const source = new SourceText(input);
  const cursor = new Cursor(source);
  try {
    skipWhitespaceAndComments(cursor);
    const expression = expr(cursor);
    if (!cursor.eof()) {
      return Result.error({
        tag: "UnexpectedToken",
        expected: "End of File",
        span: cursor.makeSpan(cursor.currentLocation())
      } as ParseError);
    }
    return Result.ok(expression);
  } catch (e) {
    // Real exceptions (TypeError, RangeError, ...) are bugs, not parse errors.
    if (e instanceof Error) {
      throw e;
    }
    // TODO: Still a bit sketchy. `ParseError` could be wrapped in something
    // with a proper marker so thrown parse errors are "checked".
    return Result.error(e as ParseError);
  }
}

View file

@ -6,7 +6,7 @@ import type { Cursor, CursorState, GenericScanError, NumberError, StringError }
import { Result } from '../result';
import { Expr } from 'src/value';
function skipWhitespaceAndComments(cursor: Cursor): number {
export function skipWhitespaceAndComments(cursor: Cursor): number {
let totalConsumed = 0;
while (true) {
@ -60,7 +60,13 @@ export type ExprScanError =
| { tag: "InvalidIdentifier", text: string, kind: IdentifierKind, reason: IdentifierErrorReason, span: Span }
// What kind of identifier were we trying to parse?
export type IdentifierKind = "identifier" | "variable_use" | "tag_construction" | "function_call";
export type IdentifierKind =
| "identifier"
| "variable_use"
| "tag_construction"
| "function_call"
| "pattern_binding";
export type IdentifierErrorReason =
| { tag: "StartsWithDigit" }
| { tag: "IsKeyword", kw: Keyword }
@ -73,12 +79,20 @@ export type ExprStartToken =
| { tag: "function_name", name: string, span: Span }
| { tag: "variable_use", name: string, span: Span }
| { tag: "tag", name: string, span: Span }
| { tag: "tagged", name: string, span: Span } // TODO: This may be a bit weird. Actually we can lookahead and see if the next char after the identifier is NOT-AN-EXPRESSION start.
| { tag: "tuple_start", span: Span }
| { tag: "record_start", span: Span }
| { tag: "keyword", kw: Keyword, span: Span }
// TODO: ger rid of EOF
| { tag: "EOF", span: Span }
export type PatternStartToken =
| { tag: "pattern_binding", name: string, span: Span }
| { tag: "tag", name: string, span: Span }
| { tag: "tuple_start", span: Span }
| { tag: "record_start", span: Span }
// TODO: ger rid of EOF
| { tag: "EOF", span: Span };
// === Identifier Scanners ===
// Returns the raw string.
@ -146,12 +160,14 @@ export function identifier(cursor: Cursor, kind: IdentifierKind): { name: string
// === Literal Scanners ===
// throws ExprScanError
// TODO: handle trailing whitespace
function number(cursor: Cursor): { value: number, span: Span } {
const res = scanNumber(cursor);
if (res.tag === "ok") { return res.value; } else { throw (res.error as ExprScanError); }
}
// throws ExprScanError
// TODO: handle trailing whitespace
function string(cursor: Cursor): { value: string, span: Span } {
const res = scanString(cursor);
if (res.tag === "ok") { return res.value; } else { throw (res.error as ExprScanError); }
@ -159,10 +175,10 @@ function string(cursor: Cursor): { value: string, span: Span } {
// === complex scanners ===
// TODO: in exprStart/patternStart make sure whitespace is consumed after they succesfuly produce token. Should we build it in the functions? Or should that be in `parser.ts`?
// throws ExprScanError
export function exprStart(cursor: Cursor): ExprStartToken {
skipWhitespaceAndComments(cursor);
const start = cursor.currentLocation();
if (cursor.eof()) {
return { tag: "EOF", span: cursor.makeSpan(start) };
@ -220,53 +236,105 @@ export function exprStart(cursor: Cursor): ExprStartToken {
}
}
// Scans the first token of a pattern at the cursor position.
//
// Returns:
//   - an "EOF" token when the cursor is at end of input,
//   - "tuple_start" after consuming `(`,
//   - "record_start" after consuming `{`,
//   - "tag" after consuming `#` and the identifier that follows it,
//   - otherwise "pattern_binding" with the scanned identifier.
// Throws an "UnexpectedCharacter" ExprScanError when the next character is a
// digit, `"` or `-`. The `identifier` calls presumably throw as well for
// invalid names - verify against `identifier`.
//
// This function does not skip whitespace/comments itself, neither before nor
// after the token.
export function patternStart(cursor: Cursor): PatternStartToken {
const start = cursor.currentLocation();
// TODO: Need a Token to TokenKind function
// TODO: Need is_start_of_expression(token): boolean
// identifier -> true
// symbol # -> true
// symbol $ -> true
// symbol @ -> true
// symbol ( -> true
// symbol { -> true // this is actually context dependent. Sometimes its a start of a binding context { params . body } or { let-params . body }, and sometimes it is a record. But this function is gonna be used only in the first context
// symbol _ -> false
// number -> true
// string -> true
// keyword let -> true
// keyword fn -> true
// keyword apply -> true
// keyword = -> false
// keyword | -> false
// EOF -> false
//
// TODO: function that matches a token with a token_type (returns bool)
if (cursor.eof()) {
return { tag: "EOF", span: cursor.makeSpan(start) };
}
// TODO: forbidden characters are
// '('
// ')'
// '{'
// '}'
// '.'
// ','
// '|'
// '$'
// '#'
// '@'
// '"'
// ' '
// '\r'
// '\t'
// '\n'
// TODO: need function is_forbidden_char
// The non-null assertion relies on the eof() check above.
// NOTE(review): assumes peek() only returns undefined at end of input - confirm in Cursor.
const c = cursor.peek()!;
// === tuple ===
if (c === char('(')) {
cursor.next();
return { tag: "tuple_start", span: cursor.makeSpan(start) };
}
// === record ===
if (c === char('{')) {
cursor.next();
return { tag: "record_start", span: cursor.makeSpan(start) };
}
// === tag ===
if (c === char('#')) {
cursor.next();
const { name } = identifier(cursor, 'tag_construction');
return { tag: "tag", name, span: cursor.makeSpan(start) };
}
// === scanner functions ===
// TODO: whitespace - consumes whitespace
// TODO: comment - consumes token
// TODO: raw_identifier - consumes raw identifier - then we can decide whether that was a keyword or an identifier
// TODO: string - consumes string like "foo bar\njfjdsajfksd"
// TODO: number - consumes number like 123123 or 000123 or 23919233.123
//
// TODO: token - gives next token
// TODO: This is more subtle... -foo is a valid pattern name... I think I should restrict: can't have identifiers start with `-`? But then `-` itself can't be an identifier, which is a bit sad.
// TODO: This is gonna be different once we allow number/string literals as patterns.
// Number and string literals are not patterns (yet), so their first characters are rejected here.
if (isDigit(c) || c === char('"') || c === char('-')) {
throw {
tag: "UnexpectedCharacter",
char: c,
span: cursor.makeSpan(start)
} as ExprScanError;
}
// === pattern binding ===
const { name } = identifier(cursor, 'pattern_binding');
return { tag: "pattern_binding", name, span: cursor.makeSpan(start) };
}
/**
 * Lookahead: reports whether the upcoming token can start an expression.
 *
 * The cursor state is saved first and always restored, so nothing is
 * consumed. A scanner error counts as "no".
 *
 * @returns `true` for number, string, variable, tag, tuple/record start,
 *          function name, and the keywords `let`, `fn`, `match`, `apply`;
 *          `false` for everything else.
 */
export function isNextTokenExprStart(cursor: Cursor): boolean {
  const checkpoint = cursor.save();
  try {
    const upcoming = exprStart(cursor);
    if (upcoming.tag === "keyword") {
      // Only these keywords open an expression; `=`, `|` and `!` do not.
      const kw: string = upcoming.kw;
      return kw === "let" || kw === "fn" || kw === "match" || kw === "apply";
    }
    const startTags: string[] = [
      "number",
      "string",
      "variable_use",
      "tag",
      "tuple_start",
      "record_start",
      "function_name", // e.g. my_func(x)
    ];
    return startTags.includes(upcoming.tag);
  } catch (e) {
    return false;
  } finally {
    cursor.restore(checkpoint);
  }
}
/**
 * Lookahead: reports whether the upcoming token can start a product pattern
 * (a binding name, a tuple, or a record).
 *
 * The cursor state is saved first and always restored, so nothing is
 * consumed. A scanner error counts as "no".
 */
export function isNextTokenProductPatternStart(cursor: Cursor): boolean {
  const checkpoint = cursor.save();
  try {
    const upcomingTag: string = patternStart(cursor).tag;
    return (
      upcomingTag === "pattern_binding" ||
      upcomingTag === "tuple_start" ||
      upcomingTag === "record_start"
    );
  } catch (e) {
    return false;
  } finally {
    cursor.restore(checkpoint);
  }
}

View file

@ -145,8 +145,8 @@ export type Expr =
| { tag: "tag", tag_name: Tag }
| { tag: "tagged", tag_name: Tag, expr: Expr }
| { tag: "tuple", exprs: Expr[] }
| { tag: "record", fields: { name: FieldName, expr: Expr }[] }
| { tag: "match", arg: Expr, branches: { pattern: Pattern, body: Expr }[] }
| { tag: "record", fields: FieldAssignment[] }
| { tag: "match", arg: Expr, branches: MatchBranch[] }
| { tag: "lambda", parameters: ProductPattern[], body: Expr }
| { tag: "apply", callee: Expr, args: Expr[] }
@ -155,14 +155,23 @@ export type Literal =
| { tag: "string", value: string }
export type ExprBinding = {
var: ProductPattern,
pattern: ProductPattern,
expr: Expr,
}
export type MatchBranch = {
pattern: Pattern,
body: Expr,
}
export type FieldAssignment = { name: FieldName, expr: Expr };
export type ProductPattern =
| { tag: "any", name: VariableName }
| { tag: "tuple", patterns: ProductPattern[] }
| { tag: "record", fields: { field_name: FieldName, pattern: ProductPattern }[] }
| { tag: "record", fields: FieldPattern[] }
export type FieldPattern = { fieldName: FieldName, pattern: ProductPattern };
export type Pattern =
| ProductPattern
@ -212,12 +221,16 @@ export namespace Expr {
export const tag = (tag_name: Tag): Expr => ({ tag: "tag", tag_name, });
export const tagged = (tag_name: Tag, expr: Expr): Expr => ({ tag: "tagged", tag_name, expr, });
export const tuple = (exprs: Expr[]): Expr => ({ tag: "tuple", exprs });
export const record = (fields: { name: FieldName, expr: Expr }[]): Expr => ({ tag: "record", fields });
export const match = (arg: Expr, branches: { pattern: Pattern; body: Expr }[]): Expr => ({ tag: "match", arg, branches, });
export const record = (fields: FieldAssignment[]): Expr => ({ tag: "record", fields });
export const match = (arg: Expr, branches: MatchBranch[]): Expr => ({ tag: "match", arg, branches, });
export const var_use = (name: VariableName): Expr => ({ tag: "var_use", name, });
export const let_ = (bindings: ExprBinding[], body: Expr): Expr => ({ tag: "let", bindings, body, });
export const apply = (callee: Expr, args: Expr[]): Expr => ({ tag: "apply", callee, args, });
export const lambda = (parameters: ProductPattern[], body: Expr): Expr => ({ tag: "lambda", parameters, body, });
export const matchBranch = (pattern: Pattern, expr: Expr): MatchBranch => ({ pattern, body: expr });
export const exprBinding = (pattern: ProductPattern, expr: Expr): ExprBinding => ({ pattern, expr });
export const fieldAssignment = (name: FieldName, expr: Expr): FieldAssignment => ({ name, expr });
}
export namespace Value {
@ -230,6 +243,19 @@ export namespace Value {
export const closure = (closure: Closure): Value => ({ tag: "closure", closure });
}
/** Constructors for product patterns and their record fields. */
export namespace ProductPattern {
  /** Builds an `any` pattern carrying the variable name. */
  export function any(name: VariableName): ProductPattern {
    return { tag: "any", name };
  }

  /** Builds a tuple pattern from its component patterns. */
  export function tuple(patterns: ProductPattern[]): ProductPattern {
    return { tag: "tuple", patterns };
  }

  /** Builds a record pattern from its field patterns. */
  export function record(fields: FieldPattern[]): ProductPattern {
    return { tag: "record", fields };
  }

  /** Pairs a field name with the pattern for that field. */
  export function fieldPattern(fieldName: FieldName, pattern: ProductPattern): FieldPattern {
    return { fieldName, pattern };
  }
}
/** Constructors for tag patterns. */
export namespace Pattern {
  /** Builds a bare tag pattern. */
  export function tag(tag_name: Tag): Pattern {
    return { tag: "tag", tag_name };
  }

  /** Builds a tag pattern that carries a payload pattern. */
  export function tagged(tag_name: Tag, pattern: Pattern): Pattern {
    return { tag: "tagged", tag_name, pattern };
  }
}
// ===Errors===
type RuntimeError =
| { tag: "FunctionLookupFailure", name: FunctionName }
@ -369,7 +395,7 @@ function eval_expr(program: Program, env: Env, e: Expr): Value {
function eval_bindings(program: Program, env: Env, bindings: ExprBinding[]): Env {
// note that `let { x = 123, y = x + 1 ... } is allowed. Ofcourse later bindings can't be referenced by earlier bindings (i.e. no recursion).
let cur_env = env;
for (const { var: var_name, expr } of bindings) {
for (const { pattern: var_name, expr } of bindings) {
const value = eval_expr(program, cur_env, expr);
const res = match_product_pattern(var_name, value);
if (res.tag === "failure") {
@ -514,8 +540,8 @@ function match_product_pattern_mut(frame: EnvFrame, pattern: ProductPattern, val
case "record":
if (value.tag !== "record") return { tag: "failure", pattern, value };
for (const { field_name, pattern: p } of pattern.fields) {
const field_value = value.fields.get(field_name);
for (const { fieldName, pattern: p } of pattern.fields) {
const field_value = value.fields.get(fieldName);
if (field_value === undefined) {
return { tag: "failure", pattern, value };
} else {

View file

@ -15,3 +15,5 @@ npm install -D sass-embedded
npx ts-node src/parser/cursor.test.ts
npx ts-node src/debug/repl.ts