From e389e46852403b485faf00b31482631d4c0a6a9a Mon Sep 17 00:00:00 2001 From: Yura Dupyn <2153100+omedusyo@users.noreply.github.com> Date: Fri, 6 Feb 2026 21:52:32 +0100 Subject: [PATCH] Change record syntax, fix a few bugs --- src/SYNTAX.md | 17 +++---- src/debug/expr_show.ts | 2 +- src/debug/repl.ts | 101 +++++++++++++++++----------------------- src/debug/value_show.ts | 55 ++++++++++++++++++++++ src/parser/parser.ts | 65 +++++++++++++++++++------- src/parser/scanner.ts | 70 +++++++++++++++++----------- src/value.ts | 16 +++++-- 7 files changed, 212 insertions(+), 114 deletions(-) create mode 100644 src/debug/value_show.ts diff --git a/src/SYNTAX.md b/src/SYNTAX.md index 2842344..c10dc45 100644 --- a/src/SYNTAX.md +++ b/src/SYNTAX.md @@ -44,8 +44,9 @@ fn match = | +: ``` -Note that `=` and `|` are treated as keywords, not symbols. So these can occur in identifiers. +Note that `=`, `|`, and `:` are treated as keywords, not symbols. So these can occur in identifiers. This has strange consequences, for example ``` @@ -105,9 +106,9 @@ let { ( , "hello" , " ", "world" , ) // this is also technically valid syntax - the commas at the start and end are optional. // records -{ x = 123, y = 512 } -{} // different from (). Perhaps in the future I'll make them equivalent or... disallow one of them. But right now these are different. -{ name = "Conan", position = { x = 5, y = 6 } } +:( x = 123, y = 512 ) // you can read `:` as `record`. We could have adopted verbose syntax `record ( x = 123, y =512 )`, but we shortened it to `:`. +:() // different from (). Perhaps in the future I'll make them equivalent or... disallow one of them. But right now these are different. 
+:( name = "Conan", position = :( x = 5, y = 6 ) ) // tags (zero-ary constructors) #true @@ -140,9 +141,9 @@ fn is-some?(xs) { // patterns x (x, y, z) -{ foo , bar } -{ foo = x, bar } // equivalent to { foo = x, bar = bar } -{ foo = _, bar = (x, y, z) } +:( foo , bar ) +:( foo = x, bar ) // equivalent to :( foo = x, bar = bar ) +:( foo = _, bar = (x, y, z) ) // lambdas/anonymous-functions fn { x . $x } // identity function @@ -195,7 +196,7 @@ top-fn-call := identifier`(` args `)` tuple-expr := `(` args `)` -record-expr := `{` list-sep-by(field, `,`) `}` +record-expr := `:` `(` list-sep-by(field, `,`) `)` field := variable-identifier `=` expr tag-expr := `#`tag-identifier // note how we don't allow a space between # and the identifier diff --git a/src/debug/expr_show.ts b/src/debug/expr_show.ts index e72c0a6..db78fe5 100644 --- a/src/debug/expr_show.ts +++ b/src/debug/expr_show.ts @@ -6,7 +6,7 @@ export function exprToString(expr: Expr): string { return literalToString(expr.literal); case "var_use": - return expr.name; + return `\$${expr.name}`; case "call": return `${expr.name}(${expr.args.map(exprToString).join(", ")})`; diff --git a/src/debug/repl.ts b/src/debug/repl.ts index 77ea260..7bc968d 100644 --- a/src/debug/repl.ts +++ b/src/debug/repl.ts @@ -1,76 +1,63 @@ import * as readline from 'readline'; import { parse } from '../parser/parser'; import { exprToString } from '../debug/expr_show'; +import { valueToString } from '../debug/value_show'; +import { eval_start, Program } from '../value'; import { Result } from '../result'; -// Helper to calculate line/col from an absolute offset -function getLineCol(text: string, offset: number) { - let line = 1; - let col = 1; - for (let i = 0; i < offset && i < text.length; i++) { - if (text[i] === '\n') { - line++; - col = 1; - } else { - col++; - } - } - return { line, col }; -} - const rl = readline.createInterface({ input: process.stdin, output: process.stdout, - prompt: 'expr> ' + prompt: '> ' }); -console.log("=== 
Quick & Dirty Parser REPL ==="); -console.log("Type an expression to parse and verify round-trip stringification."); +// We create one persistent program context +const program = Program.makeEmpty(); + +console.log("=== Evaluator REPL ==="); +console.log("Input -> Parse -> Eval -> Value"); console.log("Ctrl+C to exit.\n"); rl.prompt(); rl.on('line', (lineInput) => { const trimmed = lineInput.trim(); - if (trimmed) { - try { - const result = parse(trimmed); - - if (result.tag === "ok") { - const ast = result.value; - const reconstructed = exprToString(ast); - - console.log(`\nāœ… Parsed Successfully:`); - console.log(` ${reconstructed}`); - - } else { - const err = result.error; - // FIX: Calculate line/col manually using the input string - const { line, col } = getLineCol(trimmed, err.span.start); - const loc = `${line}:${col}`; - - console.log(`\nāŒ Parse Error [${err.tag}] at ${loc}`); - - if ('expected' in err) { - console.log(` Expected: ${(err as any).expected}`); - } - if ('reason' in err) { - console.log(` Reason: ${(err as any).reason}`); - } - if ('received' in err) { - // Useful to see what char we actually got (print char code if needed) - console.log(` Received CodePoint: ${(err as any).received}`); - } - } - } catch (e) { - console.log(`\nšŸ”„ CRASH (Uncaught Exception):`); - console.log(e); - } + if (!trimmed) { + rl.prompt(); + return; } - - console.log(); + + try { + // === 1. PARSE === + const parseResult = parse(trimmed); + + if (parseResult.tag === "error") { + const err = parseResult.error; + console.log(`\nāŒ [Parse Error]:`, err); + // (Optional: Reuse your line/col logic here) + rl.prompt(); + return; + } + + const ast = parseResult.value; + console.log(`\nAST: ${exprToString(ast)}`); + + // === 2. 
EVALUATE === + const evalResult = eval_start(program, ast); + + if (evalResult.tag === "ok") { + console.log(`VAL: ${valueToString(evalResult.value)}`); + } else { + const err = evalResult.error; + console.log(`\nšŸ”„ [Runtime Error]:`, err); + } + + } catch (e) { + console.log(`\nšŸ’„ [System Crash]:`); + console.log(e); + } + + console.log(""); rl.prompt(); -}).on('close', () => { - console.log('Bye!'); - process.exit(0); }); + diff --git a/src/debug/value_show.ts b/src/debug/value_show.ts new file mode 100644 index 0000000..7c13080 --- /dev/null +++ b/src/debug/value_show.ts @@ -0,0 +1,55 @@ +// src/debug/value_string.ts + +import { Value, Env, Closure, EnvFrame } from '../value'; +import { exprToString, productPatternToString } from './expr_show'; + +export function valueToString(val: Value): string { + switch (val.tag) { + case "number": return val.value.toString(); + case "string": return `"${val.value}"`; + case "tag": return `#${val.tag_name}`; + case "tagged": return `#${val.tag_name} ${valueToString(val.value)}`; + case "tuple": return `(${val.values.map(valueToString).join(", ")})`; + case "record": { + const entries = Array.from(val.fields.entries()) + .map(([k, v]) => `${k} = ${valueToString(v)}`) + .join(", "); + return `{ ${entries} }`; + } + case "closure": return closureToString(val.closure); + } +} + +function closureToString(c: Closure): string { + const params = c.parameters.map(productPatternToString).join(", "); + const envStr = envToString(c.env); + // We represent the closure as the code + a summary of its captured scope + return `fn { ${params} . 
${exprToString(c.body)} } [captured: ${envStr}]`; +} + +function envToString(env: Env): string { + if (env.tag === "nil") return "āˆ…"; + + const frames: string[] = []; + let current: Env = env; + + while (current.tag === "frame") { + frames.push(frameToString(current.frame)); + current = current.parent; + } + + // Shows stack from inner-most to outer-most + return frames.join(" ⮕ "); +} + +function frameToString(frame: EnvFrame): string { + const entries = Array.from(frame.entries()); + if (entries.length === 0) return "{}"; + + const formattedEntries = entries.map(([name, val]) => { + // We call valueToString here to show the actual data + return `${name} = ${valueToString(val)}`; + }); + + return `{ ${formattedEntries.join(", ")} }`; +} diff --git a/src/parser/parser.ts b/src/parser/parser.ts index 8c95ac2..96f1f36 100644 --- a/src/parser/parser.ts +++ b/src/parser/parser.ts @@ -1,6 +1,6 @@ import { Expr, ExprBinding, FieldAssignment, FieldPattern, MatchBranch, Pattern, ProductPattern } from '../value'; import { Cursor } from './cursor'; -import { ExprScanError, exprStart, ExprStartToken, identifier, isNextTokenExprStart, isNextTokenProductPatternStart, patternStart, PatternStartToken, skipWhitespaceAndComments } from './scanner'; +import { ExprScanError, exprStart, ExprStartToken, IdentifierKind, identifierScanner, isNextTokenExprStart, isNextTokenProductPatternStart, patternStart, PatternStartToken, skipWhitespaceAndComments } from './scanner'; import { char, CodePoint, SourceText, Span } from './source_text'; import { Result } from '../result'; @@ -25,6 +25,7 @@ export type ParseError = | { tag: "ExpectedPatternAssignmentSymbol", span: Span } // Expected '=' in pattern assignment | { tag: "ExpectedPatternBindingSymbol", span: Span } // Expected '.' 
in pattern binding | { tag: "ExpectedFunctionCallStart", span: Span } // Expected '(' after function name + | { tag: "ExpectedRecordOpen", span: Span } // Expected '(' after ':' | { tag: "ExpectedLetBlockOpen", span: Span } // Expected '{' after 'let' | { tag: "ExpectedLetBlockClose", span: Span } // Expected '}' at end of 'let' expression | { tag: "ExpectedMatchBlockOpen", span: Span } // Expected '{' after 'match' @@ -35,6 +36,7 @@ export type ParseError = | { tag: "ExpectedApplySeparator", span: Span } // Expected '!' inside 'apply' | { tag: "UnexpectedTagPattern", span: Span } // Found #tag where product pattern expected | { tag: "ExpectedPattern", span: Span } // EOF or invalid start of pattern + | { tag: "ExpectedRecordPatternOpen", span: Span } // Expected '(' at start of record pattern | { tag: "ExpectedRecordField", span: Span }; // Expected identifier in record pattern // TODO: Delete? @@ -44,6 +46,7 @@ export type Expectation = | "ExpectedPatternAssignmentSymbol" | "ExpectedPatternBindingSymbol" | "ExpectedFunctionCallStart" + | "ExpectedRecordOpen" | "ExpectedLetBlockOpen" | "ExpectedLetBlockClose" | "ExpectedMatchBlockOpen" @@ -53,6 +56,7 @@ export type Expectation = | "UnexpectedTagPattern" | "ExpectedPattern" | "ExpectedRecordField" + | "ExpectedRecordPatternOpen" export type Parser = (cursor: Cursor) => T @@ -92,7 +96,7 @@ function delimitedTerminalSequence(cursor: Cursor, delimiter: CodePoint, term // // All our use-cases always have a well-defined terminator character: // tuples: ( a, b, c ) -> `)` - // records: { f0 = e0, f1 = e1 } -> `}` + // records: :( f0 = e0, f1 = e1 ) -> `)` // function call: f(a, b, c) -> `)` // let-binding: let { p = e . body } -> `.` // fn-asbtraction: fn { p0, p1 . 
body } -> `.` @@ -110,6 +114,7 @@ function delimitedTerminalSequence(cursor: Cursor, delimiter: CodePoint, term } while (true) { + const item = p(cursor); // `p` should be responsible for getting rid of whitespace after it has done its work items.push(item); @@ -154,6 +159,12 @@ function patternStartToken(cursor: Cursor): PatternStartToken { return token; } +function identifier(cursor: Cursor, kind: IdentifierKind): { name: string, span: Span } { + const result = identifierScanner(cursor, kind); + skipWhitespaceAndComments(cursor); + return result; +} + // === Expression Parsers === function expr(cursor: Cursor): Expr { @@ -185,10 +196,6 @@ function expr(cursor: Cursor): Expr { // e.g. (a, b, c) const items = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr); return Expr.tuple(items); - case "record_start": - // e.g. { x = 1, y = 2 } - const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_BRACE, fieldAssignment); - return Expr.record(fields); case "function_name": // e.g. my_func(arg1, arg2) // parse a `,` delimiter sequence of expr @@ -203,6 +210,17 @@ function expr(cursor: Cursor): Expr { return Expr.call(token.name, args); case "keyword": switch (token.kw) { + case ":": + // e.g. :( x = 1, y = 2 ) + // or : ( x = 1, y = 2 ) + if (!tryConsume(cursor, char('('))) { + throw { + tag: "ExpectedRecordOpen", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, fieldAssignment); + return Expr.record(fields); case "let": // let { p0 = e0, p1 = e2 . 
body } if (!tryConsume(cursor, char('{'))) { @@ -320,7 +338,7 @@ function productPatternBinding(cursor: Cursor): ExprBinding { function fieldAssignment(cursor: Cursor): FieldAssignment { // `f = e` - const { name, span } = identifier(cursor, 'identifier'); + const { name, span } = identifier(cursor, 'field_name'); if (!tryConsume(cursor, char('='))) { throw { @@ -385,27 +403,42 @@ function finishProductPattern(cursor: Cursor, token: PatternStartToken): Product return ProductPattern.tuple(items); } - case "record_start": { - // { a = p, b } - const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_BRACE, recordPatternField); - return ProductPattern.record(fields); - } - case "tag": throw { tag: "UnexpectedTagPattern", span: token.span } as ParseError; + case "keyword": { + switch (token.kw) { + case ":": { + // :( a = p, b ) + // The `:` keyword must be followed by `(`; the field list below is terminated by the matching `)`. + if (!tryConsume(cursor, char('('))) { + throw { + tag: "ExpectedRecordPatternOpen", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, recordPatternField); + return ProductPattern.record(fields); + } + default: + // These keywords CANNOT start a pattern. 
+ throw { tag: "ExpectedPattern", span: token.span } as ParseError; + } + } + + case "EOF": throw { tag: "ExpectedPattern", span: token.span } as ParseError; } } function recordPatternField(cursor: Cursor): FieldPattern { - const { name, span } = identifier(cursor, 'identifier'); // Reuse existing identifier scanner + const { name, span } = identifier(cursor, 'field_name'); if (tryConsume(cursor, char('='))) { const p = productPattern(cursor); return ProductPattern.fieldPattern(name, p); } else { - // Punning: { a } -> { a = a } + // Punning: :( a ) -> :( a = a ) return ProductPattern.fieldPattern(name, ProductPattern.any(name)); } } @@ -422,7 +455,7 @@ export function parse(input: string): Result { if (!cursor.eof()) { return Result.error({ tag: "UnexpectedToken", - expected: "End of File", + expected: "EndOfFile", span: cursor.makeSpan(cursor.currentLocation()) } as ParseError); } diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 813c835..2a748bb 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -47,7 +47,7 @@ const DELIMITER_CHARS = ["(", ")", "{", "}", ".", ",", "@", "$", "#", '"', "\\"] export type Delimiter = typeof DELIMITER_CHARS[number]; const DELIMITER_SET: Set = new Set(DELIMITER_CHARS.map(c => char(c))); -const KEYWORD_LIST = ["let" , "fn" , "match" , "apply" , "=" , "|" , "!"] as const; +const KEYWORD_LIST = ["let" , "fn" , "match" , "apply" , "=" , "|" , "!", ":"] as const; export type Keyword = typeof KEYWORD_LIST[number]; const KEYWORD_SET: Set = new Set(KEYWORD_LIST); @@ -61,13 +61,14 @@ export type ExprScanError = // What kind of identifier were we trying to parse? 
export type IdentifierKind = - | "identifier" | "variable_use" + | "field_name" | "tag_construction" | "function_call" | "pattern_binding"; export type IdentifierErrorReason = + | { tag: "Empty" } | { tag: "StartsWithDigit" } | { tag: "IsKeyword", kw: Keyword } @@ -80,7 +81,6 @@ export type ExprStartToken = | { tag: "variable_use", name: string, span: Span } | { tag: "tag", name: string, span: Span } | { tag: "tuple_start", span: Span } - | { tag: "record_start", span: Span } | { tag: "keyword", kw: Keyword, span: Span } // TODO: ger rid of EOF | { tag: "EOF", span: Span } @@ -89,7 +89,7 @@ export type PatternStartToken = | { tag: "pattern_binding", name: string, span: Span } | { tag: "tag", name: string, span: Span } | { tag: "tuple_start", span: Span } - | { tag: "record_start", span: Span } + | { tag: "keyword", kw: Keyword, span: Span } // TODO: ger rid of EOF | { tag: "EOF", span: Span }; @@ -116,7 +116,7 @@ function rawIdentifier(cursor: Cursor): string { // Scans raw identifier, // checks if it is a keyword, // if it ain't, validates it into a proper identifier. 
-function identifierOrKeyword( +function identifierOrKeywordScanner( cursor: Cursor, kind: IdentifierKind, ): { tag: "keyword", kw: Keyword, span: Span } @@ -124,6 +124,15 @@ function identifierOrKeyword( const start = cursor.currentLocation(); const text = rawIdentifier(cursor); const span = cursor.makeSpan(start); + if (text.length === 0) { + throw ({ + tag: "InvalidIdentifier", + text, + kind, + reason: { tag: "Empty" }, + span + } as ExprScanError); + } if (KEYWORD_SET.has(text)) { return { tag: "keyword", kw: text as Keyword, span }; @@ -142,8 +151,8 @@ function identifierOrKeyword( return { tag: "identifier", name: text, span }; } -export function identifier(cursor: Cursor, kind: IdentifierKind): { name: string, span: Span } { - const res = identifierOrKeyword(cursor, kind); +export function identifierScanner(cursor: Cursor, kind: IdentifierKind): { name: string, span: Span } { + const res = identifierOrKeywordScanner(cursor, kind); if (res.tag === "keyword") { throw ({ @@ -201,14 +210,14 @@ export function exprStart(cursor: Cursor): ExprStartToken { // === variable use === if (c === char('$')) { cursor.next(); - const { name } = identifier(cursor, 'variable_use'); + const { name } = identifierScanner(cursor, 'variable_use'); return { tag: "variable_use", name, span: cursor.makeSpan(start) }; } // === tags === if (c === char('#')) { cursor.next(); - const { name } = identifier(cursor, 'tag_construction'); + const { name } = identifierScanner(cursor, 'tag_construction'); return { tag: "tag", name, span: cursor.makeSpan(start) }; } @@ -219,15 +228,9 @@ export function exprStart(cursor: Cursor): ExprStartToken { return { tag: "tuple_start", span: cursor.makeSpan(start) }; } - // === records === - if (c === char('{')) { - cursor.next(); - return { tag: "record_start", span: cursor.makeSpan(start) }; - } - // === keywords & identifiers === // Fallthrough: it must be a keyword or a function call - const result = identifierOrKeyword(cursor, 'function_call'); + const 
result = identifierOrKeywordScanner(cursor, 'function_call'); switch (result.tag) { case "keyword": return result; @@ -250,16 +253,10 @@ export function patternStart(cursor: Cursor): PatternStartToken { return { tag: "tuple_start", span: cursor.makeSpan(start) }; } - // === record === - if (c === char('{')) { - cursor.next(); - return { tag: "record_start", span: cursor.makeSpan(start) }; - } - // === tag === if (c === char('#')) { cursor.next(); - const { name } = identifier(cursor, 'tag_construction'); + const { name } = identifierScanner(cursor, 'tag_construction'); return { tag: "tag", name, span: cursor.makeSpan(start) }; } @@ -274,8 +271,14 @@ export function patternStart(cursor: Cursor): PatternStartToken { } // === pattern binding === - const { name } = identifier(cursor, 'pattern_binding'); - return { tag: "pattern_binding", name, span: cursor.makeSpan(start) }; + // Fallthrough: it must be a keyword or a pattern-variable (scanned as 'pattern_binding' so identifier errors report the right kind) + const result = identifierOrKeywordScanner(cursor, 'pattern_binding'); + switch (result.tag) { + case "keyword": + return result; + case "identifier": + return { tag: "pattern_binding", name: result.name, span: result.span }; + } } export function isNextTokenExprStart(cursor: Cursor): boolean { @@ -289,7 +292,6 @@ export function isNextTokenExprStart(cursor: Cursor): boolean { case "variable_use": case "tag": case "tuple_start": - case "record_start": case "function_name": // e.g. 
my_func(x) return true; @@ -299,6 +301,7 @@ export function isNextTokenExprStart(cursor: Cursor): boolean { case "fn": case "match": case "apply": + case ":": return true; case "=": case "|": @@ -327,8 +330,21 @@ export function isNextTokenProductPatternStart(cursor: Cursor): boolean { switch (token.tag) { case "pattern_binding": case "tuple_start": - case "record_start": return true; + + case "keyword": + switch (token.kw) { + case ":": + return true; + case "let": + case "fn": + case "match": + case "apply": + case "=": + case "|": + case "!": + return false; + } default: return false; } diff --git a/src/value.ts b/src/value.ts index ef6302a..ae5bd56 100644 --- a/src/value.ts +++ b/src/value.ts @@ -75,6 +75,12 @@ export namespace Program { export function error(error: Error): Result { return { tag: "error", error } } } + export function makeEmpty(): Program { + return { + function_definitions: new Map(), + function_definition_order: [], + }; + } // TODO: Primitive functions like +, -, *, div, <, <=, ==, mod // TODO: function to create initial program (with the above primitive functions otherwise empty) @@ -181,7 +187,7 @@ export type Pattern = // === Values === -type Value = +export type Value = | { tag: "string", value: string } | { tag: "number", value: number } | { tag: "tag", tag_name: Tag } @@ -190,7 +196,7 @@ type Value = | { tag: "record", fields: Map } | { tag: "closure", closure: Closure } -type ValueTag = +export type ValueTag = | "string" | "number" | "tag" @@ -200,13 +206,13 @@ type ValueTag = | "closure" // Used as a Stack of frames. Basically a linked list. -type Env = +export type Env = | { tag: "nil" } | { tag: "frame", frame: EnvFrame, parent: Env } -type EnvFrame = Map; +export type EnvFrame = Map; -type Closure = { +export type Closure = { env: Env, parameters: ProductPattern[], body: Expr,