Move language files into dedicated folder

This commit is contained in:
Yura Dupyn 2026-02-07 10:43:30 +01:00
parent 3d1cd89067
commit 1b406899e0
15 changed files with 7 additions and 343 deletions

253
src/lang/SYNTAX.md Normal file
View file

@ -0,0 +1,253 @@
Syntax is designed to be easily parsable by a recursive-descent/parser-combinators - not necessarily designed for intuitive human use.
It is inspired by syntax like LISP, but instead of `(f a b c)` we'd write `f(a, b, c)`.
Note that we allow fairly permissive identifiers like
```
foo-bar
_asd123
foo☺
☺️
<
<=
==
+
*
-
empty?
bang-you-dead!
```
The following are not valid identifiers
```
123foo // starts with a digit
#foo // special symbol #
$bar // special symbol $
b@ar // special symbol @
b(ar // special symbol (
= // only a special symbol by itself so for example `==` is ok, or even `foo=` is ok.
```
Special symbols can't ever occur in any sort of way in an identifier. The following are special symbols
```
#
$
@
.
,
(
)
{
}
```
Then there are keywords. A keyword may occur as a substring of an identifier, but an identifier can't consist of just a keyword
```
let
fn
match
=
|
:
```
Note that `=`, `|`, and `:` are treated as keywords, not symbols. So these can occur in identifiers.
This has strange consequences, for example
```
x=123
```
is a valid identifier (since there are no spaces) - and this may cause huge confusion for the user when using record syntax or pattern matching on records.
Even though this is borderline evil, I've decided for simplicity's sake to accept this. I'll just have to make sure syntax errors are really good.
Examples of expressions in the language (except the top-level function definitions which are here just for illustration)
```
// numbers
123
12312.154
// strings
"hello, world!"
"unicode◊yes☺"
// variable use
$x
// top-level function call
square(3)
pyth(3, 4)
+(16, 17) // even primitive operations are just function calls
+(*($x, $x), *($y, $y)) // x^2 + y^2
// top-level function definitions
fn square(x) { *($x, $x) }
fn pyth(x, y) { +(square($x), square($y)) }
// let-binding
let { foo = 123 . $foo }
let { x = 123, y = +($x, 1) . *($y, $y) } // i.e. y = (x + 1)^2
let { . 123 } // degenerate case
let {
, x = 123 // note the position of comma at the start (it is optional there)
, y = +($x, 1)
, z = *($x, $y)
. $z
}
// tuples
(1, 2, 3)
()
(123) // this is not the same as 123. Parentheses used like this don't introduce redundancy. 1-tuple technically doesn't act as identity (in LISP `(f)` is also very different from `f`)
((123, 56), (78, 123))
(
"hello",
" ",
"world", // note the comma at the end (also optional)
)
( , "hello" , " ", "world" , ) // this is also technically valid syntax - the commas at the start and end are optional.
// records
:( x = 123, y = 512 ) // you can read `:` as `record`. We could have adopted verbose syntax `record ( x = 123, y =512 )`, but we shortened it to `:`.
:() // different from (). Perhaps in the future I'll make them equivalent or... disallow one of them. But right now these are different.
:( name = "Conan", position = :( x = 5, y = 6 ) )
// tags (zero-ary constructors)
#true
#false
#nil
#empty
#unit
#none
#whatever-you-want
// tagged values (unary constructors)
#just 123
#cons (10, #cons (20, #nil))
// match-statement and patterns
fn len(xs) {
match $xs {
| #nil . 0
| #cons (_x, xs) . +(1, len($xs))
}
}
fn is-some?(xs) {
match $xs {
| #some _ . #true
| #none . #false
}
}
// patterns
x
(x, y, z)
:( foo , bar )
:( foo = x, bar ) // equivalent to :( foo = x, bar = bar )
:( foo = _, bar = (x, y, z) )
// lambdas/anonymous-functions
fn { x . $x } // identity function
fn { x, y . +(*($x, $x), *($y, $y)) } // (x, y) => x^2 + y^2
// Actually the following is a valid lambda abstraction, since the product-patterns are allowed as the input parameters
fn { (x, (y, z)), { foo, bar = u } . +($x, $y, $z, $u) }
// application of lambdas
apply(e !)
apply(e ! e0)
apply(e ! e0, e1)
apply(e ! e0, e1, e2)
apply(apply(e ! e0) ! e1, e2)
apply(e ! apply(f ! e0, e1), e2)
```
More formal grammar
```
number := ...
string := ...
identifier := ...
variable-identifier := identifier
fn-identifier := identifier
field-identifier := identifier
tag-identifier := identifier
expr :=
| var-use
| let-expr
| top-fn-call
| tuple-expr
| record-expr
| tag-expr
| tagged-expr
| match-expr
| lambda-abstraction
| lambda-application
var-use := `$`identifier // note that here we don't allow a space between $ and the identifier
let-expr := `let` `{` list-sep-by(let-binding, `,`) `.` expr `}`
let-binding := product-pattern `=` expr
top-fn-call := identifier`(` args `)`
tuple-expr := `(` args `)`
record-expr := `:` `(` list-sep-by(field, `,`) `)`
field := variable-identifier `=` expr
tag-expr := `#`tag-identifier // note how we don't allow a space between # and the identifier
tagged-expr := `#`tag-identifier expr
match-expr := `match` expr `{` list-sep-by(match-branch, '|') `}`
match-branch := pattern `.` expr
lambda-abstraction := `fn` `{` params `.` expr `}`
lambda-application := `apply` `(` expr `!` args `)`
args := list-sep-by(expr, `,`)
params := list-sep-by(product-pattern, `,`)
// patterns
pattern :=
| `#`tag-identifier // note we don't allow a space between # and the identifier
| `#`tag-identifier expr // tagged value pattern
| product-pattern
product-pattern :=
| variable-identifier // match-all pattern
| `(` list-sep-by(product-pattern, `,`) `)`
| `{` list-sep-by(field-pattern, `,`) `}`
field-pattern :=
| field-identifier
| field-identifier `=` product-pattern
// helpers
strict-list-sep-by<p, sep> :=
| p
| p sep p
| p sep p sep p
| ...
list-sep-by<p, sep> :=
| ''
| optional(sep) strict-list-sep-by(p, sep) optional(sep)
optional<p> :=
| ''
| p
```
Also:
A tagged expression is parsed when a #tag is followed by a token that can start an expression and is not a terminator.
So for example when parsing `#foo 123`, we parse the tag, the remainder is `123`, and we see that the next token is not a special symbol like `,` or `}` or `)` etc,
so we conclude that this has to be a tagged value we're parsing, so the next thing is an expression.
But for a case like `#foo, ...`, we see that the next token is `,`, so we must have just parsed a tag - not a tagged value.
Also:
We don't yet have any convenient syntax for creating new records from old (like an immutable update syntax).

107
src/lang/debug/expr_show.ts Normal file
View file

@ -0,0 +1,107 @@
import { Expr, Pattern, ProductPattern, Literal, FieldAssignment, FieldPattern } from '../value';
/**
 * Renders an expression AST node as text for debugging output.
 *
 * Nested expressions are rendered recursively; patterns inside `let`, lambdas
 * and `match` are rendered by the pattern printers.
 */
export function exprToString(expr: Expr): string {
  switch (expr.tag) {
    case "literal": {
      return literalToString(expr.literal);
    }
    case "var_use": {
      return `$${expr.name}`;
    }
    case "call": {
      const renderedArgs = expr.args.map(exprToString);
      return `${expr.name}(${renderedArgs.join(", ")})`;
    }
    case "tuple": {
      const items = expr.exprs.map(exprToString);
      return `(${items.join(", ")})`;
    }
    case "record": {
      const renderedFields = expr.fields.map(fieldAssignmentToString);
      return `{ ${renderedFields.join(", ")} }`;
    }
    case "tag": {
      return `#${expr.tag_name}`;
    }
    case "tagged": {
      // The payload is emitted as-is after a single space; it is never
      // wrapped in parentheses, whatever its shape.
      const payload = exprToString(expr.expr);
      return `#${expr.tag_name} ${payload}`;
    }
    case "let": {
      const renderedBindings: string[] = [];
      for (const binding of expr.bindings) {
        const lhs = productPatternToString(binding.pattern);
        const rhs = exprToString(binding.expr);
        renderedBindings.push(`${lhs} = ${rhs}`);
      }
      const body = exprToString(expr.body);
      return `let { ${renderedBindings.join(", ")} . ${body} }`;
    }
    case "lambda": {
      const renderedParams = expr.parameters.map(productPatternToString);
      const body = exprToString(expr.body);
      return `fn { ${renderedParams.join(", ")} . ${body} }`;
    }
    case "apply": {
      const renderedArgs = expr.args.map(exprToString);
      const callee = exprToString(expr.callee);
      return `apply(${callee} ! ${renderedArgs.join(", ")})`;
    }
    case "match": {
      const renderedBranches: string[] = [];
      for (const branch of expr.branches) {
        const lhs = patternToString(branch.pattern);
        const rhs = exprToString(branch.body);
        renderedBranches.push(`${lhs} . ${rhs}`);
      }
      const scrutinee = exprToString(expr.arg);
      return `match ${scrutinee} { ${renderedBranches.join(" | ")} }`;
    }
  }
}
// === Helpers ===
/**
 * Renders a literal as text.
 *
 * Numbers are rendered with `toString()`. Strings are wrapped in double quotes,
 * and backslash, double quote, newline, carriage return, tab and NUL are written
 * as the escape sequences `\\`, `\"`, `\n`, `\r`, `\t` and `\0`, so that a string
 * containing a quote or a line break is still printed as one unambiguous literal.
 * All other characters are emitted unchanged.
 */
function literalToString(lit: Literal): string {
  switch (lit.tag) {
    case "number":
      return lit.value.toString();
    case "string": {
      // Kept local so the function has no module-level state to initialise.
      const escapes: Record<string, string> = {
        "\\": "\\\\",
        '"': '\\"',
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
        "\0": "\\0",
      };
      const escaped = lit.value.replace(/[\\"\n\r\t\0]/g, (ch) => escapes[ch] ?? ch);
      return `"${escaped}"`;
    }
  }
}
/** Renders one record field of an expression as `name = <expr>`. */
function fieldAssignmentToString(f: FieldAssignment): string {
  const { name, expr } = f;
  const renderedExpr = exprToString(expr);
  return `${name} = ${renderedExpr}`;
}
// === Pattern Printers ===
/**
 * Renders a match pattern as text.
 *
 * Bare tags print as `#name`, tagged patterns as `#name <inner pattern>`;
 * everything else is handed to `productPatternToString`.
 */
export function patternToString(pat: Pattern): string {
  if (pat.tag === "tag") {
    return `#${pat.tag_name}`;
  }
  if (pat.tag === "tagged") {
    const inner = patternToString(pat.pattern);
    return `#${pat.tag_name} ${inner}`;
  }
  // Remaining variants are product patterns (any, tuple, record).
  return productPatternToString(pat);
}
/**
 * Renders a product pattern as text: a binder prints as its name, a tuple as
 * `(p1, p2, ...)` and a record as `{ f1, f2 = p, ... }`.
 */
export function productPatternToString(pat: ProductPattern): string {
  switch (pat.tag) {
    case "any": {
      return pat.name;
    }
    case "tuple": {
      const items = pat.patterns.map(productPatternToString);
      return `(${items.join(", ")})`;
    }
    case "record": {
      const renderedFields = pat.fields.map(fieldPatternToString);
      return `{ ${renderedFields.join(", ")} }`;
    }
  }
}
/**
 * Renders one field of a record pattern.
 *
 * When the field binds a variable with the same name as the field itself
 * ("punning"), only the field name is printed; otherwise `field = <pattern>`.
 */
function fieldPatternToString(f: FieldPattern): string {
  const { fieldName, pattern } = f;
  const isPun = pattern.tag === "any" && pattern.name === fieldName;
  return isPun
    ? fieldName
    : `${fieldName} = ${productPatternToString(pattern)}`;
}

214
src/lang/debug/repl.ts Normal file
View file

@ -0,0 +1,214 @@
import * as readline from 'readline';
import * as fs from 'fs';
import { parse, ParseError } from '../parser/parser';
import { SourceText, renderSpan, sourceText } from '../parser/source_text';
import { exprToString } from '../debug/expr_show';
import { valueToString } from '../debug/value_show';
import { eval_start, Program } from '../value';
// ANSI escape sequences used to colour terminal output.
const C = (() => {
  // Builds the escape sequence `ESC [ <code> m` for one numeric attribute code.
  const sgr = (code: number): string => `\x1b[${code}m`;
  return {
    Reset: sgr(0),
    Red: sgr(31),
    Green: sgr(32),
    Yellow: sgr(33),
    Blue: sgr(34),
    Gray: sgr(90),
    Bold: sgr(1),
  };
})();
// The single program value, created empty, that every runSource call passes to eval_start.
const program = Program.makeEmpty();
/**
 * Parses and evaluates one piece of source text, printing the parsed AST and
 * the resulting value (or the error) to the console.
 *
 * @param inputRaw Source text; surrounding whitespace is trimmed first.
 * @param isRepl   Accepted for callers but currently not used by the body.
 * @returns `true` when the input is blank or parsed and evaluated successfully,
 *          `false` on a parse error, a runtime error, or a thrown exception.
 */
function runSource(inputRaw: string, isRepl: boolean): boolean {
  const input = inputRaw.trim();
  if (input.length === 0) {
    // Blank input is not an error.
    return true;
  }

  try {
    const text = sourceText(input);

    // --- Parse ---
    const parsed = parse(text);
    if (parsed.tag === "error") {
      printPrettyError(text, parsed.error);
      return false;
    }
    const ast = parsed.value;
    console.log(`${C.Green}AST:${C.Reset} ${exprToString(ast)}`);

    // --- Eval ---
    const outcome = eval_start(program, ast);
    if (outcome.tag !== "ok") {
      // Runtime errors are printed raw; they are not rendered with source spans.
      console.log(`\n🔥 [Runtime Error]:`, outcome.error);
      return false;
    }
    console.log(`${C.Blue}VAL:${C.Reset} ${valueToString(outcome.value)}`);
    return true;
  } catch (crash) {
    // Anything thrown by the parser or evaluator is reported, not propagated.
    console.log(`\n💥 [System Crash]:`);
    console.log(crash);
    return false;
  }
}
// === 3. Entry Point Logic ===
// With a path argument the file is run once and the process exits with 0 or 1;
// without arguments an interactive REPL is started.
// Command line arguments, ignoring 'node' and the script path.
const args = process.argv.slice(2);
if (args.length > 0) {
  // === FILE MODE ===
  const filePath = args[0];

  // Read directly instead of checking existence first: a separate existence
  // check can race with the read and does not cover other failures (for
  // example a directory path), which would otherwise crash with a stack trace.
  let fileContent: string;
  try {
    fileContent = fs.readFileSync(filePath, 'utf-8');
  } catch (e) {
    const code = e instanceof Error ? (e as NodeJS.ErrnoException).code : undefined;
    if (code === "ENOENT") {
      console.error(`${C.Red}Error: File not found '${filePath}'${C.Reset}`);
    } else {
      const detail = e instanceof Error ? e.message : String(e);
      console.error(`${C.Red}Error: Could not read file '${filePath}': ${detail}${C.Reset}`);
    }
    process.exit(1);
  }

  // Run the file and report success through the exit code.
  const success = runSource(fileContent, false);
  process.exit(success ? 0 : 1);
} else {
  // === REPL MODE ===
  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
    prompt: '> '
  });
  console.log("=== Evaluator REPL ===");
  console.log("Input -> Parse -> Eval -> Value");
  console.log("Ctrl+C to exit.\n");
  rl.prompt();
  rl.on('line', (line) => {
    // Each line is parsed and evaluated on its own; failures do not end the session.
    runSource(line, true);
    console.log(""); // Empty line for spacing
    rl.prompt();
  });
}
// === Formatting ===
/**
 * Formats a code point for use in an error message.
 *
 * `undefined` and `NaN` are shown as `EOF`; newline, carriage return and tab
 * are shown as their backslash spellings (unquoted); any other code point is
 * shown as the character wrapped in single quotes.
 */
function formatChar(cp: number | undefined): string {
  if (cp === undefined || Number.isNaN(cp)) {
    return "EOF";
  }
  const rendered = String.fromCodePoint(cp);
  switch (rendered) {
    case "\n":
      return "\\n";
    case "\r":
      return "\\r";
    case "\t":
      return "\\t";
    default:
      return `'${rendered}'`;
  }
}
/**
 * Turns a parse error into a one-line human-readable message.
 *
 * Errors that carry extra data are handled case by case; all remaining tags
 * map to a fixed sentence. A tag that is not recognised yields
 * `Unknown error: <tag>`.
 */
function getErrorMessage(err: ParseError): string {
  // Messages that depend on data carried by the error.
  switch (err.tag) {
    case "UnexpectedToken":
      return `Unexpected token. Expected: ${err.expected}`;
    case "UnexpectedTokenWhileParsingSequence": {
      const delimiter = formatChar(err.expectedDelimiter);
      const terminator = formatChar(err.expectedTerminator);
      const found = formatChar(err.received);
      return `Unexpected token in sequence. Expected delimiter ${delimiter} or terminator ${terminator}, but found ${found}.`;
    }
    case "UnexpectedCharacter":
      return `Unexpected character: ${formatChar(err.char)}`;
    case "InvalidNumber":
      if (err.reason === "NotFinite") {
        return "Number is too large or invalid.";
      }
      return "Invalid number format (missing fractional digits?).";
    case "InvalidIdentifier":
      // Only the reason's tag is shown; nested reason details are not expanded.
      return `Invalid identifier '${err.text}': ${err.reason.tag}`;
    case "InvalidEscape": {
      const reason = err.reason;
      switch (reason.tag) {
        case "UnknownEscapeSequence":
          return `Unknown escape sequence: \\${formatChar(reason.char)}`;
        case "UnicodeMissingBrace":
          return "Unicode escape missing opening brace '{'.";
        case "UnicodeNoDigits":
          return "Unicode escape missing hex digits.";
        case "UnicodeUnclosed":
          return "Unicode escape missing closing brace '}'.";
        case "UnicodeOverflow":
          return `Unicode code point ${reason.value.toString(16)} is out of bounds.`;
      }
      return "Invalid escape sequence.";
    }
  }

  // Fixed messages. The table is local to the function on purpose: this
  // function can be called while the module is still being evaluated.
  const fixedMessages = new Map<string, string>([
    ["UnexpectedEOF", "Unexpected end of file."],
    ["ExpectedNumber", "Expected a number here."],
    ["ExpectedExpression", "Expected an expression here."],
    ["ExpectedFieldAssignmentSymbol", "Expected '=' for field assignment."],
    ["ExpectedPatternAssignmentSymbol", "Expected '=' for pattern assignment."],
    ["ExpectedPatternBindingSymbol", "Expected '.' for pattern binding."],
    ["ExpectedFunctionCallStart", "Expected '(' to start function call."],
    ["ExpectedRecordOpen", "Expected '(' to start record."],
    ["ExpectedLetBlockOpen", "Expected '{' to start let-block."],
    ["ExpectedLetBlockClose", "Expected '}' to close let-block."],
    ["ExpectedMatchBlockOpen", "Expected '{' to start match-block."],
    ["ExpectedMatchBlockClose", "Expected '}' to close match-block."],
    ["ExpectedLambdaBlockOpen", "Expected '{' to start lambda body."],
    ["ExpectedLambdaBlockClose", "Expected '}' to close lambda body."],
    ["ExpectedApplyStart", "Expected '(' after 'apply'."],
    ["ExpectedApplySeparator", "Expected '!' inside 'apply'."],
    ["UnexpectedTagPattern", "Unexpected tag pattern (expected product pattern)."],
    ["ExpectedPattern", "Expected a pattern here."],
    ["ExpectedRecordPatternOpen", "Expected '(' for record pattern."],
    ["ExpectedRecordField", "Expected a field name in record pattern."],
  ]);
  const fixed = fixedMessages.get((err as any).tag);
  if (fixed !== undefined) {
    return fixed;
  }
  return `Unknown error: ${(err as any).tag}`;
}
/**
 * Prints a parse error to the console: a coloured headline followed by the
 * source lines returned by `renderSpan`, with the offending part highlighted
 * and, where present, an underline row beneath it.
 */
function printPrettyError(text: SourceText, err: ParseError) {
  const headline = getErrorMessage(err);
  console.log(`\n${C.Red}${C.Bold}Parse Error:${C.Reset} ${C.Bold}${headline}${C.Reset}`);

  // NOTE(review): the third argument is 3 — presumably the number of context
  // lines around the span; confirm against renderSpan.
  for (const view of renderSpan(text, err.span, 3)) {
    const { gutterPad, lineNo, prefix, highlight, suffix, underline } = view;

    // Gutter with the line number, e.g. " 10 | ".
    const gutter = `${C.Blue}${gutterPad}${lineNo} | ${C.Reset}`;

    // The line itself: gray before and after, red/bold for the highlighted part.
    const code = [
      `${C.Gray}${prefix}${C.Reset}`,
      `${C.Red}${C.Bold}${highlight}${C.Reset}`,
      `${C.Gray}${suffix}${C.Reset}`,
    ].join("");
    console.log(`${gutter}${code}`);

    // Lines without any underline characters get no second row.
    if (underline.trim().length === 0) {
      continue;
    }
    // Same width as the gutter above: padding + line number + " | ".
    const gutterWidth = gutterPad.length + String(lineNo).length + 3;
    console.log(`${" ".repeat(gutterWidth)}${C.Red}${C.Bold}${underline}${C.Reset}`);
  }
}

View file

@ -0,0 +1,55 @@
// src/lang/debug/value_show.ts
import { Value, Env, Closure, EnvFrame } from '../value';
import { exprToString, productPatternToString } from './expr_show';
/**
 * Renders a runtime value as text for debugging output.
 *
 * Tuples, records and tagged values are rendered recursively; closures are
 * rendered together with a summary of their captured environment.
 */
export function valueToString(val: Value): string {
  switch (val.tag) {
    case "number": {
      return val.value.toString();
    }
    case "string": {
      return `"${val.value}"`;
    }
    case "tag": {
      return `#${val.tag_name}`;
    }
    case "tagged": {
      const payload = valueToString(val.value);
      return `#${val.tag_name} ${payload}`;
    }
    case "tuple": {
      const items = val.values.map(valueToString);
      return `(${items.join(", ")})`;
    }
    case "record": {
      const renderedFields = Array.from(
        val.fields.entries(),
        ([fieldName, fieldValue]) => `${fieldName} = ${valueToString(fieldValue)}`,
      );
      return `{ ${renderedFields.join(", ")} }`;
    }
    case "closure": {
      return closureToString(val.closure);
    }
  }
}
/**
 * Renders a closure as its lambda source (`fn { params . body }`) followed by
 * `[captured: ...]`, a summary of the environment it closed over.
 */
function closureToString(c: Closure): string {
  const renderedParams = c.parameters.map(productPatternToString);
  const captured = envToString(c.env);
  const body = exprToString(c.body);
  return `fn { ${renderedParams.join(", ")} . ${body} } [captured: ${captured}]`;
}
/**
 * Renders an environment as its frames joined by " ⮕ ", starting from the
 * given (innermost) frame and following `parent` links outwards.
 * An empty environment renders as "∅".
 */
function envToString(env: Env): string {
  if (env.tag === "nil") {
    return "∅";
  }
  const renderedFrames: string[] = [];
  for (let scope: Env = env; scope.tag === "frame"; scope = scope.parent) {
    renderedFrames.push(frameToString(scope.frame));
  }
  return renderedFrames.join(" ⮕ ");
}
/**
 * Renders one environment frame as `{ name = value, ... }`, with each bound
 * value rendered by `valueToString`. A frame with no entries renders as `{}`.
 */
function frameToString(frame: EnvFrame): string {
  const bindings = Array.from(
    frame.entries(),
    ([name, val]) => `${name} = ${valueToString(val)}`,
  );
  return bindings.length === 0 ? "{}" : `{ ${bindings.join(", ")} }`;
}

View file

@ -0,0 +1,47 @@
Suppose that we have even the parser for expressions implemented, and it is run on some string. It should be an expression. To do this correctly, the parser needs to decide which sub-parsers to activate - so we need to use the scanner to determine info about what sort of an expression we may be looking at.
It can either be
| number
| string
| variable-use
| tag
| tagged-value
| tuple
| record
| top-level function call
| match
| let binding
| lambda-abstraction
| lambda-application
The syntax is designed in such a way that we don't need to do a deep-lookahead.
Basically in the scanner we try the following in this exact order to determine what to parse next:
- peek one character
- digit?, yes -> number
- symbol `-`?, peek at the next token,
- digit? -> number
- not-a-digit? -> the only possibility is that we're looking at an identifier that happens to start with `-`, and we discuss how to handle identifiers at the end of this process...
- symbol `"`?, yes -> string
- symbol `$`?, yes -> variable-use
- symbol `#`?, yes ->
- we have either a tag or a tagged-value (we don't know which yet... ). At a first glance it seems it is not scanner's job to determine which - more of a parser's job.
TODO:
But actually we can do this in scanner. After we scan the tag-name, we can do a lookahead and see if the next char is NOT-EXPRESSION-START, which is possible to do in our grammar.
- symbol `(`?, yes -> tuple
- symbol `{`?, yes -> record
- here the only possibilities are: top-level function call or some construct that starts syntactically with a keyword or an error. So the scanner needs to attempt a raw-identifier scan.
- keyword? easy
- match? -> match-expression
- let? -> let-expression
- fn? -> lambda-abstraction
- apply? -> lambda-application
- not-a-keyword? then we need to validate:
It is either a valid identifier (in this specific case function-name) or invalid identifier.
In case it is an invalid identifier, it would be nice to also predict typical errors (like are we looking at punctuation?) - but this is a bit hard - maybe this shouldn't be the job of the scanner - parser will have more context in general. In this case, we're just expecting a top-level function call. But this kind of analysis will have to be done for other invalid identifiers...
raw-identifier scan just consumes any character until it encounters a delimiter `$ # @ ( ) { } " , . \` or whitespace (I include comments as whitespace here).
The job of a validator for a raw identifier is to decide whether we're looking at a keyword or an actual valid identifier.

View file

@ -0,0 +1,237 @@
import { SourceText } from "./source_text";
import { Cursor, scanString, scanNumber } from "./cursor";
import { Result } from "../result";
// === Test Harness Utilities ===
// ANSI escape sequences (red, green, reset) used to colour the harness output.
const [RED, GREEN, RESET] = ["31", "32", "0"].map((code) => `\x1b[${code}m`);
/** Throws an `Error` carrying `message` unless `condition` is true. */
function assert(condition: boolean, message: string) {
  if (condition) {
    return;
  }
  throw new Error(message);
}
/**
 * Asserts that a scan result is Ok and that its payload's `.value` is strictly
 * equal to `expectedValue`. For an error result, throws with the error's tag
 * and, when present, its JSON-encoded reason.
 */
function assertOk<T>(result: Result<T, any>, expectedValue: any) {
  if (result.tag !== "error") {
    // Both number and string scan results expose the scanned value as `.value`.
    const actual = (result.value as any).value;
    assert(actual === expectedValue, `Expected value '${expectedValue}', got '${actual}'`);
    return;
  }
  const err = result.error as any;
  const reason = err.reason ? ` (Reason: ${JSON.stringify(err.reason)})` : "";
  throw new Error(`Expected Ok, got Error: ${err.tag}${reason}`);
}
/**
 * Asserts that a scan result is an error with tag `expectedTag`.
 *
 * When `expectedReason` is given it is also compared with the error's
 * `reason`: object reasons by their JSON encoding, anything else by `===`.
 */
function assertError(result: Result<any, any>, expectedTag: string, expectedReason?: string | object) {
  if (result.tag === "ok") {
    throw new Error(`Expected Error '${expectedTag}', but got Ok with value: ${(result.value as any).value}`);
  }
  const err = result.error;
  assert(err.tag === expectedTag, `Expected error tag '${expectedTag}', got '${err.tag}'`);

  if (expectedReason === undefined) {
    return;
  }
  const actualReason = (err as any).reason;
  if (typeof expectedReason === 'object') {
    // Structured reasons (e.g. for Unicode errors) are compared by JSON text.
    const wanted = JSON.stringify(expectedReason);
    const got = JSON.stringify(actualReason);
    assert(got === wanted, `Expected reason ${wanted}, got ${got}`);
  } else {
    assert(actualReason === expectedReason, `Expected reason '${expectedReason}', got '${actualReason}'`);
  }
}
// === Number Tests ===
/** Scans a positive and a negative integer and checks the resulting values. */
function test_integers() {
  const cases: Array<[string, number]> = [
    ["123", 123],
    ["-500", -500],
  ];
  for (const [input, expected] of cases) {
    assertOk(scanNumber(new Cursor(new SourceText(input))), expected);
  }
  console.log(`${GREEN}✔ Integers passed${RESET}`);
}
/** Scans a positive and a negative decimal number and checks the resulting values. */
function test_floats() {
  const cases: Array<[string, number]> = [
    ["3.14159", 3.14159],
    ["-0.001", -0.001],
  ];
  for (const [input, expected] of cases) {
    assertOk(scanNumber(new Cursor(new SourceText(input))), expected);
  }
  console.log(`${GREEN}✔ Floats passed${RESET}`);
}
/** Checks the errors reported for malformed numbers. */
function test_number_errors() {
  const scan = (input: string) => scanNumber(new Cursor(new SourceText(input)));

  // A dot with no digits after it.
  assertError(scan("1."), "InvalidNumber", "MissingFractionalDigits");

  // A sign with no digits after it.
  assertError(scan("-"), "ExpectedNumber");

  console.log(`${GREEN}✔ Number errors passed${RESET}`);
}
// === String Tests ===
/** Scans a plain string literal and the empty string literal. */
function test_basic_strings() {
  const cases: Array<[string, string]> = [
    ['"hello world"', "hello world"],
    ['""', ""],
  ];
  for (const [literal, expected] of cases) {
    assertOk(scanString(new Cursor(new SourceText(literal))), expected);
  }
  console.log(`${GREEN}✔ Basic strings passed${RESET}`);
}
/** Checks that the single-character escapes (\n, \t, \", \\, \0) are decoded. */
function test_string_escapes() {
  const cases: Array<[string, string]> = [
    ['"line1\\nline2"', "line1\nline2"],
    ['"col1\\tcol2"', "col1\tcol2"],
    ['"quote: \\" slash: \\\\"', 'quote: " slash: \\'],
    // Null byte
    ['"null\\0byte"', "null\0byte"],
  ];
  for (const [literal, expected] of cases) {
    assertOk(scanString(new Cursor(new SourceText(literal))), expected);
  }
  console.log(`${GREEN}✔ String escapes passed${RESET}`);
}
/** Checks decoding of `\u{...}` escapes and the errors for malformed ones. */
function test_unicode_escapes() {
  const scan = (literal: string) => scanString(new Cursor(new SourceText(literal)));

  // Rocket emoji: 🚀 (U+1F680)
  assertOk(scan('"\\u{1F680}"'), "🚀");
  // Two escapes in a row
  assertOk(scan('"\\u{41}\\u{42}"'), "AB");

  // Missing opening brace
  assertError(scan('"\\u1F680"'), "InvalidEscape", { tag: "UnicodeMissingBrace" });
  // No hex digits between the braces
  assertError(scan('"\\u{}"'), "InvalidEscape", { tag: "UnicodeNoDigits" });

  // Overflow: the offending value is carried inside the reason, so check it by hand.
  const overflow = scan('"\\u{110000}"');
  if (overflow.tag === 'ok') throw new Error("Should have failed overflow");
  const { tag, reason } = overflow.error as any;
  assert(tag === "InvalidEscape", "Wrong tag");
  assert(reason.tag === "UnicodeOverflow", "Wrong reason tag");
  assert(reason.value === 0x110000, "Wrong overflow value");

  console.log(`${GREEN}✔ Unicode escapes passed${RESET}`);
}
/**
 * Checks that token spans report the right line (and column) when the input
 * mixes line endings:
 *   line 1: 123 (CRLF)
 *   line 2: 456 (LF)
 *   line 3: "foo"
 *
 * The whitespace between tokens is skipped by hand with `cursor.next()`.
 */
function test_cursor_tracking() {
  const code = "123\r\n456\n\"foo\"";
  const src = new SourceText(code);
  const cursor = new Cursor(src);

  // 1. Scan 123
  const r1 = scanNumber(cursor);
  assertOk(r1, 123);
  // A span is `{ start, end }`; where the token begins is `span.start`.
  // (Reading `span.line` directly yields `undefined` and can never equal 1.)
  const start1 = (r1 as any).value.span.start;
  assert(start1.line === 1, "Line 1 line# wrong");
  assert(start1.column === 1, "Line 1 col# wrong");

  // 2. Skip the CRLF by hand
  cursor.next(); // \r
  cursor.next(); // \n

  // 3. Scan 456
  const r2 = scanNumber(cursor);
  assertOk(r2, 456);
  const start2 = (r2 as any).value.span.start;
  assert(start2.line === 2, "Line 2 line# wrong");

  // 4. Skip \n
  cursor.next();

  // 5. Scan "foo"
  const r3 = scanString(cursor);
  assertOk(r3, "foo");
  const start3 = (r3 as any).value.span.start;
  assert(start3.line === 3, "Line 3 line# wrong");

  console.log(`${GREEN}✔ Cursor tracking passed${RESET}`);
}
// === Run All ===
/**
 * Runs every scanner test in order. The first failure is printed and the
 * process exits with status 1; otherwise a success banner is printed.
 */
function run_all_tests() {
  console.log("Running Scanner Tests...\n");
  const suites = [
    test_integers,
    test_floats,
    test_number_errors,
    test_basic_strings,
    test_string_escapes,
    test_unicode_escapes,
    test_cursor_tracking,
  ];
  try {
    for (const suite of suites) {
      suite();
    }
    console.log(`\n${GREEN}ALL TESTS PASSED${RESET}`);
  } catch (e: any) {
    console.error(`\n${RED}TEST FAILED:${RESET}`);
    console.error(e.message || e);
    process.exit(1);
  }
}
run_all_tests();

333
src/lang/parser/cursor.ts Normal file
View file

@ -0,0 +1,333 @@
import { char, NEW_LINE, CARRIAGE_RETURN, DOT, DIGIT_0, DIGIT_9, LOWERCASE_a, LOWERCASE_f, UPPERCASE_A, UPPERCASE_F, SPACE, TAB } from './source_text';
import type { SourceText, Span, SourceLocation, CodePoint, StringIndex, CodePointIndex } from './source_text';
import { Result } from '../result';
// Snapshot of a Cursor's mutable position: returned by Cursor.save() and
// accepted by Cursor.restore() to rewind the cursor.
export type CursorState = {
// Index of the next unread character in the source text.
index: CodePointIndex,
// Line number of that position; a fresh cursor starts at 1.
line: number,
// Column number of that position; a fresh cursor starts at 1.
column: number,
// True when the last consumed character was a carriage return, so that a
// following line feed is not counted as a second line break.
lastCharWasCR: boolean,
}
/**
 * A forward-reading position in a `SourceText` that tracks line and column.
 *
 * The cursor advances one code point at a time via `next()`. Lines and columns
 * start at 1. `\n`, `\r` and `\r\n` each count as a single line break.
 * `save()` / `restore()` allow a caller to rewind.
 */
export class Cursor {
  private index: CodePointIndex = 0;
  private line: number = 1;
  private column: number = 1;
  // Set after consuming `\r` so a directly following `\n` is not counted twice.
  private lastCharWasCR: boolean = false;

  constructor(readonly text: SourceText) {}

  /** Returns a snapshot of the current position. */
  save(): CursorState {
    return { index: this.index, line: this.line, column: this.column, lastCharWasCR: this.lastCharWasCR };
  }

  /** Rewinds (or advances) the cursor to a previously saved snapshot. */
  restore({ index, line, column, lastCharWasCR }: CursorState) {
    this.index = index;
    this.line = line;
    this.column = column;
    this.lastCharWasCR = lastCharWasCR;
  }

  /** True once every character of the text has been consumed. */
  eof(): boolean {
    return this.index >= this.text.length;
  }

  /** Returns the code point `n` positions ahead without consuming it, or `undefined` past the end. */
  peek(n: number = 0): CodePoint | undefined {
    return this.text.chars[this.index + n]?.char;
  }

  /** Consumes and returns the next code point, updating line/column; `undefined` at the end. */
  next(): CodePoint | undefined {
    const ref = this.text.chars[this.index];
    if (!ref) return undefined;
    const c = ref.char;
    this.index++;
    if (c === NEW_LINE) {
      if (!this.lastCharWasCR) {
        this.line++;
        this.column = 1;
      } else {
        // This `\n` completes a `\r\n`; the line was already bumped on `\r`.
        this.lastCharWasCR = false;
      }
    } else if (c === CARRIAGE_RETURN) {
      this.line++;
      this.column = 1;
      this.lastCharWasCR = true;
    } else {
      this.column++;
      this.lastCharWasCR = false;
    }
    return c;
  }

  /** Index of the next unread character. */
  get currentIndex(): CodePointIndex {
    return this.index;
  }

  // TODO: unicode-index ~> string-offset, make that into a separate function.
  /** Offset of the next unread character in the underlying string, or the string's length at the end. */
  currentOffset(): StringIndex {
    return this.text.chars[this.index]?.offset ?? this.text.source.length;
  }

  /** The current position as an index/line/column triple. */
  currentLocation(): SourceLocation {
    return { index: this.index, line: this.line, column: this.column };
  }

  /** Builds a span from `start` to the current position. */
  makeSpan(start: SourceLocation): Span {
    return {
      start,
      end: this.currentLocation(),
    };
  }

  /** Consumes characters while `pred` holds; returns how many were consumed. */
  consumeWhile(pred: (c: CodePoint) => boolean): number {
    let count = 0;
    while (!this.eof()) {
      const c = this.peek();
      if (c === undefined || !pred(c)) break;
      this.next();
      count++;
    }
    return count;
  }

  /** Consumes the next character and returns true if it equals `c`; otherwise touches nothing. */
  match(c: CodePoint): boolean {
    if (this.peek() === c) {
      this.next();
      return true;
    }
    return false;
  }

  /**
   * Checks whether the upcoming characters spell `str`.
   * If yes, consumes them and returns true. If no, touches nothing and returns false.
   *
   * The comparison is done code point by code point. `str.length` counts
   * UTF-16 code units, which is larger than the number of code points for
   * characters outside the BMP, so it must not be used as a character count here.
   */
  matchString(str: string): boolean {
    const expected = Array.from(str, (ch) => ch.codePointAt(0));
    for (let i = 0; i < expected.length; i++) {
      // `peek` yields `undefined` past the end, which never equals a code point.
      if (this.peek(i) !== expected[i]) return false;
    }
    for (let i = 0; i < expected.length; i++) {
      // Go through next() so line/column tracking stays correct.
      this.next();
    }
    return true;
  }
}
// === Basic Scanners/Predicates ===
/** True for space, tab, line feed and carriage return. */
export function isWhitespace(char: CodePoint): boolean {
  switch (char) {
    case SPACE:
    case TAB:
    case NEW_LINE:
    case CARRIAGE_RETURN:
      return true;
    default:
      return false;
  }
}
/** True when `char` lies between DIGIT_0 and DIGIT_9 inclusive. */
export function isDigit(char: CodePoint): boolean {
  if (char < DIGIT_0) {
    return false;
  }
  return char <= DIGIT_9;
}
// Errors that are not specific to one kind of token.
// - UnexpectedCharacter: a character other than the expected one was found;
//   `char` is the character that was actually seen.
// - UnexpectedEOF: the input ended while more characters were required.
// `span` locates the problem in the source text.
export type GenericScanError =
| { tag: "UnexpectedCharacter", char: CodePoint, span: Span }
| { tag: "UnexpectedEOF", span: Span }
// Errors reported by scanNumber.
// - ExpectedNumber: no digits were found where the integer part should be.
// - InvalidNumber / "MissingFractionalDigits": a `.` was not followed by digits.
// - InvalidNumber / "NotFinite": the scanned text does not convert to a finite number.
export type NumberError =
| { tag: "ExpectedNumber", span: Span }
| { tag: "InvalidNumber", reason: "NotFinite" | "MissingFractionalDigits", span: Span }
/**
 * Scans a number at the cursor.
 *
 *   number := optional(`-`) digits optional(`.` digits)
 *
 * On success returns the numeric value and the span of the scanned text.
 *
 * Errors:
 * - `ExpectedNumber` when no digit follows the optional sign.
 * - `InvalidNumber` / `MissingFractionalDigits` when a `.` is not followed by
 *   a digit; the span starts at the dot.
 * - `InvalidNumber` / `NotFinite` when the text does not convert to a finite number.
 *
 * The cursor is not rewound on failure: whatever was consumed (for example a
 * leading `-`) stays consumed. Callers that need to backtrack should use
 * `cursor.save()` / `cursor.restore()`.
 */
export function scanNumber(cursor: Cursor): Result<{ value: number, span: Span }, NumberError> {
  const startNumberLocation = cursor.currentLocation();

  // 1. Optional sign: consumed only if present.
  cursor.match(char("-"));

  // 2. Integer part: at least one digit is required.
  const integerPartDigitCount = cursor.consumeWhile(isDigit);
  if (integerPartDigitCount === 0) {
    return Result.error({
      tag: "ExpectedNumber",
      span: cursor.makeSpan(startNumberLocation),
    });
  }

  // 3. Fractional part: a dot commits us to at least one more digit.
  if (cursor.peek() === DOT) {
    const dotLocation = cursor.currentLocation();
    cursor.next(); // consume '.'
    const fracPartDigitCount = cursor.consumeWhile(isDigit);
    if (fracPartDigitCount === 0) {
      return Result.error({
        tag: "InvalidNumber",
        reason: "MissingFractionalDigits",
        span: cursor.makeSpan(dotLocation),
      });
    }
  }

  // 4. Convert the scanned slice; very long digit runs overflow to Infinity.
  const text = cursor.text.sliceByCp(startNumberLocation.index, cursor.currentIndex);
  const value = Number(text);
  if (!Number.isFinite(value)) {
    return Result.error({
      tag: "InvalidNumber",
      reason: "NotFinite",
      span: cursor.makeSpan(startNumberLocation),
    });
  }
  return Result.ok({
    value,
    span: cursor.makeSpan(startNumberLocation),
  });
}
// Error specific to string literals: a backslash escape inside the string is
// malformed. `reason` says what is wrong with it; `span` locates it.
export type StringError =
| { tag: "InvalidEscape", reason: EscapeErrorReason, span: Span };
// Why an escape sequence in a string literal was rejected. The Unicode
// variants concern escapes written as `\u{...}` with hex digits in the braces.
export type EscapeErrorReason =
| { tag: "UnknownEscapeSequence", char: CodePoint } // e.g. \k; `char` is the character after the backslash
| { tag: "UnicodeMissingBrace" } // \u without {
| { tag: "UnicodeNoDigits" } // \u{}
| { tag: "UnicodeUnclosed" } // \u{FF without }
| { tag: "UnicodeOverflow", value: number }; // \u{110000}; `value` is the out-of-range number
// Scans a double-quoted string literal starting at the cursor and returns its decoded content.
//
// Supported escapes: \n \r \t \\ \0 \" and \u{HEX} (one or more hex digits, value <= 0x10FFFF).
//
// Errors:
//  - "UnexpectedEOF": input ends before the opening quote, before the closing quote,
//    or directly after a backslash.
//  - "UnexpectedCharacter": the first character is not `"`.
//  - "InvalidEscape": see EscapeErrorReason.
// On error the cursor is left wherever scanning stopped; it is not rewound here.
export function scanString(cursor: Cursor): Result<{ value: string, span: Span }, StringError | GenericScanError> {
  const start = cursor.currentLocation();

  const firstChar = cursor.peek();
  if (firstChar === undefined) {
    return Result.error({ tag: "UnexpectedEOF", span: cursor.makeSpan(start) });
  }
  if (firstChar !== char('"')) {
    return Result.error({ tag: "UnexpectedCharacter", char: firstChar, span: cursor.makeSpan(start) });
  }
  cursor.next(); // consume opening quote

  let value = ""; // The actual string content
  while (true) {
    if (cursor.eof()) {
      return Result.error({ tag: "UnexpectedEOF", span: cursor.makeSpan(start) });
    }
    const c = cursor.peek();

    // 1. End of string
    if (c === char('"')) {
      cursor.next(); // consume closing quote
      break;
    }

    if (c === char('\\')) {
      // 2. Escape sequences
      const escapeStart = cursor.currentLocation();
      cursor.next(); // consume backslash
      const escaped = cursor.peek();
      if (escaped === undefined) {
        // Input ended right after the backslash: this is an unterminated string,
        // not an "unknown escape" with an undefined character.
        return Result.error({ tag: "UnexpectedEOF", span: cursor.makeSpan(start) });
      }
      switch (escaped) {
        case char('n'): value += '\n'; cursor.next(); break;
        case char('r'): value += '\r'; cursor.next(); break;
        case char('t'): value += '\t'; cursor.next(); break;
        case char('\\'): value += '\\'; cursor.next(); break;
        case char("0"): value += "\0"; cursor.next(); break;
        case char('"'): value += '"'; cursor.next(); break;
        // Unicode escape: \u{XXXX}
        case char('u'): {
          cursor.next(); // consume 'u'
          // Expect '{'. All unicode-escape errors are reported from this location.
          const braceStart = cursor.currentLocation();
          if (cursor.peek() !== char('{')) {
            return Result.error({ tag: "InvalidEscape", reason: { tag: "UnicodeMissingBrace" }, span: cursor.makeSpan(braceStart) });
          }
          cursor.next(); // consume '{'
          // Consume hex digits
          const hexStart = cursor.currentIndex;
          const hexCount = cursor.consumeWhile(c =>
            (c >= DIGIT_0 && c <= DIGIT_9) ||
            (c >= LOWERCASE_a && c <= LOWERCASE_f) ||
            (c >= UPPERCASE_A && c <= UPPERCASE_F)
          );
          if (hexCount === 0) {
            return Result.error({ tag: "InvalidEscape", reason: { tag: "UnicodeNoDigits" }, span: cursor.makeSpan(braceStart) });
          }
          // Expect '}'
          if (cursor.peek() !== char("}")) {
            return Result.error({ tag: "InvalidEscape", reason: { tag: "UnicodeUnclosed" }, span: cursor.makeSpan(braceStart) });
          }
          cursor.next(); // consume '}'
          // Convert & append
          const hexStr = cursor.text.sliceByCp(hexStart, hexStart + hexCount);
          const codePoint = parseInt(hexStr, 16);
          if (codePoint > 0x10FFFF) {
            return Result.error({ tag: "InvalidEscape", reason: { tag: "UnicodeOverflow", value: codePoint }, span: cursor.makeSpan(braceStart) });
          }
          value += String.fromCodePoint(codePoint);
          break;
        }
        default:
          return Result.error({
            tag: "InvalidEscape",
            reason: { tag: "UnknownEscapeSequence", char: escaped },
            span: cursor.makeSpan(escapeStart)
          });
      }
    } else {
      // 3. Regular character
      // Optimization: consume chunks of non-special chars for speed?
      // For now, char-by-char is fine.
      cursor.next();
      // Note: We use ! because we checked EOF at loop start
      value += String.fromCodePoint(c!);
    }
  }

  return Result.ok({
    value,
    span: cursor.makeSpan(start)
  });
}
// TODO: rendering of errors
// (sketch, kept in sync with `StringError` / `GenericScanError` / `EscapeErrorReason` above)
// function renderStringError(err: StringError | GenericScanError): string {
// switch (err.tag) {
// case "UnexpectedCharacter": return "Expected a string starting with \"";
// case "UnexpectedEOF": return "Unterminated string literal";
// case "InvalidEscape":
// const r = err.reason;
// switch (r.tag) {
// case "UnknownEscapeSequence":
// return `Unknown escape sequence '\\${String.fromCodePoint(r.char)}'`;
// case "UnicodeMissingBrace":
// return "Unicode escape must start with '{', e.g. \\u{1F600}";
// case "UnicodeNoDigits":
// return "Empty unicode escape \\u{}";
// case "UnicodeUnclosed":
// return "Expected '}' to close unicode escape";
// case "UnicodeOverflow":
// return `Unicode code point 0x${r.value.toString(16)} is too large (max 0x10FFFF)`;
// }
// }
// }

467
src/lang/parser/parser.ts Normal file
View file

@ -0,0 +1,467 @@
import { Expr, ExprBinding, FieldAssignment, FieldPattern, MatchBranch, Pattern, ProductPattern } from '../value';
import { Cursor } from './cursor';
import { ExprScanError, exprStart, ExprStartToken, IdentifierKind, identifierScanner, isNextTokenExprStart, isNextTokenProductPatternStart, patternStart, PatternStartToken, skipWhitespaceAndComments } from './scanner';
import { char, CodePoint, SourceText, Span } from './source_text';
import { Result } from '../result';
// CONVENTION: Every parser is responsible for consuming the whitespace/comments that follow what it parsed.
// No parser cleans up whitespace/comments at its own start - only the final, publicly exposed `parse` does that.
// Code points passed to `delimitedTerminalSequence` / `tryConsume` below:
// DELIMITER_* separate items, TERMINATOR_* end a sequence or a block.
const DELIMITER_COMMA = char(',');
const DELIMITER_PIPE = char('|');
const TERMINATOR_CLOSE_PAREN = char(')');
const TERMINATOR_CLOSE_BRACE = char('}');
const TERMINATOR_DOT = char('.');
// TODO: Errors have to have better contextual information
// Everything the parser can report. Values of this type are thrown as plain objects by the
// parser functions in this file and turned into a `Result.error` by `parse`.
// Scanner errors (`ExprScanError`) are part of the union as well.
export type ParseError =
| ExprScanError
| { tag: "UnexpectedToken", expected: string, span: Span }
| { tag: "UnexpectedTokenWhileParsingSequence", reason: "ExpectedDelimiterOrTerminator", received: CodePoint, expectedDelimiter: CodePoint, expectedTerminator: CodePoint, span: Span }
// === Specific Context Errors ===
| { tag: "ExpectedExpression", span: Span } // Expected start of expr (e.g. hit EOF or keyword)
| { tag: "ExpectedFieldAssignmentSymbol", span: Span } // Expected '=' in field assignment
| { tag: "ExpectedPatternAssignmentSymbol", span: Span } // Expected '=' in pattern assignment
| { tag: "ExpectedPatternBindingSymbol", span: Span } // Expected '.' in pattern binding
| { tag: "ExpectedFunctionCallStart", span: Span } // Expected '(' after function name
| { tag: "ExpectedRecordOpen", span: Span } // Expected '(' after ':'
| { tag: "ExpectedLetBlockOpen", span: Span } // Expected '{' after 'let'
| { tag: "ExpectedLetBlockClose", span: Span } // Expected '}' at end of 'let' expression
| { tag: "ExpectedMatchBlockOpen", span: Span } // Expected '{' after 'match'
| { tag: "ExpectedMatchBlockClose", span: Span } // Expected '}' at end of 'match' expression
| { tag: "ExpectedLambdaBlockOpen", span: Span } // Expected '{' after `fn`
| { tag: "ExpectedLambdaBlockClose", span: Span } // Expected '}' at end of `fn` expression
| { tag: "ExpectedApplyStart", span: Span } // Expected '(' after 'apply'
| { tag: "ExpectedApplySeparator", span: Span } // Expected '!' inside 'apply'
| { tag: "UnexpectedTagPattern", span: Span } // Found #tag where product pattern expected
| { tag: "ExpectedPattern", span: Span } // EOF or invalid start of pattern
| { tag: "ExpectedRecordPatternOpen", span: Span } // Expected '(' at start of record pattern
| { tag: "ExpectedRecordField", span: Span }; // Expected identifier in record pattern
// TODO: Delete?
// String-literal union mirroring the context-specific tags of `ParseError`.
// Nothing in the parser code shown in this file refers to it.
// NOTE(review): it has no entries for "ExpectedLambdaBlockOpen" / "ExpectedLambdaBlockClose",
// which `ParseError` does have - add them if this type is kept.
export type Expectation =
| "ExpectedExpression"
| "ExpectedFieldAssignmentSymbol"
| "ExpectedPatternAssignmentSymbol"
| "ExpectedPatternBindingSymbol"
| "ExpectedFunctionCallStart"
| "ExpectedRecordOpen"
| "ExpectedLetBlockOpen"
| "ExpectedLetBlockClose"
| "ExpectedMatchBlockOpen"
| "ExpectedMatchBlockClose"
| "ExpectedApplyStart"
| "ExpectedApplySeparator"
| "UnexpectedTagPattern"
| "ExpectedPattern"
| "ExpectedRecordField"
| "ExpectedRecordPatternOpen"
// A parser reads from (and advances) the cursor and returns the parsed value.
// The parsers in this file signal failure by throwing a `ParseError` object.
export type Parser<T> = (cursor: Cursor) => T
// === Helpers ===
// Prefer this over a bare `cursor.next()`: in most cases the whitespace/comments after the
// consumed character have to go as well.
// Returns the consumed code point, or `undefined` (without skipping anything) when
// `cursor.next()` yields nothing.
function nextWS(cursor: Cursor): CodePoint | undefined {
  const consumed = cursor.next();
  if (consumed !== undefined) {
    skipWhitespaceAndComments(cursor);
  }
  return consumed;
}
// Consumes `expected` (plus the whitespace/comments after it) if it is the next code point.
// Returns whether anything was consumed; the cursor is untouched on `false`.
function tryConsume(cursor: Cursor, expected: CodePoint): boolean {
  const isNext = cursor.peek() === expected;
  if (isNext) {
    nextWS(cursor);
  }
  return isNext;
}
// TODO: Perhaps when it comes to terminators, we should allow the user of this function to create better contextual errors?
//
// Parses a `delimiter`-separated run of `p` items that ends with `terminator`. The terminator
// (and the whitespace/comments after it) is consumed as well.
//
// With `,` as the delimiter and `.` as the terminator, the accepted shapes are
//   list(p) :=
//     | p
//     | p , p
//     | p , p , p
//     | ...
//   delimitedTerminalSequence(p) :=
//     | .
//     | optional(,) list(p) optional(,) .
//
// Every call site has a well-defined terminator character:
//   tuples:          ( a, b, c )              -> `)`
//   records:         :( f0 = e0, f1 = e1 )    -> `)`
//   function call:   f(a, b, c)               -> `)`
//   let-binding:     let { p = e . body }     -> `.`
//   fn-abstraction:  fn { p0, p1 . body }     -> `.`
//   fn-application:  apply(e ! e0, e1, e2)    -> `)`
//   match branches:  match e { b0 | b1 }      -> `}` (with `|` as the delimiter)
//
// Throws a ParseError tagged "UnexpectedTokenWhileParsingSequence" when an item is followed by
// neither the delimiter nor the terminator.
function delimitedTerminalSequence<A>(cursor: Cursor, delimiter: CodePoint, terminator: CodePoint, p: Parser<A>): A[] {
  const collected: A[] = [];

  // Empty sequence: the terminator comes first.
  if (tryConsume(cursor, terminator)) {
    return collected;
  }

  // A single leading delimiter is tolerated.
  tryConsume(cursor, delimiter);

  for (;;) {
    // `p` is responsible for skipping the whitespace that follows the item.
    collected.push(p(cursor));

    const following = cursor.peek();
    if (following === terminator) {
      nextWS(cursor);
      return collected;
    }
    if (following !== delimiter) {
      throw {
        tag: "UnexpectedTokenWhileParsingSequence",
        received: following,
        reason: "ExpectedDelimiterOrTerminator",
        expectedDelimiter: delimiter,
        expectedTerminator: terminator,
        span: cursor.makeSpan(cursor.currentLocation()),
      } as ParseError;
    }

    nextWS(cursor); // consume the delimiter
    // If the terminator follows, that delimiter was a trailing one; otherwise it was a separator.
    if (tryConsume(cursor, terminator)) {
      return collected;
    }
  }
}
// Scans the token that starts an expression, then skips the whitespace/comments after it.
function exprStartToken(cursor: Cursor): ExprStartToken {
  const startToken = exprStart(cursor);
  skipWhitespaceAndComments(cursor);
  return startToken;
}
// Scans the token that starts a pattern, then skips the whitespace/comments after it.
function patternStartToken(cursor: Cursor): PatternStartToken {
  const startToken = patternStart(cursor);
  skipWhitespaceAndComments(cursor);
  return startToken;
}
// Scans an identifier of the given kind, then skips the whitespace/comments after it.
function identifier(cursor: Cursor, kind: IdentifierKind): { name: string, span: Span } {
  const scanned = identifierScanner(cursor, kind);
  skipWhitespaceAndComments(cursor);
  return scanned;
}
// === Expression Parsers ===
// Parses one expression. The start token decides which form is parsed; every branch then
// consumes the rest of that form through the helpers above, which also skip trailing
// whitespace/comments.
// Throws a ParseError (as a plain object) on malformed input; scanner errors raised while
// reading the start token propagate unchanged.
function expr(cursor: Cursor): Expr {
const token = exprStartToken(cursor);
// TODO: You need to include the spans and perhaps other meta-info.
switch (token.tag) {
case "EOF":
// Input ended where an expression was required.
throw {
tag: "UnexpectedToken",
expected: "Expression",
span: token.span
} as ParseError;
case "number":
return Expr.number(token.value);
case "string":
return Expr.string(token.text);
case "variable_use":
return Expr.var_use(token.name);
case "tag":
// #true   -> bare tag
// #foo e  -> tag applied to a payload expression
// The lookahead decides between the two without consuming anything.
if (isNextTokenExprStart(cursor)) {
const value = expr(cursor);
return Expr.tagged(token.name, value);
} else {
return Expr.tag(token.name);
}
case "tuple_start":
// e.g. (a, b, c)
// The opening `(` was already consumed as part of the start token.
const items = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
return Expr.tuple(items);
case "function_name":
// e.g. my_func(arg1, arg2)
// Requires `(`, then a `,`-delimited sequence of expressions terminated by `)`.
if (!tryConsume(cursor, char('('))) {
throw {
tag: "ExpectedFunctionCallStart",
span: cursor.makeSpan(cursor.currentLocation())
} as ParseError;
}
const args = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
return Expr.call(token.name, args);
case "keyword":
switch (token.kw) {
case ":":
// Record construction
// e.g. :( x = 1, y = 2 )
// or : ( x = 1, y = 2 )
if (!tryConsume(cursor, char('('))) {
throw {
tag: "ExpectedRecordOpen",
span: cursor.makeSpan(cursor.currentLocation())
} as ParseError;
}
const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, fieldAssignment);
return Expr.record(fields);
case "let":
// let { p0 = e0, p1 = e2 . body }
if (!tryConsume(cursor, char('{'))) {
throw {
tag: "ExpectedLetBlockOpen",
span: cursor.makeSpan(cursor.currentLocation())
} as ParseError;
}
// The bindings run up to (and including) the `.`; the body follows it.
const bindings = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_DOT, productPatternBinding);
const body = expr(cursor);
if (!tryConsume(cursor, TERMINATOR_CLOSE_BRACE)) {
throw {
tag: "ExpectedLetBlockClose",
span: cursor.makeSpan(cursor.currentLocation())
} as ParseError;
}
return Expr.let_(bindings, body);
case "fn": {
// fn { p0, p1, p2 . body }
if (!tryConsume(cursor, char('{'))) {
throw {
tag: "ExpectedLambdaBlockOpen",
span: cursor.makeSpan(cursor.currentLocation())
} as ParseError;
}
// The parameters run up to (and including) the `.`; the body follows it.
const parameters = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_DOT, productPattern);
const body = expr(cursor);
if (!tryConsume(cursor, TERMINATOR_CLOSE_BRACE)) {
throw {
tag: "ExpectedLambdaBlockClose",
span: cursor.makeSpan(cursor.currentLocation())
} as ParseError;
}
return Expr.lambda(parameters, body);
}
case "apply":
// apply(e ! e0, e1, e2)
if (!tryConsume(cursor, char('('))) {
throw {
tag: "ExpectedApplyStart",
span: cursor.makeSpan(cursor.currentLocation())
} as ParseError;
}
// Callee expression first, then `!`, then the argument list up to `)`.
const callee = expr(cursor);
if (!tryConsume(cursor, char('!'))) {
throw {
tag: "ExpectedApplySeparator",
span: cursor.makeSpan(cursor.currentLocation())
} as ParseError;
}
const args = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
return Expr.apply(callee, args);
case "match":
// match e { branch0 | branch1 | branch2 }
const arg = expr(cursor);
if (!tryConsume(cursor, char('{'))) {
throw {
tag: "ExpectedMatchBlockOpen",
span: cursor.makeSpan(cursor.currentLocation())
} as ParseError;
}
// The branch sequence consumes the closing `}` itself, as its terminator.
const branches = delimitedTerminalSequence(cursor, DELIMITER_PIPE, TERMINATOR_CLOSE_BRACE, matchBranch);
return Expr.match(arg, branches)
case "=":
case "|":
case "!":
// These keywords CANNOT start an expression.
throw {
tag: "ExpectedExpression",
span: token.span
} as ParseError;
}
}
}
// Parses one branch of a `match`: `p . body`.
// Throws "ExpectedPatternBindingSymbol" when the `.` after the pattern is missing.
function matchBranch(cursor: Cursor): MatchBranch {
  const branchPattern = pattern(cursor);
  const sawDot = tryConsume(cursor, char("."));
  if (sawDot) {
    const branchBody = expr(cursor);
    return Expr.matchBranch(branchPattern, branchBody);
  }
  throw {
    tag: "ExpectedPatternBindingSymbol",
    span: cursor.makeSpan(cursor.currentLocation())
  } as ParseError;
}
// Parses one binding of a `let`: `p = e`, where `p` is a product pattern.
//
// Throws "ExpectedPatternAssignmentSymbol" when the `=` after the pattern is missing
// ("ExpectedPatternBindingSymbol" is reserved for the `.` of a match branch).
//
// TODO: There's a potential here to do a lot of work on nice errors.
// Identifiers may contain `=`, which leads to confusing input such as:
//   `x= = 123` - valid: binds the identifier `x=`.
//   `x=123`    - parses as a single identifier; maybe analyze it and produce a suggestion? idk...
//   `x= 123`   - the pattern is the identifier `x=`; same suggestion idea.
//   `x =123`   - technically a sequence of two identifiers.
function productPatternBinding(cursor: Cursor): ExprBinding {
  const boundPattern = productPattern(cursor);
  if (!tryConsume(cursor, char('='))) {
    throw {
      tag: "ExpectedPatternAssignmentSymbol",
      span: cursor.makeSpan(cursor.currentLocation())
    } as ParseError;
  }
  const e = expr(cursor);
  return Expr.exprBinding(boundPattern, e);
}
// Parses one record field of the form `f = e`.
// Throws "ExpectedFieldAssignmentSymbol" when the `=` after the field name is missing.
function fieldAssignment(cursor: Cursor): FieldAssignment {
  const field = identifier(cursor, 'field_name');
  if (tryConsume(cursor, char('='))) {
    const fieldValue = expr(cursor);
    return Expr.fieldAssignment(field.name, fieldValue);
  }
  throw {
    tag: "ExpectedFieldAssignmentSymbol", // Specific error for this context
    span: cursor.makeSpan(cursor.currentLocation())
  } as ParseError;
}
// Parses a full pattern, i.e. a product pattern or a tag pattern:
//   x
//   (x, y, z)
//   ((x, y), z)
//   a record pattern (introduced by the `:` keyword)
//   #foo
//   #foo x
//   #foo (x, y)
function pattern(cursor: Cursor): Pattern {
  const token = patternStartToken(cursor);

  // Anything that is not a tag is handled as a product pattern.
  if (token.tag !== "tag") {
    return finishProductPattern(cursor, token);
  }

  // Tag pattern: lookahead decides between a bare `#foo` and `#foo payload`,
  // where the payload must itself be a product pattern.
  return isNextTokenProductPatternStart(cursor)
    ? Pattern.tagged(token.name, productPattern(cursor))
    : Pattern.tag(token.name);
}
// Parses a product pattern:
//   x
//   (x, y, z)
//   ((x, y), z)
//   a record pattern (introduced by the `:` keyword)
//
// A tag pattern here is a syntax error ("UnexpectedTagPattern"); a renderer could explain
// that tag patterns may not be deeply nested.
function productPattern(cursor: Cursor): ProductPattern {
  return finishProductPattern(cursor, patternStartToken(cursor));
}
// Completes a product pattern whose start token has already been scanned.
//
//   pattern_binding -> variable pattern, e.g. `foo`
//   tuple_start     -> `( p1, p2 )`
//   keyword `:`     -> record pattern `:( a = p, b )`
//
// Throws:
//   "UnexpectedTagPattern"      - a `#tag` where only a product pattern is allowed
//   "ExpectedRecordPatternOpen" - `:` not followed by `(`
//   "ExpectedPattern"           - EOF, or a keyword that cannot start a pattern
function finishProductPattern(cursor: Cursor, token: PatternStartToken): ProductPattern {
  switch (token.tag) {
    case "pattern_binding":
      // foo
      return ProductPattern.any(token.name);
    case "tuple_start": {
      // ( p1, p2 )
      const items = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, productPattern);
      return ProductPattern.tuple(items);
    }
    case "tag":
      throw { tag: "UnexpectedTagPattern", span: token.span } as ParseError;
    case "keyword": {
      if (token.kw !== ":") {
        // These keywords CANNOT start a pattern.
        throw { tag: "ExpectedPattern", span: token.span } as ParseError;
      }
      // :( a = p, b )
      // The record pattern opens with `(` so that it matches the `)` terminator below
      // (and the record expression syntax `:( ... )`).
      if (!tryConsume(cursor, char('('))) {
        throw {
          tag: "ExpectedRecordPatternOpen",
          span: cursor.makeSpan(cursor.currentLocation())
        } as ParseError;
      }
      const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, recordPatternField);
      return ProductPattern.record(fields);
    }
    case "EOF":
      throw { tag: "ExpectedPattern", span: token.span } as ParseError;
  }
}
// Parses one field of a record pattern: either `name = p` or just `name`.
// The short form is punning: :( a ) -> :( a = a )
function recordPatternField(cursor: Cursor): FieldPattern {
  const { name } = identifier(cursor, 'field_name');
  const fieldPattern = tryConsume(cursor, char('='))
    ? productPattern(cursor)
    : ProductPattern.any(name);
  return ProductPattern.fieldPattern(name, fieldPattern);
}
// Public entry point: parses `source` as a single expression.
// Leading whitespace/comments are skipped here; anything left over after the expression is
// reported as "UnexpectedToken" (expected "EndOfFile").
// Whatever the internal parsers throw is returned as the error of the Result.
export function parse(source: SourceText): Result<Expr, ParseError> {
  const cursor = new Cursor(source);
  try {
    skipWhitespaceAndComments(cursor);
    const parsed = expr(cursor);
    if (cursor.eof()) {
      return Result.ok(parsed);
    }
    return Result.error({
      tag: "UnexpectedToken",
      expected: "EndOfFile",
      span: cursor.makeSpan(cursor.currentLocation())
    } as ParseError);
  } catch (thrown) {
    // TODO: This is a bit sketchy. We may be forced to have "checked" exceptions for `ParseError` by wrapping it in something that has a proper tag.
    return Result.error(thrown as ParseError);
  }
}

354
src/lang/parser/scanner.ts Normal file
View file

@ -0,0 +1,354 @@
import { CARRIAGE_RETURN, char, NEW_LINE } from './source_text';
import type { Span, CodePoint } from './source_text';
import { isDigit, isWhitespace, scanNumber, scanString } from './cursor';
import type { Cursor, GenericScanError, NumberError, StringError } from './cursor';
// Skips any mix of whitespace and `//` line comments at the cursor.
// Returns the number of code points consumed (0 if the cursor did not move).
export function skipWhitespaceAndComments(cursor: Cursor): number {
  let consumed = 0;
  let madeProgress = true;

  while (madeProgress) {
    // 1. Plain whitespace (spaces, tabs, newlines)
    const whitespaceRun = cursor.consumeWhile(isWhitespace);
    consumed += whitespaceRun;

    // 2. Line comment start ('//')?
    const first = cursor.peek();
    const second = cursor.peek(1);
    const atLineComment = first === char('/') && second === char('/');

    if (atLineComment) {
      // Consume the '//' markers.
      cursor.next();
      cursor.next();
      consumed += 2;
      // Consume up to, but not including, the next newline (or EOF).
      // The newline itself is picked up as ordinary whitespace on the next pass.
      consumed += cursor.consumeWhile(cp => cp !== NEW_LINE && cp !== CARRIAGE_RETURN);
    }

    // Stop once a pass found neither whitespace nor a comment.
    madeProgress = atLineComment || whitespaceRun !== 0;
  }

  return consumed;
}
// === Language Specific Stuff ===
// Characters that end a raw identifier (see `rawIdentifier`), i.e. they can never be part of one.
const DELIMITER_CHARS = ["(", ")", "{", "}", ".", ",", "@", "$", "#", '"', "\\"] as const;
export type Delimiter = typeof DELIMITER_CHARS[number];
// The same characters as code points, for lookups against `cursor.peek()`.
const DELIMITER_SET: Set<CodePoint> = new Set(DELIMITER_CHARS.map(c => char(c)));
// Keywords are matched against a whole raw identifier, so they may still occur as
// substrings of longer identifiers.
const KEYWORD_LIST = ["let" , "fn" , "match" , "apply" , "=" , "|" , "!", ":"] as const;
export type Keyword = typeof KEYWORD_LIST[number];
const KEYWORD_SET: Set<string> = new Set(KEYWORD_LIST);
// === Errors ===
// Everything the scanners in this file can throw: the literal/generic scan errors
// plus identifier validation failures.
export type ExprScanError =
| GenericScanError
| NumberError
| StringError
| { tag: "InvalidIdentifier", text: string, kind: IdentifierKind, reason: IdentifierErrorReason, span: Span }
// What kind of identifier were we trying to parse?
// Carried in "InvalidIdentifier" errors so they can say what was being read.
export type IdentifierKind =
| "variable_use"
| "field_name"
| "tag_construction"
| "function_call"
| "pattern_binding";
// Why a raw identifier was rejected.
//  - Empty: nothing was scanned before a delimiter, whitespace, comment or EOF.
//  - StartsWithDigit: the first character is a digit.
//  - IsKeyword: the text is exactly a keyword, where an identifier was required.
export type IdentifierErrorReason =
| { tag: "Empty" }
| { tag: "StartsWithDigit" }
| { tag: "IsKeyword", kw: Keyword }
// === Tokens ===
// Token produced by `exprStart`: what an expression begins with.
// Literals, `$variable` and `#tag` are complete tokens; `tuple_start` is just the opening `(`.
export type ExprStartToken =
| { tag: "number", value: number, span: Span }
| { tag: "string", text: string, span: Span }
| { tag: "function_name", name: string, span: Span }
| { tag: "variable_use", name: string, span: Span }
| { tag: "tag", name: string, span: Span }
| { tag: "tuple_start", span: Span }
| { tag: "keyword", kw: Keyword, span: Span }
// TODO: get rid of EOF
| { tag: "EOF", span: Span }
// Token produced by `patternStart`: what a pattern begins with.
// `tuple_start` is just the opening `(`.
export type PatternStartToken =
| { tag: "pattern_binding", name: string, span: Span }
| { tag: "tag", name: string, span: Span }
| { tag: "tuple_start", span: Span }
| { tag: "keyword", kw: Keyword, span: Span }
// TODO: get rid of EOF
| { tag: "EOF", span: Span };
// === Identifier Scanners ===
// Consumes code points up to the next delimiter, whitespace, `//` comment start or EOF
// and returns them as a raw, not yet validated string (possibly empty).
function rawIdentifier(cursor: Cursor): string {
  const begin = cursor.currentIndex;
  for (;;) {
    if (cursor.eof()) {
      break;
    }
    const cp = cursor.peek();
    const endsIdentifier =
      DELIMITER_SET.has(cp) ||
      isWhitespace(cp) ||
      (cp === char('/') && cursor.peek(1) === char('/'));
    if (endsIdentifier) {
      break;
    }
    cursor.next();
  }
  return cursor.text.sliceByCp(begin, cursor.currentIndex);
}
// Scans a raw identifier and classifies it:
//  - exactly a keyword        -> keyword token
//  - otherwise, once validated -> identifier token
// Throws an "InvalidIdentifier" ExprScanError when the text is empty or starts with a digit.
// `kind` is only used to label such errors.
function identifierOrKeywordScanner(
  cursor: Cursor,
  kind: IdentifierKind,
): { tag: "keyword", kw: Keyword, span: Span }
 | { tag: "identifier", name: string, span: Span } {
  const start = cursor.currentLocation();
  const text = rawIdentifier(cursor);
  const span = cursor.makeSpan(start);

  // Shared constructor for the rejection cases below.
  const reject = (reason: IdentifierErrorReason): never => {
    throw ({
      tag: "InvalidIdentifier",
      text,
      kind,
      reason,
      span
    } as ExprScanError);
  };

  if (text.length === 0) {
    reject({ tag: "Empty" });
  }

  if (KEYWORD_SET.has(text)) {
    return { tag: "keyword", kw: text as Keyword, span };
  }

  // validation
  if (isDigit(char(text[0]))) {
    reject({ tag: "StartsWithDigit" });
  }

  return { tag: "identifier", name: text, span };
}
// Scans an identifier of the given kind; a keyword in its place is rejected.
// Throws "InvalidIdentifier" (reason "IsKeyword") for keywords, in addition to whatever
// `identifierOrKeywordScanner` throws.
export function identifierScanner(cursor: Cursor, kind: IdentifierKind): { name: string, span: Span } {
  const scanned = identifierOrKeywordScanner(cursor, kind);
  if (scanned.tag !== "keyword") {
    return { name: scanned.name, span: scanned.span };
  }
  throw ({
    tag: "InvalidIdentifier",
    text: scanned.kw,
    kind,
    reason: { tag: "IsKeyword", kw: scanned.kw },
    span: scanned.span
  } as ExprScanError);
}
// === Literal Scanners ===
// Throwing wrapper around `scanNumber`: returns the scanned number or throws its error
// as an ExprScanError.
// TODO: handle trailing whitespace
function number(cursor: Cursor): { value: number, span: Span } {
  const outcome = scanNumber(cursor);
  if (outcome.tag !== "ok") {
    throw (outcome.error as ExprScanError);
  }
  return outcome.value;
}
// Throwing wrapper around `scanString`: returns the scanned string or throws its error
// as an ExprScanError.
// TODO: handle trailing whitespace
function string(cursor: Cursor): { value: string, span: Span } {
  const outcome = scanString(cursor);
  if (outcome.tag !== "ok") {
    throw (outcome.error as ExprScanError);
  }
  return outcome.value;
}
// === complex scanners ===
// TODO: in exprStart/patternStart make sure whitespace is consumed after they successfully produce a token. Should we build it into the functions? Or should that be in `parser.ts`?
//
// Scans the token an expression starts with. Does not skip whitespace, before or after.
// throws ExprScanError
export function exprStart(cursor: Cursor): ExprStartToken {
  const start = cursor.currentLocation();
  if (cursor.eof()) {
    return { tag: "EOF", span: cursor.makeSpan(start) };
  }
  const c = cursor.peek()!;

  // === numbers === (a digit, or `-` immediately followed by a digit)
  const startsNumber = isDigit(c) || (c === char('-') && isDigit(cursor.peek(1) ?? 0));
  if (startsNumber) {
    const scanned = number(cursor);
    return { tag: "number", value: scanned.value, span: scanned.span };
  }

  switch (c) {
    // === strings ===
    case char('"'): {
      const scanned = string(cursor);
      return { tag: "string", text: scanned.value, span: scanned.span };
    }
    // === variable use ===
    case char('$'): {
      cursor.next();
      const { name } = identifierScanner(cursor, 'variable_use');
      return { tag: "variable_use", name, span: cursor.makeSpan(start) };
    }
    // === tags ===
    case char('#'): {
      cursor.next();
      const { name } = identifierScanner(cursor, 'tag_construction');
      return { tag: "tag", name, span: cursor.makeSpan(start) };
    }
    // === tuples ===
    case char('('): {
      cursor.next();
      return { tag: "tuple_start", span: cursor.makeSpan(start) };
    }
  }

  // === keywords & identifiers ===
  // Nothing above matched: it must be a keyword or a function call.
  const result = identifierOrKeywordScanner(cursor, 'function_call');
  return result.tag === "keyword"
    ? result
    : { tag: "function_name", name: result.name, span: result.span };
}
// Scans the token a pattern starts with. Does not skip whitespace, before or after.
//
// Throws ExprScanError:
//  - "UnexpectedCharacter" for a digit, `"` or `-` (literals are not patterns yet),
//  - "InvalidIdentifier" (kind "tag_construction" or "pattern_binding") for malformed names.
export function patternStart(cursor: Cursor): PatternStartToken {
  const start = cursor.currentLocation();
  if (cursor.eof()) {
    return { tag: "EOF", span: cursor.makeSpan(start) };
  }
  const c = cursor.peek()!;

  // === tuple ===
  if (c === char('(')) {
    cursor.next();
    return { tag: "tuple_start", span: cursor.makeSpan(start) };
  }

  // === tag ===
  if (c === char('#')) {
    cursor.next();
    const { name } = identifierScanner(cursor, 'tag_construction');
    return { tag: "tag", name, span: cursor.makeSpan(start) };
  }

  // TODO: This is more subtle... -foo is a valid pattern name... I think I should restrict: can't have identifiers start with `-`? But then `-` itself can't be an identifier, which is a bit sad.
  // TODO: This is gonna be different once we allow number/string literals as patterns.
  if (isDigit(c) || c === char('"') || c === char('-')) {
    throw {
      tag: "UnexpectedCharacter",
      char: c,
      span: cursor.makeSpan(start)
    } as ExprScanError;
  }

  // === pattern binding ===
  // Fallthrough: it must be a keyword or a pattern-variable.
  // The kind is 'pattern_binding' so that identifier errors are labelled as coming from a
  // pattern, not from a function call.
  const result = identifierOrKeywordScanner(cursor, 'pattern_binding');
  switch (result.tag) {
    case "keyword":
      return result;
    case "identifier":
      return { tag: "pattern_binding", name: result.name, span: result.span };
  }
}
// Lookahead: could an expression start at the cursor?
// Scans one expression-start token and always restores the cursor afterwards.
// A scan error counts as "no".
export function isNextTokenExprStart(cursor: Cursor): boolean {
  const snapshot = cursor.save();
  try {
    const token = exprStart(cursor);
    if (token.tag === "keyword") {
      // Only these keywords open an expression; `=`, `|` and `!` do not.
      return token.kw === "let"
        || token.kw === "fn"
        || token.kw === "match"
        || token.kw === "apply"
        || token.kw === ":";
    }
    // Everything except EOF starts an expression.
    return token.tag === "number"
      || token.tag === "string"
      || token.tag === "variable_use"
      || token.tag === "tag"
      || token.tag === "tuple_start"
      || token.tag === "function_name"; // e.g. my_func(x)
  } catch (e) {
    return false;
  } finally {
    cursor.restore(snapshot);
  }
}
// Lookahead: could a product pattern start at the cursor?
// Scans one pattern-start token and always restores the cursor afterwards.
// A scan error counts as "no".
export function isNextTokenProductPatternStart(cursor: Cursor): boolean {
  const snapshot = cursor.save();
  try {
    const token = patternStart(cursor);
    if (token.tag === "keyword") {
      // `:` (record pattern) is the only keyword that opens a product pattern.
      return token.kw === ":";
    }
    // A tag or EOF does not start a product pattern.
    return token.tag === "pattern_binding" || token.tag === "tuple_start";
  } catch (e) {
    return false;
  } finally {
    cursor.restore(snapshot);
  }
}

View file

@ -0,0 +1,239 @@
// === Char type ===
export type StringIndex = number; // UTF-16 index into string
export type CodePointIndex = number; // index into array of code-points
export type CodePoint = number; // could also name it `UnicodeCodePoint`. Basically for `s: string` we have `s.codePointAt(i: index): char`.

// Returns the code point of the first character of `c`.
// Anything after the first code point is ignored.
// Throws a RangeError for the empty string, which has no first code point; without the check
// `undefined` would leak out under the `CodePoint` type.
export function char(c: string): CodePoint {
  const codePoint = c.codePointAt(0);
  if (codePoint === undefined) {
    throw new RangeError("char() requires a non-empty string");
  }
  return codePoint;
}
// One code point of the source together with the UTF-16 offset at which it starts
// in the source string.
export type CodePointRef = {
char: CodePoint,
offset: StringIndex,
};
// === Source Text ===
// A source string indexed by code point instead of by UTF-16 unit.
// The raw input is NFC-normalized first, so all offsets refer to the normalized string
// (`source`), not to the string passed to the constructor.
export class SourceText {
  readonly source: string;
  // TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints (each 20 bit but whatever), the other for pointers to original string.
  readonly chars: CodePointRef[];
  // Stores the CodePointIndex where each line begins
  readonly lineStarts: CodePointIndex[];

  constructor(rawSource: string) {
    const source = rawSource.normalize('NFC');
    this.source = source;
    this.chars = [];
    this.lineStarts = [0]; // Line 1 always starts at index 0

    let i = 0;
    while (i < source.length) {
      const char = source.codePointAt(i) as CodePoint;
      const cpIndex = this.chars.length;
      this.chars.push({ char: char, offset: i });
      // Code points above the BMP take two UTF-16 units.
      const size = (char > 0xFFFF ? 2 : 1);
      i += size;

      // === Newline Logic ===
      if (char === NEW_LINE) {
        // Found a newline, the NEXT char starts a new line
        this.lineStarts.push(cpIndex + 1);
      }
      // Handle CR (Classic Mac) or CRLF start
      else if (char === CARRIAGE_RETURN) {
        // Peek at the next char: in a CRLF pair the line break is recorded when the
        // '\n' is processed, so only a lone '\r' starts a new line here.
        const nextIsNL = i < source.length && source.codePointAt(i) === NEW_LINE;
        if (!nextIsNL) {
          this.lineStarts.push(cpIndex + 1);
        }
      }
    }
  }

  // Number of code points in the source.
  get length(): number {
    return this.chars.length;
  }

  // Returns the text between two code point indices (`start` inclusive, `end` exclusive).
  // A `start` outside the text yields ""; an `end` past the text is clamped to the end.
  sliceByCp(start: number, end: number): string {
    const startRef = this.chars[start];
    // Handle out of bounds gracefully
    if (!startRef) return "";
    const startOff = startRef.offset;
    const endOff = end < this.chars.length
      ? this.chars[end].offset
      : this.source.length;
    return this.source.slice(startOff, endOff);
  }

  // Converts a linear Code Point Index into SourceLocation
  // getLocation(index: CodePointIndex): SourceLocation {
  // // TODO: can be implemented either by a linear or binary search.
  // return (0 as any);
  // }

  // Returns the text of a specific line (1-based index) without its line terminator
  // (`\n`, `\r` or `\r\n`). Returns "" for a line number outside the text.
  getLineText(line: number): string {
    const lineIndex = line - 1;
    if (lineIndex < 0 || lineIndex >= this.lineStarts.length) return "";
    const startCp = this.lineStarts[lineIndex];
    let endCp = (lineIndex + 1 < this.lineStarts.length)
      ? this.lineStarts[lineIndex + 1]
      : this.chars.length;
    // Drop the terminator. A line holds at most one, and it sits at the very end;
    // for CRLF both code points have to go, not just the '\n'.
    if (endCp > startCp && this.chars[endCp - 1].char === NEW_LINE) {
      endCp -= 1;
    }
    if (endCp > startCp && this.chars[endCp - 1].char === CARRIAGE_RETURN) {
      endCp -= 1;
    }
    return this.sliceByCp(startCp, endCp);
  }

  // Returns the code point range of a line (1-based index): `start` is the first code point
  // of the line, `end` is the start of the next line (so the line terminator is included)
  // or the end of the text for the last line.
  getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } {
    const lineIndex = line - 1;
    if (lineIndex < 0 || lineIndex >= this.lineStarts.length) {
      // TODO: This is a bit suspicious. Maybe return undefined?
      return { start: 0, end: 0 };
    }
    const start = this.lineStarts[lineIndex];
    const end = (lineIndex + 1 < this.lineStarts.length)
      ? this.lineStarts[lineIndex + 1]
      : this.chars.length;
    return { start, end };
  }
}
// Convenience constructor: wraps `s` in a SourceText.
export function sourceText(s: string) {
  const wrapped = new SourceText(s);
  return wrapped;
}
// A region of the source, given by two locations.
// NOTE(review): `renderSpan` treats `end` as exclusive (it highlights up to, but not
// including, the end column) - confirm that `Cursor.makeSpan` produces spans that way.
export type Span = {
start: SourceLocation;
end: SourceLocation;
}
// A position in the source: its linear code point index plus line/column for display.
// NOTE(review): `column` presumably counts code points, like `index` - confirm against Cursor.
export type SourceLocation = {
index: CodePointIndex;
line: number; // 1-based
column: number; // 1-based
}
// Code point constants shared by the cursor/scanner code.
// Whitespace
export const NEW_LINE: CodePoint = char('\n');
export const CARRIAGE_RETURN: CodePoint = char('\r');
export const SPACE: CodePoint = char(' ');
export const TAB: CodePoint = char('\t');
// Digit Boundaries (inclusive range '0'..'9')
export const DIGIT_0: CodePoint = char('0');
export const DIGIT_9: CodePoint = char('9');
export const DOT: CodePoint = char('.');
// Hex Boundaries (inclusive ranges 'a'..'f' and 'A'..'F')
export const LOWERCASE_a: CodePoint = char('a');
export const UPPERCASE_A: CodePoint = char('A');
export const LOWERCASE_f: CodePoint = char('f');
export const UPPERCASE_F: CodePoint = char('F');
// === Rendering Utilities ===
// One rendered source line, as produced by `renderSpan`.
// For context lines (lines outside the span) the whole text is in `prefix`, and
// `highlight`, `suffix` and `underline` are empty.
export type LineView = {
lineNo: number;
sourceLine: string; // The full raw text of the line
// These split the line into 3 parts for coloring:
// prefix | highlight | suffix
prefix: string;
highlight: string;
suffix: string;
// Helpers for underlines (e.g., " ^^^^^")
gutterPad: string; // Padding to align line numbers
underline: string; // The literal "^^^" string for CLI usage
};
// Builds display data for `span`: one LineView per line from `contextLines` before the span
// to `contextLines` after it (clamped to the text).
//
// Lines inside the span are split into prefix | highlight | suffix and get an underline of
// carets (at least one, so zero-width spans stay visible). Context lines are returned whole
// in `prefix`.
//
// Only the line terminator is removed from each line; trailing spaces/tabs are kept so that
// a span pointing into trailing whitespace is still highlighted at the right column.
//
// NOTE(review): columns are applied as UTF-16 string indices here; if `column` counts code
// points, lines containing characters outside the BMP will be highlighted slightly off.
export function renderSpan(text: SourceText, span: Span, contextLines = 1): LineView[] {
  const views: LineView[] = [];
  const lineTerminator = /(?:\r\n|\n|\r)$/;

  // Determine range of lines to show (including context)
  const startLine = Math.max(1, span.start.line - contextLines);
  const endLine = Math.min(text.lineStarts.length, span.end.line + contextLines);

  // Calculate the max width of line numbers for nice padding (e.g. " 9 |" vs " 10 |")
  const maxLineNoWidth = endLine.toString().length;

  for (let lineNo = startLine; lineNo <= endLine; lineNo++) {
    const lineRange = text.getLineRange(lineNo);
    // The range includes the line terminator; strip it (and nothing else) for display.
    const lineRaw = text.sliceByCp(lineRange.start, lineRange.end).replace(lineTerminator, "");

    // Determine the intersection of the Span with this specific Line.
    // 1. Where does the highlight start on this line?
    //    On the start line use the span's column (1-based, hence -1), otherwise column 0.
    const highlightStartCol = (lineNo === span.start.line)
      ? span.start.column - 1
      : 0;
    // 2. Where does the highlight end on this line?
    //    On the end line use the span's column, otherwise the end of the line.
    const highlightEndCol = (lineNo === span.end.line)
      ? span.end.column - 1
      : lineRaw.length;

    // Distinguish context lines from error lines
    const isErrorLine = lineNo >= span.start.line && lineNo <= span.end.line;

    let prefix = "", highlight = "", suffix = "";
    if (isErrorLine) {
      // Clamp indices to bounds (safety)
      const safeStart = Math.max(0, Math.min(highlightStartCol, lineRaw.length));
      const safeEnd = Math.max(0, Math.min(highlightEndCol, lineRaw.length));
      prefix = lineRaw.substring(0, safeStart);
      highlight = lineRaw.substring(safeStart, safeEnd);
      suffix = lineRaw.substring(safeEnd);
    } else {
      // Pure context line
      prefix = lineRaw;
    }

    // Build the "underline" string (e.g., " ^^^^")
    // Note: This naive approach assumes monospaced fonts and no fancy unicode widths,
    // which usually holds for code.
    let underline = "";
    if (isErrorLine) {
      // Spaces for prefix
      underline += " ".repeat(prefix.length);
      // Carets for highlight (ensure at least 1 if it's a zero-width cursor position)
      const hlLen = Math.max(1, highlight.length);
      underline += "^".repeat(hlLen);
    }

    views.push({
      lineNo,
      sourceLine: lineRaw,
      prefix,
      highlight,
      suffix,
      gutterPad: " ".repeat(maxLineNoWidth - lineNo.toString().length),
      underline
    });
  }

  return views;
}

10
src/lang/result.ts Normal file
View file

@ -0,0 +1,10 @@
// Outcome of a fallible computation: either a success carrying a `value`
// or a failure carrying an `error`. Discriminate on `tag`.
export type Result<T, E> =
  | { tag: "ok"; value: T }
  | { tag: "error"; error: E };

export namespace Result {
  // Wraps `value` as a successful result.
  export function ok<T, E>(value: T): Result<T, E> {
    const success: Result<T, E> = { tag: "ok", value: value };
    return success;
  }

  // Wraps `error` as a failed result.
  export function error<T, E>(error: E): Result<T, E> {
    const failure: Result<T, E> = { tag: "error", error: error };
    return failure;
  }
}

563
src/lang/value.ts Normal file
View file

@ -0,0 +1,563 @@
// === Identifiers ===
// Every identifier kind is a plain string; the aliases only document which kind a signature expects.
export type VariableName = string // name bound in an environment frame and read by "var_use"
export type FunctionName = string // key into `Program.function_definitions`
// type CellName = string
export type Tag = string // constructor name carried by "tag"/"tagged" expressions, patterns and values
export type FieldName = string // key of a record field
// === Program ===
// Point in time attached to user function metadata.
// NOTE(review): unit and epoch are not established here; confirm (e.g. milliseconds since the Unix epoch).
export type Timestamp = number;
// The collection of top-level functions that expressions can call by name.
export type Program = {
// Definitions keyed by function name; `Program.lookup_function` reads from this map.
function_definitions: Map<FunctionName, FunctionDefinition>,
// Function names kept as an ordered list; presumably definition/display order, confirm with the code that maintains it.
function_definition_order: FunctionName[],
// TODO: Perhaps include the story and the environment?
// story should be a list of currently viewed bindings
// environment should be like the store... maybe call it store! It should map names to values and perhaps expressions that generated the value...
// like a reactive cell. This is the analogue of the tiddler.
// store: Map<CellName, Cell>
};
// type Cell = {
// name: CellName,
// expression: Expr,
// cached_value?: Value,
// status: CellStatus
// // TODO: Dependencies? Not sure about this yet...
// // Operational Semantics of Cells is gonna be thought up much later.
// // dependencies?: Set<CellName>,
// }
// type CellStatus =
// | "clean"
// | "dirty"
// | "error"
// A callable top-level function. `call_function` dispatches on `tag`:
// "user" definitions are interpreted, "primitive" ones run host code.
export type FunctionDefinition =
| { tag: "user", def: UserFunctionDefinition }
| { tag: "primitive", def: PrimitiveFunctionDefinition }
// A function written in the language itself. It keeps both the raw text the user typed
// and the parsed form the evaluator works on.
export type UserFunctionDefinition = {
// Raw user input (authoritative)
name: FunctionName,
raw_parameters: string;
raw_body: string;
// parsed
// One pattern per positional argument; arguments are destructured against these on call.
parameters: ProductPattern[],
// Evaluated in a fresh environment that contains only the parameter bindings.
body: Expr,
// metadata
created_at: Timestamp;
last_modified_at: Timestamp;
}
// A function implemented in the host language.
export type PrimitiveFunctionDefinition = {
name: FunctionName,
// Receives the already-evaluated arguments. The evaluator performs no arity check
// for primitives, so the implementation has to validate `args` itself.
implementation: (args: Value[]) => Value,
}
// Operations on a `Program`: construction, function lookup, and management of user functions.
export namespace Program {
  // Failures reported by the management operations below.
  type Error =
    | { tag: "DuplicateFunctionName", name: FunctionName }
    | { tag: "FunctionNotFound", name: FunctionName };

  // Result type local to the management operations (distinct from the evaluator's result type).
  type Result<T> =
    | { tag: "ok", value: T }
    | { tag: "error", error: Error };
  // | { tag: "ParseError", message: string } // TODO

  export namespace Result {
    export function ok<T>(value: T): Result<T> { return { tag: "ok", value } }
    export function error<T>(error: Error): Result<T> { return { tag: "error", error } }
  }

  // Creates a program with no functions defined.
  export function makeEmpty(): Program {
    return {
      function_definitions: new Map(),
      function_definition_order: [],
    };
  }

  // TODO: Primitive functions like +, -, *, div, <, <=, ==, mod
  // TODO: function to create initial program (with the above primitive functions otherwise empty)

  // Returns the definition (user or primitive) registered under `name`.
  // may throw `ThrownRuntimeError` (tag "FunctionLookupFailure") when no such function exists.
  export function lookup_function(program: Program, name: FunctionName): FunctionDefinition {
    const fn = program.function_definitions.get(name);
    if (!fn) {
      throw ThrownRuntimeError.error({
        tag: "FunctionLookupFailure",
        name,
      });
    }
    return fn;
  }

  // Raw, unparsed description of a user function to be created.
  export type CreateFunction = {
    raw_name: string,
    raw_parameters: string,
    raw_body: string,
  }

  // Raw, unparsed changes to an existing user function; omitted fields stay unchanged.
  export type UpdateFunction = {
    raw_name?: string,
    raw_parameters?: string,
    raw_body?: string,
  }

  // NOT IMPLEMENTED YET: still returns a placeholder that is not a valid `Result`.
  // It needs the parser, which is not available in this module.
  export function add_user_function(program: Program, description: CreateFunction): Result<void> {
    // TODO:
    // - parsing/validation
    //   - raw_name (check if function already exists)
    //   - raw_parameters
    //   - raw_body
    // - compute timestamp for now
    return (0 as any);
  }

  // Removes the user function `name` from `program`, mutating it in place: the entry is
  // dropped from both `function_definitions` and `function_definition_order`.
  // Returns a "FunctionNotFound" error when `name` is unknown or refers to a primitive
  // (primitives are not user functions and are never removed here).
  // TODO: What about result type? Should it on deletion return the original data of the function, and if there's a failure, how detailed should it be?
  export function delete_user_function(program: Program, name: FunctionName): Result<void> {
    const entry = program.function_definitions.get(name);
    if (entry === undefined || entry.tag !== "user") {
      return Result.error({ tag: "FunctionNotFound", name });
    }
    program.function_definitions.delete(name);
    // Splice instead of re-assigning so other holders of the array see the removal too.
    const position = program.function_definition_order.indexOf(name);
    if (position !== -1) {
      program.function_definition_order.splice(position, 1);
    }
    return Result.ok<void>(undefined);
  }

  // NOT IMPLEMENTED YET: still returns a placeholder that is not a valid `Result`.
  // NOTE(review): the signature takes no `UpdateFunction` argument yet; presumably one is intended.
  export function update_user_function(program: Program, name: FunctionName): Result<void> {
    // TODO:
    return (0 as any);
  }

  // Returns the user-defined function registered under `name`.
  // Returns a "FunctionNotFound" error when `name` is unknown or refers to a primitive.
  export function get_user_function(program: Program, name: FunctionName): Result<UserFunctionDefinition> {
    const entry = program.function_definitions.get(name);
    if (entry === undefined || entry.tag !== "user") {
      return Result.error({ tag: "FunctionNotFound", name });
    }
    return Result.ok(entry.def);
  }
}
// === Expressions ===
// Abstract syntax of expressions. The per-variant notes describe what `eval_expr` does with each node.
export type Expr =
| { tag: "literal", literal: Literal } // constant; evaluates to the matching number/string value
| { tag: "var_use", name: VariableName } // looked up in the environment, innermost frame first
// | { tag: "cell_ref", name: CellName }
| { tag: "call", name: FunctionName, args: Expr[] } // call of a top-level function from the Program, by name
| { tag: "let", bindings: ExprBinding[], body: Expr } // bindings are evaluated in order; each one sees the earlier ones
| { tag: "tag", tag_name: Tag } // bare tag without payload
| { tag: "tagged", tag_name: Tag, expr: Expr } // tag wrapping the value of `expr`
| { tag: "tuple", exprs: Expr[] }
| { tag: "record", fields: FieldAssignment[] } // on duplicate field names the last assignment wins
| { tag: "match", arg: Expr, branches: MatchBranch[] } // first branch whose pattern matches is evaluated
| { tag: "lambda", parameters: ProductPattern[], body: Expr } // evaluates to a closure capturing the current environment
| { tag: "apply", callee: Expr, args: Expr[] } // `callee` must evaluate to a closure
// Payload of a "literal" expression.
export type Literal =
| { tag: "number", value: number }
| { tag: "string", value: string }
// One `pattern = expr` entry of a "let"; the value of `expr` is destructured against `pattern`.
export type ExprBinding = {
pattern: ProductPattern,
expr: Expr,
}
// One branch of a "match"; `body` is evaluated with the variables bound by `pattern`.
export type MatchBranch = {
pattern: Pattern,
body: Expr,
}
// One `name = expr` entry of a "record" expression.
export type FieldAssignment = { name: FieldName, expr: Expr };
// Patterns without tag alternatives. These are the only patterns allowed for
// function/lambda parameters and "let" bindings.
export type ProductPattern =
| { tag: "any", name: VariableName } // matches every value and binds it to `name`
| { tag: "tuple", patterns: ProductPattern[] } // matches a tuple of exactly the same length, element by element
| { tag: "record", fields: FieldPattern[] } // matches a record that has at least the listed fields; extra fields are ignored
// Matches the record field `fieldName` against `pattern`.
export type FieldPattern = { fieldName: FieldName, pattern: ProductPattern };
// Full patterns as used in "match" branches: product patterns plus tag tests.
export type Pattern =
| ProductPattern
| { tag: "tag", tag_name: Tag } // matches only the bare tag with the same name
| { tag: "tagged", tag_name: Tag, pattern: Pattern } // matches a tagged value with the same tag name whose payload matches `pattern`
// === Values ===
// Runtime values produced by evaluation.
export type Value =
| { tag: "string", value: string }
| { tag: "number", value: number }
| { tag: "tag", tag_name: Tag } // bare tag without payload
| { tag: "tagged", tag_name: Tag, value: Value } // tag with a payload value
| { tag: "tuple", values: Value[] }
| { tag: "record", fields: Map<FieldName, Value> }
| { tag: "closure", closure: Closure } // result of evaluating a "lambda"
// The discriminants of `Value`; used to name the expected kind in "TypeMismatch" errors.
export type ValueTag =
| "string"
| "number"
| "tag"
| "tagged"
| "tuple"
| "record"
| "closure"
// Used as a Stack of frames. Basically a linked list.
// Lookups start at the outermost `frame` node and follow `parent` until "nil",
// so bindings in newer frames shadow older ones.
export type Env =
| { tag: "nil" }
| { tag: "frame", frame: EnvFrame, parent: Env }
// The variable bindings introduced together by one pattern match or one argument list.
export type EnvFrame = Map<VariableName, Value>;
// A lambda together with the environment it was created in.
export type Closure = {
env: Env, // environment captured when the lambda was evaluated
parameters: ProductPattern[], // one pattern per positional argument
body: Expr, // evaluated in `env` extended with the argument bindings
}
// === Constructors ===
// Constructors for expression nodes and their parts. Each one only assembles the
// corresponding object; no validation is performed.
export namespace Expr {
  // Internal helper: wraps a literal payload in a "literal" node.
  function literal(literal: Literal): Expr {
    return { tag: "literal", literal };
  }

  // Number literal.
  export function number(value: number): Expr {
    return literal({ tag: "number", value });
  }

  // String literal.
  export function string(value: string): Expr {
    return literal({ tag: "string", value });
  }

  // Call of the top-level function `name`.
  export function call(name: FunctionName, args: Expr[]): Expr {
    return { tag: "call", name, args };
  }

  // Bare tag.
  export function tag(tag_name: Tag): Expr {
    return { tag: "tag", tag_name };
  }

  // Tag with a payload expression.
  export function tagged(tag_name: Tag, expr: Expr): Expr {
    return { tag: "tagged", tag_name, expr };
  }

  export function tuple(exprs: Expr[]): Expr {
    return { tag: "tuple", exprs };
  }

  export function record(fields: FieldAssignment[]): Expr {
    return { tag: "record", fields };
  }

  export function match(arg: Expr, branches: MatchBranch[]): Expr {
    return { tag: "match", arg, branches };
  }

  // Reference to a variable.
  export function var_use(name: VariableName): Expr {
    return { tag: "var_use", name };
  }

  // "let" expression (trailing underscore because `let` is reserved).
  export function let_(bindings: ExprBinding[], body: Expr): Expr {
    return { tag: "let", bindings, body };
  }

  // Application of the value of `callee` to `args`.
  export function apply(callee: Expr, args: Expr[]): Expr {
    return { tag: "apply", callee, args };
  }

  export function lambda(parameters: ProductPattern[], body: Expr): Expr {
    return { tag: "lambda", parameters, body };
  }

  // One branch of a "match"; `expr` becomes the branch body.
  export function matchBranch(pattern: Pattern, expr: Expr): MatchBranch {
    return { pattern, body: expr };
  }

  // One binding of a "let".
  export function exprBinding(pattern: ProductPattern, expr: Expr): ExprBinding {
    return { pattern, expr };
  }

  // One field of a "record" expression.
  export function fieldAssignment(name: FieldName, expr: Expr): FieldAssignment {
    return { name, expr };
  }
}
// Constructors for runtime values, one per `Value` variant.
export namespace Value {
  export function string(value: string): Value {
    return { tag: "string", value };
  }

  export function number(value: number): Value {
    return { tag: "number", value };
  }

  // Bare tag without payload.
  export function tag(tag_name: Tag): Value {
    return { tag: "tag", tag_name };
  }

  // Tag carrying `value` as its payload.
  export function tagged(tag_name: Tag, value: Value): Value {
    return { tag: "tagged", tag_name, value };
  }

  export function tuple(values: Value[]): Value {
    return { tag: "tuple", values };
  }

  // The given map is stored as-is (not copied).
  export function record(fields: Map<FieldName, Value>): Value {
    return { tag: "record", fields };
  }

  export function closure(closure: Closure): Value {
    return { tag: "closure", closure };
  }
}
// Constructors for product patterns and record field patterns.
export namespace ProductPattern {
  // Pattern that matches anything and binds it to `name`.
  export function any(name: VariableName): ProductPattern {
    return { tag: "any", name };
  }

  export function tuple(patterns: ProductPattern[]): ProductPattern {
    return { tag: "tuple", patterns };
  }

  export function record(fields: FieldPattern[]): ProductPattern {
    return { tag: "record", fields };
  }

  // Entry of a record pattern: match field `fieldName` against `pattern`.
  export function fieldPattern(fieldName: FieldName, pattern: ProductPattern): FieldPattern {
    return { fieldName, pattern };
  }
}
// Constructors for the tag-based patterns that exist only in `Pattern`.
export namespace Pattern {
  // Matches the bare tag `tag_name`.
  export function tag(tag_name: Tag): Pattern {
    return { tag: "tag", tag_name };
  }

  // Matches a value tagged `tag_name` whose payload matches `pattern`.
  export function tagged(tag_name: Tag, pattern: Pattern): Pattern {
    return { tag: "tagged", tag_name, pattern };
  }
}
// ===Errors===
// Everything that can go wrong while evaluating an expression.
type RuntimeError =
| { tag: "FunctionLookupFailure", name: FunctionName } // "call" of a name missing from the Program
| { tag: "FunctionCallArityMismatch", name: FunctionName, expected: number, actual: number } // user function called with the wrong number of arguments
| { tag: "ClosureApplicationArityMismatch", closure: Closure, expected: number, actual: number } // closure applied to the wrong number of arguments
| { tag: "VariableLookupFailure", name: VariableName } // variable not bound in any environment frame
// | { tag: "CellLookupFailure", name: CellName }
| { tag: "UnableToFindMatchingPattern", value: Value } // no "match" branch fits, or a let/parameter pattern does not fit `value`
| { tag: "TypeMismatch", expected: ValueTag, received: Value } // e.g. applying a value that is not a closure
| { tag: "DuplicateVariableNamesInPattern", pattern: Pattern, duplicates: VariableName[] } // NOTE(review): not raised by any evaluation code visible here
// | { tag: "DuplicateVariableNamesInProductPattern", pattern: ProductPattern, duplicates: VariableName[] }
// Outcome of an evaluation: a value of type `T`, or the `RuntimeError` that stopped it.
type Result<T> =
  | { tag: "ok"; value: T }
  | { tag: "error"; error: RuntimeError };

export namespace Result {
  // Successful outcome carrying `value`.
  export function ok<T>(value: T): Result<T> {
    const success: Result<T> = { tag: "ok", value: value };
    return success;
  }

  // Failed outcome carrying `error`.
  export function error<T>(error: RuntimeError): Result<T> {
    const failure: Result<T> = { tag: "error", error: error };
    return failure;
  }
}
// Internal carrier used to `throw` a `RuntimeError` out of the evaluation functions.
// The `kind` marker lets `eval_start` tell these apart from unrelated exceptions.
// Use it in all internal evaluation functions.
type ThrownRuntimeError = {
  kind: "RuntimeError";
  error: RuntimeError;
};

namespace ThrownRuntimeError {
  // Builds the object to throw. Use as follows:
  // `throw ThrownRuntimeError.error(e)`
  export function error(error: RuntimeError): ThrownRuntimeError {
    const carrier: ThrownRuntimeError = { kind: "RuntimeError", error: error };
    return carrier;
  }
}
// ===Evaluation===
// Operations on environments (linked stacks of frames).
export namespace Env {
  // The empty environment.
  export function nil(): Env {
    const empty: Env = { tag: "nil" };
    return empty;
  }

  // Returns a new environment with `frame` on top of `env`; `env` itself is not modified.
  export function push_frame(env: Env, frame: EnvFrame): Env {
    const extended: Env = { tag: "frame", frame: frame, parent: env };
    return extended;
  }

  // Finds the value bound to `var_name`, searching from the newest frame towards the oldest.
  // may throw `ThrownRuntimeError` (tag "VariableLookupFailure") when no frame binds the name.
  export function lookup(env: Env, var_name: VariableName): Value {
    for (let node = env; node.tag !== "nil"; node = node.parent) {
      const bindings = node.frame;
      if (bindings.has(var_name)) {
        return bindings.get(var_name)!;
      }
    }
    throw ThrownRuntimeError.error({
      tag: "VariableLookupFailure",
      name: var_name,
    });
  }

  // Binds `var_name` to `value` in `frame`, mutating the frame in place.
  export function frame_insert_mut(frame: EnvFrame, var_name: VariableName, value: Value) {
    frame.set(var_name, value);
  }
}
// Public entry point of the evaluator: evaluates `e` against `program` in an empty environment.
//
// Runtime errors raised internally (thrown as `ThrownRuntimeError`) are caught here and
// returned as `Result.error`. Any other exception is not a language-level error and is
// rethrown unchanged.
export function eval_start(program: Program, e: Expr): Result<Value> {
  try {
    return Result.ok(eval_expr(program, Env.nil(), e));
  } catch (err) {
    // `typeof null === "object"`, so null has to be excluded before reading `.kind`;
    // otherwise a thrown `null` would surface as a TypeError from this handler.
    const isRuntimeError =
      typeof err === "object" &&
      err !== null &&
      (err as { kind?: unknown }).kind === "RuntimeError";
    if (isRuntimeError) {
      return Result.error((err as ThrownRuntimeError).error);
    }
    throw err;
  }
}
// Evaluates expression `e` in environment `env`; "call" targets are resolved through `program`.
// may throw `ThrownRuntimeError`
function eval_expr(program: Program, env: Env, e: Expr): Value {
  switch (e.tag) {
    case "literal": {
      const lit = e.literal;
      switch (lit.tag) {
        case "number":
          return Value.number(lit.value);
        case "string":
          return Value.string(lit.value);
      }
    }
    case "tag": {
      return Value.tag(e.tag_name);
    }
    case "tagged": {
      const payload = eval_expr(program, env, e.expr);
      return Value.tagged(e.tag_name, payload);
    }
    case "tuple": {
      const items = eval_sequence(program, env, e.exprs);
      return Value.tuple(items);
    }
    case "record": {
      // Fields are evaluated in source order; a repeated name overwrites the earlier entry.
      const entries = new Map<FieldName, Value>();
      for (const { name, expr } of e.fields) {
        entries.set(name, eval_expr(program, env, expr));
      }
      return Value.record(entries);
    }
    case "lambda": {
      // Capture the current environment so the body can see it when applied later.
      return Value.closure({
        env,
        parameters: e.parameters,
        body: e.body,
      });
    }
    case "var_use": {
      return Env.lookup(env, e.name);
    }
    case "call": {
      // The function is resolved before its arguments are evaluated.
      const target = Program.lookup_function(program, e.name);
      const arg_values = eval_sequence(program, env, e.args);
      return call_function(program, target, arg_values);
    }
    case "apply": {
      // The callee is evaluated (and checked to be a closure) before the arguments.
      const callee = force_closure(eval_expr(program, env, e.callee));
      const arg_values = eval_sequence(program, env, e.args);
      return apply_closure(program, callee, arg_values);
    }
    case "let": {
      const body_env = eval_bindings(program, env, e.bindings);
      return eval_expr(program, body_env, e.body);
    }
    case "match": {
      const scrutinee = eval_expr(program, env, e.arg);
      // Branches are tried in order; the first matching one wins.
      for (const { pattern, body } of e.branches) {
        const outcome = match_pattern(pattern, scrutinee);
        if (outcome.tag === "match") {
          return eval_expr(program, Env.push_frame(env, outcome.frame), body);
        }
      }
      throw ThrownRuntimeError.error({
        tag: "UnableToFindMatchingPattern",
        value: scrutinee,
      });
    }
  }
}
// Evaluates the bindings of a "let" from first to last and returns `env` extended with
// one new frame per binding.
// Each binding's expression is evaluated in the environment built so far, so
// `let { x = 123, y = +($x, 1) ... }` is allowed. Earlier bindings cannot refer to later
// ones (i.e. no recursion).
// may throw `ThrownRuntimeError` (tag "UnableToFindMatchingPattern") when a value does not fit its pattern.
function eval_bindings(program: Program, env: Env, bindings: ExprBinding[]): Env {
  let scope = env;
  for (const binding of bindings) {
    const bound_value = eval_expr(program, scope, binding.expr);
    const outcome = match_product_pattern(binding.pattern, bound_value);
    if (outcome.tag !== "match") {
      throw ThrownRuntimeError.error({
        tag: "UnableToFindMatchingPattern",
        value: bound_value,
      });
    }
    scope = Env.push_frame(scope, outcome.frame);
  }
  return scope;
}
// Evaluates every expression of `args` in the same environment, left to right,
// and returns the values in the same order.
// may throw `ThrownRuntimeError`
function eval_sequence(program: Program, env: Env, args: Expr[]): Value[] {
  const evaluate = (arg: Expr): Value => eval_expr(program, env, arg);
  const values = args.map(evaluate);
  return values;
}
// Calls `fn_def` with already-evaluated `args`: primitives run their host implementation
// directly, user functions are interpreted.
// may throw `ThrownRuntimeError`
function call_function(program: Program, fn_def: FunctionDefinition, args: Value[]): Value {
  switch (fn_def.tag) {
    case "primitive": {
      const primitive = fn_def.def;
      return primitive.implementation(args);
    }
    case "user": {
      const user_fn = fn_def.def;
      return call_user_function(program, user_fn, args);
    }
  }
}
// Interprets a user-defined function: binds `args` to the declared parameter patterns and
// evaluates the body in a fresh environment holding only those bindings
// (nothing from the caller's environment is visible).
// may throw `ThrownRuntimeError` (e.g. tag "FunctionCallArityMismatch" on a wrong argument count).
function call_user_function(program: Program, fn_def: UserFunctionDefinition, args: Value[]): Value {
  const on_arity_mismatch = (expected: number, actual: number): RuntimeError => ({
    tag: "FunctionCallArityMismatch",
    name: fn_def.name,
    expected,
    actual,
  });
  const parameter_frame = bind_arguments_to_parameters(fn_def.parameters, args, on_arity_mismatch);
  const body_env = Env.push_frame(Env.nil(), parameter_frame);
  return eval_expr(program, body_env, fn_def.body);
}
// Applies `closure` to already-evaluated `args`: binds the arguments to the closure's
// parameter patterns and evaluates its body in the captured environment extended with
// those bindings.
// may throw `ThrownRuntimeError` (e.g. tag "ClosureApplicationArityMismatch" on a wrong argument count).
function apply_closure(program: Program, closure: Closure, args: Value[]): Value {
  const on_arity_mismatch = (expected: number, actual: number): RuntimeError => ({
    tag: "ClosureApplicationArityMismatch",
    closure,
    expected,
    actual,
  });
  const parameter_frame = bind_arguments_to_parameters(closure.parameters, args, on_arity_mismatch);
  const body_env = Env.push_frame(closure.env, parameter_frame);
  return eval_expr(program, body_env, closure.body);
}
// Unwraps a closure value.
// may throw `ThrownRuntimeError` (tag "TypeMismatch", expected "closure") for any other kind of value.
function force_closure(value: Value): Closure {
  if (value.tag === "closure") {
    return value.closure;
  }
  throw ThrownRuntimeError.error({
    tag: "TypeMismatch",
    expected: "closure",
    received: value,
  });
}
// Destructures positional `values` against `patterns` and collects every resulting
// binding in ONE new frame (a name bound by a later pattern overwrites an earlier one).
//
// `onArityMismatchError` builds the error to report when the counts differ, which lets
// callers choose between the function-call and the closure-application variant.
// may throw `ThrownRuntimeError`: the arity error above, or "UnableToFindMatchingPattern"
// when a value does not fit its pattern.
function bind_arguments_to_parameters(
  patterns: ProductPattern[],
  values: Value[],
  onArityMismatchError: (expected: number, actual: number) => RuntimeError
): EnvFrame {
  if (patterns.length !== values.length) {
    throw ThrownRuntimeError.error(onArityMismatchError(patterns.length, values.length));
  }

  const bindings: EnvFrame = new Map();
  for (const [position, parameter] of patterns.entries()) {
    const argument = values[position];
    const outcome = match_product_pattern_mut(bindings, parameter, argument);
    if (outcome.tag === "failure") {
      throw ThrownRuntimeError.error({
        tag: "UnableToFindMatchingPattern",
        value: argument,
      });
    }
  }
  return bindings;
}
// === Pattern Matching ===
// A pattern match results either in a successful match, carrying the EnvFrame that holds
// the variables bound by the pattern, or in a failure, carrying the (sub)pattern and the
// (sub)value at which matching stopped.
type PatternMatchingResult =
| { tag: "match", frame: EnvFrame }
| { tag: "failure", pattern: Pattern, value: Value }
// Matches `value` against `pattern`, collecting the bindings in a newly created frame.
function match_pattern(pattern: Pattern, value: Value): PatternMatchingResult {
  const fresh_frame: EnvFrame = new Map<VariableName, Value>();
  const outcome = match_pattern_mut(fresh_frame, pattern, value);
  return outcome;
}
// Matches `value` against `pattern`, adding bindings to the caller-supplied `frame`.
// Tag patterns are handled here; every other pattern is a product pattern and is delegated.
// On success the returned result carries the same `frame` object that was passed in.
function match_pattern_mut(frame: EnvFrame, pattern: Pattern, value: Value): PatternMatchingResult {
  if (pattern.tag === "tag") {
    // A bare tag only matches the bare tag of the same name; nothing is bound.
    const same_tag = value.tag === "tag" && value.tag_name === pattern.tag_name;
    return same_tag
      ? { tag: "match", frame }
      : { tag: "failure", pattern, value };
  }

  if (pattern.tag === "tagged") {
    if (value.tag !== "tagged" || value.tag_name !== pattern.tag_name) {
      return { tag: "failure", pattern, value };
    }
    // Same tag name: continue with the payload.
    return match_pattern_mut(frame, pattern.pattern, value.value);
  }

  return match_product_pattern_mut(frame, pattern, value);
}
// Matches `value` against the product pattern `pattern`, collecting the bindings in a
// newly created frame.
function match_product_pattern(pattern: ProductPattern, value: Value): PatternMatchingResult {
  const fresh_frame: EnvFrame = new Map<VariableName, Value>();
  const outcome = match_product_pattern_mut(fresh_frame, pattern, value);
  return outcome;
}
// Matches `value` against the product pattern `pattern`, adding bindings to the
// caller-supplied `frame`.
// On success the result carries that same `frame`. On failure the result names the
// (sub)pattern and (sub)value that did not fit; bindings added before the failing
// point remain in `frame`.
function match_product_pattern_mut(frame: EnvFrame, pattern: ProductPattern, value: Value): PatternMatchingResult {
  switch (pattern.tag) {
    case "any": {
      // Matches everything; binds the whole value.
      frame.set(pattern.name, value);
      return { tag: "match", frame };
    }
    case "tuple": {
      // The value must be a tuple with exactly as many elements as the pattern.
      const shape_ok = value.tag === "tuple" && pattern.patterns.length === value.values.length;
      if (!shape_ok) {
        return { tag: "failure", pattern, value };
      }
      let position = 0;
      for (const element_pattern of pattern.patterns) {
        const outcome = match_product_pattern_mut(frame, element_pattern, value.values[position]);
        if (outcome.tag === "failure") {
          return outcome;
        }
        position += 1;
      }
      return { tag: "match", frame };
    }
    case "record": {
      if (value.tag !== "record") {
        return { tag: "failure", pattern, value };
      }
      // Every field named by the pattern must exist; fields not mentioned are ignored.
      for (const field of pattern.fields) {
        const field_value = value.fields.get(field.fieldName);
        if (field_value === undefined) {
          return { tag: "failure", pattern, value };
        }
        const outcome = match_product_pattern_mut(frame, field.pattern, field_value);
        if (outcome.tag === "failure") {
          return outcome;
        }
      }
      return { tag: "match", frame };
    }
  }
}