diff --git a/src/debug/expr_show.ts b/src/debug/expr_show.ts new file mode 100644 index 0000000..e72c0a6 --- /dev/null +++ b/src/debug/expr_show.ts @@ -0,0 +1,107 @@ +import { Expr, Pattern, ProductPattern, Literal, FieldAssignment, FieldPattern } from '../value'; + +export function exprToString(expr: Expr): string { + switch (expr.tag) { + case "literal": + return literalToString(expr.literal); + + case "var_use": + return expr.name; + + case "call": + return `${expr.name}(${expr.args.map(exprToString).join(", ")})`; + + case "tuple": + return `(${expr.exprs.map(exprToString).join(", ")})`; + + case "record": { + const fields = expr.fields.map(fieldAssignmentToString).join(", "); + return `{ ${fields} }`; + } + + case "tag": + return `#${expr.tag_name}`; + + case "tagged": { + // We wrap the payload in parens if it's complex to be safe, + // but strictly speaking #foo 1 is valid. + // Let's just output space + expr. + const payload = exprToString(expr.expr); + // Heuristic: if payload starts with '(', '{', or is simple, we might not need parens, + // but for "tagged" expression precedence, it's often safer to wrap unless atomic. + return `#${expr.tag_name} ${payload}`; + } + + case "let": { + const bindings = expr.bindings + .map(b => `${productPatternToString(b.pattern)} = ${exprToString(b.expr)}`) + .join(", "); + return `let { ${bindings} . ${exprToString(expr.body)} }`; + } + + case "lambda": { + const params = expr.parameters.map(productPatternToString).join(", "); + return `fn { ${params} . ${exprToString(expr.body)} }`; + } + + case "apply": { + const args = expr.args.map(exprToString).join(", "); + return `apply(${exprToString(expr.callee)} ! ${args})`; + } + + case "match": { + const branches = expr.branches + .map(b => `${patternToString(b.pattern)} . ${exprToString(b.body)}`) + .join(" | "); + return `match ${exprToString(expr.arg)} { ${branches} }`; + } + } +} + +// === Helpers === + +function literalToString(lit: Literal): string { + switch (lit.tag) { + case "number": return lit.value.toString(); + case "string": return `"${lit.value}"`; // simplistic string escaping + } +} + +function fieldAssignmentToString(f: FieldAssignment): string { + return `${f.name} = ${exprToString(f.expr)}`; +} + +// === Pattern Printers === + +export function patternToString(pat: Pattern): string { + switch (pat.tag) { + case "tag": + return `#${pat.tag_name}`; + case "tagged": + return `#${pat.tag_name} ${patternToString(pat.pattern)}`; + // If it's a product pattern (any, tuple, record) + default: + return productPatternToString(pat); + } +} + +export function productPatternToString(pat: ProductPattern): string { + switch (pat.tag) { + case "any": + return pat.name; + + case "tuple": + return `(${pat.patterns.map(productPatternToString).join(", ")})`; + + case "record": + return `{ ${pat.fields.map(fieldPatternToString).join(", ")} }`; + } +} + +function fieldPatternToString(f: FieldPattern): string { + // Check for punning: if pattern is "any" and name matches fieldName + if (f.pattern.tag === "any" && f.pattern.name === f.fieldName) { + return f.fieldName; + } + return `${f.fieldName} = ${productPatternToString(f.pattern)}`; +} diff --git a/src/debug/repl.ts b/src/debug/repl.ts new file mode 100644 index 0000000..77ea260 --- /dev/null +++ b/src/debug/repl.ts @@ -0,0 +1,76 @@ +import * as readline from 'readline'; +import { parse } from '../parser/parser'; +import { exprToString } from '../debug/expr_show'; +import { Result } from '../result'; + +// Helper to calculate line/col from an absolute offset +function getLineCol(text: string, offset: number) { + let line = 1; + let col = 1; + for (let i = 0; i < offset && i < text.length; i++) { + if (text[i] === '\n') { + line++; + col = 1; + } else { + col++; + } + } + return { line, col }; +} + +const rl = readline.createInterface({ + input: process.stdin, + output: process.stdout, + prompt: 'expr> ' +}); + +console.log("=== Quick & Dirty Parser REPL ==="); +console.log("Type an expression to parse and verify round-trip stringification."); +console.log("Ctrl+C to exit.\n"); + +rl.prompt(); + +rl.on('line', (lineInput) => { + const trimmed = lineInput.trim(); + if (trimmed) { + try { + const result = parse(trimmed); + + if (result.tag === "ok") { + const ast = result.value; + const reconstructed = exprToString(ast); + + console.log(`\n✅ Parsed Successfully:`); + console.log(` ${reconstructed}`); + + } else { + const err = result.error; + // FIX: Calculate line/col manually using the input string + const { line, col } = getLineCol(trimmed, err.span.start); + const loc = `${line}:${col}`; + + console.log(`\n❌ Parse Error [${err.tag}] at ${loc}`); + + if ('expected' in err) { + console.log(` Expected: ${(err as any).expected}`); + } + if ('reason' in err) { + console.log(` Reason: ${(err as any).reason}`); + } + if ('received' in err) { + // Useful to see what char we actually got (print char code if needed) + console.log(` Received CodePoint: ${(err as any).received}`); + } + } + } catch (e) { + console.log(`\n🔥 CRASH (Uncaught Exception):`); + console.log(e); + } + } + + console.log(); + rl.prompt(); +}).on('close', () => { + console.log('Bye!'); + process.exit(0); +}); diff --git a/src/parser/parser.ts b/src/parser/parser.ts new file mode 100644 index 0000000..8c95ac2 --- /dev/null +++ b/src/parser/parser.ts @@ -0,0 +1,435 @@ +import { Expr, ExprBinding, FieldAssignment, FieldPattern, MatchBranch, Pattern, ProductPattern } from '../value'; +import { Cursor } from './cursor'; +import { ExprScanError, exprStart, ExprStartToken, identifier, isNextTokenExprStart, isNextTokenProductPatternStart, patternStart, PatternStartToken, skipWhitespaceAndComments } from './scanner'; +import { char, CodePoint, SourceText, Span } from './source_text'; +import { Result } from '../result'; + +// CONVENTION: Every parser is responsible to consume whitespace/comments at the end. +// Every parser is not responsible for cleaning up whitespace/comments at the start - only the final `parse` that's exposed to the public. + +const DELIMITER_COMMA = char(','); +const DELIMITER_PIPE = char('|'); +const TERMINATOR_CLOSE_PAREN = char(')'); +const TERMINATOR_CLOSE_BRACE = char('}'); +const TERMINATOR_DOT = char('.'); + +// TODO: Errors have to have better contextual information +export type ParseError = + | ExprScanError + | { tag: "UnexpectedToken", expected: string, span: Span } + | { tag: "UnexpectedTokenWhileParsingSequence", reason: "ExpectedDelimiterOrTerminator", received: CodePoint, expectedDelimiter: CodePoint, expectedTerminator: CodePoint, span: Span } + + // === Specific Context Errors === + | { tag: "ExpectedExpression", span: Span } // Expected start of expr (e.g. hit EOF or keyword) + | { tag: "ExpectedFieldAssignmentSymbol", span: Span } // Expected '=' in field assignment + | { tag: "ExpectedPatternAssignmentSymbol", span: Span } // Expected '=' in pattern assignment + | { tag: "ExpectedPatternBindingSymbol", span: Span } // Expected '.' in pattern binding + | { tag: "ExpectedFunctionCallStart", span: Span } // Expected '(' after function name + | { tag: "ExpectedLetBlockOpen", span: Span } // Expected '{' after 'let' + | { tag: "ExpectedLetBlockClose", span: Span } // Expected '}' at end of 'let' expression + | { tag: "ExpectedMatchBlockOpen", span: Span } // Expected '{' after 'match' + | { tag: "ExpectedMatchBlockClose", span: Span } // Expected '}' at end of 'match' expression + | { tag: "ExpectedLambdaBlockOpen", span: Span } // Expected '{' after `fn` + | { tag: "ExpectedLambdaBlockClose", span: Span } // Expected '}' at end of `fn` expression + | { tag: "ExpectedApplyStart", span: Span } // Expected '(' after 'apply' + | { tag: "ExpectedApplySeparator", span: Span } // Expected '!' inside 'apply' + | { tag: "UnexpectedTagPattern", span: Span } // Found #tag where product pattern expected + | { tag: "ExpectedPattern", span: Span } // EOF or invalid start of pattern + | { tag: "ExpectedRecordField", span: Span }; // Expected identifier in record pattern + + // TODO: Delete? +export type Expectation = + | "ExpectedExpression" + | "ExpectedFieldAssignmentSymbol" + | "ExpectedPatternAssignmentSymbol" + | "ExpectedPatternBindingSymbol" + | "ExpectedFunctionCallStart" + | "ExpectedLetBlockOpen" + | "ExpectedLetBlockClose" + | "ExpectedMatchBlockOpen" + | "ExpectedMatchBlockClose" + | "ExpectedApplyStart" + | "ExpectedApplySeparator" + | "UnexpectedTagPattern" + | "ExpectedPattern" + | "ExpectedRecordField" + +export type Parser = (cursor: Cursor) => T + +// === Helpers === +// Don't use `cursor.next()` directly - for most of the cases you also want to consume the WS/comments after. +function nextWS(cursor: Cursor): CodePoint | undefined { + const c = cursor.next(); + if (c === undefined) { + return undefined; + } else { + skipWhitespaceAndComments(cursor); + return c; + } +} + +// Returns true if the char was consumed. +function tryConsume(cursor: Cursor, expected: CodePoint): boolean { + if (cursor.peek() === expected) { + nextWS(cursor); + return true; + } + return false; +} + +// TODO: Perhaps when it comes to terminators, we should allow the user of this function to create better contextual errors? +// Parses a delimited sequence of `p` that's terminated by a special character. +function delimitedTerminalSequence(cursor: Cursor, delimiter: CodePoint, terminator: CodePoint, p: Parser): A[] { + // Let's say the terminator is `.` and delimiter is `,`. This is the grammar for valid sequences + // list(p) := + // | p + // | p , p + // | p , p , p + // | ... + // delimitedTerminalSequence(p) := + // | . + // | optional(,) list(p) optional(,) . + // + // All our use-cases always have a well-defined terminator character: + // tuples: ( a, b, c ) -> `)` + // records: { f0 = e0, f1 = e1 } -> `}` + // function call: f(a, b, c) -> `)` + // let-binding: let { p = e . body } -> `.` + // fn-asbtraction: fn { p0, p1 . body } -> `.` + // fn-application: apply(e ! e0, e1, e2) -> `)` + const items: A[] = []; + + if (cursor.peek() === terminator) { + nextWS(cursor); + return items; + } + + // optional delimiter + if (cursor.peek() === delimiter) { + nextWS(cursor); + } + + while (true) { + const item = p(cursor); // `p` should be responsible for getting rid of whitespace after it has done its work + items.push(item); + + const nextChar = cursor.peek(); + + if (nextChar === terminator) { + nextWS(cursor); + + return items; + } else if (nextChar === delimiter) { + nextWS(cursor); + + if (cursor.peek() === terminator) { + nextWS(cursor); + // the previous delimiter was trailing + return items; + } + // the previous delimiter was a separator + continue; + } else { + throw { + tag: "UnexpectedTokenWhileParsingSequence", + received: nextChar, + reason: "ExpectedDelimiterOrTerminator", + expectedDelimiter: delimiter, + expectedTerminator: terminator, + span: cursor.makeSpan(cursor.currentLocation()), + } as ParseError; + } + } +} + +function exprStartToken(cursor: Cursor): ExprStartToken { + const token = exprStart(cursor); + skipWhitespaceAndComments(cursor); + return token; +} + +function patternStartToken(cursor: Cursor): PatternStartToken { + const token = patternStart(cursor); + skipWhitespaceAndComments(cursor); + return token; +} + +// === Expression Parsers === + +function expr(cursor: Cursor): Expr { + const token = exprStartToken(cursor); + // TODO: You need to include the spans and perhaps other meta-info. + switch (token.tag) { + case "EOF": + throw { + tag: "UnexpectedToken", + expected: "Expression", + span: token.span + } as ParseError; + case "number": + return Expr.number(token.value); + case "string": + return Expr.string(token.text); + case "variable_use": + return Expr.var_use(token.name); + case "tag": + // #true + // #foo e + if (isNextTokenExprStart(cursor)) { + const value = expr(cursor); + return Expr.tagged(token.name, value); + } else { + return Expr.tag(token.name); + } + case "tuple_start": + // e.g. (a, b, c) + const items = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr); + return Expr.tuple(items); + case "record_start": + // e.g. { x = 1, y = 2 } + const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_BRACE, fieldAssignment); + return Expr.record(fields); + case "function_name": + // e.g. my_func(arg1, arg2) + // parse a `,` delimiter sequence of expr + // need to consume ) + if (!tryConsume(cursor, char('('))) { + throw { + tag: "ExpectedFunctionCallStart", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + const args = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr); + return Expr.call(token.name, args); + case "keyword": + switch (token.kw) { + case "let": + // let { p0 = e0, p1 = e2 . body } + if (!tryConsume(cursor, char('{'))) { + throw { + tag: "ExpectedLetBlockOpen", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + const bindings = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_DOT, productPatternBinding); + const body = expr(cursor); + + if (!tryConsume(cursor, TERMINATOR_CLOSE_BRACE)) { + throw { + tag: "ExpectedLetBlockClose", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + return Expr.let_(bindings, body); + case "fn": { + // fn { p0, p1, p2 . body } + if (!tryConsume(cursor, char('{'))) { + throw { + tag: "ExpectedLambdaBlockOpen", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + + const parameters = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_DOT, productPattern); + const body = expr(cursor); + if (!tryConsume(cursor, TERMINATOR_CLOSE_BRACE)) { + throw { + tag: "ExpectedLambdaBlockClose", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + return Expr.lambda(parameters, body); + } + case "apply": + // apply(e ! e0, e1, e2) + if (!tryConsume(cursor, char('('))) { + throw { + tag: "ExpectedApplyStart", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + const callee = expr(cursor); + if (!tryConsume(cursor, char('!'))) { + throw { + tag: "ExpectedApplySeparator", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + + const args = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr); + return Expr.apply(callee, args); + case "match": + // match e { branch0 | branch1 | branch2 } + const arg = expr(cursor); + if (!tryConsume(cursor, char('{'))) { + throw { + tag: "ExpectedMatchBlockOpen", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + + const branches = delimitedTerminalSequence(cursor, DELIMITER_PIPE, TERMINATOR_CLOSE_BRACE, matchBranch); + return Expr.match(arg, branches) + case "=": + case "|": + case "!": + // These keywords CANNOT start an expression. + throw { + tag: "ExpectedExpression", + span: token.span + } as ParseError; + } + } +} + +function matchBranch(cursor: Cursor): MatchBranch { + // p . body + const p = pattern(cursor); + + if (!tryConsume(cursor, char("."))) { + throw { + tag: "ExpectedPatternBindingSymbol", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + const e = expr(cursor); + return Expr.matchBranch(p, e); +} + +function productPatternBinding(cursor: Cursor): ExprBinding { + // TODO: There's a potential here to do a lot of work on nice errors. + // `p = e` + // here there could be problems like the pattern being just a variable that uses `=` as its part + // `x= = 123` is valid. Maybe in case of erroneous things like + // `x=123` - which just parses as an identifier - we should analyze the identifier and product a suggestion? idk... + // or even... + // `x= 123` - which just parses as an identifier - we should analyze the identifier and product a suggestion? idk... + // or + // `x =123` - this one technically is a sequence of two identifiers. + const pattern = productPattern(cursor); + + if (!tryConsume(cursor, char('='))) { + throw { + tag: "ExpectedPatternBindingSymbol", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + const e = expr(cursor); + return Expr.exprBinding(pattern, e); +} + +function fieldAssignment(cursor: Cursor): FieldAssignment { + // `f = e` + const { name, span } = identifier(cursor, 'identifier'); + + if (!tryConsume(cursor, char('='))) { + throw { + tag: "ExpectedFieldAssignmentSymbol", // Specific error for this context + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError; + } + + const value = expr(cursor); + return Expr.fieldAssignment(name, value); +} + +function pattern(cursor: Cursor): Pattern { + // x + // (x, y, z) + // ((x, y), z) + // { a = x, b = y } + // { a, b = y } + // #foo + // #foo x + // #foo (x, y) + const token = patternStartToken(cursor); + + // === Tag Pattern logic === + if (token.tag === "tag") { + // Check Lookahead: Is this a Tagged Value? e.g. #foo x or #foo (a,b) + if (isNextTokenProductPatternStart(cursor)) { + // Parse the payload (must be a product pattern) + const payload = productPattern(cursor); + return Pattern.tagged(token.name, payload); + } else { + // Standalone Tag: #foo + return Pattern.tag(token.name); + } + } + + return finishProductPattern(cursor, token); +} + +function productPattern(cursor: Cursor): ProductPattern { + // x + // (x, y, z) + // ((x, y), z) + // { a = x, b = y } + // { a, b = y } + // + // tag patterns are syntax errors i.e. unexpected-tag-pattern (renderer then could explain that we don't allow tag patterns to be deeply nested) + + const token = patternStartToken(cursor); + return finishProductPattern(cursor, token); +} + +function finishProductPattern(cursor: Cursor, token: PatternStartToken): ProductPattern { + switch (token.tag) { + case "pattern_binding": + // foo + return ProductPattern.any(token.name); + + case "tuple_start": { + // ( p1, p2 ) + const items = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, productPattern); + return ProductPattern.tuple(items); + } + + case "record_start": { + // { a = p, b } + const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_BRACE, recordPatternField); + return ProductPattern.record(fields); + } + + case "tag": + throw { tag: "UnexpectedTagPattern", span: token.span } as ParseError; + + case "EOF": + throw { tag: "ExpectedPattern", span: token.span } as ParseError; + } +} + +function recordPatternField(cursor: Cursor): FieldPattern { + const { name, span } = identifier(cursor, 'identifier'); // Reuse existing identifier scanner + if (tryConsume(cursor, char('='))) { + const p = productPattern(cursor); + return ProductPattern.fieldPattern(name, p); + } else { + // Punning: { a } -> { a = a } + return ProductPattern.fieldPattern(name, ProductPattern.any(name)); + } +} + + +export function parse(input: string): Result { + const source = new SourceText(input); + const cursor = new Cursor(source); + + try { + skipWhitespaceAndComments(cursor); + const expression = expr(cursor); + + if (!cursor.eof()) { + return Result.error({ + tag: "UnexpectedToken", + expected: "End of File", + span: cursor.makeSpan(cursor.currentLocation()) + } as ParseError); + } + + return Result.ok(expression); + } catch (e) { + // TODO: This is a bit sketchy. We maybe forced to have "checked" Exceptions for `ParseError` by wrapping it in something that has a proper tag. + return Result.error(e as ParseError); + } +} diff --git a/src/parser/scanner.ts b/src/parser/scanner.ts index 6564ac6..813c835 100644 --- a/src/parser/scanner.ts +++ b/src/parser/scanner.ts @@ -6,7 +6,7 @@ import type { Cursor, CursorState, GenericScanError, NumberError, StringError } import { Result } from '../result'; import { Expr } from 'src/value'; -function skipWhitespaceAndComments(cursor: Cursor): number { +export function skipWhitespaceAndComments(cursor: Cursor): number { let totalConsumed = 0; while (true) { @@ -60,7 +60,13 @@ export type ExprScanError = | { tag: "InvalidIdentifier", text: string, kind: IdentifierKind, reason: IdentifierErrorReason, span: Span } // What kind of identifier were we trying to parse? -export type IdentifierKind = "identifier" | "variable_use" | "tag_construction" | "function_call"; +export type IdentifierKind = + | "identifier" + | "variable_use" + | "tag_construction" + | "function_call" + | "pattern_binding"; + export type IdentifierErrorReason = | { tag: "StartsWithDigit" } | { tag: "IsKeyword", kw: Keyword } @@ -73,12 +79,20 @@ export type ExprStartToken = | { tag: "function_name", name: string, span: Span } | { tag: "variable_use", name: string, span: Span } | { tag: "tag", name: string, span: Span } - | { tag: "tagged", name: string, span: Span } // TODO: This may be a bit weird. Actually we can lookahead and see if the next char after the identifier is NOT-AN-EXPRESSION start. | { tag: "tuple_start", span: Span } | { tag: "record_start", span: Span } | { tag: "keyword", kw: Keyword, span: Span } + // TODO: ger rid of EOF | { tag: "EOF", span: Span } +export type PatternStartToken = + | { tag: "pattern_binding", name: string, span: Span } + | { tag: "tag", name: string, span: Span } + | { tag: "tuple_start", span: Span } + | { tag: "record_start", span: Span } + // TODO: ger rid of EOF + | { tag: "EOF", span: Span }; + // === Identifier Scanners === // Returns the raw string. @@ -146,12 +160,14 @@ export function identifier(cursor: Cursor, kind: IdentifierKind): { name: string // === Literal Scanners === // throws ExprScanError +// TODO: handle trailing whitespace function number(cursor: Cursor): { value: number, span: Span } { const res = scanNumber(cursor); if (res.tag === "ok") { return res.value; } else { throw (res.error as ExprScanError); } } // throws ExprScanError +// TODO: handle trailing whitespace function string(cursor: Cursor): { value: string, span: Span } { const res = scanString(cursor); if (res.tag === "ok") { return res.value; } else { throw (res.error as ExprScanError); } @@ -159,10 +175,10 @@ function string(cursor: Cursor): { value: string, span: Span } { // === complex scanners === +// TODO: in exprStart/patternStart make sure whitespace is consumed after they succesfuly produce token. Should we build it in the functions? Or should that be in `parser.ts`? + // throws ExprScanError export function exprStart(cursor: Cursor): ExprStartToken { - skipWhitespaceAndComments(cursor); - const start = cursor.currentLocation(); if (cursor.eof()) { return { tag: "EOF", span: cursor.makeSpan(start) }; @@ -220,53 +236,105 @@ export function exprStart(cursor: Cursor): ExprStartToken { } } +export function patternStart(cursor: Cursor): PatternStartToken { + const start = cursor.currentLocation(); -// TODO: Need a Token to TokenKind function -// TODO: Need is_start_of_expression(token): boolean -// identifier -> true -// symbol # -> true -// symbol $ -> true -// symbol @ -> true -// symbol ( -> true -// symbol { -> true // this is actually context dependent. Sometimes its a start of a binding context { params . body } or { let-params . body }, and sometimes it is a record. But this function is gonna be used only in the first context -// symbol _ -> false -// number -> true -// string -> true -// keyword let -> true -// keyword fn -> true -// keyword apply -> true -// keyword = -> false -// keyword | -> false -// EOF -> false -// -// TODO: function that matches a token with a token_type (returns bool) + if (cursor.eof()) { + return { tag: "EOF", span: cursor.makeSpan(start) }; + } -// TODO: forbidden characters are -// '(' -// ')' -// '{' -// '}' -// '.' -// ',' -// '|' -// '$' -// '#' -// '@' -// '"' -// ' ' -// '\r' -// '\t' -// '\n' -// TODO: need function is_forbidden_char + const c = cursor.peek()!; + // === tuple === + if (c === char('(')) { + cursor.next(); + return { tag: "tuple_start", span: cursor.makeSpan(start) }; + } + // === record === + if (c === char('{')) { + cursor.next(); + return { tag: "record_start", span: cursor.makeSpan(start) }; + } + // === tag === + if (c === char('#')) { + cursor.next(); + const { name } = identifier(cursor, 'tag_construction'); + return { tag: "tag", name, span: cursor.makeSpan(start) }; + } -// === scanner functions === -// TODO: whitespace - consumes whitespace -// TODO: comment - consumes token -// TODO: raw_identifier - consumes raw identifier - then we can decide whether that was a keyword or an identifier -// TODO: string - consumes string like "foo bar\njfjdsajfksd" -// TODO: number - consumes number like 123123 or 000123 or 23919233.123 -// -// TODO: token - gives next token + // TODO: This is more subtle... -foo is a valid pattern name... I think I should restrict: can't have identifiers start with `-`? But then `-` itself can't be an identifier, which is a bit sad. + // TODO: This is gonna be different once we allow number/string literals as patterns. + if (isDigit(c) || c === char('"') || c === char('-')) { + throw { + tag: "UnexpectedCharacter", + char: c, + span: cursor.makeSpan(start) + } as ExprScanError; + } + // === pattern binding === + const { name } = identifier(cursor, 'pattern_binding'); + return { tag: "pattern_binding", name, span: cursor.makeSpan(start) }; +} + +export function isNextTokenExprStart(cursor: Cursor): boolean { + const state = cursor.save(); + try { + const token = exprStart(cursor); + + switch (token.tag) { + case "number": + case "string": + case "variable_use": + case "tag": + case "tuple_start": + case "record_start": + case "function_name": // e.g. my_func(x) + return true; + + case "keyword": + switch (token.kw) { + case "let": + case "fn": + case "match": + case "apply": + return true; + case "=": + case "|": + case "!": + return false; + } + + case "EOF": + return false; + + default: + return false; + } + + } catch (e) { + return false; + } finally { + cursor.restore(state); + } +} + +export function isNextTokenProductPatternStart(cursor: Cursor): boolean { + const state = cursor.save(); + try { + const token = patternStart(cursor); + switch (token.tag) { + case "pattern_binding": + case "tuple_start": + case "record_start": + return true; + default: + return false; + } + } catch (e) { + return false; + } finally { + cursor.restore(state); + } +} diff --git a/src/value.ts b/src/value.ts index 59aee8b..ef6302a 100644 --- a/src/value.ts +++ b/src/value.ts @@ -145,8 +145,8 @@ export type Expr = | { tag: "tag", tag_name: Tag } | { tag: "tagged", tag_name: Tag, expr: Expr } | { tag: "tuple", exprs: Expr[] } - | { tag: "record", fields: { name: FieldName, expr: Expr }[] } - | { tag: "match", arg: Expr, branches: { pattern: Pattern, body: Expr }[] } + | { tag: "record", fields: FieldAssignment[] } + | { tag: "match", arg: Expr, branches: MatchBranch[] } | { tag: "lambda", parameters: ProductPattern[], body: Expr } | { tag: "apply", callee: Expr, args: Expr[] } @@ -155,14 +155,23 @@ export type Literal = | { tag: "string", value: string } export type ExprBinding = { - var: ProductPattern, + pattern: ProductPattern, expr: Expr, } +export type MatchBranch = { + pattern: Pattern, + body: Expr, +} + +export type FieldAssignment = { name: FieldName, expr: Expr }; + export type ProductPattern = | { tag: "any", name: VariableName } | { tag: "tuple", patterns: ProductPattern[] } - | { tag: "record", fields: { field_name: FieldName, pattern: ProductPattern }[] } + | { tag: "record", fields: FieldPattern[] } + +export type FieldPattern = { fieldName: FieldName, pattern: ProductPattern }; export type Pattern = | ProductPattern @@ -212,12 +221,16 @@ export namespace Expr { export const tag = (tag_name: Tag): Expr => ({ tag: "tag", tag_name, }); export const tagged = (tag_name: Tag, expr: Expr): Expr => ({ tag: "tagged", tag_name, expr, }); export const tuple = (exprs: Expr[]): Expr => ({ tag: "tuple", exprs }); - export const record = (fields: { name: FieldName, expr: Expr }[]): Expr => ({ tag: "record", fields }); - export const match = (arg: Expr, branches: { pattern: Pattern; body: Expr }[]): Expr => ({ tag: "match", arg, branches, }); + export const record = (fields: FieldAssignment[]): Expr => ({ tag: "record", fields }); + export const match = (arg: Expr, branches: MatchBranch[]): Expr => ({ tag: "match", arg, branches, }); export const var_use = (name: VariableName): Expr => ({ tag: "var_use", name, }); export const let_ = (bindings: ExprBinding[], body: Expr): Expr => ({ tag: "let", bindings, body, }); export const apply = (callee: Expr, args: Expr[]): Expr => ({ tag: "apply", callee, args, }); export const lambda = (parameters: ProductPattern[], body: Expr): Expr => ({ tag: "lambda", parameters, body, }); + + export const matchBranch = (pattern: Pattern, expr: Expr): MatchBranch => ({ pattern, body: expr }); + export const exprBinding = (pattern: ProductPattern, expr: Expr): ExprBinding => ({ pattern, expr }); + export const fieldAssignment = (name: FieldName, expr: Expr): FieldAssignment => ({ name, expr }); } export namespace Value { @@ -230,6 +243,19 @@ export namespace Value { export const closure = (closure: Closure): Value => ({ tag: "closure", closure }); } +export namespace ProductPattern { + export const any = (name: VariableName): ProductPattern => ({ tag: "any", name }); + export const tuple = (patterns: ProductPattern[]): ProductPattern => ({ tag: "tuple", patterns }); + export const record = (fields: FieldPattern[]): ProductPattern => ({ tag: "record", fields }); + + export const fieldPattern = (fieldName: FieldName, pattern: ProductPattern): FieldPattern => ({ fieldName, pattern }); +} + +export namespace Pattern { + export const tag = (tag_name: Tag): Pattern => ({ tag: "tag", tag_name }); + export const tagged = (tag_name: Tag, pattern: Pattern): Pattern => ({ tag: "tagged", tag_name, pattern }); +} + // ===Errors=== type RuntimeError = | { tag: "FunctionLookupFailure", name: FunctionName } @@ -369,7 +395,7 @@ function eval_expr(program: Program, env: Env, e: Expr): Value { function eval_bindings(program: Program, env: Env, bindings: ExprBinding[]): Env { // note that `let { x = 123, y = x + 1 ... } is allowed. Ofcourse later bindings can't be referenced by earlier bindings (i.e. no recursion). let cur_env = env; - for (const { var: var_name, expr } of bindings) { + for (const { pattern: var_name, expr } of bindings) { const value = eval_expr(program, cur_env, expr); const res = match_product_pattern(var_name, value); if (res.tag === "failure") { @@ -514,8 +540,8 @@ function match_product_pattern_mut(frame: EnvFrame, pattern: ProductPattern, val case "record": if (value.tag !== "record") return { tag: "failure", pattern, value }; - for (const { field_name, pattern: p } of pattern.fields) { - const field_value = value.fields.get(field_name); + for (const { fieldName, pattern: p } of pattern.fields) { + const field_value = value.fields.get(fieldName); if (field_value === undefined) { return { tag: "failure", pattern, value }; } else { diff --git a/tmp_repl/tmp_repl.md b/tmp_repl/tmp_repl.md index 0c39734..ed9fdf3 100644 --- a/tmp_repl/tmp_repl.md +++ b/tmp_repl/tmp_repl.md @@ -15,3 +15,5 @@ npm install -D sass-embedded npx ts-node src/parser/cursor.test.ts +npx ts-node src/debug/repl.ts +