Move language files into dedicated folder

2026-02-07 10:43:30 +01:00 · 2026-02-07 10:43:30 +01:00 · 1b406899e0
commit 1b406899e0
parent 3d1cd89067
15 changed files with 7 additions and 343 deletions
--- a/src/parser/parser.ts
+++ b/src/parser/parser.ts
@ -1,467 +0,0 @@
-import { Expr, ExprBinding, FieldAssignment, FieldPattern, MatchBranch, Pattern, ProductPattern } from '../value';
-import { Cursor } from './cursor';
-import { ExprScanError, exprStart, ExprStartToken, IdentifierKind, identifierScanner, isNextTokenExprStart, isNextTokenProductPatternStart, patternStart, PatternStartToken, skipWhitespaceAndComments } from './scanner';
-import { char, CodePoint, SourceText, Span } from './source_text';
-import { Result } from '../result';
-
-// CONVENTION: Every parser is responsible for consuming the whitespace/comments that trail whatever it parsed.
-//             No parser is responsible for skipping whitespace/comments at its start - only the final `parse` that's exposed to the public does that.
-
-// Code points used as delimiters/terminators; they are handed to `delimitedTerminalSequence` and `tryConsume` below.
-const DELIMITER_COMMA = char(',');
-const DELIMITER_PIPE = char('|');
-const TERMINATOR_CLOSE_PAREN = char(')');
-const TERMINATOR_CLOSE_BRACE = char('}');
-const TERMINATOR_DOT = char('.');
-
-// TODO: Errors have to have better contextual information
-// Everything the parsers in this file throw (and `parse` returns on failure): either an error coming from the scanner
-// (`ExprScanError`) or one of the tagged objects below. Each variant carries the `span` at which it was raised.
-export type ParseError =
-  | ExprScanError
-  | { tag: "UnexpectedToken", expected: string, span: Span }
-  // Raised by `delimitedTerminalSequence` when an item is followed by neither the delimiter nor the terminator.
-  | { tag: "UnexpectedTokenWhileParsingSequence", reason: "ExpectedDelimiterOrTerminator", received: CodePoint, expectedDelimiter: CodePoint, expectedTerminator: CodePoint, span: Span }
-
-  // === Specific Context Errors ===
-  | { tag: "ExpectedExpression", span: Span }          // Expected start of expr (e.g. hit EOF or keyword)
-  | { tag: "ExpectedFieldAssignmentSymbol", span: Span }  // Expected '=' in field assignment
-  | { tag: "ExpectedPatternAssignmentSymbol", span: Span }  // Expected '=' in pattern assignment
-  | { tag: "ExpectedPatternBindingSymbol", span: Span }  // Expected '.' in pattern binding
-  | { tag: "ExpectedFunctionCallStart", span: Span }   // Expected '(' after function name
-  | { tag: "ExpectedRecordOpen", span: Span }        // Expected '(' after ':'
-  | { tag: "ExpectedLetBlockOpen", span: Span }        // Expected '{' after 'let'
-  | { tag: "ExpectedLetBlockClose", span: Span }       // Expected '}' at end of 'let' expression
-  | { tag: "ExpectedMatchBlockOpen", span: Span }        // Expected '{' after 'match'
-  | { tag: "ExpectedMatchBlockClose", span: Span }        // Expected '}' at end of 'match' expression
-  | { tag: "ExpectedLambdaBlockOpen", span: Span }   // Expected '{' after `fn`
-  | { tag: "ExpectedLambdaBlockClose", span: Span }   // Expected '}' at end of `fn` expression
-  | { tag: "ExpectedApplyStart", span: Span }          // Expected '(' after 'apply'
-  | { tag: "ExpectedApplySeparator", span: Span }      // Expected '!' inside 'apply'
-  | { tag: "UnexpectedTagPattern", span: Span }       // Found #tag where product pattern expected
-  | { tag: "ExpectedPattern", span: Span }            // EOF or invalid start of pattern
-  | { tag: "ExpectedRecordPatternOpen", span: Span }  // Expected '(' at start of record pattern
-  | { tag: "ExpectedRecordField", span: Span };       // Expected identifier in record pattern
-
-  // TODO: Delete?
-// String-literal union naming the context-specific error tags. Nothing else in this file refers to it.
-// NOTE(review): it has drifted from `ParseError` - "ExpectedLambdaBlockOpen" and "ExpectedLambdaBlockClose" are missing here.
-export type Expectation =
-  | "ExpectedExpression"
-  | "ExpectedFieldAssignmentSymbol"
-  | "ExpectedPatternAssignmentSymbol"
-  | "ExpectedPatternBindingSymbol"
-  | "ExpectedFunctionCallStart"
-  | "ExpectedRecordOpen"
-  | "ExpectedLetBlockOpen"
-  | "ExpectedLetBlockClose"
-  | "ExpectedMatchBlockOpen"
-  | "ExpectedMatchBlockClose"
-  | "ExpectedApplyStart"
-  | "ExpectedApplySeparator"
-  | "UnexpectedTagPattern"
-  | "ExpectedPattern"
-  | "ExpectedRecordField"
-  | "ExpectedRecordPatternOpen"
-
-// Shape of a parser as accepted by `delimitedTerminalSequence`: it takes the cursor and returns the parsed value.
-// The parsers in this file report failure by throwing a `ParseError` rather than through the return value.
-export type Parser<T> = (cursor: Cursor) => T
-
-// === Helpers ===
-// Prefer this over calling `cursor.next()` directly - in most cases the WS/comments that follow must be consumed as well.
-// Reads one code point via `cursor.next()`; when one was read, the whitespace/comments after it are skipped too.
-// Returns whatever `cursor.next()` returned (`undefined` when it had nothing to give).
-function nextWS(cursor: Cursor): CodePoint | undefined {
-  const consumed = cursor.next();
-  if (consumed !== undefined) {
-    skipWhitespaceAndComments(cursor);
-  }
-  return consumed;
-}
-
-// Consumes `expected` (plus the whitespace/comments after it) only if it is the next code point.
-// Returns true if the char was consumed, false if the cursor was left untouched.
-function tryConsume(cursor: Cursor, expected: CodePoint): boolean {
-  if (cursor.peek() !== expected) {
-    return false;
-  }
-  nextWS(cursor);
-  return true;
-}
-
-// TODO: Perhaps when it comes to terminators, we should allow the user of this function to create better contextual errors?
-// Parses a sequence of `p` items separated by `delimiter` and closed by `terminator`, consuming the terminator.
-// Returns the parsed items in order. Throws an "UnexpectedTokenWhileParsingSequence" ParseError when an item is
-// followed by something that is neither the delimiter nor the terminator.
-function delimitedTerminalSequence<A>(cursor: Cursor, delimiter: CodePoint, terminator: CodePoint, p: Parser<A>): A[] {
-  // With `.` as the terminator and `,` as the delimiter, the accepted sequences are:
-  //   list(p) :=
-  //     | p
-  //     | p , p
-  //     | p , p , p
-  //     | ...
-  //   delimitedTerminalSequence(p) :=
-  //     | .
-  //     | optional(,) list(p) optional(,) .
-  //
-  // Every call site in this file supplies a well-defined terminator character:
-  //   tuples:         ( a, b, c )           -> `)`
-  //   records:        :( f0 = e0, f1 = e1 ) -> `)`
-  //   function call:  f(a, b, c)            -> `)`
-  //   let-binding:    let { p = e . body }  -> `.`
-  //   fn-abstraction: fn { p0, p1 . body }  -> `.`
-  //   fn-application: apply(e ! e0, e1, e2) -> `)`
-  //   match branches: match e { b0 | b1 }   -> `}` (delimited by `|`)
-  const collected: A[] = [];
-
-  // Empty sequence: the terminator comes first.
-  if (tryConsume(cursor, terminator)) {
-    return collected;
-  }
-
-  // A single leading delimiter is allowed and ignored.
-  tryConsume(cursor, delimiter);
-
-  for (;;) {
-    // `p` is responsible for getting rid of the whitespace after whatever it parsed.
-    collected.push(p(cursor));
-
-    if (tryConsume(cursor, terminator)) {
-      return collected;
-    }
-
-    if (!tryConsume(cursor, delimiter)) {
-      throw {
-        tag: "UnexpectedTokenWhileParsingSequence",
-        received: cursor.peek(),
-        reason: "ExpectedDelimiterOrTerminator",
-        expectedDelimiter: delimiter,
-        expectedTerminator: terminator,
-        span: cursor.makeSpan(cursor.currentLocation()),
-      } as ParseError;
-    }
-
-    // A delimiter was consumed. If the terminator follows, that delimiter was trailing;
-    // otherwise it was a separator and another item must follow.
-    if (tryConsume(cursor, terminator)) {
-      return collected;
-    }
-  }
-}
-
-// Scans the token that starts an expression via `exprStart`, then skips the whitespace/comments after it.
-function exprStartToken(cursor: Cursor): ExprStartToken {
-  const startToken = exprStart(cursor);
-  skipWhitespaceAndComments(cursor);
-  return startToken;
-}
-
-// Scans the token that starts a pattern via `patternStart`, then skips the whitespace/comments after it.
-function patternStartToken(cursor: Cursor): PatternStartToken {
-  const startToken = patternStart(cursor);
-  skipWhitespaceAndComments(cursor);
-  return startToken;
-}
-
-// Scans an identifier of the requested `kind` via `identifierScanner`, then skips the whitespace/comments after it.
-// Returns the scanner's `{ name, span }` result unchanged.
-function identifier(cursor: Cursor, kind: IdentifierKind): { name: string, span: Span } {
-  const scanned = identifierScanner(cursor, kind);
-  skipWhitespaceAndComments(cursor);
-  return scanned;
-}
-
-// === Expression Parsers ===
-
-// Parses a single expression, dispatching on the start token produced by `exprStartToken`.
-// Forms handled (one per `case` below): number, string, variable use, `#tag` with an optional payload expression,
-// tuple `( ... )`, named function call `f( ... )`, record `:( ... )`, `let { ... . body }`, `fn { ... . body }`,
-// `apply( e ! ... )` and `match e { ... }`.
-// Throws a ParseError on EOF, when an expected opening/closing/separator symbol is missing, and when the start
-// token is one of the keywords `=`, `|`, `!`.
-function expr(cursor: Cursor): Expr {
-  const token = exprStartToken(cursor);
-  // TODO: You need to include the spans and perhaps other meta-info.
-  switch (token.tag) {
-    case "EOF":
-      throw { 
-          tag: "UnexpectedToken", 
-          expected: "Expression", 
-          span: token.span 
-      } as ParseError;
-    case "number":
-      return Expr.number(token.value);
-    case "string":
-      return Expr.string(token.text);
-    case "variable_use":
-      return Expr.var_use(token.name);
-    case "tag":
-      // #true
-      // #foo e
-      // The payload is parsed only when the lookahead says an expression starts here; otherwise the tag stands alone.
-      if (isNextTokenExprStart(cursor)) {
-        const value = expr(cursor);
-        return Expr.tagged(token.name, value);
-      } else {
-        return Expr.tag(token.name);
-      }
-    case "tuple_start":
-      // e.g. (a, b, c)
-      // No `(` is consumed here; the sequence parser consumes the closing `)`.
-      const items = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
-      return Expr.tuple(items);
-    case "function_name":
-      // e.g. my_func(arg1, arg2)
-      // parse a `,` delimiter sequence of expr
-      // need to consume )
-      if (!tryConsume(cursor, char('('))) {
-        throw { 
-          tag: "ExpectedFunctionCallStart", 
-          span: cursor.makeSpan(cursor.currentLocation()) 
-        } as ParseError;
-      }
-      const args = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
-      return Expr.call(token.name, args);
-    case "keyword":
-      switch (token.kw) {
-        case ":":
-          // e.g. :( x = 1, y = 2 )
-          // or   : ( x = 1, y = 2 )
-          if (!tryConsume(cursor, char('('))) {
-            throw { 
-              tag: "ExpectedRecordOpen", 
-              span: cursor.makeSpan(cursor.currentLocation()) 
-            } as ParseError;
-          }
-          const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, fieldAssignment);
-          return Expr.record(fields);
-        case "let":
-          // let { p0 = e0, p1 = e2 . body }
-          if (!tryConsume(cursor, char('{'))) {
-            throw { 
-              tag: "ExpectedLetBlockOpen", 
-              span: cursor.makeSpan(cursor.currentLocation()) 
-            } as ParseError;
-          }
-          // The bindings run up to (and including) the `.`; the body follows and must be closed by `}`.
-          const bindings = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_DOT, productPatternBinding);
-          const body = expr(cursor);
-
-          if (!tryConsume(cursor, TERMINATOR_CLOSE_BRACE)) {
-            throw { 
-              tag: "ExpectedLetBlockClose", 
-              span: cursor.makeSpan(cursor.currentLocation()) 
-            } as ParseError;
-          }
-          return Expr.let_(bindings, body);
-        case "fn": {
-          // fn { p0, p1, p2 . body }
-          if (!tryConsume(cursor, char('{'))) {
-            throw { 
-              tag: "ExpectedLambdaBlockOpen", 
-              span: cursor.makeSpan(cursor.currentLocation()) 
-            } as ParseError;
-          }
-
-          const parameters = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_DOT, productPattern);
-          const body = expr(cursor);
-          if (!tryConsume(cursor, TERMINATOR_CLOSE_BRACE)) {
-            throw { 
-              tag: "ExpectedLambdaBlockClose", 
-              span: cursor.makeSpan(cursor.currentLocation()) 
-            } as ParseError;
-          }
-          return Expr.lambda(parameters, body);
-        }
-        case "apply":
-          // apply(e ! e0, e1, e2)
-          if (!tryConsume(cursor, char('('))) {
-            throw { 
-              tag: "ExpectedApplyStart", 
-              span: cursor.makeSpan(cursor.currentLocation()) 
-            } as ParseError;
-          }
-          const callee = expr(cursor);
-          if (!tryConsume(cursor, char('!'))) {
-             throw { 
-               tag: "ExpectedApplySeparator", 
-               span: cursor.makeSpan(cursor.currentLocation()) 
-             } as ParseError;
-           }
-
-          const args = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, expr);
-          return Expr.apply(callee, args);
-        case "match":
-          // match e { branch0 | branch1 | branch2 }
-          const arg = expr(cursor);
-          if (!tryConsume(cursor, char('{'))) {
-            throw { 
-              tag: "ExpectedMatchBlockOpen", 
-              span: cursor.makeSpan(cursor.currentLocation()) 
-            } as ParseError;
-          }
-
-          // The closing `}` is consumed by the sequence parser, as the terminator of the branch list.
-          const branches = delimitedTerminalSequence(cursor, DELIMITER_PIPE, TERMINATOR_CLOSE_BRACE, matchBranch);
-          return Expr.match(arg, branches)
-        case "=":
-        case "|":
-        case "!":
-          // These keywords CANNOT start an expression.
-          throw { 
-            tag: "ExpectedExpression", 
-            span: token.span 
-          } as ParseError;
-      }
-  }
-}
-
-// Parses one `match` branch of the form `p . body`.
-// Throws an "ExpectedPatternBindingSymbol" ParseError when the pattern is not followed by `.`.
-function matchBranch(cursor: Cursor): MatchBranch {
-  const branchPattern = pattern(cursor);
-
-  const sawDot = tryConsume(cursor, char("."));
-  if (sawDot) {
-    const branchBody = expr(cursor);
-    return Expr.matchBranch(branchPattern, branchBody);
-  }
-
-  throw { 
-    tag: "ExpectedPatternBindingSymbol", 
-    span: cursor.makeSpan(cursor.currentLocation()) 
-  } as ParseError;
-}
-
-// Parses one `let` binding of the form `p = e`, where `p` is a product pattern and `e` an expression.
-// Throws an "ExpectedPatternAssignmentSymbol" ParseError when the pattern is not followed by `=`.
-function productPatternBinding(cursor: Cursor): ExprBinding {
-  // TODO: There's a potential here to do a lot of work on nice errors.
-  // `p = e`
-  // here there could be problems like the pattern being just a variable that uses `=` as its part
-  // `x= = 123` is valid. Maybe in case of erroneous things like
-  // `x=123` - which just parses as an identifier - we should analyze the identifier and produce a suggestion? idk...
-  // or even...
-  // `x= 123` - which just parses as an identifier - we should analyze the identifier and produce a suggestion? idk...
-  // or
-  // `x =123` - this one technically is a sequence of two identifiers.
-  const boundPattern = productPattern(cursor);
-
-  if (!tryConsume(cursor, char('='))) {
-    // A missing `=` is the assignment-symbol error; "ExpectedPatternBindingSymbol" is the tag for a missing `.`.
-    throw {
-      tag: "ExpectedPatternAssignmentSymbol",
-      span: cursor.makeSpan(cursor.currentLocation())
-    } as ParseError;
-  }
-  const e = expr(cursor);
-  return Expr.exprBinding(boundPattern, e);
-}
-
-// Parses one record field assignment of the form `f = e`.
-// Throws an "ExpectedFieldAssignmentSymbol" ParseError when the field name is not followed by `=`.
-function fieldAssignment(cursor: Cursor): FieldAssignment {
-  // Only the name is needed; the identifier's span is not used by this parser.
-  const { name } = identifier(cursor, 'field_name');
-
-  if (!tryConsume(cursor, char('='))) {
-    throw {
-      tag: "ExpectedFieldAssignmentSymbol", // Specific error for this context
-      span: cursor.makeSpan(cursor.currentLocation())
-    } as ParseError;
-  }
-
-  const value = expr(cursor);
-  return Expr.fieldAssignment(name, value);
-}
-
-// Parses a full pattern: either a tag pattern or a product pattern.
-//   #foo         -> standalone tag
-//   #foo x       -> tag with a product-pattern payload
-//   #foo (x, y)  -> tag with a product-pattern payload
-//   anything else is handed to `finishProductPattern` together with the already-scanned start token.
-function pattern(cursor: Cursor): Pattern {
-  const token = patternStartToken(cursor);
-
-  // Not a tag: the start token belongs to a product pattern (or is an error that `finishProductPattern` reports).
-  if (token.tag !== "tag") {
-    return finishProductPattern(cursor, token);
-  }
-
-  // Standalone tag unless the lookahead says a product pattern starts right after it.
-  if (!isNextTokenProductPatternStart(cursor)) {
-    return Pattern.tag(token.name);
-  }
-
-  // The payload must be a product pattern.
-  const payload = productPattern(cursor);
-  return Pattern.tagged(token.name, payload);
-}
-
-// Parses a product pattern: scans the pattern start token and lets `finishProductPattern` do the rest.
-// A tag in this position is a syntax error ("UnexpectedTagPattern", raised by `finishProductPattern`);
-// a renderer could then explain that tag patterns are not allowed to be deeply nested.
-function productPattern(cursor: Cursor): ProductPattern {
-  return finishProductPattern(cursor, patternStartToken(cursor));
-}
-
-// Completes a product pattern whose start token has already been scanned.
-//   pattern_binding -> a plain binding, e.g. `foo`
-//   tuple_start     -> `( p1, p2 )`
-//   keyword `:`     -> record pattern `:( a = p, b )`
-// Throws "UnexpectedTagPattern" for a tag, "ExpectedRecordPatternOpen" when `:` is not followed by `(`,
-// and "ExpectedPattern" for EOF or any other keyword.
-function finishProductPattern(cursor: Cursor, token: PatternStartToken): ProductPattern {
-  switch (token.tag) {
-    case "pattern_binding":
-      // foo
-      return ProductPattern.any(token.name);
-
-    case "tuple_start": {
-      // ( p1, p2 )
-      const items = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, productPattern);
-      return ProductPattern.tuple(items);
-    }
-
-    case "tag":
-      throw { tag: "UnexpectedTagPattern", span: token.span } as ParseError;
-
-    case "keyword": {
-      switch (token.kw) {
-        case ":": {
-          // :( a = p, b )
-          // The opener must be `(` so that it pairs with the `)` terminator of the field sequence below.
-          if (!tryConsume(cursor, char('('))) {
-            throw {
-              tag: "ExpectedRecordPatternOpen",
-              span: cursor.makeSpan(cursor.currentLocation())
-            } as ParseError;
-          }
-          const fields = delimitedTerminalSequence(cursor, DELIMITER_COMMA, TERMINATOR_CLOSE_PAREN, recordPatternField);
-          return ProductPattern.record(fields);
-        }
-        default:
-          // These keywords CANNOT start a pattern.
-          throw { tag: "ExpectedPattern", span: token.span } as ParseError;
-      }
-    }
-
-    case "EOF":
-      throw { tag: "ExpectedPattern", span: token.span } as ParseError;
-  }
-}
-
-// Parses one field of a record pattern: either `name = p` or the punned form `name`.
-function recordPatternField(cursor: Cursor): FieldPattern {
-  // Only the name is needed; the identifier's span is not used by this parser.
-  const { name } = identifier(cursor, 'field_name');
-  if (tryConsume(cursor, char('='))) {
-    const p = productPattern(cursor);
-    return ProductPattern.fieldPattern(name, p);
-  } else {
-    // Punning: :( a ) -> :( a = a )
-    return ProductPattern.fieldPattern(name, ProductPattern.any(name));
-  }
-}
-
-
-// Public entry point: parses `source` as exactly one expression.
-// Leading whitespace/comments are skipped here (the inner parsers only clean up after themselves).
-// Returns `Result.ok` with the expression, or `Result.error` with a ParseError when parsing throws or when
-// input remains after the expression.
-export function parse(source: SourceText): Result<Expr, ParseError> {
-  const cursor = new Cursor(source);
-
-  try {
-    skipWhitespaceAndComments(cursor);
-    const parsed = expr(cursor);
-
-    if (cursor.eof()) {
-      return Result.ok(parsed);
-    }
-
-    // Something is left over after a complete expression.
-    const trailingInput = {
-      tag: "UnexpectedToken",
-      expected: "EndOfFile",
-      span: cursor.makeSpan(cursor.currentLocation())
-    } as ParseError;
-    return Result.error(trailingInput);
-  } catch (e) {
-    // TODO: This is a bit sketchy. We may be forced to have "checked" Exceptions for `ParseError` by wrapping it in something that has a proper tag.
-    // Whatever was thrown is reported as a ParseError without inspecting it.
-    return Result.error(e as ParseError);
-  }
-}