Move language files into dedicated folder

2026-02-07 10:43:30 +01:00 · 2026-02-07 10:43:30 +01:00 · 1b406899e0
commit 1b406899e0
parent 3d1cd89067
15 changed files with 7 additions and 343 deletions
--- a/src/parser/cursor.ts
+++ b/src/parser/cursor.ts
@@ -1,333 +0,0 @@
-import { char, NEW_LINE, CARRIAGE_RETURN, DOT, DIGIT_0, DIGIT_9, LOWERCASE_a, LOWERCASE_f, UPPERCASE_A, UPPERCASE_F, SPACE, TAB } from './source_text';
-import type { SourceText, Span, SourceLocation, CodePoint, StringIndex, CodePointIndex } from './source_text';
-import { Result } from '../result';
- 
-export type CursorState = {
-  index: CodePointIndex,
-  line: number,
-  column: number,
-  lastCharWasCR: boolean,
-}
-
-export class Cursor {
-  private index: CodePointIndex = 0;
-  private line: number = 1;
-  private column: number = 1;
-  // Track previous char to handle \r\n correctly
-  private lastCharWasCR: boolean = false;
-
-  constructor(readonly text: SourceText) {}
-
-  save(): CursorState {
-    return { index: this.index, line: this.line, column: this.column, lastCharWasCR: this.lastCharWasCR };
-  }
-
-  restore({ index, line, column, lastCharWasCR }: CursorState) {
-    this.index = index;
-    this.line = line;
-    this.column = column;
-    this.lastCharWasCR = lastCharWasCR;
-  }
-
-  eof(): boolean {
-    return this.index >= this.text.length;
-  }
-
-  peek(n: number = 0): CodePoint | undefined {
-    return this.text.chars[this.index + n]?.char;
-  }
-
-  next(): CodePoint | undefined {
-    const ref = this.text.chars[this.index];
-    if (!ref) return undefined;
-
-    const c = ref.char;
-    this.index++;
-
-    if (c === NEW_LINE) {
-      if (!this.lastCharWasCR) {
-        this.line++;
-        this.column = 1;
-      } else {
-        // We just saw \r, so this \n is part of \r\n. 
-        // We already bumped the line count on \r.
-        // Just reset the flag.
-        this.lastCharWasCR = false;
-      }
-    } else if (c === CARRIAGE_RETURN) {
-      this.line++;
-      this.column = 1;
-      this.lastCharWasCR = true;
-    } else {
-      this.column++;
-      this.lastCharWasCR = false;
-    }
-
-    return c;
-  }
-
-  get currentIndex(): CodePointIndex {
-    return this.index;
-  }
-
-  // TODO: unicode-index ~> string-offset, make that into a separate function.
-  currentOffset(): StringIndex {
-    return this.text.chars[this.index]?.offset ?? this.text.source.length;
-  }
-
-  currentLocation(): SourceLocation {
-    return { index: this.index, line: this.line, column: this.column };
-  }
-
-  makeSpan(start: SourceLocation): Span {
-    return {
-      start,
-      end: this.currentLocation(),
-    };
-  }
-
-  consumeWhile(pred: (c: CodePoint) => boolean): number {
-    let count = 0;
-    while (!this.eof()) {
-      const c = this.peek();
-      if (c === undefined || !pred(c)) break;
-      this.next();
-      count++;
-    }
-    return count;
-  }
-
-  // Helper to check for exact char matches quickly
-  match(c: CodePoint): boolean {
-    if (this.peek() === c) {
-        this.next();
-        return true;
-    }
-    return false;
-  }
-
-  // Checks if the next characters match the string. 
-  // If yes, consumes them and returns true.
-  // If no, touches nothing and returns false.
-  matchString(str: string): boolean {
-    if (this.index + str.length > this.text.length) return false;
-    
-    const slice = this.text.sliceByCp(this.index, this.index + str.length);
-    if (slice !== str) return false;
-
-    for (let i = 0; i < str.length; i++) {
-        // We must call next() to correctly update line/col tracking.
-        // We already know it matches, so we just burn through.
-        this.next();
-    }
-    return true;
-  }
-}
-
-// === Basic Scanners/Predicates ===
-export function isWhitespace(char: CodePoint): boolean {
-  return char === SPACE || char === TAB || char === NEW_LINE || char === CARRIAGE_RETURN;
-}
-
-export function isDigit(char: CodePoint): boolean {
-  return char >= DIGIT_0 && char <= DIGIT_9;
-}
-
-export type GenericScanError =
-  | { tag: "UnexpectedCharacter", char: CodePoint, span: Span }
-  | { tag: "UnexpectedEOF", span: Span }
-
-export type NumberError =
-  | { tag: "ExpectedNumber", span: Span }
-  | { tag: "InvalidNumber", reason: "NotFinite" | "MissingFractionalDigits", span: Span }
-
-export function scanNumber(cursor: Cursor): Result<{ value: number, span: Span }, NumberError> {
-  // number :=
-  //   | optional(`-`) digits optional(`.` digits)
-
-  const startNumberLocation = cursor.currentLocation();
-
-  let c: CodePoint;
-
-  // 1. Optional Sign
-  c = cursor.peek();
-  if (c === char("-")) {
-    cursor.next();
-  }
-
-  // 2. Integer Part
-  c = cursor.peek();
-  const integerPartDigitCount = cursor.consumeWhile(isDigit);
-  if (integerPartDigitCount === 0) {
-    return Result.error({
-      tag: "ExpectedNumber",
-      span: cursor.makeSpan(startNumberLocation),
-    });
-  }
-
-  // 3. Fractional Part
-  if (cursor.peek() === DOT) {
-    const dotLocation = cursor.currentLocation();
-
-    cursor.next(); // consume '.'
-
-    const fracPartDigitCount = cursor.consumeWhile(isDigit);
-    if (fracPartDigitCount === 0) {
-      return Result.error({
-        tag: "InvalidNumber",
-        reason: "MissingFractionalDigits",
-        span: cursor.makeSpan(dotLocation),
-      });
-    }
-  }
-
-  const text = cursor.text.sliceByCp(startNumberLocation.index, cursor.currentIndex);
-  const value = Number(text);
-
-  if (!Number.isFinite(value)) {
-    return Result.error({
-      tag: "InvalidNumber",
-      reason: "NotFinite",
-      span: cursor.makeSpan(startNumberLocation),
-    });
-  }
-  return Result.ok({
-    value,
-    span: cursor.makeSpan(startNumberLocation),
-  });
-}
-
-export type StringError =
-  | { tag: "InvalidEscape", reason: EscapeErrorReason, span: Span };
-
-export type EscapeErrorReason = 
-  | { tag: "UnknownEscapeSequence", char: CodePoint } // e.g. \k
-  | { tag: "UnicodeMissingBrace" }                    // \u without {
-  | { tag: "UnicodeNoDigits" }                        // \u{}
-  | { tag: "UnicodeUnclosed" }                        // \u{FF without }
-  | { tag: "UnicodeOverflow", value: number };        // \u{110000}
-
-export function scanString(cursor: Cursor): Result<{ value: string, span: Span }, StringError | GenericScanError> {
-  const start = cursor.currentLocation();
-
-  const firstChar = cursor.peek();
-  if (firstChar === undefined) {
-    return Result.error({ tag: "UnexpectedEOF", span: cursor.makeSpan(start) });
-  }
-  if (firstChar !== char('"')) {
-    return Result.error({ tag: "UnexpectedCharacter", char: firstChar, span: cursor.makeSpan(start) });
-  }
-  cursor.next();
-
-  let value = ""; // The actual string content
-  
-  while (true) {
-    if (cursor.eof()) {
-      return Result.error({ tag: "UnexpectedEOF", span: cursor.makeSpan(start) });
-    }
-
-    const c = cursor.peek();
-
-    // 1. End of string
-    if (c === char('"')) {
-      cursor.next(); // consume closing quote
-      break;
-    }
-
-    if (c === char('\\')) {
-      // 2. Escape Sequences
-      const escapeStart = cursor.currentLocation();
-      cursor.next(); // consume backslash
-      const escaped = cursor.peek();
-      
-      switch (escaped) {
-        case char('n'): value += '\n'; cursor.next(); break;
-        case char('r'): value += '\r'; cursor.next(); break;
-        case char('t'): value += '\t'; cursor.next(); break;
-        case char('\\'): value += '\\'; cursor.next(); break;
-        case char("0"): value += "\0"; cursor.next(); break;
-        case char('"'): value += '"'; cursor.next(); break;
-        // Unicode Escape: \u{XXXX}
-        case char('u'): {
-          cursor.next(); // consume 'u'
-          
-          // Expect '{'
-          const braceStart = cursor.currentLocation();
-          if (cursor.peek() !== char('{')) {
-              return Result.error({ tag: "InvalidEscape", reason: { tag: "UnicodeMissingBrace" }, span: cursor.makeSpan(braceStart) });
-          }
-          cursor.next(); // consume '{'
-
-          // Consume Hex Digits
-          const hexStart = cursor.currentIndex;
-          const hexCount = cursor.consumeWhile(c => 
-            (c >= DIGIT_0 && c <= DIGIT_9) ||
-            (c >= LOWERCASE_a && c <= LOWERCASE_f) ||
-            (c >= UPPERCASE_A && c <= UPPERCASE_F)
-          );
-
-          if (hexCount === 0) {
-             return Result.error({ tag: "InvalidEscape", reason: { tag: "UnicodeNoDigits" }, span: cursor.makeSpan(braceStart) });
-          }
-
-          // Expect '}'
-          if (cursor.peek() !== char("}")) {
-            return Result.error({ tag: "InvalidEscape", reason: { tag: "UnicodeUnclosed" }, span: cursor.makeSpan(braceStart) });
-          }
-          cursor.next(); // consume '}'
-
-          // Convert & Append
-          const hexStr = cursor.text.sliceByCp(hexStart, hexStart + hexCount);
-          const codePoint = parseInt(hexStr, 16);
-
-          if (codePoint > 0x10FFFF) {
-             return Result.error({ tag: "InvalidEscape", reason: { tag: "UnicodeOverflow", value: codePoint }, span: cursor.makeSpan(braceStart) });
-          }
-
-          value += String.fromCodePoint(codePoint);
-          break;
-        }
-        default:
-          return Result.error({ 
-            tag: "InvalidEscape", 
-            reason: { tag: "UnknownEscapeSequence", char: escaped }, 
-            span: cursor.makeSpan(escapeStart) 
-          });
-      }
-    } else {
-      // 3. Regular character
-      // Optimization: consume chunks of non-special chars for speed?
-      // For now, char-by-char is fine.
-      cursor.next();
-      // Note: We use ! because we checked EOF at loop start
-      value += String.fromCodePoint(c!); 
-    }
-  }
-
-  return Result.ok({
-    value,
-    span: cursor.makeSpan(start)
-  });
-}
-
-// TODO: rendering of errors
-// function renderStringError(err: StringError): string {
-//   switch (err.tag) {
-//     case "ExpectedQuote": return "Expected a string starting with \"";
-//     case "UnexpectedEOF": return "Unterminated string literal";
-//     case "InvalidEscape": 
-//       const k = err.kind;
-//       switch (k.tag) {
-//         case "Unknown": 
-//           return `Unknown escape sequence '\\${String.fromCodePoint(k.char)}'`;
-//         case "UnicodeMissingBrace": 
-//           return "Unicode escape must start with '{', e.g. \\u{1F600}";
-//         case "UnicodeNoDigits": 
-//           return "Empty unicode escape \\u{}";
-//         case "UnicodeUnclosed": 
-//           return "Expected '}' to close unicode escape";
-//         case "UnicodeOverflow": 
-//           return `Unicode code point 0x${k.value.toString(16)} is too large (max 0x10FFFF)`;
-//       }
-//   }
-// }