Move language files into dedicated folder
This commit is contained in:
parent
3d1cd89067
commit
1b406899e0
15 changed files with 7 additions and 343 deletions
|
|
@ -1,333 +0,0 @@
|
|||
import { char, NEW_LINE, CARRIAGE_RETURN, DOT, DIGIT_0, DIGIT_9, LOWERCASE_a, LOWERCASE_f, UPPERCASE_A, UPPERCASE_F, SPACE, TAB } from './source_text';
|
||||
import type { SourceText, Span, SourceLocation, CodePoint, StringIndex, CodePointIndex } from './source_text';
|
||||
import { Result } from '../result';
|
||||
|
||||
/**
 * Snapshot of a `Cursor`'s mutable position, as produced by `Cursor.save()`
 * and consumed by `Cursor.restore()`.
 */
export type CursorState = {
  /** Code-point index of the next character to be read. */
  index: CodePointIndex;
  /** Line number at the time of the snapshot (a fresh cursor starts at 1). */
  line: number;
  /** Column number at the time of the snapshot (a fresh cursor starts at 1). */
  column: number;
  /** Whether the most recently consumed character was a carriage return. */
  lastCharWasCR: boolean;
};
|
||||
|
||||
/**
 * A forward-moving reader over a `SourceText` that tracks the current
 * code-point index together with a 1-based line and column.
 *
 * Line tracking treats `\n`, a lone `\r`, and the pair `\r\n` each as a
 * single line break.
 */
export class Cursor {
  private index: CodePointIndex = 0;
  private line: number = 1;
  private column: number = 1;
  // Track previous char to handle \r\n correctly
  private lastCharWasCR: boolean = false;

  constructor(readonly text: SourceText) {}

  /** Captures the current position so it can be restored later (backtracking). */
  save(): CursorState {
    return { index: this.index, line: this.line, column: this.column, lastCharWasCR: this.lastCharWasCR };
  }

  /** Rewinds (or fast-forwards) the cursor to a state previously returned by `save()`. */
  restore({ index, line, column, lastCharWasCR }: CursorState) {
    this.index = index;
    this.line = line;
    this.column = column;
    this.lastCharWasCR = lastCharWasCR;
  }

  /** Returns true once the index has reached `text.length`. */
  eof(): boolean {
    return this.index >= this.text.length;
  }

  /**
   * Returns the code point `n` positions ahead of the cursor without
   * consuming anything, or `undefined` if there is no such character.
   */
  peek(n: number = 0): CodePoint | undefined {
    return this.text.chars[this.index + n]?.char;
  }

  /**
   * Consumes one code point and returns it, updating line/column tracking.
   * Returns `undefined` (and does not move) when nothing is left to read.
   */
  next(): CodePoint | undefined {
    const ref = this.text.chars[this.index];
    if (!ref) return undefined;

    const c = ref.char;
    this.index++;

    if (c === NEW_LINE) {
      if (!this.lastCharWasCR) {
        this.line++;
        this.column = 1;
      } else {
        // We just saw \r, so this \n is part of \r\n.
        // We already bumped the line count on \r.
        // Just reset the flag.
        this.lastCharWasCR = false;
      }
    } else if (c === CARRIAGE_RETURN) {
      this.line++;
      this.column = 1;
      this.lastCharWasCR = true;
    } else {
      this.column++;
      this.lastCharWasCR = false;
    }

    return c;
  }

  /** The code-point index of the next character to be read. */
  get currentIndex(): CodePointIndex {
    return this.index;
  }

  // TODO: unicode-index ~> string-offset, make that into a separate function.
  /**
   * Returns the `offset` recorded for the current character, or
   * `text.source.length` when the cursor is past the last character.
   */
  currentOffset(): StringIndex {
    return this.text.chars[this.index]?.offset ?? this.text.source.length;
  }

  /** Returns the current index, line and column as a `SourceLocation`. */
  currentLocation(): SourceLocation {
    return { index: this.index, line: this.line, column: this.column };
  }

  /** Builds a span running from `start` to the cursor's current location. */
  makeSpan(start: SourceLocation): Span {
    return {
      start,
      end: this.currentLocation(),
    };
  }

  /**
   * Consumes characters for as long as `pred` accepts them.
   *
   * @returns the number of code points consumed (possibly 0).
   */
  consumeWhile(pred: (c: CodePoint) => boolean): number {
    let count = 0;
    while (!this.eof()) {
      const c = this.peek();
      if (c === undefined || !pred(c)) break;
      this.next();
      count++;
    }
    return count;
  }

  // Helper to check for exact char matches quickly
  /** Consumes the next character and returns true only if it equals `c`. */
  match(c: CodePoint): boolean {
    if (this.peek() === c) {
      this.next();
      return true;
    }
    return false;
  }

  /**
   * Checks if the next characters match the string.
   * If yes, consumes them and returns true.
   * If no, touches nothing and returns false.
   */
  matchString(str: string): boolean {
    // The cursor is indexed by code point, while `str.length` counts UTF-16
    // code units. The two differ for characters outside the BMP, so count
    // code points explicitly (string iteration yields one item per code point).
    const codePointCount = Array.from(str).length;

    if (this.index + codePointCount > this.text.length) return false;

    const slice = this.text.sliceByCp(this.index, this.index + codePointCount);
    if (slice !== str) return false;

    for (let i = 0; i < codePointCount; i++) {
      // We must call next() to correctly update line/col tracking.
      // We already know it matches, so we just burn through.
      this.next();
    }
    return true;
  }
}
|
||||
|
||||
// === Basic Scanners/Predicates ===
|
||||
/** True when the code point is a space, a tab, a line feed or a carriage return. */
export function isWhitespace(char: CodePoint): boolean {
  switch (char) {
    case SPACE:
    case TAB:
    case NEW_LINE:
    case CARRIAGE_RETURN:
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/** True when the code point lies in the inclusive range `DIGIT_0`..`DIGIT_9`. */
export function isDigit(char: CodePoint): boolean {
  const atLeastZero = DIGIT_0 <= char;
  const atMostNine = char <= DIGIT_9;
  return atLeastZero && atMostNine;
}
|
||||
|
||||
/** Errors that any scanner may report, independent of the token being scanned. */
export type GenericScanError =
  /** A character was found where a different one was required. */
  | { tag: "UnexpectedCharacter"; char: CodePoint; span: Span }
  /** The input ended before the token was complete. */
  | { tag: "UnexpectedEOF"; span: Span };
|
||||
|
||||
/** Errors reported by `scanNumber`. */
export type NumberError =
  /** No digits were found where the integer part should start. */
  | { tag: "ExpectedNumber"; span: Span }
  /** Digits were found, but the literal as a whole is not a usable number. */
  | { tag: "InvalidNumber"; reason: "NotFinite" | "MissingFractionalDigits"; span: Span };
|
||||
|
||||
/**
 * Scans a decimal number literal starting at the cursor.
 *
 * Grammar:
 *   number := optional(`-`) digits optional(`.` digits)
 *
 * @param cursor positioned at the first character of the candidate number.
 * @returns the parsed value and its span on success; otherwise
 *   - `ExpectedNumber` when no digit follows the optional sign,
 *   - `InvalidNumber / MissingFractionalDigits` when `.` is not followed by a digit
 *     (the span starts at the `.`),
 *   - `InvalidNumber / NotFinite` when the text converts to a non-finite number.
 *
 * The cursor is not rewound on failure; callers that need to backtrack
 * should `save()` beforehand and `restore()` afterwards.
 */
export function scanNumber(cursor: Cursor): Result<{ value: number, span: Span }, NumberError> {
  const startNumberLocation = cursor.currentLocation();

  // 1. Optional Sign
  cursor.match(char("-"));

  // 2. Integer Part
  const integerPartDigitCount = cursor.consumeWhile(isDigit);
  if (integerPartDigitCount === 0) {
    return Result.error({
      tag: "ExpectedNumber",
      span: cursor.makeSpan(startNumberLocation),
    });
  }

  // 3. Fractional Part
  if (cursor.peek() === DOT) {
    const dotLocation = cursor.currentLocation();

    cursor.next(); // consume '.'

    const fracPartDigitCount = cursor.consumeWhile(isDigit);
    if (fracPartDigitCount === 0) {
      return Result.error({
        tag: "InvalidNumber",
        reason: "MissingFractionalDigits",
        span: cursor.makeSpan(dotLocation),
      });
    }
  }

  // Convert exactly the characters consumed since the start of the literal.
  const text = cursor.text.sliceByCp(startNumberLocation.index, cursor.currentIndex);
  const value = Number(text);

  // A long enough run of digits overflows to Infinity.
  if (!Number.isFinite(value)) {
    return Result.error({
      tag: "InvalidNumber",
      reason: "NotFinite",
      span: cursor.makeSpan(startNumberLocation),
    });
  }

  return Result.ok({
    value,
    span: cursor.makeSpan(startNumberLocation),
  });
}
|
||||
|
||||
/** Errors specific to string literals, as reported by `scanString`. */
export type StringError = {
  tag: "InvalidEscape";
  /** What exactly was wrong with the escape sequence. */
  reason: EscapeErrorReason;
  span: Span;
};
|
||||
|
||||
/** The ways in which a backslash escape inside a string literal can be malformed. */
export type EscapeErrorReason =
  /** The character after the backslash is not a known escape, e.g. `\k`. */
  | { tag: "UnknownEscapeSequence"; char: CodePoint }
  /** `\u` was not followed by `{`. */
  | { tag: "UnicodeMissingBrace" }
  /** No hex digits between the braces, e.g. `\u{}`. */
  | { tag: "UnicodeNoDigits" }
  /** The hex digits were not followed by `}`, e.g. `\u{FF`. */
  | { tag: "UnicodeUnclosed" }
  /** The value exceeds the largest code point, e.g. `\u{110000}`. */
  | { tag: "UnicodeOverflow"; value: number };
|
||||
|
||||
/**
 * Scans a double-quoted string literal starting at the cursor.
 *
 * Supported escapes: `\n`, `\r`, `\t`, `\\`, `\0`, `\"` and `\u{HEX}`.
 *
 * @param cursor positioned at the opening `"`.
 * @returns the decoded string content (without quotes) and the span of the
 *   whole literal on success; otherwise
 *   - `UnexpectedEOF` when the input ends before the closing quote
 *     (including directly after a backslash),
 *   - `UnexpectedCharacter` when the first character is not `"`,
 *   - `InvalidEscape` with a reason for a malformed escape sequence.
 *
 * The cursor is not rewound on failure; callers that need to backtrack
 * should `save()` beforehand and `restore()` afterwards.
 */
export function scanString(cursor: Cursor): Result<{ value: string, span: Span }, StringError | GenericScanError> {
  const start = cursor.currentLocation();

  const firstChar = cursor.peek();
  if (firstChar === undefined) {
    return Result.error({ tag: "UnexpectedEOF", span: cursor.makeSpan(start) });
  }
  if (firstChar !== char('"')) {
    return Result.error({ tag: "UnexpectedCharacter", char: firstChar, span: cursor.makeSpan(start) });
  }
  cursor.next();

  let value = ""; // The actual string content

  while (true) {
    const c = cursor.peek();
    // Guarding on `c` as well narrows it to CodePoint for the rest of the loop.
    if (cursor.eof() || c === undefined) {
      return Result.error({ tag: "UnexpectedEOF", span: cursor.makeSpan(start) });
    }

    // 1. End of string
    if (c === char('"')) {
      cursor.next(); // consume closing quote
      break;
    }

    if (c === char('\\')) {
      // 2. Escape Sequences
      const escapeStart = cursor.currentLocation();
      cursor.next(); // consume backslash
      const escaped = cursor.peek();

      // The input ended right after the backslash: that is an unterminated
      // string, not an unknown escape (there is no character to report).
      if (escaped === undefined) {
        return Result.error({ tag: "UnexpectedEOF", span: cursor.makeSpan(start) });
      }

      switch (escaped) {
        case char('n'): value += '\n'; cursor.next(); break;
        case char('r'): value += '\r'; cursor.next(); break;
        case char('t'): value += '\t'; cursor.next(); break;
        case char('\\'): value += '\\'; cursor.next(); break;
        case char("0"): value += "\0"; cursor.next(); break;
        case char('"'): value += '"'; cursor.next(); break;
        // Unicode Escape: \u{XXXX}
        case char('u'): {
          cursor.next(); // consume 'u'

          // Expect '{'
          const braceStart = cursor.currentLocation();
          if (cursor.peek() !== char('{')) {
            return Result.error({ tag: "InvalidEscape", reason: { tag: "UnicodeMissingBrace" }, span: cursor.makeSpan(braceStart) });
          }
          cursor.next(); // consume '{'

          // Consume Hex Digits
          const hexStart = cursor.currentIndex;
          const hexCount = cursor.consumeWhile(h =>
            (h >= DIGIT_0 && h <= DIGIT_9) ||
            (h >= LOWERCASE_a && h <= LOWERCASE_f) ||
            (h >= UPPERCASE_A && h <= UPPERCASE_F)
          );

          if (hexCount === 0) {
            return Result.error({ tag: "InvalidEscape", reason: { tag: "UnicodeNoDigits" }, span: cursor.makeSpan(braceStart) });
          }

          // Expect '}'
          if (cursor.peek() !== char("}")) {
            return Result.error({ tag: "InvalidEscape", reason: { tag: "UnicodeUnclosed" }, span: cursor.makeSpan(braceStart) });
          }
          cursor.next(); // consume '}'

          // Convert & Append
          const hexStr = cursor.text.sliceByCp(hexStart, hexStart + hexCount);
          const codePoint = parseInt(hexStr, 16);

          // String.fromCodePoint throws above 0x10FFFF, so reject first.
          // NOTE(review): surrogate values (0xD800-0xDFFF) pass this check and
          // produce a lone surrogate in `value` - confirm that is intended.
          if (codePoint > 0x10FFFF) {
            return Result.error({ tag: "InvalidEscape", reason: { tag: "UnicodeOverflow", value: codePoint }, span: cursor.makeSpan(braceStart) });
          }

          value += String.fromCodePoint(codePoint);
          break;
        }
        default:
          return Result.error({
            tag: "InvalidEscape",
            reason: { tag: "UnknownEscapeSequence", char: escaped },
            span: cursor.makeSpan(escapeStart)
          });
      }
    } else {
      // 3. Regular character
      // Optimization: consume chunks of non-special chars for speed?
      // For now, char-by-char is fine.
      cursor.next();
      value += String.fromCodePoint(c);
    }
  }

  return Result.ok({
    value,
    span: cursor.makeSpan(start)
  });
}
|
||||
|
||||
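// TODO: rendering of errors. NOTE: the commented-out sketch below is stale: it reads `err.kind` and the tags "ExpectedQuote" and "Unknown", whereas StringError declares `reason` and the tag "UnknownEscapeSequence", and there is no "ExpectedQuote" error. Update it before uncommenting.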
|
||||
// function renderStringError(err: StringError): string {
|
||||
// switch (err.tag) {
|
||||
// case "ExpectedQuote": return "Expected a string starting with \"";
|
||||
// case "UnexpectedEOF": return "Unterminated string literal";
|
||||
// case "InvalidEscape":
|
||||
// const k = err.kind;
|
||||
// switch (k.tag) {
|
||||
// case "Unknown":
|
||||
// return `Unknown escape sequence '\\${String.fromCodePoint(k.char)}'`;
|
||||
// case "UnicodeMissingBrace":
|
||||
// return "Unicode escape must start with '{', e.g. \\u{1F600}";
|
||||
// case "UnicodeNoDigits":
|
||||
// return "Empty unicode escape \\u{}";
|
||||
// case "UnicodeUnclosed":
|
||||
// return "Expected '}' to close unicode escape";
|
||||
// case "UnicodeOverflow":
|
||||
// return `Unicode code point 0x${k.value.toString(16)} is too large (max 0x10FFFF)`;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
Loading…
Add table
Add a link
Reference in a new issue