Improve and abstract Cursor design. Start scanner

This commit is contained in:
Yura Dupyn 2026-02-06 00:38:16 +01:00
parent d382b16e6d
commit d5f9777711
8 changed files with 713 additions and 476 deletions

View file

@ -17,7 +17,6 @@ export type CodePointRef = {
export class SourceText {
readonly source: string;
// TODO: Later, consider replacing this with two `Uint32Array`s - one for the code points (each needs at most 21 bits, but whatever), the other for indices into the original string.
//
readonly chars: CodePointRef[];
// Stores the CodePointIndex where each line begins
@ -40,17 +39,16 @@ export class SourceText {
i += size;
// === Newline Logic ===
// 0x0A is '\n', 0x0D is '\r'
if (char === 0x0A) {
if (char === NEW_LINE) {
// Found a newline, the NEXT char starts a new line
this.lineStarts.push(cpIndex + 1);
}
// Handle CR (Classic Mac) or CRLF start
else if (char === 0x0D) {
else if (char === CARRIAGE_RETURN) {
// Check if the next char is '\n' (CRLF)
// We peek ahead in the raw string to see if we need to skip the \n for line counting purposes
// or just treat this as a newline.
const nextIsNL = i < source.length && source.codePointAt(i) === 0x0A;
const nextIsNL = i < source.length && source.codePointAt(i) === NEW_LINE;
if (!nextIsNL) {
// Only push if it's NOT CRLF. If it is CRLF, the loop handles the \n next.
this.lineStarts.push(cpIndex + 1);
@ -101,15 +99,6 @@ export function sourceText(s: string) {
return new SourceText(s);
}
/**
 * Reports whether a code point is one of the four ASCII whitespace
 * characters recognised here: space, horizontal tab, line feed, or
 * carriage return.
 *
 * @param char - The code point to classify.
 * @returns `true` for 0x20, 0x09, 0x0A, or 0x0D; `false` for anything else.
 */
export function isWhitespace(char: CodePoint): boolean {
  switch (char) {
    case 0x20: // ' '
    case 0x09: // '\t'
    case 0x0A: // '\n'
    case 0x0D: // '\r'
      return true;
    default:
      return false;
  }
}
/**
 * Reports whether a code point is an ASCII decimal digit.
 *
 * @param char - The code point to classify.
 * @returns `true` when `char` lies in the inclusive range 0x30 ('0') to
 *          0x39 ('9'); `false` otherwise.
 */
export function isDigit(char: CodePoint): boolean {
  const lowest = 0x30; // '0'
  const highest = 0x39; // '9'
  return lowest <= char && char <= highest;
}
export type Span = {
start: StringIndex,
end: StringIndex,
@ -123,3 +112,21 @@ export type SourceLocation = {
column: number; // 1-based
};
// Named code-point constants, exported so comparisons can use a readable name
// instead of a raw numeric literal.
// NOTE(review): `char` is defined outside this view — presumably it maps a
// one-character string to its code point; confirm against its definition.
// Whitespace
export const NEW_LINE: CodePoint = char('\n');
export const CARRIAGE_RETURN: CodePoint = char('\r');
export const SPACE: CodePoint = char(' ');
export const TAB: CodePoint = char('\t');
// Digit Boundaries: '0' and '9' are the first and last decimal digit characters
export const DIGIT_0: CodePoint = char('0');
export const DIGIT_9: CodePoint = char('9');
// '.' is not a digit boundary itself; presumably kept here for numeric-literal scanning — TODO confirm
export const DOT: CodePoint = char('.');
// Hex Boundaries: the ends of the 'a'..'f' and 'A'..'F' letter ranges
export const LOWERCASE_a: CodePoint = char('a');
export const UPPERCASE_A: CodePoint = char('A');
export const LOWERCASE_f: CodePoint = char('f');
export const UPPERCASE_F: CodePoint = char('F');