Hide internal representation of chars and expose codePointAt instead

This commit is contained in:
Yura Dupyn 2026-04-06 19:46:01 +02:00
parent 345f69fd1c
commit cfe02da073

View file

@ -17,7 +17,7 @@ export type CodePointRef = {
export class SourceText {
readonly source: string;
// TODO: Later you can try to change this to two `Uint32Array`s - one for the code points (each needs at most 21 bits, so 32 is plenty), the other for their offsets into the original string.
readonly chars: CodePointRef[];
readonly #chars: CodePointRef[];
// Stores the CodePointIndex where each line begins
readonly lineStarts: CodePointIndex[];
@ -28,14 +28,14 @@ export class SourceText {
const source = rawSource;
this.source = source;
this.chars = [];
this.#chars = [];
this.lineStarts = [0]; // Line 1 always starts at index 0
let i = 0;
while (i < source.length) {
const char = source.codePointAt(i) as CodePoint;
const cpIndex = this.chars.length;
this.chars.push({ char: char, offset: i });
const cpIndex = this.#chars.length;
this.#chars.push({ char: char, offset: i });
const size =(char > 0xFFFF ? 2 : 1);
i += size;
@ -60,11 +60,11 @@ export class SourceText {
}
/**
 * Returns the code point recorded at code-point index `index`.
 *
 * No bounds check is performed here: an index with no stored entry makes the
 * `.char` access throw.
 */
codePointAt(index: CodePointIndex): CodePoint {
return this.chars[index].char;
return this.#chars[index].char;
}
/**
 * Number of entries in the code-point table, i.e. the length of the text
 * measured in code points rather than in UTF-16 code units of `source`.
 */
get length(): number {
return this.chars.length;
return this.#chars.length;
}
get lineCount(): number {
@ -80,14 +80,20 @@ export class SourceText {
return span(loc, loc);
}
/**
 * Lazily walks the whole text, yielding `[index, codePoint]` pairs in
 * ascending index order.
 *
 * Goes through the `length` and `codePointAt` accessors instead of touching
 * the backing storage directly; `length` is re-read before every step.
 */
*codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
  let cursor = 0;
  while (cursor < this.length) {
    const at = cursor as CodePointIndex;
    yield [at, this.codePointAt(at)];
    cursor++;
  }
}
sliceByCp(start: number, end: number): string {
const startRef = this.chars[start];
const startRef = this.#chars[start];
// Handle out of bounds gracefully
if (!startRef) return "";
const startOff = startRef.offset;
const endOff = end < this.chars.length
? this.chars[end].offset
const endOff = end < this.#chars.length
? this.#chars[end].offset
: this.source.length;
return this.source.slice(startOff, endOff);
@ -101,10 +107,10 @@ export class SourceText {
if (stripNewlines && endIdx > range.start) {
// Look at the character just before endIdx
const lastChar = this.chars[endIdx - 1].char;
const lastChar = this.#chars[endIdx - 1].char;
if (lastChar === NEW_LINE) {
endIdx--;
if (endIdx > range.start && this.chars[endIdx - 1].char === CARRIAGE_RETURN) {
if (endIdx > range.start && this.#chars[endIdx - 1].char === CARRIAGE_RETURN) {
endIdx--;
}
} else if (lastChar === CARRIAGE_RETURN) {
@ -165,7 +171,7 @@ export class SourceText {
const startCp = this.lineStarts[lineIndex];
const endCp = (lineIndex + 1 < this.lineStarts.length)
? this.lineStarts[lineIndex + 1] - 1 // -1 to exclude the newline char itself
: this.chars.length;
: this.#chars.length;
// TODO: Consider removing \r or \n from the end if they exist.
return this.sliceByCp(startCp, endCp);
@ -181,7 +187,7 @@ export class SourceText {
const start = this.lineStarts[lineIndex];
const end = (lineIndex + 1 < this.lineStarts.length)
? this.lineStarts[lineIndex + 1]
: this.chars.length;
: this.#chars.length;
return { start, end };
}
@ -223,6 +229,14 @@ export class SourceRegion {
return span(loc, loc);
}
/**
 * Yields `[index, codePoint]` for every code-point index covered by this
 * region's span: from `span.start.index` (inclusive) up to `span.end.index`
 * (exclusive).
 *
 * Both bounds are read once, before the first pair is produced. The yielded
 * indices are the span's own index values, not re-based to start at zero.
 */
*codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
  const first = this.span.start.index;
  const limit = this.span.end.index;
  let cursor = first;
  while (cursor < limit) {
    const at = cursor as CodePointIndex;
    yield [at, this.codePointAt(at)];
    cursor++;
  }
}
/**
 * Renders the region as text by asking the underlying source for the slice
 * between the span's start and end code-point indices.
 */
toString(): string {
  const text = this.source;
  const from = this.span.start.index;
  const to = this.span.end.index;
  return text.sliceByCp(from, to);
}