Hide internal representation of chars and expose codePointAt instead

This commit is contained in:
Yura Dupyn 2026-04-06 19:46:01 +02:00
parent 345f69fd1c
commit cfe02da073

View file

@ -17,7 +17,7 @@ export type CodePointRef = {
export class SourceText { export class SourceText {
readonly source: string; readonly source: string;
// TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints (each needs at most 21 bits, so it fits easily), the other for pointers to original string. // TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints (each needs at most 21 bits, so it fits easily), the other for pointers to original string.
readonly chars: CodePointRef[]; readonly #chars: CodePointRef[];
// Stores the CodePointIndex where each line begins // Stores the CodePointIndex where each line begins
readonly lineStarts: CodePointIndex[]; readonly lineStarts: CodePointIndex[];
@ -28,14 +28,14 @@ export class SourceText {
const source = rawSource; const source = rawSource;
this.source = source; this.source = source;
this.chars = []; this.#chars = [];
this.lineStarts = [0]; // Line 1 always starts at index 0 this.lineStarts = [0]; // Line 1 always starts at index 0
let i = 0; let i = 0;
while (i < source.length) { while (i < source.length) {
const char = source.codePointAt(i) as CodePoint; const char = source.codePointAt(i) as CodePoint;
const cpIndex = this.chars.length; const cpIndex = this.#chars.length;
this.chars.push({ char: char, offset: i }); this.#chars.push({ char: char, offset: i });
const size =(char > 0xFFFF ? 2 : 1); const size =(char > 0xFFFF ? 2 : 1);
i += size; i += size;
@ -60,11 +60,11 @@ export class SourceText {
} }
// Returns the `char` field of the entry stored at `index` in the backing chars array; no bounds check is performed here.
codePointAt(index: CodePointIndex): CodePoint { codePointAt(index: CodePointIndex): CodePoint {
return this.chars[index].char; return this.#chars[index].char;
} }
// Number of entries in the backing chars array (the constructor pushes one entry per code point, so this counts code points, not UTF-16 units).
get length(): number { get length(): number {
return this.chars.length; return this.#chars.length;
} }
get lineCount(): number { get lineCount(): number {
@ -80,14 +80,20 @@ export class SourceText {
return span(loc, loc); return span(loc, loc);
} }
// Lazily yields an `[index, codePoint]` pair for every code point, walking
// indices from 0 up to (but not including) `this.length`.
*codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
  let cursor = 0;
  while (cursor < this.length) {
    const at = cursor as CodePointIndex;
    yield [at, this.codePointAt(at)];
    cursor++;
  }
}
sliceByCp(start: number, end: number): string { sliceByCp(start: number, end: number): string {
const startRef = this.chars[start]; const startRef = this.#chars[start];
// Handle out of bounds gracefully // Handle out of bounds gracefully
if (!startRef) return ""; if (!startRef) return "";
const startOff = startRef.offset; const startOff = startRef.offset;
const endOff = end < this.chars.length const endOff = end < this.#chars.length
? this.chars[end].offset ? this.#chars[end].offset
: this.source.length; : this.source.length;
return this.source.slice(startOff, endOff); return this.source.slice(startOff, endOff);
@ -101,10 +107,10 @@ export class SourceText {
if (stripNewlines && endIdx > range.start) { if (stripNewlines && endIdx > range.start) {
// Look at the character just before endIdx // Look at the character just before endIdx
const lastChar = this.chars[endIdx - 1].char; const lastChar = this.#chars[endIdx - 1].char;
if (lastChar === NEW_LINE) { if (lastChar === NEW_LINE) {
endIdx--; endIdx--;
if (endIdx > range.start && this.chars[endIdx - 1].char === CARRIAGE_RETURN) { if (endIdx > range.start && this.#chars[endIdx - 1].char === CARRIAGE_RETURN) {
endIdx--; endIdx--;
} }
} else if (lastChar === CARRIAGE_RETURN) { } else if (lastChar === CARRIAGE_RETURN) {
@ -165,7 +171,7 @@ export class SourceText {
const startCp = this.lineStarts[lineIndex]; const startCp = this.lineStarts[lineIndex];
const endCp = (lineIndex + 1 < this.lineStarts.length) const endCp = (lineIndex + 1 < this.lineStarts.length)
? this.lineStarts[lineIndex + 1] - 1 // -1 to exclude the newline char itself ? this.lineStarts[lineIndex + 1] - 1 // -1 to exclude the newline char itself
: this.chars.length; : this.#chars.length;
// TODO: Consider removing \r or \n from the end if they exist. // TODO: Consider removing \r or \n from the end if they exist.
return this.sliceByCp(startCp, endCp); return this.sliceByCp(startCp, endCp);
@ -181,7 +187,7 @@ export class SourceText {
const start = this.lineStarts[lineIndex]; const start = this.lineStarts[lineIndex];
const end = (lineIndex + 1 < this.lineStarts.length) const end = (lineIndex + 1 < this.lineStarts.length)
? this.lineStarts[lineIndex + 1] ? this.lineStarts[lineIndex + 1]
: this.chars.length; : this.#chars.length;
return { start, end }; return { start, end };
} }
@ -223,6 +229,14 @@ export class SourceRegion {
return span(loc, loc); return span(loc, loc);
} }
// Lazily yields `[index, codePoint]` pairs for indices from `span.start.index`
// up to (but not including) `span.end.index`. Both bounds are read once, when
// iteration begins, and each code point is fetched through `this.codePointAt`.
*codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
  let cursor = this.span.start.index;
  const stop = this.span.end.index;
  while (cursor < stop) {
    const at = cursor as CodePointIndex;
    yield [at, this.codePointAt(at)];
    cursor++;
  }
}
// Returns the text covered by this region: delegates to `source.sliceByCp` with the span's start and end code-point indices.
toString(): string { toString(): string {
return this.source.sliceByCp(this.span.start.index, this.span.end.index); return this.source.sliceByCp(this.span.start.index, this.span.end.index);
} }