Introduce CodePointString

This commit is contained in:
Yura Dupyn 2026-04-25 11:20:21 +02:00
parent ec6ba36220
commit 3ec7005198
2 changed files with 41 additions and 0 deletions

View file

@ -1,5 +1,8 @@
TypeScript library for handling source code strings without having to deal with the intricacies of JS's UTF-16 encoding.
# CodePointString
A wrapper for a string that's just an array of code points. It does not track newlines or offsets back into the original string.
# SourceText
A sane, UTF-16-safe string wrapper specifically designed for parsing source code, tracking line numbers, and generating CLI error messages.
Think of it as a fat wrapper for a string that understands more about the string, such as its line structure.

View file

@ -68,6 +68,44 @@ export type CodePointSpan = {
end: CodePointIndex,
}
// === CodePointString ===
/**
 * A string decoded once into a frozen array of Unicode code points.
 *
 * Indexing and `length` are expressed in code points rather than UTF-16 code
 * units, so a surrogate pair occupies a single slot.
 */
export class CodePointString {
  /** Decoded code points in source order; the array is frozen at construction. */
  readonly codePoints: readonly CodePoint[];

  /**
   * Decodes `source` into code points.
   *
   * @param source - The JS string to decode.
   */
  constructor(source: string) {
    // Iterating a string walks it by code point: a valid surrogate pair is
    // yielded as one two-unit chunk, while an unpaired surrogate is yielded on
    // its own and ends up stored as its raw code unit value.
    const decoded = Array.from(source, (chunk) => chunk.codePointAt(0) as CodePoint);
    this.codePoints = Object.freeze(decoded);
  }

  /** Factory equivalent of `new CodePointString(s)`. */
  static makeFromString(s: string): CodePointString {
    return new CodePointString(s);
  }

  /**
   * Looks up the code point stored at `index`.
   *
   * No bounds check is performed: an out-of-range index yields `undefined` at
   * runtime even though the declared return type is `CodePoint`.
   */
  codePointAt(index: CodePointIndex): CodePoint {
    return this.codePoints[index];
  }

  /** Number of code points (not UTF-16 code units). */
  get length(): CodePointIndex {
    return this.codePoints.length;
  }

  /** Re-encodes the stored code points back into a JS string. */
  toString(): string {
    return this.codePoints.map((cp) => String.fromCodePoint(cp)).join("");
  }
}
// === Source Text ===
// TODO:
// @deprecated and say to use `SourceText.makeFromString` instead.