source-region/src/index.ts
2026-04-25 11:17:47 +02:00

582 lines
17 KiB
TypeScript

// === Char type ===
/** UTF-16 code-unit index into a JavaScript string. */
export type StringIndex = number;
/** Index into the sequence of code points of a string (not UTF-16 units). */
export type CodePointIndex = number;
/**
 * A Unicode code point (could also be named `UnicodeCodePoint`).
 * For `s: string` this is what `s.codePointAt(i)` yields.
 */
export type CodePoint = number;

/**
 * Returns the code point of the first character of `c`.
 *
 * Only the first code point is inspected; any further characters are ignored.
 *
 * @param c - a non-empty string, typically a single-character literal.
 * @returns the code point at position 0 of `c`.
 * @throws RangeError if `c` is empty. Previously this silently returned
 *   `undefined` typed as a number, which made every later comparison false.
 */
export function char(c: string): CodePoint {
  const cp = c.codePointAt(0);
  if (cp === undefined) {
    throw new RangeError('char() requires a non-empty string');
  }
  return cp;
}
// --- Whitespace ---
/** Line feed, `\n`. */
export const NEW_LINE: CodePoint = char('\n');
/** Carriage return, `\r`. */
export const CARRIAGE_RETURN: CodePoint = char('\r');
/** Plain space. */
export const SPACE: CodePoint = char(' ');
/** Horizontal tab, `\t`. */
export const TAB: CodePoint = char('\t');

// --- Decimal digits and the decimal point ---
/** Lower bound of the ASCII digit range. */
export const DIGIT_0: CodePoint = char('0');
/** Upper bound of the ASCII digit range. */
export const DIGIT_9: CodePoint = char('9');
/** Full stop / decimal point. */
export const DOT: CodePoint = char('.');

// --- ASCII letters: `a`/`A` start both the hex-letter and alphabet ranges,
// --- `f`/`F` end the hex-letter range, `z`/`Z` end the alphabet range.
export const LOWERCASE_A: CodePoint = char('a');
export const LOWERCASE_F: CodePoint = char('f');
export const LOWERCASE_Z: CodePoint = char('z');
export const UPPERCASE_A: CodePoint = char('A');
export const UPPERCASE_F: CodePoint = char('F');
export const UPPERCASE_Z: CodePoint = char('Z');
// === Predicates ===
/**
 * Inclusive range test: true when `a <= x <= b`.
 *
 * @param a - lower bound (inclusive)
 * @param x - value under test
 * @param b - upper bound (inclusive)
 */
export function isBetween(a: CodePoint, x: CodePoint, b: CodePoint): boolean {
  const atOrAboveLower = a <= x;
  const atOrBelowUpper = x <= b;
  return atOrAboveLower && atOrBelowUpper;
}
/** True when `x` lies in the ASCII digit range `DIGIT_0`..`DIGIT_9` (inclusive). */
export function isDigit(x: CodePoint): boolean {
  const inDigitRange = isBetween(DIGIT_0, x, DIGIT_9);
  return inDigitRange;
}
/** True when `x` is an ASCII letter: `a`-`z` or `A`-`Z`. */
export function isAsciiAlpha(x: CodePoint): boolean {
  if (isBetween(LOWERCASE_A, x, LOWERCASE_Z)) {
    return true;
  }
  return isBetween(UPPERCASE_A, x, UPPERCASE_Z);
}
/** True when `x` is an ASCII letter or an ASCII digit. */
export function isAsciiAlphanumeric(x: CodePoint): boolean {
  if (isAsciiAlpha(x)) {
    return true;
  }
  return isDigit(x);
}
/** True for space, tab, line feed and carriage return; false for everything else. */
export function isAsciiWhitespace(cp: CodePoint): boolean {
  switch (cp) {
    case SPACE:
    case TAB:
    case NEW_LINE:
    case CARRIAGE_RETURN:
      return true;
    default:
      return false;
  }
}
/** True for whitespace that stays on the current line: space or tab only. */
export function isAsciiInlineWhitespace(cp: CodePoint): boolean {
  if (cp === SPACE) {
    return true;
  }
  return cp === TAB;
}
// One code point of a source string together with its position in that string.
export type CodePointRef = {
// The code point value.
char: CodePoint,
// UTF-16 index in the original string at which this code point starts.
offset: StringIndex,
};
// A range measured in code-point indices.
// `spanLength` computes `end - start` and `containsIndex` tests `index < end`,
// i.e. `end` is treated as exclusive.
export type CodePointSpan = {
start: CodePointIndex,
end: CodePointIndex,
}
// === Source Text ===
/**
 * Convenience wrapper that builds a `SourceText` from a plain string.
 *
 * TODO: mark this function as deprecated and point callers at
 * `SourceText.makeFromString` instead.
 */
export function sourceText(s: string): SourceText {
  const text = SourceText.makeFromString(s);
  return text;
}
/**
 * A source string pre-indexed by Unicode code point.
 *
 * The constructor walks the string once and records, for every code point, its
 * value and its UTF-16 offset, plus the code-point index at which each line
 * starts. `\n`, `\r\n` and a lone `\r` are all treated as line terminators.
 * All indices accepted and returned by this class are code-point indices unless
 * stated otherwise; lines and columns are 1-based.
 */
export class SourceText {
  /** The original string, exactly as passed to the constructor. */
  readonly source: string;
  // TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints (each 20 bit but whatever), the other for pointers to original string.
  /** One entry per code point: its value and its UTF-16 offset in `source`. */
  readonly #chars: CodePointRef[];
  /** Code-point index at which each line begins; entry 0 is always 0. */
  readonly lineStarts: CodePointIndex[];

  /** Factory equivalent to `new SourceText(s)`. */
  static makeFromString(s: string): SourceText {
    return new SourceText(s);
  }

  /**
   * Indexes `rawSource` by code point and records where every line starts.
   *
   * @param rawSource - the text to index; it is stored unmodified.
   */
  constructor(rawSource: string) {
    // TODO: This shouldn't really be a concern of the library.
    // const source = rawSource.normalize('NFC');
    const source = rawSource;
    this.source = source;
    this.#chars = [];
    this.lineStarts = [0]; // Line 1 always starts at index 0
    let i = 0;
    while (i < source.length) {
      // Named `cp` (not `char`) so the module-level `char()` helper is not shadowed.
      const cp = source.codePointAt(i) as CodePoint;
      const cpIndex = this.#chars.length;
      this.#chars.push({ char: cp, offset: i });
      // Code points above the BMP occupy two UTF-16 code units.
      i += cp > 0xFFFF ? 2 : 1;
      if (cp === NEW_LINE) {
        // The code point after `\n` starts a new line.
        this.lineStarts.push(cpIndex + 1);
      } else if (cp === CARRIAGE_RETURN) {
        // A lone `\r` ends the line by itself; in `\r\n` the following `\n`
        // (handled on the next iteration) is what ends the line.
        const nextIsNL = i < source.length && source.codePointAt(i) === NEW_LINE;
        if (!nextIsNL) {
          this.lineStarts.push(cpIndex + 1);
        }
      }
    }
  }

  /**
   * Returns the code point at code-point index `index`.
   * No bounds check is performed: an index with no entry makes the property
   * access throw.
   */
  codePointAt(index: CodePointIndex): CodePoint {
    return this.#chars[index].char;
  }

  /** Number of code points in the text. */
  get length(): number {
    return this.#chars.length;
  }

  /** Number of lines; always at least 1, even for an empty string. */
  get lineCount(): number {
    return this.lineStarts.length;
  }

  /** Location one past the last code point. */
  get eofLocation(): SourceLocation {
    return this.getLocation(this.length);
  }

  /** Zero-width span located at `eofLocation`. */
  get eofSpan(): Span {
    const loc = this.eofLocation;
    return span(loc, loc);
  }

  /** Yields `[index, codePoint]` for every code point, in order. */
  *codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
    for (let i = 0; i < this.length; i++) {
      yield [i as CodePointIndex, this.codePointAt(i as CodePointIndex)];
    }
  }

  /**
   * Returns the substring covering code points `[start, end)`.
   *
   * Out-of-range input is handled without throwing: a `start` that has no
   * entry, or an `end` at or before `start` (including a negative `end`, which
   * previously crashed), yields `""`; an `end` past the last code point is
   * clamped to the end of the string.
   */
  sliceByCp(start: number, end: number): string {
    const startRef = this.#chars[start];
    if (startRef === undefined || end <= start) return "";
    const endOff = end < this.#chars.length
      ? this.#chars[end].offset
      : this.source.length;
    return this.source.slice(startRef.offset, endOff);
  }

  /** Returns the substring covered by a code-point span. */
  slice(span: CodePointSpan): string {
    return this.sliceByCp(span.start, span.end);
  }

  /**
   * Given a line occupying code points `[start, end)`, returns the index just
   * past the line's content, i.e. `end` minus a trailing `\n`, `\r\n` or `\r`.
   */
  private lineContentEnd(start: CodePointIndex, end: CodePointIndex): CodePointIndex {
    if (end <= start) return end;
    const last = this.#chars[end - 1].char;
    if (last === NEW_LINE) {
      const beforeNewline = end - 1;
      const hasCarriageReturn = beforeNewline > start
        && this.#chars[beforeNewline - 1].char === CARRIAGE_RETURN;
      return hasCarriageReturn ? beforeNewline - 1 : beforeNewline;
    }
    return last === CARRIAGE_RETURN ? end - 1 : end;
  }

  /**
   * Returns a Span for the given line (1-based index).
   * If `stripNewlines` is true, the span excludes the trailing line terminator.
   *
   * @throws Error if `line` is out of bounds (via `getLineRange`).
   */
  getLineSpan(line: number, stripNewlines = true): Span {
    const range = this.getLineRange(line);
    const endIdx = stripNewlines
      ? this.lineContentEnd(range.start, range.end)
      : range.end;
    return {
      start: this.getLocation(range.start),
      end: this.getLocation(endIdx)
    };
  }

  /**
   * Converts a linear code-point index into a SourceLocation (index, line, column).
   * Binary-searches `lineStarts` for the last line whose start is `<= index`.
   * `index` itself is not validated against the text length.
   */
  getLocation(index: CodePointIndex): SourceLocation {
    let low = 0;
    let high = this.lineStarts.length - 1;
    let line = 1;
    while (low <= high) {
      const mid = Math.floor((low + high) / 2);
      if (this.lineStarts[mid] <= index) {
        line = mid + 1;
        low = mid + 1;
      } else {
        high = mid - 1;
      }
    }
    const lineStartIndex = this.lineStarts[line - 1];
    const column = index - lineStartIndex + 1;
    return { index, line, column };
  }

  /** Resolves both ends of a code-point span to full locations. */
  getSpan(span: CodePointSpan): Span {
    return { start: this.getLocation(span.start), end: this.getLocation(span.end) };
  }

  /**
   * Creates a SourceRegion from a Span.
   *
   * @throws Error if the span starts before index 0 or ends past `length`.
   */
  makeRegion(span: Span): SourceRegion {
    // Basic validation
    if (span.start.index < 0 || span.end.index > this.length) {
      throw new Error(`Span out of bounds: ${span.start.index}-${span.end.index} (length: ${this.length})`);
    }
    return new SourceRegion(this, span);
  }

  /** Creates a SourceRegion covering the entire SourceText. */
  fullRegion(): SourceRegion {
    const start = this.getLocation(0);
    const end = this.getLocation(this.length);
    return this.makeRegion({ start, end });
  }

  /**
   * Returns the text of a line (1-based index) without its line terminator.
   *
   * The terminator is stripped with the same rule as `getLineSpan`, so a
   * `\r\n` line no longer keeps a dangling `\r`. An out-of-range `line`
   * yields `""`.
   */
  getLineText(line: number): string {
    const lineIndex = line - 1;
    if (lineIndex < 0 || lineIndex >= this.lineStarts.length) return "";
    const startCp = this.lineStarts[lineIndex];
    const rawEnd = (lineIndex + 1 < this.lineStarts.length)
      ? this.lineStarts[lineIndex + 1]
      : this.#chars.length;
    return this.sliceByCp(startCp, this.lineContentEnd(startCp, rawEnd));
  }

  /**
   * Returns the code-point range of a line (1-based index), including its line
   * terminator, or `undefined` when `line` is out of bounds.
   */
  tryGetLineRange(line: number): CodePointSpan | undefined {
    const lineIndex = line - 1;
    if (lineIndex < 0 || lineIndex >= this.lineStarts.length) {
      return undefined;
    }
    const start = this.lineStarts[lineIndex];
    const end = (lineIndex + 1 < this.lineStarts.length)
      ? this.lineStarts[lineIndex + 1]
      : this.#chars.length;
    return rawSpan(start, end);
  }

  /**
   * Like `tryGetLineRange`, but throws instead of returning `undefined`.
   *
   * @throws Error if `line` is out of bounds.
   */
  getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } {
    const range = this.tryGetLineRange(line);
    if (range === undefined) {
      throw new Error(`Line ${line} is out of bounds (line count: ${this.lineCount})`);
    }
    return range;
  }
}
/** Bundles a start and an end SourceLocation into a Span. */
export function span(start: SourceLocation, end: SourceLocation): Span {
  const result: Span = { start: start, end: end };
  return result;
}
/** Builds a CodePointSpan from two code-point indices. */
export function rawSpan(start: CodePointIndex, end: CodePointIndex): CodePointSpan {
  const result: CodePointSpan = { start: start, end: end };
  return result;
}
/** Builds a zero-width CodePointSpan whose start and end are both `index`. */
export function pointSpan(index: CodePointIndex): CodePointSpan {
  const at = index;
  const result: CodePointSpan = { start: at, end: at };
  return result;
}
/** Number of code points covered by `span`, computed as `end - start`. */
export function spanLength(span: CodePointSpan): number {
  const { start, end } = span;
  return end - start;
}
/** True when `span` starts and ends at the same index. */
export function isZeroWidth(span: CodePointSpan): boolean {
  const { start, end } = span;
  return start === end;
}
/**
 * A window onto a `SourceText`, delimited by a `Span`.
 *
 * Indices handled by this class are code-point indices into the underlying
 * `SourceText`, not offsets relative to the start of the region.
 */
export class SourceRegion {
  /** The text this region is a window onto. */
  public readonly source: SourceText;
  /** The part of `source` covered by this region. */
  public readonly span: Span;

  constructor(source: SourceText, span: Span) {
    this.source = source;
    this.span = span;
  }

  /** Delegates to the underlying text; `index` is not checked against the region. */
  codePointAt(index: CodePointIndex): CodePoint {
    return this.source.codePointAt(index);
  }

  /** Number of code points between the region's start and end. */
  get length(): number {
    const { start, end } = this.span;
    return end.index - start.index;
  }

  /** Number of lines from the start line through the end line, inclusive. */
  get lineCount(): number {
    const { start, end } = this.span;
    return end.line - start.line + 1;
  }

  /** The region's end location. */
  get eofLocation(): SourceLocation {
    return this.span.end;
  }

  /** Zero-width span sitting at the region's end location. */
  get eofSpan(): Span {
    const end = this.eofLocation;
    return span(end, end);
  }

  /** The region's extent as plain code-point indices. */
  get codePointSpan(): CodePointSpan {
    const { start, end } = this.span;
    return rawSpan(start.index, end.index);
  }

  /** Yields `[index, codePoint]` for each code point inside the region. */
  *codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
    const stop = this.span.end.index;
    let i = this.span.start.index;
    while (i < stop) {
      yield [i as CodePointIndex, this.codePointAt(i as CodePointIndex)];
      i++;
    }
  }

  /** The text covered by the region. */
  toString(): string {
    return this.stringOf(this.span);
  }

  /** The text covered by `span`; the span is not checked against the region. */
  stringOf(span: Span): string {
    const from = span.start.index;
    const to = span.end.index;
    return this.source.sliceByCp(from, to);
  }

  /**
   * Returns the Span of a whole source line (1-based index).
   *
   * @throws Error if `line` lies before the region's first line or after its last.
   */
  getLineSpan(line: number, stripNewlines = true): Span {
    const beforeFirst = line < this.span.start.line;
    const afterLast = line > this.span.end.line;
    if (beforeFirst || afterLast) {
      throw new Error(`Line ${line} is outside of region lines ${this.span.start.line}-${this.span.end.line}`);
    }
    return this.source.getLineSpan(line, stripNewlines);
  }

  /**
   * Iterates over all lines that intersect this region, yielding the Span
   * returned by `getLineSpan` for each one.
   */
  *lines(stripNewlines = true): IterableIterator<Span> {
    const lastLine = this.span.end.line;
    let lineNo = this.span.start.line;
    while (lineNo <= lastLine) {
      yield this.getLineSpan(lineNo, stripNewlines);
      lineNo++;
    }
  }

  /** Invokes `callback(span, lineNo)` for each line produced by `lines()`. */
  forEachLine(callback: (span: Span, lineNo: number) => void, stripNewlines = true): void {
    let nextLineNo = this.span.start.line;
    for (const current of this.lines(stripNewlines)) {
      callback(current, nextLineNo);
      nextLineNo += 1;
    }
  }

  /**
   * Returns the text covered by a code-point span.
   *
   * @throws Error if the span reaches outside the region.
   */
  slice(span: CodePointSpan): string {
    const startsTooEarly = span.start < this.span.start.index;
    const endsTooLate = span.end > this.span.end.index;
    if (startsTooEarly || endsTooLate) {
      throw new Error(`CodePointSpan ${span.start}-${span.end} is outside region ${this.span.start.index}-${this.span.end.index}`);
    }
    return this.source.sliceByCp(span.start, span.end);
  }

  /**
   * Creates a sub-region within this region.
   *
   * @throws Error if `span` is not contained in the current region.
   */
  subRegion(span: Span): SourceRegion {
    const startsTooEarly = span.start.index < this.span.start.index;
    const endsTooLate = span.end.index > this.span.end.index;
    if (startsTooEarly || endsTooLate) {
      throw new Error(`Sub-region span ${span.start.index}-${span.end.index} is not within parent region ${this.span.start.index}-${this.span.end.index}`);
    }
    return this.source.makeRegion(span);
  }
}
// A range of source text whose two ends are fully resolved locations
// (index, line and column), as opposed to the index-only `CodePointSpan`.
export type Span = {
start: SourceLocation;
end: SourceLocation;
}
// A position in a source text: its code-point index together with the line and
// column that `SourceText.getLocation` derives for that index.
export type SourceLocation = {
index: CodePointIndex;
line: number; // 1-based
column: number; // 1-based
}
/** True when `inner` lies entirely within `outer` (shared endpoints allowed). */
export function containsSpan(outer: CodePointSpan, inner: CodePointSpan): boolean {
  const startsInside = outer.start <= inner.start;
  const endsInside = inner.end <= outer.end;
  return startsInside && endsInside;
}
/** True when `index` falls in the half-open range `[span.start, span.end)`. */
export function containsIndex(span: CodePointSpan, index: CodePointIndex): boolean {
  const atOrAfterStart = span.start <= index;
  const beforeEnd = index < span.end;
  return atOrAfterStart && beforeEnd;
}
// === Cursor ===
/**
 * A forward-moving read position inside a `SourceRegion`.
 *
 * The position is a code-point index into the region's underlying source; it
 * starts at the region's first index and is treated as "at end" once it
 * reaches the region's end index.
 */
export class SourceCursor {
  private index: CodePointIndex;

  constructor(public readonly region: SourceRegion) {
    this.index = region.span.start.index;
  }

  /** The current position. */
  current(): CodePointIndex {
    return this.index;
  }

  /** The current position, to be handed back to `restore` later. */
  checkpoint(): CodePointIndex {
    return this.index;
  }

  /** Moves the cursor to `index`; the value is stored without validation. */
  restore(index: CodePointIndex) {
    this.index = index;
  }

  /** The code point at the current position, or `undefined` at the region's end. */
  peek(): CodePoint | undefined {
    const exhausted = this.index >= this.region.span.end.index;
    return exhausted ? undefined : this.region.codePointAt(this.index);
  }

  /** Returns the current code point and steps past it; `undefined` (no move) at the end. */
  advance(): CodePoint | undefined {
    const cp = this.peek();
    if (cp !== undefined) {
      this.index += 1;
    }
    return cp;
  }

  /** True once the position has reached the region's end index. */
  isAtEnd(): boolean {
    return this.index >= this.region.span.end.index;
  }

  /** The span running from `start` up to the current position. */
  spanFrom(start: CodePointIndex): CodePointSpan {
    return rawSpan(start, this.index);
  }

  /** A one-code-point span at the current position, or a zero-width span at the end. */
  currentSpan(): CodePointSpan {
    if (this.isAtEnd()) {
      return pointSpan(this.index);
    }
    return rawSpan(this.index, this.index + 1);
  }

  /** A zero-width span at the region's end index. */
  eofSpan(): CodePointSpan {
    return pointSpan(this.region.span.end.index);
  }

  /** The text covered by `span`, as returned by the region's `slice`. */
  slice(span: CodePointSpan): string {
    return this.region.slice(span);
  }

  /** The current position resolved to index, line and column. */
  location(): SourceLocation {
    return this.region.source.getLocation(this.index);
  }

  /**
   * Jumps to the start of the line after the current one. If that line lies
   * beyond the region's last line, or starts past the region's end index, the
   * cursor lands on the region's end index instead.
   */
  moveToNextLineStart(): void {
    const regionEnd = this.region.span.end;
    const nextLine = this.region.source.getLocation(this.index).line + 1;
    if (nextLine > regionEnd.line) {
      this.index = regionEnd.index;
    } else {
      const nextRange = this.region.source.getLineRange(nextLine);
      this.index = Math.min(nextRange.start, regionEnd.index);
    }
  }
}
// === Rendering Utilities ===
// One rendered source line, as produced by `renderSpan`, ready to be printed
// with a gutter and an optional underline.
export type LineView = {
// 1-based number of the line in the source text.
lineNo: number;
sourceLine: string; // The text of the line as `renderSpan` prepared it for display
// These split the line into 3 parts for coloring:
// prefix | highlight | suffix
// On lines outside the highlighted span the whole text is placed in `prefix`.
prefix: string;
highlight: string;
suffix: string;
// Helpers for underlines (e.g., " ^^^^^")
gutterPad: string; // Padding to align line numbers
underline: string; // The literal "^^^" string for CLI usage; empty on pure context lines
};
/**
 * Renders an entire region by highlighting the region's own span.
 *
 * @param region - the region to render
 * @param contextLines - forwarded unchanged to `renderSpan`
 */
export function renderRegion(region: SourceRegion, contextLines = 1): LineView[] {
  const wholeSpan = region.span;
  return renderSpan(region, wholeSpan, contextLines);
}
/**
 * Builds one `LineView` per source line touched by `span`, plus up to
 * `contextLines` lines of context before and after.
 *
 * Each highlighted line is split into `prefix | highlight | suffix` and gets an
 * `underline` of spaces followed by carets; context lines carry their whole
 * text in `prefix` and an empty underline.
 *
 * Two details matter for correctness:
 * - Only the line terminator (`\n`, `\r\n` or `\r`) is removed from each line.
 *   Trailing spaces and tabs are kept, so a span that points at trailing
 *   whitespace still produces a visible highlight.
 * - Columns in a `SourceLocation` count code points, so the line is split by
 *   code point rather than by UTF-16 unit. Characters outside the BMP neither
 *   get cut in half nor shift the highlight.
 *
 * @param region - supplies the underlying `SourceText`
 * @param span - the range to highlight
 * @param contextLines - number of extra lines to show on each side
 */
export function renderSpan(region: SourceRegion, span: Span, contextLines = 1): LineView[] {
  const text = region.source;
  const views: LineView[] = [];
  // Range of lines to show (including context), clamped to the text.
  const startLine = Math.max(1, span.start.line - contextLines);
  const endLine = Math.min(text.lineStarts.length, span.end.line + contextLines);
  // Width of the largest line number, for gutter alignment (e.g. " 9 |" vs "10 |").
  const maxLineNoWidth = endLine.toString().length;
  // Matches exactly one trailing line terminator.
  const lineTerminator = /\r?\n$|\r$/;

  for (let lineNo = startLine; lineNo <= endLine; lineNo++) {
    const lineRange = text.getLineRange(lineNo);
    const lineRaw = text.sliceByCp(lineRange.start, lineRange.end).replace(lineTerminator, "");
    const isErrorLine = lineNo >= span.start.line && lineNo <= span.end.line;

    // Context lines: everything is "prefix", nothing is underlined.
    let prefix = lineRaw;
    let highlight = "";
    let suffix = "";
    let underline = "";

    if (isErrorLine) {
      // One array element per code point, so 1-based columns map to `column - 1`.
      const lineCps = Array.from(lineRaw);
      // The highlight starts at the span's column on its first line, else at the line start.
      const startCol = lineNo === span.start.line ? span.start.column - 1 : 0;
      // It ends at the span's column on its last line, else at the end of the line.
      const endCol = lineNo === span.end.line ? span.end.column - 1 : lineCps.length;
      // Clamp to the visible text (a span may point at the stripped terminator).
      const safeStart = Math.max(0, Math.min(startCol, lineCps.length));
      const safeEnd = Math.max(safeStart, Math.min(endCol, lineCps.length));

      prefix = lineCps.slice(0, safeStart).join("");
      highlight = lineCps.slice(safeStart, safeEnd).join("");
      suffix = lineCps.slice(safeEnd).join("");

      // Note: this naive approach assumes monospaced fonts and one cell per code point.
      // At least one caret is drawn so zero-width positions stay visible.
      underline = " ".repeat(safeStart) + "^".repeat(Math.max(1, safeEnd - safeStart));
    }

    views.push({
      lineNo,
      sourceLine: lineRaw,
      prefix,
      highlight,
      suffix,
      gutterPad: " ".repeat(maxLineNoWidth - lineNo.toString().length),
      underline
    });
  }
  return views;
}