// === Char type ===

/** UTF-16 code-unit index into a JavaScript string. */
export type StringIndex = number;

/** Index into the array of code points of a `SourceText`. */
export type CodePointIndex = number;

/**
 * A Unicode code point (could also be named `UnicodeCodePoint`).
 * Basically for `s: string` we have `s.codePointAt(i: index): char`.
 */
export type CodePoint = number;

/**
 * Returns the first code point of `c`.
 *
 * @param c - A non-empty string; only its first code point is used.
 * @returns The code point at the start of `c`.
 * @throws RangeError if `c` is empty (there is no code point to return).
 */
export function char(c: string): CodePoint {
  const cp = c.codePointAt(0);
  if (cp === undefined) {
    throw new RangeError("char() requires a non-empty string");
  }
  return cp;
}

// Whitespace
export const NEW_LINE: CodePoint = char('\n');
export const CARRIAGE_RETURN: CodePoint = char('\r');
export const SPACE: CodePoint = char(' ');
export const TAB: CodePoint = char('\t');

// Digit Boundaries
export const DIGIT_0: CodePoint = char('0');
export const DIGIT_9: CodePoint = char('9');
export const DOT: CodePoint = char('.');

// Hex Boundaries
export const LOWERCASE_A: CodePoint = char('a');
export const UPPERCASE_A: CodePoint = char('A');
export const LOWERCASE_F: CodePoint = char('f');
export const UPPERCASE_F: CodePoint = char('F');
export const LOWERCASE_Z: CodePoint = char('z');
export const UPPERCASE_Z: CodePoint = char('Z');

// === Predicates ===

/** Returns true when `a <= x <= b` (both bounds inclusive). */
export function isBetween(a: CodePoint, x: CodePoint, b: CodePoint): boolean {
  return a <= x && x <= b;
}

/** Returns true for the ASCII digits `0`-`9`. */
export function isDigit(x: CodePoint): boolean {
  return isBetween(DIGIT_0, x, DIGIT_9);
}

/** Returns true for the ASCII letters `a`-`z` and `A`-`Z`. */
export function isAsciiAlpha(x: CodePoint): boolean {
  return isBetween(LOWERCASE_A, x, LOWERCASE_Z) || isBetween(UPPERCASE_A, x, UPPERCASE_Z);
}

/** Returns true for ASCII letters and ASCII digits. */
export function isAsciiAlphanumeric(x: CodePoint): boolean {
  return isAsciiAlpha(x) || isDigit(x);
}

/** Returns true for space, tab, `\n` and `\r`. */
export function isAsciiWhitespace(cp: CodePoint): boolean {
  return cp === SPACE || cp === TAB || cp === NEW_LINE || cp === CARRIAGE_RETURN;
}

/** Returns true for space and tab only (whitespace that does not end a line). */
export function isAsciiInlineWhitespace(cp: CodePoint): boolean {
  return cp === SPACE || cp === TAB;
}

/** A code point together with the UTF-16 offset at which it starts in the source string. */
export type CodePointRef = {
  char: CodePoint,
  offset: StringIndex,
};

/** A half-open range `[start, end)` of code point indices. */
export type CodePointSpan = {
  start: CodePointIndex,
  end: CodePointIndex,
}

// === Source Text ===

// TODO: Mark as @deprecated and say to use `SourceText.makeFromString` instead.
/** Convenience wrapper around `SourceText.makeFromString`. */
export function sourceText(s: string): SourceText {
  return SourceText.makeFromString(s);
}

/**
 * A string indexed by code point rather than by UTF-16 code unit, with a
 * table of line starts for converting indices into line/column locations.
 *
 * Lines are terminated by `\n`, `\r\n`, or a lone `\r`.
 */
export class SourceText {
  /** The original string, unmodified. */
  readonly source: string;

  // TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints
  // (each 20 bit but whatever), the other for pointers to original string.
  readonly #chars: CodePointRef[];

  /** Stores the CodePointIndex where each line begins. Entry 0 is always 0. */
  readonly lineStarts: CodePointIndex[];

  /** Builds a `SourceText` from a string. */
  static makeFromString(s: string): SourceText {
    return new SourceText(s);
  }

  /**
   * Decodes `rawSource` into code points and records where each line starts.
   *
   * @param rawSource - The text to index. It is stored as-is (no normalization).
   */
  constructor(rawSource: string) {
    // TODO: This shouldn't really be a concern of the library.
    // const source = rawSource.normalize('NFC');
    const source = rawSource;
    this.source = source;
    this.#chars = [];
    this.lineStarts = [0]; // Line 1 always starts at index 0

    let i = 0;
    while (i < source.length) {
      // `i < source.length`, so `codePointAt` cannot return undefined here.
      const cp = source.codePointAt(i) as CodePoint;
      const cpIndex = this.#chars.length;
      this.#chars.push({ char: cp, offset: i });

      // Code points above the BMP occupy two UTF-16 code units.
      i += cp > 0xFFFF ? 2 : 1;

      // === Newline Logic ===
      if (cp === NEW_LINE) {
        // Found a newline, the NEXT char starts a new line.
        this.lineStarts.push(cpIndex + 1);
      } else if (cp === CARRIAGE_RETURN) {
        // Handle CR (Classic Mac) or the start of CRLF.
        // Peek ahead in the raw string: for CRLF the following `\n` iteration
        // records the line start, so only a lone `\r` is recorded here.
        const nextIsNL = i < source.length && source.codePointAt(i) === NEW_LINE;
        if (!nextIsNL) {
          this.lineStarts.push(cpIndex + 1);
        }
      }
    }
  }

  /**
   * Returns the code point at the given code point index.
   *
   * @throws RangeError if `index` does not refer to an existing code point.
   */
  codePointAt(index: CodePointIndex): CodePoint {
    const ref = this.#chars[index];
    if (ref === undefined) {
      throw new RangeError(`Code point index ${index} is out of bounds (length: ${this.length})`);
    }
    return ref.char;
  }

  /** Number of code points in the text. */
  get length(): number {
    return this.#chars.length;
  }

  /** Number of lines; always at least 1, even for empty text. */
  get lineCount(): number {
    return this.lineStarts.length;
  }

  /** Location just past the last code point. */
  get eofLocation(): SourceLocation {
    return this.getLocation(this.length);
  }

  /** Zero-width span positioned at the end of the text. */
  get eofSpan(): Span {
    const loc = this.eofLocation;
    return span(loc, loc);
  }

  /** Iterates over `[index, codePoint]` pairs for the whole text. */
  *codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
    for (let i = 0; i < this.length; i++) {
      yield [i as CodePointIndex, this.codePointAt(i as CodePointIndex)];
    }
  }

  /**
   * Returns the substring covering code points `[start, end)`.
   *
   * Out-of-range input is handled gracefully: an out-of-bounds `start` or an
   * `end` that is not greater than `start` yields `""`, and an `end` past the
   * last code point is treated as the end of the text.
   */
  sliceByCp(start: number, end: number): string {
    const startRef = this.#chars[start];
    if (startRef === undefined || end <= start) return "";
    const endOff = this.#chars[end]?.offset ?? this.source.length;
    return this.source.slice(startRef.offset, endOff);
  }

  /** Returns the substring covered by a code point span. */
  slice(span: CodePointSpan): string {
    return this.sliceByCp(span.start, span.end);
  }

  /**
   * Returns a Span for the given line (1-based index).
   * If stripNewlines is true, the span will exclude the trailing `\n`, `\r\n` or `\r`.
   *
   * @throws Error if `line` is out of bounds.
   */
  getLineSpan(line: number, stripNewlines = true): Span {
    const range = this.getLineRange(line);
    const endIdx = stripNewlines ? this.#lineContentEnd(range) : range.end;
    return { start: this.getLocation(range.start), end: this.getLocation(endIdx) };
  }

  /**
   * Converts a linear Code Point Index into a SourceLocation (line, column, index).
   * Line and column are 1-based; the column counts code points.
   */
  getLocation(index: CodePointIndex): SourceLocation {
    // Binary search for the last line start that is <= index.
    let low = 0;
    let high = this.lineStarts.length - 1;
    let line = 1;
    while (low <= high) {
      const mid = Math.floor((low + high) / 2);
      if (this.lineStarts[mid] <= index) {
        line = mid + 1;
        low = mid + 1;
      } else {
        high = mid - 1;
      }
    }
    const lineStartIndex = this.lineStarts[line - 1];
    const column = index - lineStartIndex + 1;
    return { index, line, column };
  }

  /** Converts a code point span into a Span with full locations. */
  getSpan(span: CodePointSpan): Span {
    return { start: this.getLocation(span.start), end: this.getLocation(span.end) };
  }

  /**
   * Creates a SourceRegion from a Span.
   *
   * @throws Error if the span starts before 0 or ends past the end of the text.
   */
  makeRegion(span: Span): SourceRegion {
    // Basic validation
    if (span.start.index < 0 || span.end.index > this.length) {
      throw new Error(`Span out of bounds: ${span.start.index}-${span.end.index} (length: ${this.length})`);
    }
    return new SourceRegion(this, span);
  }

  /** Creates a SourceRegion covering the entire SourceText. */
  fullRegion(): SourceRegion {
    const start = this.getLocation(0);
    const end = this.getLocation(this.length);
    return this.makeRegion({ start, end });
  }

  /**
   * Returns the text of a specific line (1-based index) without its line
   * terminator (`\n`, `\r\n` or `\r`). Returns `""` for an out-of-bounds line.
   */
  getLineText(line: number): string {
    const range = this.tryGetLineRange(line);
    if (range === undefined) return "";
    return this.sliceByCp(range.start, this.#lineContentEnd(range));
  }

  /**
   * Returns the code point range of a line (1-based index), including its
   * line terminator, or `undefined` if the line does not exist.
   */
  tryGetLineRange(line: number): CodePointSpan | undefined {
    const lineIndex = line - 1;
    if (lineIndex < 0 || lineIndex >= this.lineStarts.length) {
      return undefined;
    }
    const start = this.lineStarts[lineIndex];
    const end = (lineIndex + 1 < this.lineStarts.length)
      ? this.lineStarts[lineIndex + 1]
      : this.#chars.length;
    return rawSpan(start, end);
  }

  /**
   * Like `tryGetLineRange`, but throws for a line that does not exist.
   *
   * @throws Error if `line` is out of bounds.
   */
  getLineRange(line: number): CodePointSpan {
    const range = this.tryGetLineRange(line);
    if (range === undefined) {
      throw new Error(`Line ${line} is out of bounds (line count: ${this.lineCount})`);
    }
    return range;
  }

  /** Returns the end index of a line range with any trailing `\n`, `\r\n` or `\r` removed. */
  #lineContentEnd(range: CodePointSpan): CodePointIndex {
    let end = range.end;
    if (end <= range.start) return end;

    const last = this.codePointAt(end - 1);
    if (last === NEW_LINE) {
      end--;
      // A `\r` directly before the `\n` belongs to the same CRLF terminator.
      if (end > range.start && this.codePointAt(end - 1) === CARRIAGE_RETURN) {
        end--;
      }
    } else if (last === CARRIAGE_RETURN) {
      end--;
    }
    return end;
  }
}

/** Creates a Span from two SourceLocations. */
export function span(start: SourceLocation, end: SourceLocation): Span {
  return { start, end };
}

/** Creates a CodePointSpan from two code point indices. */
export function rawSpan(start: CodePointIndex, end: CodePointIndex): CodePointSpan {
  return { start, end };
}

/** Creates a zero-width CodePointSpan at `index`. */
export function pointSpan(index: CodePointIndex): CodePointSpan {
  return { start: index, end: index };
}

/** Number of code points covered by the span. */
export function spanLength(span: CodePointSpan): number {
  return span.end - span.start;
}

/** Returns true when the span covers no code points. */
export function isZeroWidth(span: CodePointSpan): boolean {
  return span.start === span.end;
}

/**
 * A view onto a contiguous part of a SourceText.
 * Indices used with a region are indices into the underlying SourceText.
 */
export class SourceRegion {
  constructor(
    public readonly source: SourceText,
    public readonly span: Span
  ) {}

  /** Returns the code point at `index` of the underlying source (not region-relative). */
  codePointAt(index: CodePointIndex): CodePoint {
    return this.source.codePointAt(index);
  }

  /** Number of code points covered by the region. */
  get length(): number {
    return this.span.end.index - this.span.start.index;
  }

  /** Number of lines the region touches. */
  get lineCount(): number {
    return this.span.end.line - this.span.start.line + 1;
  }

  /** Location at the end of the region. */
  get eofLocation(): SourceLocation {
    return this.span.end;
  }

  /** Zero-width span positioned at the end of the region. */
  get eofSpan(): Span {
    const loc = this.eofLocation;
    return span(loc, loc);
  }

  /** The region's extent as plain code point indices. */
  get codePointSpan(): CodePointSpan {
    return rawSpan(this.span.start.index, this.span.end.index);
  }

  /** Iterates over `[index, codePoint]` pairs inside the region. */
  *codePoints(): IterableIterator<[CodePointIndex, CodePoint]> {
    const start = this.span.start.index;
    const end = this.span.end.index;
    for (let i = start; i < end; i++) {
      yield [i as CodePointIndex, this.codePointAt(i as CodePointIndex)];
    }
  }

  /** Returns the text covered by the region. */
  toString(): string {
    return this.stringOf(this.span);
  }

  /** Returns the source text covered by `span` (not validated against the region). */
  stringOf(span: Span): string {
    return this.source.sliceByCp(span.start.index, span.end.index);
  }

  /**
   * Returns a Span for the given line (1-based index).
   * The span covers the whole source line, even where the region covers only part of it.
   *
   * @throws Error if the line is not one of the lines the region touches.
   */
  getLineSpan(line: number, stripNewlines = true): Span {
    if (line < this.span.start.line || line > this.span.end.line) {
      throw new Error(`Line ${line} is outside of region lines ${this.span.start.line}-${this.span.end.line}`);
    }
    return this.source.getLineSpan(line, stripNewlines);
  }

  /**
   * Iterates over all lines that intersect this region.
   * Yields a Span for each line.
   */
  *lines(stripNewlines = true): IterableIterator<Span> {
    const startLine = this.span.start.line;
    const endLine = this.span.end.line;
    for (let currentLine = startLine; currentLine <= endLine; currentLine++) {
      yield this.getLineSpan(currentLine, stripNewlines);
    }
  }

  /** Calls `callback` with the span and 1-based line number of every line in the region. */
  forEachLine(callback: (span: Span, lineNo: number) => void, stripNewlines = true): void {
    let lineNo = this.span.start.line;
    for (const lineSpan of this.lines(stripNewlines)) {
      callback(lineSpan, lineNo++);
    }
  }

  /**
   * Returns the text covered by `span`.
   *
   * @throws Error if `span` is not fully inside the region.
   */
  slice(span: CodePointSpan): string {
    if (span.start < this.span.start.index || span.end > this.span.end.index) {
      throw new Error(`CodePointSpan ${span.start}-${span.end} is outside region ${this.span.start.index}-${this.span.end.index}`);
    }
    return this.source.sliceByCp(span.start, span.end);
  }

  /**
   * Creates a sub-region within this region.
   * Validates that the new span is contained within the current region.
   *
   * @throws Error if `span` is not fully inside the region.
   */
  subRegion(span: Span): SourceRegion {
    if (span.start.index < this.span.start.index || span.end.index > this.span.end.index) {
      throw new Error(`Sub-region span ${span.start.index}-${span.end.index} is not within parent region ${this.span.start.index}-${this.span.end.index}`);
    }
    return this.source.makeRegion(span);
  }
}

/** A half-open range `[start, end)` expressed as full source locations. */
export type Span = {
  start: SourceLocation;
  end: SourceLocation;
}

/** A position in a SourceText. */
export type SourceLocation = {
  index: CodePointIndex;
  line: number; // 1-based
  column: number; // 1-based
}

/** Returns true when `inner` lies entirely within `outer`. */
export function containsSpan(outer: CodePointSpan, inner: CodePointSpan): boolean {
  return outer.start <= inner.start && inner.end <= outer.end;
}

/** Returns true when `index` lies within the half-open range `[span.start, span.end)`. */
export function containsIndex(span: CodePointSpan, index: CodePointIndex): boolean {
  return span.start <= index && index < span.end;
}

// === Cursor ===

/** A forward-moving read position inside a SourceRegion. */
export class SourceCursor {
  private index: CodePointIndex;

  /** Starts the cursor at the beginning of `region`. */
  constructor(public readonly region: SourceRegion) {
    this.index = region.span.start.index;
  }

  /** Current position as a code point index into the underlying source. */
  current(): CodePointIndex {
    return this.index;
  }

  /** Returns a position that can later be passed to `restore`. */
  checkpoint(): CodePointIndex {
    return this.index;
  }

  /** Moves the cursor to `index` (not validated). */
  restore(index: CodePointIndex): void {
    this.index = index;
  }

  /** Returns the code point under the cursor, or `undefined` at the end of the region. */
  peek(): CodePoint | undefined {
    if (this.index >= this.region.span.end.index) return undefined;
    return this.region.codePointAt(this.index);
  }

  /** Returns the code point under the cursor and moves past it; `undefined` at the end. */
  advance(): CodePoint | undefined {
    const cp = this.peek();
    if (cp === undefined) return undefined;
    this.index += 1;
    return cp;
  }

  /** Returns true when the cursor is at or past the end of the region. */
  isAtEnd(): boolean {
    return this.index >= this.region.span.end.index;
  }

  /** Span from `start` up to (not including) the current position. */
  spanFrom(start: CodePointIndex): CodePointSpan {
    return rawSpan(start, this.index);
  }

  /** One-code-point span under the cursor, or a zero-width span at the end. */
  currentSpan(): CodePointSpan {
    return this.isAtEnd()
      ? pointSpan(this.index)
      : rawSpan(this.index, this.index + 1);
  }

  /** Zero-width span at the end of the region. */
  eofSpan(): CodePointSpan {
    return pointSpan(this.region.span.end.index);
  }

  /** Returns the region text covered by `span`; throws if it is outside the region. */
  slice(span: CodePointSpan): string {
    return this.region.slice(span);
  }

  /** Line/column location of the current position. */
  location(): SourceLocation {
    return this.region.source.getLocation(this.index);
  }

  /**
   * Moves the cursor to the start of the next line, or to the end of the
   * region when the region has no further line.
   */
  moveToNextLineStart(): void {
    const loc = this.region.source.getLocation(this.index);
    const nextLine = loc.line + 1;
    if (nextLine > this.region.span.end.line) {
      this.index = this.region.span.end.index;
      return;
    }
    const range = this.region.source.getLineRange(nextLine);
    // Never step past the end of the region.
    this.index = Math.min(range.start, this.region.span.end.index);
  }
}

// === Rendering Utilities ===

export type LineView = {
  lineNo: number;
  sourceLine: string; // The text of the line, without its line terminator

  // These split the line into 3 parts for coloring:
  // prefix | highlight | suffix
  prefix: string;
  highlight: string;
  suffix: string;

  // Helpers for underlines (e.g., "   ^^^^^")
  gutterPad: string; // Padding to align line numbers
  underline: string; // The literal "^^^" string for CLI usage
};

/** Renders the whole region as highlighted lines; see `renderSpan`. */
export function renderRegion(region: SourceRegion, contextLines = 1): LineView[] {
  return renderSpan(region, region.span, contextLines);
}

/**
 * Builds one LineView per line touched by `span`, plus `contextLines` lines
 * of context before and after.
 *
 * Lines inside the span are split into prefix/highlight/suffix and get an
 * underline; context lines carry their whole text in `prefix` and an empty
 * underline. Splitting is done by code point so that columns (which count
 * code points) line up even when the line contains astral characters.
 *
 * @param region - Region whose underlying source is rendered.
 * @param span - The span to highlight.
 * @param contextLines - Number of extra lines to show on each side.
 */
export function renderSpan(region: SourceRegion, span: Span, contextLines = 1): LineView[] {
  const text = region.source;
  const views: LineView[] = [];

  // Determine range of lines to show (including context), clamped to the text.
  const startLine = Math.max(1, span.start.line - contextLines);
  const endLine = Math.min(text.lineCount, span.end.line + contextLines);

  // Max width of line numbers for nice padding (e.g. "  9 |" vs " 10 |").
  const maxLineNoWidth = endLine.toString().length;

  for (let lineNo = startLine; lineNo <= endLine; lineNo++) {
    // Strip only the line terminator; trailing spaces are kept so they can be highlighted.
    const lineSpan = text.getLineSpan(lineNo, true);
    const lineStart = lineSpan.start.index;
    const lineEnd = lineSpan.end.index;
    const lineLen = lineEnd - lineStart;
    const sourceLine = text.sliceByCp(lineStart, lineEnd);

    // Logic to distinguish context lines from error lines
    const isErrorLine = lineNo >= span.start.line && lineNo <= span.end.line;

    // A pure context line keeps its whole text in `prefix`.
    let prefix = sourceLine;
    let highlight = "";
    let suffix = "";
    let underline = "";

    if (isErrorLine) {
      // Columns are 1-based; offsets within the line are 0-based.
      // On the first line the highlight starts at the span's column, otherwise at the line start.
      const startCol = (lineNo === span.start.line) ? span.start.column - 1 : 0;
      // On the last line the highlight ends at the span's column, otherwise at the line end.
      const endCol = (lineNo === span.end.line) ? span.end.column - 1 : lineLen;

      // Clamp to the line (safety) and keep the range well-formed.
      const hlStart = Math.max(0, Math.min(startCol, lineLen));
      const hlEnd = Math.max(hlStart, Math.min(endCol, lineLen));

      prefix = text.sliceByCp(lineStart, lineStart + hlStart);
      highlight = text.sliceByCp(lineStart + hlStart, lineStart + hlEnd);
      suffix = text.sliceByCp(lineStart + hlEnd, lineEnd);

      // Build the "underline" string (e.g., "    ^^^^"): spaces for the prefix, then carets
      // for the highlight (at least one, so a zero-width cursor position stays visible).
      // Note: this counts one cell per code point, i.e. it assumes monospaced fonts
      // and no fancy unicode widths, which usually holds for code.
      underline = " ".repeat(hlStart) + "^".repeat(Math.max(1, hlEnd - hlStart));
    }

    views.push({
      lineNo,
      sourceLine,
      prefix,
      highlight,
      suffix,
      gutterPad: " ".repeat(maxLineNoWidth - lineNo.toString().length),
      underline
    });
  }

  return views;
}