// === Char type ===

/** UTF-16 code-unit index into a JavaScript string. */
export type StringIndex = number;

/** Index into an array of code points (see `SourceText.chars`). */
export type CodePointIndex = number;

/**
 * A Unicode code point (could also be named `UnicodeCodePoint`).
 * Basically for `s: string` we have `s.codePointAt(i: index): char`.
 */
export type CodePoint = number;

/**
 * Returns the first code point of `c`.
 *
 * @param c - Expected to be non-empty. For an empty string `codePointAt(0)`
 *   yields `undefined`, which the cast hides from the type checker.
 */
export function char(c: string): CodePoint {
  return c.codePointAt(0) as CodePoint;
}

/** A code point together with the UTF-16 offset at which it starts in the source string. */
export type CodePointRef = {
  char: CodePoint;
  offset: StringIndex;
};

// === Source Text ===

/**
 * An NFC-normalized source string indexed by code point, with a table of
 * line starts. `\n`, `\r\n` and a lone `\r` each terminate a line.
 */
export class SourceText {
  /** The NFC-normalized source string. */
  readonly source: string;

  // TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints
  // (each 20 bit but whatever), the other for pointers to original string.
  /** One entry per code point of `source`, in order. */
  readonly chars: CodePointRef[];

  /** Stores the CodePointIndex where each line begins. Entry 0 is always 0. */
  readonly lineStarts: CodePointIndex[];

  /**
   * @param rawSource - Text to index. It is normalized to NFC first, so all
   *   offsets refer to the normalized string, not to `rawSource`.
   */
  constructor(rawSource: string) {
    const source = rawSource.normalize('NFC');
    this.source = source;
    this.chars = [];
    this.lineStarts = [0]; // Line 1 always starts at index 0

    let i = 0;
    while (i < source.length) {
      const cp = source.codePointAt(i) as CodePoint;
      const cpIndex = this.chars.length;
      this.chars.push({ char: cp, offset: i });

      // Code points above the BMP occupy two UTF-16 code units.
      i += cp > 0xFFFF ? 2 : 1;

      // === Newline Logic ===
      if (cp === NEW_LINE) {
        // Found a newline, the NEXT char starts a new line
        this.lineStarts.push(cpIndex + 1);
      } else if (cp === CARRIAGE_RETURN) {
        // Handle CR (Classic Mac) or CRLF start.
        // `i` already points past the CR, so this peeks at the following char.
        const nextIsNL = i < source.length && source.codePointAt(i) === NEW_LINE;
        if (!nextIsNL) {
          // Only push if it's NOT CRLF. If it is CRLF, the loop handles the \n next.
          this.lineStarts.push(cpIndex + 1);
        }
      }
    }
  }

  /** Number of code points in the source. */
  get length(): number {
    return this.chars.length;
  }

  /**
   * Returns the substring covering code points `[start, end)`.
   *
   * @param start - First code point index (inclusive).
   * @param end - Code point index to stop at (exclusive). Values at or past
   *   the end of the text slice up to the end of the source.
   * @returns The slice, or `""` when `start` is out of bounds or the range is empty.
   */
  sliceByCp(start: number, end: number): string {
    const startRef = this.chars[start];
    // Handle out of bounds gracefully
    if (!startRef) return "";
    // An empty or inverted range (including a negative `end`) yields nothing.
    if (end <= start) return "";

    const endOff = this.chars[end]?.offset ?? this.source.length;
    return this.source.slice(startRef.offset, endOff);
  }

  // Converts a linear Code Point Index into SourceLocation
  // getLocation(index: CodePointIndex): SourceLocation {
  //   // TODO: can be implemented either by a linear or binary search.
  //   return (0 as any);
  // }

  /**
   * Returns the text of a specific line without its line terminator.
   *
   * @param line - 1-based line number.
   * @returns The line's text, or `""` when `line` is out of range.
   */
  getLineText(line: number): string {
    const lineIndex = line - 1;
    if (lineIndex < 0 || lineIndex >= this.lineStarts.length) return "";

    const startCp = this.lineStarts[lineIndex];
    if (startCp === undefined) return "";

    const nextStart = this.lineStarts[lineIndex + 1];
    if (nextStart === undefined) {
      // Last line: nothing terminates it, so it runs to the end of the text.
      return this.sliceByCp(startCp, this.chars.length);
    }

    // -1 to exclude the terminator char itself
    let endCp = nextStart - 1;
    // A CRLF terminator is two code points: also drop the `\r` before the `\n`.
    if (
      endCp > startCp &&
      this.chars[endCp]?.char === NEW_LINE &&
      this.chars[endCp - 1]?.char === CARRIAGE_RETURN
    ) {
      endCp -= 1;
    }
    return this.sliceByCp(startCp, endCp);
  }

  /**
   * Returns the code point range `[start, end)` of a line, INCLUDING its
   * line terminator (if any).
   *
   * @param line - 1-based line number.
   * @returns The range, or `{ start: 0, end: 0 }` when `line` is out of range.
   */
  getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } {
    const lineIndex = line - 1;
    const start = this.lineStarts[lineIndex];
    if (lineIndex < 0 || lineIndex >= this.lineStarts.length || start === undefined) {
      // TODO: This is a bit suspicious. Maybe return undefined?
      return { start: 0, end: 0 };
    }

    const end = this.lineStarts[lineIndex + 1] ?? this.chars.length;
    return { start, end };
  }
}

/** Convenience factory for `new SourceText(s)`. */
export function sourceText(s: string): SourceText {
  return new SourceText(s);
}

/** A region of source text delimited by two locations. */
export type Span = {
  start: SourceLocation;
  end: SourceLocation;
}

/** A position in the source text. */
export type SourceLocation = {
  index: CodePointIndex;
  line: number; // 1-based
  column: number; // 1-based
}

// Whitespace
export const NEW_LINE: CodePoint = char('\n');
export const CARRIAGE_RETURN: CodePoint = char('\r');
export const SPACE: CodePoint = char(' ');
export const TAB: CodePoint = char('\t');

// Digit Boundaries
export const DIGIT_0: CodePoint = char('0');
export const DIGIT_9: CodePoint = char('9');
export const DOT: CodePoint = char('.');

// Hex Boundaries
export const LOWERCASE_a: CodePoint = char('a');
export const UPPERCASE_A: CodePoint = char('A');
export const LOWERCASE_f: CodePoint = char('f');
export const UPPERCASE_F: CodePoint = char('F');

// === Rendering Utilities ===

/** One displayable line produced by `renderSpan`. */
export type LineView = {
  lineNo: number;
  sourceLine: string; // The full raw text of the line (without its terminator)

  // These split the line into 3 parts for coloring:
  // prefix | highlight | suffix
  prefix: string;
  highlight: string;
  suffix: string;

  // Helpers for underlines (e.g., "    ^^^^^")
  gutterPad: string; // Padding to align line numbers
  underline: string; // The literal "^^^" string for CLI usage
};

/** Removes a single trailing line terminator (`\r\n`, `\n` or `\r`) and nothing else. */
function stripLineTerminator(line: string): string {
  return line.replace(/(?:\r\n|\n|\r)$/, "");
}

/**
 * Splits the lines touched by `span` (plus surrounding context) into
 * prefix / highlight / suffix parts and builds a caret underline for each
 * highlighted line.
 *
 * @param text - The indexed source text.
 * @param span - Region to highlight; lines and columns are 1-based.
 * @param contextLines - Number of extra lines to show before and after the span.
 * @returns One `LineView` per displayed line, in line order.
 */
export function renderSpan(text: SourceText, span: Span, contextLines = 1): LineView[] {
  const views: LineView[] = [];

  // Determine range of lines to show (including context)
  const startLine = Math.max(1, span.start.line - contextLines);
  const endLine = Math.min(text.lineStarts.length, span.end.line + contextLines);

  // Calculate the max width of line numbers for nice padding (e.g. "  9 |" vs " 10 |")
  const maxLineNoWidth = endLine.toString().length;

  for (let lineNo = startLine; lineNo <= endLine; lineNo++) {
    const lineRange = text.getLineRange(lineNo);

    // We strip the trailing newline for display purposes. Only the terminator
    // is removed: trailing spaces/tabs are real columns a span may point at.
    const lineRaw = stripLineTerminator(text.sliceByCp(lineRange.start, lineRange.end));

    // Determine the intersection of the Span with this specific Line.
    // NOTE(review): columns are used as UTF-16 offsets into the line here; if
    // columns are counted in code points, lines containing astral characters
    // will be misaligned — confirm against whatever produces `SourceLocation`.

    // 1. Where does the highlight start on this line?
    // If this is the start line, use span.column. Otherwise start at 0 (beginning of line).
    // We subtract 1 because columns are 1-based, string indices are 0-based.
    const highlightStartCol = (lineNo === span.start.line) ? span.start.column - 1 : 0;

    // 2. Where does the highlight end on this line?
    // If this is the end line, use span.column. Otherwise end at the string length.
    const highlightEndCol = (lineNo === span.end.line) ? span.end.column - 1 : lineRaw.length;

    // Logic to distinguish context lines from error lines
    const isErrorLine = lineNo >= span.start.line && lineNo <= span.end.line;

    let prefix = "";
    let highlight = "";
    let suffix = "";

    if (isErrorLine) {
      // Clamp indices to bounds (safety)
      const safeStart = Math.max(0, Math.min(highlightStartCol, lineRaw.length));
      const safeEnd = Math.max(0, Math.min(highlightEndCol, lineRaw.length));

      prefix = lineRaw.substring(0, safeStart);
      highlight = lineRaw.substring(safeStart, safeEnd);
      suffix = lineRaw.substring(safeEnd);
    } else {
      // Pure context line
      prefix = lineRaw;
    }

    // Build the "underline" string (e.g., "    ^^^^")
    // Note: This naive approach assumes monospaced fonts and no fancy unicode widths,
    // which usually holds for code.
    let underline = "";
    if (isErrorLine) {
      // Spaces for prefix
      underline += " ".repeat(prefix.length);
      // Carets for highlight (ensure at least 1 if it's a zero-width cursor position)
      underline += "^".repeat(Math.max(1, highlight.length));
    }

    views.push({
      lineNo,
      sourceLine: lineRaw,
      prefix,
      highlight,
      suffix,
      gutterPad: " ".repeat(maxLineNoWidth - lineNo.toString().length),
      underline
    });
  }

  return views;
}