commit aa7ce6e064c88bb6115c71b8e26f73ae6cad9327 Author: Yura Dupyn <2153100+omedusyo@users.noreply.github.com> Date: Mon Apr 6 15:33:41 2026 +0200 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b947077 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +node_modules/ +dist/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..9ad2d10 --- /dev/null +++ b/README.md @@ -0,0 +1,12 @@ +TypeScript library for handling source code strings. + +It has its own String type that deals with Unicode in a saner way than JS's UTF-16 strings. + +This takes in a JS string and makes it into a fat string `SourceText` that handles all the insanity of UTF-16 in JS (like JS leaking UTF-16 internals so that there are code points spanning multiple indices in the string array). +- It handles NFC normalization +- It makes the original string easy to traverse in an error-free way by introducing a character abstraction - type `CodePoint` and its position within the SourceText called `CodePointIndex` +- It also tracks where lines start (handling various platform-specific weirdness like `\r\n`) + +It also allows for Spatial Tracking of various sub-regions within the source. It introduces +- point-like `SourceLocation` abstraction (basically where a cursor could be) +- and interval-like `Span` abstraction (basically what a mouse selection could span) diff --git a/index.ts b/index.ts new file mode 100644 index 0000000..d99eb10 --- /dev/null +++ b/index.ts @@ -0,0 +1,239 @@ + +// === Char type === +export type StringIndex = number; // UTF-16 index into string +export type CodePointIndex = number; // index into array of code-points +export type CodePoint = number; // could also name it `UnicodeCodePoint`. Basically for `s: string` we have `s.codePointAt(i: index): char`. 
/**
 * Returns the first Unicode code point of `c`.
 *
 * Intended for single-character literals such as `char('\n')`. For an empty
 * string `codePointAt(0)` yields `undefined` at runtime, so callers must pass
 * a non-empty string.
 */
export function char(c: string): CodePoint {
  return c.codePointAt(0) as CodePoint;
}

/** A code point together with the UTF-16 offset at which it starts in the source string. */
export type CodePointRef = {
  char: CodePoint,
  offset: StringIndex,
};

// === Source Text ===

/**
 * A source string indexed by Unicode code point instead of by UTF-16 code unit.
 *
 * The input is NFC-normalized, split into code points (each remembering its
 * UTF-16 offset in the normalized string), and scanned for line starts.
 * `\n`, `\r\n` and a lone `\r` each terminate a line.
 */
export class SourceText {
  /** The NFC-normalized source string. */
  readonly source: string;
  // TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints (each 20 bit but whatever), the other for pointers to original string.
  /** One entry per code point of `source`, in order. */
  readonly chars: CodePointRef[];

  /** Stores the CodePointIndex where each line begins. Entry 0 is always 0. */
  readonly lineStarts: CodePointIndex[];

  /**
   * @param rawSource Any JS string; it is NFC-normalized before indexing.
   */
  constructor(rawSource: string) {
    const source = rawSource.normalize('NFC');

    this.source = source;
    this.chars = [];
    this.lineStarts = [0]; // Line 1 always starts at index 0

    let i = 0;
    while (i < source.length) {
      // `i < source.length`, so codePointAt never returns undefined here.
      const cp = source.codePointAt(i) as CodePoint;
      const cpIndex = this.chars.length;
      this.chars.push({ char: cp, offset: i });

      // Code points above the BMP occupy two UTF-16 code units.
      i += cp > 0xFFFF ? 2 : 1;

      if (cp === NEW_LINE) {
        // The code point after a newline starts a new line.
        this.lineStarts.push(cpIndex + 1);
      } else if (cp === CARRIAGE_RETURN) {
        // For CRLF the following `\n` records the line start; only a lone CR
        // records it here.
        const nextIsNewLine = i < source.length && source.codePointAt(i) === NEW_LINE;
        if (!nextIsNewLine) {
          this.lineStarts.push(cpIndex + 1);
        }
      }
    }
  }

  /** Number of code points in the text. */
  get length(): number {
    return this.chars.length;
  }

  /**
   * Returns the substring covering code points `[start, end)`.
   *
   * @param start Code point index of the first character to include.
   * @param end Code point index one past the last character; values past the
   *   end of the text are clamped to the end.
   * @returns The slice, or `""` when `start` is out of bounds or `end <= start`
   *   (including a negative `end`).
   */
  sliceByCp(start: number, end: number): string {
    const startRef = this.chars[start];
    // Handle out of bounds gracefully
    if (startRef === undefined) return "";
    // Also covers negative `end`, which would otherwise index outside `chars`.
    if (end <= start) return "";

    const endRef = this.chars[end];
    const endOff = endRef !== undefined ? endRef.offset : this.source.length;

    return this.source.slice(startRef.offset, endOff);
  }

  // Converts a linear Code Point Index into SourceLocation
  // getLocation(index: CodePointIndex): SourceLocation {
  //   // TODO: can be implemented either by a linear or binary search.
  //   return (0 as any);
  // }

  /**
   * Returns the text of a line without its line terminator.
   *
   * @param line 1-based line number.
   * @returns The line's text with a trailing `\n`, `\r\n` or `\r` removed, or
   *   `""` when `line` is out of range.
   */
  getLineText(line: number): string {
    const start = this.lineStarts[line - 1];
    if (start === undefined) return "";

    const { end } = this.getLineRange(line);
    return this.sliceByCp(start, stripLineTerminator(this.chars, start, end));
  }

  /**
   * Returns the code point range `[start, end)` of a line, including its line
   * terminator.
   *
   * @param line 1-based line number.
   * @returns The range, or `{ start: 0, end: 0 }` when `line` is out of range.
   */
  getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } {
    const start = this.lineStarts[line - 1];
    if (start === undefined) {
      // TODO: This is a bit suspicious. Maybe return undefined?
      return { start: 0, end: 0 };
    }

    // The line runs up to the next line's start, or to the end of the text.
    const nextStart = this.lineStarts[line];
    const end = nextStart !== undefined ? nextStart : this.chars.length;

    return { start, end };
  }
}

/** Convenience factory equivalent to `new SourceText(s)`. */
export function sourceText(s: string): SourceText {
  return new SourceText(s);
}

/** Interval-like region of the source, from `start` up to `end`. */
export type Span = {
  start: SourceLocation;
  end: SourceLocation;
}

/** Point-like position in the source. */
export type SourceLocation = {
  index: CodePointIndex;
  line: number;   // 1-based
  column: number; // 1-based
}

// Whitespace
export const NEW_LINE: CodePoint = char('\n');
export const CARRIAGE_RETURN: CodePoint = char('\r');
export const SPACE: CodePoint = char(' ');
export const TAB: CodePoint = char('\t');

// Digit Boundaries
export const DIGIT_0: CodePoint = char('0');
export const DIGIT_9: CodePoint = char('9');

export const DOT: CodePoint = char('.');

// Hex Boundaries
export const LOWERCASE_a: CodePoint = char('a');
export const UPPERCASE_A: CodePoint = char('A');
export const LOWERCASE_f: CodePoint = char('f');
export const UPPERCASE_F: CodePoint = char('F');

/**
 * Moves `end` back past one trailing line terminator (`\n`, `\r\n` or `\r`)
 * of the code point range `[start, end)`, never going before `start`.
 */
function stripLineTerminator(
  chars: readonly CodePointRef[],
  start: CodePointIndex,
  end: CodePointIndex,
): CodePointIndex {
  let contentEnd = end;
  let last = contentEnd > start ? chars[contentEnd - 1] : undefined;
  if (last !== undefined && last.char === NEW_LINE) {
    contentEnd--;
    last = contentEnd > start ? chars[contentEnd - 1] : undefined;
  }
  if (last !== undefined && last.char === CARRIAGE_RETURN) {
    contentEnd--;
  }
  return contentEnd;
}

// === Rendering Utilities ===

/** Display data for one rendered source line. */
export type LineView = {
  lineNo: number;
  sourceLine: string; // The text of the line without its line terminator

  // These split the line into 3 parts for coloring:
  // prefix | highlight | suffix
  prefix: string;
  highlight: string;
  suffix: string;

  // Helpers for underlines (e.g., "   ^^^^^")
  gutterPad: string; // Padding to align line numbers
  underline: string; // The literal "^^^" string for CLI usage
};

/**
 * Builds per-line display data for `span`, plus surrounding context lines.
 *
 * Columns are interpreted as 1-based code point offsets within the line, so
 * characters outside the BMP count as one column and are never split.
 * NOTE(review): this matches `SourceLocation.index` being a `CodePointIndex`;
 * confirm that producers of `Span` count columns the same way.
 *
 * For every returned view, `prefix + highlight + suffix === sourceLine`.
 * Only the line terminator is removed from `sourceLine`; trailing spaces and
 * tabs are kept.
 *
 * @param text The source the span refers to.
 * @param span The region to highlight.
 * @param contextLines Number of extra lines to show before and after the span.
 * @returns One `LineView` per displayed line, in ascending line order.
 */
export function renderSpan(text: SourceText, span: Span, contextLines = 1): LineView[] {
  const views: LineView[] = [];

  // Determine range of lines to show (including context)
  const startLine = Math.max(1, span.start.line - contextLines);
  const endLine = Math.min(text.lineStarts.length, span.end.line + contextLines);

  // Width of the largest line number, for aligned gutters (" 9 |" vs "10 |")
  const maxLineNoWidth = endLine.toString().length;

  for (let lineNo = startLine; lineNo <= endLine; lineNo++) {
    const lineRange = text.getLineRange(lineNo);
    const lineStart = lineRange.start;

    // Drop only the line terminator for display purposes.
    const contentEnd = stripLineTerminator(text.chars, lineStart, lineRange.end);
    const lineLength = contentEnd - lineStart; // in code points
    const lineRaw = text.sliceByCp(lineStart, contentEnd);

    // Logic to distinguish context lines from error lines
    const isErrorLine = lineNo >= span.start.line && lineNo <= span.end.line;

    let prefix = lineRaw;
    let highlight = "";
    let suffix = "";
    let underline = "";

    if (isErrorLine) {
      // Columns are 1-based; offsets within the line are 0-based.
      const rawStart = lineNo === span.start.line ? span.start.column - 1 : 0;
      const rawEnd = lineNo === span.end.line ? span.end.column - 1 : lineLength;

      // Clamp to the line, and keep end >= start so the three parts never overlap.
      const hlStart = Math.max(0, Math.min(rawStart, lineLength));
      const hlEnd = Math.max(hlStart, Math.min(rawEnd, lineLength));

      prefix = text.sliceByCp(lineStart, lineStart + hlStart);
      highlight = text.sliceByCp(lineStart + hlStart, lineStart + hlEnd);
      suffix = text.sliceByCp(lineStart + hlEnd, contentEnd);

      // Build the "underline" string (e.g., "    ^^^^")
      // Note: This naive approach assumes monospaced fonts and no fancy unicode widths,
      // which usually holds for code.
      // Show at least one caret so a zero-width cursor position stays visible.
      underline = " ".repeat(hlStart) + "^".repeat(Math.max(1, hlEnd - hlStart));
    }

    views.push({
      lineNo,
      sourceLine: lineRaw,
      prefix,
      highlight,
      suffix,
      gutterPad: " ".repeat(maxLineNoWidth - lineNo.toString().length),
      underline
    });
  }

  return views;
}

// Patch trailer kept from the original text: the same commit also adds the empty file tmp_repl/tmp_repl.md
// (diff --git a/tmp_repl/tmp_repl.md b/tmp_repl/tmp_repl.md, new file mode 100644, index 0000000..e69de29).