commit aa7ce6e064c88bb6115c71b8e26f73ae6cad9327
Author: Yura Dupyn <2153100+omedusyo@users.noreply.github.com>
Date:   Mon Apr 6 15:33:41 2026 +0200

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b947077
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+node_modules/
+dist/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..9ad2d10
--- /dev/null
+++ b/README.md
@@ -0,0 +1,12 @@
+TypeScript library for handling source code strings.
+
+It has its own string type that deals with Unicode in a saner way than JS's UTF-16 strings.
+
+It takes in a JS string and turns it into a fat string, `SourceText`, that handles all the insanity of UTF-16 in JS (such as JS leaking UTF-16 internals, so that a single code point can span multiple indices of the string):
+- It handles NFC normalization.
+- It makes the original string easy to traverse in an error-free way by introducing a character abstraction: the type `CodePoint`, and its position within the `SourceText`, called `CodePointIndex`.
+- It also tracks where each line starts (handling various platform-specific weirdness like `\r\n`).
+
+It also allows for spatial tracking of various sub-regions within the source. It introduces:
+- point-like `SourceLocation` abstraction (basically where a cursor could be)
+- and interval-like `Span` abstraction (basically what a mouse selection could span)
diff --git a/index.ts b/index.ts
new file mode 100644
index 0000000..d99eb10
--- /dev/null
+++ b/index.ts
@@ -0,0 +1,239 @@
+
+// === Char type ===
+/** Index into a JS string, counted in UTF-16 code units. */
+export type StringIndex = number; // UTF-16 index into string
+/** Index into an array of code points. */
+export type CodePointIndex = number; // index into array of code-points
+/** A Unicode code point, i.e. the kind of value `String.prototype.codePointAt` returns. */
+export type CodePoint = number; // could also name it `UnicodeCodePoint`. Basically for `s: string` we have `s.codePointAt(i: index): char`.
+
+/**
+ * Returns the Unicode code point at the start of `c`.
+ *
+ * Meant for single-character literals such as `char('\n')`; anything after
+ * the first code point is ignored.
+ *
+ * @param c - A non-empty string.
+ * @returns The code point of the first character of `c`.
+ * @throws RangeError if `c` is empty, since there is no code point to return.
+ */
+export function char(c: string): CodePoint {
+  const codePoint = c.codePointAt(0);
+  // `codePointAt` yields `undefined` for an empty string; fail loudly instead
+  // of handing back `undefined` disguised as a number.
+  if (codePoint === undefined) {
+    throw new RangeError("char() requires a non-empty string");
+  }
+  return codePoint;
+}
+
+/** A code point paired with the position in the underlying JS string at which it starts. */
+export type CodePointRef = {
+  char: CodePoint, // the code point value
+  offset: StringIndex, // UTF-16 index at which this code point starts
+};
+
+// === Source Text ===
+/**
+ * An NFC-normalized source string that can be addressed by Unicode code point
+ * instead of by UTF-16 code unit, with an index of where every line begins.
+ *
+ * Line terminators recognised: `\n`, `\r\n` (counted as one terminator) and a
+ * lone `\r`.
+ */
+export class SourceText {
+  /**
+   * The NFC-normalized text. Every `offset` stored in `chars` indexes into this
+   * string, not into the raw string passed to the constructor.
+   */
+  readonly source: string;
+  // TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints (each 20 bit but whatever), the other for pointers to original string.
+  /** One entry per code point of `source`, in order of appearance. */
+  readonly chars: CodePointRef[];
+
+  /**
+   * The CodePointIndex where each line begins. Entry `k` belongs to line
+   * `k + 1`; entry 0 is always 0, so there is at least one line even for an
+   * empty source.
+   */
+  readonly lineStarts: CodePointIndex[];
+
+  /**
+   * @param rawSource - Any JS string; it is NFC-normalized before indexing.
+   */
+  constructor(rawSource: string) {
+    const source = rawSource.normalize('NFC');
+    const chars: CodePointRef[] = [];
+    const lineStarts: CodePointIndex[] = [0]; // Line 1 always starts at index 0
+
+    let offset = 0;
+    while (offset < source.length) {
+      const codePoint = source.codePointAt(offset);
+      if (codePoint === undefined) break; // cannot happen while offset < length
+      chars.push({ char: codePoint, offset });
+
+      // Code points above the BMP occupy two UTF-16 code units.
+      offset += codePoint > 0xFFFF ? 2 : 1;
+
+      // === Newline Logic ===
+      // `\n` always ends a line. `\r` ends a line only when it is NOT followed
+      // by `\n`; for `\r\n` the `\n` handled on the next iteration ends it.
+      // `charCodeAt` past the end yields NaN, which correctly compares unequal.
+      const endsLine =
+        codePoint === NEW_LINE ||
+        (codePoint === CARRIAGE_RETURN && source.charCodeAt(offset) !== NEW_LINE);
+      if (endsLine) {
+        // The NEXT code point starts a new line.
+        lineStarts.push(chars.length);
+      }
+    }
+
+    this.source = source;
+    this.chars = chars;
+    this.lineStarts = lineStarts;
+  }
+
+  /** Number of code points in the source. */
+  get length(): number {
+    return this.chars.length;
+  }
+
+  /**
+   * Returns the text between two code-point indices.
+   *
+   * @param start - First code point to include.
+   * @param end - Code point to stop before (exclusive). Values at or beyond
+   *   `length` mean "to the end of the source".
+   * @returns The selected text, or `""` when `start` is not a valid index or
+   *   `end <= start` (including a negative `end`).
+   */
+  sliceByCp(start: number, end: number): string {
+    const startRef = this.chars[start];
+    // Handle out of bounds gracefully
+    if (startRef === undefined) return "";
+    // Empty or inverted range; also keeps a negative `end` from being looked up.
+    if (end <= start) return "";
+
+    const endOff = this.chars[end]?.offset ?? this.source.length;
+    return this.source.slice(startRef.offset, endOff);
+  }
+
+  // Converts a linear Code Point Index into SourceLocation
+  // getLocation(index: CodePointIndex): SourceLocation {
+  //   // TODO: can be implemented either by a linear or binary search.
+  //   return (0 as any);
+  // }
+
+  /**
+   * Returns the text of a line without its line terminator.
+   *
+   * @param line - 1-based line number.
+   * @returns The line's text with a trailing `\n`, `\r\n` or `\r` removed, or
+   *   `""` when `line` does not exist.
+   */
+  getLineText(line: number): string {
+    const { start, end: rangeEnd } = this.getLineRange(line);
+
+    // Drop the terminator. For `\r\n` both code points must go, so the `\r`
+    // check runs after (and in addition to) the `\n` check. The last line has
+    // no terminator: any `\r` or `\n` would have started another line.
+    let end = rangeEnd;
+    if (end > start && this.chars[end - 1]?.char === NEW_LINE) end--;
+    if (end > start && this.chars[end - 1]?.char === CARRIAGE_RETURN) end--;
+
+    return this.sliceByCp(start, end);
+  }
+
+  /**
+   * Returns the code-point range occupied by a line.
+   *
+   * @param line - 1-based line number.
+   * @returns `start` is the line's first code point; `end` is exclusive and is
+   *   the start of the next line (so the range includes the line terminator),
+   *   or `length` for the last line. Returns `{ start: 0, end: 0 }` when `line`
+   *   does not exist.
+   */
+  getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } {
+    const start = this.lineStarts[line - 1];
+    if (start === undefined) {
+      // Covers line < 1, line past the end, and non-integer line numbers.
+      // TODO: This is a bit suspicious. Maybe return undefined?
+      return { start: 0, end: 0 };
+    }
+
+    const end = this.lineStarts[line] ?? this.chars.length;
+    return { start, end };
+  }
+}
+
+/** Convenience factory: equivalent to `new SourceText(s)`. */
+export function sourceText(s: string): SourceText {
+  return new SourceText(s);
+}
+
+/** An interval of the source delimited by two locations. */
+export type Span = {
+  start: SourceLocation; // where the span begins
+  end: SourceLocation; // where the span ends; `renderSpan` stops highlighting just before this column
+}
+
+/** A single position in the source: a code-point index plus its line and column. */
+export type SourceLocation = {
+  index: CodePointIndex; // position counted in code points
+  line: number; // 1-based
+  column: number; // 1-based; NOTE(review): unit (code points vs UTF-16 code units) is not stated here — confirm
+}
+
+// Named code-point constants, each computed once with `char`.
+
+// Whitespace
+export const NEW_LINE: CodePoint = char('\n');
+export const CARRIAGE_RETURN: CodePoint = char('\r');
+export const SPACE: CodePoint = char(' ');
+export const TAB: CodePoint = char('\t');
+
+// Digit Boundaries (first and last ASCII decimal digit)
+export const DIGIT_0: CodePoint = char('0');
+export const DIGIT_9: CodePoint = char('9');
+
+// Full stop '.'
+export const DOT: CodePoint = char('.');
+
+// Hex Boundaries (first and last hex letter, in each case)
+export const LOWERCASE_a: CodePoint = char('a');
+export const UPPERCASE_A: CodePoint = char('A');
+export const LOWERCASE_f: CodePoint = char('f');
+export const UPPERCASE_F: CodePoint = char('F');
+
+// === Rendering Utilities ===
+
+/** Display data for one source line, as produced by `renderSpan`. */
+export type LineView = {
+  lineNo: number; // 1-based line number
+  sourceLine: string; // The text of the line as displayed (`renderSpan` strips the line ending)
+  
+  // These split the line into 3 parts for coloring:
+  // prefix | highlight | suffix
+  // For a pure context line the whole text is in `prefix` and the other two are empty.
+  prefix: string;
+  highlight: string;
+  suffix: string;
+  
+  // Helpers for underlines (e.g., "    ^^^^^")
+  gutterPad: string; // Padding to align line numbers (spaces up to the widest line number shown)
+  underline: string; // The literal "^^^" string for CLI usage; empty for context lines
+};
+
+/**
+ * Splits the lines covered by `span` (plus surrounding context) into pieces
+ * that a CLI or UI can colour and underline.
+ *
+ * Columns are interpreted as 1-based code-point offsets within their line, so
+ * characters outside the BMP count as one column and are never cut in half.
+ * NOTE(review): this assumes `SourceLocation.column` counts code points, in
+ * line with `SourceLocation.index` being a `CodePointIndex` — confirm.
+ *
+ * @param text - The source the span refers to.
+ * @param span - Region to highlight; `span.end` is exclusive.
+ * @param contextLines - Number of extra lines to show before and after the span.
+ * @returns One `LineView` per displayed line, in ascending line order. Lines
+ *   outside the span carry their whole text in `prefix` and an empty `underline`.
+ */
+export function renderSpan(text: SourceText, span: Span, contextLines = 1): LineView[] {
+  const views: LineView[] = [];
+
+  // Determine range of lines to show (including context)
+  const startLine = Math.max(1, span.start.line - contextLines);
+  const endLine = Math.min(text.lineStarts.length, span.end.line + contextLines);
+
+  // Calculate the max width of line numbers for nice padding (e.g. "  9 |" vs " 10 |")
+  const maxLineNoWidth = endLine.toString().length;
+
+  // Matches exactly one trailing line terminator: "\r\n", "\n" or "\r".
+  // Trailing spaces/tabs are real content and must survive.
+  const lineTerminator = /\r\n$|[\r\n]$/;
+
+  for (let lineNo = startLine; lineNo <= endLine; lineNo++) {
+    const { start: lineStart, end: rawEnd } = text.getLineRange(lineNo);
+
+    // We strip the trailing newline for display purposes
+    const rawLine = text.sliceByCp(lineStart, rawEnd);
+    const sourceLine = rawLine.replace(lineTerminator, "");
+    // Terminator characters are one UTF-16 unit each, so the difference in
+    // string length equals the number of code points removed.
+    const lineEnd = rawEnd - (rawLine.length - sourceLine.length);
+    const lineLength = lineEnd - lineStart; // in code points
+
+    // Logic to distinguish context lines from error lines
+    const isErrorLine = lineNo >= span.start.line && lineNo <= span.end.line;
+
+    // Pure context line unless overwritten below
+    let prefix = sourceLine;
+    let highlight = "";
+    let suffix = "";
+    let underline = "";
+
+    if (isErrorLine) {
+      // Intersection of the span with this line, as 0-based code-point columns.
+      // On the span's first/last line the span's own column applies (minus 1,
+      // columns being 1-based); on lines in between the whole line is covered.
+      const wantedStart = lineNo === span.start.line ? span.start.column - 1 : 0;
+      const wantedEnd = lineNo === span.end.line ? span.end.column - 1 : lineLength;
+
+      // Clamp to the line, and never let the end precede the start.
+      const hlStart = Math.max(0, Math.min(wantedStart, lineLength));
+      const hlEnd = Math.max(hlStart, Math.min(wantedEnd, lineLength));
+
+      prefix = text.sliceByCp(lineStart, lineStart + hlStart);
+      highlight = text.sliceByCp(lineStart + hlStart, lineStart + hlEnd);
+      suffix = text.sliceByCp(lineStart + hlEnd, lineEnd);
+
+      // Build the "underline" string (e.g., "    ^^^^")
+      // Note: This naive approach assumes monospaced fonts and no fancy unicode widths,
+      // which usually holds for code. One column is emitted per code point.
+      // Carets: at least 1 so a zero-width cursor position is still visible.
+      underline = " ".repeat(hlStart) + "^".repeat(Math.max(1, hlEnd - hlStart));
+    }
+
+    views.push({
+      lineNo,
+      sourceLine,
+      prefix,
+      highlight,
+      suffix,
+      gutterPad: " ".repeat(maxLineNoWidth - lineNo.toString().length),
+      underline
+    });
+  }
+
+  return views;
+}
diff --git a/tmp_repl/tmp_repl.md b/tmp_repl/tmp_repl.md
new file mode 100644
index 0000000..e69de29