Move language files into dedicated folder
This commit is contained in:
parent
3d1cd89067
commit
1b406899e0
15 changed files with 7 additions and 343 deletions
239
src/lang/parser/source_text.ts
Normal file
239
src/lang/parser/source_text.ts
Normal file
|
|
@ -0,0 +1,239 @@
|
|||
|
||||
// === Char type ===
|
||||
/** UTF-16 code-unit index into a JavaScript string. */
export type StringIndex = number;

/** Index into an array of code points (as opposed to UTF-16 code units). */
export type CodePointIndex = number;

/**
 * A Unicode code point (could also be named `UnicodeCodePoint`).
 * For `s: string` this is the kind of value produced by `s.codePointAt(i)`.
 */
export type CodePoint = number;
|
||||
|
||||
/**
 * Returns the code point of the first character of `c`.
 *
 * A character outside the BMP (encoded as a surrogate pair) yields its full
 * code point, because `String.prototype.codePointAt` is used.
 *
 * @param c A non-empty string; only its first code point is read.
 * @returns The code point at position 0 of `c`.
 * @throws RangeError If `c` is empty. Without this check `codePointAt(0)`
 *         would return `undefined`, which would leak out typed as `CodePoint`.
 */
export function char(c: string): CodePoint {
  const codePoint = c.codePointAt(0);
  if (codePoint === undefined) {
    throw new RangeError("char() requires a non-empty string");
  }
  return codePoint;
}
|
||||
|
||||
/** A single code point paired with its position in the originating string. */
export type CodePointRef = {
  /** The code point value. */
  char: CodePoint;
  /** UTF-16 index in the source string at which this code point starts. */
  offset: StringIndex;
};
|
||||
|
||||
// === Source Text ===
|
||||
/**
 * A source string indexed by code point rather than by UTF-16 code unit.
 *
 * On construction the raw input is NFC-normalized and scanned once to build:
 *  - `chars`: one entry per code point, remembering its UTF-16 offset, and
 *  - `lineStarts`: the code-point index at which every line begins.
 *
 * `\n`, a lone `\r`, and `\r\n` are each treated as a single line terminator.
 */
export class SourceText {
  /** The NFC-normalized source; every `offset` in `chars` points into this string. */
  readonly source: string;

  // TODO: Later you can try to change this to two `Uint32Array`s - one for codepoints
  // (each 20 bit but whatever), the other for pointers to original string.
  /** One entry per code point of `source`, in order. */
  readonly chars: CodePointRef[];

  /** Stores the CodePointIndex where each line begins. Entry 0 is always 0. */
  readonly lineStarts: CodePointIndex[];

  /**
   * @param rawSource Text to index. It is normalized to NFC first, so indices
   *        refer to the normalized text, not necessarily to `rawSource`.
   */
  constructor(rawSource: string) {
    const source = rawSource.normalize('NFC');

    this.source = source;
    this.chars = [];
    this.lineStarts = [0]; // Line 1 always starts at index 0

    let offset = 0;
    while (offset < source.length) {
      const codePoint = source.codePointAt(offset) as CodePoint;
      const cpIndex = this.chars.length;
      this.chars.push({ char: codePoint, offset });

      // Code points above the BMP occupy two UTF-16 code units.
      offset += codePoint > 0xFFFF ? 2 : 1;

      if (codePoint === NEW_LINE) {
        // The code point AFTER the newline starts a new line.
        this.lineStarts.push(cpIndex + 1);
      } else if (codePoint === CARRIAGE_RETURN) {
        // `offset` already points past the CR, so this peeks at the next code point.
        const nextIsNL = offset < source.length && source.codePointAt(offset) === NEW_LINE;
        if (!nextIsNL) {
          // Lone CR ends the line here. For CRLF the following `\n`
          // iteration records the line start instead.
          this.lineStarts.push(cpIndex + 1);
        }
      }
    }
  }

  /** Number of code points in the source. */
  get length(): number {
    return this.chars.length;
  }

  /**
   * Returns the text between two code-point indices.
   *
   * @param start Inclusive code-point index of the first code point.
   * @param end Exclusive code-point index; values at or past `length` mean
   *        "to the end of the source".
   * @returns The slice, or `""` when `start` is out of bounds or the range is
   *          empty or inverted (including a negative `end`, which previously threw).
   */
  sliceByCp(start: number, end: number): string {
    const startRef = this.chars[start];
    // Handle out of bounds gracefully
    if (!startRef) return "";
    if (end <= start) return "";

    const endRef = this.chars[end];
    const endOff = endRef ? endRef.offset : this.source.length;

    return this.source.slice(startRef.offset, endOff);
  }

  // Converts a linear Code Point Index into SourceLocation
  // getLocation(index: CodePointIndex): SourceLocation {
  //   // TODO: can be implemented either by a linear or binary search.
  //   return (0 as any);
  // }

  /**
   * Returns the text of a line without its line terminator.
   *
   * @param line 1-based line number.
   * @returns The line's text with the trailing `\n`, `\r`, or `\r\n` removed;
   *          `""` when `line` is out of range.
   */
  getLineText(line: number): string {
    const lineIndex = line - 1;
    if (lineIndex < 0 || lineIndex >= this.lineStarts.length) return "";

    const startCp = this.lineStarts[lineIndex];
    const isLastLine = lineIndex + 1 >= this.lineStarts.length;

    // For a non-last line the code point just before the next line start is
    // the terminator (`\n` or a lone `\r`); step back by one to exclude it.
    let endCp = isLastLine ? this.chars.length : this.lineStarts[lineIndex + 1] - 1;

    // CRLF: the terminator is two code points, so also drop the `\r` that
    // precedes the `\n`.
    if (
      !isLastLine &&
      endCp > startCp &&
      this.chars[endCp].char === NEW_LINE &&
      this.chars[endCp - 1].char === CARRIAGE_RETURN
    ) {
      endCp -= 1;
    }

    return this.sliceByCp(startCp, endCp);
  }

  /**
   * Returns the code-point range covered by a line, INCLUDING its terminator.
   *
   * @param line 1-based line number.
   * @returns `start` (inclusive) and `end` (exclusive) code-point indices.
   *          Out-of-range lines yield `{ start: 0, end: 0 }`.
   */
  getLineRange(line: number): { start: CodePointIndex, end: CodePointIndex } {
    const lineIndex = line - 1;
    if (lineIndex < 0 || lineIndex >= this.lineStarts.length) {
      // TODO: This is a bit suspicious. Maybe return undefined?
      return { start: 0, end: 0 };
    }

    const start = this.lineStarts[lineIndex];
    const hasNextLine = lineIndex + 1 < this.lineStarts.length;
    const end = hasNextLine ? this.lineStarts[lineIndex + 1] : this.chars.length;

    return { start, end };
  }
}
|
||||
|
||||
/**
 * Convenience factory: wraps `s` in a new `SourceText`.
 *
 * @param s Raw source string handed to the `SourceText` constructor.
 * @returns The newly constructed `SourceText`.
 */
export function sourceText(s: string): SourceText {
  const wrapped = new SourceText(s);
  return wrapped;
}
|
||||
|
||||
/** A region of source text delimited by two locations. */
export type Span = {
  /** Location where the region begins. */
  start: SourceLocation;
  /** Location where the region ends. */
  end: SourceLocation;
};
|
||||
|
||||
/** A position within a source text. */
export type SourceLocation = {
  /** Code-point index of the position. */
  index: CodePointIndex;
  /** Line number, 1-based. */
  line: number;
  /** Column number, 1-based. */
  column: number;
};
|
||||
|
||||
// Whitespace
|
||||
/** Code point of line feed (`\n`). */
export const NEW_LINE: CodePoint = char('\n');
/** Code point of carriage return (`\r`). */
export const CARRIAGE_RETURN: CodePoint = char('\r');
/** Code point of the space character. */
export const SPACE: CodePoint = char(' ');
/** Code point of horizontal tab (`\t`). */
export const TAB: CodePoint = char('\t');

// Digit boundaries
/** Code point of `0`. */
export const DIGIT_0: CodePoint = char('0');
/** Code point of `9`. */
export const DIGIT_9: CodePoint = char('9');

/** Code point of `.`. */
export const DOT: CodePoint = char('.');

// Hex boundaries
/** Code point of `a`. */
export const LOWERCASE_a: CodePoint = char('a');
/** Code point of `A`. */
export const UPPERCASE_A: CodePoint = char('A');
/** Code point of `f`. */
export const LOWERCASE_f: CodePoint = char('f');
/** Code point of `F`. */
export const UPPERCASE_F: CodePoint = char('F');
|
||||
|
||||
// === Rendering Utilities ===
|
||||
|
||||
/** One rendered source line, pre-split for highlighting a span. */
export type LineView = {
  /** Line number of this line. */
  lineNo: number;
  /** The text of the line. */
  sourceLine: string;

  // The line split into three parts for coloring:
  //   prefix | highlight | suffix
  /** Text before the highlighted part. */
  prefix: string;
  /** The highlighted part. */
  highlight: string;
  /** Text after the highlighted part. */
  suffix: string;

  // Helpers for underlines (e.g., "   ^^^^^")
  /** Padding to align line numbers. */
  gutterPad: string;
  /** The literal "^^^" string for CLI usage. */
  underline: string;
};
|
||||
|
||||
/**
 * Builds a per-line view of `span` (plus surrounding context lines) suitable
 * for rendering a diagnostic: each line is split into prefix / highlight /
 * suffix and accompanied by a caret underline.
 *
 * @param text The source the span refers to.
 * @param span Region to highlight; `line` and `column` are 1-based.
 * @param contextLines Number of extra lines to include before and after the span.
 * @returns One `LineView` per displayed line, in ascending line order.
 *          Context lines have their whole text in `prefix` and an empty
 *          `underline`. For every view,
 *          `prefix + highlight + suffix === sourceLine`.
 *
 * NOTE(review): columns are applied as UTF-16 offsets into the line text. If
 * the producer of `SourceLocation.column` counts code points, lines containing
 * characters outside the BMP will be highlighted at a shifted position —
 * confirm against the code that creates locations.
 */
export function renderSpan(text: SourceText, span: Span, contextLines = 1): LineView[] {
  const views: LineView[] = [];

  // Exactly one trailing line terminator: CRLF, LF, or a lone CR.
  const lineTerminator = /\r\n$|[\r\n]$/;

  // Determine range of lines to show (including context)
  const startLine = Math.max(1, span.start.line - contextLines);
  const endLine = Math.min(text.lineStarts.length, span.end.line + contextLines);

  // Width of the largest line number, for gutter padding (e.g. " 9 |" vs "10 |")
  const maxLineNoWidth = endLine.toString().length;

  for (let lineNo = startLine; lineNo <= endLine; lineNo++) {
    const lineRange = text.getLineRange(lineNo);

    // Strip only the line terminator. Trailing spaces/tabs are kept so that a
    // span pointing at trailing whitespace can still be highlighted.
    const lineRaw = text
      .sliceByCp(lineRange.start, lineRange.end)
      .replace(lineTerminator, "");

    const isErrorLine = lineNo >= span.start.line && lineNo <= span.end.line;

    let prefix = "";
    let highlight = "";
    let suffix = "";
    let underline = "";

    if (isErrorLine) {
      // On the span's first line the highlight begins at its column (converted
      // from 1-based to 0-based); on later lines it begins at the line start.
      const highlightStartCol = lineNo === span.start.line ? span.start.column - 1 : 0;
      // On the span's last line the highlight stops at its column; on earlier
      // lines it runs to the end of the line.
      const highlightEndCol = lineNo === span.end.line ? span.end.column - 1 : lineRaw.length;

      // Clamp into the line, and never let the end precede the start so the
      // three parts always partition the line without overlap.
      const safeStart = Math.max(0, Math.min(highlightStartCol, lineRaw.length));
      const safeEnd = Math.max(safeStart, Math.min(highlightEndCol, lineRaw.length));

      prefix = lineRaw.slice(0, safeStart);
      highlight = lineRaw.slice(safeStart, safeEnd);
      suffix = lineRaw.slice(safeEnd);

      // Build the underline: spaces under the prefix, carets under the highlight.
      // Note: This naive approach assumes monospaced fonts and no fancy unicode
      // widths, which usually holds for code.
      // At least one caret is emitted so a zero-width position is still visible.
      underline = " ".repeat(prefix.length) + "^".repeat(Math.max(1, highlight.length));
    } else {
      // Pure context line
      prefix = lineRaw;
    }

    views.push({
      lineNo,
      sourceLine: lineRaw,
      prefix,
      highlight,
      suffix,
      gutterPad: " ".repeat(maxLineNoWidth - lineNo.toString().length),
      underline,
    });
  }

  return views;
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue