Improve and abstract Cursor design. Start scanner
This commit is contained in:
parent
d382b16e6d
commit
d5f9777711
8 changed files with 713 additions and 476 deletions
|
|
@ -17,7 +17,6 @@ export type CodePointRef = {
|
|||
export class SourceText {
|
||||
readonly source: string;
|
||||
// TODO: Later, consider replacing this with two `Uint32Array`s - one for code points (each needs at most 21 bits, so 32 bits is plenty), the other for indices into the original string.
|
||||
//
|
||||
readonly chars: CodePointRef[];
|
||||
|
||||
// Stores the CodePointIndex where each line begins
|
||||
|
|
@ -40,17 +39,16 @@ export class SourceText {
|
|||
i += size;
|
||||
|
||||
// === Newline Logic ===
|
||||
// 0x0A is '\n', 0x0D is '\r'
|
||||
if (char === 0x0A) {
|
||||
if (char === NEW_LINE) {
|
||||
// Found a newline, the NEXT char starts a new line
|
||||
this.lineStarts.push(cpIndex + 1);
|
||||
}
|
||||
// Handle CR (Classic Mac) or CRLF start
|
||||
else if (char === 0x0D) {
|
||||
else if (char === CARRIAGE_RETURN) {
|
||||
// Check if the next char is '\n' (CRLF)
|
||||
// We peek ahead in the raw string to see if we need to skip the \n for line counting purposes
|
||||
// or just treat this as a newline.
|
||||
const nextIsNL = i < source.length && source.codePointAt(i) === 0x0A;
|
||||
const nextIsNL = i < source.length && source.codePointAt(i) === NEW_LINE;
|
||||
if (!nextIsNL) {
|
||||
// Only push if it's NOT CRLF. If it is CRLF, the loop handles the \n next.
|
||||
this.lineStarts.push(cpIndex + 1);
|
||||
|
|
@ -101,15 +99,6 @@ export function sourceText(s: string) {
|
|||
return new SourceText(s);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Reports whether `char` is one of four ASCII whitespace code points:
 * space (0x20), horizontal tab (0x09), line feed (0x0A) or carriage return (0x0D).
 * No other code points (e.g. Unicode spaces such as U+00A0) are matched.
 *
 * @param char - The code point to test.
 * @returns `true` if `char` is exactly one of the four values above.
 */
export function isWhitespace(char: CodePoint): boolean {
|
||||
return char === 0x20 || char === 0x09 || char === 0x0A || char === 0x0D;
|
||||
}
|
||||
|
||||
/**
 * Reports whether `char` is an ASCII decimal digit, i.e. lies in the
 * inclusive range 0x30 ('0') through 0x39 ('9').
 *
 * @param char - The code point to test.
 * @returns `true` if `char` is between 0x30 and 0x39 inclusive.
 */
export function isDigit(char: CodePoint): boolean {
|
||||
return char >= 0x30 && char <= 0x39;
|
||||
}
|
||||
|
||||
export type Span = {
|
||||
start: StringIndex,
|
||||
end: StringIndex,
|
||||
|
|
@ -123,3 +112,21 @@ export type SourceLocation = {
|
|||
column: number; // 1-based
|
||||
};
|
||||
|
||||
// Whitespace code points.
// NOTE(review): `char` is defined outside this view; presumably it maps a
// one-character string literal to its code point - confirm.
|
||||
export const NEW_LINE: CodePoint = char('\n');
|
||||
export const CARRIAGE_RETURN: CodePoint = char('\r');
|
||||
export const SPACE: CodePoint = char(' ');
|
||||
export const TAB: CodePoint = char('\t');
|
||||

||||
// Digit Boundaries: the first ('0') and last ('9') ASCII decimal digits.
|
||||
export const DIGIT_0: CodePoint = char('0');
|
||||
export const DIGIT_9: CodePoint = char('9');
|
||||

||||
// The '.' character.
export const DOT: CodePoint = char('.');
|
||||

||||
// Hex Boundaries: the first and last hexadecimal letters in each case
// ('a'..'f' and 'A'..'F').
|
||||
export const LOWERCASE_a: CodePoint = char('a');
|
||||
export const UPPERCASE_A: CodePoint = char('A');
|
||||
export const LOWERCASE_f: CodePoint = char('f');
|
||||
export const UPPERCASE_F: CodePoint = char('F');
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue