Finish first scanning step
This commit is contained in:
parent
d5f9777711
commit
d553a05f45
2 changed files with 180 additions and 38 deletions
|
|
@ -4,41 +4,7 @@ import type { SourceText, Span, SourceLocation, CodePoint, StringIndex, CodePoin
|
|||
import { isDigit, isWhitespace, scanNumber, scanString } from './cursor';
|
||||
import type { Cursor, CursorState, GenericScanError, NumberError, StringError } from './cursor';
|
||||
import { Result } from '../result';
|
||||
|
||||
// === Language Specific Stuff ===
|
||||
|
||||
/** Single-character delimiters of the language, grouped by role. */
const DELIMITER_CHARS = [
  "(", ")",
  "{", "}",
  ".", ",",
  "@", "$", "#",
  '"', "\\",
] as const;

/** Union of the delimiter strings listed in `DELIMITER_CHARS`. */
export type Delimiter = (typeof DELIMITER_CHARS)[number];

/** The delimiters converted with `char`, stored in a set for membership tests. */
const DELIMITER_SET: Set<CodePoint> = new Set(
  DELIMITER_CHARS.map((delimiter) => char(delimiter)),
);

/** Reserved words and symbols of the language. */
export type Keyword =
  | "let"
  | "fn"
  | "match"
  | "apply"
  | "="
  | "|"
  | "!";
|
||||
|
||||
/**
 * Consumes an identifier-shaped run of characters and returns it as a raw string.
 * No token is created here; the caller decides which token type the text becomes.
 */
function scanRawIdentifier(cursor: Cursor): string {
  const begin = cursor.currentIndex;

  // A character belongs to the identifier when it is neither a delimiter nor whitespace.
  // TODO: Comments should end an identifier as well, but they start with two
  // characters, so this one-character predicate cannot detect them.
  const belongsToIdentifier = (cp: CodePoint): boolean =>
    !DELIMITER_SET.has(cp) && !isWhitespace(cp);

  cursor.consumeWhile(belongsToIdentifier);
  return cursor.text.sliceByCp(begin, cursor.currentIndex);
}
|
||||
|
||||
/** Errors that scanning the start of an expression can report. */
export type ExprScanError =
  | NumberError
  | StringError
  | { tag: "UnexpectedCharacter"; char: CodePoint; span: Span }
  | { tag: "InvalidIdentifier"; text: string; reason: string; span: Span };
|
||||
|
||||
/** Tokens that can appear at the start of an expression; every variant carries its span. */
export type ExprStartToken =
  // Literals
  | { tag: "number"; value: number; span: Span }
  | { tag: "string"; text: string; span: Span }
  // Named things
  | { tag: "function_name"; name: string; span: Span }
  | { tag: "variable_use"; name: string; span: Span }
  | { tag: "tag"; name: string; span: Span }
  // TODO: This may be a bit weird. Actually we can look ahead and see whether the
  // next char after the identifier is NOT the start of an expression.
  | { tag: "tagged"; name: string; span: Span }
  // Keywords and end of input
  | { tag: "keyword"; kw: Keyword; span: Span }
  | { tag: "EOF"; span: Span };
|
||||
|
||||
// TODO: Move this back to `cursor.ts`
|
||||
import { Expr } from 'src/value';
|
||||
|
||||
function skipWhitespaceAndComments(cursor: Cursor): number {
|
||||
let totalConsumed = 0;
|
||||
|
|
@ -76,11 +42,185 @@ function skipWhitespaceAndComments(cursor: Cursor): number {
|
|||
return totalConsumed;
|
||||
}
|
||||
|
||||
export function scanExprStart(cursor: Cursor): Result<ExprStartToken, ExprScanError> {
|
||||
// TODO
|
||||
return (0 as any);
|
||||
// === Language Specific Stuff ===
|
||||
/** Single-character delimiters of the language. */
const DELIMITER_CHARS = [
  "(",
  ")",
  "{",
  "}",
  ".",
  ",",
  "@",
  "$",
  "#",
  '"',
  "\\",
] as const;

/** Union of the delimiter strings listed in `DELIMITER_CHARS`. */
export type Delimiter = (typeof DELIMITER_CHARS)[number];

/** The delimiters converted with `char`, stored in a set for membership tests. */
const DELIMITER_SET: Set<CodePoint> = new Set(
  DELIMITER_CHARS.map((delimiter) => char(delimiter)),
);

/** Reserved words and symbols; the single source for both the type and the lookup set. */
const KEYWORD_LIST = [
  "let",
  "fn",
  "match",
  "apply",
  "=",
  "|",
  "!",
] as const;

/** Union of the keyword strings listed in `KEYWORD_LIST`. */
export type Keyword = (typeof KEYWORD_LIST)[number];

/** Keyword lookup keyed by plain `string`, so arbitrary scanned text can be tested. */
const KEYWORD_SET: Set<string> = new Set<string>(KEYWORD_LIST);
|
||||
|
||||
// === Errors ===
|
||||
|
||||
/**
 * Errors that scanning the start of an expression can report: the errors
 * imported from the cursor module plus identifier validation failures.
 */
export type ExprScanError =
  | {
      tag: "InvalidIdentifier";
      text: string;
      kind: IdentifierKind;
      reason: IdentifierErrorReason;
      span: Span;
    }
  | GenericScanError
  | NumberError
  | StringError;
|
||||
|
||||
/** The syntactic role of the identifier the scanner was trying to read. */
export type IdentifierKind =
  | "identifier"
  | "variable_use"
  | "tag_construction"
  | "function_call";
|
||||
/**
 * Why a piece of scanned text was rejected as an identifier.
 *
 * - `Empty`: no identifier characters were found at the cursor.
 * - `StartsWithDigit`: the first character of the text is a digit.
 * - `IsKeyword`: the text is a reserved keyword.
 */
export type IdentifierErrorReason =
  | { tag: "Empty" }
  | { tag: "StartsWithDigit" }
  | { tag: "IsKeyword", kw: Keyword }

// === Tokens ===

/** Tokens that can appear at the start of an expression; every variant carries its span. */
export type ExprStartToken =
  | { tag: "number", value: number, span: Span }
  | { tag: "string", text: string, span: Span }
  | { tag: "function_name", name: string, span: Span }
  | { tag: "variable_use", name: string, span: Span }
  | { tag: "tag", name: string, span: Span }
  // TODO: This may be a bit weird. Actually we can look ahead and see whether the
  // next char after the identifier is NOT the start of an expression.
  | { tag: "tagged", name: string, span: Span }
  | { tag: "tuple_start", span: Span }
  | { tag: "record_start", span: Span }
  | { tag: "keyword", kw: Keyword, span: Span }
  | { tag: "EOF", span: Span }

// === Identifier Scanners ===

/**
 * Consumes characters up to (not including) the first delimiter, whitespace
 * character, `//` pair, or end of input, and returns the consumed text.
 *
 * The result is the empty string when the cursor already sits on one of those
 * stopping points; nothing is consumed in that case.
 */
function rawIdentifier(cursor: Cursor): string {
  const start = cursor.currentIndex;
  while (!cursor.eof()) {
    const c = cursor.peek();

    if (DELIMITER_SET.has(c) || isWhitespace(c)) {
      break;
    }

    // A single `/` may be part of an identifier; only two in a row stop the
    // scan (presumably the start of a comment).
    if (c === char('/') && cursor.peek(1) === char('/')) {
      break;
    }
    cursor.next();
  }
  return cursor.text.sliceByCp(start, cursor.currentIndex);
}

/**
 * Scans a raw identifier, classifies it as a keyword if it is one, and
 * otherwise validates it as a proper identifier.
 *
 * @param cursor Cursor positioned where the identifier is expected to begin.
 * @param kind What the caller is trying to parse; only used for error reporting.
 * @returns A `keyword` result when the text is a keyword, otherwise an
 *   `identifier` result with a non-empty name. Both carry the span of the text.
 * @throws ExprScanError `InvalidIdentifier` with reason `Empty` when no
 *   identifier characters are present, or `StartsWithDigit` when the text
 *   begins with a digit.
 */
function identifierOrKeyword(
  cursor: Cursor,
  kind: IdentifierKind,
): { tag: "keyword", kw: Keyword, span: Span }
 | { tag: "identifier", name: string, span: Span } {
  const start = cursor.currentLocation();
  const text = rawIdentifier(cursor);
  const span = cursor.makeSpan(start);

  // Nothing was consumed: the cursor sits on a delimiter, whitespace, `//` or EOF.
  // Report it here so that no caller receives an empty name, and so that the
  // first-character check below never reads past the end of the string.
  if (text.length === 0) {
    const emptyError: ExprScanError = {
      tag: "InvalidIdentifier",
      text,
      kind,
      reason: { tag: "Empty" },
      span,
    };
    throw emptyError;
  }

  if (KEYWORD_SET.has(text)) {
    // Membership in KEYWORD_SET is what justifies the cast.
    return { tag: "keyword", kw: text as Keyword, span };
  }

  // validation
  if (isDigit(char(text.charAt(0)))) {
    const digitError: ExprScanError = {
      tag: "InvalidIdentifier",
      text,
      kind,
      reason: { tag: "StartsWithDigit" },
      span,
    };
    throw digitError;
  }
  return { tag: "identifier", name: text, span };
}
|
||||
|
||||
/**
 * Scans an identifier of the given kind and rejects keywords.
 *
 * @param cursor Cursor positioned where the identifier is expected to begin.
 * @param kind What the caller is trying to parse; copied into the error.
 * @returns The identifier's name together with its span.
 * @throws ExprScanError `InvalidIdentifier` with reason `IsKeyword` when the
 *   text is a keyword; errors thrown by `identifierOrKeyword` propagate as is.
 */
export function identifier(cursor: Cursor, kind: IdentifierKind): { name: string, span: Span } {
  const scanned = identifierOrKeyword(cursor, kind);

  switch (scanned.tag) {
    case "identifier": {
      const { name, span } = scanned;
      return { name, span };
    }
    case "keyword": {
      const { kw, span } = scanned;
      throw ({
        tag: "InvalidIdentifier",
        text: kw,
        kind,
        reason: { tag: "IsKeyword", kw },
        span,
      } as ExprScanError);
    }
  }
}
|
||||
|
||||
// === Literal Scanners ===
|
||||
/**
 * Runs `scanNumber` and unwraps its result.
 *
 * @returns The value carried by a successful scan.
 * @throws ExprScanError The error carried by a failed scan.
 */
function number(cursor: Cursor): { value: number, span: Span } {
  const outcome = scanNumber(cursor);
  if (outcome.tag !== "ok") {
    throw (outcome.error as ExprScanError);
  }
  return outcome.value;
}
|
||||
|
||||
/**
 * Runs `scanString` and unwraps its result.
 *
 * @returns The value carried by a successful scan.
 * @throws ExprScanError The error carried by a failed scan.
 */
function string(cursor: Cursor): { value: string, span: Span } {
  const outcome = scanString(cursor);
  if (outcome.tag !== "ok") {
    throw (outcome.error as ExprScanError);
  }
  return outcome.value;
}
|
||||
|
||||
// === complex scanners ===
|
||||
|
||||
/**
 * Scans the token that begins an expression, after skipping leading
 * whitespace and comments.
 *
 * Dispatches on the first character:
 * - a digit, or `-` directly followed by a digit: number literal
 * - `"`: string literal
 * - `$`: variable use (the sigil, then an identifier)
 * - `#`: tag (the sigil, then an identifier)
 * - `(`: tuple start
 * - `{`: record start
 * - anything else: keyword or function name
 *
 * @returns The scanned token, or an `EOF` token when no input remains.
 * @throws ExprScanError Propagated from the literal and identifier scanners.
 */
export function exprStart(cursor: Cursor): ExprStartToken {
  skipWhitespaceAndComments(cursor);

  const origin = cursor.currentLocation();
  // Span of everything consumed since `origin`.
  const spanSoFar = (): Span => cursor.makeSpan(origin);
  // Consumes the one-character sigil, then the identifier that must follow it.
  const nameAfterSigil = (kind: IdentifierKind): string => {
    cursor.next();
    return identifier(cursor, kind).name;
  };

  if (cursor.eof()) {
    return { tag: "EOF", span: spanSoFar() };
  }

  const head = cursor.peek()!;

  // === numbers ===
  const startsNumber =
    isDigit(head) || (head === char('-') && isDigit(cursor.peek(1) ?? 0));
  if (startsNumber) {
    const scanned = number(cursor);
    return { tag: "number", value: scanned.value, span: scanned.span };
  }

  switch (head) {
    // === strings ===
    case char('"'): {
      const scanned = string(cursor);
      return { tag: "string", text: scanned.value, span: scanned.span };
    }

    // === variable use ===
    case char('$'): {
      const name = nameAfterSigil('variable_use');
      return { tag: "variable_use", name, span: spanSoFar() };
    }

    // === tags ===
    case char('#'): {
      const name = nameAfterSigil('tag_construction');
      return { tag: "tag", name, span: spanSoFar() };
    }

    // === tuples ===
    case char('('): {
      cursor.next();
      return { tag: "tuple_start", span: spanSoFar() };
    }

    // === records ===
    case char('{'): {
      cursor.next();
      return { tag: "record_start", span: spanSoFar() };
    }
  }

  // === keywords & identifiers ===
  // Nothing above matched: it must be a keyword or a function call.
  const scanned = identifierOrKeyword(cursor, 'function_call');
  if (scanned.tag === "keyword") {
    return scanned;
  }
  return { tag: "function_name", name: scanned.name, span: scanned.span };
}
|
||||
|
||||
|
||||
// TODO: Need a Token to TokenKind function
|
||||
// TODO: Need is_start_of_expression(token): boolean
|
||||
// identifier -> true
|
||||
|
|
|
|||
|
|
@ -211,6 +211,8 @@ export namespace Expr {
|
|||
/** Builds a `call` expression from a function name and its arguments. */
export const call = (name: FunctionName, args: Expr[]): Expr => {
  return { tag: "call", name: name, args: args };
};

/** Builds a `tag` expression. */
export const tag = (tag_name: Tag): Expr => {
  return { tag: "tag", tag_name: tag_name };
};

/** Builds a `tagged` expression: a tag name together with an expression. */
export const tagged = (tag_name: Tag, expr: Expr): Expr => {
  return { tag: "tagged", tag_name: tag_name, expr: expr };
};

/** Builds a `tuple` expression from its element expressions. */
export const tuple = (exprs: Expr[]): Expr => {
  return { tag: "tuple", exprs: exprs };
};

/** Builds a `record` expression from its named fields. */
export const record = (fields: { name: FieldName, expr: Expr }[]): Expr => {
  return { tag: "record", fields: fields };
};

/** Builds a `match` expression from the matched argument and its branches. */
export const match = (arg: Expr, branches: { pattern: Pattern; body: Expr }[]): Expr => {
  return { tag: "match", arg: arg, branches: branches };
};

/** Builds a `var_use` expression. */
export const var_use = (name: VariableName): Expr => {
  return { tag: "var_use", name: name };
};

/** Builds a `let` expression; the trailing underscore avoids the reserved word `let`. */
export const let_ = (bindings: ExprBinding[], body: Expr): Expr => {
  return { tag: "let", bindings: bindings, body: body };
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue