import { CARRIAGE_RETURN, char, NEW_LINE, SPACE, TAB } from './source_text';
import type { SourceText, Span, SourceLocation, CodePoint, StringIndex, CodePointIndex } from './source_text';
import { isDigit, isWhitespace, scanNumber, scanString } from './cursor';
import type { Cursor, CursorState, GenericScanError, NumberError, StringError } from './cursor';
import { Result } from '../result';
import { Expr } from 'src/value';

/**
 * Advances the cursor past any mix of whitespace and `//` line comments.
 *
 * A line comment runs up to, but not including, the next newline or carriage
 * return. That terminator is left in place and picked up as ordinary
 * whitespace on the following pass of the loop.
 *
 * @param cursor - Cursor to advance in place.
 * @returns The sum of the counts reported by `consumeWhile` for whitespace
 *   and comment bodies, plus 2 for every `//` marker consumed.
 */
export function skipWhitespaceAndComments(cursor: Cursor): number {
  const slash = char('/');
  let consumed = 0;

  while (true) {
    // Whitespace first: spaces, tabs, newlines (whatever `isWhitespace` accepts).
    const blanks = cursor.consumeWhile(isWhitespace);
    consumed += blanks;

    // A line comment begins with two consecutive slashes.
    const first = cursor.peek();
    const second = cursor.peek(1);
    const atLineComment = first === slash && second === slash;

    if (!atLineComment) {
      // Neither whitespace nor a comment made progress: nothing left to skip.
      if (blanks === 0) {
        return consumed;
      }
      continue;
    }

    // Drop the `//` marker itself.
    cursor.next();
    cursor.next();
    consumed += 2;

    // Drop the comment body, stopping in front of the line terminator.
    consumed += cursor.consumeWhile(cp => cp !== NEW_LINE && cp !== CARRIAGE_RETURN);
  }
}

// === Language Specific Stuff ===
// Characters that always end a raw identifier (see `rawIdentifier`), in
// addition to whitespace and the start of a `//` comment.
const DELIMITER_CHARS = ["(", ")", "{", "}", ".", ",", "@", "$", "#", '"', "\\"] as const;
// Union of the delimiter characters above, as string literal types.
export type Delimiter = typeof DELIMITER_CHARS[number];
// The same delimiters converted with `char(...)`, for membership tests against
// values returned by `cursor.peek()`.
const DELIMITER_SET: Set<CodePoint> = new Set(DELIMITER_CHARS.map(c => char(c)));

// Texts that are reported as keyword tokens instead of identifiers. Note that
// the symbolic forms "=", "|", "!" and ":" are treated as keywords too.
const KEYWORD_LIST = ["let" , "fn" , "match" , "apply" , "=" , "|" , "!", ":"] as const;
// Union of the keyword texts above, as string literal types.
export type Keyword = typeof KEYWORD_LIST[number];
// Lookup set used to test whether scanned identifier text is a keyword.
const KEYWORD_SET: Set<string> = new Set(KEYWORD_LIST);

// === Errors ===

// Every error value the scanners in this file throw.
// The first three members come from './cursor'; `InvalidIdentifier` is raised
// by the identifier scanners below and records the offending text, what kind
// of identifier was expected, why it was rejected, and where it was found.
// NOTE(review): `patternStart` throws an "UnexpectedCharacter" value cast to
// this type — presumably that shape is part of GenericScanError; confirm.
export type ExprScanError = 
  | GenericScanError
  | NumberError 
  | StringError
  | { tag: "InvalidIdentifier", text: string, kind: IdentifierKind, reason: IdentifierErrorReason, span: Span }

// The role of the identifier the scanner was asked to read.
// Callers pass it to the identifier scanners, which copy it into the `kind`
// field of any `InvalidIdentifier` error so the failure can be reported in
// context.
export type IdentifierKind =
  | "variable_use"
  | "field_name"
  | "tag_construction"
  | "function_call"
  | "pattern_binding";

// Why a scanned text was rejected as an identifier.
export type IdentifierErrorReason = 
  // Nothing was consumed: the scan stopped immediately.
  | { tag: "Empty" }
  // The first character of the text is a digit.
  | { tag: "StartsWithDigit" }
  // The text is a keyword, but a plain identifier was required.
  | { tag: "IsKeyword", kw: Keyword }

// === Tokens ===

// Token returned by `exprStart`: the first token of an expression.
export type ExprStartToken =
  // Number literal (input starts with a digit, or '-' followed by a digit).
  | { tag: "number", value: number, span: Span }
  // String literal (input starts with '"').
  | { tag: "string", text: string, span: Span }
  // Bare identifier that is not a keyword.
  | { tag: "function_name", name: string, span: Span }
  // '$' followed by an identifier; the span includes the '$'.
  | { tag: "variable_use", name: string, span: Span }
  // '#' followed by an identifier; the span includes the '#'.
  | { tag: "tag", name: string, span: Span }
  // A single '('.
  | { tag: "tuple_start", span: Span }
  // One of the texts in KEYWORD_LIST.
  | { tag: "keyword", kw: Keyword, span: Span }
  // TODO: get rid of EOF
  // Returned when the cursor is already at the end of the input.
  | { tag: "EOF", span: Span }

// Token returned by `patternStart`: the first token of a pattern.
export type PatternStartToken =
  // Bare identifier that is not a keyword.
  | { tag: "pattern_binding", name: string, span: Span }
  // '#' followed by an identifier; the span includes the '#'.
  | { tag: "tag", name: string, span: Span }
  // A single '('.
  | { tag: "tuple_start", span: Span }
  // One of the texts in KEYWORD_LIST.
  | { tag: "keyword", kw: Keyword, span: Span }
  // TODO: get rid of EOF
  // Returned when the cursor is already at the end of the input.
  | { tag: "EOF", span: Span };

// === Identifier Scanners ===

// Consumes code points up to (not including) the first delimiter, whitespace,
// start of a `//` comment, or end of input, and returns the consumed text.
// The result is unvalidated and may be empty.
function rawIdentifier(cursor: Cursor): string {
  const slash = char('/');
  const from = cursor.currentIndex;

  for (; !cursor.eof(); cursor.next()) {
    const cp = cursor.peek();
    const stopsHere =
      DELIMITER_SET.has(cp) ||
      isWhitespace(cp) ||
      (cp === slash && cursor.peek(1) === slash);

    if (stopsHere) {
      break;
    }
  }

  return cursor.text.sliceByCp(from, cursor.currentIndex);
}

// Reads one raw identifier and classifies it:
//   - empty text            -> throws InvalidIdentifier (reason "Empty")
//   - text in KEYWORD_SET   -> keyword token
//   - text starting w/ digit-> throws InvalidIdentifier (reason "StartsWithDigit")
//   - anything else         -> identifier token
// `kind` is only used to label a thrown error. Throws ExprScanError.
function identifierOrKeywordScanner(
  cursor: Cursor,
  kind: IdentifierKind,
): { tag: "keyword", kw: Keyword, span: Span }
 | { tag: "identifier", name: string, span: Span } {
  const origin = cursor.currentLocation();
  const text = rawIdentifier(cursor);
  const span = cursor.makeSpan(origin);

  // All failures share everything except the reason.
  const invalid = (reason: IdentifierErrorReason): ExprScanError =>
    ({ tag: "InvalidIdentifier", text, kind, reason, span });

  if (text.length === 0) {
    throw invalid({ tag: "Empty" });
  }

  // The keyword check comes before validation: keywords are never rejected here.
  if (KEYWORD_SET.has(text)) {
    return { tag: "keyword", kw: text as Keyword, span };
  }

  if (isDigit(char(text[0]))) {
    throw invalid({ tag: "StartsWithDigit" });
  }

  return { tag: "identifier", name: text, span };
}

/**
 * Scans an identifier, rejecting keywords.
 *
 * @param cursor - Cursor positioned at the first character of the identifier.
 * @param kind - Role of the identifier; recorded in any thrown error.
 * @returns The identifier text and the span it was read from.
 * @throws ExprScanError `InvalidIdentifier` with reason `IsKeyword` when the
 *   scanned text is a keyword, or whatever `identifierOrKeywordScanner` throws.
 */
export function identifierScanner(cursor: Cursor, kind: IdentifierKind): { name: string, span: Span } {
  const scanned = identifierOrKeywordScanner(cursor, kind);

  if (scanned.tag === "identifier") {
    const { name, span } = scanned;
    return { name, span };
  }

  // Only the keyword case is left, and a keyword is not acceptable here.
  const { kw, span } = scanned;
  const rejection: ExprScanError = {
    tag: "InvalidIdentifier",
    text: kw,
    kind,
    reason: { tag: "IsKeyword", kw },
    span,
  };
  throw rejection;
}

// === Literal Scanners ===
// Unwraps the result of `scanNumber`: returns the payload on success,
// otherwise throws the scanner's error as an ExprScanError.
// TODO: handle trailing whitespace
function number(cursor: Cursor): { value: number, span: Span } {
  const outcome = scanNumber(cursor);
  if (outcome.tag !== "ok") {
    throw (outcome.error as ExprScanError);
  }
  return outcome.value;
}

// Unwraps the result of `scanString`: returns the payload on success,
// otherwise throws the scanner's error as an ExprScanError.
// TODO: handle trailing whitespace
function string(cursor: Cursor): { value: string, span: Span } {
  const outcome = scanString(cursor);
  if (outcome.tag !== "ok") {
    throw (outcome.error as ExprScanError);
  }
  return outcome.value;
}

// === complex scanners ===

// TODO: make sure trailing whitespace is consumed after exprStart/patternStart successfully produce a token. Should that be built into these functions, or handled in `parser.ts`?

/**
 * Scans the token that begins an expression at the cursor position.
 *
 * Checked in order: end of input, number literal, string literal, `$name`,
 * `#name`, `(`, and finally a keyword or a function name.
 *
 * @param cursor - Cursor positioned at the first character of the token;
 *   advanced as the token is consumed.
 * @returns The scanned token.
 * @throws ExprScanError when the literal or identifier scanners reject the input.
 */
export function exprStart(cursor: Cursor): ExprStartToken {
  const origin = cursor.currentLocation();
  if (cursor.eof()) {
    return { tag: "EOF", span: cursor.makeSpan(origin) };
  }

  const head = cursor.peek()!;

  // Number literal: a digit, or '-' immediately followed by a digit.
  const startsNumber = isDigit(head) || (head === char('-') && isDigit(cursor.peek(1) ?? 0));
  if (startsNumber) {
    const scanned = number(cursor);
    return { tag: "number", value: scanned.value, span: scanned.span };
  }

  // Single-character prefixes select the remaining non-identifier forms.
  switch (head) {
    case char('"'): {
      const scanned = string(cursor);
      return { tag: "string", text: scanned.value, span: scanned.span };
    }
    case char('$'): {
      cursor.next();
      const { name } = identifierScanner(cursor, 'variable_use');
      // The span is measured from the '$', not from the name.
      return { tag: "variable_use", name, span: cursor.makeSpan(origin) };
    }
    case char('#'): {
      cursor.next();
      const { name } = identifierScanner(cursor, 'tag_construction');
      // The span is measured from the '#', not from the name.
      return { tag: "tag", name, span: cursor.makeSpan(origin) };
    }
    case char('('): {
      cursor.next();
      return { tag: "tuple_start", span: cursor.makeSpan(origin) };
    }
  }

  // Anything else has to be a keyword or the name of a function being called.
  const word = identifierOrKeywordScanner(cursor, 'function_call');
  if (word.tag === "keyword") {
    return word;
  }
  return { tag: "function_name", name: word.name, span: word.span };
}

/**
 * Scans the token that begins a pattern at the cursor position.
 *
 * Checked in order: end of input (`EOF`), `(` (`tuple_start`), `#name`
 * (`tag`), and finally a keyword or a pattern binding. A leading digit, `"`
 * or `-` is rejected up front, since literal patterns are not supported yet.
 *
 * @param cursor - Cursor positioned at the first character of the token;
 *   advanced as the token is consumed.
 * @returns The scanned token.
 * @throws ExprScanError `UnexpectedCharacter` for a leading digit, `"` or `-`;
 *   `InvalidIdentifier` when the tag name or the binding name is rejected.
 */
export function patternStart(cursor: Cursor): PatternStartToken {
  const start = cursor.currentLocation();

  if (cursor.eof()) {
    return { tag: "EOF", span: cursor.makeSpan(start) };
  }

  const c = cursor.peek()!;
  // === tuple ===
  if (c === char('(')) {
    cursor.next();
    return { tag: "tuple_start", span: cursor.makeSpan(start) };
  }

  // === tag ===
  if (c === char('#')) {
    cursor.next();
    const { name } = identifierScanner(cursor, 'tag_construction');
    // The span is measured from the '#', not from the name.
    return { tag: "tag", name, span: cursor.makeSpan(start) };
  }

  // TODO: This is more subtle... -foo is a valid pattern name... I think I should restrict: can't have identifiers start with `-`? But then `-` itself can't be an identifier, which is a bit sad.
  // TODO: This is gonna be different once we allow number/string literals as patterns.
  if (isDigit(c) || c === char('"') || c === char('-')) {
     throw {
       tag: "UnexpectedCharacter",
       char: c,
       span: cursor.makeSpan(start)
     } as ExprScanError;
  }

  // === pattern binding ===
  // Fallthrough: it must be a keyword or a pattern-variable.
  // The kind must be 'pattern_binding' so that an InvalidIdentifier error
  // raised here is labelled as a pattern binding, not as a function call.
  const result = identifierOrKeywordScanner(cursor, 'pattern_binding');
  switch (result.tag) {
    case "keyword":
      return result;
    case "identifier":
      return { tag: "pattern_binding", name: result.name, span: result.span };
  }
}

/**
 * Lookahead: reports whether the upcoming token can begin an expression.
 *
 * Runs `exprStart` speculatively and always restores the cursor afterwards,
 * so the cursor position is unchanged when this returns.
 *
 * @param cursor - Cursor to probe.
 * @returns `true` for number, string, variable use, tag, tuple start and
 *   function name tokens and for the keywords `let`, `fn`, `match`, `apply`
 *   and `:`; `false` for any other keyword, for `EOF`, and when scanning throws.
 */
export function isNextTokenExprStart(cursor: Cursor): boolean {
  const checkpoint = cursor.save();
  try {
    const token = exprStart(cursor);

    if (token.tag === "keyword") {
      // Only these keywords introduce an expression; "=", "|" and "!" do not.
      const kw = token.kw;
      return kw === "let" || kw === "fn" || kw === "match" || kw === "apply" || kw === ":";
    }

    return (
      token.tag === "number" ||
      token.tag === "string" ||
      token.tag === "variable_use" ||
      token.tag === "tag" ||
      token.tag === "tuple_start" ||
      token.tag === "function_name" // e.g. my_func(x)
    );
  } catch {
    // A scan error means that whatever comes next is not an expression start.
    return false;
  } finally {
    cursor.restore(checkpoint);
  }
}

/**
 * Lookahead: reports whether the upcoming token can begin a product pattern.
 *
 * Runs `patternStart` speculatively and always restores the cursor
 * afterwards, so the cursor position is unchanged when this returns.
 *
 * @param cursor - Cursor to probe.
 * @returns `true` for a pattern binding, a tuple start, or the keyword `:`;
 *   `false` for every other token and when scanning throws.
 */
export function isNextTokenProductPatternStart(cursor: Cursor): boolean {
  const checkpoint = cursor.save();
  try {
    const token = patternStart(cursor);

    if (token.tag === "keyword") {
      // ":" is the only keyword that may open a product pattern.
      return token.kw === ":";
    }

    return token.tag === "pattern_binding" || token.tag === "tuple_start";
  } catch {
    // A scan error means that whatever comes next is not a pattern start.
    return false;
  } finally {
    cursor.restore(checkpoint);
  }
}