module LexCore where

import ParserCoreUtils
import Panic
import Data.Char
import Numeric

isNameChar :: Char -> Bool
isNameChar c = isAlpha c || isDigit c || (c == '_') || (c == '\'')
	       || (c == '$') || (c == '-') || (c == '.')

isKeywordChar :: Char -> Bool
isKeywordChar c = isAlpha c || (c == '_') 

lexer :: (Token -> P a) -> P a 
lexer cont [] 		= cont TKEOF []
lexer cont ('\n':cs) 	= \line -> lexer cont cs (line+1)
lexer cont ('-':'>':cs) = cont TKrarrow cs

lexer cont (c:cs) 
      | isSpace c		= lexer cont cs
      | isLower c || (c == '_') = lexName cont TKname (c:cs)
      | isUpper c 		= lexName cont TKcname (c:cs)
      | isDigit c || (c == '-') = lexNum cont (c:cs)

lexer cont ('%':cs)  	= lexKeyword cont cs
lexer cont ('\'':cs) 	= lexChar cont cs
lexer cont ('\"':cs) 	= lexString [] cont cs 
lexer cont ('#':cs) 	= cont TKhash cs
lexer cont ('(':cs) 	= cont TKoparen cs
lexer cont (')':cs) 	= cont TKcparen cs
lexer cont ('{':cs) 	= cont TKobrace cs
lexer cont ('}':cs) 	= cont TKcbrace cs
lexer cont ('=':cs)     = cont TKeq cs
lexer cont (':':'=':':':cs) = cont TKcoloneqcolon cs
lexer cont (':':':':cs) = cont TKcoloncolon cs
lexer cont ('*':cs) 	= cont TKstar cs
lexer cont ('.':cs) 	= cont TKdot cs
lexer cont ('\\':cs)    = cont TKlambda cs
lexer cont ('@':cs) 	= cont TKat cs
lexer cont ('?':cs) 	= cont TKquestion cs
lexer cont (';':cs) 	= cont TKsemicolon cs
-- 20060420 GHC spits out constructors with colon in them nowadays. jds
-- 20061103 but it's easier to parse if we split on the colon, and treat them
-- as several tokens
lexer cont (':':cs)     = cont TKcolon cs
-- 20060420 Likewise does it create identifiers starting with dollar. jds
lexer cont ('$':cs)     = lexName cont TKname ('$':cs)
lexer _    (c:_)        = failP "invalid character" [c]

lexChar :: (Token -> String -> Int -> ParseResult a) -> String -> Int
        -> ParseResult a
lexChar cont ('\\':'x':h1:h0:'\'':cs)
    | isHexEscape [h1,h0] = cont (TKchar (hexToChar h1 h0)) cs
lexChar _    ('\\':cs)    = failP "invalid char character" ('\\':(take 10 cs))
lexChar _    ('\'':_)     = failP "invalid char character" ['\'']
lexChar _    ('\"':_)     = failP "invalid char character" ['\"']
lexChar cont (c:'\'':cs)  = cont (TKchar c) cs
lexChar _    cs           = panic ("lexChar: " ++ show cs)

lexString :: String -> (Token -> [Char] -> Int -> ParseResult a)
          -> String -> Int -> ParseResult a
lexString s cont ('\\':'x':h1:h0:cs) 
    | isHexEscape [h1,h0]  = lexString (s++[hexToChar h1 h0]) cont cs
lexString _ _    ('\\':_)  = failP "invalid string character" ['\\']
lexString _ _    ('\'':_)  = failP "invalid string character" ['\'']
lexString s cont ('\"':cs) = cont (TKstring s) cs
lexString s cont (c:cs)    = lexString (s++[c]) cont cs
lexString _ _    []        = panic "lexString []"

isHexEscape :: String -> Bool
isHexEscape = all (\c -> isHexDigit c && (isDigit c || isLower c))

hexToChar :: Char -> Char -> Char
hexToChar h1 h0 = chr (digitToInt h1 * 16 + digitToInt h0)

lexNum :: (Token -> String -> a) -> String -> a
lexNum cont cs =
  case cs of
     ('-':cs) -> f (-1) cs
     _ 	      -> f 1 cs
 where f sgn cs = 
         case span isDigit cs of
          (digits,'.':c:rest) 
		| isDigit c -> cont (TKrational (fromInteger sgn * r)) rest'
	        where ((r,rest'):_) = readFloat (digits ++ ('.':c:rest))
		-- When reading a floating-point number, which is
		-- a bit complicated, use the Haskell 98 library function
          (digits,rest) -> cont (TKinteger (sgn * (read digits))) rest

lexName :: (a -> String -> b) -> (String -> a) -> String -> b
lexName cont cstr cs = cont (cstr name) rest
   where (name,rest) = span isNameChar cs

lexKeyword :: (Token -> [Char] -> Int -> ParseResult a) -> String -> Int
           -> ParseResult a
lexKeyword cont cs = 
   case span isKeywordChar cs of
      ("module",rest) -> cont TKmodule rest
      ("data",rest)  -> cont TKdata rest
      ("newtype",rest) -> cont TKnewtype rest
      ("forall",rest) -> cont TKforall rest	
      ("rec",rest) -> cont TKrec rest	
      ("let",rest) -> cont TKlet rest	
      ("in",rest) -> cont TKin rest	
      ("case",rest) -> cont TKcase rest	
      ("of",rest) -> cont TKof rest	
      ("cast",rest) -> cont TKcast rest	
      ("note",rest) -> cont TKnote rest	
      ("external",rest) -> cont TKexternal rest
      ("local",rest) -> cont TKlocal rest
      ("_",rest) -> cont TKwild rest
      _ -> failP "invalid keyword" ('%':cs)