module Text.Read.Lex
( Lexeme(..)
, lex
, hsLex
, lexChar
, readIntP
, readOctP
, readDecP
, readHexP
)
where
import Text.ParserCombinators.ReadP
#ifdef __GLASGOW_HASKELL__
import GHC.Base
import GHC.Num( Num(..), Integer )
import GHC.Show( Show(..) )
#ifndef __HADDOCK__
import GHC.Unicode ( isSpace, isAlpha, isAlphaNum )
#endif
import GHC.Real( Ratio(..), Integral, Rational, (%), fromIntegral,
toInteger, (^), (^^), infinity, notANumber )
import GHC.List
import GHC.Enum( maxBound )
#else
import Prelude hiding ( lex )
import Data.Char( chr, ord, isSpace, isAlpha, isAlphaNum )
import Data.Ratio( Ratio, (%) )
#endif
#ifdef __HUGS__
import Hugs.Prelude( Ratio(..) )
#endif
import Data.Maybe
import Control.Monad
data Lexeme
= Char Char
| String String
| Punc String
| Ident String
| Symbol String
| Int Integer
| Rat Rational
| EOF
deriving (Eq, Show)
lex :: ReadP Lexeme
lex = skipSpaces >> lexToken
hsLex :: ReadP String
hsLex = do skipSpaces
(s,_) <- gather lexToken
return s
lexToken :: ReadP Lexeme
lexToken = lexEOF +++
lexLitChar +++
lexString +++
lexPunc +++
lexSymbol +++
lexId +++
lexNumber
lexEOF :: ReadP Lexeme
lexEOF = do s <- look
guard (null s)
return EOF
lexPunc :: ReadP Lexeme
lexPunc =
do c <- satisfy isPuncChar
return (Punc [c])
where
isPuncChar c = c `elem` ",;()[]{}`"
lexSymbol :: ReadP Lexeme
lexSymbol =
do s <- munch1 isSymbolChar
if s `elem` reserved_ops then
return (Punc s)
else
return (Symbol s)
where
isSymbolChar c = c `elem` "!@#$%&*+./<=>?\\^|:-~"
reserved_ops = ["..", "::", "=", "\\", "|", "<-", "->", "@", "~", "=>"]
lexId :: ReadP Lexeme
lexId = lex_nan <++ lex_id
where
lex_nan = (string "NaN" >> return (Rat notANumber)) +++
(string "Infinity" >> return (Rat infinity))
lex_id = do c <- satisfy isIdsChar
s <- munch isIdfChar
return (Ident (c:s))
isIdsChar c = isAlpha c || c == '_'
isIdfChar c = isAlphaNum c || c `elem` "_'"
#ifndef __GLASGOW_HASKELL__
infinity, notANumber :: Rational
infinity = 1 :% 0
notANumber = 0 :% 0
#endif
lexLitChar :: ReadP Lexeme
lexLitChar =
do char '\''
(c,esc) <- lexCharE
guard (esc || c /= '\'')
char '\''
return (Char c)
lexChar :: ReadP Char
lexChar = do { (c,_) <- lexCharE; return c }
lexCharE :: ReadP (Char, Bool)
lexCharE =
do c1 <- get
if c1 == '\\'
then do c2 <- lexEsc; return (c2, True)
else do return (c1, False)
where
lexEsc =
lexEscChar
+++ lexNumeric
+++ lexCntrlChar
+++ lexAscii
lexEscChar =
do c <- get
case c of
'a' -> return '\a'
'b' -> return '\b'
'f' -> return '\f'
'n' -> return '\n'
'r' -> return '\r'
't' -> return '\t'
'v' -> return '\v'
'\\' -> return '\\'
'\"' -> return '\"'
'\'' -> return '\''
_ -> pfail
lexNumeric =
do base <- lexBaseChar <++ return 10
n <- lexInteger base
guard (n <= toInteger (ord maxBound))
return (chr (fromInteger n))
lexCntrlChar =
do char '^'
c <- get
case c of
'@' -> return '\^@'
'A' -> return '\^A'
'B' -> return '\^B'
'C' -> return '\^C'
'D' -> return '\^D'
'E' -> return '\^E'
'F' -> return '\^F'
'G' -> return '\^G'
'H' -> return '\^H'
'I' -> return '\^I'
'J' -> return '\^J'
'K' -> return '\^K'
'L' -> return '\^L'
'M' -> return '\^M'
'N' -> return '\^N'
'O' -> return '\^O'
'P' -> return '\^P'
'Q' -> return '\^Q'
'R' -> return '\^R'
'S' -> return '\^S'
'T' -> return '\^T'
'U' -> return '\^U'
'V' -> return '\^V'
'W' -> return '\^W'
'X' -> return '\^X'
'Y' -> return '\^Y'
'Z' -> return '\^Z'
'[' -> return '\^['
'\\' -> return '\^\'
']' -> return '\^]'
'^' -> return '\^^'
'_' -> return '\^_'
_ -> pfail
lexAscii =
do choice
[ (string "SOH" >> return '\SOH') <++
(string "SO" >> return '\SO')
, string "NUL" >> return '\NUL'
, string "STX" >> return '\STX'
, string "ETX" >> return '\ETX'
, string "EOT" >> return '\EOT'
, string "ENQ" >> return '\ENQ'
, string "ACK" >> return '\ACK'
, string "BEL" >> return '\BEL'
, string "BS" >> return '\BS'
, string "HT" >> return '\HT'
, string "LF" >> return '\LF'
, string "VT" >> return '\VT'
, string "FF" >> return '\FF'
, string "CR" >> return '\CR'
, string "SI" >> return '\SI'
, string "DLE" >> return '\DLE'
, string "DC1" >> return '\DC1'
, string "DC2" >> return '\DC2'
, string "DC3" >> return '\DC3'
, string "DC4" >> return '\DC4'
, string "NAK" >> return '\NAK'
, string "SYN" >> return '\SYN'
, string "ETB" >> return '\ETB'
, string "CAN" >> return '\CAN'
, string "EM" >> return '\EM'
, string "SUB" >> return '\SUB'
, string "ESC" >> return '\ESC'
, string "FS" >> return '\FS'
, string "GS" >> return '\GS'
, string "RS" >> return '\RS'
, string "US" >> return '\US'
, string "SP" >> return '\SP'
, string "DEL" >> return '\DEL'
]
lexString :: ReadP Lexeme
lexString =
do char '"'
body id
where
body f =
do (c,esc) <- lexStrItem
if c /= '"' || esc
then body (f.(c:))
else let s = f "" in
return (String s)
lexStrItem = (lexEmpty >> lexStrItem)
+++ lexCharE
lexEmpty =
do char '\\'
c <- get
case c of
'&' -> do return ()
_ | isSpace c -> do skipSpaces; char '\\'; return ()
_ -> do pfail
type Base = Int
type Digits = [Int]
lexNumber :: ReadP Lexeme
lexNumber
= lexHexOct <++
lexDecNumber
lexHexOct :: ReadP Lexeme
lexHexOct
= do char '0'
base <- lexBaseChar
digits <- lexDigits base
return (Int (val (fromIntegral base) 0 digits))
lexBaseChar :: ReadP Int
lexBaseChar = do { c <- get;
case c of
'o' -> return 8
'O' -> return 8
'x' -> return 16
'X' -> return 16
_ -> pfail }
lexDecNumber :: ReadP Lexeme
lexDecNumber =
do xs <- lexDigits 10
mFrac <- lexFrac <++ return Nothing
mExp <- lexExp <++ return Nothing
return (value xs mFrac mExp)
where
value xs mFrac mExp = valueFracExp (val 10 0 xs) mFrac mExp
valueFracExp :: Integer -> Maybe Digits -> Maybe Integer
-> Lexeme
valueFracExp a Nothing Nothing
= Int a
valueFracExp a Nothing (Just exp)
| exp >= 0 = Int (a * (10 ^ exp))
| otherwise = Rat (valExp (fromInteger a) exp)
valueFracExp a (Just fs) mExp
= case mExp of
Nothing -> Rat rat
Just exp -> Rat (valExp rat exp)
where
rat :: Rational
rat = fromInteger a + frac 10 0 1 fs
valExp :: Rational -> Integer -> Rational
valExp rat exp = rat * (10 ^^ exp)
lexFrac :: ReadP (Maybe Digits)
lexFrac = do char '.'
fraction <- lexDigits 10
return (Just fraction)
lexExp :: ReadP (Maybe Integer)
lexExp = do char 'e' +++ char 'E'
exp <- signedExp +++ lexInteger 10
return (Just exp)
where
signedExp
= do c <- char '-' +++ char '+'
n <- lexInteger 10
return (if c == '-' then n else n)
lexDigits :: Int -> ReadP Digits
lexDigits base =
do s <- look
xs <- scan s id
guard (not (null xs))
return xs
where
scan (c:cs) f = case valDig base c of
Just n -> do get; scan cs (f.(n:))
Nothing -> do return (f [])
scan [] f = do return (f [])
lexInteger :: Base -> ReadP Integer
lexInteger base =
do xs <- lexDigits base
return (val (fromIntegral base) 0 xs)
val :: Num a => a -> a -> Digits -> a
val _ y [] = y
val base y (x:xs) = y' `seq` val base y' xs
where
y' = y * base + fromIntegral x
frac :: Integral a => a -> a -> a -> Digits -> Ratio a
frac _ a b [] = a % b
frac base a b (x:xs) = a' `seq` b' `seq` frac base a' b' xs
where
a' = a * base + fromIntegral x
b' = b * base
valDig :: Num a => a -> Char -> Maybe Int
valDig 8 c
| '0' <= c && c <= '7' = Just (ord c ord '0')
| otherwise = Nothing
valDig 10 c = valDecDig c
valDig 16 c
| '0' <= c && c <= '9' = Just (ord c ord '0')
| 'a' <= c && c <= 'f' = Just (ord c ord 'a' + 10)
| 'A' <= c && c <= 'F' = Just (ord c ord 'A' + 10)
| otherwise = Nothing
valDig _ _ = error "valDig: Bad base"
valDecDig :: Char -> Maybe Int
valDecDig c
| '0' <= c && c <= '9' = Just (ord c ord '0')
| otherwise = Nothing
readIntP :: Num a => a -> (Char -> Bool) -> (Char -> Int) -> ReadP a
readIntP base isDigit valDigit =
do s <- munch1 isDigit
return (val base 0 (map valDigit s))
readIntP' :: Num a => a -> ReadP a
readIntP' base = readIntP base isDigit valDigit
where
isDigit c = maybe False (const True) (valDig base c)
valDigit c = maybe 0 id (valDig base c)
readOctP, readDecP, readHexP :: Num a => ReadP a
readOctP = readIntP' 8
readDecP = readIntP' 10
readHexP = readIntP' 16