module Lexeme (
isLexCon, isLexVar, isLexId, isLexSym,
isLexConId, isLexConSym, isLexVarId, isLexVarSym,
startsVarSym, startsVarId, startsConSym, startsConId,
okVarOcc, okConOcc, okTcOcc,
okVarIdOcc, okVarSymOcc, okConIdOcc, okConSymOcc
) where
import FastString
import Util ((<||>))
import Data.Char
import qualified Data.Set as Set
isLexCon, isLexVar, isLexId, isLexSym :: FastString -> Bool
isLexConId, isLexConSym, isLexVarId, isLexVarSym :: FastString -> Bool
isLexCon cs = isLexConId cs || isLexConSym cs
isLexVar cs = isLexVarId cs || isLexVarSym cs
isLexId cs = isLexConId cs || isLexVarId cs
isLexSym cs = isLexConSym cs || isLexVarSym cs
isLexConId cs
| nullFS cs = False
| cs == (fsLit "[]") = True
| otherwise = startsConId (headFS cs)
isLexVarId cs
| nullFS cs = False
| otherwise = startsVarId (headFS cs)
isLexConSym cs
| nullFS cs = False
| cs == (fsLit "->") = True
| otherwise = startsConSym (headFS cs)
isLexVarSym fs
| fs == (fsLit "~R#") = True
| otherwise
= case (if nullFS fs then [] else unpackFS fs) of
[] -> False
(c:cs) -> startsVarSym c && all isVarSymChar cs
startsVarSym, startsVarId, startsConSym, startsConId :: Char -> Bool
startsVarSym c = startsVarSymASCII c || (ord c > 0x7f && isSymbol c)
startsConSym c = c == ':'
startsVarId c = c == '_' || case generalCategory c of
LowercaseLetter -> True
OtherLetter -> True
_ -> False
startsConId c = isUpper c || c == '('
startsVarSymASCII :: Char -> Bool
startsVarSymASCII c = c `elem` "!#$%&*+./<=>?@\\^|~-"
isVarSymChar :: Char -> Bool
isVarSymChar c = c == ':' || startsVarSym c
okVarOcc :: String -> Bool
okVarOcc str@(c:_)
| startsVarId c
= okVarIdOcc str
| startsVarSym c
= okVarSymOcc str
okVarOcc _ = False
okConOcc :: String -> Bool
okConOcc str@(c:_)
| startsConId c
= okConIdOcc str
| startsConSym c
= okConSymOcc str
| str == "[]"
= True
okConOcc _ = False
okTcOcc :: String -> Bool
okTcOcc "[]" = True
okTcOcc "->" = True
okTcOcc "~" = True
okTcOcc str@(c:_)
| startsConId c
= okConIdOcc str
| startsConSym c
= okConSymOcc str
| startsVarSym c
= okVarSymOcc str
okTcOcc _ = False
okVarIdOcc :: String -> Bool
okVarIdOcc str = okIdOcc str &&
not (str `Set.member` reservedIds)
okVarSymOcc :: String -> Bool
okVarSymOcc str = all okSymChar str &&
not (str `Set.member` reservedOps) &&
not (isDashes str)
okConIdOcc :: String -> Bool
okConIdOcc str = okIdOcc str ||
is_tuple_name1 str
where
is_tuple_name1 ('(' : rest) = is_tuple_name2 rest
is_tuple_name1 _ = False
is_tuple_name2 ")" = True
is_tuple_name2 (',' : rest) = is_tuple_name2 rest
is_tuple_name2 (ws : rest)
| isSpace ws = is_tuple_name2 rest
is_tuple_name2 _ = False
okConSymOcc :: String -> Bool
okConSymOcc ":" = True
okConSymOcc str = all okSymChar str &&
not (str `Set.member` reservedOps)
okIdOcc :: String -> Bool
okIdOcc str
= let hashes = dropWhile (okIdChar <||> okIdSuffixChar) str in
all (== '#') hashes
okIdChar :: Char -> Bool
okIdChar c = case generalCategory c of
UppercaseLetter -> True
LowercaseLetter -> True
OtherLetter -> True
TitlecaseLetter -> True
DecimalNumber -> True
OtherNumber -> True
_ -> c == '\'' || c == '_'
okIdSuffixChar :: Char -> Bool
okIdSuffixChar c = case generalCategory c of
ModifierLetter -> True
_ -> False
okSymChar :: Char -> Bool
okSymChar c
| c `elem` specialSymbols
= False
| c `elem` "_\"'"
= False
| otherwise
= case generalCategory c of
ConnectorPunctuation -> True
DashPunctuation -> True
OtherPunctuation -> True
MathSymbol -> True
CurrencySymbol -> True
ModifierSymbol -> True
OtherSymbol -> True
_ -> False
reservedIds :: Set.Set String
reservedIds = Set.fromList [ "case", "class", "data", "default", "deriving"
, "do", "else", "foreign", "if", "import", "in"
, "infix", "infixl", "infixr", "instance", "let"
, "module", "newtype", "of", "then", "type", "where"
, "_" ]
specialSymbols :: [Char]
specialSymbols = "(),;[]`{}"
reservedOps :: Set.Set String
reservedOps = Set.fromList [ "..", ":", "::", "=", "\\", "|", "<-", "->"
, "@", "~", "=>" ]
isDashes :: String -> Bool
isDashes ('-' : '-' : rest) = all (== '-') rest
isDashes _ = False