Distribution/Parsec.hs

{-# LANGUAGE CPP                 #-}
{-# LANGUAGE FlexibleContexts    #-}
{-# LANGUAGE GADTs               #-}
{-# LANGUAGE RankNTypes          #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Distribution.Parsec (
    Parsec(..),
    ParsecParser (..),
    runParsecParser,
    runParsecParser',
    simpleParsec,
    lexemeParsec,
    eitherParsec,
    explicitEitherParsec,
    -- * CabalParsing and diagnostics
    CabalParsing (..),
    -- ** Warnings
    PWarnType (..),
    PWarning (..),
    showPWarning,
    -- ** Errors
    PError (..),
    showPError,
    -- * Position
    Position (..),
    incPos,
    retPos,
    showPos,
    zeroPos,
    -- * Utilities
    parsecToken,
    parsecToken',
    parsecFilePath,
    parsecQuoted,
    parsecMaybeQuoted,
    parsecCommaList,
    parsecLeadingCommaList,
    parsecOptCommaList,
    parsecLeadingOptCommaList,
    parsecStandard,
    parsecUnqualComponentName,
    ) where

import Data.Char                           (digitToInt, intToDigit)
import Data.List                           (transpose)
import Distribution.CabalSpecVersion
import Distribution.Compat.Prelude
import Distribution.Parsec.Error           (PError (..), showPError)
import Distribution.Parsec.FieldLineStream (FieldLineStream, fieldLineStreamFromString)
import Distribution.Parsec.Position        (Position (..), incPos, retPos, showPos, zeroPos)
import Distribution.Parsec.Warning         (PWarnType (..), PWarning (..), showPWarning)
import Numeric                             (showIntAtBase)
import Prelude ()

import qualified Distribution.Compat.CharParsing as P
import qualified Distribution.Compat.MonadFail   as Fail
import qualified Text.Parsec                     as Parsec

-------------------------------------------------------------------------------
-- Class
-------------------------------------------------------------------------------

-- | Class for parsing with @parsec@. Mainly used for @.cabal@ file fields.
--
-- For parsing @.cabal@ like file structure, see "Distribution.Fields".
--
class Parsec a where
    -- | Parser for a value of type @a@; it can be run in any 'CabalParsing'
    -- monad. Whether trailing whitespace is consumed is up to the instance;
    -- use 'lexemeParsec' when it must always be consumed.
    parsec :: CabalParsing m => m a

-- | Parsing class which
--
-- * can report Cabal parser warnings.
--
-- * knows the @cabal-version@ we work with.
--
class (P.CharParsing m, MonadPlus m, Fail.MonadFail m) => CabalParsing m where
    -- | Report a warning of the given type with the given message.
    parsecWarning :: PWarnType -> String -> m ()

    -- | Parse a double-quoted string literal with Haskell-style escapes.
    -- The default implementation is this module's @stringLiteral@ parser.
    parsecHaskellString :: m String
    parsecHaskellString = stringLiteral

    -- | The 'CabalSpecVersion' the parser is being run for.
    askCabalSpecVersion :: m CabalSpecVersion

-- | Run 'parsec' and then skip any whitespace that follows it.
--
-- A 'parsec' implementation /may/ consume trailing spaces on its own; this
-- wrapper /always/ does.
lexemeParsec :: (CabalParsing m, Parsec a) => m a
lexemeParsec = do
    value <- parsec
    P.spaces
    return value

-- | A @parsec@ parser over a 'FieldLineStream' that keeps a list of
-- 'PWarning's as its user state and is parameterised by the
-- 'CabalSpecVersion' it is run for.
newtype ParsecParser a = PP
    { unPP :: CabalSpecVersion -> Parsec.Parsec FieldLineStream [PWarning] a
    }

-- | Wrap a plain @parsec@ parser as a 'ParsecParser' that ignores the
-- 'CabalSpecVersion' it is given.
liftParsec :: Parsec.Parsec FieldLineStream [PWarning] a -> ParsecParser a
liftParsec = PP . const

-- | Maps over the result of the wrapped parser; the spec version is handed
-- to the wrapped parser unchanged.
instance Functor ParsecParser where
    fmap f (PP run) = PP $ \ver -> f <$> run ver
    {-# INLINE fmap #-}

    x <$ PP run = PP $ \ver -> x <$ run ver
    {-# INLINE (<$) #-}

-- | Both operands are run with the same spec version and combined with the
-- corresponding operation of the underlying parser.
instance Applicative ParsecParser where
    pure = PP . const . pure
    {-# INLINE pure #-}

    PP pf <*> PP px = PP $ \ver -> pf ver <*> px ver
    {-# INLINE (<*>) #-}
    PP pa  *> PP pb = PP $ \ver -> pa ver  *> pb ver
    {-# INLINE (*>) #-}
    PP pa <*  PP pb = PP $ \ver -> pa ver <*  pb ver
    {-# INLINE (<*) #-}

-- | Choice and repetition are delegated to the underlying parser, with the
-- spec version threaded to every operand.
instance Alternative ParsecParser where
    empty = PP (const empty)

    PP l <|> PP r = PP $ \ver -> l ver <|> r ver
    {-# INLINE (<|>) #-}

    many (PP run) = PP $ \ver -> many (run ver)
    {-# INLINE many #-}

    some (PP run) = PP $ \ver -> some (run ver)
    {-# INLINE some #-}

-- | Sequencing runs the first parser and then the continuation's parser,
-- both with the same spec version.
instance Monad ParsecParser where
    return = pure

    PP m >>= k = PP $ \ver -> do
        x <- m ver
        unPP (k x) ver
    {-# INLINE (>>=) #-}
    (>>) = (*>)
    {-# INLINE (>>) #-}

#if !(MIN_VERSION_base(4,13,0))
    -- On older @base@, 'fail' is still a 'Monad' method; reuse the
    -- 'Fail.MonadFail' implementation.
    fail = Fail.fail
#endif

-- | 'MonadPlus' reuses the 'Alternative' instance: 'mzero' is 'empty' and
-- 'mplus' is '<|>'.
instance MonadPlus ParsecParser where
    mzero = empty
    mplus = (<|>)

-- | A monadic 'Fail.fail' is reported through 'P.unexpected', using the
-- failure message as its argument.
instance Fail.MonadFail ParsecParser where
    fail = P.unexpected

-- | Every combinator is forwarded to the underlying parser's instance;
-- combinators that take a parser run it with the current spec version.
instance P.Parsing ParsecParser where
    try (PP run)           = PP $ \ver -> P.try (run ver)
    PP run <?> label       = PP $ \ver -> run ver P.<?> label
    skipMany (PP run)      = PP $ \ver -> P.skipMany (run ver)
    skipSome (PP run)      = PP $ \ver -> P.skipSome (run ver)
    unexpected msg         = liftParsec (P.unexpected msg)
    eof                    = liftParsec P.eof
    notFollowedBy (PP run) = PP $ \ver -> P.notFollowedBy (run ver)

-- | Character-level primitives are lifted from the underlying parser; none
-- of them depends on the spec version.
instance P.CharParsing ParsecParser where
    satisfy predicate = liftParsec (P.satisfy predicate)
    char c            = liftParsec (P.char c)
    notChar c         = liftParsec (P.notChar c)
    anyChar           = liftParsec P.anyChar
    string str        = liftParsec (P.string str)

-- | Warnings are prepended to the parser's user state, tagged with the
-- source position at which they were raised. The spec version is the one
-- the parser was run with.
instance CabalParsing ParsecParser where
    parsecWarning wtype msg = liftParsec $ do
        spos <- Parsec.getPosition
        let pos = Position (Parsec.sourceLine spos) (Parsec.sourceColumn spos)
        Parsec.modifyState (PWarning wtype pos msg :)
    askCabalSpecVersion = PP $ \ver -> pure ver

-- | Parse a 'String' with 'lexemeParsec'.
--
-- Returns 'Nothing' when parsing fails; the parse error is discarded.
simpleParsec :: Parsec a => String -> Maybe a
simpleParsec input =
    case runParsecParser lexemeParsec "<simpleParsec>" stream of
        Left _      -> Nothing
        Right value -> Just value
  where
    stream = fieldLineStreamFromString input

-- | Parse a 'String' with 'parsec', accepting trailing whitespace.
--
-- This is 'explicitEitherParsec' applied to 'parsec'; on failure the 'Left'
-- carries the rendered parse error.
eitherParsec :: Parsec a => String -> Either String a
eitherParsec = explicitEitherParsec parsec

-- | Parse a 'String' with the given 'ParsecParser'. Trailing whitespace is
-- accepted.
--
-- On failure the parse error is rendered with 'show' and returned as 'Left'.
explicitEitherParsec :: ParsecParser a -> String -> Either String a
explicitEitherParsec parser input =
    case runParsecParser (parser <* P.spaces) "<eitherParsec>" stream of
        Left err    -> Left (show err)
        Right value -> Right value
  where
    stream = fieldLineStreamFromString input

-- | Run a 'ParsecParser' with 'cabalSpecLatest' as the spec version.
--
-- The 'FilePath' is the source name handed on to 'runParsecParser''.
runParsecParser :: ParsecParser a -> FilePath -> FieldLineStream -> Either Parsec.ParseError a
runParsecParser parser name stream =
    runParsecParser' cabalSpecLatest parser name stream

-- | Like 'runParsecParser' but lets you specify the 'CabalSpecVersion' to
-- use.
--
-- The parser must consume the whole input: end of input is required after
-- it. The warning list starts out empty.
--
-- @since 3.0.0.0
--
runParsecParser' :: CabalSpecVersion -> ParsecParser a -> FilePath -> FieldLineStream -> Either Parsec.ParseError a
runParsecParser' ver (PP run) name = Parsec.runParser wholeInput [] name
  where
    wholeInput = run ver <* P.eof

-- | Parses the wrapped value and wraps it in 'Identity'.
instance Parsec a => Parsec (Identity a) where
    parsec = fmap Identity parsec

-- | Accepts @True@ and @False@. Any other capitalisation of those two words
-- is accepted as well, but a 'PWTBoolCase' warning is reported. Every other
-- alphabetic word is a parse failure.
instance Parsec Bool where
    parsec = do
        word <- P.munch1 isAlpha
        -- The lower-cased form is only inspected when the exact spellings
        -- did not match.
        case (word, map toLower word) of
            ("True",  _)       -> pure True
            ("False", _)       -> pure False
            (_,       "true")  -> warnThen True
            (_,       "false") -> warnThen False
            _                  -> fail $ "Not a boolean: " ++ word
      where
        warnThen b = parsecWarning PWTBoolCase caseWarning *> pure b
        caseWarning =
            "Boolean values are case sensitive, use 'True' or 'False'."

-- | Parse a token: either a Haskell string literal, or a non-empty run of
-- characters that are neither whitespace nor a comma (roughly @[^ ,]+@).
--
-- A bare @--@ token is accepted, but a 'PWTDoubleDash' warning is reported.
parsecToken :: CabalParsing m => m String
parsecToken = parsecHaskellString <|> (bareToken >>= checkNotDoubleDash)
  where
    bareToken = P.munch1 (\c -> c /= ',' && not (isSpace c)) P.<?> "identifier"

-- | Like 'parsecToken', but a bare token may contain commas: either a
-- Haskell string literal, or a non-empty run of non-whitespace characters
-- (roughly @[^ ]+@).
--
-- A bare @--@ token is accepted, but a 'PWTDoubleDash' warning is reported.
parsecToken' :: CabalParsing m => m String
parsecToken' = parsecHaskellString <|> (bareToken >>= checkNotDoubleDash)
  where
    bareToken = P.munch1 (not . isSpace) P.<?> "token"

-- | Return the token unchanged; when it is exactly @--@, report a
-- 'PWTDoubleDash' warning first.
checkNotDoubleDash ::  CabalParsing m => String -> m String
checkNotDoubleDash s
    | s == "--" = parsecWarning PWTDoubleDash doubleDashMessage >> return s
    | otherwise = return s
  where
    doubleDashMessage = unwords
        [ "Double-dash token found."
        , "Note: there are no end-of-line comments in .cabal files, only whole line comments."
        , "Use \"--\" (quoted double dash) to silence this warning, if you actually want -- token"
        ]

-- | Parse a file path. This is currently the same parser as 'parsecToken'.
parsecFilePath :: CabalParsing m => m FilePath
parsecFilePath = parsecToken

-- | Parse a benchmark or test-suite type: one or more dash-terminated name
-- components followed by a version.
--
-- The callback receives the parsed version and the lower-cased name, the
-- components of which are joined with dashes.
parsecStandard :: (CabalParsing m, Parsec ver) => (ver -> String -> a) -> m a
parsecStandard f = do
    parts   <- some (P.try (namePart <* P.char '-'))
    version <- parsec
    let lowered = map toLower (intercalate "-" parts)
    return $! f version lowered
  where
    -- Each component must contain an alphabetic character, to avoid
    -- ambiguity in identifiers like foo-1 (the 1 is the version number).
    namePart = P.munch1 isAlphaNum >>= rejectAllDigits

    rejectAllDigits cs
        | all isDigit cs = fail "all digit component"
        | otherwise      = return cs

-- | Parse zero or more items separated by commas. Whitespace after every
-- item and after every comma is skipped.
parsecCommaList :: CabalParsing m => m a -> m [a]
parsecCommaList p = P.sepBy item comma
  where
    item  = p <* P.spaces
    comma = P.char ',' *> P.spaces P.<?> "comma"

-- | Like 'parsecCommaList' but also accepts a leading or a trailing comma.
--
-- @
-- p (comma p)*  -- p `sepBy` comma
-- (comma p)*    -- leading comma
-- (p comma)*    -- trailing comma
-- @
parsecLeadingCommaList :: CabalParsing m => m a -> m [a]
parsecLeadingCommaList p =
    P.optional comma >>= maybe noLeadingComma (const leadingComma)
  where
    lp    = p <* P.spaces
    comma = P.char ',' *> P.spaces P.<?> "comma"

    -- Without a leading comma the list may be empty and may end in a comma.
    noLeadingComma = toList <$> P.sepEndByNonEmpty lp comma <|> pure []

    -- After a leading comma at least one item is required.
    leadingComma = toList <$> P.sepByNonEmpty lp comma

-- | Parse zero or more items whose separating commas are optional.
-- Whitespace after every item and after every comma is skipped.
parsecOptCommaList :: CabalParsing m => m a -> m [a]
parsecOptCommaList p = P.sepBy item (P.optional comma)
  where
    item  = p <* P.spaces
    comma = P.char ',' *> P.spaces

-- | Like 'parsecOptCommaList' but
--
-- * requires either all commas or none of them
-- * accepts a leading or a trailing comma.
--
-- @
-- p (comma p)*  -- p `sepBy` comma
-- (comma p)*    -- leading comma
-- (p comma)*    -- trailing comma
-- p*            -- no commas: many p
-- @
--
-- @since 3.0.0.0
--
parsecLeadingOptCommaList :: CabalParsing m => m a -> m [a]
parsecLeadingOptCommaList p =
    P.optional comma >>= maybe noLeadingComma (const leadingComma)
  where
    lp    = p <* P.spaces
    comma = P.char ',' *> P.spaces P.<?> "comma"

    -- After a leading comma at least one item is required.
    leadingComma = toList <$> P.sepByNonEmpty lp comma

    noLeadingComma = headThenTail <|> pure []

    -- Whether a comma follows the first item decides how the remaining
    -- items are separated.
    headThenTail = do
        x    <- lp
        rest <- P.optional comma >>= maybe (many lp) (const (P.sepEndBy lp comma))
        return (x : rest)

-- | Run the given parser between a pair of double quotes.
--
-- The quoted content is handled entirely by the given parser; this function
-- itself performs no unescaping.
parsecQuoted :: CabalParsing m => m a -> m a
parsecQuoted p = P.between quote quote p
  where
    quote = P.char '"'

-- | @parsecMaybeQuoted p = 'parsecQuoted' p <|> p@.
--
-- The quoted form is tried first; the bare parser is the fallback.
parsecMaybeQuoted :: CabalParsing m => m a -> m a
parsecMaybeQuoted p = parsecQuoted p <|> p

-- | Parse an unqualified component name: one or more alphanumeric portions
-- separated by dashes, none of which may consist of digits only. The result
-- is the portions joined with dashes again.
parsecUnqualComponentName :: CabalParsing m => m String
parsecUnqualComponentName =
    intercalate "-" . toList <$> P.sepByNonEmpty component (P.char '-')
  where
    component :: CabalParsing m => m String
    component = P.munch1 isAlphaNum >>= \cs ->
        if all isDigit cs
            then fail "all digits in portion of unqualified component name"
            else return cs

-- | Parse a double-quoted string literal made of ordinary characters and
-- backslash escapes, and return the string it denotes.
--
-- Escapes that denote no character (a string gap, or a backslash followed
-- by @&@) contribute nothing to the result.
stringLiteral :: forall m. P.CharParsing m => m String
stringLiteral = lit where
    -- Every element of the literal yields 'Just' a character or 'Nothing';
    -- the fold keeps the 'Just' values in order.
    lit :: m String
    lit = foldr (maybe id (:)) ""
        <$> P.between (P.char '"') (P.char '"' P.<?> "end of string") (many stringChar)
        P.<?> "string"

    -- One element of the literal: an ordinary character or an escape.
    stringChar :: m (Maybe Char)
    stringChar = Just <$> stringLetter
         <|> stringEscape
         P.<?> "string character"

    -- Any character except the double quote, the backslash and characters
    -- up to and including code 26 (the escape in the literal below is
    -- decimal, not octal).
    stringLetter :: m Char
    stringLetter = P.satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))

    -- A backslash followed by a gap, the empty escape or an escape code.
    stringEscape :: m (Maybe Char)
    stringEscape = P.char '\\' *> esc where
        esc :: m (Maybe Char)
        esc = Nothing <$ escapeGap
            <|> Nothing <$ escapeEmpty
            <|> Just <$> escapeCode

    -- escapeEmpty is the @&@ of the empty escape. escapeGap is the rest of
    -- a string gap: whatever 'P.skipSpaces1' skips (presumably one or more
    -- whitespace characters -- confirm against its definition) and then
    -- the closing backslash.
    escapeEmpty, escapeGap :: m Char
    escapeEmpty = P.char '&'
    escapeGap = P.skipSpaces1 *> (P.char '\\' P.<?> "end of string gap")

-- | Parse the part of an escape sequence that follows the backslash and
-- return the character it denotes.
--
-- The alternatives are tried in this order: single-character escapes,
-- numeric escapes, ASCII control-code names, caret notation.
escapeCode :: forall m. P.CharParsing m => m Char
escapeCode = (charEsc <|> charNum <|> charAscii <|> charControl) P.<?> "escape code"
  where
  -- charControl: a caret followed by a character accepted by 'P.upper' or
  -- by an at sign; the result is the character whose code is that
  -- character's code minus the code of the at sign.
  --
  -- charNum: a number in decimal, or in octal after @o@, or in hexadecimal
  -- after @x@, bounded by the largest 'Char' code.
  charControl, charNum :: m Char
  charControl = (\c -> toEnum (fromEnum c - fromEnum '@')) <$> (P.char '^' *> (P.upper <|> P.char '@'))
  charNum = toEnum <$> num
    where
      num :: m Int
      num = bounded 10 maxchar
        <|> (P.char 'o' *> bounded 8 maxchar)
        <|> (P.char 'x' *> bounded 16 maxchar)
      maxchar = fromEnum (maxBound :: Char)

  -- @bounded base bnd@ parses a number written in @base@ and is meant to
  -- reject, via @toomuch@, anything greater than @bnd@. The digits accepted
  -- by @bounded'@ are folded into an 'Int', most significant digit first.
  bounded :: Int -> Int -> m Int
  bounded base bnd = foldl' (\x d -> base * x + digitToInt d) 0
                 <$> bounded' (take base thedigits) (map digitToInt $ showIntAtBase base intToDigit bnd "")
    where
      -- One parser per digit value, in increasing order; the digits above 9
      -- accept either case.
      thedigits :: [m Char]
      thedigits = map P.char ['0'..'9'] ++ map P.oneOf (transpose [['A'..'F'],['a'..'f']])

      toomuch :: m a
      toomuch = P.unexpected "out-of-range numeric escape sequence"

      -- Both take the digit parsers and the digits of the bound (most
      -- significant first) and return the accepted digit characters.
      --
      -- bounded' first skips leading zeros; if no digit follows them the
      -- result is empty (which folds to 0). Otherwise, and when there are
      -- no leading zeros, bounded'' does the bounded parsing.
      --
      -- bounded'' compares the next digit with the current bound digit:
      -- a lower digit may be followed by as many digits as bound digits
      -- remain; an equal digit continues against the rest of the bound; a
      -- higher digit is only tried while bound digits remain and may be
      -- followed by one digit fewer. A digit beyond the permitted length
      -- is reported through toomuch.
      bounded', bounded'' :: [m Char] -> [Int] -> m [Char]
      bounded' dps@(zero:_) bds = P.skipSome zero *> ([] <$ P.notFollowedBy (P.choice dps) <|> bounded'' dps bds)
                              <|> bounded'' dps bds
      bounded' []           _   = error "bounded called with base 0"
      bounded'' dps []         = [] <$ P.notFollowedBy (P.choice dps) <|> toomuch
      bounded'' dps (bd : bds) = let anyd :: m Char
                                     anyd = P.choice dps

                                     nomore :: m ()
                                     nomore = P.notFollowedBy anyd <|> toomuch

                                     (low, ex, high) = case splitAt bd dps of
                                        (low', ex' : high') -> (low', ex', high')
                                        (_, _)              -> error "escapeCode: Logic error"
                                  in ((:) <$> P.choice low <*> atMost (length bds) anyd) <* nomore
                                     <|> ((:) <$> ex <*> ([] <$ nomore <|> bounded'' dps bds))
                                     <|> if not (null bds)
                                            then (:) <$> P.choice high <*> atMost (length bds - 1) anyd <* nomore
                                            else empty
      -- Up to n occurrences of p, stopping at the first failure.
      atMost n p | n <= 0    = pure []
                 | otherwise = ((:) <$> p <*> atMost (n - 1) p) <|> pure []

  -- Single-character escapes, looked up in escMap.
  charEsc :: m Char
  charEsc = P.choice $ parseEsc <$> escMap

  parseEsc (c,code) = code <$ P.char c
  escMap = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"

  -- ASCII control-code names. The three-letter names are tried before the
  -- two-letter ones, and every attempt is wrapped in 'P.try'.
  charAscii :: m Char
  charAscii = P.choice $ parseAscii <$> asciiMap

  parseAscii (asc,code) = P.try $ code <$ P.string asc
  asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
  ascii2codes, ascii3codes :: [String]
  ascii2codes = [ "BS","HT","LF","VT","FF","CR","SO"
                , "SI","EM","FS","GS","RS","US","SP"]
  ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK"
                ,"BEL","DLE","DC1","DC2","DC3","DC4","NAK"
                ,"SYN","ETB","CAN","SUB","ESC","DEL"]
  ascii2, ascii3 :: String
  ascii2 = "\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP"
  ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"