{-# LANGUAGE BangPatterns #-}

-- |
-- Module      :  Documentation.Haddock.Parser.Identifier
-- Copyright   :  (c) Alec Theriault 2019,
-- License     :  BSD-like
--
-- Maintainer  :  haddock@projects.haskell.org
-- Stability   :  experimental
-- Portability :  portable
--
-- Functionality for parsing identifiers and operators
module Documentation.Haddock.Parser.Identifier
  ( Identifier (..)
  , parseValid
  ) where

import Control.Monad (guard)
import Data.Char (isAlpha, isAlphaNum)
import Data.Functor (($>))
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Text.Parsec
  ( State (..)
  , getParserState
  , setParserState
  )
import qualified Text.Parsec as Parsec
import Text.Parsec.Pos (updatePosChar)
import Text.Read.Lex (isSymbolChar)

import Documentation.Haddock.Parser.Monad
import Documentation.Haddock.Types (Namespace (..))

-- | Identifier string surrounded with namespace, opening, and closing quotes/backticks.
data Identifier = Identifier !Namespace !Char String !Char
  deriving (Int -> Identifier -> ShowS
[Identifier] -> ShowS
Identifier -> String
(Int -> Identifier -> ShowS)
-> (Identifier -> String)
-> ([Identifier] -> ShowS)
-> Show Identifier
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> Identifier -> ShowS
showsPrec :: Int -> Identifier -> ShowS
$cshow :: Identifier -> String
show :: Identifier -> String
$cshowList :: [Identifier] -> ShowS
showList :: [Identifier] -> ShowS
Show, Identifier -> Identifier -> Bool
(Identifier -> Identifier -> Bool)
-> (Identifier -> Identifier -> Bool) -> Eq Identifier
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: Identifier -> Identifier -> Bool
== :: Identifier -> Identifier -> Bool
$c/= :: Identifier -> Identifier -> Bool
/= :: Identifier -> Identifier -> Bool
Eq)

parseValid :: Parser Identifier
parseValid :: Parser Identifier
parseValid = do
  s@State{stateInput = inp, statePos = pos} <- ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: Type -> Type) s u.
Monad m =>
ParsecT s u m (State s u)
getParserState

  case takeIdentifier inp of
    Maybe (Namespace, Char, Text, Char, Text)
Nothing -> String -> Parser Identifier
forall s u (m :: Type -> Type) a. String -> ParsecT s u m a
Parsec.parserFail String
"parseValid: Failed to match a valid identifier"
    Just (Namespace
ns, Char
op, Text
ident, Char
cl, Text
inp') ->
      let posOp :: SourcePos
posOp = SourcePos -> Char -> SourcePos
updatePosChar SourcePos
pos Char
op
          posIdent :: SourcePos
posIdent = (SourcePos -> Char -> SourcePos) -> SourcePos -> Text -> SourcePos
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl SourcePos -> Char -> SourcePos
updatePosChar SourcePos
posOp Text
ident
          posCl :: SourcePos
posCl = SourcePos -> Char -> SourcePos
updatePosChar SourcePos
posIdent Char
cl
          s' :: State Text ParserState
s' = State Text ParserState
s{stateInput = inp', statePos = posCl}
       in State Text ParserState
-> ParsecT Text ParserState Identity (State Text ParserState)
forall (m :: Type -> Type) s u.
Monad m =>
State s u -> ParsecT s u m (State s u)
setParserState State Text ParserState
s' ParsecT Text ParserState Identity (State Text ParserState)
-> Identifier -> Parser Identifier
forall (f :: Type -> Type) a b. Functor f => f a -> b -> f b
$> Namespace -> Char -> String -> Char -> Identifier
Identifier Namespace
ns Char
op (Text -> String
T.unpack Text
ident) Char
cl

-- | Try to parse a delimited identifier off the front of the given input.
--
-- This tries to match as many valid Haskell identifiers/operators as possible,
-- to the point of sometimes accepting invalid things (ex: keywords). Some
-- considerations:
--
--   - operators and identifiers can have module qualifications
--   - operators can be wrapped in parens (for prefix)
--   - identifiers can be wrapped in backticks (for infix)
--   - delimiters are backticks or regular ticks
--   - since regular ticks are also valid in identifiers, we opt for the
--     longest successful parse
--
-- This function should make /O(1)/ allocations
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
takeIdentifier :: Text -> Maybe (Namespace, Char, Text, Char, Text)
takeIdentifier Text
input = [(Namespace, Char, Text, Char, Text)]
-> Maybe (Namespace, Char, Text, Char, Text)
forall a. [a] -> Maybe a
listToMaybe ([(Namespace, Char, Text, Char, Text)]
 -> Maybe (Namespace, Char, Text, Char, Text))
-> [(Namespace, Char, Text, Char, Text)]
-> Maybe (Namespace, Char, Text, Char, Text)
forall a b. (a -> b) -> a -> b
$ do
  -- Optional namespace
  let (Namespace
ns, Text
input') = case Text -> Maybe (Char, Text)
T.uncons Text
input of
        Just (Char
'v', Text
i) -> (Namespace
Value, Text
i)
        Just (Char
't', Text
i) -> (Namespace
Type, Text
i)
        Maybe (Char, Text)
_ -> (Namespace
None, Text
input)

  -- Opening tick
  (op, input'') <- Maybe (Char, Text) -> [(Char, Text)]
forall a. Maybe a -> [a]
maybeToList (Text -> Maybe (Char, Text)
T.uncons Text
input')
  guard (op == '\'' || op == '`')

  -- Identifier/operator
  (ident, input''') <- wrapped input''

  -- Closing tick
  (cl, input'''') <- maybeToList (T.uncons input''')
  guard (cl == '\'' || cl == '`')

  return (ns, op, ident, cl, input'''')
  where
    -- \| Parse out a wrapped, possibly qualified, operator or identifier
    wrapped :: Text -> [(Text, Text)]
wrapped Text
t = do
      (c, t') <- Maybe (Char, Text) -> [(Char, Text)]
forall a. Maybe a -> [a]
maybeToList (Text -> Maybe (Char, Text)
T.uncons Text
t)
      -- Tuples
      case c of
        Char
'('
          | Just (Char
c', Text
_) <- Text -> Maybe (Char, Text)
T.uncons Text
t'
          , Char
c' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
',' Bool -> Bool -> Bool
|| Char
c' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
')' ->
              do
                let (Text
commas, Text
t'') = (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
',') Text
t'
                (')', t''') <- Maybe (Char, Text) -> [(Char, Text)]
forall a. Maybe a -> [a]
maybeToList (Text -> Maybe (Char, Text)
T.uncons Text
t'')
                return (T.take (T.length commas + 2) t, t''')

        -- Parenthesized
        Char
'(' -> do
          (n, t'') <- Bool -> Int -> [(Int, Text)] -> Text -> [(Int, Text)]
general Bool
False Int
0 [] Text
t'
          (')', t''') <- maybeToList (T.uncons t'')
          return (T.take (n + 2) t, t''')

        -- Backticked
        Char
'`' -> do
          (n, t'') <- Bool -> Int -> [(Int, Text)] -> Text -> [(Int, Text)]
general Bool
False Int
0 [] Text
t'
          ('`', t''') <- maybeToList (T.uncons t'')
          return (T.take (n + 2) t, t''')

        -- Unadorned
        Char
_ -> do
          (n, t'') <- Bool -> Int -> [(Int, Text)] -> Text -> [(Int, Text)]
general Bool
False Int
0 [] Text
t
          return (T.take n t, t'')

    -- \| Parse out a possibly qualified operator or identifier
    general
      :: Bool
      -- \^ refuse inputs starting with operators
      -> Int
      -- \^ total characters \"consumed\" so far
      -> [(Int, Text)]
      -- \^ accumulated results
      -> Text
      -- \^ current input
      -> [(Int, Text)]
    -- \^ total characters parsed & what remains
    general :: Bool -> Int -> [(Int, Text)] -> Text -> [(Int, Text)]
general !Bool
identOnly !Int
i [(Int, Text)]
acc Text
t
      -- Starts with an identifier (either just an identifier, or a module qual)
      | Just (Int
n, Text
rest) <- Text -> Maybe (Int, Text)
identLike Text
t =
          if Text -> Bool
T.null Text
rest
            then [(Int, Text)]
acc
            else case HasCallStack => Text -> Char
Text -> Char
T.head Text
rest of
              Char
'`' -> (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i, Text
rest) (Int, Text) -> [(Int, Text)] -> [(Int, Text)]
forall a. a -> [a] -> [a]
: [(Int, Text)]
acc
              Char
')' -> (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i, Text
rest) (Int, Text) -> [(Int, Text)] -> [(Int, Text)]
forall a. a -> [a] -> [a]
: [(Int, Text)]
acc
              Char
'.' -> Bool -> Int -> [(Int, Text)] -> Text -> [(Int, Text)]
general Bool
False (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) [(Int, Text)]
acc (HasCallStack => Text -> Text
Text -> Text
T.tail Text
rest)
              Char
'\'' ->
                let (Int
m, Text
rest') = Text -> (Int, Text)
quotes Text
rest
                 in Bool -> Int -> [(Int, Text)] -> Text -> [(Int, Text)]
general Bool
True (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i) ((Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i, Text
rest') (Int, Text) -> [(Int, Text)] -> [(Int, Text)]
forall a. a -> [a] -> [a]
: [(Int, Text)]
acc) (HasCallStack => Text -> Text
Text -> Text
T.tail Text
rest')
              Char
_ -> [(Int, Text)]
acc
      -- An operator
      | Just (Int
n, Text
rest) <- Text -> Maybe (Int, Text)
optr Text
t
      , Bool -> Bool
not Bool
identOnly =
          (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i, Text
rest) (Int, Text) -> [(Int, Text)] -> [(Int, Text)]
forall a. a -> [a] -> [a]
: [(Int, Text)]
acc
      -- Anything else
      | Bool
otherwise =
          [(Int, Text)]
acc

    -- \| Parse an identifier off the front of the input
    identLike :: Text -> Maybe (Int, Text)
identLike Text
t
      | Text -> Bool
T.null Text
t = Maybe (Int, Text)
forall a. Maybe a
Nothing
      | Char -> Bool
isAlpha (HasCallStack => Text -> Char
Text -> Char
T.head Text
t) Bool -> Bool -> Bool
|| Char
'_' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== HasCallStack => Text -> Char
Text -> Char
T.head Text
t =
          let !(Text
idt, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.span (\Char
c -> Char -> Bool
isAlphaNum Char
c Bool -> Bool -> Bool
|| Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'_') Text
t
              !(Text
octos, Text
rest') = (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#') Text
rest
           in (Int, Text) -> Maybe (Int, Text)
forall a. a -> Maybe a
Just (Text -> Int
T.length Text
idt Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Text -> Int
T.length Text
octos, Text
rest')
      | Bool
otherwise = Maybe (Int, Text)
forall a. Maybe a
Nothing

    -- \| Parse all but the last quote off the front of the input
    -- PRECONDITION: T.head t `elem` ['\'', '`']
    quotes :: Text -> (Int, Text)
    quotes :: Text -> (Int, Text)
quotes Text
t =
      let !n :: Int
n = Text -> Int
T.length ((Char -> Bool) -> Text -> Text
T.takeWhile (Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: Type -> Type) a.
(Foldable t, Eq a) =>
a -> t a -> Bool
`elem` [Char
'\'', Char
'`']) Text
t) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
       in (Int
n, Int -> Text -> Text
T.drop Int
n Text
t)

    -- \| Parse an operator off the front of the input
    optr :: Text -> Maybe (Int, Text)
optr Text
t =
      let !(Text
op, Text
rest) = (Char -> Bool) -> Text -> (Text, Text)
T.span Char -> Bool
isSymbolChar Text
t
       in if Text -> Bool
T.null Text
op then Maybe (Int, Text)
forall a. Maybe a
Nothing else (Int, Text) -> Maybe (Int, Text)
forall a. a -> Maybe a
Just (Text -> Int
T.length Text
op, Text
rest)