{-# LANGUAGE OverloadedStrings, CPP #-}
{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE UnboxedTuples #-}

-- |
-- Module      : Data.Text.Lazy.Read
-- Copyright   : (c) 2010, 2011 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Portability : GHC
--
-- Functions used frequently when reading textual data.
module Data.Text.Lazy.Read
    (
      Reader
    , decimal
    , hexadecimal
    , signed
    , rational
    , double
    ) where

import Control.Monad (liftM)
import Data.Char (ord)
import Data.Int (Int8, Int16, Int32, Int64)
import Data.Ratio ((%))
import Data.Text.Internal.Read
import Data.Text.Array as A
import Data.Text.Lazy as T
import Data.Text.Internal.Lazy as T (Text(..))
import qualified Data.Text.Internal as T (Text(..))
import qualified Data.Text.Internal.Private as T (spanAscii_)
import Data.Word (Word, Word8, Word16, Word32, Word64)

-- | Read some text.  If the read succeeds, return its value and the
-- remaining text, otherwise an error message.
type Reader a = Text -> Either String (a, Text)
type Parser = IParser Text

-- | Read a decimal integer.  The input must begin with at least one
-- decimal digit, and is consumed until a non-digit or end of string
-- is reached.
--
-- This function does not handle leading sign characters.  If you need
-- to handle signed input, use @'signed' 'decimal'@.
--
-- /Note/: For fixed-width integer types, this function does not
-- attempt to detect overflow, so a sufficiently long input may give
-- incorrect results.  If you are worried about overflow, use
-- 'Integer' for your result type.
decimal :: Integral a => Reader a
{-# SPECIALIZE decimal :: Reader Int #-}
{-# SPECIALIZE decimal :: Reader Int8 #-}
{-# SPECIALIZE decimal :: Reader Int16 #-}
{-# SPECIALIZE decimal :: Reader Int32 #-}
{-# SPECIALIZE decimal :: Reader Int64 #-}
{-# SPECIALIZE decimal :: Reader Integer #-}
{-# SPECIALIZE decimal :: Reader Data.Word.Word #-}
{-# SPECIALIZE decimal :: Reader Word8 #-}
{-# SPECIALIZE decimal :: Reader Word16 #-}
{-# SPECIALIZE decimal :: Reader Word32 #-}
{-# SPECIALIZE decimal :: Reader Word64 #-}
decimal :: forall a. Integral a => Reader a
decimal Text
txt
    | Text -> Bool
T.null Text
h  = String -> Either String (a, Text)
forall a b. a -> Either a b
Left String
"input does not start with a digit"
    | Bool
otherwise = (a, Text) -> Either String (a, Text)
forall a b. b -> Either a b
Right ((a -> Char -> a) -> a -> Text -> a
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl' a -> Char -> a
forall {a}. Num a => a -> Char -> a
go a
0 Text
h, Text
t)
  where (# Text
h, Text
t #)  = (Word8 -> Bool) -> Text -> (# Text, Text #)
spanAscii_ (\Word8
w -> Word8
w Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Char -> Word8
ord8 Char
'0' Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
10) Text
txt
        go :: a -> Char -> a
go a
n Char
d = (a
n a -> a -> a
forall a. Num a => a -> a -> a
* a
10 a -> a -> a
forall a. Num a => a -> a -> a
+ Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
digitToInt Char
d))

-- | Read a hexadecimal integer, consisting of an optional leading
-- @\"0x\"@ followed by at least one hexadecimal digit. Input is
-- consumed until a non-hex-digit or end of string is reached.
-- This function is case insensitive.
--
-- This function does not handle leading sign characters.  If you need
-- to handle signed input, use @'signed' 'hexadecimal'@.
--
-- /Note/: For fixed-width integer types, this function does not
-- attempt to detect overflow, so a sufficiently long input may give
-- incorrect results.  If you are worried about overflow, use
-- 'Integer' for your result type.
hexadecimal :: Integral a => Reader a
{-# SPECIALIZE hexadecimal :: Reader Int #-}
{-# SPECIALIZE hexadecimal :: Reader Integer #-}
hexadecimal :: forall a. Integral a => Reader a
hexadecimal Text
txt
    | Text
h Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"0x" Bool -> Bool -> Bool
|| Text
h Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
"0X" = Reader a
forall a. Integral a => Reader a
hex Text
t
    | Bool
otherwise              = Reader a
forall a. Integral a => Reader a
hex Text
txt
 where (Text
h,Text
t) = Int64 -> Text -> (Text, Text)
T.splitAt Int64
2 Text
txt

hex :: Integral a => Reader a
{-# SPECIALIZE hexadecimal :: Reader Int #-}
{-# SPECIALIZE hexadecimal :: Reader Int8 #-}
{-# SPECIALIZE hexadecimal :: Reader Int16 #-}
{-# SPECIALIZE hexadecimal :: Reader Int32 #-}
{-# SPECIALIZE hexadecimal :: Reader Int64 #-}
{-# SPECIALIZE hexadecimal :: Reader Integer #-}
{-# SPECIALIZE hexadecimal :: Reader Word #-}
{-# SPECIALIZE hexadecimal :: Reader Word8 #-}
{-# SPECIALIZE hexadecimal :: Reader Word16 #-}
{-# SPECIALIZE hexadecimal :: Reader Word32 #-}
{-# SPECIALIZE hexadecimal :: Reader Word64 #-}
hex :: forall a. Integral a => Reader a
hex Text
txt
    | Text -> Bool
T.null Text
h  = String -> Either String (a, Text)
forall a b. a -> Either a b
Left String
"input does not start with a hexadecimal digit"
    | Bool
otherwise = (a, Text) -> Either String (a, Text)
forall a b. b -> Either a b
Right ((a -> Char -> a) -> a -> Text -> a
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl' a -> Char -> a
forall {a}. Num a => a -> Char -> a
go a
0 Text
h, Text
t)
  where (# Text
h, Text
t #)  = (Word8 -> Bool) -> Text -> (# Text, Text #)
spanAscii_ (\Word8
w -> Word8
w Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Char -> Word8
ord8 Char
'0' Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
10 Bool -> Bool -> Bool
|| Word8
w Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Char -> Word8
ord8 Char
'A' Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
6 Bool -> Bool -> Bool
|| Word8
w Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Char -> Word8
ord8 Char
'a' Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
6) Text
txt
        go :: a -> Char -> a
go a
n Char
d = (a
n a -> a -> a
forall a. Num a => a -> a -> a
* a
16 a -> a -> a
forall a. Num a => a -> a -> a
+ Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
hexDigitToInt Char
d))

-- | Read an optional leading sign character (@\'-\'@ or @\'+\'@) and
-- apply it to the result of applying the given reader.
signed :: Num a => Reader a -> Reader a
{-# INLINE signed #-}
signed :: forall a. Num a => Reader a -> Reader a
signed Reader a
f = IParser Text a -> Reader a
forall t a. IParser t a -> IReader t a
runP (IParser Text a -> IParser Text a
forall a. Num a => Parser a -> Parser a
signa (Reader a -> IParser Text a
forall t a. IReader t a -> IParser t a
P Reader a
f))

-- | Read a rational number.
--
-- This function accepts an optional leading sign character, followed
-- by at least one decimal digit.  The syntax similar to that accepted
-- by the 'read' function, with the exception that a trailing @\'.\'@
-- or @\'e\'@ /not/ followed by a number is not consumed.
--
-- Examples:
--
-- >rational "3"     == Right (3.0, "")
-- >rational "3.1"   == Right (3.1, "")
-- >rational "3e4"   == Right (30000.0, "")
-- >rational "3.1e4" == Right (31000.0, "")
-- >rational ".3"    == Left "input does not start with a digit"
-- >rational "e3"    == Left "input does not start with a digit"
--
-- Examples of differences from 'read':
--
-- >rational "3.foo" == Right (3.0, ".foo")
-- >rational "3e"    == Right (3.0, "e")
rational :: Fractional a => Reader a
{-# SPECIALIZE rational :: Reader Double #-}
rational :: forall a. Fractional a => Reader a
rational = (Integer -> Integer -> Integer -> a) -> Reader a
forall a.
Fractional a =>
(Integer -> Integer -> Integer -> a) -> Reader a
floaty ((Integer -> Integer -> Integer -> a) -> Reader a)
-> (Integer -> Integer -> Integer -> a) -> Reader a
forall a b. (a -> b) -> a -> b
$ \Integer
real Integer
frac Integer
fracDenom -> Rational -> a
forall a. Fractional a => Rational -> a
fromRational (Rational -> a) -> Rational -> a
forall a b. (a -> b) -> a -> b
$
                     Integer
real Integer -> Integer -> Rational
forall a. Integral a => a -> a -> Ratio a
% Integer
1 Rational -> Rational -> Rational
forall a. Num a => a -> a -> a
+ Integer
frac Integer -> Integer -> Rational
forall a. Integral a => a -> a -> Ratio a
% Integer
fracDenom

-- | Read a rational number.
--
-- The syntax accepted by this function is the same as for 'rational'.
--
-- /Note/: This function is almost ten times faster than 'rational',
-- but is slightly less accurate.
--
-- The 'Double' type supports about 16 decimal places of accuracy.
-- For 94.2% of numbers, this function and 'rational' give identical
-- results, but for the remaining 5.8%, this function loses precision
-- around the 15th decimal place.  For 0.001% of numbers, this
-- function will lose precision at the 13th or 14th decimal place.
double :: Reader Double
double :: Reader Double
double = (Integer -> Integer -> Integer -> Double) -> Reader Double
forall a.
Fractional a =>
(Integer -> Integer -> Integer -> a) -> Reader a
floaty ((Integer -> Integer -> Integer -> Double) -> Reader Double)
-> (Integer -> Integer -> Integer -> Double) -> Reader Double
forall a b. (a -> b) -> a -> b
$ \Integer
real Integer
frac Integer
fracDenom ->
                   Integer -> Double
forall a. Num a => Integer -> a
fromInteger Integer
real Double -> Double -> Double
forall a. Num a => a -> a -> a
+
                   Integer -> Double
forall a. Num a => Integer -> a
fromInteger Integer
frac Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Integer -> Double
forall a. Num a => Integer -> a
fromInteger Integer
fracDenom

signa :: Num a => Parser a -> Parser a
{-# SPECIALIZE signa :: Parser Int -> Parser Int #-}
{-# SPECIALIZE signa :: Parser Int8 -> Parser Int8 #-}
{-# SPECIALIZE signa :: Parser Int16 -> Parser Int16 #-}
{-# SPECIALIZE signa :: Parser Int32 -> Parser Int32 #-}
{-# SPECIALIZE signa :: Parser Int64 -> Parser Int64 #-}
{-# SPECIALIZE signa :: Parser Integer -> Parser Integer #-}
signa :: forall a. Num a => Parser a -> Parser a
signa Parser a
p = do
  Word8
sign <- Word8 -> IParser Text Word8 -> IParser Text Word8
forall a t. a -> IParser t a -> IParser t a
perhaps (Char -> Word8
ord8 Char
'+') (IParser Text Word8 -> IParser Text Word8)
-> IParser Text Word8 -> IParser Text Word8
forall a b. (a -> b) -> a -> b
$ (Word8 -> Bool) -> IParser Text Word8
charAscii (\Word8
c -> Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Word8
ord8 Char
'-' Bool -> Bool -> Bool
|| Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Word8
ord8 Char
'+')
  if Word8
sign Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Word8
ord8 Char
'+' then Parser a
p else a -> a
forall a. Num a => a -> a
negate (a -> a) -> Parser a -> Parser a
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser a
p

charAscii :: (Word8 -> Bool) -> Parser Word8
charAscii :: (Word8 -> Bool) -> IParser Text Word8
charAscii Word8 -> Bool
p = IReader Text Word8 -> IParser Text Word8
forall t a. IReader t a -> IParser t a
P (IReader Text Word8 -> IParser Text Word8)
-> IReader Text Word8 -> IParser Text Word8
forall a b. (a -> b) -> a -> b
$ \case
  Text
Empty -> String -> Either String (Word8, Text)
forall a b. a -> Either a b
Left String
"character does not match"
  -- len is > 0, unless the internal invariant of Text is violated
  Chunk (T.Text Array
arr Int
off Int
len) Text
ts -> let c :: Word8
c = Array -> Int -> Word8
A.unsafeIndex Array
arr Int
off in
    if Word8 -> Bool
p Word8
c
    then (Word8, Text) -> Either String (Word8, Text)
forall a b. b -> Either a b
Right (Word8
c, if Int
len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
1 then Text
ts else Text -> Text -> Text
Chunk (Array -> Int -> Int -> Text
T.Text Array
arr (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)) Text
ts)
    else String -> Either String (Word8, Text)
forall a b. a -> Either a b
Left String
"character does not match"

floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Reader a
{-# INLINE floaty #-}
floaty :: forall a.
Fractional a =>
(Integer -> Integer -> Integer -> a) -> Reader a
floaty Integer -> Integer -> Integer -> a
f = IParser Text a -> IReader Text a
forall t a. IParser t a -> IReader t a
runP (IParser Text a -> IReader Text a)
-> IParser Text a -> IReader Text a
forall a b. (a -> b) -> a -> b
$ do
  Word8
sign <- Word8 -> IParser Text Word8 -> IParser Text Word8
forall a t. a -> IParser t a -> IParser t a
perhaps (Char -> Word8
ord8 Char
'+') (IParser Text Word8 -> IParser Text Word8)
-> IParser Text Word8 -> IParser Text Word8
forall a b. (a -> b) -> a -> b
$ (Word8 -> Bool) -> IParser Text Word8
charAscii (\Word8
c -> Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Word8
ord8 Char
'-' Bool -> Bool -> Bool
|| Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Word8
ord8 Char
'+')
  Integer
real <- IReader Text Integer -> IParser Text Integer
forall t a. IReader t a -> IParser t a
P IReader Text Integer
forall a. Integral a => Reader a
decimal
  T Integer
fraction Int
fracDigits <- T -> IParser Text T -> IParser Text T
forall a t. a -> IParser t a -> IParser t a
perhaps (Integer -> Int -> T
T Integer
0 Int
0) (IParser Text T -> IParser Text T)
-> IParser Text T -> IParser Text T
forall a b. (a -> b) -> a -> b
$ do
    Word8
_ <- (Word8 -> Bool) -> IParser Text Word8
charAscii (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Word8
ord8 Char
'.')
    Int
digits <- IReader Text Int -> IParser Text Int
forall t a. IReader t a -> IParser t a
P (IReader Text Int -> IParser Text Int)
-> IReader Text Int -> IParser Text Int
forall a b. (a -> b) -> a -> b
$ \Text
t -> (Int, Text) -> Either String (Int, Text)
forall a b. b -> Either a b
Right (let (# Text
hd, Text
_ #) = (Word8 -> Bool) -> Text -> (# Text, Text #)
spanAscii_ (\Word8
w -> Word8
w Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Char -> Word8
ord8 Char
'0' Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
10) Text
t in Int64 -> Int
int64ToInt (Text -> Int64
T.length Text
hd), Text
t)
    Integer
n <- IReader Text Integer -> IParser Text Integer
forall t a. IReader t a -> IParser t a
P IReader Text Integer
forall a. Integral a => Reader a
decimal
    T -> IParser Text T
forall a. a -> IParser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (T -> IParser Text T) -> T -> IParser Text T
forall a b. (a -> b) -> a -> b
$ Integer -> Int -> T
T Integer
n Int
digits
  let e :: Word8 -> Bool
e Word8
c = Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Word8
ord8 Char
'e' Bool -> Bool -> Bool
|| Word8
c Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Word8
ord8 Char
'E'
  Int
power <- Int -> IParser Text Int -> IParser Text Int
forall a t. a -> IParser t a -> IParser t a
perhaps Int
0 ((Word8 -> Bool) -> IParser Text Word8
charAscii Word8 -> Bool
e IParser Text Word8 -> IParser Text Int -> IParser Text Int
forall a b. IParser Text a -> IParser Text b -> IParser Text b
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> IParser Text Int -> IParser Text Int
forall a. Num a => Parser a -> Parser a
signa (IReader Text Int -> IParser Text Int
forall t a. IReader t a -> IParser t a
P IReader Text Int
forall a. Integral a => Reader a
decimal) :: Parser Int)
  let n :: a
n = if Int
fracDigits Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
          then if Int
power Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
               then Integer -> a
forall a. Num a => Integer -> a
fromInteger Integer
real
               else Integer -> a
forall a. Num a => Integer -> a
fromInteger Integer
real a -> a -> a
forall a. Num a => a -> a -> a
* (a
10 a -> Int -> a
forall a b. (Fractional a, Integral b) => a -> b -> a
^^ Int
power)
          else if Int
power Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
               then Integer -> Integer -> Integer -> a
f Integer
real Integer
fraction (Integer
10 Integer -> Int -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^ Int
fracDigits)
               else Integer -> Integer -> Integer -> a
f Integer
real Integer
fraction (Integer
10 Integer -> Int -> Integer
forall a b. (Num a, Integral b) => a -> b -> a
^ Int
fracDigits) a -> a -> a
forall a. Num a => a -> a -> a
* (a
10 a -> Int -> a
forall a b. (Fractional a, Integral b) => a -> b -> a
^^ Int
power)
  a -> IParser Text a
forall a. a -> IParser Text a
forall (m :: * -> *) a. Monad m => a -> m a
return (a -> IParser Text a) -> a -> IParser Text a
forall a b. (a -> b) -> a -> b
$! if Word8
sign Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Char -> Word8
ord8 Char
'+'
            then a
n
            else -a
n

int64ToInt :: Int64 -> Int
int64ToInt :: Int64 -> Int
int64ToInt = Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral

ord8 :: Char -> Word8
ord8 :: Char -> Word8
ord8 = Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Word8) -> (Char -> Int) -> Char -> Word8
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Int
ord

-- | For the sake of performance this function does not check
-- that a char is in ASCII range; it is a responsibility of @p@.
spanAscii_ :: (Word8 -> Bool) -> Text -> (# Text, Text #)
spanAscii_ :: (Word8 -> Bool) -> Text -> (# Text, Text #)
spanAscii_ Word8 -> Bool
p = Text -> (# Text, Text #)
loop
  where
    loop :: Text -> (# Text, Text #)
loop Text
Empty = (# Text
Empty, Text
Empty #)
    loop (Chunk Text
t Text
ts) = let (# Text
t', t'' :: Text
t''@(T.Text Array
_ Int
_ Int
len) #) = (Word8 -> Bool) -> Text -> (# Text, Text #)
T.spanAscii_ Word8 -> Bool
p Text
t in
      if Int
len Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0
      then let (# Text
ts', Text
ts'' #) = Text -> (# Text, Text #)
loop Text
ts in (# Text -> Text -> Text
Chunk Text
t Text
ts', Text
ts'' #)
      else (# Text -> Text -> Text
Chunk Text
t' Text
Empty, Text -> Text -> Text
Chunk Text
t'' Text
ts #)