{-# LANGUAGE OverloadedStrings, UnboxedTuples, CPP #-}
{-# LANGUAGE Trustworthy #-}

-- |
-- Module      : Data.Text.Read
-- Copyright   : (c) 2010, 2011 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Portability : GHC
--
-- Functions used frequently when reading textual data.
module Data.Text.Read
    (
      Reader
    , decimal
    , hexadecimal
    , signed
    , rational
    , double
    ) where

import Control.Monad (liftM)
import Data.Char (ord)
import Data.Int (Int8, Int16, Int32, Int64)
import Data.Ratio ((%))
import Data.Text as T
import Data.Text.Internal as T (Text(..))
import Data.Text.Array as A
import Data.Text.Internal.Private (spanAscii_)
import Data.Text.Internal.Read
import Data.Word (Word, Word8, Word16, Word32, Word64)

-- | Read some text.  If the read succeeds, return its value and the
-- remaining text, otherwise an error message.
type Reader a = Text -> Either String (a, Text)
-- Internal shorthand: an 'IParser' specialised to 'Text' input.
type Parser a = IParser Text a

-- | Read a decimal integer.  The input must begin with at least one
-- decimal digit, and is consumed until a non-digit or end of string
-- is reached.
--
-- This function does not handle leading sign characters.  If you need
-- to handle signed input, use @'signed' 'decimal'@.
--
-- /Note/: For fixed-width integer types, this function does not
-- attempt to detect overflow, so a sufficiently long input may give
-- incorrect results.  If you are worried about overflow, use
-- 'Integer' for your result type.
decimal :: Integral a => Reader a
{-# SPECIALIZE decimal :: Reader Int #-}
{-# SPECIALIZE decimal :: Reader Int8 #-}
{-# SPECIALIZE decimal :: Reader Int16 #-}
{-# SPECIALIZE decimal :: Reader Int32 #-}
{-# SPECIALIZE decimal :: Reader Int64 #-}
{-# SPECIALIZE decimal :: Reader Integer #-}
{-# SPECIALIZE decimal :: Reader Data.Word.Word #-}
{-# SPECIALIZE decimal :: Reader Word8 #-}
{-# SPECIALIZE decimal :: Reader Word16 #-}
{-# SPECIALIZE decimal :: Reader Word32 #-}
{-# SPECIALIZE decimal :: Reader Word64 #-}
decimal txt
    | T.null h  = Left "input does not start with a digit"
    | otherwise = Right (T.foldl' go 0 h, t)
  where
    -- Split off the leading run of ASCII digits.  The subtraction is
    -- done in 'Word8', so any byte below '0' wraps around to a large
    -- value and a single unsigned comparison covers the whole range.
    (# h, t #) = spanAscii_ (\w -> w - ord8 '0' < 10) txt
    -- Accumulate the value one digit at a time, most significant first.
    go n d = n * 10 + fromIntegral (digitToInt d)

-- | Read a hexadecimal integer, consisting of an optional leading
-- @\"0x\"@ followed by at least one hexadecimal digit. Input is
-- consumed until a non-hex-digit or end of string is reached.
-- This function is case insensitive.
--
-- This function does not handle leading sign characters.  If you need
-- to handle signed input, use @'signed' 'hexadecimal'@.
--
-- /Note/: For fixed-width integer types, this function does not
-- attempt to detect overflow, so a sufficiently long input may give
-- incorrect results.  If you are worried about overflow, use
-- 'Integer' for your result type.
hexadecimal :: Integral a => Reader a
{-# SPECIALIZE hexadecimal :: Reader Int #-}
{-# SPECIALIZE hexadecimal :: Reader Int8 #-}
{-# SPECIALIZE hexadecimal :: Reader Int16 #-}
{-# SPECIALIZE hexadecimal :: Reader Int32 #-}
{-# SPECIALIZE hexadecimal :: Reader Int64 #-}
{-# SPECIALIZE hexadecimal :: Reader Integer #-}
{-# SPECIALIZE hexadecimal :: Reader Word #-}
{-# SPECIALIZE hexadecimal :: Reader Word8 #-}
{-# SPECIALIZE hexadecimal :: Reader Word16 #-}
{-# SPECIALIZE hexadecimal :: Reader Word32 #-}
{-# SPECIALIZE hexadecimal :: Reader Word64 #-}
hexadecimal txt
    -- Skip a leading "0x" / "0X" marker, then parse the digits.
    | h == "0x" || h == "0X" = hex t
    -- No marker: the digits start at the very beginning of the input.
    | otherwise              = hex txt
  where
    (h, t) = T.splitAt 2 txt

-- Read one or more hexadecimal digits (either case) with no prefix
-- handling; 'hexadecimal' strips the optional @0x@ before calling this.
-- Fails with a 'Left' message when the input does not begin with a
-- hexadecimal digit.  No overflow detection is performed.
hex :: Integral a => Reader a
{-# SPECIALIZE hex :: Reader Int #-}
{-# SPECIALIZE hex :: Reader Int8 #-}
{-# SPECIALIZE hex :: Reader Int16 #-}
{-# SPECIALIZE hex :: Reader Int32 #-}
{-# SPECIALIZE hex :: Reader Int64 #-}
{-# SPECIALIZE hex :: Reader Integer #-}
{-# SPECIALIZE hex :: Reader Word #-}
{-# SPECIALIZE hex :: Reader Word8 #-}
{-# SPECIALIZE hex :: Reader Word16 #-}
{-# SPECIALIZE hex :: Reader Word32 #-}
{-# SPECIALIZE hex :: Reader Word64 #-}
hex txt
    | T.null h  = Left "input does not start with a hexadecimal digit"
    | otherwise = Right (T.foldl' go 0 h, t)
  where
    -- Split off the leading run of hex digits.
    (# h, t #) = spanAscii_ isHexByte txt
    -- Each range test subtracts in 'Word8', so bytes below the range
    -- start wrap around and fail the unsigned comparison.
    isHexByte w =
         w - ord8 '0' < 10
      || w - ord8 'A' < 6
      || w - ord8 'a' < 6
    -- Accumulate the value one digit at a time, most significant first.
    go n d = n * 16 + fromIntegral (hexDigitToInt d)

-- | Read an optional leading sign character (@\'-\'@ or @\'+\'@) and
-- apply it to the result of applying the given reader.
signed :: Num a => Reader a -> Reader a
{-# INLINE signed #-}
-- Wrap the reader as a parser, add sign handling, and unwrap it again.
signed f = runP (signa (P f))

-- | Read a rational number.
--
-- This function accepts an optional leading sign character, followed
-- by at least one decimal digit.  The syntax is similar to that accepted
-- by the 'read' function, with the exception that a trailing @\'.\'@
-- or @\'e\'@ /not/ followed by a number is not consumed.
--
-- Examples (with behaviour identical to 'read'):
--
-- >rational "3"     == Right (3.0, "")
-- >rational "3.1"   == Right (3.1, "")
-- >rational "3e4"   == Right (30000.0, "")
-- >rational "3.1e4" == Right (31000.0, "")
-- >rational ".3"    == Left "input does not start with a digit"
-- >rational "e3"    == Left "input does not start with a digit"
--
-- Examples of differences from 'read':
--
-- >rational "3.foo" == Right (3.0, ".foo")
-- >rational "3e"    == Right (3.0, "e")
rational :: Fractional a => Reader a
{-# SPECIALIZE rational :: Reader Double #-}
-- Combine the integer part and the fraction as an exact 'Rational'
-- (real + frac / fracDenom) before converting to the result type.
rational = floaty $ \real frac fracDenom ->
  fromRational (real % 1 + frac % fracDenom)

-- | Read a rational number.
--
-- The syntax accepted by this function is the same as for 'rational'.
--
-- /Note/: This function is almost ten times faster than 'rational',
-- but is slightly less accurate.
--
-- The 'Double' type supports about 16 decimal places of accuracy.
-- For 94.2% of numbers, this function and 'rational' give identical
-- results, but for the remaining 5.8%, this function loses precision
-- around the 15th decimal place.  For 0.001% of numbers, this
-- function will lose precision at the 13th or 14th decimal place.
double :: Reader Double
-- Unlike 'rational', the pieces are converted to 'Double' first and
-- combined with floating-point arithmetic rather than exact ratios.
double = floaty $ \real frac fracDenom ->
  fromInteger real + fromInteger frac / fromInteger fracDenom

-- Parser-level sign handling: accept an optional leading @\'+\'@ or
-- @\'-\'@, run the given parser, and negate its result if the sign
-- was @\'-\'@.  A missing sign is treated as @\'+\'@.
signa :: Num a => Parser a -> Parser a
{-# SPECIALIZE signa :: Parser Int -> Parser Int #-}
{-# SPECIALIZE signa :: Parser Int8 -> Parser Int8 #-}
{-# SPECIALIZE signa :: Parser Int16 -> Parser Int16 #-}
{-# SPECIALIZE signa :: Parser Int32 -> Parser Int32 #-}
{-# SPECIALIZE signa :: Parser Int64 -> Parser Int64 #-}
{-# SPECIALIZE signa :: Parser Integer -> Parser Integer #-}
signa p = do
  -- 'perhaps' supplies '+' as the default when no sign byte matches.
  sign <- perhaps (ord8 '+') $ charAscii (\c -> c == ord8 '-' || c == ord8 '+')
  if sign == ord8 '+' then p else negate `liftM` p

-- Consume a single leading byte of the input if it satisfies the
-- predicate, returning that byte and the input advanced by one.
-- Fails with "character does not match" on empty input or when the
-- predicate rejects the byte.
charAscii :: (Word8 -> Bool) -> Parser Word8
charAscii p = P $ \(Text arr off len) ->
  -- 'c' is bound lazily: the unchecked array read is only forced
  -- after the @len > 0@ test has succeeded, because '&&' short-circuits.
  let c = A.unsafeIndex arr off in
  if len > 0 && p c
  then Right (c, Text arr (off + 1) (len - 1))
  else Left "character does not match"

-- Shared driver for 'rational' and 'double'.  Parses
--
-- > [sign] digits [ '.' digits ] [ ('e' | 'E') [sign] digits ]
--
-- and hands the integer part, the fraction digits read as an integer,
-- and the fraction's denominator (a power of ten) to the supplied
-- combining function @f@.  The exponent and the sign are applied
-- afterwards.
floaty :: Fractional a => (Integer -> Integer -> Integer -> a) -> Reader a
{-# INLINE floaty #-}
floaty f = runP $ do
  -- Optional sign; a missing sign is treated as '+'.
  sign <- perhaps (ord8 '+') $ charAscii (\c -> c == ord8 '-' || c == ord8 '+')
  real <- P decimal
  -- Optional fractional part.  When it is absent (or, per the examples
  -- documented on 'rational', when the '.' is not followed by a digit)
  -- the default @T 0 0@ is used.
  T fraction fracDigits <- perhaps (T 0 0) $ do
    _ <- charAscii (== ord8 '.')
    -- Count the digits after the dot WITHOUT consuming them (the input
    -- @t@ is returned unchanged), so that leading zeros such as in
    -- "1.05" are reflected in the denominator.
    digits <- P $ \t ->
      Right (let (# hd, _ #) = spanAscii_ (\w -> w - ord8 '0' < 10) t
             in T.length hd, t)
    -- Now actually consume the digits and read their value.
    n <- P decimal
    return $ T n digits
  let e c = c == ord8 'e' || c == ord8 'E'
  -- Optional exponent, defaulting to 0 when absent.
  power <- perhaps 0 (charAscii e >> signa (P decimal) :: Parser Int)
  -- Skip the work that is not needed: no fraction means no call to f,
  -- and a zero exponent means no scaling by a power of ten.
  let n = if fracDigits == 0
          then if power == 0
               then fromInteger real
               else fromInteger real * (10 ^^ power)
          else if power == 0
               then f real fraction (10 ^ fracDigits)
               else f real fraction (10 ^ fracDigits) * (10 ^^ power)
  return $! if sign == ord8 '+'
            then n
            else -n

-- Convert a character to a byte by taking its code point and
-- narrowing it to 'Word8'.  Within this module it is only applied to
-- ASCII character literals.
ord8 :: Char -> Word8
ord8 = fromIntegral . ord