{-# LANGUAGE BangPatterns,CPP #-}
{-# LANGUAGE Trustworthy #-}
module Data.Text.Lazy.Encoding
(
decodeLatin1
, decodeUtf8'
, decodeUtf8With
, decodeUtf16LEWith
, decodeUtf16BEWith
, decodeUtf32LEWith
, decodeUtf32BEWith
, decodeASCII
, decodeUtf8
, decodeUtf16LE
, decodeUtf16BE
, decodeUtf32LE
, decodeUtf32BE
, encodeUtf8
, encodeUtf16LE
, encodeUtf16BE
, encodeUtf32LE
, encodeUtf32BE
, encodeUtf8Builder
, encodeUtf8BuilderEscaped
) where
import Control.Exception (evaluate, try)
import Data.Monoid (Monoid(..))
import Data.Text.Encoding.Error (OnDecodeError, UnicodeException, strictDecode)
import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldrChunks)
import Data.Word (Word8)
import qualified Data.ByteString as S
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Builder.Prim as BP
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Internal as B
import qualified Data.Text as T
import qualified Data.Text.Encoding as TE
import qualified Data.Text.Internal.Lazy.Encoding.Fusion as E
import qualified Data.Text.Internal.Lazy.Fusion as F
import Data.Text.Unsafe (unsafeDupablePerformIO)
-- | Decode a lazy 'B.ByteString' chunk by chunk with the strict
-- 'TE.decodeASCII'.
--
-- The input is split with 'B.toChunks'; each strict chunk is decoded
-- independently and the results are reassembled with 'chunk'.  How bytes
-- outside the ASCII range are treated is decided by 'TE.decodeASCII'.
decodeASCII :: B.ByteString -> Text
decodeASCII = foldr (chunk . TE.decodeASCII) empty . B.toChunks
-- | Decode a lazy 'B.ByteString' chunk by chunk with the strict
-- 'TE.decodeLatin1'.
--
-- The input is split with 'B.toChunks'; each strict chunk is decoded
-- independently and the results are reassembled with 'chunk'.
decodeLatin1 :: B.ByteString -> Text
decodeLatin1 = foldr (chunk . TE.decodeLatin1) empty . B.toChunks
-- | Decode a lazy 'B.ByteString' as UTF-8, consulting @onErr@ for input
-- that cannot be decoded.
--
-- The first chunk is fed to 'TE.streamDecodeUtf8With'; every following
-- chunk is fed to the continuation returned by the previous step, so a
-- code point split across a chunk boundary is handled by the streaming
-- decoder rather than here.  Once the input is exhausted, any bytes the
-- decoder still reports as undecoded are passed to @onErr@ one at a time.
--
-- An empty input yields 'empty'.
decodeUtf8With :: OnDecodeError -> B.ByteString -> Text
decodeUtf8With onErr (B.Chunk b0 bs0) =
    case TE.streamDecodeUtf8With onErr b0 of
      TE.Some t l f -> chunk t (go f l bs0)
  where
    -- @go f0 l rest@: @f0@ is the decoder continuation, @l@ the bytes the
    -- previous step left undecoded, @rest@ the remaining lazy input.
    go :: (S.ByteString -> TE.Decoding) -> S.ByteString -> B.ByteString -> Text
    go f0 _ (B.Chunk b bs) =
      -- The previous leftover is ignored here; presumably the continuation
      -- already carries it (confirm against 'TE.streamDecodeUtf8With').
      case f0 b of
        TE.Some t l f -> chunk t (go f l bs)
    go _ l _
      | S.null l  = empty
      | otherwise =
        let !t = T.pack (skipBytes l)
            -- Ask the handler about each trailing byte: keep the
            -- replacement character if it supplies one, drop the byte
            -- otherwise.
            skipBytes = S.foldr (\x s' ->
              case onErr desc (Just x) of
                Just c  -> c : s'
                Nothing -> s') [] in
        -- Use the smart constructor: when the handler drops every byte,
        -- @t@ is empty and a raw 'Chunk' would put an empty chunk into
        -- the result.
        chunk t empty

    desc :: String
    desc = "Data.Text.Lazy.Encoding.decodeUtf8With: Invalid UTF-8 stream"
decodeUtf8With _ _ = empty
-- | Decode a lazy 'B.ByteString' as UTF-8 using 'strictDecode' as the
-- error handler.
--
-- This is 'decodeUtf8With' specialised to 'strictDecode'.  'decodeUtf8''
-- in this module wraps this function and catches 'UnicodeException', so
-- invalid input is expected to surface as that exception.
decodeUtf8 :: B.ByteString -> Text
decodeUtf8 = decodeUtf8With strictDecode
{-# INLINE[0] decodeUtf8 #-}
-- | Decode a lazy 'B.ByteString' as UTF-8, reporting failure as a value.
--
-- Runs 'decodeUtf8' and returns 'Left' with the 'UnicodeException' if one
-- is raised while the result is being forced, or 'Right' with the decoded
-- text otherwise.
decodeUtf8' :: B.ByteString -> Either UnicodeException Text
decodeUtf8' bs = unsafeDupablePerformIO $ do
    let t = decodeUtf8 bs
    -- The result is lazy, so walk it inside 'try' before returning it;
    -- otherwise an exception could escape later, outside the handler.
    try (evaluate (rnf t `seq` t))
  where
    -- Traverse the chunk spine to the end, forcing every constructor.
    rnf :: Text -> ()
    rnf Empty        = ()
    rnf (Chunk _ ts) = rnf ts
{-# INLINE decodeUtf8' #-}
-- | Encode lazy 'Text' as a lazy 'B.ByteString' using UTF-8.
--
-- Each text chunk is encoded with the strict 'TE.encodeUtf8' and becomes
-- one 'B.Chunk' of the result, so the output has one chunk per input
-- chunk.
encodeUtf8 :: Text -> B.ByteString
encodeUtf8 = foldrChunks (B.Chunk . TE.encodeUtf8) B.Empty
-- | Encode lazy 'Text' into a 'B.Builder' using UTF-8.
--
-- Each chunk is turned into a builder with the strict
-- 'TE.encodeUtf8Builder'; the per-chunk builders are combined in order
-- with 'mappend'.
encodeUtf8Builder :: Text -> B.Builder
encodeUtf8Builder =
    foldrChunks (\c b -> TE.encodeUtf8Builder c `mappend` b) Data.Monoid.mempty
-- | Encode lazy 'Text' into a 'B.Builder', passing @prim@ through to the
-- strict 'TE.encodeUtf8BuilderEscaped' for every chunk.
--
-- The per-chunk builders are combined in order with 'mappend'.  How
-- @prim@ is applied to the encoded bytes is defined by
-- 'TE.encodeUtf8BuilderEscaped'.
{-# INLINE encodeUtf8BuilderEscaped #-}
encodeUtf8BuilderEscaped :: BP.BoundedPrim Word8 -> Text -> B.Builder
encodeUtf8BuilderEscaped prim =
    foldrChunks (\c b -> TE.encodeUtf8BuilderEscaped prim c `mappend` b) mempty
-- | Decode a lazy 'B.ByteString' as little-endian UTF-16, consulting
-- @onErr@ for invalid input.
--
-- The bytes are turned into a character stream by 'E.streamUtf16LE' and
-- materialised as lazy 'Text' by 'F.unstream'.
decodeUtf16LEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf16LEWith onErr bs = F.unstream (E.streamUtf16LE onErr bs)
{-# INLINE decodeUtf16LEWith #-}
-- | Decode a lazy 'B.ByteString' as little-endian UTF-16 using
-- 'strictDecode' as the error handler.
--
-- This is 'decodeUtf16LEWith' specialised to 'strictDecode'.
decodeUtf16LE :: B.ByteString -> Text
decodeUtf16LE = decodeUtf16LEWith strictDecode
{-# INLINE decodeUtf16LE #-}
-- | Decode a lazy 'B.ByteString' as big-endian UTF-16, consulting
-- @onErr@ for invalid input.
--
-- The bytes are turned into a character stream by 'E.streamUtf16BE' and
-- materialised as lazy 'Text' by 'F.unstream'.
decodeUtf16BEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf16BEWith onErr bs = F.unstream (E.streamUtf16BE onErr bs)
{-# INLINE decodeUtf16BEWith #-}
-- | Decode a lazy 'B.ByteString' as big-endian UTF-16 using
-- 'strictDecode' as the error handler.
--
-- This is 'decodeUtf16BEWith' specialised to 'strictDecode'.
decodeUtf16BE :: B.ByteString -> Text
decodeUtf16BE = decodeUtf16BEWith strictDecode
{-# INLINE decodeUtf16BE #-}
-- | Encode lazy 'Text' as a lazy 'B.ByteString' using little-endian
-- UTF-16.
--
-- Each text chunk is encoded with the strict 'TE.encodeUtf16LE'; the
-- resulting strict byte strings are joined with 'B.fromChunks'.
encodeUtf16LE :: Text -> B.ByteString
encodeUtf16LE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf16LE) [] txt)
{-# INLINE encodeUtf16LE #-}
-- | Encode lazy 'Text' as a lazy 'B.ByteString' using big-endian UTF-16.
--
-- Each text chunk is encoded with the strict 'TE.encodeUtf16BE'; the
-- resulting strict byte strings are joined with 'B.fromChunks'.
encodeUtf16BE :: Text -> B.ByteString
encodeUtf16BE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf16BE) [] txt)
{-# INLINE encodeUtf16BE #-}
-- | Decode a lazy 'B.ByteString' as little-endian UTF-32, consulting
-- @onErr@ for invalid input.
--
-- The bytes are turned into a character stream by 'E.streamUtf32LE' and
-- materialised as lazy 'Text' by 'F.unstream'.
decodeUtf32LEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf32LEWith onErr bs = F.unstream (E.streamUtf32LE onErr bs)
{-# INLINE decodeUtf32LEWith #-}
-- | Decode a lazy 'B.ByteString' as little-endian UTF-32 using
-- 'strictDecode' as the error handler.
--
-- This is 'decodeUtf32LEWith' specialised to 'strictDecode'.
decodeUtf32LE :: B.ByteString -> Text
decodeUtf32LE = decodeUtf32LEWith strictDecode
{-# INLINE decodeUtf32LE #-}
-- | Decode a lazy 'B.ByteString' as big-endian UTF-32, consulting
-- @onErr@ for invalid input.
--
-- The bytes are turned into a character stream by 'E.streamUtf32BE' and
-- materialised as lazy 'Text' by 'F.unstream'.
decodeUtf32BEWith :: OnDecodeError -> B.ByteString -> Text
decodeUtf32BEWith onErr bs = F.unstream (E.streamUtf32BE onErr bs)
{-# INLINE decodeUtf32BEWith #-}
-- | Decode a lazy 'B.ByteString' as big-endian UTF-32 using
-- 'strictDecode' as the error handler.
--
-- This is 'decodeUtf32BEWith' specialised to 'strictDecode'.
decodeUtf32BE :: B.ByteString -> Text
decodeUtf32BE = decodeUtf32BEWith strictDecode
{-# INLINE decodeUtf32BE #-}
-- | Encode lazy 'Text' as a lazy 'B.ByteString' using little-endian
-- UTF-32.
--
-- Each text chunk is encoded with the strict 'TE.encodeUtf32LE'; the
-- resulting strict byte strings are joined with 'B.fromChunks'.
encodeUtf32LE :: Text -> B.ByteString
encodeUtf32LE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf32LE) [] txt)
{-# INLINE encodeUtf32LE #-}
-- | Encode lazy 'Text' as a lazy 'B.ByteString' using big-endian UTF-32.
--
-- Each text chunk is encoded with the strict 'TE.encodeUtf32BE'; the
-- resulting strict byte strings are joined with 'B.fromChunks'.
encodeUtf32BE :: Text -> B.ByteString
encodeUtf32BE txt = B.fromChunks (foldrChunks ((:) . TE.encodeUtf32BE) [] txt)
{-# INLINE encodeUtf32BE #-}