{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE BangPatterns #-}
module GHC.Internal.IO.Encoding.Failure (
CodingFailureMode(..), codingFailureModeSuffix,
isSurrogate,
recoverDecode, recoverEncode,
recoverDecode#, recoverEncode#,
) where
import GHC.Internal.IO
import GHC.Internal.IO.Buffer
import GHC.Internal.IO.Exception
import GHC.Internal.Base
import GHC.Internal.Char
import GHC.Internal.Word
import GHC.Internal.Show
import GHC.Internal.Num
import GHC.Internal.Real ( fromIntegral )
data CodingFailureMode
= ErrorOnCodingFailure
| IgnoreCodingFailure
| TransliterateCodingFailure
| RoundtripFailure
deriving ( Int -> CodingFailureMode -> ShowS
[CodingFailureMode] -> ShowS
CodingFailureMode -> String
(Int -> CodingFailureMode -> ShowS)
-> (CodingFailureMode -> String)
-> ([CodingFailureMode] -> ShowS)
-> Show CodingFailureMode
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> CodingFailureMode -> ShowS
showsPrec :: Int -> CodingFailureMode -> ShowS
$cshow :: CodingFailureMode -> String
show :: CodingFailureMode -> String
$cshowList :: [CodingFailureMode] -> ShowS
showList :: [CodingFailureMode] -> ShowS
Show
)
codingFailureModeSuffix :: CodingFailureMode -> String
codingFailureModeSuffix :: CodingFailureMode -> String
codingFailureModeSuffix CodingFailureMode
ErrorOnCodingFailure = String
""
codingFailureModeSuffix CodingFailureMode
IgnoreCodingFailure = String
"//IGNORE"
codingFailureModeSuffix CodingFailureMode
TransliterateCodingFailure = String
"//TRANSLIT"
codingFailureModeSuffix CodingFailureMode
RoundtripFailure = String
"//ROUNDTRIP"
unrepresentableChar :: Char
unrepresentableChar :: Char
unrepresentableChar = Char
'\xFFFD'
{-# INLINE isSurrogate #-}
isSurrogate :: Char -> Bool
isSurrogate :: Char -> Bool
isSurrogate Char
c = (Int
0xD800 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
x Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xDBFF)
Bool -> Bool -> Bool
|| (Int
0xDC00 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
x Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xDFFF)
where x :: Int
x = Char -> Int
ord Char
c
{-# INLINE escapeToRoundtripCharacterSurrogate #-}
escapeToRoundtripCharacterSurrogate :: Word8 -> Char
escapeToRoundtripCharacterSurrogate :: Word8 -> Char
escapeToRoundtripCharacterSurrogate Word8
b
| Word8
b Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
128 = Int -> Char
chr (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b)
| Bool
otherwise = Int -> Char
chr (Int
0xDC00 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b)
{-# INLINE unescapeRoundtripCharacterSurrogate #-}
unescapeRoundtripCharacterSurrogate :: Char -> Maybe Word8
unescapeRoundtripCharacterSurrogate :: Char -> Maybe Word8
unescapeRoundtripCharacterSurrogate Char
c
| Int
0xDC80 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
x Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xDD00 = Word8 -> Maybe Word8
forall a. a -> Maybe a
Just (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
x)
| Bool
otherwise = Maybe Word8
forall a. Maybe a
Nothing
where x :: Int
x = Char -> Int
ord Char
c
recoverDecode# :: CodingFailureMode -> Buffer Word8 -> Buffer Char
-> State# RealWorld -> (# State# RealWorld, Buffer Word8, Buffer Char #)
recoverDecode# :: CodingFailureMode
-> Buffer Word8
-> Buffer Char
-> State# RealWorld
-> (# State# RealWorld, Buffer Word8, Buffer Char #)
recoverDecode# CodingFailureMode
cfm Buffer Word8
input Buffer Char
output State# RealWorld
st =
let !(# State# RealWorld
st', (Buffer Word8
bIn, Buffer Char
bOut) #) = IO (Buffer Word8, Buffer Char)
-> State# RealWorld
-> (# State# RealWorld, (Buffer Word8, Buffer Char) #)
forall a. IO a -> State# RealWorld -> (# State# RealWorld, a #)
unIO (CodingFailureMode
-> Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char)
recoverDecode CodingFailureMode
cfm Buffer Word8
input Buffer Char
output) State# RealWorld
st
in (# State# RealWorld
st', Buffer Word8
bIn, Buffer Char
bOut #)
recoverDecode :: CodingFailureMode -> Buffer Word8 -> Buffer Char
-> IO (Buffer Word8, Buffer Char)
recoverDecode :: CodingFailureMode
-> Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char)
recoverDecode CodingFailureMode
cfm input :: Buffer Word8
input@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer Word8
iraw, bufL :: forall e. Buffer e -> Int
bufL=Int
ir, bufR :: forall e. Buffer e -> Int
bufR=Int
_ }
output :: Buffer Char
output@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer Char
oraw, bufL :: forall e. Buffer e -> Int
bufL=Int
_, bufR :: forall e. Buffer e -> Int
bufR=Int
ow } =
case CodingFailureMode
cfm of
CodingFailureMode
ErrorOnCodingFailure -> do
b <- RawBuffer Word8 -> Int -> IO Word8
readWord8Buf RawBuffer Word8
iraw Int
ir
ioe_decodingError b
CodingFailureMode
IgnoreCodingFailure -> (Buffer Word8, Buffer Char) -> IO (Buffer Word8, Buffer Char)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Word8
input { bufL=ir+1 }, Buffer Char
output)
CodingFailureMode
TransliterateCodingFailure -> do
ow' <- RawBuffer Char -> Int -> Char -> IO Int
writeCharBuf RawBuffer Char
oraw Int
ow Char
unrepresentableChar
return (input { bufL=ir+1 }, output { bufR=ow' })
CodingFailureMode
RoundtripFailure -> do
b <- RawBuffer Word8 -> Int -> IO Word8
readWord8Buf RawBuffer Word8
iraw Int
ir
ow' <- writeCharBuf oraw ow (escapeToRoundtripCharacterSurrogate b)
return (input { bufL=ir+1 }, output { bufR=ow' })
recoverEncode# :: CodingFailureMode -> Buffer Char -> Buffer Word8
-> State# RealWorld -> (# State# RealWorld, Buffer Char, Buffer Word8 #)
recoverEncode# :: CodingFailureMode
-> Buffer Char
-> Buffer Word8
-> State# RealWorld
-> (# State# RealWorld, Buffer Char, Buffer Word8 #)
recoverEncode# CodingFailureMode
cfm Buffer Char
input Buffer Word8
output State# RealWorld
st =
let !(# State# RealWorld
st', (Buffer Char
bIn, Buffer Word8
bOut) #) = IO (Buffer Char, Buffer Word8)
-> State# RealWorld
-> (# State# RealWorld, (Buffer Char, Buffer Word8) #)
forall a. IO a -> State# RealWorld -> (# State# RealWorld, a #)
unIO (CodingFailureMode
-> Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8)
recoverEncode CodingFailureMode
cfm Buffer Char
input Buffer Word8
output) State# RealWorld
st
in (# State# RealWorld
st', Buffer Char
bIn, Buffer Word8
bOut #)
recoverEncode :: CodingFailureMode -> Buffer Char -> Buffer Word8
-> IO (Buffer Char, Buffer Word8)
recoverEncode :: CodingFailureMode
-> Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8)
recoverEncode CodingFailureMode
cfm input :: Buffer Char
input@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer Char
iraw, bufL :: forall e. Buffer e -> Int
bufL=Int
ir, bufR :: forall e. Buffer e -> Int
bufR=Int
_ }
output :: Buffer Word8
output@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer Word8
oraw, bufL :: forall e. Buffer e -> Int
bufL=Int
_, bufR :: forall e. Buffer e -> Int
bufR=Int
ow } = do
(c,ir') <- RawBuffer Char -> Int -> IO (Char, Int)
readCharBuf RawBuffer Char
iraw Int
ir
case cfm of
CodingFailureMode
IgnoreCodingFailure -> (Buffer Char, Buffer Word8) -> IO (Buffer Char, Buffer Word8)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Char
input { bufL=ir' }, Buffer Word8
output)
CodingFailureMode
TransliterateCodingFailure -> do
if Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'?'
then (Buffer Char, Buffer Word8) -> IO (Buffer Char, Buffer Word8)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Char
input { bufL=ir' }, Buffer Word8
output)
else do
_ir' <- RawBuffer Char -> Int -> Char -> IO Int
writeCharBuf RawBuffer Char
iraw Int
ir Char
'?'
return (input, output)
CodingFailureMode
RoundtripFailure | Just Word8
x <- Char -> Maybe Word8
unescapeRoundtripCharacterSurrogate Char
c -> do
RawBuffer Word8 -> Int -> Word8 -> IO ()
writeWord8Buf RawBuffer Word8
oraw Int
ow Word8
x
(Buffer Char, Buffer Word8) -> IO (Buffer Char, Buffer Word8)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Char
input { bufL=ir' }, Buffer Word8
output { bufR=ow+1 })
CodingFailureMode
_ -> Char -> IO (Buffer Char, Buffer Word8)
forall a. Char -> IO a
ioe_encodingError Char
c
ioe_decodingError :: Word8 -> IO a
ioe_decodingError :: forall a. Word8 -> IO a
ioe_decodingError Word8
b = IOException -> IO a
forall a. HasCallStack => IOException -> IO a
ioException
(Maybe Handle
-> IOErrorType
-> String
-> String
-> Maybe CInt
-> Maybe String
-> IOException
IOError Maybe Handle
forall a. Maybe a
Nothing IOErrorType
InvalidArgument String
"recoverDecode"
(String
"cannot decode byte sequence starting from " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word8 -> String
forall a. Show a => a -> String
show Word8
b) Maybe CInt
forall a. Maybe a
Nothing Maybe String
forall a. Maybe a
Nothing)
ioe_encodingError :: Char -> IO a
ioe_encodingError :: forall a. Char -> IO a
ioe_encodingError Char
ch = IOException -> IO a
forall a. HasCallStack => IOException -> IO a
ioException
(Maybe Handle
-> IOErrorType
-> String
-> String
-> Maybe CInt
-> Maybe String
-> IOException
IOError Maybe Handle
forall a. Maybe a
Nothing IOErrorType
InvalidArgument String
"recoverEncode"
(String
"cannot encode character " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Char -> String
forall a. Show a => a -> String
show Char
ch) Maybe CInt
forall a. Maybe a
Nothing Maybe String
forall a. Maybe a
Nothing)