{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE CPP, NoImplicitPrelude #-}
{-# LANGUAGE UnboxedTuples #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
module GHC.Internal.IO.Encoding (
BufferCodec(..), TextEncoding(..), TextEncoder, TextDecoder, CodingProgress(..),
latin1, latin1_encode, latin1_decode,
utf8, utf8_bom,
utf16, utf16le, utf16be,
utf32, utf32le, utf32be,
initLocaleEncoding,
getLocaleEncoding, getFileSystemEncoding, getForeignEncoding,
setLocaleEncoding, setFileSystemEncoding, setForeignEncoding,
char8,
mkTextEncoding,
argvEncoding
) where
import GHC.Internal.Base
import GHC.Internal.IO.Exception
import GHC.Internal.IO.Buffer
import GHC.Internal.IO.Encoding.Failure
import GHC.Internal.IO.Encoding.Types
#if !defined(mingw32_HOST_OS)
import qualified GHC.Internal.IO.Encoding.Iconv as Iconv
#else
import qualified GHC.Internal.IO.Encoding.CodePage as CodePage
import GHC.Internal.Text.Read (reads)
#endif
import qualified GHC.Internal.IO.Encoding.Latin1 as Latin1
import qualified GHC.Internal.IO.Encoding.UTF8 as UTF8
import qualified GHC.Internal.IO.Encoding.UTF16 as UTF16
import qualified GHC.Internal.IO.Encoding.UTF32 as UTF32
import GHC.Internal.List
import GHC.Internal.Word
import GHC.Internal.Data.IORef
import GHC.Internal.Unicode (toUpper)
import GHC.Internal.IO.Unsafe (unsafePerformIO)
-- | The Latin-1 text encoding exposed by this module.
--
-- This is an alias for 'Latin1.latin1_checked'. Compare 'char8', which is
-- bound to the plain 'Latin1.latin1' codec instead.
latin1 :: TextEncoding
latin1 = Latin1.latin1_checked
-- | The UTF-8 text encoding; an alias for 'UTF8.utf8'.
utf8 :: TextEncoding
utf8 = UTF8.utf8
-- | The byte-order-mark flavour of UTF-8 (going by its name); an alias for
-- 'UTF8.utf8_bom', which defines the actual BOM handling.
utf8_bom :: TextEncoding
utf8_bom = UTF8.utf8_bom
-- | The UTF-16 text encoding without a fixed byte order in its name; an
-- alias for 'UTF16.utf16'.
utf16 :: TextEncoding
utf16 = UTF16.utf16
-- | The little-endian UTF-16 text encoding; an alias for 'UTF16.utf16le'.
utf16le :: TextEncoding
utf16le = UTF16.utf16le
-- | The big-endian UTF-16 text encoding; an alias for 'UTF16.utf16be'.
utf16be :: TextEncoding
utf16be = UTF16.utf16be
-- | The UTF-32 text encoding without a fixed byte order in its name; an
-- alias for 'UTF32.utf32'.
utf32 :: TextEncoding
utf32 = UTF32.utf32
-- | The little-endian UTF-32 text encoding; an alias for 'UTF32.utf32le'.
utf32le :: TextEncoding
utf32le = UTF32.utf32le
-- | The big-endian UTF-32 text encoding; an alias for 'UTF32.utf32be'.
utf32be :: TextEncoding
utf32be = UTF32.utf32be
-- Accessors for the three process-wide encoding settings.
--
-- Each getter/setter pair is produced by one call to 'mkGlobal', so the two
-- halves of a pair operate on the same mutable cell. All six names are marked
-- NOINLINE; since 'mkGlobal' allocates its cell under 'unsafePerformIO', this
-- is presumably what keeps every use site pointing at that one shared cell.

-- | Read the current locale encoding. Starts out as 'initLocaleEncoding'.
getLocaleEncoding :: IO TextEncoding
{-# NOINLINE getLocaleEncoding #-}

-- | Read the current file-system encoding. Starts out as
-- 'initFileSystemEncoding'.
getFileSystemEncoding :: IO TextEncoding
{-# NOINLINE getFileSystemEncoding #-}

-- | Read the current foreign encoding. Starts out as 'initForeignEncoding'.
getForeignEncoding :: IO TextEncoding
{-# NOINLINE getForeignEncoding #-}

-- | Replace the value returned by subsequent 'getLocaleEncoding' calls.
setLocaleEncoding :: TextEncoding -> IO ()
{-# NOINLINE setLocaleEncoding #-}

-- | Replace the value returned by subsequent 'getFileSystemEncoding' calls.
setFileSystemEncoding :: TextEncoding -> IO ()
{-# NOINLINE setFileSystemEncoding #-}

-- | Replace the value returned by subsequent 'getForeignEncoding' calls.
setForeignEncoding :: TextEncoding -> IO ()
{-# NOINLINE setForeignEncoding #-}

(getLocaleEncoding, setLocaleEncoding)         = mkGlobal initLocaleEncoding
(getFileSystemEncoding, setFileSystemEncoding) = mkGlobal initFileSystemEncoding
(getForeignEncoding, setForeignEncoding)       = mkGlobal initForeignEncoding
-- | Build a mutable global variable holding the given initial value.
--
-- The result is a pair of actions over a single freshly allocated 'IORef':
-- the first reads the current contents, the second overwrites them.
--
-- The 'IORef' is created under 'unsafePerformIO' so that the pair can be bound
-- by a pure top-level definition. The NOINLINE pragma below is presumably
-- there so the allocation is not duplicated by inlining, which would give
-- callers unrelated cells.
mkGlobal :: a -> (IO a, a -> IO ())
mkGlobal x = unsafePerformIO $ do
    x_ref <- newIORef x
    return (readIORef x_ref, writeIORef x_ref)
{-# NOINLINE mkGlobal #-}
-- | Initial values for the locale, file-system and foreign encoding globals.
--
-- Within each platform branch the three differ only in the
-- @CodingFailureMode@ they are built with: the locale encoding uses the
-- erroring mode (or the platform default constructor), the file-system
-- encoding uses @RoundtripFailure@, and the foreign encoding uses
-- @IgnoreCodingFailure@.
initLocaleEncoding, initFileSystemEncoding, initForeignEncoding :: TextEncoding
{-# NOINLINE initLocaleEncoding #-}
#if defined(javascript_HOST_ARCH)
-- JavaScript backend: all three defaults are UTF-8.
initLocaleEncoding     = utf8
initFileSystemEncoding = utf8
initForeignEncoding    = utf8
#elif !defined(mingw32_HOST_OS)
-- Non-Windows hosts: resolve the locale encoding name reported by the Iconv
-- module through this module's own lookup function, once per failure mode.
initLocaleEncoding     = unsafePerformIO $ mkTextEncoding' ErrorOnCodingFailure Iconv.localeEncodingName
initFileSystemEncoding = unsafePerformIO $ mkTextEncoding' RoundtripFailure     Iconv.localeEncodingName
initForeignEncoding    = unsafePerformIO $ mkTextEncoding' IgnoreCodingFailure  Iconv.localeEncodingName
#else
-- Windows hosts: use the code-page based locale encoding.
initLocaleEncoding     = CodePage.localeEncoding
initFileSystemEncoding = CodePage.mkLocaleEncoding RoundtripFailure
initForeignEncoding    = CodePage.mkLocaleEncoding IgnoreCodingFailure
#endif
-- | Encoding to use for @argv@ (going by the name; the callers live outside
-- this module).
--
-- On Windows hosts this is always 'utf8'. On every other host it is whatever
-- 'getFileSystemEncoding' currently returns.
argvEncoding :: IO TextEncoding
#if defined(mingw32_HOST_OS)
argvEncoding = return utf8
#else
argvEncoding = getFileSystemEncoding
#endif
-- | A byte-per-character text encoding; an alias for 'Latin1.latin1'.
--
-- Unlike 'latin1', which is bound to 'Latin1.latin1_checked', this uses the
-- plain (unchecked, per the naming) Latin-1 codec.
char8 :: TextEncoding
char8 = Latin1.latin1
-- | Look up a 'TextEncoding' by name.
--
-- The argument is split at its first @\'/\'@. The part before it is the
-- encoding name handed to @mkTextEncoding'@; the remainder selects how coding
-- failures are handled:
--
-- * no suffix: 'ErrorOnCodingFailure'
-- * @\/\/IGNORE@: 'IgnoreCodingFailure'
-- * @\/\/TRANSLIT@: 'TransliterateCodingFailure'
-- * @\/\/ROUNDTRIP@: 'RoundtripFailure'
--
-- Any other suffix is rejected through 'unknownEncodingErr', reporting the
-- full original string. The suffix match is exact and case-sensitive.
mkTextEncoding :: String -> IO TextEncoding
mkTextEncoding e = case mb_coding_failure_mode of
    Nothing  -> unknownEncodingErr e
    Just cfm -> mkTextEncoding' cfm enc
  where
    -- enc is everything before the first slash; suffix starts at that slash.
    (enc, suffix) = span (/= '/') e

    mb_coding_failure_mode :: Maybe CodingFailureMode
    mb_coding_failure_mode = case suffix of
        ""            -> Just ErrorOnCodingFailure
        "//IGNORE"    -> Just IgnoreCodingFailure
        "//TRANSLIT"  -> Just TransliterateCodingFailure
        "//ROUNDTRIP" -> Just RoundtripFailure
        _             -> Nothing
-- | Build the 'TextEncoding' for an encoding name (without any failure-mode
-- suffix) and an explicit 'CodingFailureMode'.
--
-- Resolution order:
--
-- 1. The name, upper-cased and with every @-@ removed, is compared against
--    the UTF-8\/16\/32 spellings, which map to the Haskell codecs in the
--    UTF8, UTF16 and UTF32 modules.
-- 2. The unmodified name is looked up in the ASCII and Latin-1 alias lists
--    below. This comparison is exact, hence case-sensitive.
-- 3. On Windows hosts a normalised name of the form @CP@ followed by a number
--    selects that code page; anything else is an unknown-encoding error.
--    On other hosts the original name is passed to the Iconv module, and an
--    unknown-encoding error is raised if that yields nothing.
--
-- Unknown-encoding errors report the name with the suffix produced by
-- @codingFailureModeSuffix@ for the requested mode.
mkTextEncoding' :: CodingFailureMode -> String -> IO TextEncoding
mkTextEncoding' cfm enc =
  case [toUpper c | c <- enc, c /= '-'] of
    "UTF8"    -> return $ UTF8.mkUTF8 cfm
    "UTF16"   -> return $ UTF16.mkUTF16 cfm
    "UTF16LE" -> return $ UTF16.mkUTF16le cfm
    "UTF16BE" -> return $ UTF16.mkUTF16be cfm
    "UTF32"   -> return $ UTF32.mkUTF32 cfm
    "UTF32LE" -> return $ UTF32.mkUTF32le cfm
    "UTF32BE" -> return $ UTF32.mkUTF32be cfm
    _ | isAscii  -> return (Latin1.mkAscii cfm)
    _ | isLatin1 -> return (Latin1.mkLatin1_checked cfm)
#if defined(mingw32_HOST_OS)
    'C':'P':n | [(cp,"")] <- reads n -> return $ CodePage.mkCodePageEncoding cfm cp
    _ -> unknownEncodingErr (enc ++ codingFailureModeSuffix cfm)
#else
    _ -> do
      res <- Iconv.mkIconvEncoding cfm enc
      case res of
        Just e  -> return e
        Nothing -> unknownEncodingErr (enc ++ codingFailureModeSuffix cfm)
#endif
  where
    -- Both tests use the name exactly as supplied, not the normalised form.
    isAscii :: Bool
    isAscii = enc `elem` asciiEncNames

    isLatin1 :: Bool
    isLatin1 = enc `elem` latin1EncNames

    -- Names accepted as aliases for ASCII.
    asciiEncNames :: [String]
    asciiEncNames =
      [ "ANSI_X3.4-1968", "iso-ir-6", "ANSI_X3.4-1986", "ISO_646.irv:1991"
      , "US-ASCII", "us", "IBM367", "cp367", "csASCII", "ASCII", "ISO646-US"
      ]

    -- Names accepted as aliases for Latin-1.
    latin1EncNames :: [String]
    latin1EncNames =
      [ "ISO_8859-1:1987", "iso-ir-100", "ISO_8859-1", "ISO-8859-1", "latin1"
      , "l1", "IBM819", "CP819", "csISOLatin1"
      ]
-- | Encode characters from the input buffer into the output byte buffer
-- using 'Latin1.latin1_encode'.
--
-- This wraps the state-passing primitive as an ordinary 'IO' action. The
-- primitive also reports a @CodingProgress@ value, which is discarded here;
-- only the updated input and output buffers are returned.
latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8)
latin1_encode input output = IO $ \st ->
  case Latin1.latin1_encode input output st of
    (# st', _why, input', output' #) -> (# st', (input', output') #)
-- | Decode bytes from the input buffer into the output character buffer
-- using 'Latin1.latin1_decode'.
--
-- This wraps the state-passing primitive as an ordinary 'IO' action. The
-- primitive also reports a @CodingProgress@ value, which is discarded here;
-- only the updated input and output buffers are returned.
latin1_decode :: Buffer Word8 -> CharBuffer -> IO (Buffer Word8, CharBuffer)
latin1_decode input output = IO $ \st ->
  case Latin1.latin1_decode input output st of
    (# st', _why, input', output' #) -> (# st', (input', output') #)
-- | Fail with an 'IOError' reporting that the given encoding name is unknown.
--
-- The exception has type 'NoSuchThing', location @"mkTextEncoding"@ and the
-- description @"unknown encoding:"@ followed directly by the supplied name.
-- Handle, errno and file name are all left as 'Nothing'.
unknownEncodingErr :: String -> IO a
unknownEncodingErr e =
  ioException
    (IOError Nothing NoSuchThing "mkTextEncoding"
             ("unknown encoding:" ++ e) Nothing Nothing)