{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE CPP, NoImplicitPrelude #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}
module GHC.IO.Encoding (
BufferCodec(..), TextEncoding(..), TextEncoder, TextDecoder, CodingProgress(..),
latin1, latin1_encode, latin1_decode,
utf8, utf8_bom,
utf16, utf16le, utf16be,
utf32, utf32le, utf32be,
initLocaleEncoding,
getLocaleEncoding, getFileSystemEncoding, getForeignEncoding,
setLocaleEncoding, setFileSystemEncoding, setForeignEncoding,
char8,
mkTextEncoding,
argvEncoding
) where
import GHC.Base
import GHC.IO.Exception
import GHC.IO.Buffer
import GHC.IO.Encoding.Failure
import GHC.IO.Encoding.Types
#if !defined(mingw32_HOST_OS)
import qualified GHC.IO.Encoding.Iconv as Iconv
#else
import qualified GHC.IO.Encoding.CodePage as CodePage
import Text.Read (reads)
#endif
import qualified GHC.IO.Encoding.Latin1 as Latin1
import qualified GHC.IO.Encoding.UTF8 as UTF8
import qualified GHC.IO.Encoding.UTF16 as UTF16
import qualified GHC.IO.Encoding.UTF32 as UTF32
import GHC.List
import GHC.Word
import Data.IORef
import Data.Char (toUpper)
import System.IO.Unsafe (unsafePerformIO)
latin1 :: TextEncoding
latin1 :: TextEncoding
latin1 = TextEncoding
Latin1.latin1_checked
utf8 :: TextEncoding
utf8 :: TextEncoding
utf8 = TextEncoding
UTF8.utf8
utf8_bom :: TextEncoding
utf8_bom :: TextEncoding
utf8_bom = TextEncoding
UTF8.utf8_bom
utf16 :: TextEncoding
utf16 :: TextEncoding
utf16 = TextEncoding
UTF16.utf16
utf16le :: TextEncoding
utf16le :: TextEncoding
utf16le = TextEncoding
UTF16.utf16le
utf16be :: TextEncoding
utf16be :: TextEncoding
utf16be = TextEncoding
UTF16.utf16be
utf32 :: TextEncoding
utf32 :: TextEncoding
utf32 = TextEncoding
UTF32.utf32
utf32le :: TextEncoding
utf32le :: TextEncoding
utf32le = TextEncoding
UTF32.utf32le
utf32be :: TextEncoding
utf32be :: TextEncoding
utf32be = TextEncoding
UTF32.utf32be
getLocaleEncoding :: IO TextEncoding
{-# NOINLINE getLocaleEncoding #-}
getFileSystemEncoding :: IO TextEncoding
{-# NOINLINE getFileSystemEncoding #-}
getForeignEncoding :: IO TextEncoding
{-# NOINLINE getForeignEncoding #-}
setLocaleEncoding :: TextEncoding -> IO ()
{-# NOINLINE setLocaleEncoding #-}
setFileSystemEncoding :: TextEncoding -> IO ()
{-# NOINLINE setFileSystemEncoding #-}
setForeignEncoding :: TextEncoding -> IO ()
{-# NOINLINE setForeignEncoding #-}
(IO TextEncoding
getLocaleEncoding, TextEncoding -> IO ()
setLocaleEncoding) = TextEncoding -> (IO TextEncoding, TextEncoding -> IO ())
forall a. a -> (IO a, a -> IO ())
mkGlobal TextEncoding
initLocaleEncoding
(IO TextEncoding
getFileSystemEncoding, TextEncoding -> IO ()
setFileSystemEncoding) = TextEncoding -> (IO TextEncoding, TextEncoding -> IO ())
forall a. a -> (IO a, a -> IO ())
mkGlobal TextEncoding
initFileSystemEncoding
(IO TextEncoding
getForeignEncoding, TextEncoding -> IO ()
setForeignEncoding) = TextEncoding -> (IO TextEncoding, TextEncoding -> IO ())
forall a. a -> (IO a, a -> IO ())
mkGlobal TextEncoding
initForeignEncoding
mkGlobal :: a -> (IO a, a -> IO ())
mkGlobal :: forall a. a -> (IO a, a -> IO ())
mkGlobal a
x = IO (IO a, a -> IO ()) -> (IO a, a -> IO ())
forall a. IO a -> a
unsafePerformIO (IO (IO a, a -> IO ()) -> (IO a, a -> IO ()))
-> IO (IO a, a -> IO ()) -> (IO a, a -> IO ())
forall a b. (a -> b) -> a -> b
$ do
IORef a
x_ref <- a -> IO (IORef a)
forall a. a -> IO (IORef a)
newIORef a
x
(IO a, a -> IO ()) -> IO (IO a, a -> IO ())
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (IORef a -> IO a
forall a. IORef a -> IO a
readIORef IORef a
x_ref, IORef a -> a -> IO ()
forall a. IORef a -> a -> IO ()
writeIORef IORef a
x_ref)
{-# NOINLINE mkGlobal #-}
initLocaleEncoding, initFileSystemEncoding, initForeignEncoding :: TextEncoding
{-# NOINLINE initLocaleEncoding #-}
#if defined(js_HOST_ARCH)
initLocaleEncoding = utf8
initFileSystemEncoding = utf8
initForeignEncoding = utf8
#elif !defined(mingw32_HOST_OS)
initLocaleEncoding :: TextEncoding
initLocaleEncoding = IO TextEncoding -> TextEncoding
forall a. IO a -> a
unsafePerformIO (IO TextEncoding -> TextEncoding)
-> IO TextEncoding -> TextEncoding
forall a b. (a -> b) -> a -> b
$ CodingFailureMode -> String -> IO TextEncoding
mkTextEncoding' CodingFailureMode
ErrorOnCodingFailure String
Iconv.localeEncodingName
initFileSystemEncoding :: TextEncoding
initFileSystemEncoding = IO TextEncoding -> TextEncoding
forall a. IO a -> a
unsafePerformIO (IO TextEncoding -> TextEncoding)
-> IO TextEncoding -> TextEncoding
forall a b. (a -> b) -> a -> b
$ CodingFailureMode -> String -> IO TextEncoding
mkTextEncoding' CodingFailureMode
RoundtripFailure String
Iconv.localeEncodingName
initForeignEncoding :: TextEncoding
initForeignEncoding = IO TextEncoding -> TextEncoding
forall a. IO a -> a
unsafePerformIO (IO TextEncoding -> TextEncoding)
-> IO TextEncoding -> TextEncoding
forall a b. (a -> b) -> a -> b
$ CodingFailureMode -> String -> IO TextEncoding
mkTextEncoding' CodingFailureMode
IgnoreCodingFailure String
Iconv.localeEncodingName
#else
initLocaleEncoding = CodePage.localeEncoding
initFileSystemEncoding = CodePage.mkLocaleEncoding RoundtripFailure
initForeignEncoding = CodePage.mkLocaleEncoding IgnoreCodingFailure
#endif
argvEncoding :: IO TextEncoding
#if defined(mingw32_HOST_OS)
argvEncoding = return utf8
#else
argvEncoding :: IO TextEncoding
argvEncoding = IO TextEncoding
getFileSystemEncoding
#endif
char8 :: TextEncoding
char8 :: TextEncoding
char8 = TextEncoding
Latin1.latin1
mkTextEncoding :: String -> IO TextEncoding
mkTextEncoding :: String -> IO TextEncoding
mkTextEncoding String
e = case Maybe CodingFailureMode
mb_coding_failure_mode of
Maybe CodingFailureMode
Nothing -> String -> IO TextEncoding
forall a. String -> IO a
unknownEncodingErr String
e
Just CodingFailureMode
cfm -> CodingFailureMode -> String -> IO TextEncoding
mkTextEncoding' CodingFailureMode
cfm String
enc
where
(String
enc, String
suffix) = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'/') String
e
mb_coding_failure_mode :: Maybe CodingFailureMode
mb_coding_failure_mode = case String
suffix of
String
"" -> CodingFailureMode -> Maybe CodingFailureMode
forall a. a -> Maybe a
Just CodingFailureMode
ErrorOnCodingFailure
String
"//IGNORE" -> CodingFailureMode -> Maybe CodingFailureMode
forall a. a -> Maybe a
Just CodingFailureMode
IgnoreCodingFailure
String
"//TRANSLIT" -> CodingFailureMode -> Maybe CodingFailureMode
forall a. a -> Maybe a
Just CodingFailureMode
TransliterateCodingFailure
String
"//ROUNDTRIP" -> CodingFailureMode -> Maybe CodingFailureMode
forall a. a -> Maybe a
Just CodingFailureMode
RoundtripFailure
String
_ -> Maybe CodingFailureMode
forall a. Maybe a
Nothing
mkTextEncoding' :: CodingFailureMode -> String -> IO TextEncoding
mkTextEncoding' :: CodingFailureMode -> String -> IO TextEncoding
mkTextEncoding' CodingFailureMode
cfm String
enc =
case [Char -> Char
toUpper Char
c | Char
c <- String
enc, Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'-'] of
String
"UTF8" -> TextEncoding -> IO TextEncoding
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (TextEncoding -> IO TextEncoding)
-> TextEncoding -> IO TextEncoding
forall a b. (a -> b) -> a -> b
$ CodingFailureMode -> TextEncoding
UTF8.mkUTF8 CodingFailureMode
cfm
String
"UTF16" -> TextEncoding -> IO TextEncoding
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (TextEncoding -> IO TextEncoding)
-> TextEncoding -> IO TextEncoding
forall a b. (a -> b) -> a -> b
$ CodingFailureMode -> TextEncoding
UTF16.mkUTF16 CodingFailureMode
cfm
String
"UTF16LE" -> TextEncoding -> IO TextEncoding
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (TextEncoding -> IO TextEncoding)
-> TextEncoding -> IO TextEncoding
forall a b. (a -> b) -> a -> b
$ CodingFailureMode -> TextEncoding
UTF16.mkUTF16le CodingFailureMode
cfm
String
"UTF16BE" -> TextEncoding -> IO TextEncoding
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (TextEncoding -> IO TextEncoding)
-> TextEncoding -> IO TextEncoding
forall a b. (a -> b) -> a -> b
$ CodingFailureMode -> TextEncoding
UTF16.mkUTF16be CodingFailureMode
cfm
String
"UTF32" -> TextEncoding -> IO TextEncoding
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (TextEncoding -> IO TextEncoding)
-> TextEncoding -> IO TextEncoding
forall a b. (a -> b) -> a -> b
$ CodingFailureMode -> TextEncoding
UTF32.mkUTF32 CodingFailureMode
cfm
String
"UTF32LE" -> TextEncoding -> IO TextEncoding
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (TextEncoding -> IO TextEncoding)
-> TextEncoding -> IO TextEncoding
forall a b. (a -> b) -> a -> b
$ CodingFailureMode -> TextEncoding
UTF32.mkUTF32le CodingFailureMode
cfm
String
"UTF32BE" -> TextEncoding -> IO TextEncoding
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (TextEncoding -> IO TextEncoding)
-> TextEncoding -> IO TextEncoding
forall a b. (a -> b) -> a -> b
$ CodingFailureMode -> TextEncoding
UTF32.mkUTF32be CodingFailureMode
cfm
String
_ | Bool
isAscii -> TextEncoding -> IO TextEncoding
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (CodingFailureMode -> TextEncoding
Latin1.mkAscii CodingFailureMode
cfm)
String
_ | Bool
isLatin1 -> TextEncoding -> IO TextEncoding
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (CodingFailureMode -> TextEncoding
Latin1.mkLatin1_checked CodingFailureMode
cfm)
#if defined(mingw32_HOST_OS)
'C':'P':n | [(cp,"")] <- reads n -> return $ CodePage.mkCodePageEncoding cfm cp
_ -> unknownEncodingErr (enc ++ codingFailureModeSuffix cfm)
#else
String
_ -> do Maybe TextEncoding
res <- CodingFailureMode -> String -> IO (Maybe TextEncoding)
Iconv.mkIconvEncoding CodingFailureMode
cfm String
enc
case Maybe TextEncoding
res of
Just TextEncoding
e -> TextEncoding -> IO TextEncoding
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return TextEncoding
e
Maybe TextEncoding
Nothing -> String -> IO TextEncoding
forall a. String -> IO a
unknownEncodingErr (String
enc String -> String -> String
forall a. [a] -> [a] -> [a]
++ CodingFailureMode -> String
codingFailureModeSuffix CodingFailureMode
cfm)
#endif
where
isAscii :: Bool
isAscii = String
enc String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
`elem` [String]
asciiEncNames
isLatin1 :: Bool
isLatin1 = String
enc String -> [String] -> Bool
forall a. Eq a => a -> [a] -> Bool
`elem` [String]
latin1EncNames
asciiEncNames :: [String]
asciiEncNames =
[ String
"ANSI_X3.4-1968", String
"iso-ir-6", String
"ANSI_X3.4-1986", String
"ISO_646.irv:1991"
, String
"US-ASCII", String
"us", String
"IBM367", String
"cp367", String
"csASCII", String
"ASCII", String
"ISO646-US"
]
latin1EncNames :: [String]
latin1EncNames =
[ String
"ISO_8859-1:1987", String
"iso-ir-100", String
"ISO_8859-1", String
"ISO-8859-1", String
"latin1",
String
"l1", String
"IBM819", String
"CP819", String
"csISOLatin1"
]
latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8)
latin1_encode :: CharBuffer -> Buffer Word8 -> IO (CharBuffer, Buffer Word8)
latin1_encode CharBuffer
input Buffer Word8
output = ((CodingProgress, CharBuffer, Buffer Word8)
-> (CharBuffer, Buffer Word8))
-> IO (CodingProgress, CharBuffer, Buffer Word8)
-> IO (CharBuffer, Buffer Word8)
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(CodingProgress
_why,CharBuffer
input',Buffer Word8
output') -> (CharBuffer
input',Buffer Word8
output')) (IO (CodingProgress, CharBuffer, Buffer Word8)
-> IO (CharBuffer, Buffer Word8))
-> IO (CodingProgress, CharBuffer, Buffer Word8)
-> IO (CharBuffer, Buffer Word8)
forall a b. (a -> b) -> a -> b
$ EncodeBuffer
Latin1.latin1_encode CharBuffer
input Buffer Word8
output
latin1_decode :: Buffer Word8 -> CharBuffer -> IO (Buffer Word8, CharBuffer)
latin1_decode :: Buffer Word8 -> CharBuffer -> IO (Buffer Word8, CharBuffer)
latin1_decode Buffer Word8
input CharBuffer
output = ((CodingProgress, Buffer Word8, CharBuffer)
-> (Buffer Word8, CharBuffer))
-> IO (CodingProgress, Buffer Word8, CharBuffer)
-> IO (Buffer Word8, CharBuffer)
forall a b. (a -> b) -> IO a -> IO b
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(CodingProgress
_why,Buffer Word8
input',CharBuffer
output') -> (Buffer Word8
input',CharBuffer
output')) (IO (CodingProgress, Buffer Word8, CharBuffer)
-> IO (Buffer Word8, CharBuffer))
-> IO (CodingProgress, Buffer Word8, CharBuffer)
-> IO (Buffer Word8, CharBuffer)
forall a b. (a -> b) -> a -> b
$ DecodeBuffer
Latin1.latin1_decode Buffer Word8
input CharBuffer
output
unknownEncodingErr :: String -> IO a
unknownEncodingErr :: forall a. String -> IO a
unknownEncodingErr String
e = IOException -> IO a
forall a. IOException -> IO a
ioException (Maybe Handle
-> IOErrorType
-> String
-> String
-> Maybe CInt
-> Maybe String
-> IOException
IOError Maybe Handle
forall a. Maybe a
Nothing IOErrorType
NoSuchThing String
"mkTextEncoding"
(String
"unknown encoding:" String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
e) Maybe CInt
forall a. Maybe a
Nothing Maybe String
forall a. Maybe a
Nothing)