{-# LANGUAGE CPP                  #-}
{-# LANGUAGE NoImplicitPrelude    #-}
{- |
   Module      :  System.Win32.Encoding
   Copyright   :  2012 shelarcy
   License     :  BSD-style

   Maintainer  :  shelarcy@gmail.com
   Stability   :  Provisional
   Portability :  Non-portable (Win32 API)

   Encode/Decode multibyte character using Win32 API.
-}

module GHC.Internal.IO.Windows.Encoding
  ( encodeMultiByte
  , encodeMultiByteIO
  , encodeMultiByteRawIO
  , decodeMultiByte
  , decodeMultiByteIO
  , wideCharToMultiByte
  , multiByteToWideChar
  , withGhcInternalToUTF16
  , withUTF16ToGhcInternal
  ) where

import GHC.Internal.Word (Word8, Word16)
import GHC.Internal.Foreign.C.Types        (CInt(..))
import GHC.Internal.Foreign.C.String       (peekCAStringLen, peekCWStringLen,
                               withCWStringLen, withCAStringLen, )
import GHC.Internal.Foreign.Ptr (nullPtr, Ptr ())
import GHC.Internal.Foreign.Marshal.Array  (allocaArray)
import GHC.Internal.Foreign.Marshal.Unsafe (unsafeLocalState)
import GHC.Internal.Windows
import GHC.Internal.IO.Encoding.CodePage (CodePage, getCurrentCodePage)
import GHC.Internal.IO
import GHC.Internal.Base
import GHC.Internal.Real

-- | The "System.IO" output functions (e.g. `putStr`) don't
-- automatically convert to multibyte string on Windows, so this
-- function is provided to make the conversion from a Unicode string
-- in the given code page to a proper multibyte string.  To get the
-- code page for the console, use `getCurrentCodePage`.
--
encodeMultiByte :: CodePage -> String -> String
encodeMultiByte :: Word32 -> String -> String
encodeMultiByte Word32
cp = IO String -> String
forall a. IO a -> a
unsafeLocalState (IO String -> String) -> (String -> IO String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word32 -> String -> IO String
encodeMultiByteIO Word32
cp

{-# INLINE encodeMultiByteIO' #-}
-- | String must not be zero length.
encodeMultiByteIO' :: CodePage -> String -> ((LPCSTR, CInt) -> IO a) -> IO a
encodeMultiByteIO' :: forall a. Word32 -> String -> ((LPCSTR, CInt) -> IO a) -> IO a
encodeMultiByteIO' Word32
cp String
wstr (LPCSTR, CInt) -> IO a
transformer =
  String -> (CWStringLen -> IO a) -> IO a
forall a. String -> (CWStringLen -> IO a) -> IO a
withCWStringLen String
wstr ((CWStringLen -> IO a) -> IO a) -> (CWStringLen -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ \(Ptr CWchar
cwstr,Int
len) -> do
    mbchars' <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"WideCharToMultiByte" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$ Word32
-> Word32
-> Ptr CWchar
-> CInt
-> LPCSTR
-> CInt
-> LPCSTR
-> LPBOOL
-> IO CInt
wideCharToMultiByte
                Word32
cp
                Word32
0
                Ptr CWchar
cwstr
                (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)
                LPCSTR
forall a. Ptr a
nullPtr CInt
0
                LPCSTR
forall a. Ptr a
nullPtr LPBOOL
forall a. Ptr a
nullPtr
    -- mbchar' is the length of buffer required
    allocaArray (fromIntegral mbchars') $ \LPCSTR
mbstr -> do
      mbchars <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"WideCharToMultiByte" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$ Word32
-> Word32
-> Ptr CWchar
-> CInt
-> LPCSTR
-> CInt
-> LPCSTR
-> LPBOOL
-> IO CInt
wideCharToMultiByte
                 Word32
cp
                 Word32
0
                 Ptr CWchar
cwstr
                 (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)
                 LPCSTR
mbstr CInt
mbchars'
                 LPCSTR
forall a. Ptr a
nullPtr LPBOOL
forall a. Ptr a
nullPtr
      transformer (mbstr,fromIntegral mbchars)

-- converts [Char] to UTF-16
encodeMultiByteIO :: CodePage -> String -> IO String
encodeMultiByteIO :: Word32 -> String -> IO String
encodeMultiByteIO Word32
_ String
"" = String -> IO String
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return String
""
encodeMultiByteIO Word32
cp String
s = Word32 -> String -> ((LPCSTR, CInt) -> IO String) -> IO String
forall a. Word32 -> String -> ((LPCSTR, CInt) -> IO a) -> IO a
encodeMultiByteIO' Word32
cp String
s (LPCSTR, CInt) -> IO String
forall {a}. Integral a => (LPCSTR, a) -> IO String
toString
  where toString :: (LPCSTR, a) -> IO String
toString (LPCSTR
st,a
l) = CStringLen -> IO String
peekCAStringLen (LPCSTR
st,a -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
l)

-- converts [Char] to UTF-16
encodeMultiByteRawIO :: CodePage -> String -> IO (LPCSTR, CInt)
encodeMultiByteRawIO :: Word32 -> String -> IO (LPCSTR, CInt)
encodeMultiByteRawIO Word32
_ String
"" = (LPCSTR, CInt) -> IO (LPCSTR, CInt)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (LPCSTR
forall a. Ptr a
nullPtr, CInt
0)
encodeMultiByteRawIO Word32
cp String
s = Word32
-> String
-> ((LPCSTR, CInt) -> IO (LPCSTR, CInt))
-> IO (LPCSTR, CInt)
forall a. Word32 -> String -> ((LPCSTR, CInt) -> IO a) -> IO a
encodeMultiByteIO' Word32
cp String
s (LPCSTR, CInt) -> IO (LPCSTR, CInt)
forall {m :: * -> *} {a} {b} {a}.
(Monad m, Integral a, Num b) =>
(a, a) -> m (a, b)
toSizedCString
  where toSizedCString :: (a, a) -> m (a, b)
toSizedCString (a
st,a
l) = (a, b) -> m (a, b)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (a
st, a -> b
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
l)

foreign import ccall "WideCharToMultiByte"
  wideCharToMultiByte
        :: CodePage
        -> DWORD   -- dwFlags,
        -> LPCWSTR -- lpWideCharStr
        -> CInt    -- cchWideChar
        -> LPSTR   -- lpMultiByteStr
        -> CInt    -- cbMultiByte
        -> LPCSTR  -- lpMultiByteStr
        -> LPBOOL  -- lpbFlags
        -> IO CInt

-- | The `System.IO` input functions (e.g. `getLine`) don't
-- automatically convert to Unicode, so this function is provided to
-- make the conversion from a multibyte string in the given code page
-- to a proper Unicode string.  To get the code page for the console,
-- use `getConsoleCP`.
stringToUnicode :: CodePage -> String -> IO String
stringToUnicode :: Word32 -> String -> IO String
stringToUnicode Word32
_cp String
"" = String -> IO String
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return String
""
     -- MultiByteToWideChar doesn't handle empty strings (#1929)
stringToUnicode Word32
cp String
mbstr =
  String -> (CStringLen -> IO String) -> IO String
forall a. String -> (CStringLen -> IO a) -> IO a
withCAStringLen String
mbstr ((CStringLen -> IO String) -> IO String)
-> (CStringLen -> IO String) -> IO String
forall a b. (a -> b) -> a -> b
$ \(LPCSTR
cstr,Int
len) -> do
    wchars <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"MultiByteToWideChar" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> LPCSTR -> CInt -> Ptr CWchar -> CInt -> IO CInt
multiByteToWideChar
                Word32
cp
                Word32
0
                LPCSTR
cstr
                (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)
                Ptr CWchar
forall a. Ptr a
nullPtr CInt
0
    -- wchars is the length of buffer required
    allocaArray (fromIntegral wchars) $ \Ptr CWchar
cwstr -> do
      wchars' <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"MultiByteToWideChar" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$ Word32 -> Word32 -> LPCSTR -> CInt -> Ptr CWchar -> CInt -> IO CInt
multiByteToWideChar
                Word32
cp
                Word32
0
                LPCSTR
cstr
                (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)
                Ptr CWchar
cwstr CInt
wchars
      peekCWStringLen (cwstr,fromIntegral wchars')  -- converts UTF-16 to [Char]

foreign import ccall unsafe "MultiByteToWideChar"
  multiByteToWideChar
        :: CodePage
        -> DWORD   -- dwFlags,
        -> LPCSTR  -- lpMultiByteStr
        -> CInt    -- cbMultiByte
        -> LPWSTR  -- lpWideCharStr
        -> CInt    -- cchWideChar
        -> IO CInt

decodeMultiByte :: CodePage -> String -> String
decodeMultiByte :: Word32 -> String -> String
decodeMultiByte Word32
cp = IO String -> String
forall a. IO a -> a
unsafeLocalState (IO String -> String) -> (String -> IO String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Word32 -> String -> IO String
decodeMultiByteIO Word32
cp

-- | Because of `stringToUnicode` is unclear name, we use `decodeMultiByteIO`
-- for alias of `stringToUnicode`.
decodeMultiByteIO :: CodePage -> String -> IO String
decodeMultiByteIO :: Word32 -> String -> IO String
decodeMultiByteIO = Word32 -> String -> IO String
stringToUnicode
{-# INLINE decodeMultiByteIO #-}

foreign import ccall unsafe "MultiByteToWideChar"
  multiByteToWideChar'
        :: CodePage
        -> DWORD   -- dwFlags,
        -> Ptr Word8  -- lpMultiByteStr
        -> CInt    -- cbMultiByte
        -> Ptr Word16  -- lpWideCharStr
        -> CInt    -- cchWideChar
        -> IO CInt

-- TODO: GHC is internally UTF-32 which means we have re-encode for
--       Windows which is annoying. Switch to UTF-16 on IoNative
--       being default.
withGhcInternalToUTF16 :: Ptr Word8 -> Int -> ((Ptr Word16, CInt) -> IO a)
                       -> IO a
withGhcInternalToUTF16 :: forall a. Ptr Word8 -> Int -> ((Ptr Word16, CInt) -> IO a) -> IO a
withGhcInternalToUTF16 Ptr Word8
ptr Int
len (Ptr Word16, CInt) -> IO a
fn
 = do cp <- IO Word32
getCurrentCodePage
      wchars <- failIfZero "withGhcInternalToUTF16" $
                  multiByteToWideChar' cp 0 ptr (fromIntegral len) nullPtr 0
      -- wchars is the length of buffer required
      allocaArray (fromIntegral wchars) $ \Ptr Word16
cwstr -> do
        wchars' <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"withGhcInternalToUTF16" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$
                    Word32
-> Word32 -> Ptr Word8 -> CInt -> Ptr Word16 -> CInt -> IO CInt
multiByteToWideChar' Word32
cp Word32
0 Ptr Word8
ptr (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len) Ptr Word16
cwstr CInt
wchars
        fn (cwstr, wchars')

foreign import ccall "WideCharToMultiByte"
  wideCharToMultiByte'
        :: CodePage
        -> DWORD   -- dwFlags,
        -> Ptr Word16 -- lpWideCharStr
        -> CInt    -- cchWideChar
        -> Ptr Word8   -- lpMultiByteStr
        -> CInt    -- cbMultiByte
        -> LPCSTR  -- lpMultiByteStr
        -> LPBOOL  -- lpbFlags
        -> IO CInt

-- TODO: GHC is internally UTF-32 which means we have re-encode for
--       Windows which is annoying. Switch to UTF-16 on IoNative
--       being default.

-- | Decode a UTF16 buffer into the given buffer in the current code page.
-- The source UTF16 buffer is filled by the function given as argument.
withUTF16ToGhcInternal :: Ptr Word8 -- Buffer to store the encoded string in.
                       -> Int       -- Length of the buffer
                       -- Function to fill source buffer.
                       ->  ( CInt       -- Size of available buffer in bytes
                          -> Ptr Word16 -- Temporary source buffer.
                          -> IO CInt    -- Actual length of buffer content.
                           )
                       -> IO Int    -- Returns number of bytes stored in buffer.
withUTF16ToGhcInternal :: Ptr Word8 -> Int -> (CInt -> Ptr Word16 -> IO CInt) -> IO Int
withUTF16ToGhcInternal Ptr Word8
ptr Int
len CInt -> Ptr Word16 -> IO CInt
fn
 = do cp <- IO Word32
getCurrentCodePage
      -- Annoyingly the IO system is very UTF-32 oriented and asks for bytes
      -- as buffer reads.  Problem is we don't know how many bytes we'll end up
      -- having as UTF-32 MultiByte encoded UTF-16. So be conservative.  We assume
      -- that a single byte may expand to atmost 1 Word16.  So assume that each
      -- byte does and divide the requested number of bytes by two since each
      -- Word16 encoded wchar may expand to only two Word8 sequences.
      let reqBytes = Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
len Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2)
      allocaArray reqBytes $ \Ptr Word16
w_ptr -> do
        w_len <- CInt -> Ptr Word16 -> IO CInt
fn (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
reqBytes) Ptr Word16
w_ptr
        if w_len == 0
           then return 0 else do
                -- Get required length of encoding
                mbchars' <- failIfZero "withUTF16ToGhcInternal" $
                              wideCharToMultiByte' cp 0 w_ptr
                                                  (fromIntegral w_len) nullPtr
                                                  0 nullPtr nullPtr
                assert (mbchars' <= (fromIntegral len)) $ do
                  -- mbchar' is the length of buffer required
                  mbchars <- failIfZero "withUTF16ToGhcInternal" $
                                wideCharToMultiByte' cp 0 w_ptr
                                                    (fromIntegral w_len) ptr
                                                    mbchars' nullPtr nullPtr
                  return $ fromIntegral mbchars