{-# LANGUAGE CPP                  #-}
{-# LANGUAGE NoImplicitPrelude    #-}
{- |
   Module      :  System.Win32.Encoding
   Copyright   :  2012 shelarcy
   License     :  BSD-style

   Maintainer  :  shelarcy@gmail.com
   Stability   :  Provisional
   Portability :  Non-portable (Win32 API)

   Enocode/Decode mutibyte character using Win32 API.
-}

module GHC.IO.Windows.Encoding
  ( encodeMultiByte
  , encodeMultiByteIO
  , encodeMultiByteRawIO
  , decodeMultiByte
  , decodeMultiByteIO
  , wideCharToMultiByte
  , multiByteToWideChar
  , withGhcInternalToUTF16
  , withUTF16ToGhcInternal
  ) where

import Data.Word (Word8, Word16)
import Foreign.C.Types        (CInt(..))
import Foreign.C.String       (peekCAStringLen, peekCWStringLen,
                               withCWStringLen, withCAStringLen, )
import Foreign.Ptr (nullPtr, Ptr ())
import Foreign.Marshal.Array  (allocaArray)
import Foreign.Marshal.Unsafe (unsafeLocalState)
import GHC.Windows
import GHC.IO.Encoding.CodePage (CodePage, getCurrentCodePage)
import GHC.IO
import GHC.Base
import GHC.Real

#include "windows_cconv.h"

-- | The "System.IO" output functions (e.g. `putStr`) don't
-- automatically convert to multibyte string on Windows, so this
-- function is provided to make the conversion from a Unicode string
-- in the given code page to a proper multibyte string.  To get the
-- code page for the console, use `getCurrentCodePage`.
--
encodeMultiByte :: CodePage -> String -> String
encodeMultiByte :: DWORD -> String -> String
encodeMultiByte DWORD
cp = IO String -> String
forall a. IO a -> a
unsafeLocalState (IO String -> String) -> (String -> IO String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DWORD -> String -> IO String
encodeMultiByteIO DWORD
cp

{-# INLINE encodeMultiByteIO' #-}
-- | String must not be zero length.
encodeMultiByteIO' :: CodePage -> String -> ((LPCSTR, CInt) -> IO a) -> IO a
encodeMultiByteIO' :: forall a. DWORD -> String -> ((LPCSTR, CInt) -> IO a) -> IO a
encodeMultiByteIO' DWORD
cp String
wstr (LPCSTR, CInt) -> IO a
transformer =
  String -> (CWStringLen -> IO a) -> IO a
forall a. String -> (CWStringLen -> IO a) -> IO a
withCWStringLen String
wstr ((CWStringLen -> IO a) -> IO a) -> (CWStringLen -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ \(Ptr CWchar
cwstr,Int
len) -> do
    CInt
mbchars' <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"WideCharToMultiByte" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$ DWORD
-> DWORD
-> Ptr CWchar
-> CInt
-> LPCSTR
-> CInt
-> LPCSTR
-> LPBOOL
-> IO CInt
wideCharToMultiByte
                DWORD
cp
                DWORD
0
                Ptr CWchar
cwstr
                (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)
                LPCSTR
forall a. Ptr a
nullPtr CInt
0
                LPCSTR
forall a. Ptr a
nullPtr LPBOOL
forall a. Ptr a
nullPtr
    -- mbchar' is the length of buffer required
    Int -> (LPCSTR -> IO a) -> IO a
forall a b. Storable a => Int -> (Ptr a -> IO b) -> IO b
allocaArray (CInt -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CInt
mbchars') ((LPCSTR -> IO a) -> IO a) -> (LPCSTR -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ \LPCSTR
mbstr -> do
      CInt
mbchars <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"WideCharToMultiByte" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$ DWORD
-> DWORD
-> Ptr CWchar
-> CInt
-> LPCSTR
-> CInt
-> LPCSTR
-> LPBOOL
-> IO CInt
wideCharToMultiByte
                 DWORD
cp
                 DWORD
0
                 Ptr CWchar
cwstr
                 (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)
                 LPCSTR
mbstr CInt
mbchars'
                 LPCSTR
forall a. Ptr a
nullPtr LPBOOL
forall a. Ptr a
nullPtr
      (LPCSTR, CInt) -> IO a
transformer (LPCSTR
mbstr,CInt -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral CInt
mbchars)

-- converts [Char] to UTF-16
encodeMultiByteIO :: CodePage -> String -> IO String
encodeMultiByteIO :: DWORD -> String -> IO String
encodeMultiByteIO DWORD
_ String
"" = String -> IO String
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return String
""
encodeMultiByteIO DWORD
cp String
s = DWORD -> String -> ((LPCSTR, CInt) -> IO String) -> IO String
forall a. DWORD -> String -> ((LPCSTR, CInt) -> IO a) -> IO a
encodeMultiByteIO' DWORD
cp String
s (LPCSTR, CInt) -> IO String
forall {a}. Integral a => (LPCSTR, a) -> IO String
toString
  where toString :: (LPCSTR, a) -> IO String
toString (LPCSTR
st,a
l) = CStringLen -> IO String
peekCAStringLen (LPCSTR
st,a -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
l)

-- converts [Char] to UTF-16
encodeMultiByteRawIO :: CodePage -> String -> IO (LPCSTR, CInt)
encodeMultiByteRawIO :: DWORD -> String -> IO (LPCSTR, CInt)
encodeMultiByteRawIO DWORD
_ String
"" = (LPCSTR, CInt) -> IO (LPCSTR, CInt)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (LPCSTR
forall a. Ptr a
nullPtr, CInt
0)
encodeMultiByteRawIO DWORD
cp String
s = DWORD
-> String
-> ((LPCSTR, CInt) -> IO (LPCSTR, CInt))
-> IO (LPCSTR, CInt)
forall a. DWORD -> String -> ((LPCSTR, CInt) -> IO a) -> IO a
encodeMultiByteIO' DWORD
cp String
s (LPCSTR, CInt) -> IO (LPCSTR, CInt)
forall {m :: * -> *} {a} {b} {a}.
(Monad m, Integral a, Num b) =>
(a, a) -> m (a, b)
toSizedCString
  where toSizedCString :: (a, a) -> m (a, b)
toSizedCString (a
st,a
l) = (a, b) -> m (a, b)
forall a. a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return (a
st, a -> b
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
l)

foreign import WINDOWS_CCONV "WideCharToMultiByte"
  wideCharToMultiByte
        :: CodePage
        -> DWORD   -- dwFlags,
        -> LPCWSTR -- lpWideCharStr
        -> CInt    -- cchWideChar
        -> LPSTR   -- lpMultiByteStr
        -> CInt    -- cbMultiByte
        -> LPCSTR  -- lpMultiByteStr
        -> LPBOOL  -- lpbFlags
        -> IO CInt

-- | The `System.IO` input functions (e.g. `getLine`) don't
-- automatically convert to Unicode, so this function is provided to
-- make the conversion from a multibyte string in the given code page
-- to a proper Unicode string.  To get the code page for the console,
-- use `getConsoleCP`.
stringToUnicode :: CodePage -> String -> IO String
stringToUnicode :: DWORD -> String -> IO String
stringToUnicode DWORD
_cp String
"" = String -> IO String
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return String
""
     -- MultiByteToWideChar doesn't handle empty strings (#1929)
stringToUnicode DWORD
cp String
mbstr =
  String -> (CStringLen -> IO String) -> IO String
forall a. String -> (CStringLen -> IO a) -> IO a
withCAStringLen String
mbstr ((CStringLen -> IO String) -> IO String)
-> (CStringLen -> IO String) -> IO String
forall a b. (a -> b) -> a -> b
$ \(LPCSTR
cstr,Int
len) -> do
    CInt
wchars <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"MultiByteToWideChar" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$ DWORD -> DWORD -> LPCSTR -> CInt -> Ptr CWchar -> CInt -> IO CInt
multiByteToWideChar
                DWORD
cp
                DWORD
0
                LPCSTR
cstr
                (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)
                Ptr CWchar
forall a. Ptr a
nullPtr CInt
0
    -- wchars is the length of buffer required
    Int -> (Ptr CWchar -> IO String) -> IO String
forall a b. Storable a => Int -> (Ptr a -> IO b) -> IO b
allocaArray (CInt -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CInt
wchars) ((Ptr CWchar -> IO String) -> IO String)
-> (Ptr CWchar -> IO String) -> IO String
forall a b. (a -> b) -> a -> b
$ \Ptr CWchar
cwstr -> do
      CInt
wchars' <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"MultiByteToWideChar" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$ DWORD -> DWORD -> LPCSTR -> CInt -> Ptr CWchar -> CInt -> IO CInt
multiByteToWideChar
                DWORD
cp
                DWORD
0
                LPCSTR
cstr
                (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)
                Ptr CWchar
cwstr CInt
wchars
      CWStringLen -> IO String
peekCWStringLen (Ptr CWchar
cwstr,CInt -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CInt
wchars')  -- converts UTF-16 to [Char]

foreign import WINDOWS_CCONV unsafe "MultiByteToWideChar"
  multiByteToWideChar
        :: CodePage
        -> DWORD   -- dwFlags,
        -> LPCSTR  -- lpMultiByteStr
        -> CInt    -- cbMultiByte
        -> LPWSTR  -- lpWideCharStr
        -> CInt    -- cchWideChar
        -> IO CInt

decodeMultiByte :: CodePage -> String -> String
decodeMultiByte :: DWORD -> String -> String
decodeMultiByte DWORD
cp = IO String -> String
forall a. IO a -> a
unsafeLocalState (IO String -> String) -> (String -> IO String) -> String -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. DWORD -> String -> IO String
decodeMultiByteIO DWORD
cp

-- | Because of `stringToUnicode` is unclear name, we use `decodeMultiByteIO`
-- for alias of `stringToUnicode`.
decodeMultiByteIO :: CodePage -> String -> IO String
decodeMultiByteIO :: DWORD -> String -> IO String
decodeMultiByteIO = DWORD -> String -> IO String
stringToUnicode
{-# INLINE decodeMultiByteIO #-}

foreign import WINDOWS_CCONV unsafe "MultiByteToWideChar"
  multiByteToWideChar'
        :: CodePage
        -> DWORD   -- dwFlags,
        -> Ptr Word8  -- lpMultiByteStr
        -> CInt    -- cbMultiByte
        -> Ptr Word16  -- lpWideCharStr
        -> CInt    -- cchWideChar
        -> IO CInt

-- TODO: GHC is internally UTF-32 which means we have re-encode for
--       Windows which is annoying. Switch to UTF-16 on IoNative
--       being default.
withGhcInternalToUTF16 :: Ptr Word8 -> Int -> ((Ptr Word16, CInt) -> IO a)
                       -> IO a
withGhcInternalToUTF16 :: forall a. Ptr Word8 -> Int -> ((Ptr Word16, CInt) -> IO a) -> IO a
withGhcInternalToUTF16 Ptr Word8
ptr Int
len (Ptr Word16, CInt) -> IO a
fn
 = do DWORD
cp <- IO DWORD
getCurrentCodePage
      CInt
wchars <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"withGhcInternalToUTF16" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$
                  DWORD
-> DWORD -> Ptr Word8 -> CInt -> Ptr Word16 -> CInt -> IO CInt
multiByteToWideChar' DWORD
cp DWORD
0 Ptr Word8
ptr (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len) Ptr Word16
forall a. Ptr a
nullPtr CInt
0
      -- wchars is the length of buffer required
      Int -> (Ptr Word16 -> IO a) -> IO a
forall a b. Storable a => Int -> (Ptr a -> IO b) -> IO b
allocaArray (CInt -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CInt
wchars) ((Ptr Word16 -> IO a) -> IO a) -> (Ptr Word16 -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ \Ptr Word16
cwstr -> do
        CInt
wchars' <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"withGhcInternalToUTF16" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$
                    DWORD
-> DWORD -> Ptr Word8 -> CInt -> Ptr Word16 -> CInt -> IO CInt
multiByteToWideChar' DWORD
cp DWORD
0 Ptr Word8
ptr (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len) Ptr Word16
cwstr CInt
wchars
        (Ptr Word16, CInt) -> IO a
fn (Ptr Word16
cwstr, CInt
wchars')

foreign import WINDOWS_CCONV "WideCharToMultiByte"
  wideCharToMultiByte'
        :: CodePage
        -> DWORD   -- dwFlags,
        -> Ptr Word16 -- lpWideCharStr
        -> CInt    -- cchWideChar
        -> Ptr Word8   -- lpMultiByteStr
        -> CInt    -- cbMultiByte
        -> LPCSTR  -- lpMultiByteStr
        -> LPBOOL  -- lpbFlags
        -> IO CInt

-- TODO: GHC is internally UTF-32 which means we have re-encode for
--       Windows which is annoying. Switch to UTF-16 on IoNative
--       being default.

-- | Decode a UTF16 buffer into the given buffer in the current code page.
-- The source UTF16 buffer is filled by the function given as argument.
withUTF16ToGhcInternal :: Ptr Word8 -- Buffer to store the encoded string in.
                       -> Int       -- Length of the buffer
                       -- Function to fill source buffer.
                       ->  ( CInt       -- Size of available buffer in bytes
                          -> Ptr Word16 -- Temporary source buffer.
                          -> IO CInt    -- Actual length of buffer content.
                           )
                       -> IO Int    -- Returns number of bytes stored in buffer.
withUTF16ToGhcInternal :: Ptr Word8 -> Int -> (CInt -> Ptr Word16 -> IO CInt) -> IO Int
withUTF16ToGhcInternal Ptr Word8
ptr Int
len CInt -> Ptr Word16 -> IO CInt
fn
 = do DWORD
cp <- IO DWORD
getCurrentCodePage
      -- Annoyingly the IO system is very UTF-32 oriented and asks for bytes
      -- as buffer reads.  Problem is we don't know how many bytes we'll end up
      -- having as UTF-32 MultiByte encoded UTF-16. So be conservative.  We assume
      -- that a single byte may expand to atmost 1 Word16.  So assume that each
      -- byte does and divide the requested number of bytes by two since each
      -- Word16 encoded wchar may expand to only two Word8 sequences.
      let reqBytes :: Int
reqBytes = Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
len Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2)
      Int -> (Ptr Word16 -> IO Int) -> IO Int
forall a b. Storable a => Int -> (Ptr a -> IO b) -> IO b
allocaArray Int
reqBytes ((Ptr Word16 -> IO Int) -> IO Int)
-> (Ptr Word16 -> IO Int) -> IO Int
forall a b. (a -> b) -> a -> b
$ \Ptr Word16
w_ptr -> do
        CInt
w_len <- CInt -> Ptr Word16 -> IO CInt
fn (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
reqBytes) Ptr Word16
w_ptr
        if CInt
w_len CInt -> CInt -> Bool
forall a. Eq a => a -> a -> Bool
== CInt
0
           then Int -> IO Int
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return Int
0 else do
                -- Get required length of encoding
                CInt
mbchars' <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"withUTF16ToGhcInternal" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$
                              DWORD
-> DWORD
-> Ptr Word16
-> CInt
-> Ptr Word8
-> CInt
-> LPCSTR
-> LPBOOL
-> IO CInt
wideCharToMultiByte' DWORD
cp DWORD
0 Ptr Word16
w_ptr
                                                  (CInt -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral CInt
w_len) Ptr Word8
forall a. Ptr a
nullPtr
                                                  CInt
0 LPCSTR
forall a. Ptr a
nullPtr LPBOOL
forall a. Ptr a
nullPtr
                Bool -> IO Int -> IO Int
forall a. (?callStack::CallStack) => Bool -> a -> a
assert (CInt
mbchars' CInt -> CInt -> Bool
forall a. Ord a => a -> a -> Bool
<= (Int -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
len)) (IO Int -> IO Int) -> IO Int -> IO Int
forall a b. (a -> b) -> a -> b
$ do
                  -- mbchar' is the length of buffer required
                  CInt
mbchars <- String -> IO CInt -> IO CInt
forall a. (Eq a, Num a) => String -> IO a -> IO a
failIfZero String
"withUTF16ToGhcInternal" (IO CInt -> IO CInt) -> IO CInt -> IO CInt
forall a b. (a -> b) -> a -> b
$
                                DWORD
-> DWORD
-> Ptr Word16
-> CInt
-> Ptr Word8
-> CInt
-> LPCSTR
-> LPBOOL
-> IO CInt
wideCharToMultiByte' DWORD
cp DWORD
0 Ptr Word16
w_ptr
                                                    (CInt -> CInt
forall a b. (Integral a, Num b) => a -> b
fromIntegral CInt
w_len) Ptr Word8
ptr
                                                    CInt
mbchars' LPCSTR
forall a. Ptr a
nullPtr LPBOOL
forall a. Ptr a
nullPtr
                  Int -> IO Int
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Int -> IO Int) -> Int -> IO Int
forall a b. (a -> b) -> a -> b
$ CInt -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral CInt
mbchars