module GHC.IO.Encoding.CodePage.API (
mkCodePageEncoding
) where
import Foreign.C
import Foreign.Ptr
import Foreign.Marshal
import Foreign.Storable
import Data.Bits
import Data.Either
import Data.Word
import GHC.Base
import GHC.List
import GHC.IO.Buffer
import GHC.IO.Encoding.Failure
import GHC.IO.Encoding.Types
import GHC.IO.Encoding.UTF16
import GHC.Num
import GHC.Show
import GHC.Real
import GHC.Windows
import GHC.ForeignPtr (castForeignPtr)
import System.Posix.Internals
c_DEBUG_DUMP :: Bool
c_DEBUG_DUMP = False
debugIO :: String -> IO ()
debugIO s
| c_DEBUG_DUMP = puts s
| otherwise = return ()
#if defined(i386_HOST_ARCH)
# define WINDOWS_CCONV stdcall
#elif defined(x86_64_HOST_ARCH)
# define WINDOWS_CCONV ccall
#else
# error Unknown mingw32 arch
#endif
type LPCSTR = Ptr Word8
mAX_DEFAULTCHAR :: Int
mAX_DEFAULTCHAR = 2
mAX_LEADBYTES :: Int
mAX_LEADBYTES = 12
data CPINFO = CPINFO {
maxCharSize :: UINT,
defaultChar :: [BYTE],
leadByte :: [BYTE]
}
instance Storable CPINFO where
sizeOf _ = sizeOf (undefined :: UINT) + (mAX_DEFAULTCHAR + mAX_LEADBYTES) * sizeOf (undefined :: BYTE)
alignment _ = alignment (undefined :: CInt)
peek ptr = do
ptr <- return $ castPtr ptr
a <- peek ptr
ptr <- return $ castPtr $ advancePtr ptr 1
b <- peekArray mAX_DEFAULTCHAR ptr
c <- peekArray mAX_LEADBYTES (advancePtr ptr mAX_DEFAULTCHAR)
return $ CPINFO a b c
poke ptr val = do
ptr <- return $ castPtr ptr
poke ptr (maxCharSize val)
ptr <- return $ castPtr $ advancePtr ptr 1
pokeArray' "CPINFO.defaultChar" mAX_DEFAULTCHAR ptr (defaultChar val)
pokeArray' "CPINFO.leadByte" mAX_LEADBYTES (advancePtr ptr mAX_DEFAULTCHAR) (leadByte val)
pokeArray' :: Storable a => String -> Int -> Ptr a -> [a] -> IO ()
pokeArray' msg sz ptr xs | length xs == sz = pokeArray ptr xs
| otherwise = error $ msg ++ ": expected " ++ show sz ++ " elements in list but got " ++ show (length xs)
foreign import WINDOWS_CCONV unsafe "windows.h GetCPInfo"
c_GetCPInfo :: UINT
-> Ptr CPINFO
-> IO BOOL
foreign import WINDOWS_CCONV unsafe "windows.h MultiByteToWideChar"
c_MultiByteToWideChar :: UINT
-> DWORD
-> LPCSTR
-> CInt
-> LPWSTR
-> CInt
-> IO CInt
foreign import WINDOWS_CCONV unsafe "windows.h WideCharToMultiByte"
c_WideCharToMultiByte :: UINT
-> DWORD
-> LPWSTR
-> CInt
-> LPCSTR
-> CInt
-> LPCSTR
-> LPBOOL
-> IO CInt
foreign import WINDOWS_CCONV unsafe "windows.h IsDBCSLeadByteEx"
c_IsDBCSLeadByteEx :: UINT
-> BYTE
-> IO BOOL
mkCodePageEncoding :: CodingFailureMode -> Word32 -> TextEncoding
mkCodePageEncoding cfm cp
= TextEncoding {
textEncodingName = "CP" ++ show cp,
mkTextDecoder = newCP (recoverDecode cfm) cpDecode cp,
mkTextEncoder = newCP (recoverEncode cfm) cpEncode cp
}
newCP :: (Buffer from -> Buffer to -> IO (Buffer from, Buffer to))
-> (Word32 -> Int -> CodeBuffer from to)
-> Word32
-> IO (BufferCodec from to ())
newCP rec fn cp = do
max_char_size <- alloca $ \cpinfo_ptr -> do
success <- c_GetCPInfo cp cpinfo_ptr
when (not success) $ throwGetLastError ("GetCPInfo " ++ show cp)
fmap (fromIntegral . maxCharSize) $ peek cpinfo_ptr
debugIO $ "GetCPInfo " ++ show cp ++ " = " ++ show max_char_size
return $ BufferCodec {
encode = fn cp max_char_size,
recover = rec,
close = return (),
getState = return (),
setState = const $ return ()
}
utf16_native_encode' :: EncodeBuffer
utf16_native_decode' :: DecodeBuffer
#ifdef WORDS_BIGENDIAN
utf16_native_encode' = utf16be_encode
utf16_native_decode' = utf16be_decode
#else
utf16_native_encode' = utf16le_encode
utf16_native_decode' = utf16le_decode
#endif
saner :: CodeBuffer from to
-> Buffer from -> Buffer to
-> IO (CodingProgress, Int, Buffer from, Buffer to)
saner code ibuf obuf = do
(why, ibuf', obuf') <- code ibuf obuf
if isEmptyBuffer ibuf'
then return (InputUnderflow, bufferElems ibuf, ibuf', obuf')
else return (why, bufL ibuf' bufL ibuf, ibuf', obuf')
byteView :: Buffer CWchar -> Buffer Word8
byteView (Buffer {..}) = Buffer { bufState = bufState, bufRaw = castForeignPtr bufRaw, bufSize = bufSize * 2, bufL = bufL * 2, bufR = bufR * 2 }
cwcharView :: Buffer Word8 -> Buffer CWchar
cwcharView (Buffer {..}) = Buffer { bufState = bufState, bufRaw = castForeignPtr bufRaw, bufSize = half bufSize, bufL = half bufL, bufR = half bufR }
where half x = case x `divMod` 2 of (y, 0) -> y
_ -> error "cwcharView: utf16_(encode|decode) (wrote out|consumed) non multiple-of-2 number of bytes"
utf16_native_encode :: CodeBuffer Char CWchar
utf16_native_encode ibuf obuf = do
(why, ibuf, obuf) <- utf16_native_encode' ibuf (byteView obuf)
return (why, ibuf, cwcharView obuf)
utf16_native_decode :: CodeBuffer CWchar Char
utf16_native_decode ibuf obuf = do
(why, ibuf, obuf) <- utf16_native_decode' (byteView ibuf) obuf
return (why, cwcharView ibuf, obuf)
cpDecode :: Word32 -> Int -> DecodeBuffer
cpDecode cp max_char_size = \ibuf obuf -> do
#ifdef CHARBUF_UTF16
let mbuf = obuf
#else
let sz = (bufferElems ibuf * 2)
`min` (bufferAvailable obuf * 2)
mbuf <- newBuffer (2 * sz) sz WriteBuffer :: IO (Buffer CWchar)
#endif
debugIO $ "cpDecode " ++ summaryBuffer ibuf ++ " " ++ summaryBuffer mbuf
(why1, ibuf', mbuf') <- cpRecode try' is_valid_prefix max_char_size 1 0 1 ibuf mbuf
debugIO $ "cpRecode (cpDecode) = " ++ show why1 ++ " " ++ summaryBuffer ibuf' ++ " " ++ summaryBuffer mbuf'
#ifdef CHARBUF_UTF16
return (why1, ibuf', mbuf')
#else
debugIO $ "utf16_native_decode " ++ summaryBuffer mbuf' ++ " " ++ summaryBuffer obuf
(why2, target_utf16_count, mbuf', obuf) <- saner utf16_native_decode (mbuf' { bufState = ReadBuffer }) obuf
debugIO $ "utf16_native_decode = " ++ show why2 ++ " " ++ summaryBuffer mbuf' ++ " " ++ summaryBuffer obuf
case why2 of
InputUnderflow | isEmptyBuffer mbuf' -> return (why1, ibuf', obuf)
| otherwise -> error "cpDecode: impossible underflown UTF-16 buffer"
InvalidSequence -> error "InvalidSequence on output of Windows API"
OutputUnderflow -> do
byte_count <- bSearch "cpDecode" (cpRecode try' is_valid_prefix max_char_size 1 0 1) ibuf mbuf target_utf16_count (target_utf16_count `div` 2) target_utf16_count (target_utf16_count * max_char_size)
return (OutputUnderflow, bufferRemove byte_count ibuf, obuf)
#endif
where
is_valid_prefix = c_IsDBCSLeadByteEx cp
try' iptr icnt optr ocnt
| ocnt == 0 = return (Left True)
| otherwise = do
err <- c_MultiByteToWideChar (fromIntegral cp) 8
iptr (fromIntegral icnt) optr (fromIntegral ocnt)
debugIO $ "MultiByteToWideChar " ++ show cp ++ " 8 " ++ show iptr ++ " " ++ show icnt ++ " " ++ show optr ++ " " ++ show ocnt ++ "\n = " ++ show err
case err of
0 -> do
err <- getLastError
case err of
122 -> return (Left True)
1113 -> return (Left False)
_ -> failWith "MultiByteToWideChar" err
wrote_chars -> return (Right (fromIntegral wrote_chars))
cpEncode :: Word32 -> Int -> EncodeBuffer
cpEncode cp _max_char_size = \ibuf obuf -> do
#ifdef CHARBUF_UTF16
let mbuf' = ibuf
#else
let sz = (bufferElems ibuf * 2)
`min` (bufferAvailable obuf * 2)
mbuf <- newBuffer (2 * sz) sz WriteBuffer
(why1, ibuf', mbuf') <- utf16_native_encode ibuf mbuf
#endif
debugIO $ "\ncpEncode " ++ summaryBuffer mbuf' ++ " " ++ summaryBuffer obuf
(why2, target_utf16_count, mbuf', obuf) <- saner (cpRecode try' is_valid_prefix 2 1 1 0) (mbuf' { bufState = ReadBuffer }) obuf
debugIO $ "cpRecode (cpEncode) = " ++ show why2 ++ " " ++ summaryBuffer mbuf' ++ " " ++ summaryBuffer obuf
#ifdef CHARBUF_UTF16
return (why2, mbuf', obuf)
#else
case why2 of
InputUnderflow | isEmptyBuffer mbuf' -> return (why1, ibuf', obuf)
| otherwise -> error "cpEncode: impossible underflown UTF-16 buffer"
_ -> do
utf32_count <- bSearch "cpEncode" utf16_native_encode ibuf mbuf target_utf16_count (target_utf16_count `div` 2) target_utf16_count target_utf16_count
return (why2, bufferRemove utf32_count ibuf, obuf)
#endif
where
is_valid_prefix _ = return False
try' iptr icnt optr ocnt
| ocnt == 0 = return (Left True)
| otherwise = alloca $ \defaulted_ptr -> do
poke defaulted_ptr False
err <- c_WideCharToMultiByte (fromIntegral cp) 0
iptr (fromIntegral icnt) optr (fromIntegral ocnt)
nullPtr defaulted_ptr
defaulted <- peek defaulted_ptr
debugIO $ "WideCharToMultiByte " ++ show cp ++ " 0 " ++ show iptr ++ " " ++ show icnt ++ " " ++ show optr ++ " " ++ show ocnt ++ " NULL " ++ show defaulted_ptr ++ "\n = " ++ show err ++ ", " ++ show defaulted
case err of
0 -> do
err <- getLastError
case err of
122 -> return (Left True)
1113 -> return (Left False)
_ -> failWith "WideCharToMultiByte" err
wrote_bytes | defaulted -> return (Left False)
| otherwise -> return (Right (fromIntegral wrote_bytes))
bSearch :: String
-> CodeBuffer from to
-> Buffer from -> Buffer to
-> Int
-> Int -> Int -> Int
-> IO Int
bSearch msg code ibuf mbuf target_to_elems = go
where
go mn md mx = do
(_why, ibuf, mbuf) <- code (ibuf { bufR = bufL ibuf + md }) mbuf
debugIO $ "code (bSearch " ++ msg ++ ") " ++ show md ++ " = " ++ show _why ++ ", " ++ summaryBuffer ibuf ++ summaryBuffer mbuf
case bufferElems mbuf `compare` target_to_elems of
EQ -> debugIO ("bSearch = " ++ show solution) >> return solution
where solution = md bufferElems ibuf
LT -> go' (md+1) mx
GT -> go' mn (md1)
go' mn mx | mn <= mx = go mn (mn + ((mx mn) `div` 2)) mx
| otherwise = error $ "bSearch(" ++ msg ++ "): search crossed! " ++ show (summaryBuffer ibuf, summaryBuffer mbuf, target_to_elems, mn, mx)
cpRecode :: forall from to. (Show from, Storable from)
=> (Ptr from -> Int -> Ptr to -> Int -> IO (Either Bool Int))
-> (from -> IO Bool)
-> Int
-> Int
-> Int -> Int
-> CodeBuffer from to
cpRecode try' is_valid_prefix max_i_size min_o_size iscale oscale = go
where
go :: CodeBuffer from to
go ibuf obuf | isEmptyBuffer ibuf = return (InputUnderflow, ibuf, obuf)
| bufferAvailable obuf < min_o_size = return (OutputUnderflow, ibuf, obuf)
| otherwise = try (bufferElems ibuf `min` ((max_i_size * bufferAvailable obuf) `div` min_o_size)) seek_smaller
where
done why = return (why, ibuf, obuf)
seek_smaller n longer_was_valid
| n <= 1 = if longer_was_valid
then done OutputUnderflow
else do byte <- withBuffer ibuf $ \ptr -> peekElemOff ptr (bufL ibuf)
valid_prefix <- is_valid_prefix byte
done (if valid_prefix && bufferElems ibuf < max_i_size then InputUnderflow else InvalidSequence)
| n < 2 * max_i_size = try (n 1) (\pred_n pred_n_was_valid -> seek_smaller pred_n (longer_was_valid || pred_n_was_valid))
| let n' = n `div` 2 = try n' (post_divide n' longer_was_valid)
post_divide _ _ n True = seek_smaller n True
post_divide n' longer_was_valid n False | n < n' + max_i_size 1 = try (n + 1) (post_divide n' longer_was_valid)
| otherwise = seek_smaller n' longer_was_valid
try n k_fail = withBuffer ibuf $ \iptr -> withBuffer obuf $ \optr -> do
ei_err_wrote <- try' (iptr `plusPtr` (bufL ibuf `shiftL` iscale)) n
(optr `plusPtr` (bufR obuf `shiftL` oscale)) (bufferAvailable obuf)
debugIO $ "try " ++ show n ++ " = " ++ show ei_err_wrote
case ei_err_wrote of
Left True -> k_fail n True
Left False -> k_fail n False
Right wrote_elts -> go (bufferRemove n ibuf) (obuf { bufR = bufR obuf + wrote_elts })