{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE CPP
           , NoImplicitPrelude
           , ForeignFunctionInterface
           , NondecreasingIndentation
  #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  GHC.IO.Encoding.Iconv
-- Copyright   :  (c) The University of Glasgow, 2008-2009
-- License     :  see libraries/base/LICENSE
-- 
-- Maintainer  :  libraries@haskell.org
-- Stability   :  internal
-- Portability :  non-portable
--
-- This module provides text encoding/decoding using iconv
--
-----------------------------------------------------------------------------

-- #hide
module GHC.IO.Encoding.Iconv (
#if !defined(mingw32_HOST_OS)
   iconvEncoding, mkIconvEncoding,
   localeEncodingName
#endif
 ) where

#include "MachDeps.h"
#include "HsBaseConfig.h"

#if !defined(mingw32_HOST_OS)

import Foreign.Safe
import Foreign.C
import Data.Maybe
import GHC.Base
import GHC.IO.Buffer
import GHC.IO.Encoding.Failure
import GHC.IO.Encoding.Types
import GHC.List (span)
import GHC.Num
import GHC.Show
import GHC.Real
import System.IO.Unsafe (unsafePerformIO)
import System.Posix.Internals

c_DEBUG_DUMP :: Bool
c_DEBUG_DUMP = False

iconv_trace :: String -> IO ()
iconv_trace s
 | c_DEBUG_DUMP = puts s
 | otherwise    = return ()

-- -----------------------------------------------------------------------------
-- iconv encoders/decoders

{-# NOINLINE localeEncodingName #-}
localeEncodingName :: String
localeEncodingName = unsafePerformIO $ do
   -- Use locale_charset() or nl_langinfo(CODESET) to get the encoding
   -- if we have either of them.
   cstr <- c_localeEncoding
   peekCAString cstr -- Assume charset names are ASCII

-- We hope iconv_t is a storable type.  It should be, since it has at least the
-- value -1, which is a possible return value from iconv_open.
type IConv = CLong -- ToDo: (#type iconv_t)

foreign import ccall unsafe "hs_iconv_open"
    hs_iconv_open :: CString -> CString -> IO IConv

foreign import ccall unsafe "hs_iconv_close"
    hs_iconv_close :: IConv -> IO CInt

foreign import ccall unsafe "hs_iconv"
    hs_iconv :: IConv -> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize
	  -> IO CSize

foreign import ccall unsafe "localeEncoding"
    c_localeEncoding :: IO CString

haskellChar :: String
#ifdef WORDS_BIGENDIAN
haskellChar | charSize == 2 = "UTF-16BE"
            | otherwise     = "UTF-32BE"
#else
haskellChar | charSize == 2 = "UTF-16LE"
            | otherwise     = "UTF-32LE"
#endif

char_shift :: Int
char_shift | charSize == 2 = 1
           | otherwise     = 2

iconvEncoding :: String -> IO TextEncoding
iconvEncoding = mkIconvEncoding ErrorOnCodingFailure

mkIconvEncoding :: CodingFailureMode -> String -> IO TextEncoding
mkIconvEncoding cfm charset = do
  return (TextEncoding { 
                textEncodingName = charset,
		mkTextDecoder = newIConv raw_charset (haskellChar ++ suffix) (recoverDecode cfm) iconvDecode,
		mkTextEncoder = newIConv haskellChar charset                 (recoverEncode cfm) iconvEncode})
  where
    -- An annoying feature of GNU iconv is that the //PREFIXES only take
    -- effect when they appear on the tocode parameter to iconv_open:
    (raw_charset, suffix) = span (/= '/') charset

newIConv :: String -> String
   -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
   -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
   -> IO (BufferCodec a b ())
newIConv from to rec fn =
  -- Assume charset names are ASCII
  withCAString from $ \ from_str ->
  withCAString to   $ \ to_str -> do
    iconvt <- throwErrnoIfMinus1 "mkTextEncoding" $ hs_iconv_open to_str from_str
    let iclose = throwErrnoIfMinus1_ "Iconv.close" $ hs_iconv_close iconvt
    return BufferCodec{
                encode = fn iconvt,
                recover = rec,
                close  = iclose,
                -- iconv doesn't supply a way to save/restore the state
                getState = return (),
                setState = const $ return ()
                }

iconvDecode :: IConv -> DecodeBuffer
iconvDecode iconv_t ibuf obuf = iconvRecode iconv_t ibuf 0 obuf char_shift

iconvEncode :: IConv -> EncodeBuffer
iconvEncode iconv_t ibuf obuf = iconvRecode iconv_t ibuf char_shift obuf 0

iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int 
            -> IO (CodingProgress, Buffer a, Buffer b)
iconvRecode iconv_t
  input@Buffer{  bufRaw=iraw, bufL=ir, bufR=iw, bufSize=_  }  iscale
  output@Buffer{ bufRaw=oraw, bufL=_,  bufR=ow, bufSize=os }  oscale
  = do
    iconv_trace ("haskelChar=" ++ show haskellChar)
    iconv_trace ("iconvRecode before, input=" ++ show (summaryBuffer input))
    iconv_trace ("iconvRecode before, output=" ++ show (summaryBuffer output))
    withRawBuffer iraw $ \ piraw -> do
    withRawBuffer oraw $ \ poraw -> do
    with (piraw `plusPtr` (ir `shiftL` iscale)) $ \ p_inbuf -> do
    with (poraw `plusPtr` (ow `shiftL` oscale)) $ \ p_outbuf -> do
    with (fromIntegral ((iw-ir) `shiftL` iscale)) $ \ p_inleft -> do
    with (fromIntegral ((os-ow) `shiftL` oscale)) $ \ p_outleft -> do
      res <- hs_iconv iconv_t p_inbuf p_inleft p_outbuf p_outleft
      new_inleft  <- peek p_inleft
      new_outleft <- peek p_outleft
      let 
	  new_inleft'  = fromIntegral new_inleft `shiftR` iscale
	  new_outleft' = fromIntegral new_outleft `shiftR` oscale
	  new_input  
            | new_inleft == 0  = input { bufL = 0, bufR = 0 }
	    | otherwise        = input { bufL = iw - new_inleft' }
	  new_output = output{ bufR = os - new_outleft' }
      iconv_trace ("iconv res=" ++ show res)
      iconv_trace ("iconvRecode after,  input=" ++ show (summaryBuffer new_input))
      iconv_trace ("iconvRecode after,  output=" ++ show (summaryBuffer new_output))
      if (res /= -1)
	then do -- all input translated
	   return (InputUnderflow, new_input, new_output)
	else do
      errno <- getErrno
      case errno of
        e | e == e2BIG  -> return (OutputUnderflow, new_input, new_output)
          | e == eINVAL -> return (InputUnderflow, new_input, new_output)
           -- Sometimes iconv reports EILSEQ for a
           -- character in the input even when there is no room
           -- in the output; in this case we might be about to
           -- change the encoding anyway, so the following bytes
           -- could very well be in a different encoding.
           --
           -- Because we can only say InvalidSequence if there is at least
           -- one element left in the output, we have to special case this.
          | e == eILSEQ -> return (if new_outleft' == 0 then OutputUnderflow else InvalidSequence, new_input, new_output)
          | otherwise -> do
              iconv_trace ("iconv returned error: " ++ show (errnoToIOError "iconv" e Nothing Nothing))
              throwErrno "iconvRecoder"

#endif /* !mingw32_HOST_OS */