{-# LANGUAGE BangPatterns, CPP, GeneralizedNewtypeDeriving #-}
-- |
-- Module      : Data.Text.Foreign
-- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Portability : GHC
--
-- Support for using 'Text' data with native code via the Haskell
-- foreign function interface.

module Data.Text.Foreign
    (
    -- * Interoperability with native code
    -- $interop
      I16
    -- * Safe conversion functions
    , fromPtr
    , useAsPtr
    , asForeignPtr
    -- ** Encoding as UTF-8
    , peekCStringLen
    , withCStringLen
    -- * Unsafe conversion code
    , lengthWord16
    , unsafeCopyToPtr
    -- * Low-level manipulation
    -- $lowlevel
    , dropWord16
    , takeWord16
    ) where

#if defined(ASSERTS)
import Control.Exception (assert)
#endif
import Control.Monad.ST.Unsafe (unsafeIOToST)
import Data.ByteString.Unsafe (unsafePackCStringLen, unsafeUseAsCStringLen)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Text.Internal (Text(..), empty)
import Data.Text.Internal.Functions (unsafeWithForeignPtr)
import Data.Text.Unsafe (lengthWord16)
import Data.Word (Word16)
import Foreign.C.String (CStringLen)
import Foreign.ForeignPtr (ForeignPtr, mallocForeignPtrArray)
import Foreign.Marshal.Alloc (allocaBytes)
import Foreign.Ptr (Ptr, castPtr, plusPtr)
import Foreign.Storable (peek, poke)
import qualified Data.Text.Array as A

-- $interop
--
-- The 'Text' type is implemented using arrays that are not guaranteed
-- to have a fixed address in the Haskell heap. All communication with
-- native code must thus occur by copying data back and forth.
--
-- The 'Text' type's internal representation is UTF-16, using the
-- platform's native endianness.  This makes copied data suitable for
-- use with native libraries that use a similar representation, such
-- as ICU.  To interoperate with native libraries that use different
-- internal representations, such as UTF-8 or UTF-32, consider using
-- the functions in the 'Data.Text.Encoding' module.

-- | A type representing a number of UTF-16 code units.
newtype I16 = I16 Int
    deriving (I16
forall a. a -> a -> Bounded a
maxBound :: I16
$cmaxBound :: I16
minBound :: I16
$cminBound :: I16
Bounded, Int -> I16
I16 -> Int
I16 -> [I16]
I16 -> I16
I16 -> I16 -> [I16]
I16 -> I16 -> I16 -> [I16]
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
enumFromThenTo :: I16 -> I16 -> I16 -> [I16]
$cenumFromThenTo :: I16 -> I16 -> I16 -> [I16]
enumFromTo :: I16 -> I16 -> [I16]
$cenumFromTo :: I16 -> I16 -> [I16]
enumFromThen :: I16 -> I16 -> [I16]
$cenumFromThen :: I16 -> I16 -> [I16]
enumFrom :: I16 -> [I16]
$cenumFrom :: I16 -> [I16]
fromEnum :: I16 -> Int
$cfromEnum :: I16 -> Int
toEnum :: Int -> I16
$ctoEnum :: Int -> I16
pred :: I16 -> I16
$cpred :: I16 -> I16
succ :: I16 -> I16
$csucc :: I16 -> I16
Enum, I16 -> I16 -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: I16 -> I16 -> Bool
$c/= :: I16 -> I16 -> Bool
== :: I16 -> I16 -> Bool
$c== :: I16 -> I16 -> Bool
Eq, Enum I16
Real I16
I16 -> Integer
I16 -> I16 -> (I16, I16)
I16 -> I16 -> I16
forall a.
Real a
-> Enum a
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> (a, a))
-> (a -> a -> (a, a))
-> (a -> Integer)
-> Integral a
toInteger :: I16 -> Integer
$ctoInteger :: I16 -> Integer
divMod :: I16 -> I16 -> (I16, I16)
$cdivMod :: I16 -> I16 -> (I16, I16)
quotRem :: I16 -> I16 -> (I16, I16)
$cquotRem :: I16 -> I16 -> (I16, I16)
mod :: I16 -> I16 -> I16
$cmod :: I16 -> I16 -> I16
div :: I16 -> I16 -> I16
$cdiv :: I16 -> I16 -> I16
rem :: I16 -> I16 -> I16
$crem :: I16 -> I16 -> I16
quot :: I16 -> I16 -> I16
$cquot :: I16 -> I16 -> I16
Integral, Integer -> I16
I16 -> I16
I16 -> I16 -> I16
forall a.
(a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a)
-> (a -> a)
-> (a -> a)
-> (Integer -> a)
-> Num a
fromInteger :: Integer -> I16
$cfromInteger :: Integer -> I16
signum :: I16 -> I16
$csignum :: I16 -> I16
abs :: I16 -> I16
$cabs :: I16 -> I16
negate :: I16 -> I16
$cnegate :: I16 -> I16
* :: I16 -> I16 -> I16
$c* :: I16 -> I16 -> I16
- :: I16 -> I16 -> I16
$c- :: I16 -> I16 -> I16
+ :: I16 -> I16 -> I16
$c+ :: I16 -> I16 -> I16
Num, Eq I16
I16 -> I16 -> Bool
I16 -> I16 -> Ordering
I16 -> I16 -> I16
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: I16 -> I16 -> I16
$cmin :: I16 -> I16 -> I16
max :: I16 -> I16 -> I16
$cmax :: I16 -> I16 -> I16
>= :: I16 -> I16 -> Bool
$c>= :: I16 -> I16 -> Bool
> :: I16 -> I16 -> Bool
$c> :: I16 -> I16 -> Bool
<= :: I16 -> I16 -> Bool
$c<= :: I16 -> I16 -> Bool
< :: I16 -> I16 -> Bool
$c< :: I16 -> I16 -> Bool
compare :: I16 -> I16 -> Ordering
$ccompare :: I16 -> I16 -> Ordering
Ord, ReadPrec [I16]
ReadPrec I16
Int -> ReadS I16
ReadS [I16]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [I16]
$creadListPrec :: ReadPrec [I16]
readPrec :: ReadPrec I16
$creadPrec :: ReadPrec I16
readList :: ReadS [I16]
$creadList :: ReadS [I16]
readsPrec :: Int -> ReadS I16
$creadsPrec :: Int -> ReadS I16
Read, Num I16
Ord I16
I16 -> Rational
forall a. Num a -> Ord a -> (a -> Rational) -> Real a
toRational :: I16 -> Rational
$ctoRational :: I16 -> Rational
Real, Int -> I16 -> ShowS
[I16] -> ShowS
I16 -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [I16] -> ShowS
$cshowList :: [I16] -> ShowS
show :: I16 -> String
$cshow :: I16 -> String
showsPrec :: Int -> I16 -> ShowS
$cshowsPrec :: Int -> I16 -> ShowS
Show)

-- | /O(n)/ Create a new 'Text' from a 'Ptr' 'Word16' by copying the
-- contents of the array.
fromPtr :: Ptr Word16           -- ^ source array
        -> I16                  -- ^ length of source array (in 'Word16' units)
        -> IO Text
fromPtr :: Ptr Word16 -> I16 -> IO Text
fromPtr Ptr Word16
_   (I16 Int
0)   = forall (m :: * -> *) a. Monad m => a -> m a
return Text
empty
fromPtr Ptr Word16
ptr (I16 Int
len) =
#if defined(ASSERTS)
    assert (len > 0) $
#endif
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! Array -> Int -> Int -> Text
Text Array
arr Int
0 Int
len
  where
    arr :: Array
arr = (forall s. ST s (MArray s)) -> Array
A.run (forall s. Int -> ST s (MArray s)
A.new Int
len forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall {s}. MArray s -> ST s (MArray s)
copy)
    copy :: MArray s -> ST s (MArray s)
copy MArray s
marr = Ptr Word16 -> Int -> ST s (MArray s)
loop Ptr Word16
ptr Int
0
      where
        loop :: Ptr Word16 -> Int -> ST s (MArray s)
loop !Ptr Word16
p !Int
i | Int
i forall a. Eq a => a -> a -> Bool
== Int
len = forall (m :: * -> *) a. Monad m => a -> m a
return MArray s
marr
                   | Bool
otherwise = do
          forall s. MArray s -> Int -> Word16 -> ST s ()
A.unsafeWrite MArray s
marr Int
i forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall a s. IO a -> ST s a
unsafeIOToST (forall a. Storable a => Ptr a -> IO a
peek Ptr Word16
p)
          Ptr Word16 -> Int -> ST s (MArray s)
loop (Ptr Word16
p forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
2) (Int
i forall a. Num a => a -> a -> a
+ Int
1)

-- $lowlevel
--
-- Foreign functions that use UTF-16 internally may return indices in
-- units of 'Word16' instead of characters.  These functions may
-- safely be used with such indices, as they will adjust offsets if
-- necessary to preserve the validity of a Unicode string.

-- | /O(1)/ Return the prefix of the 'Text' of @n@ 'Word16' units in
-- length.
--
-- If @n@ would cause the 'Text' to end inside a surrogate pair, the
-- end of the prefix will be advanced by one additional 'Word16' unit
-- to maintain its validity.
takeWord16 :: I16 -> Text -> Text
takeWord16 :: I16 -> Text -> Text
takeWord16 (I16 Int
n) t :: Text
t@(Text Array
arr Int
off Int
len)
    | Int
n forall a. Ord a => a -> a -> Bool
<= Int
0               = Text
empty
    | Int
n forall a. Ord a => a -> a -> Bool
>= Int
len Bool -> Bool -> Bool
|| Int
m forall a. Ord a => a -> a -> Bool
>= Int
len = Text
t
    | Bool
otherwise            = Array -> Int -> Int -> Text
Text Array
arr Int
off Int
m
  where
    m :: Int
m | Word16
w forall a. Ord a => a -> a -> Bool
< Word16
0xD800 Bool -> Bool -> Bool
|| Word16
w forall a. Ord a => a -> a -> Bool
> Word16
0xDBFF = Int
n
      | Bool
otherwise                = Int
nforall a. Num a => a -> a -> a
+Int
1
    w :: Word16
w = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
offforall a. Num a => a -> a -> a
+Int
nforall a. Num a => a -> a -> a
-Int
1)

-- | /O(1)/ Return the suffix of the 'Text', with @n@ 'Word16' units
-- dropped from its beginning.
--
-- If @n@ would cause the 'Text' to begin inside a surrogate pair, the
-- beginning of the suffix will be advanced by one additional 'Word16'
-- unit to maintain its validity.
dropWord16 :: I16 -> Text -> Text
dropWord16 :: I16 -> Text -> Text
dropWord16 (I16 Int
n) t :: Text
t@(Text Array
arr Int
off Int
len)
    | Int
n forall a. Ord a => a -> a -> Bool
<= Int
0               = Text
t
    | Int
n forall a. Ord a => a -> a -> Bool
>= Int
len Bool -> Bool -> Bool
|| Int
m forall a. Ord a => a -> a -> Bool
>= Int
len = Text
empty
    | Bool
otherwise            = Array -> Int -> Int -> Text
Text Array
arr (Int
offforall a. Num a => a -> a -> a
+Int
m) (Int
lenforall a. Num a => a -> a -> a
-Int
m)
  where
    m :: Int
m | Word16
w forall a. Ord a => a -> a -> Bool
< Word16
0xD800 Bool -> Bool -> Bool
|| Word16
w forall a. Ord a => a -> a -> Bool
> Word16
0xDBFF = Int
n
      | Bool
otherwise                = Int
nforall a. Num a => a -> a -> a
+Int
1
    w :: Word16
w = Array -> Int -> Word16
A.unsafeIndex Array
arr (Int
offforall a. Num a => a -> a -> a
+Int
nforall a. Num a => a -> a -> a
-Int
1)

-- | /O(n)/ Copy a 'Text' to an array.  The array is assumed to be big
-- enough to hold the contents of the entire 'Text'.
unsafeCopyToPtr :: Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr :: Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr (Text Array
arr Int
off Int
len) Ptr Word16
ptr = Ptr Word16 -> Int -> IO ()
loop Ptr Word16
ptr Int
off
  where
    end :: Int
end = Int
off forall a. Num a => a -> a -> a
+ Int
len
    loop :: Ptr Word16 -> Int -> IO ()
loop !Ptr Word16
p !Int
i | Int
i forall a. Eq a => a -> a -> Bool
== Int
end  = forall (m :: * -> *) a. Monad m => a -> m a
return ()
               | Bool
otherwise = do
      forall a. Storable a => Ptr a -> a -> IO ()
poke Ptr Word16
p (Array -> Int -> Word16
A.unsafeIndex Array
arr Int
i)
      Ptr Word16 -> Int -> IO ()
loop (Ptr Word16
p forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
2) (Int
i forall a. Num a => a -> a -> a
+ Int
1)

-- | /O(n)/ Perform an action on a temporary, mutable copy of a
-- 'Text'.  The copy is freed as soon as the action returns.
useAsPtr :: Text -> (Ptr Word16 -> I16 -> IO a) -> IO a
useAsPtr :: forall a. Text -> (Ptr Word16 -> I16 -> IO a) -> IO a
useAsPtr t :: Text
t@(Text Array
_arr Int
_off Int
len) Ptr Word16 -> I16 -> IO a
action =
    forall a b. Int -> (Ptr a -> IO b) -> IO b
allocaBytes (Int
len forall a. Num a => a -> a -> a
* Int
2) forall a b. (a -> b) -> a -> b
$ \Ptr Word16
buf -> do
      Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr Text
t Ptr Word16
buf
      Ptr Word16 -> I16 -> IO a
action (forall a b. Ptr a -> Ptr b
castPtr Ptr Word16
buf) (Int -> I16
I16 Int
len)

-- | /O(n)/ Make a mutable copy of a 'Text'.
asForeignPtr :: Text -> IO (ForeignPtr Word16, I16)
asForeignPtr :: Text -> IO (ForeignPtr Word16, I16)
asForeignPtr t :: Text
t@(Text Array
_arr Int
_off Int
len) = do
  ForeignPtr Word16
fp <- forall a. Storable a => Int -> IO (ForeignPtr a)
mallocForeignPtrArray Int
len
  forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word16
fp forall a b. (a -> b) -> a -> b
$ Text -> Ptr Word16 -> IO ()
unsafeCopyToPtr Text
t
  forall (m :: * -> *) a. Monad m => a -> m a
return (ForeignPtr Word16
fp, Int -> I16
I16 Int
len)

-- | /O(n)/ Decode a C string with explicit length, which is assumed
-- to have been encoded as UTF-8. If decoding fails, a
-- 'UnicodeException' is thrown.
--
-- @since 1.0.0.0
peekCStringLen :: CStringLen -> IO Text
peekCStringLen :: CStringLen -> IO Text
peekCStringLen CStringLen
cs = do
  ByteString
bs <- CStringLen -> IO ByteString
unsafePackCStringLen CStringLen
cs
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! ByteString -> Text
decodeUtf8 ByteString
bs

-- | Marshal a 'Text' into a C string encoded as UTF-8 in temporary
-- storage, with explicit length information. The encoded string may
-- contain NUL bytes, and is not followed by a trailing NUL byte.
--
-- The temporary storage is freed when the subcomputation terminates
-- (either normally or via an exception), so the pointer to the
-- temporary storage must /not/ be used after this function returns.
--
-- @since 1.0.0.0
withCStringLen :: Text -> (CStringLen -> IO a) -> IO a
withCStringLen :: forall a. Text -> (CStringLen -> IO a) -> IO a
withCStringLen Text
t CStringLen -> IO a
act = forall a. ByteString -> (CStringLen -> IO a) -> IO a
unsafeUseAsCStringLen (Text -> ByteString
encodeUtf8 Text
t) CStringLen -> IO a
act