{-# LANGUAGE CPP, GeneralizedNewtypeDeriving, MagicHash #-}
-- |
-- Module      : Data.Text.Foreign
-- Copyright   : (c) 2009, 2010 Bryan O'Sullivan
--
-- License     : BSD-style
-- Maintainer  : bos@serpentine.com
-- Portability : GHC
--
-- Support for using 'Text' data with native code via the Haskell
-- foreign function interface.

module Data.Text.Foreign
    (
    -- * Interoperability with native code
    -- $interop
      I8
    -- * Safe conversion functions
    , fromPtr
    , fromPtr0
    , useAsPtr
    , asForeignPtr
    -- ** Encoding as UTF-8
    , withCString
    , peekCStringLen
    , withCStringLen
    -- * Unsafe conversion code
    , lengthWord8
    , unsafeCopyToPtr
    -- * Low-level manipulation
    -- $lowlevel
    , dropWord8
    , takeWord8
    ) where

import Control.Monad.ST.Unsafe (unsafeSTToIO)
import Data.ByteString.Unsafe (unsafePackCStringLen, unsafeUseAsCStringLen)
import Data.Text.Encoding (decodeUtf8, encodeUtf8)
import Data.Text.Internal (Text(..), empty)
import Data.Text.Internal.Unsafe (unsafeWithForeignPtr)
import Data.Text.Show (addrLen)
import Data.Text.Unsafe (lengthWord8)
import Data.Word (Word8)
import Foreign.C.String (CString, CStringLen)
import Foreign.ForeignPtr (ForeignPtr, mallocForeignPtrArray)
import Foreign.Marshal.Alloc (allocaBytes)
import Foreign.Ptr (Ptr, castPtr)
import Foreign.Storable (pokeByteOff)
import GHC.Exts (Ptr(..))
import qualified Data.Text.Array as A

-- $interop
--
-- The 'Text' type is implemented using arrays that are not guaranteed
-- to have a fixed address in the Haskell heap. All communication with
-- native code must thus occur by copying data back and forth.
--
-- The 'Text' type's internal representation is UTF-8.
-- To interoperate with native libraries that use different
-- internal representations, such as UTF-16 or UTF-32, consider using
-- the functions in the 'Data.Text.Encoding' module.

-- | A type representing a number of UTF-8 code units.
--
-- @since 2.0
newtype I8 = I8 Int
    deriving (I8
I8 -> I8 -> Bounded I8
forall a. a -> a -> Bounded a
$cminBound :: I8
minBound :: I8
$cmaxBound :: I8
maxBound :: I8
Bounded, Int -> I8
I8 -> Int
I8 -> [I8]
I8 -> I8
I8 -> I8 -> [I8]
I8 -> I8 -> I8 -> [I8]
(I8 -> I8)
-> (I8 -> I8)
-> (Int -> I8)
-> (I8 -> Int)
-> (I8 -> [I8])
-> (I8 -> I8 -> [I8])
-> (I8 -> I8 -> [I8])
-> (I8 -> I8 -> I8 -> [I8])
-> Enum I8
forall a.
(a -> a)
-> (a -> a)
-> (Int -> a)
-> (a -> Int)
-> (a -> [a])
-> (a -> a -> [a])
-> (a -> a -> [a])
-> (a -> a -> a -> [a])
-> Enum a
$csucc :: I8 -> I8
succ :: I8 -> I8
$cpred :: I8 -> I8
pred :: I8 -> I8
$ctoEnum :: Int -> I8
toEnum :: Int -> I8
$cfromEnum :: I8 -> Int
fromEnum :: I8 -> Int
$cenumFrom :: I8 -> [I8]
enumFrom :: I8 -> [I8]
$cenumFromThen :: I8 -> I8 -> [I8]
enumFromThen :: I8 -> I8 -> [I8]
$cenumFromTo :: I8 -> I8 -> [I8]
enumFromTo :: I8 -> I8 -> [I8]
$cenumFromThenTo :: I8 -> I8 -> I8 -> [I8]
enumFromThenTo :: I8 -> I8 -> I8 -> [I8]
Enum, I8 -> I8 -> Bool
(I8 -> I8 -> Bool) -> (I8 -> I8 -> Bool) -> Eq I8
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: I8 -> I8 -> Bool
== :: I8 -> I8 -> Bool
$c/= :: I8 -> I8 -> Bool
/= :: I8 -> I8 -> Bool
Eq, Enum I8
Real I8
(Real I8, Enum I8) =>
(I8 -> I8 -> I8)
-> (I8 -> I8 -> I8)
-> (I8 -> I8 -> I8)
-> (I8 -> I8 -> I8)
-> (I8 -> I8 -> (I8, I8))
-> (I8 -> I8 -> (I8, I8))
-> (I8 -> Integer)
-> Integral I8
I8 -> Integer
I8 -> I8 -> (I8, I8)
I8 -> I8 -> I8
forall a.
(Real a, Enum a) =>
(a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> (a, a))
-> (a -> a -> (a, a))
-> (a -> Integer)
-> Integral a
$cquot :: I8 -> I8 -> I8
quot :: I8 -> I8 -> I8
$crem :: I8 -> I8 -> I8
rem :: I8 -> I8 -> I8
$cdiv :: I8 -> I8 -> I8
div :: I8 -> I8 -> I8
$cmod :: I8 -> I8 -> I8
mod :: I8 -> I8 -> I8
$cquotRem :: I8 -> I8 -> (I8, I8)
quotRem :: I8 -> I8 -> (I8, I8)
$cdivMod :: I8 -> I8 -> (I8, I8)
divMod :: I8 -> I8 -> (I8, I8)
$ctoInteger :: I8 -> Integer
toInteger :: I8 -> Integer
Integral, Integer -> I8
I8 -> I8
I8 -> I8 -> I8
(I8 -> I8 -> I8)
-> (I8 -> I8 -> I8)
-> (I8 -> I8 -> I8)
-> (I8 -> I8)
-> (I8 -> I8)
-> (I8 -> I8)
-> (Integer -> I8)
-> Num I8
forall a.
(a -> a -> a)
-> (a -> a -> a)
-> (a -> a -> a)
-> (a -> a)
-> (a -> a)
-> (a -> a)
-> (Integer -> a)
-> Num a
$c+ :: I8 -> I8 -> I8
+ :: I8 -> I8 -> I8
$c- :: I8 -> I8 -> I8
- :: I8 -> I8 -> I8
$c* :: I8 -> I8 -> I8
* :: I8 -> I8 -> I8
$cnegate :: I8 -> I8
negate :: I8 -> I8
$cabs :: I8 -> I8
abs :: I8 -> I8
$csignum :: I8 -> I8
signum :: I8 -> I8
$cfromInteger :: Integer -> I8
fromInteger :: Integer -> I8
Num, Eq I8
Eq I8 =>
(I8 -> I8 -> Ordering)
-> (I8 -> I8 -> Bool)
-> (I8 -> I8 -> Bool)
-> (I8 -> I8 -> Bool)
-> (I8 -> I8 -> Bool)
-> (I8 -> I8 -> I8)
-> (I8 -> I8 -> I8)
-> Ord I8
I8 -> I8 -> Bool
I8 -> I8 -> Ordering
I8 -> I8 -> I8
forall a.
Eq a =>
(a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
$ccompare :: I8 -> I8 -> Ordering
compare :: I8 -> I8 -> Ordering
$c< :: I8 -> I8 -> Bool
< :: I8 -> I8 -> Bool
$c<= :: I8 -> I8 -> Bool
<= :: I8 -> I8 -> Bool
$c> :: I8 -> I8 -> Bool
> :: I8 -> I8 -> Bool
$c>= :: I8 -> I8 -> Bool
>= :: I8 -> I8 -> Bool
$cmax :: I8 -> I8 -> I8
max :: I8 -> I8 -> I8
$cmin :: I8 -> I8 -> I8
min :: I8 -> I8 -> I8
Ord, ReadPrec [I8]
ReadPrec I8
Int -> ReadS I8
ReadS [I8]
(Int -> ReadS I8)
-> ReadS [I8] -> ReadPrec I8 -> ReadPrec [I8] -> Read I8
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
$creadsPrec :: Int -> ReadS I8
readsPrec :: Int -> ReadS I8
$creadList :: ReadS [I8]
readList :: ReadS [I8]
$creadPrec :: ReadPrec I8
readPrec :: ReadPrec I8
$creadListPrec :: ReadPrec [I8]
readListPrec :: ReadPrec [I8]
Read, Num I8
Ord I8
(Num I8, Ord I8) => (I8 -> Rational) -> Real I8
I8 -> Rational
forall a. (Num a, Ord a) => (a -> Rational) -> Real a
$ctoRational :: I8 -> Rational
toRational :: I8 -> Rational
Real, Int -> I8 -> ShowS
[I8] -> ShowS
I8 -> String
(Int -> I8 -> ShowS)
-> (I8 -> String) -> ([I8] -> ShowS) -> Show I8
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> I8 -> ShowS
showsPrec :: Int -> I8 -> ShowS
$cshow :: I8 -> String
show :: I8 -> String
$cshowList :: [I8] -> ShowS
showList :: [I8] -> ShowS
Show)

-- | /O(n)/ Create a new 'Text' from a 'Ptr' 'Word8' by copying the
-- contents of the array.
fromPtr :: Ptr Word8           -- ^ source array
        -> I8                  -- ^ length of source array (in 'Word8' units)
        -> IO Text
fromPtr :: Ptr Word8 -> I8 -> IO Text
fromPtr Ptr Word8
_   (I8 Int
0)   = Text -> IO Text
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Text
empty
fromPtr Ptr Word8
ptr (I8 Int
len) = ST Any Text -> IO Text
forall s a. ST s a -> IO a
unsafeSTToIO (ST Any Text -> IO Text) -> ST Any Text -> IO Text
forall a b. (a -> b) -> a -> b
$ do
  dst <- Int -> ST Any (MArray Any)
forall s. Int -> ST s (MArray s)
A.new Int
len
  A.copyFromPointer dst 0 ptr len
  arr <- A.unsafeFreeze dst
  return $! Text arr 0 len

-- | /O(n)/ Create a new 'Text' from a 'Ptr' 'Word8' by copying the
-- contents of the NUL-terminated array.
--
-- @since 2.0.1
fromPtr0 :: Ptr Word8           -- ^ source array
         -> IO Text
fromPtr0 :: Ptr Word8 -> IO Text
fromPtr0 ptr :: Ptr Word8
ptr@(Ptr Addr#
addr#) = Ptr Word8 -> I8 -> IO Text
fromPtr Ptr Word8
ptr (Int -> I8
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Addr# -> Int
addrLen Addr#
addr#))

-- $lowlevel
--
-- Foreign functions that use UTF-8 internally may return indices in
-- units of 'Word8' instead of characters.  These functions may
-- safely be used with such indices, as they will adjust offsets if
-- necessary to preserve the validity of a Unicode string.

-- | /O(1)/ Return the prefix of the 'Text' of @n@ 'Word8' units in
-- length.
--
-- If @n@ would cause the 'Text' to end inside a code point, the
-- end of the prefix will be advanced by several additional 'Word8' units
-- to maintain its validity.
--
-- @since 2.0
takeWord8 :: I8 -> Text -> Text
takeWord8 :: I8 -> Text -> Text
takeWord8 = ((Text, Text) -> Text
forall a b. (a, b) -> a
fst ((Text, Text) -> Text) -> (Text -> (Text, Text)) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.) ((Text -> (Text, Text)) -> Text -> Text)
-> (I8 -> Text -> (Text, Text)) -> I8 -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. I8 -> Text -> (Text, Text)
splitAtWord8

-- | /O(1)/ Return the suffix of the 'Text', with @n@ 'Word8' units
-- dropped from its beginning.
--
-- If @n@ would cause the 'Text' to begin inside a code point, the
-- beginning of the suffix will be advanced by several additional 'Word8'
-- unit to maintain its validity.
--
-- @since 2.0
dropWord8 :: I8 -> Text -> Text
dropWord8 :: I8 -> Text -> Text
dropWord8 = ((Text, Text) -> Text
forall a b. (a, b) -> b
snd ((Text, Text) -> Text) -> (Text -> (Text, Text)) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
.) ((Text -> (Text, Text)) -> Text -> Text)
-> (I8 -> Text -> (Text, Text)) -> I8 -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. I8 -> Text -> (Text, Text)
splitAtWord8

splitAtWord8 :: I8 -> Text -> (Text, Text)
splitAtWord8 :: I8 -> Text -> (Text, Text)
splitAtWord8 (I8 Int
n) t :: Text
t@(Text Array
arr Int
off Int
len)
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0               = (Text
empty, Text
t)
    | Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len Bool -> Bool -> Bool
|| Int
m Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len = (Text
t, Text
empty)
    | Bool
otherwise            = (Array -> Int -> Int -> Text
Text Array
arr Int
off Int
m, Array -> Int -> Int -> Text
Text Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
m) (Int
lenInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
m))
  where
    m :: Int
m | Word8
w0 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<  Word8
0x80 = Int
n   -- last char is ASCII
      | Word8
w0 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
0xF0 = Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3 -- last char starts 4-byte sequence
      | Word8
w0 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
0xE0 = Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2 -- last char starts 3-byte sequence
      | Word8
w0 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
0xC0 = Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 -- last char starts 2-byte sequence
      | Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
0xF0 = Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2 -- pre-last char starts 4-byte sequence
      | Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
0xE0 = Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 -- pre-last char starts 3-byte sequence
      | Word8
w1 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
0xC0 = Int
n   -- pre-last char starts 2-byte sequence
      | Word8
w2 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
0xF0 = Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 -- pre-pre-last char starts 4-byte sequence
      | Bool
otherwise  = Int
n   -- pre-pre-last char starts 3-byte sequence
    w0 :: Word8
w0 = Array -> Int -> Word8
A.unsafeIndex Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
    w1 :: Word8
w1 = Array -> Int -> Word8
A.unsafeIndex Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
2)
    w2 :: Word8
w2 = Array -> Int -> Word8
A.unsafeIndex Array
arr (Int
offInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
3)

-- | /O(n)/ Copy a 'Text' to an array.  The array is assumed to be big
-- enough to hold the contents of the entire 'Text'.
unsafeCopyToPtr :: Text -> Ptr Word8 -> IO ()
unsafeCopyToPtr :: Text -> Ptr Word8 -> IO ()
unsafeCopyToPtr (Text Array
arr Int
off Int
len) Ptr Word8
ptr = ST Any () -> IO ()
forall s a. ST s a -> IO a
unsafeSTToIO (ST Any () -> IO ()) -> ST Any () -> IO ()
forall a b. (a -> b) -> a -> b
$ Array -> Int -> Ptr Word8 -> Int -> ST Any ()
forall s. Array -> Int -> Ptr Word8 -> Int -> ST s ()
A.copyToPointer Array
arr Int
off Ptr Word8
ptr Int
len

-- | /O(n)/ Perform an action on a temporary, mutable copy of a
-- 'Text'.  The copy is freed as soon as the action returns.
useAsPtr :: Text -> (Ptr Word8 -> I8 -> IO a) -> IO a
useAsPtr :: forall a. Text -> (Ptr Word8 -> I8 -> IO a) -> IO a
useAsPtr t :: Text
t@(Text Array
_arr Int
_off Int
len) Ptr Word8 -> I8 -> IO a
action =
    Int -> (Ptr Word8 -> IO a) -> IO a
forall a b. Int -> (Ptr a -> IO b) -> IO b
allocaBytes Int
len ((Ptr Word8 -> IO a) -> IO a) -> (Ptr Word8 -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
buf -> do
      Text -> Ptr Word8 -> IO ()
unsafeCopyToPtr Text
t Ptr Word8
buf
      Ptr Word8 -> I8 -> IO a
action (Ptr Word8 -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr Word8
buf) (Int -> I8
I8 Int
len)

-- | /O(n)/ Make a mutable copy of a 'Text'.
asForeignPtr :: Text -> IO (ForeignPtr Word8, I8)
asForeignPtr :: Text -> IO (ForeignPtr Word8, I8)
asForeignPtr t :: Text
t@(Text Array
_arr Int
_off Int
len) = do
  fp <- Int -> IO (ForeignPtr Word8)
forall a. Storable a => Int -> IO (ForeignPtr a)
mallocForeignPtrArray Int
len
  unsafeWithForeignPtr fp $ unsafeCopyToPtr t
  return (fp, I8 len)

-- | Marshal a 'Text' into a C string with a trailing NUL byte,
-- encoded as UTF-8 in temporary storage.
--
-- The temporary storage is freed when the subcomputation terminates
-- (either normally or via an exception), so the pointer to the
-- temporary storage must /not/ be used after this function returns.
--
-- @since 2.0.1
withCString :: Text -> (CString -> IO a) -> IO a
withCString :: forall a. Text -> (CString -> IO a) -> IO a
withCString t :: Text
t@(Text Array
_arr Int
_off Int
len) CString -> IO a
action =
  Int -> (Ptr Word8 -> IO a) -> IO a
forall a b. Int -> (Ptr a -> IO b) -> IO b
allocaBytes (Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) ((Ptr Word8 -> IO a) -> IO a) -> (Ptr Word8 -> IO a) -> IO a
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
buf -> do
    Text -> Ptr Word8 -> IO ()
unsafeCopyToPtr Text
t Ptr Word8
buf
    Ptr Word8 -> Int -> Word8 -> IO ()
forall b. Ptr b -> Int -> Word8 -> IO ()
forall a b. Storable a => Ptr b -> Int -> a -> IO ()
pokeByteOff Ptr Word8
buf Int
len (Word8
0 :: Word8)
    CString -> IO a
action (Ptr Word8 -> CString
forall a b. Ptr a -> Ptr b
castPtr Ptr Word8
buf)

-- | /O(n)/ Decode a C string with explicit length, which is assumed
-- to have been encoded as UTF-8. If decoding fails, a
-- 'UnicodeException' is thrown.
--
-- @since 1.0.0.0
peekCStringLen :: CStringLen -> IO Text
peekCStringLen :: CStringLen -> IO Text
peekCStringLen CStringLen
cs = do
  bs <- CStringLen -> IO ByteString
unsafePackCStringLen CStringLen
cs
  return $! decodeUtf8 bs

-- | Marshal a 'Text' into a C string encoded as UTF-8 in temporary
-- storage, with explicit length information. The encoded string may
-- contain NUL bytes, and is not followed by a trailing NUL byte.
--
-- The temporary storage is freed when the subcomputation terminates
-- (either normally or via an exception), so the pointer to the
-- temporary storage must /not/ be used after this function returns.
--
-- @since 1.0.0.0
withCStringLen :: Text -> (CStringLen -> IO a) -> IO a
withCStringLen :: forall a. Text -> (CStringLen -> IO a) -> IO a
withCStringLen Text
t CStringLen -> IO a
act = ByteString -> (CStringLen -> IO a) -> IO a
forall a. ByteString -> (CStringLen -> IO a) -> IO a
unsafeUseAsCStringLen (Text -> ByteString
encodeUtf8 Text
t) CStringLen -> IO a
act