{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnliftedFFITypes #-}
module Data.Text.Internal.Validate
(
isValidUtf8ByteString
, isValidUtf8ByteArray
, isValidUtf8ByteArrayUnpinned
, isValidUtf8ByteArrayPinned
) where
import Data.Array.Byte (ByteArray(ByteArray))
import Data.ByteString (ByteString)
import GHC.Exts (isTrue#,isByteArrayPinned#)
#ifdef SIMDUTF
import Data.Text.Unsafe (unsafeDupablePerformIO)
import Data.Text.Internal.ByteStringCompat (withBS)
import Data.Text.Internal.Unsafe (unsafeWithForeignPtr)
import Data.Text.Internal.Validate.Simd (c_is_valid_utf8_bytearray_safe,c_is_valid_utf8_bytearray_unsafe,c_is_valid_utf8_ptr_unsafe)
#else
import GHC.Exts (ByteArray#)
import Data.Text.Internal.Encoding.Utf8 (CodePoint(..),DecoderResult(..),utf8DecodeStart,utf8DecodeContinue)
import GHC.Exts (Int(I#),indexWord8Array#)
import GHC.Word (Word8(W8#))
import qualified Data.ByteString as B
#if !MIN_VERSION_bytestring(0,11,2)
import qualified Data.ByteString.Unsafe as B
#endif
#endif
isValidUtf8ByteString :: ByteString -> Bool
#ifdef SIMDUTF
isValidUtf8ByteString bs = withBS bs $ \fp len -> unsafeDupablePerformIO $
unsafeWithForeignPtr fp $ \ptr -> (/= 0) <$> c_is_valid_utf8_ptr_unsafe ptr (fromIntegral len)
#else
#if MIN_VERSION_bytestring(0,11,2)
isValidUtf8ByteString :: ByteString -> Bool
isValidUtf8ByteString = ByteString -> Bool
B.isValidUtf8
#else
isValidUtf8ByteString bs = start 0
where
start ix
| ix >= B.length bs = True
| otherwise = case utf8DecodeStart (B.unsafeIndex bs ix) of
Accept{} -> start (ix + 1)
Reject{} -> False
Incomplete st _ -> step (ix + 1) st
step ix st
| ix >= B.length bs = False
| otherwise = case utf8DecodeContinue (B.unsafeIndex bs ix) st (CodePoint 0) of
Accept{} -> start (ix + 1)
Reject{} -> False
Incomplete st' _ -> step (ix + 1) st'
#endif
#endif
isValidUtf8ByteArray ::
ByteArray
-> Int
-> Int
-> Bool
isValidUtf8ByteArray :: ByteArray -> Int -> Int -> Bool
isValidUtf8ByteArray b :: ByteArray
b@(ByteArray ByteArray#
b#) !Int
off !Int
len
| Int
len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
131072
, Int# -> Bool
isTrue# (ByteArray# -> Int#
isByteArrayPinned# ByteArray#
b#)
= ByteArray -> Int -> Int -> Bool
isValidUtf8ByteArrayPinned ByteArray
b Int
off Int
len
| Bool
otherwise = ByteArray -> Int -> Int -> Bool
isValidUtf8ByteArrayUnpinned ByteArray
b Int
off Int
len
isValidUtf8ByteArrayUnpinned ::
ByteArray
-> Int
-> Int
-> Bool
#ifdef SIMDUTF
isValidUtf8ByteArrayUnpinned (ByteArray bs) !off !len =
unsafeDupablePerformIO $ (/= 0) <$> c_is_valid_utf8_bytearray_unsafe bs (fromIntegral off) (fromIntegral len)
#else
isValidUtf8ByteArrayUnpinned :: ByteArray -> Int -> Int -> Bool
isValidUtf8ByteArrayUnpinned (ByteArray ByteArray#
bs) = ByteArray# -> Int -> Int -> Bool
isValidUtf8ByteArrayHaskell# ByteArray#
bs
#endif
isValidUtf8ByteArrayPinned ::
ByteArray
-> Int
-> Int
-> Bool
#ifdef SIMDUTF
isValidUtf8ByteArrayPinned (ByteArray bs) !off !len =
unsafeDupablePerformIO $ (/= 0) <$> c_is_valid_utf8_bytearray_safe bs (fromIntegral off) (fromIntegral len)
#else
isValidUtf8ByteArrayPinned :: ByteArray -> Int -> Int -> Bool
isValidUtf8ByteArrayPinned (ByteArray ByteArray#
bs) = ByteArray# -> Int -> Int -> Bool
isValidUtf8ByteArrayHaskell# ByteArray#
bs
#endif
#ifndef SIMDUTF
isValidUtf8ByteArrayHaskell# ::
ByteArray#
-> Int
-> Int
-> Bool
isValidUtf8ByteArrayHaskell# :: ByteArray# -> Int -> Int -> Bool
isValidUtf8ByteArrayHaskell# ByteArray#
b !Int
off !Int
len = Int -> Bool
start Int
off
where
indexWord8 :: ByteArray# -> Int -> Word8
indexWord8 :: ByteArray# -> Int -> Word8
indexWord8 !ByteArray#
x (I# Int#
i) = Word8# -> Word8
W8# (ByteArray# -> Int# -> Word8#
indexWord8Array# ByteArray#
x Int#
i)
start :: Int -> Bool
start Int
ix
| Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len = Bool
True
| Bool
otherwise = case Word8 -> DecoderResult
utf8DecodeStart (ByteArray# -> Int -> Word8
indexWord8 ByteArray#
b Int
ix) of
Accept{} -> Int -> Bool
start (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Reject{} -> Bool
False
Incomplete DecoderState
st CodePoint
_ -> Int -> DecoderState -> Bool
step (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) DecoderState
st
step :: Int -> DecoderState -> Bool
step Int
ix DecoderState
st
| Int
ix Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len = Bool
False
| Bool
otherwise = case Word8 -> DecoderState -> CodePoint -> DecoderResult
utf8DecodeContinue (ByteArray# -> Int -> Word8
indexWord8 ByteArray#
b Int
ix) DecoderState
st (Int -> CodePoint
CodePoint Int
0) of
Accept{} -> Int -> Bool
start (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Reject{} -> Bool
False
Incomplete DecoderState
st' CodePoint
_ -> Int -> DecoderState -> Bool
step (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) DecoderState
st'
#endif