module GHC.Data.FastString
(
bytesFS,
fastStringToByteString,
mkFastStringByteString,
fastZStringToByteString,
unsafeMkByteString,
fastStringToShortByteString,
mkFastStringShortByteString,
FastZString,
hPutFZS,
zString,
lengthFZS,
FastString(..),
fsLit,
mkFastString,
mkFastStringBytes,
mkFastStringByteList,
mkFastString#,
unpackFS,
zEncodeFS,
uniqueOfFS,
lengthFS,
nullFS,
appendFS,
headFS,
concatFS,
consFS,
nilFS,
isUnderscoreFS,
hPutFS,
getFastStringTable,
getFastStringZEncCounter,
PtrString (..),
sLit,
mkPtrString#,
mkPtrString,
unpackPtrString,
lengthPS
) where
#include "HsVersions.h"
import GHC.Prelude as Prelude
import GHC.Utils.Encoding
import GHC.Utils.IO.Unsafe
import GHC.Utils.Panic.Plain
import GHC.Utils.Misc
import Control.Concurrent.MVar
import Control.DeepSeq
import Control.Monad
import Data.ByteString (ByteString)
import Data.ByteString.Short (ShortByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.ByteString.Unsafe as BS
import qualified Data.ByteString.Short as SBS
import qualified Data.ByteString.Short.Internal as SBS
import Foreign.C
import System.IO
import Data.Data
import Data.IORef
import Data.Char
import Data.Semigroup as Semi
import Foreign
#if GHC_STAGE >= 2
import GHC.Conc.Sync (sharedCAF)
#endif
#if __GLASGOW_HASKELL__ < 811
import GHC.Base (unpackCString#,unpackNBytes#)
#endif
import GHC.Exts
import GHC.IO
bytesFS, fastStringToByteString :: FastString -> ByteString
bytesFS = fastStringToByteString
fastStringToByteString f = SBS.fromShort $ fs_sbs f
fastStringToShortByteString :: FastString -> ShortByteString
fastStringToShortByteString = fs_sbs
fastZStringToByteString :: FastZString -> ByteString
fastZStringToByteString (FastZString bs) = bs
unsafeMkByteString :: String -> ByteString
unsafeMkByteString = BSC.pack
hashFastString :: FastString -> Int
hashFastString fs = hashStr $ fs_sbs fs
newtype FastZString = FastZString ByteString
deriving NFData
hPutFZS :: Handle -> FastZString -> IO ()
hPutFZS handle (FastZString bs) = BS.hPut handle bs
zString :: FastZString -> String
zString (FastZString bs) =
inlinePerformIO $ BS.unsafeUseAsCStringLen bs peekCAStringLen
lengthFZS :: FastZString -> Int
lengthFZS (FastZString bs) = BS.length bs
mkFastZStringString :: String -> FastZString
mkFastZStringString str = FastZString (BSC.pack str)
data FastString = FastString {
uniq :: !Int,
n_chars :: !Int,
fs_sbs :: !ShortByteString,
fs_zenc :: FastZString
}
instance Eq FastString where
f1 == f2 = uniq f1 == uniq f2
instance Ord FastString where
a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
max x y | x >= y = x
| otherwise = y
min x y | x <= y = x
| otherwise = y
compare a b = cmpFS a b
instance IsString FastString where
fromString = fsLit
instance Semi.Semigroup FastString where
(<>) = appendFS
instance Monoid FastString where
mempty = nilFS
mappend = (Semi.<>)
mconcat = concatFS
instance Show FastString where
show fs = show (unpackFS fs)
instance Data FastString where
toConstr _ = abstractConstr "FastString"
gunfold _ _ = error "gunfold"
dataTypeOf _ = mkNoRepType "FastString"
instance NFData FastString where
rnf fs = seq fs ()
cmpFS :: FastString -> FastString -> Ordering
cmpFS fs1 fs2 =
if uniq fs1 == uniq fs2 then EQ else
compare (fs_sbs fs1) (fs_sbs fs2)
data FastStringTable = FastStringTable
!(IORef Int)
!(IORef Int)
(Array# (IORef FastStringTableSegment))
data FastStringTableSegment = FastStringTableSegment
!(MVar ())
!(IORef Int)
(MutableArray# RealWorld [FastString])
segmentBits, numSegments, segmentMask, initialNumBuckets :: Int
segmentBits = 8
numSegments = 256
segmentMask = 0xff
initialNumBuckets = 64
hashToSegment# :: Int# -> Int#
hashToSegment# hash# = hash# `andI#` segmentMask#
where
!(I# segmentMask#) = segmentMask
hashToIndex# :: MutableArray# RealWorld [FastString] -> Int# -> Int#
hashToIndex# buckets# hash# =
(hash# `uncheckedIShiftRL#` segmentBits#) `remInt#` size#
where
!(I# segmentBits#) = segmentBits
size# = sizeofMutableArray# buckets#
maybeResizeSegment :: IORef FastStringTableSegment -> IO FastStringTableSegment
maybeResizeSegment segmentRef = do
segment@(FastStringTableSegment lock counter old#) <- readIORef segmentRef
let oldSize# = sizeofMutableArray# old#
newSize# = oldSize# *# 2#
(I# n#) <- readIORef counter
if isTrue# (n# <# newSize#)
then return segment
else do
resizedSegment@(FastStringTableSegment _ _ new#) <- IO $ \s1# ->
case newArray# newSize# [] s1# of
(# s2#, arr# #) -> (# s2#, FastStringTableSegment lock counter arr# #)
forM_ [0 .. (I# oldSize#) 1] $ \(I# i#) -> do
fsList <- IO $ readArray# old# i#
forM_ fsList $ \fs -> do
let
!(I# hash#) = hashFastString fs
idx# = hashToIndex# new# hash#
IO $ \s1# ->
case readArray# new# idx# s1# of
(# s2#, bucket #) -> case writeArray# new# idx# (fs: bucket) s2# of
s3# -> (# s3#, () #)
writeIORef segmentRef resizedSegment
return resizedSegment
stringTable :: FastStringTable
stringTable = unsafePerformIO $ do
let !(I# numSegments#) = numSegments
!(I# initialNumBuckets#) = initialNumBuckets
loop a# i# s1#
| isTrue# (i# ==# numSegments#) = s1#
| otherwise = case newMVar () `unIO` s1# of
(# s2#, lock #) -> case newIORef 0 `unIO` s2# of
(# s3#, counter #) -> case newArray# initialNumBuckets# [] s3# of
(# s4#, buckets# #) -> case newIORef
(FastStringTableSegment lock counter buckets#) `unIO` s4# of
(# s5#, segment #) -> case writeArray# a# i# segment s5# of
s6# -> loop a# (i# +# 1#) s6#
uid <- newIORef 603979776
n_zencs <- newIORef 0
tab <- IO $ \s1# ->
case newArray# numSegments# (panic "string_table") s1# of
(# s2#, arr# #) -> case loop arr# 0# s2# of
s3# -> case unsafeFreezeArray# arr# s3# of
(# s4#, segments# #) ->
(# s4#, FastStringTable uid n_zencs segments# #)
#if GHC_STAGE < 2
return tab
#else
sharedCAF tab getOrSetLibHSghcFastStringTable
foreign import ccall unsafe "getOrSetLibHSghcFastStringTable"
getOrSetLibHSghcFastStringTable :: Ptr a -> IO (Ptr a)
#endif
mkFastString# :: Addr# -> FastString
mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
where ptr = Ptr a#
mkFastStringWith
:: (Int -> IORef Int-> IO FastString) -> ShortByteString -> IO FastString
mkFastStringWith mk_fs sbs = do
FastStringTableSegment lock _ buckets# <- readIORef segmentRef
let idx# = hashToIndex# buckets# hash#
bucket <- IO $ readArray# buckets# idx#
res <- bucket_match bucket sbs
case res of
Just found -> return found
Nothing -> do
noDuplicate
n <- get_uid
new_fs <- mk_fs n n_zencs
withMVar lock $ \_ -> insert new_fs
where
!(FastStringTable uid n_zencs segments#) = stringTable
get_uid = atomicModifyIORef' uid $ \n -> (n+1,n)
!(I# hash#) = hashStr sbs
(# segmentRef #) = indexArray# segments# (hashToSegment# hash#)
insert fs = do
FastStringTableSegment _ counter buckets# <- maybeResizeSegment segmentRef
let idx# = hashToIndex# buckets# hash#
bucket <- IO $ readArray# buckets# idx#
res <- bucket_match bucket sbs
case res of
Just found -> return found
Nothing -> do
IO $ \s1# ->
case writeArray# buckets# idx# (fs: bucket) s1# of
s2# -> (# s2#, () #)
modifyIORef' counter succ
return fs
bucket_match :: [FastString] -> ShortByteString -> IO (Maybe FastString)
bucket_match [] _ = return Nothing
bucket_match (fs@(FastString {fs_sbs=fs_sbs}) : ls) sbs
| fs_sbs == sbs = return (Just fs)
| otherwise = bucket_match ls sbs
mkFastStringBytes :: Ptr Word8 -> Int -> FastString
mkFastStringBytes !ptr !len =
unsafeDupablePerformIO $ do
sbs <- newSBSFromPtr ptr len
mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
newSBSFromPtr :: Ptr a -> Int -> IO ShortByteString
newSBSFromPtr (Ptr src#) (I# len#) = do
IO $ \s ->
case newByteArray# len# s of { (# s, dst# #) ->
case copyAddrToByteArray# src# dst# 0# len# s of { s ->
case unsafeFreezeByteArray# dst# s of { (# s, ba# #) ->
(# s, SBS.SBS ba# #) }}}
mkFastStringByteString :: ByteString -> FastString
mkFastStringByteString bs =
let sbs = SBS.toShort bs in
inlinePerformIO $
mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
mkFastStringShortByteString :: ShortByteString -> FastString
mkFastStringShortByteString sbs =
inlinePerformIO $ mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
mkFastString :: String -> FastString
mkFastString str =
inlinePerformIO $ do
sbs <- utf8EncodeShortByteString str
mkFastStringWith (mkNewFastStringShortByteString sbs) sbs
mkFastStringByteList :: [Word8] -> FastString
mkFastStringByteList str = mkFastStringShortByteString (SBS.pack str)
mkZFastString :: IORef Int -> ShortByteString -> FastZString
mkZFastString n_zencs sbs = unsafePerformIO $ do
atomicModifyIORef' n_zencs $ \n -> (n+1, ())
return $ mkFastZStringString (zEncodeString (utf8DecodeShortByteString sbs))
mkNewFastStringShortByteString :: ShortByteString -> Int
-> IORef Int -> IO FastString
mkNewFastStringShortByteString sbs uid n_zencs = do
let zstr = mkZFastString n_zencs sbs
chars <- countUTF8Chars sbs
return (FastString uid chars sbs zstr)
hashStr :: ShortByteString -> Int
hashStr sbs@(SBS.SBS ba#) = loop 0# 0#
where
!(I# len#) = SBS.length sbs
loop h n =
if isTrue# (n ==# len#) then
I# h
else
let
!c = indexInt8Array# ba# n
!h2 = (h *# 16777619#) `xorI#` c
in
loop h2 (n +# 1#)
lengthFS :: FastString -> Int
lengthFS fs = n_chars fs
nullFS :: FastString -> Bool
nullFS fs = SBS.null $ fs_sbs fs
unpackFS :: FastString -> String
unpackFS fs = utf8DecodeShortByteString $ fs_sbs fs
zEncodeFS :: FastString -> FastZString
zEncodeFS fs = fs_zenc fs
appendFS :: FastString -> FastString -> FastString
appendFS fs1 fs2 = mkFastStringByteString
$ BS.append (bytesFS fs1) (bytesFS fs2)
concatFS :: [FastString] -> FastString
concatFS = mkFastStringShortByteString . mconcat . map fs_sbs
headFS :: FastString -> Char
headFS fs
| SBS.null $ fs_sbs fs = panic "headFS: Empty FastString"
headFS fs = head $ unpackFS fs
consFS :: Char -> FastString -> FastString
consFS c fs = mkFastString (c : unpackFS fs)
uniqueOfFS :: FastString -> Int
uniqueOfFS fs = uniq fs
nilFS :: FastString
nilFS = mkFastString ""
isUnderscoreFS :: FastString -> Bool
isUnderscoreFS fs = fs == fsLit "_"
getFastStringTable :: IO [[[FastString]]]
getFastStringTable =
forM [0 .. numSegments 1] $ \(I# i#) -> do
let (# segmentRef #) = indexArray# segments# i#
FastStringTableSegment _ _ buckets# <- readIORef segmentRef
let bucketSize = I# (sizeofMutableArray# buckets#)
forM [0 .. bucketSize 1] $ \(I# j#) ->
IO $ readArray# buckets# j#
where
!(FastStringTable _ _ segments#) = stringTable
getFastStringZEncCounter :: IO Int
getFastStringZEncCounter = readIORef n_zencs
where
!(FastStringTable _ n_zencs _) = stringTable
hPutFS :: Handle -> FastString -> IO ()
hPutFS handle fs = BS.hPut handle $ bytesFS fs
data PtrString = PtrString !(Ptr Word8) !Int
mkPtrString# :: Addr# -> PtrString
mkPtrString# a# = PtrString (Ptr a#) (ptrStrLength (Ptr a#))
mkPtrString :: String -> PtrString
mkPtrString s =
unsafePerformIO (do
let len = length s
p <- mallocBytes len
let
loop :: Int -> String -> IO ()
loop !_ [] = return ()
loop n (c:cs) = do
pokeByteOff p n (fromIntegral (ord c) :: Word8)
loop (1+n) cs
loop 0 s
return (PtrString p len)
)
unpackPtrString :: PtrString -> String
unpackPtrString (PtrString (Ptr p#) (I# n#)) = unpackNBytes# p# n#
lengthPS :: PtrString -> Int
lengthPS (PtrString _ n) = n
foreign import ccall unsafe "strlen"
ptrStrLength :: Ptr Word8 -> Int
sLit :: String -> PtrString
sLit x = mkPtrString x
fsLit :: String -> FastString
fsLit x = mkFastString x