%
% (c) The University of Glasgow, 19972006
%
\begin{code}
module FastString
(
FastString(..),
fsLit,
mkFastString,
mkFastStringBytes,
mkFastStringByteList,
mkFastStringForeignPtr,
#if defined(__GLASGOW_HASKELL__)
mkFastString#,
#endif
mkZFastString,
mkZFastStringBytes,
unpackFS,
bytesFS,
isZEncoded,
zEncodeFS,
uniqueOfFS,
lengthFS,
nullFS,
appendFS,
headFS,
tailFS,
concatFS,
consFS,
nilFS,
hPutFS,
getFastStringTable,
hasZEncoding,
LitString,
sLit,
#if defined(__GLASGOW_HASKELL__)
mkLitString#,
#endif
mkLitString,
unpackLitString,
lengthLS
) where
#include "HsVersions.h"
import Encoding
import FastTypes
import FastFunctions
import Panic
import Foreign
import Foreign.C
import GHC.Exts
import System.IO
import System.IO.Unsafe ( unsafePerformIO )
import Data.IORef ( IORef, newIORef, readIORef, atomicModifyIORef )
import Data.Maybe ( isJust )
import Data.Char ( ord )
#if __GLASGOW_HASKELL__ >= 611
import GHC.IO ( IO(..) )
#else
import GHC.IOBase ( IO(..) )
#endif
import GHC.Ptr ( Ptr(..) )
#if defined(__GLASGOW_HASKELL__)
import GHC.Base ( unpackCString# )
#endif
#define hASH_TBL_SIZE 4091
#define hASH_TBL_SIZE_UNBOXED 4091#
data FastString = FastString {
uniq :: !Int,
n_bytes :: !Int,
n_chars :: !Int,
buf :: !(ForeignPtr Word8),
enc :: FSEncoding
}
data FSEncoding
= ZEncoded
| UTF8Encoded !(IORef (Maybe FastString))
instance Eq FastString where
f1 == f2 = uniq f1 == uniq f2
instance Ord FastString where
a <= b = case cmpFS a b of { LT -> True; EQ -> True; GT -> False }
a < b = case cmpFS a b of { LT -> True; EQ -> False; GT -> False }
a >= b = case cmpFS a b of { LT -> False; EQ -> True; GT -> True }
a > b = case cmpFS a b of { LT -> False; EQ -> False; GT -> True }
max x y | x >= y = x
| otherwise = y
min x y | x <= y = x
| otherwise = y
compare a b = cmpFS a b
instance Show FastString where
show fs = show (unpackFS fs)
cmpFS :: FastString -> FastString -> Ordering
cmpFS (FastString u1 l1 _ buf1 _) (FastString u2 l2 _ buf2 _) =
if u1 == u2 then EQ else
case unsafeMemcmp buf1 buf2 (min l1 l2) `compare` 0 of
LT -> LT
EQ -> compare l1 l2
GT -> GT
unsafeMemcmp :: ForeignPtr a -> ForeignPtr b -> Int -> Int
unsafeMemcmp buf1 buf2 l =
inlinePerformIO $
withForeignPtr buf1 $ \p1 ->
withForeignPtr buf2 $ \p2 ->
memcmp p1 p2 l
#ifndef __HADDOCK__
foreign import ccall unsafe "ghc_memcmp"
memcmp :: Ptr a -> Ptr b -> Int -> IO Int
#endif
data FastStringTable =
FastStringTable
!Int
(MutableArray# RealWorld [FastString])
string_table :: IORef FastStringTable
string_table =
unsafePerformIO $ do
tab <- IO $ \s1# -> case newArray# hASH_TBL_SIZE_UNBOXED [] s1# of
(# s2#, arr# #) ->
(# s2#, FastStringTable 0 arr# #)
newIORef tab
lookupTbl :: FastStringTable -> Int -> IO [FastString]
lookupTbl (FastStringTable _ arr#) (I# i#) =
IO $ \ s# -> readArray# arr# i# s#
updTbl :: FastStringTable -> Int -> [FastString] -> IO FastStringTable
updTbl (FastStringTable uid arr#) (I# i#) ls = do
(IO $ \ s# -> case writeArray# arr# i# ls s# of { s2# -> (# s2#, () #) })
return (FastStringTable (uid+1) arr#)
mkFSInternal :: Ptr Word8 -> Int
-> (Int -> IO FastString)
-> IO FastString
mkFSInternal ptr len mk_it = do
r <- atomicModifyIORef string_table $
\fs_tbl@(FastStringTable uid _) ->
let h = hashStr ptr len
add_it ls = do
fs <- mk_it uid
fst' <- updTbl fs_tbl h (fs:ls)
fs `seq` fst' `seq` return (fst', fs)
in unsafePerformIO $ do
lookup_result <- lookupTbl fs_tbl h
case lookup_result of
[] -> add_it []
ls -> do
b <- bucket_match ls len ptr
case b of
Nothing -> add_it ls
Just v -> return (fs_tbl, v)
r `seq` return r
mkFastString# :: Addr# -> FastString
mkFastString# a# = mkFastStringBytes ptr (ptrStrLength ptr)
where ptr = Ptr a#
mkFastStringBytes :: Ptr Word8 -> Int -> FastString
mkFastStringBytes ptr len = inlinePerformIO $ do
mkFSInternal ptr len (\uid -> copyNewFastString uid ptr len)
mkZFastStringBytes :: Ptr Word8 -> Int -> FastString
mkZFastStringBytes ptr len = inlinePerformIO $ do
mkFSInternal ptr len (\uid -> copyNewZFastString uid ptr len)
mkFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
mkFastStringForeignPtr ptr fp len = do
mkFSInternal ptr len (\uid -> mkNewFastString uid ptr fp len)
mkZFastStringForeignPtr :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO FastString
mkZFastStringForeignPtr ptr fp len = do
mkFSInternal ptr len (\uid -> mkNewZFastString uid ptr fp len)
mkFastString :: String -> FastString
mkFastString str =
inlinePerformIO $ do
let l = utf8EncodedLength str
buf <- mallocForeignPtrBytes l
withForeignPtr buf $ \ptr -> do
utf8EncodeString ptr str
mkFastStringForeignPtr ptr buf l
mkFastStringByteList :: [Word8] -> FastString
mkFastStringByteList str =
inlinePerformIO $ do
let l = Prelude.length str
buf <- mallocForeignPtrBytes l
withForeignPtr buf $ \ptr -> do
pokeArray (castPtr ptr) str
mkFastStringForeignPtr ptr buf l
mkZFastString :: String -> FastString
mkZFastString str =
inlinePerformIO $ do
let l = Prelude.length str
buf <- mallocForeignPtrBytes l
withForeignPtr buf $ \ptr -> do
pokeCAString (castPtr ptr) str
mkZFastStringForeignPtr ptr buf l
bucket_match :: [FastString] -> Int -> Ptr Word8 -> IO (Maybe FastString)
bucket_match [] _ _ = return Nothing
bucket_match (v@(FastString _ l _ buf _):ls) len ptr
| len == l = do
b <- cmpStringPrefix ptr buf len
if b then return (Just v)
else bucket_match ls len ptr
| otherwise =
bucket_match ls len ptr
mkNewFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
-> IO FastString
mkNewFastString uid ptr fp len = do
ref <- newIORef Nothing
n_chars <- countUTF8Chars ptr len
return (FastString uid len n_chars fp (UTF8Encoded ref))
mkNewZFastString :: Int -> Ptr Word8 -> ForeignPtr Word8 -> Int
-> IO FastString
mkNewZFastString uid _ fp len = do
return (FastString uid len len fp ZEncoded)
copyNewFastString :: Int -> Ptr Word8 -> Int -> IO FastString
copyNewFastString uid ptr len = do
fp <- copyBytesToForeignPtr ptr len
ref <- newIORef Nothing
n_chars <- countUTF8Chars ptr len
return (FastString uid len n_chars fp (UTF8Encoded ref))
copyNewZFastString :: Int -> Ptr Word8 -> Int -> IO FastString
copyNewZFastString uid ptr len = do
fp <- copyBytesToForeignPtr ptr len
return (FastString uid len len fp ZEncoded)
copyBytesToForeignPtr :: Ptr Word8 -> Int -> IO (ForeignPtr Word8)
copyBytesToForeignPtr ptr len = do
fp <- mallocForeignPtrBytes len
withForeignPtr fp $ \ptr' -> copyBytes ptr' ptr len
return fp
cmpStringPrefix :: Ptr Word8 -> ForeignPtr Word8 -> Int -> IO Bool
cmpStringPrefix ptr fp len =
withForeignPtr fp $ \ptr' -> do
r <- memcmp ptr ptr' len
return (r == 0)
hashStr :: Ptr Word8 -> Int -> Int
hashStr (Ptr a#) (I# len#) = loop 0# 0#
where
loop h n | n GHC.Exts.==# len# = I# h
| otherwise = loop h2 (n GHC.Exts.+# 1#)
where !c = ord# (indexCharOffAddr# a# n)
!h2 = (c GHC.Exts.+# (h GHC.Exts.*# 128#)) `remInt#`
hASH_TBL_SIZE#
lengthFS :: FastString -> Int
lengthFS f = n_chars f
isZEncoded :: FastString -> Bool
isZEncoded fs | ZEncoded <- enc fs = True
| otherwise = False
hasZEncoding :: FastString -> Bool
hasZEncoding (FastString _ _ _ _ enc) =
case enc of
ZEncoded -> False
UTF8Encoded ref ->
inlinePerformIO $ do
m <- readIORef ref
return (isJust m)
nullFS :: FastString -> Bool
nullFS f = n_bytes f == 0
unpackFS :: FastString -> String
unpackFS (FastString _ n_bytes _ buf enc) =
inlinePerformIO $ withForeignPtr buf $ \ptr ->
case enc of
ZEncoded -> peekCAStringLen (castPtr ptr,n_bytes)
UTF8Encoded _ -> utf8DecodeString ptr n_bytes
bytesFS :: FastString -> [Word8]
bytesFS (FastString _ n_bytes _ buf _) =
inlinePerformIO $ withForeignPtr buf $ \ptr ->
peekArray n_bytes ptr
zEncodeFS :: FastString -> FastString
zEncodeFS fs@(FastString _ _ _ _ enc) =
case enc of
ZEncoded -> fs
UTF8Encoded ref ->
inlinePerformIO $ do
r <- atomicModifyIORef ref $ \m ->
case m of
Just fs -> (m, fs)
Nothing ->
let efs = mkZFastString (zEncodeString (unpackFS fs)) in
efs `seq` (Just efs, efs)
r `seq` return r
appendFS :: FastString -> FastString -> FastString
appendFS fs1 fs2 = mkFastString (unpackFS fs1 ++ unpackFS fs2)
concatFS :: [FastString] -> FastString
concatFS ls = mkFastString (Prelude.concat (map unpackFS ls))
headFS :: FastString -> Char
headFS (FastString _ 0 _ _ _) = panic "headFS: Empty FastString"
headFS (FastString _ _ _ buf enc) =
inlinePerformIO $ withForeignPtr buf $ \ptr -> do
case enc of
ZEncoded -> do
w <- peek (castPtr ptr)
return (castCCharToChar w)
UTF8Encoded _ ->
return (fst (utf8DecodeChar ptr))
tailFS :: FastString -> FastString
tailFS (FastString _ 0 _ _ _) = panic "tailFS: Empty FastString"
tailFS (FastString _ n_bytes _ buf enc) =
inlinePerformIO $ withForeignPtr buf $ \ptr -> do
case enc of
ZEncoded -> do
return $! mkZFastStringBytes (ptr `plusPtr` 1) (n_bytes 1)
UTF8Encoded _ -> do
let (_,ptr') = utf8DecodeChar ptr
let off = ptr' `minusPtr` ptr
return $! mkFastStringBytes (ptr `plusPtr` off) (n_bytes off)
consFS :: Char -> FastString -> FastString
consFS c fs = mkFastString (c : unpackFS fs)
uniqueOfFS :: FastString -> FastInt
uniqueOfFS (FastString u _ _ _ _) = iUnbox u
nilFS :: FastString
nilFS = mkFastString ""
getFastStringTable :: IO [[FastString]]
getFastStringTable = do
tbl <- readIORef string_table
buckets <- mapM (lookupTbl tbl) [0 .. hASH_TBL_SIZE]
return buckets
hPutFS :: Handle -> FastString -> IO ()
hPutFS handle (FastString _ len _ fp _)
| len == 0 = return ()
| otherwise = do withForeignPtr fp $ \ptr -> hPutBuf handle ptr len
type LitString = Ptr Word8
#if defined(__GLASGOW_HASKELL__)
mkLitString# :: Addr# -> LitString
mkLitString# a# = Ptr a#
#endif
mkLitString :: String -> LitString
mkLitString s =
unsafePerformIO (do
p <- mallocBytes (length s + 1)
let
loop :: Int -> String -> IO ()
loop n cs | n `seq` null cs = pokeByteOff p n (0 :: Word8)
loop n (c:cs) = do
pokeByteOff p n (fromIntegral (ord c) :: Word8)
loop (1+n) cs
loop _ [] = panic "mkLitString"
loop 0 s
return p
)
unpackLitString :: LitString -> String
unpackLitString p_ = case pUnbox p_ of
p -> unpack (_ILIT(0))
where
unpack n = case indexWord8OffFastPtrAsFastChar p n of
ch -> if ch `eqFastChar` _CLIT('\0')
then [] else cBox ch : unpack (n +# _ILIT(1))
lengthLS :: LitString -> Int
lengthLS = ptrStrLength
#if 0
type LitString = String
mkLitString :: String -> LitString
mkLitString = id
unpackLitString :: LitString -> String
unpackLitString = id
lengthLS :: LitString -> Int
lengthLS = length
#endif
foreign import ccall unsafe "ghc_strlen"
ptrStrLength :: Ptr Word8 -> Int
pokeCAString :: Ptr CChar -> String -> IO ()
pokeCAString ptr str =
let
go [] _ = return ()
go (c:cs) n = do pokeElemOff ptr n (castCharToCChar c); go cs (n+1)
in
go str 0
sLit :: String -> LitString
sLit x = mkLitString x
fsLit :: String -> FastString
fsLit x = mkFastString x
\end{code}