{-# LANGUAGE ScopedTypeVariables, CPP, BangPatterns, RankNTypes #-} #if __GLASGOW_HASKELL__ >= 703 {-# LANGUAGE Unsafe #-} #endif {-# OPTIONS_HADDOCK hide #-} -- | Copyright : (c) 2010 - 2011 Simon Meier -- License : BSD3-style (see LICENSE) -- -- Maintainer : Simon Meier <iridcode@gmail.com> -- Stability : unstable, private -- Portability : GHC -- -- *Warning:* this module is internal. If you find that you need it then please -- contact the maintainers and explain what you are trying to do and discuss -- what you would need in the public API. It is important that you do this as -- the module may not be exposed at all in future releases. -- -- Core types and functions for the 'Builder' monoid and its generalization, -- the 'Put' monad. -- -- The design of the 'Builder' monoid is optimized such that -- -- 1. buffers of arbitrary size can be filled as efficiently as possible and -- -- 2. sequencing of 'Builder's is as cheap as possible. -- -- We achieve (1) by completely handing over control over writing to the buffer -- to the 'BuildStep' implementing the 'Builder'. This 'BuildStep' is just told -- the start and the end of the buffer (represented as a 'BufferRange'). Then, -- the 'BuildStep' can write to as big a prefix of this 'BufferRange' in any -- way it desires. If the 'BuildStep' is done, the 'BufferRange' is full, or a -- long sequence of bytes should be inserted directly, then the 'BuildStep' -- signals this to its caller using a 'BuildSignal'. -- -- We achieve (2) by requiring that every 'Builder' is implemented by a -- 'BuildStep' that takes a continuation 'BuildStep', which it calls with the -- updated 'BufferRange' after it is done. Therefore, only two pointers have -- to be passed in a function call to implement concatenation of 'Builder's. -- Moreover, many 'Builder's are completely inlined, which enables the compiler -- to sequence them without a function call and with no boxing at all. -- -- This design gives the implementation of a 'Builder' full access to the 'IO' -- monad. Therefore, utmost care has to be taken to not overwrite anything -- outside the given 'BufferRange's. Moreover, further care has to be taken to -- ensure that 'Builder's and 'Put's are referentially transparent. See the -- comments of the 'builder' and 'put' functions for further information. -- Note that there are /no safety belts/ at all, when implementing a 'Builder' -- using an 'IO' action: you are writing code that might enable the next -- buffer-overflow attack on a Haskell server! -- module Data.ByteString.Builder.Internal ( -- * Buffer management Buffer(..) , BufferRange(..) , newBuffer , bufferSize , byteStringFromBuffer , ChunkIOStream(..) , buildStepToCIOS , ciosUnitToLazyByteString , ciosToLazyByteString -- * Build signals and steps , BuildSignal , BuildStep , finalBuildStep , done , bufferFull , insertChunk , fillWithBuildStep -- * The Builder monoid , Builder , builder , runBuilder , runBuilderWith -- ** Primitive combinators , empty , append , flush , ensureFree -- , sizedChunksInsert , byteStringCopy , byteStringInsert , byteStringThreshold , lazyByteStringCopy , lazyByteStringInsert , lazyByteStringThreshold , shortByteString , maximalCopySize , byteString , lazyByteString -- ** Execution , toLazyByteStringWith , AllocationStrategy , safeStrategy , untrimmedStrategy , customStrategy , L.smallChunkSize , L.defaultChunkSize , L.chunkOverhead -- * The Put monad , Put , put , runPut -- ** Execution , putToLazyByteString , putToLazyByteStringWith , hPut -- ** Conversion to and from Builders , putBuilder , fromPut -- -- ** Lifting IO actions -- , putLiftIO ) where import Control.Arrow (second) import Control.Applicative (Applicative(..), (<$>)) -- import Control.Exception (return) import Data.Monoid import qualified Data.ByteString as S import qualified Data.ByteString.Internal as S import qualified Data.ByteString.Lazy.Internal as L import qualified Data.ByteString.Short.Internal as Sh #if __GLASGOW_HASKELL__ >= 611 import qualified GHC.IO.Buffer as IO (Buffer(..), newByteBuffer) import GHC.IO.Handle.Internals (wantWritableHandle, flushWriteBuffer) import GHC.IO.Handle.Types (Handle__, haByteBuffer, haBufferMode) import System.IO (hFlush, BufferMode(..)) import Data.IORef #else import qualified Data.ByteString.Lazy as L #endif import System.IO (Handle) #if MIN_VERSION_base(4,4,0) #if MIN_VERSION_base(4,7,0) import Foreign #else import Foreign hiding (unsafeForeignPtrToPtr) #endif import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr) import System.IO.Unsafe (unsafeDupablePerformIO) #else import Foreign import GHC.IO (unsafeDupablePerformIO) #endif ------------------------------------------------------------------------------ -- Buffers ------------------------------------------------------------------------------ -- | A range of bytes in a buffer represented by the pointer to the first byte -- of the range and the pointer to the first byte /after/ the range. data BufferRange = BufferRange {-# UNPACK #-} !(Ptr Word8) -- First byte of range {-# UNPACK #-} !(Ptr Word8) -- First byte /after/ range -- | A 'Buffer' together with the 'BufferRange' of free bytes. The filled -- space starts at offset 0 and ends at the first free byte. data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8) {-# UNPACK #-} !BufferRange -- | Combined size of the filled and free space in the buffer. {-# INLINE bufferSize #-} bufferSize :: Buffer -> Int bufferSize (Buffer fpbuf (BufferRange _ ope)) = ope `minusPtr` unsafeForeignPtrToPtr fpbuf -- | Allocate a new buffer of the given size. {-# INLINE newBuffer #-} newBuffer :: Int -> IO Buffer newBuffer size = do fpbuf <- S.mallocByteString size let pbuf = unsafeForeignPtrToPtr fpbuf return $! Buffer fpbuf (BufferRange pbuf (pbuf `plusPtr` size)) -- | Convert the filled part of a 'Buffer' to a strict 'S.ByteString'. {-# INLINE byteStringFromBuffer #-} byteStringFromBuffer :: Buffer -> S.ByteString byteStringFromBuffer (Buffer fpbuf (BufferRange op _)) = S.PS fpbuf 0 (op `minusPtr` unsafeForeignPtrToPtr fpbuf) --- | Prepend the filled part of a 'Buffer' to a lazy 'L.ByteString' --- trimming it if necessary. {-# INLINE trimmedChunkFromBuffer #-} trimmedChunkFromBuffer :: AllocationStrategy -> Buffer -> L.ByteString -> L.ByteString trimmedChunkFromBuffer (AllocationStrategy _ _ trim) buf k | S.null bs = k | trim (S.length bs) (bufferSize buf) = L.Chunk (S.copy bs) k | otherwise = L.Chunk bs k where bs = byteStringFromBuffer buf ------------------------------------------------------------------------------ -- Chunked IO Stream ------------------------------------------------------------------------------ -- | A stream of chunks that are constructed in the 'IO' monad. -- -- This datatype serves as the common interface for the buffer-by-buffer -- execution of a 'BuildStep' by 'buildStepToCIOS'. Typical users of this -- interface are 'ciosToLazyByteString' or iteratee-style libraries like -- @enumerator@. data ChunkIOStream a = Finished Buffer a -- ^ The partially filled last buffer together with the result. | Yield1 S.ByteString (IO (ChunkIOStream a)) -- ^ Yield a /non-empty/ strict 'S.ByteString'. -- | A smart constructor for yielding one chunk that ignores the chunk if -- it is empty. {-# INLINE yield1 #-} yield1 :: S.ByteString -> IO (ChunkIOStream a) -> IO (ChunkIOStream a) yield1 bs cios | S.null bs = cios | otherwise = return $ Yield1 bs cios -- | Convert a @'ChunkIOStream' ()@ to a lazy 'L.ByteString' using -- 'unsafeDupablePerformIO'. {-# INLINE ciosUnitToLazyByteString #-} ciosUnitToLazyByteString :: AllocationStrategy -> L.ByteString -> ChunkIOStream () -> L.ByteString ciosUnitToLazyByteString strategy k = go where go (Finished buf _) = trimmedChunkFromBuffer strategy buf k go (Yield1 bs io) = L.Chunk bs $ unsafeDupablePerformIO (go <$> io) -- | Convert a 'ChunkIOStream' to a lazy tuple of the result and the written -- 'L.ByteString' using 'unsafeDupablePerformIO'. {-# INLINE ciosToLazyByteString #-} ciosToLazyByteString :: AllocationStrategy -> (a -> (b, L.ByteString)) -> ChunkIOStream a -> (b, L.ByteString) ciosToLazyByteString strategy k = go where go (Finished buf x) = second (trimmedChunkFromBuffer strategy buf) $ k x go (Yield1 bs io) = second (L.Chunk bs) $ unsafeDupablePerformIO (go <$> io) ------------------------------------------------------------------------------ -- Build signals ------------------------------------------------------------------------------ -- | 'BuildStep's may be called *multiple times* and they must not rise an -- async. exception. type BuildStep a = BufferRange -> IO (BuildSignal a) -- | 'BuildSignal's abstract signals to the caller of a 'BuildStep'. There are -- three signals: 'done', 'bufferFull', or 'insertChunks signals data BuildSignal a = Done {-# UNPACK #-} !(Ptr Word8) a | BufferFull {-# UNPACK #-} !Int {-# UNPACK #-} !(Ptr Word8) (BuildStep a) | InsertChunk {-# UNPACK #-} !(Ptr Word8) S.ByteString (BuildStep a) -- | Signal that the current 'BuildStep' is done and has computed a value. {-# INLINE done #-} done :: Ptr Word8 -- ^ Next free byte in current 'BufferRange' -> a -- ^ Computed value -> BuildSignal a done = Done -- | Signal that the current buffer is full. {-# INLINE bufferFull #-} bufferFull :: Int -- ^ Minimal size of next 'BufferRange'. -> Ptr Word8 -- ^ Next free byte in current 'BufferRange'. -> BuildStep a -- ^ 'BuildStep' to run on the next 'BufferRange'. This 'BuildStep' -- may assume that it is called with a 'BufferRange' of at least the -- required minimal size; i.e., the caller of this 'BuildStep' must -- guarantee this. -> BuildSignal a bufferFull = BufferFull -- | Signal that a 'S.ByteString' chunk should be inserted directly. {-# INLINE insertChunk #-} insertChunk :: Ptr Word8 -- ^ Next free byte in current 'BufferRange' -> S.ByteString -- ^ Chunk to insert. -> BuildStep a -- ^ 'BuildStep' to run on next 'BufferRange' -> BuildSignal a insertChunk op bs = InsertChunk op bs -- | Fill a 'BufferRange' using a 'BuildStep'. {-# INLINE fillWithBuildStep #-} fillWithBuildStep :: BuildStep a -- ^ Build step to use for filling the 'BufferRange'. -> (Ptr Word8 -> a -> IO b) -- ^ Handling the 'done' signal -> (Ptr Word8 -> Int -> BuildStep a -> IO b) -- ^ Handling the 'bufferFull' signal -> (Ptr Word8 -> S.ByteString -> BuildStep a -> IO b) -- ^ Handling the 'insertChunk' signal -> BufferRange -- ^ Buffer range to fill. -> IO b -- ^ Value computed while filling this 'BufferRange'. fillWithBuildStep step fDone fFull fChunk !br = do signal <- step br case signal of Done op x -> fDone op x BufferFull minSize op nextStep -> fFull op minSize nextStep InsertChunk op bs nextStep -> fChunk op bs nextStep ------------------------------------------------------------------------------ -- The 'Builder' monoid ------------------------------------------------------------------------------ -- | 'Builder's denote sequences of bytes. -- They are 'Monoid's where -- 'mempty' is the zero-length sequence and -- 'mappend' is concatenation, which runs in /O(1)/. newtype Builder = Builder (forall r. BuildStep r -> BuildStep r) -- | Construct a 'Builder'. In contrast to 'BuildStep's, 'Builder's are -- referentially transparent. {-# INLINE builder #-} builder :: (forall r. BuildStep r -> BuildStep r) -- ^ A function that fills a 'BufferRange', calls the continuation with -- the updated 'BufferRange' once its done, and signals its caller how -- to proceed using 'done', 'bufferFull', or 'insertChunk'. -- -- This function must be referentially transparent; i.e., calling it -- multiple times with equally sized 'BufferRange's must result in the -- same sequence of bytes being written. If you need mutable state, -- then you must allocate it anew upon each call of this function. -- Moroever, this function must call the continuation once its done. -- Otherwise, concatenation of 'Builder's does not work. Finally, this -- function must write to all bytes that it claims it has written. -- Otherwise, the resulting 'Builder' is not guaranteed to be -- referentially transparent and sensitive data might leak. -> Builder builder = Builder -- | The final build step that returns the 'done' signal. finalBuildStep :: BuildStep () finalBuildStep !(BufferRange op _) = return $ Done op () -- | Run a 'Builder' with the 'finalBuildStep'. {-# INLINE runBuilder #-} runBuilder :: Builder -- ^ 'Builder' to run -> BuildStep () -- ^ 'BuildStep' that writes the byte stream of this -- 'Builder' and signals 'done' upon completion. runBuilder b = runBuilderWith b finalBuildStep -- | Run a 'Builder'. {-# INLINE runBuilderWith #-} runBuilderWith :: Builder -- ^ 'Builder' to run -> BuildStep a -- ^ Continuation 'BuildStep' -> BuildStep a runBuilderWith (Builder b) = b -- | The 'Builder' denoting a zero-length sequence of bytes. This function is -- only exported for use in rewriting rules. Use 'mempty' otherwise. {-# INLINE[1] empty #-} empty :: Builder empty = Builder (\cont -> (\range -> cont range)) -- This eta expansion (hopefully) allows GHC to worker-wrapper the -- 'BufferRange' in the 'empty' base case of loops (since -- worker-wrapper requires (TODO: verify this) that all paths match -- against the wrapped argument. -- | Concatenate two 'Builder's. This function is only exported for use in rewriting -- rules. Use 'mappend' otherwise. {-# INLINE[1] append #-} append :: Builder -> Builder -> Builder append (Builder b1) (Builder b2) = Builder $ b1 . b2 instance Monoid Builder where {-# INLINE mempty #-} mempty = empty {-# INLINE mappend #-} mappend = append {-# INLINE mconcat #-} mconcat = foldr mappend mempty -- | Flush the current buffer. This introduces a chunk boundary. {-# INLINE flush #-} flush :: Builder flush = builder step where step k !(BufferRange op _) = return $ insertChunk op S.empty k ------------------------------------------------------------------------------ -- Put ------------------------------------------------------------------------------ -- | A 'Put' action denotes a computation of a value that writes a stream of -- bytes as a side-effect. 'Put's are strict in their side-effect; i.e., the -- stream of bytes will always be written before the computed value is -- returned. -- -- 'Put's are a generalization of 'Builder's. The typical use case is the -- implementation of an encoding that might fail (e.g., an interface to the -- 'zlib' compression library or the conversion from Base64 encoded data to -- 8-bit data). For a 'Builder', the only way to handle and report such a -- failure is ignore it or call 'error'. In contrast, 'Put' actions are -- expressive enough to allow reportng and handling such a failure in a pure -- fashion. -- -- @'Put' ()@ actions are isomorphic to 'Builder's. The functions 'putBuilder' -- and 'fromPut' convert between these two types. Where possible, you should -- use 'Builder's, as sequencing them is slightly cheaper than sequencing -- 'Put's because they do not carry around a computed value. newtype Put a = Put { unPut :: forall r. (a -> BuildStep r) -> BuildStep r } -- | Construct a 'Put' action. In contrast to 'BuildStep's, 'Put's are -- referentially transparent in the sense that sequencing the same 'Put' -- multiple times yields every time the same value with the same side-effect. {-# INLINE put #-} put :: (forall r. (a -> BuildStep r) -> BuildStep r) -- ^ A function that fills a 'BufferRange', calls the continuation with -- the updated 'BufferRange' and its computed value once its done, and -- signals its caller how to proceed using 'done', 'bufferFull', or -- 'insertChunk' signals. -- -- This function must be referentially transparent; i.e., calling it -- multiple times with equally sized 'BufferRange's must result in the -- same sequence of bytes being written and the same value being -- computed. If you need mutable state, then you must allocate it anew -- upon each call of this function. Moroever, this function must call -- the continuation once its done. Otherwise, monadic sequencing of -- 'Put's does not work. Finally, this function must write to all bytes -- that it claims it has written. Otherwise, the resulting 'Put' is -- not guaranteed to be referentially transparent and sensitive data -- might leak. -> Put a put = Put -- | Run a 'Put'. {-# INLINE runPut #-} runPut :: Put a -- ^ Put to run -> BuildStep a -- ^ 'BuildStep' that first writes the byte stream of -- this 'Put' and then yields the computed value using -- the 'done' signal. runPut (Put p) = p $ \x (BufferRange op _) -> return $ Done op x instance Functor Put where fmap f p = Put $ \k -> unPut p (\x -> k (f x)) {-# INLINE fmap #-} -- | Synonym for '<*' from 'Applicative'; used in rewriting rules. {-# INLINE[1] ap_l #-} ap_l :: Put a -> Put b -> Put a ap_l (Put a) (Put b) = Put $ \k -> a (\a' -> b (\_ -> k a')) -- | Synonym for '*>' from 'Applicative' and '>>' from 'Monad'; used in -- rewriting rules. {-# INLINE[1] ap_r #-} ap_r :: Put a -> Put b -> Put b ap_r (Put a) (Put b) = Put $ \k -> a (\_ -> b k) instance Applicative Put where {-# INLINE pure #-} pure x = Put $ \k -> k x {-# INLINE (<*>) #-} Put f <*> Put a = Put $ \k -> f (\f' -> a (\a' -> k (f' a'))) #if MIN_VERSION_base(4,2,0) {-# INLINE (<*) #-} (<*) = ap_l {-# INLINE (*>) #-} (*>) = ap_r #endif instance Monad Put where {-# INLINE return #-} return x = Put $ \k -> k x {-# INLINE (>>=) #-} Put m >>= f = Put $ \k -> m (\m' -> unPut (f m') k) {-# INLINE (>>) #-} (>>) = ap_r -- Conversion between Put and Builder ------------------------------------- -- | Run a 'Builder' as a side-effect of a @'Put' ()@ action. {-# INLINE[1] putBuilder #-} putBuilder :: Builder -> Put () putBuilder (Builder b) = Put $ \k -> b (k ()) -- | Convert a @'Put' ()@ action to a 'Builder'. {-# INLINE fromPut #-} fromPut :: Put () -> Builder fromPut (Put p) = Builder $ \k -> p (\_ -> k) -- We rewrite consecutive uses of 'putBuilder' such that the append of the -- involved 'Builder's is used. This can significantly improve performance, -- when the bound-checks of the concatenated builders are fused. -- ap_l rules {-# RULES "ap_l/putBuilder" forall b1 b2. ap_l (putBuilder b1) (putBuilder b2) = putBuilder (append b1 b2) "ap_l/putBuilder/assoc_r" forall b1 b2 (p :: Put a). ap_l (putBuilder b1) (ap_l (putBuilder b2) p) = ap_l (putBuilder (append b1 b2)) p "ap_l/putBuilder/assoc_l" forall (p :: Put a) b1 b2. ap_l (ap_l p (putBuilder b1)) (putBuilder b2) = ap_l p (putBuilder (append b1 b2)) #-} -- ap_r rules {-# RULES "ap_r/putBuilder" forall b1 b2. ap_r (putBuilder b1) (putBuilder b2) = putBuilder (append b1 b2) "ap_r/putBuilder/assoc_r" forall b1 b2 (p :: Put a). ap_r (putBuilder b1) (ap_r (putBuilder b2) p) = ap_r (putBuilder (append b1 b2)) p "ap_r/putBuilder/assoc_l" forall (p :: Put a) b1 b2. ap_r (ap_r p (putBuilder b1)) (putBuilder b2) = ap_r p (putBuilder (append b1 b2)) #-} -- combined ap_l/ap_r rules {-# RULES "ap_l/ap_r/putBuilder/assoc_r" forall b1 b2 (p :: Put a). ap_l (putBuilder b1) (ap_r (putBuilder b2) p) = ap_l (putBuilder (append b1 b2)) p "ap_r/ap_l/putBuilder/assoc_r" forall b1 b2 (p :: Put a). ap_r (putBuilder b1) (ap_l (putBuilder b2) p) = ap_l (putBuilder (append b1 b2)) p "ap_l/ap_r/putBuilder/assoc_l" forall (p :: Put a) b1 b2. ap_l (ap_r p (putBuilder b1)) (putBuilder b2) = ap_r p (putBuilder (append b1 b2)) "ap_r/ap_l/putBuilder/assoc_l" forall (p :: Put a) b1 b2. ap_r (ap_l p (putBuilder b1)) (putBuilder b2) = ap_r p (putBuilder (append b1 b2)) #-} -- Lifting IO actions --------------------- {- -- | Lift an 'IO' action to a 'Put' action. {-# INLINE putLiftIO #-} putLiftIO :: IO a -> Put a putLiftIO io = put $ \k br -> io >>= (`k` br) -} ------------------------------------------------------------------------------ -- Executing a Put directly on a buffered Handle ------------------------------------------------------------------------------ -- | Run a 'Put' action redirecting the produced output to a 'Handle'. -- -- The output is buffered using the 'Handle's associated buffer. If this -- buffer is too small to execute one step of the 'Put' action, then -- it is replaced with a large enough buffer. hPut :: forall a. Handle -> Put a -> IO a #if __GLASGOW_HASKELL__ >= 611 hPut h p = do fillHandle 1 (runPut p) where fillHandle :: Int -> BuildStep a -> IO a fillHandle !minFree step = do next <- wantWritableHandle "hPut" h fillHandle_ next where -- | We need to return an inner IO action that is executed outside -- the lock taken on the Handle for two reasons: -- -- 1. GHC.IO.Handle.Internals mentions in "Note [async]" that -- we should never do any side-effecting operations before -- an interuptible operation that may raise an async. exception -- as long as we are inside 'wantWritableHandle' and the like. -- We possibly run the interuptible 'flushWriteBuffer' right at -- the start of 'fillHandle', hence entering it a second time is -- not safe, as it could lead to a 'BuildStep' being run twice. -- -- FIXME (SM): Adapt this function or at least its documentation, -- as it is OK to run a 'BuildStep' twice. We dropped this -- requirement in favor of being able to use -- 'unsafeDupablePerformIO' and the speed improvement that it -- brings. -- -- 2. We use the 'S.hPut' function to also write to the handle. -- This function tries to take the same lock taken by -- 'wantWritableHandle'. Therefore, we cannot call 'S.hPut' -- inside 'wantWritableHandle'. -- fillHandle_ :: Handle__ -> IO (IO a) fillHandle_ h_ = do makeSpace =<< readIORef refBuf fillBuffer =<< readIORef refBuf where refBuf = haByteBuffer h_ freeSpace buf = IO.bufSize buf - IO.bufR buf makeSpace buf | IO.bufSize buf < minFree = do flushWriteBuffer h_ s <- IO.bufState <$> readIORef refBuf IO.newByteBuffer minFree s >>= writeIORef refBuf | freeSpace buf < minFree = flushWriteBuffer h_ | otherwise = #if __GLASGOW_HASKELL__ >= 613 return () #else -- required for ghc-6.12 flushWriteBuffer h_ #endif fillBuffer buf | freeSpace buf < minFree = error $ unlines [ "Data.ByteString.Builder.Internal.hPut: internal error." , " Not enough space after flush." , " required: " ++ show minFree , " free: " ++ show (freeSpace buf) ] | otherwise = do let !br = BufferRange op (pBuf `plusPtr` IO.bufSize buf) res <- fillWithBuildStep step doneH fullH insertChunkH br touchForeignPtr fpBuf return res where fpBuf = IO.bufRaw buf pBuf = unsafeForeignPtrToPtr fpBuf op = pBuf `plusPtr` IO.bufR buf {-# INLINE updateBufR #-} updateBufR op' = do let !off' = op' `minusPtr` pBuf !buf' = buf {IO.bufR = off'} writeIORef refBuf buf' doneH op' x = do updateBufR op' -- We must flush if this Handle is set to NoBuffering. -- If it is set to LineBuffering, be conservative and -- flush anyway (we didn't check for newlines in the data). -- Flushing must happen outside this 'wantWriteableHandle' -- due to the possible async. exception. case haBufferMode h_ of BlockBuffering _ -> return $ return x _line_or_no_buffering -> return $ hFlush h >> return x fullH op' minSize nextStep = do updateBufR op' return $ fillHandle minSize nextStep -- 'fillHandle' will flush the buffer (provided there is -- really less than 'minSize' space left) before executing -- the 'nextStep'. insertChunkH op' bs nextStep = do updateBufR op' return $ do S.hPut h bs fillHandle 1 nextStep #else hPut h p = go =<< buildStepToCIOS strategy (runPut p) where strategy = untrimmedStrategy L.smallChunkSize L.defaultChunkSize go (Finished buf x) = S.hPut h (byteStringFromBuffer buf) >> return x go (Yield1 bs io) = S.hPut h bs >> io >>= go #endif -- | Execute a 'Put' and return the computed result and the bytes -- written during the computation as a lazy 'L.ByteString'. -- -- This function is strict in the computed result and lazy in the writing of -- the bytes. For example, given -- -- @ --infinitePut = sequence_ (repeat (putBuilder (word8 1))) >> return 0 -- @ -- -- evaluating the expression -- -- @ --fst $ putToLazyByteString infinitePut -- @ -- -- does not terminate, while evaluating the expression -- -- @ --L.head $ snd $ putToLazyByteString infinitePut -- @ -- -- does terminate and yields the value @1 :: Word8@. -- -- An illustrative example for these strictness properties is the -- implementation of Base64 decoding (<http://en.wikipedia.org/wiki/Base64>). -- -- @ --type DecodingState = ... -- --decodeBase64 :: 'S.ByteString' -> DecodingState -> 'Put' (Maybe DecodingState) --decodeBase64 = ... -- @ -- -- The above function takes a strict 'S.ByteString' supposed to represent -- Base64 encoded data and the current decoding state. -- It writes the decoded bytes as the side-effect of the 'Put' and returns the -- new decoding state, if the decoding of all data in the 'S.ByteString' was -- successful. The checking if the strict 'S.ByteString' represents Base64 -- encoded data and the actual decoding are fused. This makes the common case, -- where all data represents Base64 encoded data, more efficient. It also -- implies that all data must be decoded before the final decoding -- state can be returned. 'Put's are intended for implementing such fused -- checking and decoding/encoding, which is reflected in their strictness -- properties. {-# NOINLINE putToLazyByteString #-} putToLazyByteString :: Put a -- ^ 'Put' to execute -> (a, L.ByteString) -- ^ Result and lazy 'L.ByteString' -- written as its side-effect putToLazyByteString = putToLazyByteStringWith (safeStrategy L.smallChunkSize L.defaultChunkSize) (\x -> (x, L.Empty)) -- | Execute a 'Put' with a buffer-allocation strategy and a continuation. For -- example, 'putToLazyByteString' is implemented as follows. -- -- @ --putToLazyByteString = 'putToLazyByteStringWith' -- ('safeStrategy' 'L.smallChunkSize' 'L.defaultChunkSize') (\x -> (x, L.empty)) -- @ -- {-# INLINE putToLazyByteStringWith #-} putToLazyByteStringWith :: AllocationStrategy -- ^ Buffer allocation strategy to use -> (a -> (b, L.ByteString)) -- ^ Continuation to use for computing the final result and the tail of -- its side-effect (the written bytes). -> Put a -- ^ 'Put' to execute -> (b, L.ByteString) -- ^ Resulting lazy 'L.ByteString' putToLazyByteStringWith strategy k p = ciosToLazyByteString strategy k $ unsafeDupablePerformIO $ buildStepToCIOS strategy (runPut p) ------------------------------------------------------------------------------ -- ByteString insertion / controlling chunk boundaries ------------------------------------------------------------------------------ -- Raw memory ------------- -- | Ensure that there are at least 'n' free bytes for the following 'Builder'. {-# INLINE ensureFree #-} ensureFree :: Int -> Builder ensureFree minFree = builder step where step k br@(BufferRange op ope) | ope `minusPtr` op < minFree = return $ bufferFull minFree op k | otherwise = k br -- | Copy the bytes from a 'BufferRange' into the output stream. wrappedBytesCopyStep :: BufferRange -- ^ Input 'BufferRange'. -> BuildStep a -> BuildStep a wrappedBytesCopyStep !(BufferRange ip0 ipe) k = go ip0 where go !ip !(BufferRange op ope) | inpRemaining <= outRemaining = do copyBytes op ip inpRemaining let !br' = BufferRange (op `plusPtr` inpRemaining) ope k br' | otherwise = do copyBytes op ip outRemaining let !ip' = ip `plusPtr` outRemaining return $ bufferFull 1 ope (go ip') where outRemaining = ope `minusPtr` op inpRemaining = ipe `minusPtr` ip -- Strict ByteStrings ------------------------------------------------------------------------------ -- | Construct a 'Builder' that copies the strict 'S.ByteString's, if it is -- smaller than the treshold, and inserts it directly otherwise. -- -- For example, @byteStringThreshold 1024@ copies strict 'S.ByteString's whose size -- is less or equal to 1kb, and inserts them directly otherwise. This implies -- that the average chunk-size of the generated lazy 'L.ByteString' may be as -- low as 513 bytes, as there could always be just a single byte between the -- directly inserted 1025 byte, strict 'S.ByteString's. -- {-# INLINE byteStringThreshold #-} byteStringThreshold :: Int -> S.ByteString -> Builder byteStringThreshold maxCopySize = \bs -> builder $ step bs where step !bs@(S.PS _ _ len) !k br@(BufferRange !op _) | len <= maxCopySize = byteStringCopyStep bs k br | otherwise = return $ insertChunk op bs k -- | Construct a 'Builder' that copies the strict 'S.ByteString'. -- -- Use this function to create 'Builder's from smallish (@<= 4kb@) -- 'S.ByteString's or if you need to guarantee that the 'S.ByteString' is not -- shared with the chunks generated by the 'Builder'. -- {-# INLINE byteStringCopy #-} byteStringCopy :: S.ByteString -> Builder byteStringCopy = \bs -> builder $ byteStringCopyStep bs {-# INLINE byteStringCopyStep #-} byteStringCopyStep :: S.ByteString -> BuildStep a -> BuildStep a byteStringCopyStep (S.PS ifp ioff isize) !k0 br0@(BufferRange op ope) -- Ensure that the common case is not recursive and therefore yields -- better code. | op' <= ope = do copyBytes op ip isize touchForeignPtr ifp k0 (BufferRange op' ope) | otherwise = do wrappedBytesCopyStep (BufferRange ip ipe) k br0 where op' = op `plusPtr` isize ip = unsafeForeignPtrToPtr ifp `plusPtr` ioff ipe = ip `plusPtr` isize k br = do touchForeignPtr ifp -- input consumed: OK to release here k0 br -- | Construct a 'Builder' that always inserts the strict 'S.ByteString' -- directly as a chunk. -- -- This implies flushing the output buffer, even if it contains just -- a single byte. You should therefore use 'byteStringInsert' only for large -- (@> 8kb@) 'S.ByteString's. Otherwise, the generated chunks are too -- fragmented to be processed efficiently afterwards. -- {-# INLINE byteStringInsert #-} byteStringInsert :: S.ByteString -> Builder byteStringInsert = \bs -> builder $ \k (BufferRange op _) -> return $ insertChunk op bs k -- Short bytestrings ------------------------------------------------------------------------------ -- | Construct a 'Builder' that copies the 'SH.ShortByteString'. -- {-# INLINE shortByteString #-} shortByteString :: Sh.ShortByteString -> Builder shortByteString = \sbs -> builder $ shortByteStringCopyStep sbs -- | Copy the bytes from a 'SH.ShortByteString' into the output stream. {-# INLINE shortByteStringCopyStep #-} shortByteStringCopyStep :: Sh.ShortByteString -- ^ Input 'SH.ShortByteString'. -> BuildStep a -> BuildStep a shortByteStringCopyStep !sbs k = go 0 (Sh.length sbs) where go !ip !ipe !(BufferRange op ope) | inpRemaining <= outRemaining = do Sh.copyToPtr sbs ip op inpRemaining let !br' = BufferRange (op `plusPtr` inpRemaining) ope k br' | otherwise = do Sh.copyToPtr sbs ip op outRemaining let !ip' = ip + outRemaining return $ bufferFull 1 ope (go ip' ipe) where outRemaining = ope `minusPtr` op inpRemaining = ipe - ip -- Lazy bytestrings ------------------------------------------------------------------------------ -- | Construct a 'Builder' that uses the thresholding strategy of 'byteStringThreshold' -- for each chunk of the lazy 'L.ByteString'. -- {-# INLINE lazyByteStringThreshold #-} lazyByteStringThreshold :: Int -> L.ByteString -> Builder lazyByteStringThreshold maxCopySize = L.foldrChunks (\bs b -> byteStringThreshold maxCopySize bs `mappend` b) mempty -- TODO: We could do better here. Currently, Large, Small, Large, leads to -- an unnecessary copy of the 'Small' chunk. -- | Construct a 'Builder' that copies the lazy 'L.ByteString'. -- {-# INLINE lazyByteStringCopy #-} lazyByteStringCopy :: L.ByteString -> Builder lazyByteStringCopy = L.foldrChunks (\bs b -> byteStringCopy bs `mappend` b) mempty -- | Construct a 'Builder' that inserts all chunks of the lazy 'L.ByteString' -- directly. -- {-# INLINE lazyByteStringInsert #-} lazyByteStringInsert :: L.ByteString -> Builder lazyByteStringInsert = L.foldrChunks (\bs b -> byteStringInsert bs `mappend` b) mempty -- | Create a 'Builder' denoting the same sequence of bytes as a strict -- 'S.ByteString'. -- The 'Builder' inserts large 'S.ByteString's directly, but copies small ones -- to ensure that the generated chunks are large on average. -- {-# INLINE byteString #-} byteString :: S.ByteString -> Builder byteString = byteStringThreshold maximalCopySize -- | Create a 'Builder' denoting the same sequence of bytes as a lazy -- 'S.ByteString'. -- The 'Builder' inserts large chunks of the lazy 'L.ByteString' directly, -- but copies small ones to ensure that the generated chunks are large on -- average. -- {-# INLINE lazyByteString #-} lazyByteString :: L.ByteString -> Builder lazyByteString = lazyByteStringThreshold maximalCopySize -- FIXME: also insert the small chunk for [large,small,large] directly. -- Perhaps it makes even sense to concatenate the small chunks in -- [large,small,small,small,large] and insert them directly afterwards to avoid -- unnecessary buffer spilling. Hmm, but that uncontrollably increases latency -- => no good! -- | The maximal size of a 'S.ByteString' that is copied. -- @2 * 'L.smallChunkSize'@ to guarantee that on average a chunk is of -- 'L.smallChunkSize'. maximalCopySize :: Int maximalCopySize = 2 * L.smallChunkSize ------------------------------------------------------------------------------ -- Builder execution ------------------------------------------------------------------------------ -- | A buffer allocation strategy for executing 'Builder's. -- The strategy -- -- > 'AllocationStrategy' firstBufSize bufSize trim -- -- states that the first buffer is of size @firstBufSize@, all following buffers -- are of size @bufSize@, and a buffer of size @n@ filled with @k@ bytes should -- be trimmed iff @trim k n@ is 'True'. data AllocationStrategy = AllocationStrategy (Maybe (Buffer, Int) -> IO Buffer) {-# UNPACK #-} !Int (Int -> Int -> Bool) -- | Create a custom allocation strategy. See the code for 'safeStrategy' and -- 'untrimmedStrategy' for examples. {-# INLINE customStrategy #-} customStrategy :: (Maybe (Buffer, Int) -> IO Buffer) -- ^ Buffer allocation function. If 'Nothing' is given, then a new first -- buffer should be allocated. If @'Just' (oldBuf, minSize)@ is given, -- then a buffer with minimal size 'minSize' must be returned. The -- strategy may reuse the 'oldBuffer', if it can guarantee that this -- referentially transparent and 'oldBuffer' is large enough. -> Int -- ^ Default buffer size. -> (Int -> Int -> Bool) -- ^ A predicate @trim used allocated@ returning 'True', if the buffer -- should be trimmed before it is returned. -> AllocationStrategy customStrategy = AllocationStrategy -- | Sanitize a buffer size; i.e., make it at least the size of an 'Int'. {-# INLINE sanitize #-} sanitize :: Int -> Int sanitize = max (sizeOf (undefined :: Int)) -- | Use this strategy for generating lazy 'L.ByteString's whose chunks are -- discarded right after they are generated. For example, if you just generate -- them to write them to a network socket. {-# INLINE untrimmedStrategy #-} untrimmedStrategy :: Int -- ^ Size of the first buffer -> Int -- ^ Size of successive buffers -> AllocationStrategy -- ^ An allocation strategy that does not trim any of the -- filled buffers before converting it to a chunk untrimmedStrategy firstSize bufSize = AllocationStrategy nextBuffer (sanitize bufSize) (\_ _ -> False) where {-# INLINE nextBuffer #-} nextBuffer Nothing = newBuffer $ sanitize firstSize nextBuffer (Just (_, minSize)) = newBuffer minSize -- | Use this strategy for generating lazy 'L.ByteString's whose chunks are -- likely to survive one garbage collection. This strategy trims buffers -- that are filled less than half in order to avoid spilling too much memory. {-# INLINE safeStrategy #-} safeStrategy :: Int -- ^ Size of first buffer -> Int -- ^ Size of successive buffers -> AllocationStrategy -- ^ An allocation strategy that guarantees that at least half -- of the allocated memory is used for live data safeStrategy firstSize bufSize = AllocationStrategy nextBuffer (sanitize bufSize) trim where trim used size = 2 * used < size {-# INLINE nextBuffer #-} nextBuffer Nothing = newBuffer $ sanitize firstSize nextBuffer (Just (_, minSize)) = newBuffer minSize -- | /Heavy inlining./ Execute a 'Builder' with custom execution parameters. -- -- This function is inlined despite its heavy code-size to allow fusing with -- the allocation strategy. For example, the default 'Builder' execution -- function 'toLazyByteString' is defined as follows. -- -- @ -- {-\# NOINLINE toLazyByteString \#-} -- toLazyByteString = -- toLazyByteStringWith ('safeStrategy' 'L.smallChunkSize' 'L.defaultChunkSize') L.empty -- @ -- -- where @L.empty@ is the zero-length lazy 'L.ByteString'. -- -- In most cases, the parameters used by 'toLazyByteString' give good -- performance. A sub-performing case of 'toLazyByteString' is executing short -- (<128 bytes) 'Builder's. In this case, the allocation overhead for the first -- 4kb buffer and the trimming cost dominate the cost of executing the -- 'Builder'. You can avoid this problem using -- -- >toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty -- -- This reduces the allocation and trimming overhead, as all generated -- 'L.ByteString's fit into the first buffer and there is no trimming -- required, if more than 64 bytes and less than 128 bytes are written. -- {-# INLINE toLazyByteStringWith #-} toLazyByteStringWith :: AllocationStrategy -- ^ Buffer allocation strategy to use -> L.ByteString -- ^ Lazy 'L.ByteString' to use as the tail of the generated lazy -- 'L.ByteString' -> Builder -- ^ 'Builder' to execute -> L.ByteString -- ^ Resulting lazy 'L.ByteString' toLazyByteStringWith strategy k b = ciosUnitToLazyByteString strategy k $ unsafeDupablePerformIO $ buildStepToCIOS strategy (runBuilder b) -- | Convert a 'BuildStep' to a 'ChunkIOStream' stream by executing it on -- 'Buffer's allocated according to the given 'AllocationStrategy'. {-# INLINE buildStepToCIOS #-} buildStepToCIOS :: AllocationStrategy -- ^ Buffer allocation strategy to use -> BuildStep a -- ^ 'BuildStep' to execute -> IO (ChunkIOStream a) buildStepToCIOS !(AllocationStrategy nextBuffer bufSize trim) = \step -> nextBuffer Nothing >>= fill step where fill !step !buf@(Buffer fpbuf br@(BufferRange _ pe)) = do res <- fillWithBuildStep step doneH fullH insertChunkH br touchForeignPtr fpbuf return res where pbuf = unsafeForeignPtrToPtr fpbuf doneH op' x = return $ Finished (Buffer fpbuf (BufferRange op' pe)) x fullH op' minSize nextStep = wrapChunk op' $ const $ nextBuffer (Just (buf, max minSize bufSize)) >>= fill nextStep insertChunkH op' bs nextStep = wrapChunk op' $ \isEmpty -> yield1 bs $ -- Checking for empty case avoids allocating 'n-1' empty -- buffers for 'n' insertChunkH right after each other. if isEmpty then fill nextStep buf else do buf' <- nextBuffer (Just (buf, bufSize)) fill nextStep buf' -- Wrap and yield a chunk, trimming it if necesary {-# INLINE wrapChunk #-} wrapChunk !op' mkCIOS | chunkSize == 0 = mkCIOS True | trim chunkSize size = do bs <- S.create chunkSize $ \pbuf' -> copyBytes pbuf' pbuf chunkSize -- FIXME: We could reuse the trimmed buffer here. return $ Yield1 bs (mkCIOS False) | otherwise = return $ Yield1 (S.PS fpbuf 0 chunkSize) (mkCIOS False) where chunkSize = op' `minusPtr` pbuf size = pe `minusPtr` pbuf