{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}
{-# OPTIONS_GHC -Wno-redundant-constraints -Wno-name-shadowing #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  GHC.Compact
-- Copyright   :  (c) The University of Glasgow 2001-2009
--                (c) Giovanni Campagna <gcampagn@cs.stanford.edu> 2014
-- License     :  BSD-style (see the file LICENSE)
--
-- Maintainer  :  libraries@haskell.org
-- Stability   :  unstable
-- Portability :  non-portable (GHC Extensions)
--
-- This module provides a data structure, called a 'Compact', for
-- holding immutable, fully evaluated data in a consecutive block of memory.
-- Compact regions are good for two things:
--
--  1. Data in a compact region is not traversed during GC; any
--  incoming pointer to a compact region keeps the entire region
--  live.  Thus, if you put a long-lived data structure in a compact
--  region, you may save a lot of cycles during major collections,
--  since you will no longer be (uselessly) retraversing this
--  data structure.
--
--  2. Because the data is stored contiguously, you can easily
--  dump the memory to disk and/or send it over the network.
--  For applications that are not bandwidth bound (GHC's heap
--  representation can be as much of a x4 expansion over a
--  binary serialization), this can lead to substantial speedups.
--
-- For example, suppose you have a function @loadBigStruct :: IO BigStruct@,
-- which loads a large data structure from the file system.  You can "compact"
-- the structure with the following code:
--
-- @
--      do r <- 'compact' =<< loadBigStruct
--         let x = 'getCompact' r :: BigStruct
--         -- Do things with x
-- @
--
-- Note that 'compact' will not preserve internal sharing; use
-- 'compactWithSharing' (which is 10x slower) if you have cycles and/or
-- must preserve sharing.  The 'Compact' pointer @r@ can be used
-- to add more data to a compact region; see 'compactAdd' or
-- 'compactAddWithSharing'.
--
-- The implementation of compact regions is described by:
--
--  * Edward Z. Yang, Giovanni Campagna, Ömer Ağacan, Ahmed El-Hassany, Abhishek
--    Kulkarni, Ryan Newton. \"/Efficient communication and Collection with Compact
--    Normal Forms/\". In Proceedings of the 20th ACM SIGPLAN International
--    Conference on Functional Programming. September 2015. <http://ezyang.com/compact.html>
--
-- This library is supported by GHC 8.2 and later.

module GHC.Compact (
  -- * The Compact type
  Compact(..),

  -- * Compacting data
  compact,
  compactWithSharing,
  compactAdd,
  compactAddWithSharing,

  -- * Inspecting a Compact
  getCompact,
  inCompact,
  isCompact,
  compactSize,

  -- * Other utilities
  compactResize,

  -- * Internal operations
  mkCompact,
  compactSized,
  ) where

import Control.Concurrent.MVar
import GHC.Prim
import GHC.Types

-- | A 'Compact' contains fully evaluated, pure, immutable data.
--
-- 'Compact' serves two purposes:
--
-- * Data stored in a 'Compact' has no garbage collection overhead.
--   The garbage collector considers the whole 'Compact' to be alive
--   if there is a reference to any object within it.
--
-- * A 'Compact' can be serialized, stored, and deserialized again.
--   The serialized data can only be deserialized by the exact binary
--   that created it, but it can be stored indefinitely before
--   deserialization.
--
-- Compacts are self-contained, so compacting data involves copying
-- it; if you have data that lives in two 'Compact's, each will have a
-- separate copy of the data.
--
-- The cost of compaction is fully evaluating the data + copying it. However,
-- because 'compact' does not stop-the-world, retaining internal sharing during
-- the compaction process is very costly. The user can choose whether to
-- 'compact' or 'compactWithSharing'.
--
-- When you have a @'Compact' a@, you can get a pointer to the actual object
-- in the region using 'getCompact'.  The 'Compact' type
-- serves as handle on the region itself; you can use this handle
-- to add data to a specific 'Compact' with 'compactAdd' or
-- 'compactAddWithSharing' (giving you a new handle which corresponds
-- to the same compact region, but points to the newly added object
-- in the region).  At the moment, due to technical reasons,
-- it's not possible to get the @'Compact' a@ if you only have an @a@,
-- so make sure you hold on to the handle as necessary.
--
-- Data in a compact doesn't ever move, so compacting data is also a
-- way to pin arbitrary data structures in memory.
--
-- There are some limitations on what can be compacted:
--
-- * Functions.  Compaction only applies to data.
--
-- * Pinned 'ByteArray#' objects cannot be compacted.  This is for a
--   good reason: the memory is pinned so that it can be referenced by
--   address (the address might be stored in a C data structure, for
--   example), so we can't make a copy of it to store in the 'Compact'.
--
-- * Objects with mutable pointer fields (e.g. 'Data.IORef.IORef',
--   'GHC.Array.MutableArray') also cannot be compacted, because subsequent
--   mutation would destroy the property that a compact is self-contained.
--
-- If compaction encounters any of the above, a 'Control.Exception.CompactionFailed'
-- exception will be thrown by the compaction operation.
--
data Compact a = Compact Compact# a !(MVar ())
    -- we can *read* from a Compact without taking a lock, but only
    -- one thread can be writing to the compact at any given time.
    -- The MVar here is to enforce mutual exclusion among writers.
    -- Note: the MVar protects the Compact# only, not the pure value 'a'

-- | Make a new 'Compact' object, given a pointer to the true
-- underlying region.  You must uphold the invariant that @a@ lives
-- in the compact region.
--
mkCompact
  :: Compact# -> a -> State# RealWorld -> (# State# RealWorld, Compact a #)
mkCompact :: forall a.
Compact#
-> a -> State# RealWorld -> (# State# RealWorld, Compact a #)
mkCompact Compact#
compact# a
a State# RealWorld
s =
  case IO (MVar ()) -> State# RealWorld -> (# State# RealWorld, MVar () #)
forall {a}. IO a -> State# RealWorld -> (# State# RealWorld, a #)
unIO (() -> IO (MVar ())
forall a. a -> IO (MVar a)
newMVar ()) State# RealWorld
s of { (# State# RealWorld
s1, MVar ()
lock #) ->
  (# State# RealWorld
s1, Compact# -> a -> MVar () -> Compact a
forall a. Compact# -> a -> MVar () -> Compact a
Compact Compact#
compact# a
a MVar ()
lock #) }
 where
  unIO :: IO a -> State# RealWorld -> (# State# RealWorld, a #)
unIO (IO State# RealWorld -> (# State# RealWorld, a #)
a) = State# RealWorld -> (# State# RealWorld, a #)
a

-- | Transfer @a@ into a new compact region, with a preallocated size (in
-- bytes), possibly preserving sharing or not.  If you know how big the data
-- structure in question is, you can save time by picking an appropriate block
-- size for the compact region.
--
compactSized
    :: Int -- ^ Size of the compact region, in bytes
    -> Bool -- ^ Whether to retain internal sharing
    -> a
    -> IO (Compact a)
compactSized :: forall a. Int -> Bool -> a -> IO (Compact a)
compactSized (I# Int#
size) Bool
share a
a = (State# RealWorld -> (# State# RealWorld, Compact a #))
-> IO (Compact a)
forall a. (State# RealWorld -> (# State# RealWorld, a #)) -> IO a
IO ((State# RealWorld -> (# State# RealWorld, Compact a #))
 -> IO (Compact a))
-> (State# RealWorld -> (# State# RealWorld, Compact a #))
-> IO (Compact a)
forall a b. (a -> b) -> a -> b
$ \State# RealWorld
s0 ->
  case Word# -> State# RealWorld -> (# State# RealWorld, Compact# #)
compactNew# (Int# -> Word#
int2Word# Int#
size) State# RealWorld
s0 of { (# State# RealWorld
s1, Compact#
compact# #) ->
  case Compact# -> a -> State# RealWorld -> (# State# RealWorld, a #)
forall {a}.
Compact# -> a -> State# RealWorld -> (# State# RealWorld, a #)
compactAddPrim Compact#
compact# a
a State# RealWorld
s1 of { (# State# RealWorld
s2, a
pk #) ->
  Compact#
-> a -> State# RealWorld -> (# State# RealWorld, Compact a #)
forall a.
Compact#
-> a -> State# RealWorld -> (# State# RealWorld, Compact a #)
mkCompact Compact#
compact# a
pk State# RealWorld
s2 }}
 where
  compactAddPrim :: Compact# -> a -> State# RealWorld -> (# State# RealWorld, a #)
compactAddPrim
    | Bool
share = Compact# -> a -> State# RealWorld -> (# State# RealWorld, a #)
forall {a}.
Compact# -> a -> State# RealWorld -> (# State# RealWorld, a #)
compactAddWithSharing#
    | Bool
otherwise = Compact# -> a -> State# RealWorld -> (# State# RealWorld, a #)
forall {a}.
Compact# -> a -> State# RealWorld -> (# State# RealWorld, a #)
compactAdd#

-- | Retrieve a direct pointer to the value pointed at by a 'Compact' reference.
-- If you have used 'compactAdd', there may be multiple 'Compact' references
-- into the same compact region. Upholds the property:
--
-- > inCompact c (getCompact c) == True
--
getCompact :: Compact a -> a
getCompact :: forall a. Compact a -> a
getCompact (Compact Compact#
_ a
obj MVar ()
_) = a
obj

-- | Compact a value. /O(size of unshared data)/
--
-- If the structure contains any internal sharing, the shared data
-- will be duplicated during the compaction process.  This will
-- not terminate if the structure contains cycles (use 'compactWithSharing'
-- instead).
--
-- The object in question must not contain any functions or data with mutable
-- pointers; if it does, 'compact' will raise an exception. In the future, we
-- may add a type class which will help statically check if this is the case or
-- not.
--
compact :: a -> IO (Compact a)
compact :: forall a. a -> IO (Compact a)
compact = Int -> Bool -> a -> IO (Compact a)
forall a. Int -> Bool -> a -> IO (Compact a)
compactSized Int
31268 Bool
False

-- | Compact a value, retaining any internal sharing and
-- cycles. /O(size of data)/
--
-- This is typically about 10x slower than 'compact', because it works
-- by maintaining a hash table mapping uncompacted objects to
-- compacted objects.
--
-- The object in question must not contain any functions or data with mutable
-- pointers; if it does, 'compact' will raise an exception. In the future, we
-- may add a type class which will help statically check if this is the case or
-- not.
--
compactWithSharing :: a -> IO (Compact a)
compactWithSharing :: forall a. a -> IO (Compact a)
compactWithSharing = Int -> Bool -> a -> IO (Compact a)
forall a. Int -> Bool -> a -> IO (Compact a)
compactSized Int
31268 Bool
True

-- | Add a value to an existing 'Compact'.  This will help you avoid
-- copying when the value contains pointers into the compact region,
-- but remember that after compaction this value will only be deallocated
-- with the entire compact region.
--
-- Behaves exactly like 'compact' with respect to sharing and what data
-- it accepts.
--
compactAdd :: Compact b -> a -> IO (Compact a)
compactAdd :: forall b a. Compact b -> a -> IO (Compact a)
compactAdd (Compact Compact#
compact# b
_ MVar ()
lock) a
a = MVar () -> (() -> IO (Compact a)) -> IO (Compact a)
forall a b. MVar a -> (a -> IO b) -> IO b
withMVar MVar ()
lock ((() -> IO (Compact a)) -> IO (Compact a))
-> (() -> IO (Compact a)) -> IO (Compact a)
forall a b. (a -> b) -> a -> b
$ \()
_ -> (State# RealWorld -> (# State# RealWorld, Compact a #))
-> IO (Compact a)
forall a. (State# RealWorld -> (# State# RealWorld, a #)) -> IO a
IO ((State# RealWorld -> (# State# RealWorld, Compact a #))
 -> IO (Compact a))
-> (State# RealWorld -> (# State# RealWorld, Compact a #))
-> IO (Compact a)
forall a b. (a -> b) -> a -> b
$ \State# RealWorld
s ->
  case Compact# -> a -> State# RealWorld -> (# State# RealWorld, a #)
forall {a}.
Compact# -> a -> State# RealWorld -> (# State# RealWorld, a #)
compactAdd# Compact#
compact# a
a State# RealWorld
s of { (# State# RealWorld
s1, a
pk #) ->
  (# State# RealWorld
s1, Compact# -> a -> MVar () -> Compact a
forall a. Compact# -> a -> MVar () -> Compact a
Compact Compact#
compact# a
pk MVar ()
lock #) }

-- | Add a value to an existing 'Compact', like 'compactAdd',
-- but behaving exactly like 'compactWithSharing' with respect to sharing and
-- what data it accepts.
--
compactAddWithSharing :: Compact b -> a -> IO (Compact a)
compactAddWithSharing :: forall b a. Compact b -> a -> IO (Compact a)
compactAddWithSharing (Compact Compact#
compact# b
_ MVar ()
lock) a
a =
  MVar () -> (() -> IO (Compact a)) -> IO (Compact a)
forall a b. MVar a -> (a -> IO b) -> IO b
withMVar MVar ()
lock ((() -> IO (Compact a)) -> IO (Compact a))
-> (() -> IO (Compact a)) -> IO (Compact a)
forall a b. (a -> b) -> a -> b
$ \()
_ -> (State# RealWorld -> (# State# RealWorld, Compact a #))
-> IO (Compact a)
forall a. (State# RealWorld -> (# State# RealWorld, a #)) -> IO a
IO ((State# RealWorld -> (# State# RealWorld, Compact a #))
 -> IO (Compact a))
-> (State# RealWorld -> (# State# RealWorld, Compact a #))
-> IO (Compact a)
forall a b. (a -> b) -> a -> b
$ \State# RealWorld
s ->
    case Compact# -> a -> State# RealWorld -> (# State# RealWorld, a #)
forall {a}.
Compact# -> a -> State# RealWorld -> (# State# RealWorld, a #)
compactAddWithSharing# Compact#
compact# a
a State# RealWorld
s of { (# State# RealWorld
s1, a
pk #) ->
    (# State# RealWorld
s1, Compact# -> a -> MVar () -> Compact a
forall a. Compact# -> a -> MVar () -> Compact a
Compact Compact#
compact# a
pk MVar ()
lock #) }

-- | Check if the second argument is inside the passed 'Compact'.
--
inCompact :: Compact b -> a -> IO Bool
inCompact :: forall b a. Compact b -> a -> IO Bool
inCompact (Compact Compact#
buffer b
_ MVar ()
_) !a
val =
  (State# RealWorld -> (# State# RealWorld, Bool #)) -> IO Bool
forall a. (State# RealWorld -> (# State# RealWorld, a #)) -> IO a
IO (\State# RealWorld
s -> case Compact# -> a -> State# RealWorld -> (# State# RealWorld, Int# #)
forall a.
Compact# -> a -> State# RealWorld -> (# State# RealWorld, Int# #)
compactContains# Compact#
buffer a
val State# RealWorld
s of
         (# State# RealWorld
s', Int#
v #) -> (# State# RealWorld
s', Int# -> Bool
isTrue# Int#
v #) )

-- | Check if the argument is in any 'Compact'.  If true, the value in question
-- is also fully evaluated, since any value in a compact region must
-- be fully evaluated.
--
isCompact :: a -> IO Bool
isCompact :: forall a. a -> IO Bool
isCompact !a
val =
  (State# RealWorld -> (# State# RealWorld, Bool #)) -> IO Bool
forall a. (State# RealWorld -> (# State# RealWorld, a #)) -> IO a
IO (\State# RealWorld
s -> case a -> State# RealWorld -> (# State# RealWorld, Int# #)
forall a. a -> State# RealWorld -> (# State# RealWorld, Int# #)
compactContainsAny# a
val State# RealWorld
s of
         (# State# RealWorld
s', Int#
v #) -> (# State# RealWorld
s', Int# -> Bool
isTrue# Int#
v #) )

-- | Returns the size in bytes of the compact region.
--
compactSize :: Compact a -> IO Word
compactSize :: forall a. Compact a -> IO Word
compactSize (Compact Compact#
buffer a
_ MVar ()
lock) = MVar () -> (() -> IO Word) -> IO Word
forall a b. MVar a -> (a -> IO b) -> IO b
withMVar MVar ()
lock ((() -> IO Word) -> IO Word) -> (() -> IO Word) -> IO Word
forall a b. (a -> b) -> a -> b
$ \()
_ -> (State# RealWorld -> (# State# RealWorld, Word #)) -> IO Word
forall a. (State# RealWorld -> (# State# RealWorld, a #)) -> IO a
IO ((State# RealWorld -> (# State# RealWorld, Word #)) -> IO Word)
-> (State# RealWorld -> (# State# RealWorld, Word #)) -> IO Word
forall a b. (a -> b) -> a -> b
$ \State# RealWorld
s0 ->
   case Compact# -> State# RealWorld -> (# State# RealWorld, Word# #)
compactSize# Compact#
buffer State# RealWorld
s0 of (# State# RealWorld
s1, Word#
sz #) -> (# State# RealWorld
s1, Word# -> Word
W# Word#
sz #)

-- | __Experimental__  This function doesn't actually resize a compact
-- region; rather, it changes the default block size which we allocate
-- when the current block runs out of space, and also appends a block
-- to the compact region.
--
compactResize :: Compact a -> Word -> IO ()
compactResize :: forall a. Compact a -> Word -> IO ()
compactResize (Compact Compact#
oldBuffer a
_ MVar ()
lock) (W# Word#
new_size) =
  MVar () -> (() -> IO ()) -> IO ()
forall a b. MVar a -> (a -> IO b) -> IO b
withMVar MVar ()
lock ((() -> IO ()) -> IO ()) -> (() -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \()
_ -> (State# RealWorld -> (# State# RealWorld, () #)) -> IO ()
forall a. (State# RealWorld -> (# State# RealWorld, a #)) -> IO a
IO ((State# RealWorld -> (# State# RealWorld, () #)) -> IO ())
-> (State# RealWorld -> (# State# RealWorld, () #)) -> IO ()
forall a b. (a -> b) -> a -> b
$ \State# RealWorld
s ->
    case Compact# -> Word# -> State# RealWorld -> State# RealWorld
compactResize# Compact#
oldBuffer Word#
new_size State# RealWorld
s of
      State# RealWorld
s' -> (# State# RealWorld
s', () #)