cmm/CmmContFlowOpt.hs

{-# LANGUAGE GADTs #-}
{-# OPTIONS_GHC -fno-warn-incomplete-patterns #-}
module CmmContFlowOpt
    ( cmmCfgOpts
    , cmmCfgOptsProc
    , removeUnreachableBlocksProc
    , replaceLabels
    )
where

import Hoopl
import BlockId
import Cmm
import CmmUtils
import Maybes
import Panic

import Control.Monad
import Prelude hiding (succ, unzip, zip)


-- Note [What is shortcutting]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Consider this Cmm code:
--
-- L1: ...
--     goto L2;
-- L2: goto L3;
-- L3: ...
--
-- Here L2 is an empty block and contains only an unconditional branch
-- to L3. In this situation any block that jumps to L2 can jump
-- directly to L3:
--
-- L1: ...
--     goto L3;
-- L2: goto L3;
-- L3: ...
--
-- In this situation we say that we shortcut L2 to L3. One of
-- consequences of shortcutting is that some blocks of code may become
-- unreachable (in the example above this is true for L2).


-- Note [Control-flow optimisations]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- This optimisation does four things:
--
--   - If a block finishes in an unconditonal branch to another block
--     and that is the only jump to that block we concatenate the
--     destination block at the end of the current one.
--
--   - If a block finishes in an unconditional branch, we may be able
--     to shortcut the destination block.
--
--   - If a block finishes in a call whose continuation block is a
--     goto, then we can shortcut the destination, making the
--     continuation block the destination of the goto - but see Note
--     [Shortcut call returns].
--
--   - For block finishing in conditional branch we try to invert the
--     condition and shortcut destination of alternatives.
--
--   - For any block that is not a call we try to shortcut the
--     destination(s).
--
-- Blocks are processed using postorder DFS traversal. A side effect
-- of determining traversal order with a graph search is elimination
-- of any blocks that are unreachable.
--
-- Transformations are improved by working from the end of the graph
-- towards the beginning, because we may be able to perform many
-- shortcuts in one go.


-- Note [Shortcut call returns]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- We are going to maintain the "current" graph (BlockEnv CmmBlock) as
-- we go, and also a mapping from BlockId to BlockId, representing
-- continuation labels that we have renamed.  This latter mapping is
-- important because we might shortcut a CmmCall continuation.  For
-- example:
--
--    Sp[0] = L
--    call g returns to L
--    L: goto M
--    M: ...
--
-- So when we shortcut the L block, we need to replace not only
-- the continuation of the call, but also references to L in the
-- code (e.g. the assignment Sp[0] = L):
--
--    Sp[0] = M
--    call g returns to M
--    M: ...
--
-- So we keep track of which labels we have renamed and apply the mapping
-- at the end with replaceLabels.


-- Note [Shortcut call returns and proc-points]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Consider this code that you might get from a recursive
-- let-no-escape:
--
--       goto L1
--      L1:
--       if (Hp > HpLim) then L2 else L3
--      L2:
--       call stg_gc_noregs returns to L4
--      L4:
--       goto L1
--      L3:
--       ...
--       goto L1
--
-- Then the control-flow optimiser shortcuts L4.  But that turns L1
-- into the call-return proc point, and every iteration of the loop
-- has to shuffle variables to and from the stack.  So we must *not*
-- shortcut L4.
--
-- Moreover not shortcutting call returns is probably fine.  If L4 can
-- concat with its branch target then it will still do so.  And we
-- save some compile time because we don't have to traverse all the
-- code in replaceLabels.
--
-- However, we probably do want to do this if we are splitting proc
-- points, because L1 will be a proc-point anyway, so merging it with
-- L4 reduces the number of proc points.  Unfortunately recursive
-- let-no-escapes won't generate very good code with proc-point
-- splitting on - we should probably compile them to explicitly use
-- the native calling convention instead.

cmmCfgOpts :: Bool -> CmmGraph -> CmmGraph
cmmCfgOpts split g = fst (blockConcat split g)

cmmCfgOptsProc :: Bool -> CmmDecl -> CmmDecl
cmmCfgOptsProc split (CmmProc info lbl live g) = CmmProc info' lbl live g'
    where (g', env) = blockConcat split g
          info' = info{ info_tbls = new_info_tbls }
          new_info_tbls = mapFromList (map upd_info (mapToList (info_tbls info)))

          -- If we changed any labels, then we have to update the info tables
          -- too, except for the top-level info table because that might be
          -- referred to by other procs.
          upd_info (k,info)
             | Just k' <- mapLookup k env
             = (k', if k' == g_entry g'
                       then info
                       else info{ cit_lbl = infoTblLbl k' })
             | otherwise
             = (k,info)
cmmCfgOptsProc _ top = top


blockConcat :: Bool -> CmmGraph -> (CmmGraph, BlockEnv BlockId)
blockConcat splitting_procs g@CmmGraph { g_entry = entry_id }
  = (replaceLabels shortcut_map $ ofBlockMap new_entry new_blocks, shortcut_map')
  where
     -- We might be able to shortcut the entry BlockId itself.
     -- Remember to update the shortcut_map, since we also have to
     -- update the info_tbls mapping now.
     (new_entry, shortcut_map')
       | Just entry_blk <- mapLookup entry_id new_blocks
       , Just dest      <- canShortcut entry_blk
       = (dest, mapInsert entry_id dest shortcut_map)
       | otherwise
       = (entry_id, shortcut_map)

     -- blocks is a list of blocks in DFS postorder, while blockmap is
     -- a map of blocks. We process each element from blocks and update
     -- blockmap accordingly
     blocks = postorderDfs g
     blockmap = foldr addBlock emptyBody blocks

     -- Accumulator contains three components:
     --  * map of blocks in a graph
     --  * map of shortcut labels. See Note [Shortcut call returns]
     --  * map containing number of predecessors for each block. We discard
     --    it after we process all blocks.
     (new_blocks, shortcut_map, _) =
           foldr maybe_concat (blockmap, mapEmpty, initialBackEdges) blocks

     -- Map of predecessors for initial graph. We increase number of
     -- predecessors for entry block by one to denote that it is
     -- target of a jump, even if no block in the current graph jumps
     -- to it.
     initialBackEdges = incPreds entry_id (predMap blocks)

     maybe_concat :: CmmBlock
                  -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int)
                  -> (BlockEnv CmmBlock, BlockEnv BlockId, BlockEnv Int)
     maybe_concat block (blocks, shortcut_map, backEdges)
        -- If:
        --   (1) current block ends with unconditional branch to b' and
        --   (2) it has exactly one predecessor (namely, current block)
        --
        -- Then:
        --   (1) append b' block at the end of current block
        --   (2) remove b' from the map of blocks
        --   (3) remove information about b' from predecessors map
        --
        -- This guard must be first so that we always eliminate blocks that have
        -- only one predecessor. If we had a target block that is both
        -- shorcutable and has only one predecessor and attempted to shortcut it
        -- first we would make that block unreachable but would not remove it
        -- from the graph.
        --
        -- Note that we always maintain an up-to-date list of predecessors, so
        -- we can ignore the contents of shortcut_map
        | CmmBranch b' <- last
        , hasOnePredecessor b'
        , Just blk' <- mapLookup b' blocks
        = let bid' = entryLabel blk'
          in ( mapDelete bid' $ mapInsert bid (splice head blk') blocks
             , shortcut_map
             , mapDelete b' backEdges )

        -- If:
        --   (1) current block ends with unconditional branch to b' and
        --   (2) we can shortcut block b'
        -- Then:
        --   (1) concatenate b' at the end of current block, effectively
        --       changing target of uncondtional jump from b' to dest
        --   (2) increase number of predecessors of dest by 1
        --   (3) decrease number of predecessors of b' by 1
        | CmmBranch b' <- last
        , Just blk' <- mapLookup b' blocks
        , Just dest <- canShortcut blk'
        = ( mapInsert bid (splice head blk') blocks, shortcut_map,
            decPreds b' $ incPreds dest backEdges )

        -- If:
        --   (1) we are splitting proc points (see Note
        --       [Shortcut call returns and proc-points]) and
        --   (2) current block is a CmmCall or CmmForeignCall with
        --       continuation b' and
        --   (3) we can shortcut that continuation to dest
        -- Then:
        --   (1) we change continuation to point to b'
        --   (2) create mapping from b' to dest
        --   (3) increase number of predecessors of dest by 1
        --   (4) decrease number of predecessors of b' by 1
        --
        -- Later we will use replaceLabels to substitute all occurrences of b'
        -- with dest.
        | splitting_procs
        , Just b'   <- callContinuation_maybe last
        , Just blk' <- mapLookup b' blocks
        , Just dest <- canShortcut blk'
        = ( mapInsert bid (blockJoinTail head (update_cont dest)) blocks
          , mapInsert b' dest shortcut_map
          , decPreds b' $ incPreds dest backEdges )

        -- If:
        --   (1) a block does not end with a call
        -- Then:
        --   (1) if it ends with a conditional attempt to invert the
        --       conditional
        --   (2) attempt to shortcut all destination blocks
        --   (3) if new successors of a block are different from the old ones
        --       we update the of predecessors accordingly
        | Nothing <- callContinuation_maybe last
        = let oldSuccs = successors last
              newSuccs = successors swapcond_last
          in ( mapInsert bid (blockJoinTail head swapcond_last) blocks
             , shortcut_map
             , if oldSuccs == newSuccs
               then backEdges
               else foldr incPreds (foldr decPreds backEdges oldSuccs) newSuccs )

        -- Otherwise don't do anything
        | otherwise
        = ( blocks, shortcut_map, backEdges )
        where
          (head, last) = blockSplitTail block
          bid = entryLabel block

          -- Changes continuation of a call to a specified label
          update_cont dest =
              case last of
                CmmCall{}        -> last { cml_cont = Just dest }
                CmmForeignCall{} -> last { succ = dest }
                _                -> panic "Can't shortcut continuation."

          -- Attempts to shortcut successors of last node
          shortcut_last = mapSuccessors shortcut last
            where
              shortcut l =
                 case mapLookup l blocks of
                   Just b | Just dest <- canShortcut b -> dest
                   _otherwise -> l

          -- For a conditional, we invert the conditional if that would make it
          -- more likely that the branch-not-taken case becomes a fallthrough.
          -- This helps the native codegen a little bit, and probably has no
          -- effect on LLVM.  It's convenient to do it here, where we have the
          -- information about predecessors.
          swapcond_last
            | CmmCondBranch cond t f <- shortcut_last
            , numPreds f > 1
            , hasOnePredecessor t
            , Just cond' <- maybeInvertCmmExpr cond
            = CmmCondBranch cond' f t

            | otherwise
            = shortcut_last

          -- Number of predecessors for a block
          numPreds bid = mapLookup bid backEdges `orElse` 0

          hasOnePredecessor b = numPreds b == 1

-- Functions for incrementing and decrementing number of predecessors. If
-- decrementing would set the predecessor count to 0, we remove entry from the
-- map.
-- Invariant: if a block has no predecessors it should be dropped from the
-- graph because it is unreachable. maybe_concat is constructed to maintain
-- that invariant, but calling replaceLabels may introduce unreachable blocks.
-- We rely on subsequent passes in the Cmm pipeline to remove unreachable
-- blocks.
incPreds, decPreds :: BlockId -> BlockEnv Int -> BlockEnv Int
incPreds bid edges = mapInsertWith (+) bid 1 edges
decPreds bid edges = case mapLookup bid edges of
                       Just preds | preds > 1 -> mapInsert bid (preds - 1) edges
                       Just _                 -> mapDelete bid edges
                       _                      -> edges


-- Checks if a block consists only of "goto dest". If it does than we return
-- "Just dest" label. See Note [What is shortcutting]
canShortcut :: CmmBlock -> Maybe BlockId
canShortcut block
    | (_, middle, CmmBranch dest) <- blockSplit block
    , isEmptyBlock middle
    = Just dest
    | otherwise
    = Nothing


-- Concatenates two blocks. First one is assumed to be open on exit, the second
-- is assumed to be closed on entry (i.e. it has a label attached to it, which
-- the splice function removes by calling snd on result of blockSplitHead).
splice :: Block CmmNode C O -> CmmBlock -> CmmBlock
splice head rest = head `blockAppend` snd (blockSplitHead rest)


-- If node is a call with continuation call return Just label of that
-- continuation. Otherwise return Nothing.
callContinuation_maybe :: CmmNode O C -> Maybe BlockId
callContinuation_maybe (CmmCall { cml_cont = Just b }) = Just b
callContinuation_maybe (CmmForeignCall { succ = b })   = Just b
callContinuation_maybe _ = Nothing


-- Map over the CmmGraph, replacing each label with its mapping in the
-- supplied BlockEnv.
replaceLabels :: BlockEnv BlockId -> CmmGraph -> CmmGraph
replaceLabels env g
  | mapNull env = g
  | otherwise   = replace_eid $ mapGraphNodes1 txnode g
   where
     replace_eid g = g {g_entry = lookup (g_entry g)}
     lookup id = mapLookup id env `orElse` id

     txnode :: CmmNode e x -> CmmNode e x
     txnode (CmmBranch bid)         = CmmBranch (lookup bid)
     txnode (CmmCondBranch p t f)   = mkCmmCondBranch (exp p) (lookup t) (lookup f)
     txnode (CmmSwitch e arms)      = CmmSwitch (exp e) (map (liftM lookup) arms)
     txnode (CmmCall t k rg a res r) = CmmCall (exp t) (liftM lookup k) rg a res r
     txnode fc@CmmForeignCall{}     = fc{ args = map exp (args fc)
                                        , succ = lookup (succ fc) }
     txnode other                   = mapExpDeep exp other

     exp :: CmmExpr -> CmmExpr
     exp (CmmLit (CmmBlock bid))                = CmmLit (CmmBlock (lookup bid))
     exp (CmmStackSlot (Young id) i) = CmmStackSlot (Young (lookup id)) i
     exp e                                      = e

mkCmmCondBranch :: CmmExpr -> Label -> Label -> CmmNode O C
mkCmmCondBranch p t f = if t == f then CmmBranch t else CmmCondBranch p t f

-- Build a map from a block to its set of predecessors.
predMap :: [CmmBlock] -> BlockEnv Int
predMap blocks = foldr add_preds mapEmpty blocks
  where
    add_preds block env = foldr add env (successors block)
      where add lbl env = mapInsertWith (+) lbl 1 env

-- Removing unreachable blocks
removeUnreachableBlocksProc :: CmmDecl -> CmmDecl
removeUnreachableBlocksProc proc@(CmmProc info lbl live g)
   | length used_blocks < mapSize (toBlockMap g) 
   = CmmProc info' lbl live g' 
   | otherwise
   = proc
   where
     g'    = ofBlockList (g_entry g) used_blocks
     info' = info { info_tbls = keep_used (info_tbls info) }
             -- Remove any info_tbls for unreachable 

     keep_used :: BlockEnv CmmInfoTable -> BlockEnv CmmInfoTable
     keep_used bs = mapFoldWithKey keep emptyBlockMap bs

     keep :: Label -> CmmInfoTable -> BlockEnv CmmInfoTable -> BlockEnv CmmInfoTable
     keep l i env | l `setMember` used_lbls = mapInsert l i env
                  | otherwise               = env

     used_blocks :: [CmmBlock]
     used_blocks = postorderDfs g

     used_lbls :: LabelSet
     used_lbls = foldr (setInsert . entryLabel) setEmpty used_blocks