{-# LANGUAGE GADTs #-}
module CmmSwitch (
     SwitchTargets,
     mkSwitchTargets,
     switchTargetsCases, switchTargetsDefault, switchTargetsRange, switchTargetsSigned,
     mapSwitchTargets, switchTargetsToTable, switchTargetsFallThrough,
     switchTargetsToList, eqSwitchTargetWith,

     SwitchPlan(..),
     targetSupportsSwitch,
     createSwitchPlan,
  ) where

import GhcPrelude

import Outputable
import DynFlags
import Hoopl.Label (Label)

import Data.Maybe
import Data.List (groupBy)
import Data.Function (on)
import qualified Data.Map as M

-- Note [Cmm Switches, the general plan]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Compiling a high-level switch statement, as it comes out of a STG case
-- expression, for example, allows for a surprising amount of design decisions.
-- Therefore, we cleanly separated this from the Stg → Cmm transformation, as
-- well as from the actual code generation.
--
-- The overall plan is:
--  * The Stg → Cmm transformation creates a single `SwitchTargets` in
--    emitSwitch and emitCmmLitSwitch in StgCmmUtils.hs.
--    At this stage, they are unsuitable for code generation.
--  * A dedicated Cmm transformation (CmmImplementSwitchPlans) replaces these
--    switch statements with code that is suitable for code generation, i.e.
--    a nice balanced tree of decisions with dense jump tables in the leafs.
--    The actual planning of this tree is performed in pure code in createSwitchPlan
--    in this module. See Note [createSwitchPlan].
--  * The actual code generation will not do any further processing and
--    implement each CmmSwitch with a jump table.
--
-- When compiling to LLVM or C, CmmImplementSwitchPlans leaves the switch
-- statements alone, as we can turn a SwitchTargets value into a nice
-- switch-statement in LLVM resp. C, and leave the rest to the compiler.
--
-- See Note [CmmSwitch vs. CmmImplementSwitchPlans] for why the two modules are
-- separated.

-----------------------------------------------------------------------------
-- Note [Magic Constants in CmmSwitch]
--
-- There are a lot of heuristics here that depend on magic values where it is
-- hard to determine the "best" value (for whatever that means). These are the
-- magic values:

-- | Number of consecutive default values allowed in a jump table. If there are
-- more of them, the jump tables are split.
--
-- Currently 7, as it costs 7 words of additional code when a jump table is
-- split (at least on x64, determined experimentally).
--
-- Passed to splitAtHoles by createSwitchPlan as the hole size.
-- NOTE(review): splitAtHoles splits when the difference between two
-- neighbouring keys exceeds this value, i.e. already when exactly 7 values
-- lie between them -- confirm whether the limit is meant to be inclusive.
maxJumpTableHole :: Integer
maxJumpTableHole = 7

-- | Minimum size of a jump table. If the number is smaller, the switch is
-- implemented using conditionals.
-- Currently 5, because an if-then-else tree of 4 values is nice and compact.
--
-- NOTE(review): breakTooSmall keeps a piece as a table only when its size is
-- strictly greater than this value, so a piece with exactly 5 entries is
-- broken up as well -- confirm whether that is intended.
minJumpTableSize :: Int
minJumpTableSize = 5

-- | Minimum non-zero offset for a jump table. See Note [Jump Table Offset].
--
-- switchTargetsToTable starts the table at 0 (no offset) whenever the lower
-- bound of the range is non-negative and smaller than this value.
minJumpTableOffset :: Integer
minJumpTableOffset = 2


-----------------------------------------------------------------------------
-- Switch Targets

-- Note [SwitchTargets]:
-- ~~~~~~~~~~~~~~~~~~~~~
--
-- The branches of a switch are stored in a SwitchTargets, which consists of an
-- (optional) default jump target, and a map from values to jump targets.
--
-- If the default jump target is absent, the behaviour of the switch outside the
-- values of the map is undefined.
--
-- We use an Integer for the keys of the map so that it can be used in switches
-- on unsigned as well as signed integers.
--
-- The map may be empty (we prune out-of-range branches here, so it could be us
-- emptying it).
--
-- Before code generation, the table needs to be brought into a form where all
-- entries are non-negative, so that it can be compiled into a jump table.
-- See switchTargetsToTable.


-- | A value of type SwitchTargets contains the alternatives for a 'CmmSwitch'
-- value, and knows whether the value is signed, the possible range, an
-- optional default value and a map from values to jump labels.
--
-- Values are normally built with 'mkSwitchTargets', which prunes and
-- normalises the map.
data SwitchTargets =
    SwitchTargets
        Bool                       -- Signed values: is the scrutinee signed?
        (Integer, Integer)         -- Range: inclusive (lowest, highest) bounds
        (Maybe Label)              -- Default value: target when no branch matches
        (M.Map Integer Label)      -- The branches: scrutinee value -> target
    deriving (Show, Eq)

-- | Smart constructor for 'SwitchTargets'. The resulting value satisfies:
--  * every branch key lies inside the given (inclusive) range,
--  * no branch points at the default label,
--  * the default is dropped when the remaining branches cover the whole range.
mkSwitchTargets :: Bool -> (Integer, Integer) -> Maybe Label -> M.Map Integer Label -> SwitchTargets
mkSwitchTargets signed range@(lo,hi) mbdef ids
    = SwitchTargets signed range keptDefault explicit
  where
    -- Entries outside the range can never be selected, so forget them.
    inRange = restrictMap range ids

    -- Entries that jump to the default label are redundant with the default.
    explicit = case mbdef of
        Just l  -> M.filter (/= l) inRange
        Nothing -> inRange

    -- With one explicit branch per value of the range, the default can never
    -- be taken.
    coversRange = fromIntegral (M.size explicit) == hi - lo + 1

    keptDefault
        | coversRange = Nothing
        | otherwise   = mbdef


-- | Applies the given function to every label stored in the SwitchTargets
-- value, i.e. to the default (if present) and to each branch target.
mapSwitchTargets :: (Label -> Label) -> SwitchTargets -> SwitchTargets
mapSwitchTargets f targets = case targets of
    SwitchTargets signed range mbdef branches ->
        SwitchTargets signed range (f <$> mbdef) (M.map f branches)

-- | The explicit (non-default) branches as (value, target) pairs, in
-- ascending order of the value.
switchTargetsCases :: SwitchTargets -> [(Integer, Label)]
switchTargetsCases targets = case targets of
    SwitchTargets _ _ _ branches -> M.assocs branches

-- | The default target of the SwitchTargets value, if it has one.
switchTargetsDefault :: SwitchTargets -> Maybe Label
switchTargetsDefault targets = case targets of
    SwitchTargets _ _ defaultTarget _ -> defaultTarget

-- | The (lowest, highest) bounds stored in the SwitchTargets value.
switchTargetsRange :: SwitchTargets -> (Integer, Integer)
switchTargetsRange targets = case targets of
    SwitchTargets _ bounds _ _ -> bounds

-- | The signedness flag stored in the SwitchTargets value.
switchTargetsSigned :: SwitchTargets -> Bool
switchTargetsSigned targets = case targets of
    SwitchTargets isSigned _ _ _ -> isSigned

-- | switchTargetsToTable creates a dense jump table, usable for code generation.
--
-- The first component of the result is an offset to add to the scrutinee;
-- the list in the second component is 0-based on the result of that addition.
-- Slots without an explicit branch hold the default target (if any).
--
-- The conversion from Integer to Int is a bit of a wart, as the actual
-- scrutinee might be an unsigned word, but it just works, due to wrap-around
-- arithmetic (as verified by the CmmSwitchTest test case).
switchTargetsToTable :: SwitchTargets -> (Int, [Maybe Label])
switchTargetsToTable (SwitchTargets _ (lo,hi) mbdef branches)
    = (offset, map target [firstIx .. hi])
  where
    -- The explicit branch if there is one, otherwise the default.
    target ix = maybe mbdef Just (M.lookup ix branches)

    -- See Note [Jump Table Offset]: for a tiny non-negative lower bound the
    -- table simply starts at 0, so that no offset has to be applied.
    firstIx
      | 0 <= lo, lo < minJumpTableOffset = 0
      | otherwise                        = lo

    offset = fromIntegral (negate firstIx)

-- Note [Jump Table Offset]
-- ~~~~~~~~~~~~~~~~~~~~~~~~
--
-- Usually, the code for a jump table starting at x will first subtract x from
-- the value, to avoid a large amount of empty entries. But if x is very small,
-- the extra entries are no worse than the subtraction in terms of code size, and
-- not having to do the subtraction is quicker.
--
-- I.e. instead of
--     _u20N:
--             leaq -1(%r14),%rax
--             jmp *_n20R(,%rax,8)
--     _n20R:
--             .quad   _c20p
--             .quad   _c20q
-- do
--     _u20N:
--             jmp *_n20Q(,%r14,8)
--
--     _n20Q:
--             .quad   0
--             .quad   _c20p
--             .quad   _c20q
--             .quad   _c20r

-- | The list of all labels occurring in the SwitchTargets value: the default
-- (if present) first, followed by the branch targets.
switchTargetsToList :: SwitchTargets -> [Label]
switchTargetsToList (SwitchTargets _ _ mbdef branches)
    = maybe id (:) mbdef (M.elems branches)

-- | Groups runs of adjacent cases that share a target, suitable for
-- pretty-printing to a c-like switch statement with fall-through semantics.
-- The default target is returned unchanged alongside the groups.
switchTargetsFallThrough :: SwitchTargets -> ([([Integer], Label)], Maybe Label)
switchTargetsFallThrough (SwitchTargets _ _ mbdef branches) = (groups, mbdef)
  where
    -- groupBy never yields an empty group, so the pattern below always
    -- matches and picks the target shared by the whole run.
    groups = [ (map fst run, target)
             | run@((_, target) : _) <- groupBy ((==) `on` snd) (M.toList branches)
             ]

-- | Custom equality helper, needed for "CmmCommonBlockElim".
--
-- Signedness, range and branch keys are compared with (==); labels (default
-- and branch targets) are compared with the supplied predicate.
eqSwitchTargetWith :: (Label -> Label -> Bool) -> SwitchTargets -> SwitchTargets -> Bool
eqSwitchTargetWith eq (SwitchTargets signed1 range1 mbdef1 ids1) (SwitchTargets signed2 range2 mbdef2 ids2) =
    and [ signed1 == signed2
        , range1 == range2
        , defaultsAgree
        , branchesAgree (M.toList ids1) (M.toList ids2)
        ]
  where
    defaultsAgree = case (mbdef1, mbdef2) of
        (Nothing, Nothing) -> True
        (Just l1, Just l2) -> l1 `eq` l2
        _                  -> False

    -- Walk both ascending association lists in lock-step.
    branchesAgree ((i1,l1):rest1) ((i2,l2):rest2)
        = i1 == i2 && l1 `eq` l2 && branchesAgree rest1 rest2
    branchesAgree [] [] = True
    branchesAgree _  _  = False

-----------------------------------------------------------------------------
-- Code generation for Switches


-- | A SwitchPlan abstractly describes how a Switch statement ought to be
-- implemented. See Note [createSwitchPlan]
data SwitchPlan
    -- Jump to the label without any test.
    = Unconditionally Label
    -- If the scrutinee equals the value, jump to the label; otherwise
    -- continue with the nested plan.
    | IfEqual Integer Label SwitchPlan
    -- If the scrutinee is less than the value, follow the first plan,
    -- otherwise the second. The Bool is the signedness flag that buildTree
    -- passes along.
    | IfLT Bool Integer SwitchPlan SwitchPlan
    -- Dispatch through a jump table over the given targets.
    | JumpTable SwitchTargets
  deriving Show
--
-- Note [createSwitchPlan]
-- ~~~~~~~~~~~~~~~~~~~~~~~
--
-- A SwitchPlan describes how a Switch statement is to be broken down into
-- smaller pieces suitable for code generation.
--
-- createSwitchPlan creates such a switch plan, in these steps:
--  1. It splits the switch statement at holes, i.e. runs of values without an
--     explicit branch, that are too large. See splitAtHoles and
--     Note [Magic Constants in CmmSwitch]
--  2. Too small jump tables should be avoided, so we break up smaller pieces
--     in breakTooSmall.
--  3. We fill in the segments between those pieces with a jump to the default
--     label (if there is one), returning a SeparatedList in mkFlatSwitchPlan
--  4. We find and replace two less-than branches by a single equal-to-test in
--     findSingleValues
--  5. The thus collected pieces are assembled to a balanced binary tree.

{-
  Note [Two alts + default]
  ~~~~~~~~~~~~~~~~~~~~~~~~~

Discussion and a bit more info at #14644

When dealing with a switch of the form:
switch(e) {
  case 1: goto l1;
  case 3000: goto l2;
  default: goto ldef;
}

If we treat it as a sparse jump table we would generate:

if (e > 3000) //Check if value is outside of the jump table.
    goto ldef;
else {
    if (e < 3000) { //Compare to upper value
        if(e != 1) //Compare to remaining value
            goto ldef;
          else
            goto l1;
    }
    else
        goto l2;
}

Instead we special case this to :

if (e==1) goto l1;
else if (e==3000) goto l2;
else goto ldef;

This means we have:
* Fewer comparisons for: 1,<3000
* Unchanged for 3000
* One more for >3000

This improves code in a few ways:
* One comparison less means smaller code which helps with cache.
* It exchanges a taken jump for two jumps not taken in the >range case.
  Jumps not taken are cheaper (See Agner guides) making this about as fast.
* For all other cases the first range check is removed making it faster.

The end result is that the change is not measurably slower for the case
>3000 and faster for the other cases.

This makes running this kind of match in an inner loop cheaper by 10-20%
depending on the data.
In nofib this improves wheel-sieve1 by 4-9% depending on problem
size.

We could also add a second conditional jump after the comparison to
keep the range check like this:
    cmp 3000, rArgument
    jg <default>
    je <branch 2>
While this is fairly cheap it made no big difference for the >3000 case
and slowed down all other cases making it not worthwhile.
-}


-- | Does the target support switch out of the box? Then leave this to the
-- target! Only the C and LLVM targets answer True.
targetSupportsSwitch :: HscTarget -> Bool
targetSupportsSwitch target = case target of
    HscC    -> True
    HscLlvm -> True
    _       -> False

-- | This function creates a SwitchPlan from a SwitchTargets value, breaking it
-- down into smaller pieces suitable for code generation.
--
-- Switches with one or two branches are special-cased up front; everything
-- else goes through the steps described in Note [createSwitchPlan].
createSwitchPlan :: SwitchTargets -> SwitchPlan
createSwitchPlan (SwitchTargets signed range mbdef m) =
    -- pprTrace "createSwitchPlan" (text (show ids) $$ text (show (range,m)) $$ text (show pieces) $$ text (show flatPlan) $$ text (show plan)) $
    case (mbdef, M.toAscList m) of
        -- Lets do the common case of a singleton map quickly and efficiently (#10677)
        (Just defLabel, [(x, l)])
            -> IfEqual x l (Unconditionally defLabel)
        -- And another common case, matching "booleans".
        -- Checking that the range spans exactly two values is enough, as the
        -- two keys of the map are distinct.
        (Nothing, [(x1, l1), (_x2, l2)])
            | (lo, hi) <- range, hi - lo == 1
            -> IfEqual x1 l1 (Unconditionally l2)
        -- See Note [Two alts + default]
        (Just defLabel, [(x1, l1), (x2, l2)])
            -> IfEqual x1 l1 (IfEqual x2 l2 (Unconditionally defLabel))
        _   -> generalPlan
  where
    generalPlan = buildTree signed (findSingleValues flat)
    flat        = mkFlatSwitchPlan signed mbdef range chunks
    chunks      = concatMap breakTooSmall (splitAtHoles maxJumpTableHole m)


---
--- Step 1: Splitting at large holes
---
-- Cuts the map into consecutive sub-maps wherever two neighbouring keys are
-- more than holeSize apart. An empty map yields no pieces at all.
splitAtHoles :: Integer -> M.Map Integer a -> [M.Map Integer a]
splitAtHoles holeSize m
  | M.null m  = []
  | otherwise = [ restrictMap segment m | segment <- segments ]
  where
    keys = M.keys m

    -- Every pair of neighbouring keys that is too far apart marks a cut.
    gaps = [ gap
           | gap@(below, above) <- zip keys (drop 1 keys)
           , above - below > holeSize
           ]

    -- Turn the cuts into the key ranges of the pieces between them.
    segments = reassocTuples (fst (M.findMin m)) gaps (fst (M.findMax m))

---
--- Step 2: Avoid small jump tables
---
-- A piece is kept intact only if it has more than minJumpTableSize entries.
-- Any other piece is broken up (into singleton maps, for now), one per entry
-- and in ascending key order.
breakTooSmall :: M.Map Integer a -> [M.Map Integer a]
breakTooSmall m
  | M.size m <= minJumpTableSize = map (uncurry M.singleton) (M.toList m)
  | otherwise                    = [m]

---
---  Step 3: Fill in the blanks
---

-- | A FlatSwitchPlan is a list of SwitchPlans, with an integer in between
-- every two entries, dividing the range.
-- So if we have (abusing list syntax) [plan1,n,plan2], then we use plan1 if
-- the expression is < n, and plan2 otherwise.
-- It is represented as a SeparatedList, so it always holds at least one plan.

type FlatSwitchPlan = SeparatedList Integer SwitchPlan

mkFlatSwitchPlan :: Bool -> Maybe Label -> (Integer, Integer) -> [M.Map Integer Label] -> FlatSwitchPlan

-- Without a default the behaviour between the pieces is undefined, so it is
-- enough to branch at the smallest key of every piece but the first.
-- Panics when there is no piece at all.
mkFlatSwitchPlan signed Nothing _ pieces = case pieces of
    []           -> pprPanic "mkFlatSwitchPlan with nothing left to do" empty
    first : rest -> (leaf first, [ (fst (M.findMin piece), leaf piece) | piece <- rest ])
  where
    leaf = mkLeafPlan signed Nothing

-- With a default, segments that jump to the default have to be interleaved
-- between (and around) the pieces so that the whole range is covered.
mkFlatSwitchPlan signed (Just l) r ms = (firstPlan, laterPlans)
  where
    -- Every segment is tagged with its lower bound; the bound of the very
    -- first segment is not needed in the result.
    (_, firstPlan) : laterPlans = segments r ms

    toDefault = Unconditionally l

    segments (lo,hi) []
        | lo > hi   = []
        | otherwise = [(lo, toDefault)]
    segments (lo,hi) pieces@(m:rest) = case compare lo lowKey of
        -- A gap before the next piece: fill it with a jump to the default.
        LT -> (lo, toDefault) : segments (lowKey,hi) pieces
        -- The piece starts right here; continue just after its last key.
        EQ -> (lo, mkLeafPlan signed (Just l) m) : segments (highKey+1,hi) rest
        GT -> pprPanic "mkFlatSwitchPlan" (integer lo <+> integer lowKey)
      where
        lowKey  = fst (M.findMin m)
        highKey = fst (M.findMax m)


-- Plan for a single piece: a map with exactly one entry becomes a plain
-- jump to its target, anything else a jump table spanning the piece's
-- smallest to largest key.
mkLeafPlan :: Bool -> Maybe Label -> M.Map Integer Label -> SwitchPlan
mkLeafPlan signed mbdef m = case M.elems m of
    [l] -> Unconditionally l
    _   -> JumpTable (mkSwitchTargets signed (lowKey, highKey) mbdef m)
  where
    lowKey  = fst (M.findMin m)
    highKey = fst (M.findMax m)

---
---  Step 4: Reduce the number of branches using ==
---

-- Three unconditional jumps in a row, where the outer two go to the same
-- label and the two separating bounds differ by exactly one, only single out
-- one value. They are replaced by one equality test on that value.
findSingleValues :: FlatSwitchPlan -> FlatSwitchPlan
findSingleValues (p, []) = (p, [])
findSingleValues (p, (i,p'):xs)
  | Unconditionally l  <- p
  , Unconditionally l2 <- p'
  , (i', Unconditionally l3) : rest <- xs
  , l == l3 && i + 1 == i'
  = findSingleValues (IfEqual i l2 (Unconditionally l), rest)
  | otherwise
  = (p,i) `consSL` findSingleValues (p', xs)

---
---  Step 5: Actually build the tree
---

-- Build a balanced tree from a separated list: a single plan is returned as
-- it is, a longer list is halved by divideSL and joined with a less-than test
-- on the separator between the halves.
buildTree :: Bool -> FlatSwitchPlan -> SwitchPlan
buildTree signed plans = case plans of
    (only, []) -> only
    _          ->
        let (lower, pivot, upper) = divideSL plans
            build                 = buildTree signed
        in  IfLT signed pivot (build lower) (build upper)



--
-- Utility data type: Non-empty lists with extra markers in between each
-- element:
--
-- The first component is the first element; every further element is paired
-- with the marker that separates it from its predecessor.

type SeparatedList b a = (a, [(b,a)])

-- Prepends an element, together with the marker that separates it from the
-- rest, to a separated list.
consSL :: (a, b) -> SeparatedList b a -> SeparatedList b a
consSL (new, marker) (oldFirst, rest) = (new, (marker, oldFirst) : rest)

-- Splits a separated list at one of its middle markers, returning the part
-- before the marker, the marker itself and the part after it. A list with a
-- single element has no marker to split at, which is an error.
divideSL :: SeparatedList b a -> (SeparatedList b a, b, SeparatedList b a)
divideSL (p, xs)
  | null xs   = error "divideSL: Singleton SeparatedList"
  | otherwise = ((p, before), marker, (q, after))
  where
    -- xs is non-empty here and the split point is below its length, so the
    -- second half always starts with a (marker, element) pair.
    (before, (marker, q) : after) = splitAt (length xs `div` 2) xs

--
-- Other Utilities
--

-- Keeps only the entries whose key lies in the inclusive range (lo,hi).
restrictMap :: (Integer,Integer) -> M.Map Integer b -> M.Map Integer b
restrictMap (lo,hi) = dropAbove . dropBelow
  where
    -- M.split discards the pivot key itself, hence the pivots just outside
    -- the range.
    dropBelow = snd . M.split (lo - 1)
    dropAbove = fst . M.split (hi + 1)

-- Shifts the pairing by one position, using the two extra arguments as the
-- outermost ends,
-- for example: reassocTuples a [(b,c),(d,e)] f == [(a,b),(c,d),(e,f)]
reassocTuples :: a -> [(a,a)] -> a -> [(a,a)]
reassocTuples initial tuples final = zip lefts rights
  where
    -- Each right-hand component opens the next pair, ...
    lefts  = initial : map snd tuples
    -- ... and each left-hand component closes the previous one.
    rights = map fst tuples ++ [final]

-- Note [CmmSwitch vs. CmmImplementSwitchPlans]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- I (Joachim) separated the two somewhat closely related modules
--
--  - CmmSwitch, which provides the SwitchTargets type and contains the strategy
--    for implementing a Cmm switch (createSwitchPlan), and
--  - CmmImplementSwitchPlans, which contains the actual Cmm graph modification,
--
-- for these reasons:
--
--  * CmmSwitch is very low in the dependency tree, i.e. does not depend on any
--    GHC specific modules at all (with the exception of GhcPrelude, Outputable,
--    DynFlags and Hoopl.Label). CmmImplementSwitchPlans is the Cmm
--    transformation and hence very high in the dependency tree.
--  * CmmSwitch provides the SwitchTargets data type, which is abstract, but
--    used in CmmNodes.
--  * Because CmmSwitch is low in the dependency tree, the separation allows
--    for more parallelism when building GHC.
--  * The interaction between the modules is very explicit and easy to
--    understand, due to the small and simple interface.