{-# LANGUAGE CPP #-}

module GHC.CmmToAsm.Reg.Graph.TrivColorable (
        trivColorable,
)

where

#include "HsVersions.h"

import GHC.Prelude

import GHC.Platform.Reg.Class
import GHC.Platform.Reg

import GHC.Data.Graph.Base

import GHC.Types.Unique.Set
import GHC.Platform
import GHC.Utils.Panic

-- trivColorable ---------------------------------------------------------------

-- trivColorable function for the graph coloring allocator
--
--      This gets hammered by scanGraph during register allocation,
--      so needs to be fairly efficient.
--
--      NOTE:   This only works for architectures with just RcInteger and RcDouble
--              (which are disjoint) ie. x86, x86_64 and ppc
--
--      The number of allocatable regs is hard coded in here so we can do
--              a fast comparison in trivColorable.
--
--      It's ok if these numbers are _less_ than the actual number of free
--              regs, but they can't be more or the register conflict
--              graph won't color.
--
--      If the graph doesn't color then the allocator will panic, but it won't
--              generate bad object code or anything nasty like that.
--
--      There is an allocatableRegsInClass :: RegClass -> Int, but doing
--      the unboxing is too slow for us here.
--      TODO: Is that still true? Could we use allocatableRegsInClass
--      without losing performance now?
--
--      Look at includes/stg/MachRegs.h to get the numbers.
--


-- Disjoint registers ----------------------------------------------------------
--
--      The definition has been unfolded into individual cases for speed.
--      Each architecture has a different register setup, so we use a
--      different regSqueeze function for each.
--
accSqueeze
        :: Int
        -> Int
        -> (reg -> Int)
        -> UniqSet reg
        -> Int

accSqueeze :: forall reg. Int -> Int -> (reg -> Int) -> UniqSet reg -> Int
accSqueeze Int
count Int
maxCount reg -> Int
squeeze UniqSet reg
us = Int -> [reg] -> Int
acc Int
count (forall elt. UniqSet elt -> [elt]
nonDetEltsUniqSet UniqSet reg
us)
  -- See Note [Unique Determinism and code generation]
  where acc :: Int -> [reg] -> Int
acc Int
count [] = Int
count
        acc Int
count [reg]
_ | Int
count forall a. Ord a => a -> a -> Bool
>= Int
maxCount = Int
count
        acc Int
count (reg
r:[reg]
rs) = Int -> [reg] -> Int
acc (Int
count forall a. Num a => a -> a -> a
+ reg -> Int
squeeze reg
r) [reg]
rs

{- Note [accSqueeze]
~~~~~~~~~~~~~~~~~~~~
BL 2007/09
Doing a nice fold over the UniqSet makes trivColorable use
32% of total compile time and 42% of total alloc when compiling SHA1.hs from darcs.
Therefore the UniqFM is made non-abstract and we use custom fold.

MS 2010/04
When converting UniqFM to use Data.IntMap, the fold cannot use UniqFM internal
representation any more. But it is imperative that the accSqueeze stops
the folding if the count gets greater or equal to maxCount. We thus convert
UniqFM to a (lazy) list, do the fold and stops if necessary, which was
the most efficient variant tried. Benchmark compiling 10-times SHA1.hs follows.
(original = previous implementation, folding = fold of the whole UFM,
 lazyFold = the current implementation,
 hackFold = using internal representation of Data.IntMap)

                                 original  folding   hackFold  lazyFold
 -O -fasm (used everywhere)      31.509s   30.387s   30.791s   30.603s
                                 100.00%   96.44%    97.72%    97.12%
 -fregs-graph                    67.938s   74.875s   62.673s   64.679s
                                 100.00%   110.21%   92.25%    95.20%
 -fregs-iterative                89.761s   143.913s  81.075s   86.912s
                                 100.00%   160.33%   90.32%    96.83%
 -fnew-codegen                   38.225s   37.142s   37.551s   37.119s
                                 100.00%   97.17%    98.24%    97.11%
 -fnew-codegen -fregs-graph      91.786s   91.51s    87.368s   86.88s
                                 100.00%   99.70%    95.19%    94.65%
 -fnew-codegen -fregs-iterative  206.72s   343.632s  194.694s  208.677s
                                 100.00%   166.23%   94.18%    100.95%
-}

trivColorable
        :: Platform
        -> (RegClass -> VirtualReg -> Int)
        -> (RegClass -> RealReg    -> Int)
        -> Triv VirtualReg RegClass RealReg

trivColorable :: Platform
-> (RegClass -> VirtualReg -> Int)
-> (RegClass -> RealReg -> Int)
-> Triv VirtualReg RegClass RealReg
trivColorable Platform
platform RegClass -> VirtualReg -> Int
virtualRegSqueeze RegClass -> RealReg -> Int
realRegSqueeze RegClass
RcInteger UniqSet VirtualReg
conflicts UniqSet RealReg
exclusions
        | let cALLOCATABLE_REGS_INTEGER :: Int
cALLOCATABLE_REGS_INTEGER
                  =        (case Platform -> Arch
platformArch Platform
platform of
                            Arch
ArchX86       -> Int
3
                            Arch
ArchX86_64    -> Int
5
                            Arch
ArchPPC       -> Int
16
                            Arch
ArchSPARC     -> Int
14
                            Arch
ArchSPARC64   -> forall a. String -> a
panic String
"trivColorable ArchSPARC64"
                            ArchPPC_64 PPC_64ABI
_  -> Int
15
                            ArchARM ArmISA
_ [ArmISAExt]
_ ArmABI
_ -> forall a. String -> a
panic String
"trivColorable ArchARM"
                            -- We should be able to allocate *a lot* more in princple.
                            -- essentially all 32 - SP, so 31, we'd trash the link reg
                            -- as well as the platform and all others though.
                            Arch
ArchAArch64   -> Int
18
                            Arch
ArchAlpha     -> forall a. String -> a
panic String
"trivColorable ArchAlpha"
                            Arch
ArchMipseb    -> forall a. String -> a
panic String
"trivColorable ArchMipseb"
                            Arch
ArchMipsel    -> forall a. String -> a
panic String
"trivColorable ArchMipsel"
                            Arch
ArchS390X     -> forall a. String -> a
panic String
"trivColorable ArchS390X"
                            Arch
ArchRISCV64   -> forall a. String -> a
panic String
"trivColorable ArchRISCV64"
                            Arch
ArchJavaScript-> forall a. String -> a
panic String
"trivColorable ArchJavaScript"
                            Arch
ArchUnknown   -> forall a. String -> a
panic String
"trivColorable ArchUnknown")
        , Int
count2        <- forall reg. Int -> Int -> (reg -> Int) -> UniqSet reg -> Int
accSqueeze Int
0 Int
cALLOCATABLE_REGS_INTEGER
                                (RegClass -> VirtualReg -> Int
virtualRegSqueeze RegClass
RcInteger)
                                UniqSet VirtualReg
conflicts

        , Int
count3        <- forall reg. Int -> Int -> (reg -> Int) -> UniqSet reg -> Int
accSqueeze  Int
count2    Int
cALLOCATABLE_REGS_INTEGER
                                (RegClass -> RealReg -> Int
realRegSqueeze   RegClass
RcInteger)
                                UniqSet RealReg
exclusions

        = Int
count3 forall a. Ord a => a -> a -> Bool
< Int
cALLOCATABLE_REGS_INTEGER

trivColorable Platform
platform RegClass -> VirtualReg -> Int
virtualRegSqueeze RegClass -> RealReg -> Int
realRegSqueeze RegClass
RcFloat UniqSet VirtualReg
conflicts UniqSet RealReg
exclusions
        | let cALLOCATABLE_REGS_FLOAT :: Int
cALLOCATABLE_REGS_FLOAT
                  =        (case Platform -> Arch
platformArch Platform
platform of
                    -- On x86_64 and x86, Float and RcDouble
                    -- use the same registers,
                    -- so we only use RcDouble to represent the
                    -- register allocation problem on those types.
                            Arch
ArchX86       -> Int
0
                            Arch
ArchX86_64    -> Int
0
                            Arch
ArchPPC       -> Int
0
                            Arch
ArchSPARC     -> Int
22
                            Arch
ArchSPARC64   -> forall a. String -> a
panic String
"trivColorable ArchSPARC64"
                            ArchPPC_64 PPC_64ABI
_  -> Int
0
                            ArchARM ArmISA
_ [ArmISAExt]
_ ArmABI
_ -> forall a. String -> a
panic String
"trivColorable ArchARM"
                            -- we can in princple address all the float regs as
                            -- segments. So we could have 64 Float regs. Or
                            -- 128 Half regs, or even 256 Byte regs.
                            Arch
ArchAArch64   -> Int
0
                            Arch
ArchAlpha     -> forall a. String -> a
panic String
"trivColorable ArchAlpha"
                            Arch
ArchMipseb    -> forall a. String -> a
panic String
"trivColorable ArchMipseb"
                            Arch
ArchMipsel    -> forall a. String -> a
panic String
"trivColorable ArchMipsel"
                            Arch
ArchS390X     -> forall a. String -> a
panic String
"trivColorable ArchS390X"
                            Arch
ArchRISCV64   -> forall a. String -> a
panic String
"trivColorable ArchRISCV64"
                            Arch
ArchJavaScript-> forall a. String -> a
panic String
"trivColorable ArchJavaScript"
                            Arch
ArchUnknown   -> forall a. String -> a
panic String
"trivColorable ArchUnknown")
        , Int
count2        <- forall reg. Int -> Int -> (reg -> Int) -> UniqSet reg -> Int
accSqueeze Int
0 Int
cALLOCATABLE_REGS_FLOAT
                                (RegClass -> VirtualReg -> Int
virtualRegSqueeze RegClass
RcFloat)
                                UniqSet VirtualReg
conflicts

        , Int
count3        <- forall reg. Int -> Int -> (reg -> Int) -> UniqSet reg -> Int
accSqueeze  Int
count2    Int
cALLOCATABLE_REGS_FLOAT
                                (RegClass -> RealReg -> Int
realRegSqueeze   RegClass
RcFloat)
                                UniqSet RealReg
exclusions

        = Int
count3 forall a. Ord a => a -> a -> Bool
< Int
cALLOCATABLE_REGS_FLOAT

trivColorable Platform
platform RegClass -> VirtualReg -> Int
virtualRegSqueeze RegClass -> RealReg -> Int
realRegSqueeze RegClass
RcDouble UniqSet VirtualReg
conflicts UniqSet RealReg
exclusions
        | let cALLOCATABLE_REGS_DOUBLE :: Int
cALLOCATABLE_REGS_DOUBLE
                  =        (case Platform -> Arch
platformArch Platform
platform of
                            Arch
ArchX86       -> Int
8
                            -- in x86 32bit mode sse2 there are only
                            -- 8 XMM registers xmm0 ... xmm7
                            Arch
ArchX86_64    -> Int
10
                            -- in x86_64 there are 16 XMM registers
                            -- xmm0 .. xmm15, here 10 is a
                            -- "dont need to solve conflicts" count that
                            -- was chosen at some point in the past.
                            Arch
ArchPPC       -> Int
26
                            Arch
ArchSPARC     -> Int
11
                            Arch
ArchSPARC64   -> forall a. String -> a
panic String
"trivColorable ArchSPARC64"
                            ArchPPC_64 PPC_64ABI
_  -> Int
20
                            ArchARM ArmISA
_ [ArmISAExt]
_ ArmABI
_ -> forall a. String -> a
panic String
"trivColorable ArchARM"
                            Arch
ArchAArch64   -> Int
32
                            Arch
ArchAlpha     -> forall a. String -> a
panic String
"trivColorable ArchAlpha"
                            Arch
ArchMipseb    -> forall a. String -> a
panic String
"trivColorable ArchMipseb"
                            Arch
ArchMipsel    -> forall a. String -> a
panic String
"trivColorable ArchMipsel"
                            Arch
ArchS390X     -> forall a. String -> a
panic String
"trivColorable ArchS390X"
                            Arch
ArchRISCV64   -> forall a. String -> a
panic String
"trivColorable ArchRISCV64"
                            Arch
ArchJavaScript-> forall a. String -> a
panic String
"trivColorable ArchJavaScript"
                            Arch
ArchUnknown   -> forall a. String -> a
panic String
"trivColorable ArchUnknown")
        , Int
count2        <- forall reg. Int -> Int -> (reg -> Int) -> UniqSet reg -> Int
accSqueeze Int
0 Int
cALLOCATABLE_REGS_DOUBLE
                                (RegClass -> VirtualReg -> Int
virtualRegSqueeze RegClass
RcDouble)
                                UniqSet VirtualReg
conflicts

        , Int
count3        <- forall reg. Int -> Int -> (reg -> Int) -> UniqSet reg -> Int
accSqueeze  Int
count2    Int
cALLOCATABLE_REGS_DOUBLE
                                (RegClass -> RealReg -> Int
realRegSqueeze   RegClass
RcDouble)
                                UniqSet RealReg
exclusions

        = Int
count3 forall a. Ord a => a -> a -> Bool
< Int
cALLOCATABLE_REGS_DOUBLE




-- Specification Code ----------------------------------------------------------
--
--      The trivColorable function for each particular architecture should
--      implement the following function, but faster.
--

{-
trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
trivColorable classN conflicts exclusions
 = let

        acc :: Reg -> (Int, Int) -> (Int, Int)
        acc r (cd, cf)
         = case regClass r of
                RcInteger       -> (cd+1, cf)
                RcFloat         -> (cd,   cf+1)
                _               -> panic "Regs.trivColorable: reg class not handled"

        tmp                     = nonDetFoldUFM acc (0, 0) conflicts
        (countInt,  countFloat) = nonDetFoldUFM acc tmp    exclusions

        squeese         = worst countInt   classN RcInteger
                        + worst countFloat classN RcFloat

   in   squeese < allocatableRegsInClass classN

-- | Worst case displacement
--      node N of classN has n neighbors of class C.
--
--      We currently only have RcInteger and RcDouble, which don't conflict at all.
--      This is a bit boring compared to what's in RegArchX86.
--
worst :: Int -> RegClass -> RegClass -> Int
worst n classN classC
 = case classN of
        RcInteger
         -> case classC of
                RcInteger       -> min n (allocatableRegsInClass RcInteger)
                RcFloat         -> 0

        RcDouble
         -> case classC of
                RcFloat         -> min n (allocatableRegsInClass RcFloat)
                RcInteger       -> 0

-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
-- i.e., these are the regs for which we are prepared to allow the
-- register allocator to attempt to map VRegs to.
allocatableRegs :: [RegNo]
allocatableRegs
   = let isFree i = freeReg i
     in  filter isFree allMachRegNos


-- | The number of regs in each class.
--      We go via top level CAFs to ensure that we're not recomputing
--      the length of these lists each time the fn is called.
allocatableRegsInClass :: RegClass -> Int
allocatableRegsInClass cls
 = case cls of
        RcInteger       -> allocatableRegsInteger
        RcFloat         -> allocatableRegsDouble

allocatableRegsInteger :: Int
allocatableRegsInteger
        = length $ filter (\r -> regClass r == RcInteger)
                 $ map RealReg allocatableRegs

allocatableRegsFloat :: Int
allocatableRegsFloat
        = length $ filter (\r -> regClass r == RcFloat
                 $ map RealReg allocatableRegs
-}