{-# OPTIONS -fno-warn-unused-binds #-}

module RegAlloc.Graph.TrivColorable (
	trivColorable,
)

where

#include "HsVersions.h"

import RegClass
import Reg

import GraphBase

import UniqFM
import FastTypes


-- trivColorable ---------------------------------------------------------------

-- trivColorable function for the graph coloring allocator
--
--	This gets hammered by scanGraph during register allocation,
--	so needs to be fairly efficient.
--
--	NOTE: 	This only works for arcitectures with just RcInteger and RcDouble
--		(which are disjoint) ie. x86, x86_64 and ppc
--
--	BL 2007/09
--	Doing a nice fold over the UniqSet makes trivColorable use
--	32% of total compile time and 42% of total alloc when compiling SHA1.lhs from darcs.
--
-- 	The number of allocatable regs is hard coded here so we can do a fast
-- 		comparision in trivColorable. 
--
-- 	It's ok if these numbers are _less_ than the actual number of free regs, 
--		but they can't be more or the register conflict graph won't color.
--
-- 	If the graph doesn't color then the allocator will panic, but it won't 
--		generate bad object code or anything nasty like that.
--
-- 	There is an allocatableRegsInClass :: RegClass -> Int, but doing the unboxing
-- 	is too slow for us here.
--
-- 	Look at includes/stg/MachRegs.h to get these numbers.
--

#if i386_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(3))
#define ALLOCATABLE_REGS_DOUBLE  (_ILIT(6))
#define ALLOCATABLE_REGS_FLOAT   (_ILIT(0))


#elif x86_64_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(5))
#define ALLOCATABLE_REGS_DOUBLE  (_ILIT(2))
#define ALLOCATABLE_REGS_FLOAT   (_ILIT(0))


#elif powerpc_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(16))
#define ALLOCATABLE_REGS_DOUBLE  (_ILIT(26))
#define ALLOCATABLE_REGS_FLOAT   (_ILIT(0))


#elif sparc_TARGET_ARCH
#define ALLOCATABLE_REGS_INTEGER (_ILIT(14))
#define ALLOCATABLE_REGS_DOUBLE  (_ILIT(11))
#define ALLOCATABLE_REGS_FLOAT   (_ILIT(22))


#else
#error ToDo: choose which trivColorable function to use for this architecture.
#endif



-- Disjoint registers ----------------------------------------------------------
--	
--	The definition has been unfolded into individual cases for speed.
-- 	Each architecture has a different register setup, so we use a
--	different regSqueeze function for each.
--
accSqueeze 
	:: FastInt 
	-> FastInt
	-> (reg -> FastInt) 
	-> UniqFM reg
	-> FastInt

accSqueeze count maxCount squeeze ufm 
 = case ufm of
 	NodeUFM _ _ left right
	 -> case accSqueeze count maxCount squeeze right of
	 	count' -> case count' >=# maxCount of
				False -> accSqueeze count' maxCount squeeze left
				True  -> count'
				
	LeafUFM _ reg	-> count +# squeeze reg
 	EmptyUFM	-> count


trivColorable
	:: (RegClass -> VirtualReg -> FastInt)
	-> (RegClass -> RealReg    -> FastInt)
	-> Triv VirtualReg RegClass RealReg

trivColorable virtualRegSqueeze realRegSqueeze RcInteger conflicts exclusions
	| count2	<- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_INTEGER 
				(virtualRegSqueeze RcInteger)
				conflicts
				
	, count3	<- accSqueeze  count2    ALLOCATABLE_REGS_INTEGER
				(realRegSqueeze   RcInteger)
				exclusions

	= count3 <# ALLOCATABLE_REGS_INTEGER

trivColorable virtualRegSqueeze realRegSqueeze RcFloat conflicts exclusions
	| count2	<- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_FLOAT
				(virtualRegSqueeze RcFloat)
				conflicts
				
	, count3	<- accSqueeze  count2    ALLOCATABLE_REGS_FLOAT
				(realRegSqueeze   RcFloat)
				exclusions

	= count3 <# ALLOCATABLE_REGS_FLOAT

trivColorable virtualRegSqueeze realRegSqueeze RcDouble conflicts exclusions
	| count2	<- accSqueeze (_ILIT(0)) ALLOCATABLE_REGS_DOUBLE
				(virtualRegSqueeze RcDouble)
				conflicts
				
	, count3	<- accSqueeze  count2    ALLOCATABLE_REGS_DOUBLE
				(realRegSqueeze   RcDouble)
				exclusions

	= count3 <# ALLOCATABLE_REGS_DOUBLE


-- Specification Code ----------------------------------------------------------
--
--	The trivColorable function for each particular architecture should
--	implement the following function, but faster.
--

{-
trivColorable :: RegClass -> UniqSet Reg -> UniqSet Reg -> Bool
trivColorable classN conflicts exclusions
 = let

	acc :: Reg -> (Int, Int) -> (Int, Int)
	acc r (cd, cf)	
	 = case regClass r of
		RcInteger	-> (cd+1, cf)
		RcFloat		-> (cd,   cf+1)
		_		-> panic "Regs.trivColorable: reg class not handled"

	tmp			= foldUniqSet acc (0, 0) conflicts
	(countInt,  countFloat)	= foldUniqSet acc tmp    exclusions

	squeese		= worst countInt   classN RcInteger
			+ worst countFloat classN RcFloat

   in	squeese < allocatableRegsInClass classN

-- | Worst case displacement
--	node N of classN has n neighbors of class C.
--
--	We currently only have RcInteger and RcDouble, which don't conflict at all.
--	This is a bit boring compared to what's in RegArchX86.
--
worst :: Int -> RegClass -> RegClass -> Int
worst n classN classC
 = case classN of
 	RcInteger
	 -> case classC of
	 	RcInteger	-> min n (allocatableRegsInClass RcInteger)
		RcFloat		-> 0
		
	RcDouble
	 -> case classC of
	 	RcFloat		-> min n (allocatableRegsInClass RcFloat)
		RcInteger	-> 0

-- allocatableRegs is allMachRegNos with the fixed-use regs removed.
-- i.e., these are the regs for which we are prepared to allow the
-- register allocator to attempt to map VRegs to.
allocatableRegs :: [RegNo]
allocatableRegs
   = let isFree i = isFastTrue (freeReg i)
     in  filter isFree allMachRegNos


-- | The number of regs in each class.
--	We go via top level CAFs to ensure that we're not recomputing
--	the length of these lists each time the fn is called.
allocatableRegsInClass :: RegClass -> Int
allocatableRegsInClass cls
 = case cls of
 	RcInteger	-> allocatableRegsInteger
	RcFloat		-> allocatableRegsDouble

allocatableRegsInteger :: Int
allocatableRegsInteger	
	= length $ filter (\r -> regClass r == RcInteger) 
		 $ map RealReg allocatableRegs

allocatableRegsFloat :: Int
allocatableRegsFloat
	= length $ filter (\r -> regClass r == RcFloat 
		 $ map RealReg allocatableRegs
-}