module GHC.CmmToAsm.Reg.Linear.AArch64 where

import GHC.Prelude

import GHC.CmmToAsm.AArch64.Regs
import GHC.Platform.Reg.Class
import GHC.Platform.Reg

import GHC.Utils.Outputable
import GHC.Utils.Panic
import GHC.Utils.Misc( HasDebugCallStack )
import GHC.Platform

import Data.Word

-- AArch64 has 32 64bit general purpose register r0..r30, and zr/sp
-- AArch64 has 32 128bit floating point registers v0..v31 as part of the NEON
-- extension in Armv8-A.
--
-- Armv8-A is a fundamental change to the Arm architecture. It supports the
-- 64-bit Execution state called “AArch64”, and a new 64-bit instruction set
-- “A64”. To provide compatibility with the Armv7-A (32-bit architecture)
-- instruction set, a 32-bit variant of Armv8-A “AArch32” is provided. Most of
-- existing Armv7-A code can be run in the AArch32 execution state of Armv8-A.
--
-- these can be addresses as q/d/s/h/b 0..31, or v.f<size>[idx]
-- where size is 64, 32, 16, 8, ... and the index i allows us
-- to access the given part.
--
-- History of Arm Adv SIMD
-- .---------------------------------------------------------------------------.
-- | Armv6                  | Armv7-A                | Armv8-A AArch64         |
-- | SIMD extension         | NEON                   | NEON                    |
-- |===========================================================================|
-- | - Operates on 32-bit   | - Separate reg. bank,  | - Separate reg. bank,   |
-- |   GP ARM registers     |    32x64-bit NEON regs |   32x128-bit NEON regs  |
-- | - 8-bit/16-bit integer | - 8/16/32/64-bit int   | - 8/16/32/64-bit int    |
-- |                        | - Single precision fp  | - Single precision fp   |
-- |                        |                        | - Double precision fp   |
-- |                        |                        | - Single/Double fp are  |
-- |                        |                        |   IEEE compliant        |
-- | - 2x16-bit/4x8-bit ops | - Up to 16x8-bit ops   | - Up to 16x8-bit ops    |
-- |   per instruction      |   per instruction      |   per instruction       |
-- '---------------------------------------------------------------------------'

data FreeRegs = FreeRegs !Word32 !Word32

instance Show FreeRegs where
  show :: FreeRegs -> String
show (FreeRegs Word32
g Word32
f) = String
"FreeRegs: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word32 -> String
showBits Word32
g String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"; " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word32 -> String
showBits Word32
f

instance Outputable FreeRegs where
    ppr :: FreeRegs -> SDoc
ppr (FreeRegs Word32
g Word32
f) = String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"   " SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> (Int -> SDoc -> SDoc) -> SDoc -> [Int] -> SDoc
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\Int
i SDoc
x -> Int -> SDoc
forall {doc}. IsLine doc => Int -> doc
pad_int Int
i    SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> SDoc
x) (String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"") [Int
0..Int
31]
                      SDoc -> SDoc -> SDoc
forall doc. IsDoc doc => doc -> doc -> doc
$$ String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"GPR" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> (Int -> SDoc -> SDoc) -> SDoc -> [Int] -> SDoc
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\Int
i SDoc
x -> Word32 -> Int -> SDoc
forall {a} {doc}. (Bits a, IsLine doc) => a -> Int -> doc
show_bit Word32
g Int
i SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> SDoc
x) (String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"") [Int
0..Int
31]
                      SDoc -> SDoc -> SDoc
forall doc. IsDoc doc => doc -> doc -> doc
$$ String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"FPR" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> (Int -> SDoc -> SDoc) -> SDoc -> [Int] -> SDoc
forall a b. (a -> b -> b) -> b -> [a] -> b
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\Int
i SDoc
x -> Word32 -> Int -> SDoc
forall {a} {doc}. (Bits a, IsLine doc) => a -> Int -> doc
show_bit Word32
f Int
i SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<+> SDoc
x) (String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"") [Int
0..Int
31]
      where pad_int :: Int -> doc
pad_int Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
10 = Char -> doc
forall doc. IsLine doc => Char -> doc
char Char
' ' doc -> doc -> doc
forall doc. IsLine doc => doc -> doc -> doc
<> Int -> doc
forall {doc}. IsLine doc => Int -> doc
int Int
i
            pad_int Int
i = Int -> doc
forall {doc}. IsLine doc => Int -> doc
int Int
i
            -- remember bit = 1 means it's available.
            show_bit :: a -> Int -> doc
show_bit a
bits Int
bit | a -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit a
bits Int
bit = String -> doc
forall doc. IsLine doc => String -> doc
text String
"  "
            show_bit a
_    Int
_ = String -> doc
forall doc. IsLine doc => String -> doc
text String
" x"

noFreeRegs :: FreeRegs
noFreeRegs :: FreeRegs
noFreeRegs = Word32 -> Word32 -> FreeRegs
FreeRegs Word32
0 Word32
0

showBits :: Word32 -> String
showBits :: Word32 -> String
showBits Word32
w = (Int -> Char) -> [Int] -> String
forall a b. (a -> b) -> [a] -> [b]
map (\Int
i -> if Word32 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit Word32
w Int
i then Char
'1' else Char
'0') [Int
0..Int
31]

-- FR instance implementation (See Linear.FreeRegs)
allocateReg :: HasDebugCallStack => RealReg -> FreeRegs -> FreeRegs
allocateReg :: HasDebugCallStack => RealReg -> FreeRegs -> FreeRegs
allocateReg (RealRegSingle Int
r) (FreeRegs Word32
g Word32
f)
    | Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
31 Bool -> Bool -> Bool
&& Word32 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit Word32
f (Int
r Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
32) = Word32 -> Word32 -> FreeRegs
FreeRegs Word32
g (Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
clearBit Word32
f (Int
r Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
32))
    | Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
32 Bool -> Bool -> Bool
&& Word32 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit Word32
g Int
r = Word32 -> Word32 -> FreeRegs
FreeRegs (Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
clearBit Word32
g Int
r) Word32
f
    | Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
31 = String -> FreeRegs
forall a. HasCallStack => String -> a
panic (String -> FreeRegs) -> String -> FreeRegs
forall a b. (a -> b) -> a -> b
$ String
"Linear.AArch64.allocReg: double allocation of float reg v" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show (Int
r Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
32) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"; " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word32 -> String
showBits Word32
f
    | Bool
otherwise = String -> SDoc -> FreeRegs
forall a. HasCallStack => String -> SDoc -> a
pprPanic String
"Linear.AArch64.allocReg" (SDoc -> FreeRegs) -> SDoc -> FreeRegs
forall a b. (a -> b) -> a -> b
$ String -> SDoc
forall doc. IsLine doc => String -> doc
text (String
"double allocation of gp reg x" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> String
forall a. Show a => a -> String
show Int
r String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
"; " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Word32 -> String
showBits Word32
g)

-- we start from 28 downwards... the logic is similar to the ppc logic.
-- 31 is Stack Pointer
-- 30 is Link Register
-- 29 is Stack Frame (by convention)
-- 19-28 are callee save
-- the lower ones are all caller save

-- For this reason someone decided to give aarch64 only 6 regs for
-- STG:
-- 19: Base
-- 20: Sp
-- 21: Hp
-- 22-27: R1-R6
-- 28: SpLim

-- For LLVM code gen interop:
-- See https://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20150119/253722.html
-- and the current ghccc implementation here:
-- https://github.com/llvm/llvm-project/blob/161ae1f39816edf667aaa190bce702a86879c7bd/llvm/lib/Target/AArch64/AArch64CallingConvention.td#L324-L363
-- and https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/compiler/generated-code
-- for the STG discussion.
{- For reference the ghcc from the link above:
let Entry = 1 in
def CC_AArch64_GHC : CallingConv<[
  CCIfType<[iPTR], CCBitConvertToType<i64>>,

  // Handle all vector types as either f64 or v2f64.
  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>,

  CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
  CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>,
  CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>,

  // Promote i8/i16/i32 arguments to i64.
  CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,

  // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
  CCIfType<[i64], CCAssignToReg<[X19, X20, X21, X22, X23, X24, X25, X26, X27, X28]>>
]>;
-}

getFreeRegs :: RegClass -> FreeRegs -> [RealReg]
getFreeRegs :: RegClass -> FreeRegs -> [RealReg]
getFreeRegs RegClass
cls (FreeRegs Word32
g Word32
f)
  | RegClass
RcFloat   <- RegClass
cls = [] -- For now we only support double and integer registers, floats will need to be promoted.
  | RegClass
RcDouble  <- RegClass
cls = Int -> Word32 -> Int -> [RealReg]
forall {t}. Bits t => Int -> t -> Int -> [RealReg]
go Int
32 Word32
f Int
31
  | RegClass
RcInteger <- RegClass
cls = Int -> Word32 -> Int -> [RealReg]
forall {t}. Bits t => Int -> t -> Int -> [RealReg]
go  Int
0 Word32
g Int
18
    where
        go :: Int -> t -> Int -> [RealReg]
go Int
_   t
_ Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 = []
        go Int
off t
x Int
i | t -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit t
x Int
i = Int -> RealReg
RealRegSingle (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i) RealReg -> [RealReg] -> [RealReg]
forall a. a -> [a] -> [a]
: (Int -> t -> Int -> [RealReg]
go Int
off t
x (Int -> [RealReg]) -> Int -> [RealReg]
forall a b. (a -> b) -> a -> b
$! Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                   | Bool
otherwise   = Int -> t -> Int -> [RealReg]
go Int
off t
x (Int -> [RealReg]) -> Int -> [RealReg]
forall a b. (a -> b) -> a -> b
$! Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1

initFreeRegs :: Platform -> FreeRegs
initFreeRegs :: Platform -> FreeRegs
initFreeRegs Platform
platform = (FreeRegs -> RealReg -> FreeRegs)
-> FreeRegs -> [RealReg] -> FreeRegs
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ((RealReg -> FreeRegs -> FreeRegs)
-> FreeRegs -> RealReg -> FreeRegs
forall a b c. (a -> b -> c) -> b -> a -> c
flip HasDebugCallStack => RealReg -> FreeRegs -> FreeRegs
RealReg -> FreeRegs -> FreeRegs
releaseReg) FreeRegs
noFreeRegs (Platform -> [RealReg]
allocatableRegs Platform
platform)

releaseReg :: HasDebugCallStack => RealReg -> FreeRegs -> FreeRegs
releaseReg :: HasDebugCallStack => RealReg -> FreeRegs -> FreeRegs
releaseReg (RealRegSingle Int
r) (FreeRegs Word32
g Word32
f)
  | Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
31 Bool -> Bool -> Bool
&& Word32 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit Word32
f (Int
r Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
32) = String -> SDoc -> FreeRegs
forall a. HasCallStack => String -> SDoc -> a
pprPanic String
"Linear.AArch64.releaseReg" (String -> SDoc
forall doc. IsLine doc => String -> doc
text  String
"can't release non-allocated reg v" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<> Int -> SDoc
forall {doc}. IsLine doc => Int -> doc
int (Int
r Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
32))
  | Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
32 Bool -> Bool -> Bool
&& Word32 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit Word32
g Int
r = String -> SDoc -> FreeRegs
forall a. HasCallStack => String -> SDoc -> a
pprPanic String
"Linear.AArch64.releaseReg" (String -> SDoc
forall doc. IsLine doc => String -> doc
text String
"can't release non-allocated reg x" SDoc -> SDoc -> SDoc
forall doc. IsLine doc => doc -> doc -> doc
<> Int -> SDoc
forall {doc}. IsLine doc => Int -> doc
int Int
r)
  | Int
r Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
31 = Word32 -> Word32 -> FreeRegs
FreeRegs Word32
g (Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
setBit Word32
f (Int
r Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
32))
  | Bool
otherwise = Word32 -> Word32 -> FreeRegs
FreeRegs (Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
setBit Word32
g Int
r) Word32
f